string – faster substring processing C++

string – faster substring processing C++

Perhaps like this. It avoids repeated string copying by passing a pointer to the start of the current substring, together with the length of the string to match.

#include <stdio.h>
#include <string.h>

int hd(char *str, char *cmp, int len)
// find hamming distance between substring *str and *cmp of length len
{
    int ind, hamming = 0;
    for(ind=0; ind<len; ind++) {
        if(str[ind] != cmp[ind]) {
            hamming++;
        }
    }
    return hamming;
}

// Print, one per line, the Hamming distance between cmp and every
// substring of str that has the same length as cmp.
int main(void)
{
    char str[] = "abcdcddcdcdcdcd";
    char cmp[] = "abc";
    int lens = strlen(str);
    int lenc = strlen(cmp);
    // Last start index that still leaves lenc characters in str.
    // It is negative when cmp is longer than str, so the loop is skipped.
    int max = lens - lenc;
    // analyse each possible substring without copying it: pass a pointer
    // into str plus the number of characters to compare
    for (int ind = 0; ind <= max; ind++) {
        printf("%d\n", hd(str + ind, cmp, lenc));
    }
    return 0;
}

Moving malloc and free outside of the loop makes the program much faster.

char str[] = abcdcddcdcdcdcd..................... // large string
int n = strlen(str), m = 20;
char *substr = (char*) malloc(sizeof(char)*m);
for(int i=0; i<n; i++){
  //char *substr = (char*) malloc(sizeof(char)*m);
  strncpy(substr, str+i, m);
  // do some processing
  int h = hd(substr, X) // X is another string of same length
  //free(substr); 
}
free(substr);

/// @brief Hamming distance between two strings.
///
/// Counts the positions at which the characters of @p s1 and @p s2 differ.
/// The comparison covers the common prefix only: if the strings have
/// different lengths, the extra characters of the longer one are ignored,
/// so s2 is never read past its end.
///
/// @param s1 first string
/// @param s2 second string
/// @return number of differing positions within the first
///         min(s1.size(), s2.size()) characters
unsigned int hd(const std::string& s1, const std::string& s2)
{
    const std::string::size_type common =
        s1.size() < s2.size() ? s1.size() : s2.size();

    // inner_product with (plus, not_equal_to) sums one per mismatching pair.
    // The seed is 0u so the accumulation is done in unsigned int, matching
    // the return type.
    return std::inner_product(
        s1.begin(),
        s1.begin() + static_cast<std::string::difference_type>(common),
        s2.begin(),
        0u, std::plus<unsigned int>(),
        std::not_equal_to<std::string::value_type>()
    );
}

string – faster substring processing C++

Leave a Reply

Your email address will not be published. Required fields are marked *