Browse Source

Add caseless version of rabin-karp substring search

tags/1.1.0
Vsevolod Stakhov 8 years ago
parent
commit
7b71ab5663
2 changed files with 51 additions and 0 deletions
  1. 40
    0
      src/libutil/str_util.c
  2. 11
    0
      src/libutil/str_util.h

+ 40
- 0
src/libutil/str_util.c View File

@@ -1118,6 +1118,46 @@ rspamd_substring_search (const gchar *in, gsize inlen,
return -1;
}

/*
 * Case-insensitive (ASCII only) Karp-Rabin substring search.
 *
 * Both the pattern and the text are folded with g_ascii_tolower () before
 * hashing; a hash hit is confirmed with g_ascii_strncasecmp () so hash
 * collisions never produce a false match.
 *
 * in       text to search in (only the first inlen bytes are ever read)
 * inlen    length of the text
 * srch     pattern to look for
 * srchlen  length of the pattern
 *
 * Returns the offset of the first match, or -1 if there is no match or the
 * pattern is longer than the text.
 */
goffset
rspamd_substring_search_caseless (const gchar *in, gsize inlen,
		const gchar *srch, gsize srchlen)
{
	gint d, hash_srch, hash_in;
	gsize i, j, last;
	gchar c1, c2;

	if (inlen < srchlen) {
		return -1;
	}

	/* Preprocessing */
	/*
	 * NOTE(review): `d` is not referenced directly below; presumably it is
	 * consumed by the RKHASH macro defined elsewhere in this file - confirm.
	 * NOTE(review): `d` is a signed gint, so this shift overflows once
	 * srchlen exceeds the bit width of gint; consider an unsigned type after
	 * checking the RKHASH definition.
	 */
	for (d = i = 1; i < srchlen; ++i) {
		/* computes d = 2^(m-1) with the left-shift operator */
		d = (d << 1);
	}

	/* Hash of the pattern and of the first text window, both lowercased */
	for (hash_in = hash_srch = i = 0; i < srchlen; ++i) {
		hash_srch = ((hash_srch << 1) + g_ascii_tolower (srch[i]));
		hash_in = ((hash_in << 1) + g_ascii_tolower (in[i]));
	}

	/* Searching */
	last = inlen - srchlen; /* start offset of the final window */

	for (j = 0; j <= last; ++j) {
		if (hash_srch == hash_in &&
				g_ascii_strncasecmp (srch, in + j, srchlen) == 0) {
			return (goffset) j;
		}

		if (j == last) {
			/*
			 * There is no byte after the final window: rolling the hash
			 * would read in[inlen], which is past the end of the input.
			 */
			break;
		}

		/* Slide the window: drop in[j], append in[j + srchlen] */
		c1 = g_ascii_tolower (in[j]);
		c2 = g_ascii_tolower (in[j + srchlen]);
		hash_in = RKHASH (c1, c2, hash_in);
	}

	return -1;
}

goffset
rspamd_string_find_eoh (GString *input)
{

+ 11
- 0
src/libutil/str_util.h View File

@@ -189,6 +189,17 @@ GString *rspamd_header_value_fold (const gchar *name,
goffset rspamd_substring_search (const gchar *in, gsize inlen,
const gchar *srch, gsize srchlen);

/**
 * Search for a substring `srch` in the text `in` using the Karp-Rabin algorithm
 * in a caseless manner (ASCII only)
 * @param in input
 * @param inlen input len
 * @param srch search string
 * @param srchlen length of the search string
 * @return position of the first substring match or (-1) if not found
 * (including the case when `srch` is longer than `in`)
 */
goffset rspamd_substring_search_caseless (const gchar *in, gsize inlen,
const gchar *srch, gsize srchlen);


/**
* Search for end-of-headers mark in the input string. Returns position just after

Loading…
Cancel
Save