diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-29 09:47:34 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-12-29 09:47:34 +0000 |
commit | 7b71ab56637b83470fc0bbcfab9f96422dc83c40 (patch) | |
tree | 3b36c82ed8d4e590c499a34bc37bcc239e3062f2 | |
parent | 66efb875d0e363c230f1e30d826694b0719a0d40 (diff) | |
download | rspamd-7b71ab56637b83470fc0bbcfab9f96422dc83c40.tar.gz rspamd-7b71ab56637b83470fc0bbcfab9f96422dc83c40.zip |
Add caseless version of rabin-karp substring search
-rw-r--r-- | src/libutil/str_util.c | 40 | ||||
-rw-r--r-- | src/libutil/str_util.h | 11 |
2 files changed, 51 insertions, 0 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 8487e8ef2..62d71dd2c 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -1119,6 +1119,46 @@ rspamd_substring_search (const gchar *in, gsize inlen, } goffset +rspamd_substring_search_caseless (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen) +{ + gint d, hash_srch, hash_in; + gsize i, j; + gchar c1, c2; + + if (inlen < srchlen) { + return -1; + } + + /* Preprocessing */ + for (d = i = 1; i < srchlen; ++i) { + /* computes d = 2^(m-1) with the left-shift operator */ + d = (d << 1); + } + + for (hash_in = hash_srch = i = 0; i < srchlen; ++i) { + hash_srch = ((hash_srch << 1) + g_ascii_tolower (srch[i])); + hash_in = ((hash_in << 1) + g_ascii_tolower (in[i])); + } + + /* Searching */ + j = 0; + while (j <= inlen - srchlen) { + + if (hash_srch == hash_in && g_ascii_strncasecmp (srch, in + j, srchlen) == 0) { + return (goffset) j; + } + + c1 = g_ascii_tolower (in[j]); + c2 = g_ascii_tolower (in[j + srchlen]); + hash_in = RKHASH (c1, c2, hash_in); + ++j; + } + + return -1; +} + +goffset rspamd_string_find_eoh (GString *input) { const gchar *p, *c = NULL, *end; diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 8ea5e3fce..da794bc16 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -189,6 +189,17 @@ GString *rspamd_header_value_fold (const gchar *name, goffset rspamd_substring_search (const gchar *in, gsize inlen, const gchar *srch, gsize srchlen); +/** + * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm in caseless matter (ASCII only) + * @param in input + * @param inlen input len + * @param srch search string + * @param srchlen length of the search string + * @return position of the first substring match or (-1) if not found + */ +goffset rspamd_substring_search_caseless (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen); + /** * Search for end-of-headers mark in the input string. Returns position just after |