From: Vsevolod Stakhov Date: Wed, 16 Sep 2015 14:04:24 +0000 (+0100) Subject: Add Karp-Rabin algorithm for substrings search. X-Git-Tag: 1.0.0~22 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=2c557496aa34962fc92abe6a2e1a1fcf391a4cb3;p=rspamd.git Add Karp-Rabin algorithm for substrings search. --- diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 2203cf0d4..00f689bff 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -1034,3 +1034,38 @@ rspamd_header_value_fold (const gchar *name, return res; } + +#define RKHASH(a, b, h) ((((h) - (a)*d) << 1) + (b)) + +goffset +rspamd_substring_search (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen) +{ + gint d, hash_srch, hash_in; + gsize i, j; + + /* Preprocessing */ + for (d = i = 1; i < srchlen; ++i) { + /* computes d = 2^(m-1) with the left-shift operator */ + d = (d << 1); + } + + for (hash_in = hash_srch = i = 0; i < srchlen; ++i) { + hash_srch = ((hash_srch << 1) + srch[i]); + hash_in = ((hash_in << 1) + in[i]); + } + + /* Searching */ + j = 0; + while (j <= inlen - srchlen) { + + if (hash_srch == hash_in && memcmp (srch, in + j, srchlen) == 0) { + return (goffset)j; + } + + hash_in = RKHASH (in[j], in[j + srchlen], hash_in); + ++j; + } + + return -1; +} diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 1d0a9eadb..3babc71f9 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -172,4 +172,15 @@ GString *rspamd_header_value_fold (const gchar *name, const gchar *value, guint fold_max); +/** + * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm + * @param in input + * @param inlen input len + * @param srch search string + * @param srchlen length of the search string + * @return position of the first substring match or (-1) if not found + */ +goffset rspamd_substring_search (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen); + #endif /* SRC_LIBUTIL_STR_UTIL_H_ */