diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-09-16 15:04:24 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-09-16 15:04:24 +0100 |
commit | 2c557496aa34962fc92abe6a2e1a1fcf391a4cb3 (patch) | |
tree | 181e0299003821e2058a655953624006f8d1a693 | |
parent | 7351103009045a1bb2e291dfd137e328a8e845dd (diff) | |
download | rspamd-2c557496aa34962fc92abe6a2e1a1fcf391a4cb3.tar.gz rspamd-2c557496aa34962fc92abe6a2e1a1fcf391a4cb3.zip |
Add Karp-Rabin algorithm for substrings search.
-rw-r--r-- | src/libutil/str_util.c | 35 | ||||
-rw-r--r-- | src/libutil/str_util.h | 11 |
2 files changed, 46 insertions, 0 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 2203cf0d4..00f689bff 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -1034,3 +1034,38 @@ rspamd_header_value_fold (const gchar *name, return res; } + +#define RKHASH(a, b, h) ((((h) - (a)*d) << 1) + (b)) + +goffset +rspamd_substring_search (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen) +{ + gint d, hash_srch, hash_in; + gsize i, j; + + /* Preprocessing */ + for (d = i = 1; i < srchlen; ++i) { + /* computes d = 2^(m-1) with the left-shift operator */ + d = (d << 1); + } + + for (hash_in = hash_srch = i = 0; i < srchlen; ++i) { + hash_srch = ((hash_srch << 1) + srch[i]); + hash_in = ((hash_in << 1) + in[i]); + } + + /* Searching */ + j = 0; + while (j <= inlen - srchlen) { + + if (hash_srch == hash_in && memcmp (srch, in + j, srchlen) == 0) { + return (goffset)j; + } + + hash_in = RKHASH (in[j], in[j + srchlen], hash_in); + ++j; + } + + return -1; +} diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 1d0a9eadb..3babc71f9 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -172,4 +172,15 @@ GString *rspamd_header_value_fold (const gchar *name, const gchar *value, guint fold_max); +/** + * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm + * @param in input + * @param inlen input len + * @param srch search string + * @param srchlen length of the search string + * @return position of the first substring match or (-1) if not found + */ +goffset rspamd_substring_search (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen); + #endif /* SRC_LIBUTIL_STR_UTIL_H_ */ |