summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-09-16 15:04:24 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-09-16 15:04:24 +0100
commit2c557496aa34962fc92abe6a2e1a1fcf391a4cb3 (patch)
tree181e0299003821e2058a655953624006f8d1a693
parent7351103009045a1bb2e291dfd137e328a8e845dd (diff)
downloadrspamd-2c557496aa34962fc92abe6a2e1a1fcf391a4cb3.tar.gz
rspamd-2c557496aa34962fc92abe6a2e1a1fcf391a4cb3.zip
Add Karp-Rabin algorithm for substrings search.
-rw-r--r--src/libutil/str_util.c35
-rw-r--r--src/libutil/str_util.h11
2 files changed, 46 insertions, 0 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 2203cf0d4..00f689bff 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -1034,3 +1034,38 @@ rspamd_header_value_fold (const gchar *name,
return res;
}
+
+#define RKHASH(a, b, h) ((((h) - (a)*d) << 1) + (b))
+
+goffset
+rspamd_substring_search (const gchar *in, gsize inlen,
+ const gchar *srch, gsize srchlen)
+{
+ gint d, hash_srch, hash_in;
+ gsize i, j;
+
+ /* Preprocessing */
+ for (d = i = 1; i < srchlen; ++i) {
+ /* computes d = 2^(m-1) with the left-shift operator */
+ d = (d << 1);
+ }
+
+ for (hash_in = hash_srch = i = 0; i < srchlen; ++i) {
+ hash_srch = ((hash_srch << 1) + srch[i]);
+ hash_in = ((hash_in << 1) + in[i]);
+ }
+
+ /* Searching */
+ j = 0;
+ while (j <= inlen - srchlen) {
+
+ if (hash_srch == hash_in && memcmp (srch, in + j, srchlen) == 0) {
+ return (goffset)j;
+ }
+
+ hash_in = RKHASH (in[j], in[j + srchlen], hash_in);
+ ++j;
+ }
+
+ return -1;
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 1d0a9eadb..3babc71f9 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -172,4 +172,15 @@ GString *rspamd_header_value_fold (const gchar *name,
const gchar *value,
guint fold_max);
+/**
+ * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm
+ * @param in input
+ * @param inlen input len
+ * @param srch search string
+ * @param srchlen length of the search string
+ * @return position of the first substring match or (-1) if not found
+ */
+goffset rspamd_substring_search (const gchar *in, gsize inlen,
+ const gchar *srch, gsize srchlen);
+
#endif /* SRC_LIBUTIL_STR_UTIL_H_ */