aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-29 09:47:34 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-29 09:47:34 +0000
commit7b71ab56637b83470fc0bbcfab9f96422dc83c40 (patch)
tree3b36c82ed8d4e590c499a34bc37bcc239e3062f2
parent66efb875d0e363c230f1e30d826694b0719a0d40 (diff)
downloadrspamd-7b71ab56637b83470fc0bbcfab9f96422dc83c40.tar.gz
rspamd-7b71ab56637b83470fc0bbcfab9f96422dc83c40.zip
Add caseless version of rabin-karp substring search
-rw-r--r--src/libutil/str_util.c40
-rw-r--r--src/libutil/str_util.h11
2 files changed, 51 insertions, 0 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 8487e8ef2..62d71dd2c 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -1119,6 +1119,46 @@ rspamd_substring_search (const gchar *in, gsize inlen,
}
goffset
+rspamd_substring_search_caseless (const gchar *in, gsize inlen,
+ const gchar *srch, gsize srchlen)
+{
+ gint d, hash_srch, hash_in;
+ gsize i, j;
+ gchar c1, c2;
+
+ if (inlen < srchlen) {
+ return -1;
+ }
+
+ /* Preprocessing */
+ for (d = i = 1; i < srchlen; ++i) {
+ /* computes d = 2^(m-1) with the left-shift operator */
+ d = (d << 1);
+ }
+
+ for (hash_in = hash_srch = i = 0; i < srchlen; ++i) {
+ hash_srch = ((hash_srch << 1) + g_ascii_tolower (srch[i]));
+ hash_in = ((hash_in << 1) + g_ascii_tolower (in[i]));
+ }
+
+ /* Searching */
+ j = 0;
+ while (j <= inlen - srchlen) {
+
+ if (hash_srch == hash_in && g_ascii_strncasecmp (srch, in + j, srchlen) == 0) {
+ return (goffset) j;
+ }
+
+ c1 = g_ascii_tolower (in[j]);
+ c2 = g_ascii_tolower (in[j + srchlen]);
+ hash_in = RKHASH (c1, c2, hash_in);
+ ++j;
+ }
+
+ return -1;
+}
+
+goffset
rspamd_string_find_eoh (GString *input)
{
const gchar *p, *c = NULL, *end;
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 8ea5e3fce..da794bc16 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -189,6 +189,17 @@ GString *rspamd_header_value_fold (const gchar *name,
goffset rspamd_substring_search (const gchar *in, gsize inlen,
const gchar *srch, gsize srchlen);
+/**
+ * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm in caseless matter (ASCII only)
+ * @param in input
+ * @param inlen input len
+ * @param srch search string
+ * @param srchlen length of the search string
+ * @return position of the first substring match or (-1) if not found
+ */
+goffset rspamd_substring_search_caseless (const gchar *in, gsize inlen,
+ const gchar *srch, gsize srchlen);
+
/**
* Search for end-of-headers mark in the input string. Returns position just after