diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-12-19 12:15:34 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-12-19 12:15:34 +0000 |
commit | 123886d49937ccefc547079f57ca4ef2f11bc312 (patch) | |
tree | a124a38e3bca532cb52be6d24b7db6593428c5e6 /src | |
parent | dd1134f06b174f1829d81b67d7c1a6aeaf8bcc1e (diff) | |
download | rspamd-123886d49937ccefc547079f57ca4ef2f11bc312.tar.gz rspamd-123886d49937ccefc547079f57ca4ef2f11bc312.zip |
Several improvements to the shingles calculations.
Diffstat (limited to 'src')
-rw-r--r-- | src/libutil/shingles.c | 12 | ||||
-rw-r--r-- | src/libutil/shingles.h | 6 |
2 files changed, 9 insertions, 9 deletions
```diff
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
index 9421c1529..fa49fdadd 100644
--- a/src/libutil/shingles.c
+++ b/src/libutil/shingles.c
@@ -63,6 +63,7 @@ rspamd_shingles_generate (GArray *input,
 	}
 	blake2b_init (&bs, BLAKE2B_OUTBYTES);
+	memset (h, 0, sizeof (h));
 	cur_key = key;
 	out_key = (guchar *)&keys[0];
@@ -85,14 +86,13 @@ rspamd_shingles_generate (GArray *input,
 		blake2b_init (&bs, BLAKE2B_OUTBYTES);
 		cur_key = out_key;
 		out_key += 16;
-		memset (&h[i], 0, sizeof (h[0]));
 		sip24_init (&h[i], &keys[i]);
 	}
 	/* Now parse input words into a vector of hashes using rolling window */
-	for (i = 0; i < (gint)input->len; i ++) {
-		if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len - 1) {
-			for (j = beg; j <= i; j ++) {
+	for (i = 0; i <= (gint)input->len; i ++) {
+		if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) {
+			for (j = beg; j < i; j ++) {
 				rspamd_shingles_update_row (&g_array_index (input, rspamd_fstring_t, j), h);
 			}
@@ -114,7 +114,7 @@ rspamd_shingles_generate (GArray *input,
 	/* Now we need to filter all hashes and make a shingles result */
 	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
 		res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len,
-				filterd);
+				i, key, filterd);
 		g_array_free (hashes[i], TRUE);
 	}
@@ -124,7 +124,7 @@ rspamd_shingles_generate (GArray *input,
 guint64
 rspamd_shingles_default_filter (guint64 *input, gsize count,
-		gpointer ud)
+		gint shno, const guchar *key, gpointer ud)
 {
 	guint64 minimal = G_MAXUINT64;
 	gsize i;
diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h
index 61b3b24c3..d9025946e 100644
--- a/src/libutil/shingles.h
+++ b/src/libutil/shingles.h
@@ -26,7 +26,7 @@
 #include "config.h"
 #include "mem_pool.h"
-#define RSPAMD_SHINGLE_SIZE 23
+#define RSPAMD_SHINGLE_SIZE 32
 struct rspamd_shingle {
 	guint64 hashes[RSPAMD_SHINGLE_SIZE];
@@ -39,7 +39,7 @@ struct rspamd_shingle {
  * @return shingle value
  */
 typedef guint64 (*rspamd_shingles_filter) (guint64 *input, gsize count,
-		gpointer ud);
+		gint shno, const guchar *key, gpointer ud);
 /**
  * Generate shingles from the input of fixed size strings using lemmatizer
@@ -71,6 +71,6 @@ gdouble rspamd_shingles_compare (const struct rspamd_shingle *a,
  * Default filtering function
  */
 guint64 rspamd_shingles_default_filter (guint64 *input, gsize count,
-		gpointer ud);
+		gint shno, const guchar *key, gpointer ud);
 #endif /* SHINGLES_H_ */
```