source.dussan.org Git - rspamd.git/commitdiff
Tune shingles settings.
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 17 Dec 2014 18:29:33 +0000 (18:29 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 17 Dec 2014 18:29:33 +0000 (18:29 +0000)
src/libutil/shingles.c
src/libutil/shingles.h

index 653fa5356eeb2337ebd22c9635e15939f3c83388..3775b9e16065acd0c43ee33787953de77910da1c 100644 (file)
@@ -25,7 +25,7 @@
 #include "fstring.h"
 #include "siphash.h"
 
-#define SHINGLES_WINDOW 10
+#define SHINGLES_WINDOW 3
 
 static void
 rspamd_shingles_update_row (rspamd_fstring_t *in, struct siphash *h)
@@ -84,6 +84,7 @@ rspamd_shingles_generate (GArray *input,
                g_checksum_reset (cksum);
                cur_key = out_key;
                out_key += 16;
+               memset (&h[i], 0, sizeof (h[0]));
                sip24_init (&h[i], &keys[i]);
        }
 
@@ -96,15 +97,17 @@ rspamd_shingles_generate (GArray *input,
                                rspamd_shingles_update_row (&g_array_index (input,
                                                rspamd_fstring_t, j), h);
                        }
+                       beg++;
 
                        /* Now we need to create a new row here */
                        for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
                                guint64 val;
 
-                               val = sip24_final (&h[i]);
+                               val = sip24_final (&h[j]);
                                /* Reinit siphash state */
-                               sip24_init (&h[i], &keys[i]);
-                               g_array_append_val (hashes[i], val);
+                               memset (&h[j], 0, sizeof (h[0]));
+                               sip24_init (&h[j], &keys[j]);
+                               g_array_append_val (hashes[j], val);
                        }
                }
        }
@@ -148,5 +151,5 @@ gdouble rspamd_shingles_compare (const struct rspamd_shingle *a,
                }
        }
 
-       return (gdouble)common / 84.0;
+       return (gdouble)common / (gdouble)RSPAMD_SHINGLE_SIZE;
 }
index 39e9cf38dd79a727c24e91a32d5f6b25640b1fe1..61b3b24c306031147502beb7dd9beada23f814db 100644 (file)
@@ -26,7 +26,7 @@
 #include "config.h"
 #include "mem_pool.h"
 
-#define RSPAMD_SHINGLE_SIZE 84
+#define RSPAMD_SHINGLE_SIZE 23
 
 struct rspamd_shingle {
        guint64 hashes[RSPAMD_SHINGLE_SIZE];