source.dussan.org Git - rspamd.git/commitdiff
New statistics token definition.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 17 Jan 2015 22:11:02 +0000 (22:11 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 18 Jan 2015 23:05:15 +0000 (23:05 +0000)
src/libstat/tokenizers.h
src/libstat/tokenizers/osb.c
src/libstat/tokenizers/tokenizers.c

index c0d2e893464ca921c0c62fe3ddd96b2ba30b774a..7943c212aaec5a8299200fef6172c9e1eba4aada 100644 (file)
@@ -8,12 +8,19 @@
 
 /* Size for features pipe */
 #define FEATURE_WINDOW_SIZE 5
+#define MAX_DATA_LEN 64
+#define MAX_VALUES 32
 
-typedef struct token_node_s {
-       guint32 h1;
-       guint32 h2;
+struct token_result {
        double value;
-       uintptr_t extra;
+       struct rspamd_statfile_config *st;
+};
+
+typedef struct token_node_s {
+       guchar data[MAX_DATA_LEN];
+       guint datalen;
+       struct token_result *results;
+       guint results_len;
 } token_node_t;
 
 /* Common tokenizer structure */
index 9dd12a8dd2e3d175982668561f16b7436e79eadf..4016842b6af52ae0d0e5366f72d6ee73f533f6ea 100644 (file)
@@ -83,12 +83,9 @@ osb_tokenize_text (struct tokenizer *tokenizer,
                                h2 = hashpipe[0] * primes[1] + hashpipe[i] *
                                        primes[(i << 1) - 1];
                                new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
-                               new->h1 = h1;
-                               new->h2 = h2;
-                               if (save_token) {
-                                       new->extra =
-                                               (uintptr_t)rspamd_mempool_fstrdup (pool, token);
-                               }
+                               new->datalen = sizeof(gint32) * 2;
+                               memcpy(new->data, &h1, sizeof(h1));
+                               memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
 
                                if (g_tree_lookup (*tree, new) == NULL) {
                                        g_tree_insert (*tree, new, new);
@@ -102,11 +99,9 @@ osb_tokenize_text (struct tokenizer *tokenizer,
                        h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
                        h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1];
                        new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
-                       new->h1 = h1;
-                       new->h2 = h2;
-                       if (save_token) {
-                               new->extra = (uintptr_t)rspamd_mempool_fstrdup (pool, token);
-                       }
+                       new->datalen = sizeof(gint32) * 2;
+                       memcpy(new->data, &h1, sizeof(h1));
+                       memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
 
                        if (g_tree_lookup (*tree, new) == NULL) {
                                g_tree_insert (*tree, new, new);
index ce221397d656edcf40ed47c5b201b092b1005b6a..7d00f693aba0bf60a2ec9aec231d130ce5377b0a 100644 (file)
@@ -94,11 +94,11 @@ token_node_compare_func (gconstpointer a, gconstpointer b)
 {
        const token_node_t *aa = a, *bb = b;
 
-       if (aa->h1 == bb->h1) {
-               return aa->h2 - bb->h2;
+       if (aa->datalen != bb->datalen) {
+               return aa->datalen - bb->datalen;
        }
 
-       return aa->h1 - bb->h1;
+       return memcmp (aa->data, bb->data, aa->datalen);
 }
 
 /* Get next word from specified f_str_t buf */