source.dussan.org Git - rspamd.git/commitdiff
Add compatibility layer for tokenization.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 1 Apr 2015 12:21:26 +0000 (13:21 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 1 Apr 2015 12:21:26 +0000 (13:21 +0100)
src/libstat/stat_config.c
src/libstat/tokenizers/osb.c
src/libstat/tokenizers/tokenizers.c
src/libstat/tokenizers/tokenizers.h

index 4be313e7b06ac4dbfb24efc48fab72d0ce9f8176..8c935ec4e7a5454e56cad2a9e2d30929463bb0d8 100644 (file)
@@ -41,8 +41,18 @@ static struct rspamd_stat_classifier stat_classifiers[] = {
 };
 
 static struct rspamd_stat_tokenizer stat_tokenizers[] = {
-       {"osb-text", rspamd_tokenizer_osb},
-       {"osb", rspamd_tokenizer_osb}
+       { /* "osb-text": uses exactly the same OSB callbacks as "osb" below */
+               .name = "osb-text",
+               .get_config = rspamd_tokenizer_osb_get_config,
+               .compatible_config = rspamd_tokenizer_osb_compatible_config,
+               .tokenize_func = rspamd_tokenizer_osb
+       },
+       { /* "osb": config getter, compatibility check and tokenize function */
+               .name = "osb",
+               .get_config = rspamd_tokenizer_osb_get_config,
+               .compatible_config = rspamd_tokenizer_osb_compatible_config,
+               .tokenize_func = rspamd_tokenizer_osb
+       },
 };
 
 static struct rspamd_stat_backend stat_backends[] = {
index 65d36745538ecdda5ef368701c7fd0d473f69523..dc6808753794c1dcb39f44e3b704d47cf7d38153 100644 (file)
@@ -95,8 +95,15 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool,
        guchar *key = NULL;
        gsize keylen;
 
+
+       if (pool != NULL) {
+               cf = rspamd_mempool_alloc (pool, sizeof (*cf));
+       }
+       else {
+               cf = g_slice_alloc (sizeof (*cf));
+       }
+
        /* Use default config */
-       cf = rspamd_mempool_alloc (pool, sizeof (*cf));
        def = rspamd_tokenizer_osb_default_config ();
        memcpy (cf, def, sizeof (*cf));
 
@@ -146,7 +153,61 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool,
        return cf;
 }
 
-int
+/*
+ * Produce a freshly allocated OSB tokenizer configuration for `cf`.
+ *
+ * cf:  tokenizer configuration; may be NULL or carry no options, in which
+ *      case a copy of the default OSB configuration is returned
+ * len: optional output, receives the size in bytes of the returned structure
+ *
+ * Returns a pointer to a struct rspamd_osb_tokenizer_config that is not
+ * bound to any memory pool.
+ */
+gpointer
+rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
+               gsize *len)
+{
+       struct rspamd_osb_tokenizer_config *result, *defaults;
+
+       if (cf == NULL || cf->opts == NULL) {
+               /* Nothing to parse: hand back a private copy of the defaults */
+               defaults = rspamd_tokenizer_osb_default_config ();
+               result = g_slice_alloc (sizeof (*result));
+               memcpy (result, defaults, sizeof (*result));
+       }
+       else {
+               /* A NULL pool is passed, so the result is not pool-allocated */
+               result = rspamd_tokenizer_osb_config_from_ucl (NULL, cf->opts);
+       }
+
+       if (len != NULL) {
+               *len = sizeof (*result);
+       }
+
+       return result;
+}
+
+/*
+ * Check whether a stored tokenizer configuration blob is compatible with
+ * the configuration described by `cf`.
+ *
+ * cf:  tokenizer configuration to test against; it is converted with
+ *      rspamd_tokenizer_osb_get_config ()
+ * ptr: stored configuration blob
+ * len: size of the blob in bytes
+ *
+ * Returns TRUE when either
+ *  - the blob has the size of the OSB config, starts with the OSB magic,
+ *    has version DEFAULT_OSB_VERSION and is bytewise equal to the
+ *    configuration built from `cf`, or
+ *  - the blob has another size or lacks the magic, and the configuration
+ *    built from `cf` uses RSPAMD_OSB_HASH_COMPAT.
+ * Returns FALSE otherwise.
+ */
+gboolean
+rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
+                       gpointer ptr, gsize len)
+{
+       struct rspamd_osb_tokenizer_config *osb_cf, *test_cf;
+       gboolean ret = FALSE;
+
+       test_cf = rspamd_tokenizer_osb_get_config (cf, NULL);
+
+       if (len == sizeof (*osb_cf)) {
+               osb_cf = ptr;
+
+               if (memcmp (osb_cf, osb_tokenizer_magic, sizeof (osb_tokenizer_magic)) != 0) {
+                       /* No magic in the blob: only the compat hash can match */
+                       ret = test_cf->ht == RSPAMD_OSB_HASH_COMPAT;
+               }
+               else if (osb_cf->version == DEFAULT_OSB_VERSION) {
+                       /* Same layout version: the structures compare directly */
+                       ret = memcmp (osb_cf, test_cf, sizeof (*osb_cf)) == 0;
+               }
+       }
+       else {
+               /* Unknown size: compatible merely with the fallback config */
+               ret = test_cf->ht == RSPAMD_OSB_HASH_COMPAT;
+       }
+
+       /*
+        * The temporary config was obtained without a memory pool, i.e. via
+        * g_slice_alloc (sizeof (*test_cf)), so it must be released here.
+        */
+       g_slice_free1 (sizeof (*test_cf), test_cf);
+
+       return ret;
+}
+
+gint
 rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
        rspamd_mempool_t * pool,
        GArray * input,
index 63452dfb1b1aebb79f7e81e14ad4cb3e5116f6f8..b9a4bd68b9f6d40f1e5239b3850ab535b4e19e10 100644 (file)
@@ -59,7 +59,7 @@ const gchar t_delimiters[255] = {
        0, 0, 0, 0, 0
 };
 
-int
+gint
 token_node_compare_func (gconstpointer a, gconstpointer b)
 {
        const rspamd_token_t *aa = a, *bb = b;
index a93f5329d0e1d1ff45ddc8d23514bd5374207c56..1cf3a1589e62c0637bcfba06de256acab9e5b331 100644 (file)
@@ -12,6 +12,9 @@
 /* Common tokenizer structure */
 struct rspamd_stat_tokenizer {
        gchar *name;
+       gpointer (*get_config) (struct rspamd_tokenizer_config *cf, gsize *len);
+       gboolean (*compatible_config) (struct rspamd_tokenizer_config *cf,
+                       gpointer ptr, gsize len);
        gint (*tokenize_func)(struct rspamd_tokenizer_config *cf,
                        rspamd_mempool_t *pool,
                        GArray *words,
@@ -20,7 +23,7 @@ struct rspamd_stat_tokenizer {
 };
 
 /* Compare two token nodes */
-int token_node_compare_func (gconstpointer a, gconstpointer b);
+gint token_node_compare_func (gconstpointer a, gconstpointer b);
 
 
 /* Tokenize text into array of words (rspamd_fstring_t type) */
@@ -28,12 +31,19 @@ GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
                gsize min_len, GList **exceptions);
 
 /* OSB tokenize function */
-int rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
+gint rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
        rspamd_mempool_t *pool,
        GArray *input,
        GTree *tokens,
        gboolean is_utf);
 
+gpointer rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
+               gsize *len);
+
+gboolean
+rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
+                       gpointer ptr, gsize len);
+
 #endif
 /*
  * vi:ts=4