From ff5ab129dd4d8b9960621d7318e29c28e8c8d0b9 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 1 Apr 2015 13:21:26 +0100 Subject: [PATCH] Add compatibility layer for tokenization. --- src/libstat/stat_config.c | 14 ++++++- src/libstat/tokenizers/osb.c | 65 ++++++++++++++++++++++++++++- src/libstat/tokenizers/tokenizers.c | 2 +- src/libstat/tokenizers/tokenizers.h | 14 ++++++- 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index 4be313e7b..8c935ec4e 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -41,8 +41,18 @@ static struct rspamd_stat_classifier stat_classifiers[] = { }; static struct rspamd_stat_tokenizer stat_tokenizers[] = { - {"osb-text", rspamd_tokenizer_osb}, - {"osb", rspamd_tokenizer_osb} + { + .name = "osb-text", + .get_config = rspamd_tokenizer_osb_get_config, + .compatible_config = rspamd_tokenizer_osb_compatible_config, + .tokenize_func = rspamd_tokenizer_osb + }, + { + .name = "osb", + .get_config = rspamd_tokenizer_osb_get_config, + .compatible_config = rspamd_tokenizer_osb_compatible_config, + .tokenize_func = rspamd_tokenizer_osb + }, }; static struct rspamd_stat_backend stat_backends[] = { diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c index 65d367455..dc6808753 100644 --- a/src/libstat/tokenizers/osb.c +++ b/src/libstat/tokenizers/osb.c @@ -95,8 +95,15 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool, guchar *key = NULL; gsize keylen; + + if (pool != NULL) { + cf = rspamd_mempool_alloc (pool, sizeof (*cf)); + } + else { + cf = g_slice_alloc (sizeof (*cf)); + } + /* Use default config */ - cf = rspamd_mempool_alloc (pool, sizeof (*cf)); def = rspamd_tokenizer_osb_default_config (); memcpy (cf, def, sizeof (*cf)); @@ -146,7 +153,61 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool, return cf; } -int +gpointer +rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf, + gsize *len) +{ + struct rspamd_osb_tokenizer_config *osb_cf, *def; + + if (cf != NULL && cf->opts != NULL) { + osb_cf = rspamd_tokenizer_osb_config_from_ucl (NULL, cf->opts); + } + else { + def = rspamd_tokenizer_osb_default_config (); + osb_cf = g_slice_alloc (sizeof (*osb_cf)); + memcpy (osb_cf, def, sizeof (*osb_cf)); + } + + if (len != NULL) { + *len = sizeof (*osb_cf); + } + + return osb_cf; +} + +gboolean +rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf, + gpointer ptr, gsize len) +{ + struct rspamd_osb_tokenizer_config *osb_cf, *test_cf; + gboolean ret = FALSE; + + test_cf = rspamd_tokenizer_osb_get_config (cf, NULL); + + if (len == sizeof (*osb_cf)) { + osb_cf = ptr; + + if (memcmp (osb_cf, osb_tokenizer_magic, sizeof (osb_tokenizer_magic)) != 0) { + ret = test_cf->ht == RSPAMD_OSB_HASH_COMPAT; + } + else { + if (osb_cf->version == DEFAULT_OSB_VERSION) { + /* We can compare them directly now */ + ret = memcmp (osb_cf, test_cf, sizeof (*osb_cf)) == 0; + } + } + } + else { + /* We are compatible now merely with fallback config */ + if (test_cf->ht == RSPAMD_OSB_HASH_COMPAT) { + ret = TRUE; + } + } + + return ret; +} + +gint rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf, rspamd_mempool_t * pool, GArray * input, diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 63452dfb1..b9a4bd68b 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -59,7 +59,7 @@ const gchar t_delimiters[255] = { 0, 0, 0, 0, 0 }; -int +gint token_node_compare_func (gconstpointer a, gconstpointer b) { const rspamd_token_t *aa = a, *bb = b; diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h index a93f5329d..1cf3a1589 100644 --- a/src/libstat/tokenizers/tokenizers.h +++ b/src/libstat/tokenizers/tokenizers.h @@ -12,6 +12,9 @@ /* Common tokenizer structure */ struct rspamd_stat_tokenizer { gchar *name; + gpointer (*get_config) (struct rspamd_tokenizer_config *cf, gsize *len); + gboolean (*compatible_config) (struct rspamd_tokenizer_config *cf, + gpointer ptr, gsize len); gint (*tokenize_func)(struct rspamd_tokenizer_config *cf, rspamd_mempool_t *pool, GArray *words, @@ -20,7 +23,7 @@ struct rspamd_stat_tokenizer { }; /* Compare two token nodes */ -int token_node_compare_func (gconstpointer a, gconstpointer b); +gint token_node_compare_func (gconstpointer a, gconstpointer b); /* Tokenize text into array of words (rspamd_fstring_t type) */ @@ -28,12 +31,19 @@ GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, gsize min_len, GList **exceptions); /* OSB tokenize function */ -int rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf, +gint rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf, rspamd_mempool_t *pool, GArray *input, GTree *tokens, gboolean is_utf); +gpointer rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf, + gsize *len); + +gboolean +rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf, + gpointer ptr, gsize len); + #endif /* * vi:ts=4 -- 2.39.5