summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-01 13:21:26 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-01 13:21:26 +0100
commit ff5ab129dd4d8b9960621d7318e29c28e8c8d0b9 (patch)
tree 2b1d65e5953f72b4153d5294e716ff4f8d70158b
parent c82249130c10e51a541b879a193ca16c7f182c3e (diff)
download rspamd-ff5ab129dd4d8b9960621d7318e29c28e8c8d0b9.tar.gz
rspamd-ff5ab129dd4d8b9960621d7318e29c28e8c8d0b9.zip
Add compatibility layer for tokenization.
-rw-r--r-- src/libstat/stat_config.c 14
-rw-r--r-- src/libstat/tokenizers/osb.c 65
-rw-r--r-- src/libstat/tokenizers/tokenizers.c 2
-rw-r--r-- src/libstat/tokenizers/tokenizers.h 14
4 files changed, 88 insertions, 7 deletions
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c
index 4be313e7b..8c935ec4e 100644
--- a/src/libstat/stat_config.c
+++ b/src/libstat/stat_config.c
@@ -41,8 +41,18 @@ static struct rspamd_stat_classifier stat_classifiers[] = {
};
static struct rspamd_stat_tokenizer stat_tokenizers[] = {
- {"osb-text", rspamd_tokenizer_osb},
- {"osb", rspamd_tokenizer_osb}
+ {
+ .name = "osb-text",
+ .get_config = rspamd_tokenizer_osb_get_config,
+ .compatible_config = rspamd_tokenizer_osb_compatible_config,
+ .tokenize_func = rspamd_tokenizer_osb
+ },
+ {
+ .name = "osb",
+ .get_config = rspamd_tokenizer_osb_get_config,
+ .compatible_config = rspamd_tokenizer_osb_compatible_config,
+ .tokenize_func = rspamd_tokenizer_osb
+ },
};
static struct rspamd_stat_backend stat_backends[] = {
diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c
index 65d367455..dc6808753 100644
--- a/src/libstat/tokenizers/osb.c
+++ b/src/libstat/tokenizers/osb.c
@@ -95,8 +95,15 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool,
guchar *key = NULL;
gsize keylen;
+
+ if (pool != NULL) {
+ cf = rspamd_mempool_alloc (pool, sizeof (*cf));
+ }
+ else {
+ cf = g_slice_alloc (sizeof (*cf));
+ }
+
/* Use default config */
- cf = rspamd_mempool_alloc (pool, sizeof (*cf));
def = rspamd_tokenizer_osb_default_config ();
memcpy (cf, def, sizeof (*cf));
@@ -146,7 +153,61 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool,
return cf;
}
-int
+gpointer
+rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
+ gsize *len)
+{
+ struct rspamd_osb_tokenizer_config *osb_cf, *def;
+
+ if (cf != NULL && cf->opts != NULL) {
+ osb_cf = rspamd_tokenizer_osb_config_from_ucl (NULL, cf->opts);
+ }
+ else {
+ def = rspamd_tokenizer_osb_default_config ();
+ osb_cf = g_slice_alloc (sizeof (*osb_cf));
+ memcpy (osb_cf, def, sizeof (*osb_cf));
+ }
+
+ if (len != NULL) {
+ *len = sizeof (*osb_cf);
+ }
+
+ return osb_cf;
+}
+
+gboolean
+rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
+ gpointer ptr, gsize len)
+{
+ struct rspamd_osb_tokenizer_config *osb_cf, *test_cf;
+ gboolean ret = FALSE;
+
+ test_cf = rspamd_tokenizer_osb_get_config (cf, NULL);
+
+ if (len == sizeof (*osb_cf)) {
+ osb_cf = ptr;
+
+ if (memcmp (osb_cf, osb_tokenizer_magic, sizeof (osb_tokenizer_magic)) != 0) {
+ ret = test_cf->ht == RSPAMD_OSB_HASH_COMPAT;
+ }
+ else {
+ if (osb_cf->version == DEFAULT_OSB_VERSION) {
+ /* We can compare them directly now */
+ ret = memcmp (osb_cf, test_cf, sizeof (*osb_cf)) == 0;
+ }
+ }
+ }
+ else {
+ /* We are compatible now merely with fallback config */
+ if (test_cf->ht == RSPAMD_OSB_HASH_COMPAT) {
+ ret = TRUE;
+ }
+ }
+
+ return ret;
+}
+
+gint
rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
rspamd_mempool_t * pool,
GArray * input,
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index 63452dfb1..b9a4bd68b 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -59,7 +59,7 @@ const gchar t_delimiters[255] = {
0, 0, 0, 0, 0
};
-int
+gint
token_node_compare_func (gconstpointer a, gconstpointer b)
{
const rspamd_token_t *aa = a, *bb = b;
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
index a93f5329d..1cf3a1589 100644
--- a/src/libstat/tokenizers/tokenizers.h
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -12,6 +12,9 @@
/* Common tokenizer structure */
struct rspamd_stat_tokenizer {
gchar *name;
+ gpointer (*get_config) (struct rspamd_tokenizer_config *cf, gsize *len);
+ gboolean (*compatible_config) (struct rspamd_tokenizer_config *cf,
+ gpointer ptr, gsize len);
gint (*tokenize_func)(struct rspamd_tokenizer_config *cf,
rspamd_mempool_t *pool,
GArray *words,
@@ -20,7 +23,7 @@ struct rspamd_stat_tokenizer {
};
/* Compare two token nodes */
-int token_node_compare_func (gconstpointer a, gconstpointer b);
+gint token_node_compare_func (gconstpointer a, gconstpointer b);
/* Tokenize text into array of words (rspamd_fstring_t type) */
@@ -28,12 +31,19 @@ GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
gsize min_len, GList **exceptions);
/* OSB tokenize function */
-int rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
+gint rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
rspamd_mempool_t *pool,
GArray *input,
GTree *tokens,
gboolean is_utf);
+gpointer rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
+ gsize *len);
+
+gboolean
+rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
+ gpointer ptr, gsize len);
+
#endif
/*
* vi:ts=4