diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/libstat/tokenizers/osb.c | 21 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 2 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.h | 21 |
3 files changed, 17 insertions, 27 deletions
diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c index abf547f43..f9307ded4 100644 --- a/src/libstat/tokenizers/osb.c +++ b/src/libstat/tokenizers/osb.c @@ -38,13 +38,11 @@ extern const int primes[]; int -osb_tokenize_text (struct tokenizer *tokenizer, +osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer, rspamd_mempool_t * pool, GArray * input, - GTree ** tree, - gboolean save_token, - gboolean is_utf, - GList *exceptions) + GTree * tree, + gboolean is_utf) { rspamd_token_t *new = NULL; rspamd_fstring_t *token; @@ -52,17 +50,12 @@ osb_tokenize_text (struct tokenizer *tokenizer, gint i, processed = 0; guint w; + g_assert (tree != NULL); + if (input == NULL) { return FALSE; } - if (*tree == NULL) { - *tree = g_tree_new (token_node_compare_func); - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t) g_tree_destroy, - *tree); - } - memset (hashpipe, 0xfe, FEATURE_WINDOW_SIZE * sizeof (hashpipe[0])); for (w = 0; w < input->len; w ++) { @@ -106,8 +99,8 @@ osb_tokenize_text (struct tokenizer *tokenizer, memcpy(new->data, &h1, sizeof(h1)); memcpy(new->data + sizeof(h1), &h2, sizeof(h2)); - if (g_tree_lookup (*tree, new) == NULL) { - g_tree_insert (*tree, new, new); + if (g_tree_lookup (tree, new) == NULL) { + g_tree_insert (tree, new, new); } } } diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index eee41a971..5cc2a83ea 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -202,7 +202,7 @@ void tokenize_subject (struct rspamd_task *task, GTree ** tree) { gchar *sub; - struct tokenizer *osb_tokenizer; + struct rspamd_stat_tokenizer *osb_tokenizer; GArray *words; if (*tree == NULL) { diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h index 8ee11cea1..7d4523bfb 100644 --- a/src/libstat/tokenizers/tokenizers.h +++ b/src/libstat/tokenizers/tokenizers.h @@ -7,17 +7,16 @@ #include "main.h" #include "stat_api.h" +#define RSPAMD_DEFAULT_TOKENIZER "osb" + /* Common tokenizer structure */ -struct tokenizer { +struct rspamd_stat_tokenizer { gchar *name; - gint (*tokenize_func)(struct tokenizer *tokenizer, + gint (*tokenize_func)(struct rspamd_stat_tokenizer *rspamd_stat_tokenizer, rspamd_mempool_t *pool, GArray *words, - GTree **cur, - gboolean save_token, - gboolean is_utf, - GList *exceptions); - gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions); + GTree *result, + gboolean is_utf); }; /* Compare two token nodes */ @@ -32,13 +31,11 @@ GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, gsize min_len, GList **exceptions); /* OSB tokenize function */ -int osb_tokenize_text (struct tokenizer *tokenizer, +int osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer, rspamd_mempool_t *pool, GArray *input, - GTree **cur, - gboolean save_token, - gboolean is_utf, - GList *exceptions); + GTree *tokens, + gboolean is_utf); /* Make tokens for a subject */ void tokenize_subject (struct rspamd_task *task, GTree ** tree); |