source.dussan.org Git - rspamd.git/commitdiff
Rework types for tokenizers functions.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 23 Jan 2015 16:26:38 +0000 (16:26 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 23 Jan 2015 16:26:38 +0000 (16:26 +0000)
src/libstat/tokenizers/osb.c
src/libstat/tokenizers/tokenizers.c
src/libstat/tokenizers/tokenizers.h

index abf547f4339e16adb71da618502822e3191ef9e5..f9307ded46666017f19093fc76d6188ce68fa3cf 100644 (file)
 extern const int primes[];
 
 int
-osb_tokenize_text (struct tokenizer *tokenizer,
+osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer,
        rspamd_mempool_t * pool,
        GArray * input,
-       GTree ** tree,
-       gboolean save_token,
-       gboolean is_utf,
-       GList *exceptions)
+       GTree * tree,
+       gboolean is_utf)
 {
        rspamd_token_t *new = NULL;
        rspamd_fstring_t *token;
@@ -52,17 +50,12 @@ osb_tokenize_text (struct tokenizer *tokenizer,
        gint i, processed = 0;
        guint w;
 
+       g_assert (tree != NULL);
+
        if (input == NULL) {
                return FALSE;
        }
 
-       if (*tree == NULL) {
-               *tree = g_tree_new (token_node_compare_func);
-               rspamd_mempool_add_destructor (pool,
-                       (rspamd_mempool_destruct_t) g_tree_destroy,
-                       *tree);
-       }
-
        memset (hashpipe, 0xfe, FEATURE_WINDOW_SIZE * sizeof (hashpipe[0]));
 
        for (w = 0; w < input->len; w ++) {
@@ -106,8 +99,8 @@ osb_tokenize_text (struct tokenizer *tokenizer,
                        memcpy(new->data, &h1, sizeof(h1));
                        memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
 
-                       if (g_tree_lookup (*tree, new) == NULL) {
-                               g_tree_insert (*tree, new, new);
+                       if (g_tree_lookup (tree, new) == NULL) {
+                               g_tree_insert (tree, new, new);
                        }
                }
        }
index eee41a9714d6292505b9fe46f52fd91ac7b98f61..5cc2a83eab6ad3d95cdc2a049174db02dc17a882 100644 (file)
@@ -202,7 +202,7 @@ void
 tokenize_subject (struct rspamd_task *task, GTree ** tree)
 {
        gchar *sub;
-       struct tokenizer *osb_tokenizer;
+       struct rspamd_stat_tokenizer *osb_tokenizer;
        GArray *words;
 
        if (*tree == NULL) {
index 8ee11cea1f2b77098884b3ca0d91e754f15815c6..7d4523bfbca2a2d9ff9ca36bfb21c1194f350fe1 100644 (file)
@@ -7,17 +7,16 @@
 #include "main.h"
 #include "stat_api.h"
 
+#define RSPAMD_DEFAULT_TOKENIZER "osb"
+
 /* Common tokenizer structure */
-struct tokenizer {
+struct rspamd_stat_tokenizer {
        gchar *name;
-       gint (*tokenize_func)(struct tokenizer *tokenizer,
+       gint (*tokenize_func)(struct rspamd_stat_tokenizer *rspamd_stat_tokenizer,
                        rspamd_mempool_t *pool,
                        GArray *words,
-                       GTree **cur,
-                       gboolean save_token,
-                       gboolean is_utf,
-                       GList *exceptions);
-       gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions);
+                       GTree *result,
+                       gboolean is_utf);
 };
 
 /* Compare two token nodes */
@@ -32,13 +31,11 @@ GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
                gsize min_len, GList **exceptions);
 
 /* OSB tokenize function */
-int osb_tokenize_text (struct tokenizer *tokenizer,
+int osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer,
        rspamd_mempool_t *pool,
        GArray *input,
-       GTree **cur,
-       gboolean save_token,
-       gboolean is_utf,
-       GList *exceptions);
+       GTree *tokens,
+       gboolean is_utf);
 
 /* Make tokens for a subject */
 void tokenize_subject (struct rspamd_task *task, GTree ** tree);