about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/libstat/tokenizers/osb.c 21
-rw-r--r-- src/libstat/tokenizers/tokenizers.c 2
-rw-r--r-- src/libstat/tokenizers/tokenizers.h 21
3 files changed, 17 insertions, 27 deletions
diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c
index abf547f43..f9307ded4 100644
--- a/src/libstat/tokenizers/osb.c
+++ b/src/libstat/tokenizers/osb.c
@@ -38,13 +38,11 @@
extern const int primes[];
int
-osb_tokenize_text (struct tokenizer *tokenizer,
+osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer,
rspamd_mempool_t * pool,
GArray * input,
- GTree ** tree,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions)
+ GTree * tree,
+ gboolean is_utf)
{
rspamd_token_t *new = NULL;
rspamd_fstring_t *token;
@@ -52,17 +50,12 @@ osb_tokenize_text (struct tokenizer *tokenizer,
gint i, processed = 0;
guint w;
+ g_assert (tree != NULL);
+
if (input == NULL) {
return FALSE;
}
- if (*tree == NULL) {
- *tree = g_tree_new (token_node_compare_func);
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t) g_tree_destroy,
- *tree);
- }
-
memset (hashpipe, 0xfe, FEATURE_WINDOW_SIZE * sizeof (hashpipe[0]));
for (w = 0; w < input->len; w ++) {
@@ -106,8 +99,8 @@ osb_tokenize_text (struct tokenizer *tokenizer,
memcpy(new->data, &h1, sizeof(h1));
memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
- if (g_tree_lookup (*tree, new) == NULL) {
- g_tree_insert (*tree, new, new);
+ if (g_tree_lookup (tree, new) == NULL) {
+ g_tree_insert (tree, new, new);
}
}
}
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index eee41a971..5cc2a83ea 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -202,7 +202,7 @@ void
tokenize_subject (struct rspamd_task *task, GTree ** tree)
{
gchar *sub;
- struct tokenizer *osb_tokenizer;
+ struct rspamd_stat_tokenizer *osb_tokenizer;
GArray *words;
if (*tree == NULL) {
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
index 8ee11cea1..7d4523bfb 100644
--- a/src/libstat/tokenizers/tokenizers.h
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -7,17 +7,16 @@
#include "main.h"
#include "stat_api.h"
+#define RSPAMD_DEFAULT_TOKENIZER "osb"
+
/* Common tokenizer structure */
-struct tokenizer {
+struct rspamd_stat_tokenizer {
gchar *name;
- gint (*tokenize_func)(struct tokenizer *tokenizer,
+ gint (*tokenize_func)(struct rspamd_stat_tokenizer *rspamd_stat_tokenizer,
rspamd_mempool_t *pool,
GArray *words,
- GTree **cur,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions);
- gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions);
+ GTree *result,
+ gboolean is_utf);
};
/* Compare two token nodes */
@@ -32,13 +31,11 @@ GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
gsize min_len, GList **exceptions);
/* OSB tokenize function */
-int osb_tokenize_text (struct tokenizer *tokenizer,
+int osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer,
rspamd_mempool_t *pool,
GArray *input,
- GTree **cur,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions);
+ GTree *tokens,
+ gboolean is_utf);
/* Make tokens for a subject */
void tokenize_subject (struct rspamd_task *task, GTree ** tree);