extern const int primes[];
/*
 * OSB tokenizer entry point (diff excerpt; parts of the body are elided).
 *
 * Visible behaviour after this change:
 *  - `tree` must be supplied by the caller (asserted non-NULL); the old
 *    lazy creation through `*tree` plus a mempool destructor is removed.
 *  - A NULL `input` makes the function return FALSE immediately.
 *  - For each element of `input`, a token whose data holds h1 followed by
 *    h2 is inserted into `tree` unless an equal key is already present.
 *
 * NOTE(review): the `save_token` and `exceptions` parameters are dropped
 * from the signature; callers must be updated accordingly.
 */
int
-osb_tokenize_text (struct tokenizer *tokenizer,
+osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer,
rspamd_mempool_t * pool,
GArray * input,
- GTree ** tree,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions)
+ GTree * tree,
+ gboolean is_utf)
{
rspamd_token_t *new = NULL;
rspamd_fstring_t *token;
gint i, processed = 0;
guint w;
/* The result tree is now owned by the caller and must already exist. */
+ g_assert (tree != NULL);
+
if (input == NULL) {
return FALSE;
}
- if (*tree == NULL) {
- *tree = g_tree_new (token_node_compare_func);
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t) g_tree_destroy,
- *tree);
- }
-
/* Fill every hashpipe slot with 0xfe bytes before walking the input. */
memset (hashpipe, 0xfe, FEATURE_WINDOW_SIZE * sizeof (hashpipe[0]));
for (w = 0; w < input->len; w ++) {
/*
 * NOTE(review): the allocation of `new` and the computation of h1/h2 are
 * not visible in this excerpt -- confirm against the full source.
 * The token data is h1 immediately followed by h2.
 */
memcpy(new->data, &h1, sizeof(h1));
memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
/* Insert only when no equal key exists; the token is both key and value. */
- if (g_tree_lookup (*tree, new) == NULL) {
- g_tree_insert (*tree, new, new);
+ if (g_tree_lookup (tree, new) == NULL) {
+ g_tree_insert (tree, new, new);
}
}
}
#include "main.h"
#include "stat_api.h"
+#define RSPAMD_DEFAULT_TOKENIZER "osb"
+
/* Common tokenizer structure */
/*
 * This change renames the struct from `tokenizer` to `rspamd_stat_tokenizer`,
 * removes the `get_next_word` member, and narrows the `tokenize_func`
 * callback: the result tree is passed as `GTree *` instead of `GTree **`,
 * and the `save_token` and `exceptions` arguments are gone.
 */
-struct tokenizer {
+struct rspamd_stat_tokenizer {
/* Tokenizer name; presumably matched against names such as
 * RSPAMD_DEFAULT_TOKENIZER -- confirm against the lookup code. */
gchar *name;
/*
 * Tokenization callback. Receives the tokenizer itself, a memory pool,
 * the array of input words, the tree that collects results, and a flag
 * telling whether the input is UTF text.
 */
- gint (*tokenize_func)(struct tokenizer *tokenizer,
+ gint (*tokenize_func)(struct rspamd_stat_tokenizer *rspamd_stat_tokenizer,
rspamd_mempool_t *pool,
GArray *words,
- GTree **cur,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions);
- gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions);
+ GTree *result,
+ gboolean is_utf);
};
/* Compare two token nodes */
gsize min_len, GList **exceptions);
/* OSB tokenize function */
/*
 * tokenizer: tokenizer descriptor this call belongs to
 * pool:      memory pool handed to the tokenizer
 * input:     array of words; the definition returns FALSE when it is NULL
 * tokens:    tree receiving the tokens; the definition asserts it is
 *            non-NULL (the parameter is named `tree` there)
 * is_utf:    whether the input is UTF text
 *
 * The signature matches the `tokenize_func` member of
 * struct rspamd_stat_tokenizer.
 */
-int osb_tokenize_text (struct tokenizer *tokenizer,
+int osb_tokenize_text (struct rspamd_stat_tokenizer *tokenizer,
rspamd_mempool_t *pool,
GArray *input,
- GTree **cur,
- gboolean save_token,
- gboolean is_utf,
- GList *exceptions);
+ GTree *tokens,
+ gboolean is_utf);
/* Make tokens for a subject */
/*
 * NOTE(review): this prototype still takes `GTree **`, whereas
 * osb_tokenize_text now takes a plain `GTree *` -- confirm whether the
 * double pointer is still intended here.
 */
void tokenize_subject (struct rspamd_task *task, GTree ** tree);