about | summary | refs | log | tree | commit | diff | stats
path: root/src/tokenizers
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-05-06 19:18:40 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-05-06 19:18:40 +0400
commit: 683b90f4c6c744557f7429ce6ff77c0f7d2175e1 (patch)
tree: 6e5f5cfdb0070cc7387d4045e955c6226d9f225d /src/tokenizers
parent: 56586078f92c4cf71fad46e1f4888a49749a6313 (diff)
download: rspamd-683b90f4c6c744557f7429ce6ff77c0f7d2175e1.tar.gz
download: rspamd-683b90f4c6c744557f7429ce6ff77c0f7d2175e1.zip
* Major cleanup of cmake build system
* Add initial version of statshow utility for statfiles debugging
* Add debugging for statistics
* Remove unused utilities
Diffstat (limited to 'src/tokenizers')
-rw-r--r-- src/tokenizers/osb.c 5
-rw-r--r-- src/tokenizers/tokenizers.c 4
-rw-r--r-- src/tokenizers/tokenizers.h 4
3 files changed, 8 insertions, 5 deletions
diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c
index ae59cf8ea..41bcce737 100644
--- a/src/tokenizers/osb.c
+++ b/src/tokenizers/osb.c
@@ -35,7 +35,7 @@
extern const int primes[];
int
-osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree)
+osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree, gboolean save_token)
{
token_node_t *new = NULL;
f_str_t token = { NULL, 0, 0 }, *res;
@@ -69,6 +69,9 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t *
new = memory_pool_alloc0 (pool, sizeof (token_node_t));
new->h1 = h1;
new->h2 = h2;
+ if (save_token) {
+ new->extra = (uintptr_t)memory_pool_fstrdup (pool, &token);
+ }
if (g_tree_lookup (*tree, new) == NULL) {
g_tree_insert (*tree, new, new);
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index b7318bdfc..5af3fe6d5 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -239,13 +239,13 @@ tokenize_subject (struct worker_task *task, GTree ** tree)
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
subject.begin = task->subject;
subject.len = strlen (task->subject);
- osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree);
+ osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE);
}
if ((sub = g_mime_message_get_subject (task->message)) != NULL) {
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
subject.begin = (gchar *)sub;
subject.len = strlen (sub);
- osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree);
+ osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE);
}
}
diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h
index 59a2684d0..741753328 100644
--- a/src/tokenizers/tokenizers.h
+++ b/src/tokenizers/tokenizers.h
@@ -24,7 +24,7 @@ typedef struct token_node_s {
/* Common tokenizer structure */
struct tokenizer {
char *name;
- int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur);
+ int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token);
f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token);
};
@@ -35,7 +35,7 @@ struct tokenizer* get_tokenizer (char *name);
/* Get next word from specified f_str_t buf */
f_str_t *get_next_word (f_str_t *buf, f_str_t *token);
/* OSB tokenize function */
-int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur);
+int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token);
/* Common tokenizer for headers */
int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur);
/* Make tokens for a subject */