* Write a log message about the symbols that are removed when a composite symbol is inserted [tags/0.3.7]
@@ -845,7 +845,9 @@ controller_read_socket (f_str_t * in, void *arg) | |||
session->state = STATE_REPLY; | |||
return TRUE; | |||
} | |||
/* Take care of subject */ | |||
tokenize_subject (task, &tokens); | |||
/* Init classifier */ | |||
cls_ctx = session->learn_classifier->classifier->init_func (session->session_pool, session->learn_classifier); |
@@ -319,6 +319,8 @@ composites_foreach_callback (gpointer key, gpointer value, void *data) | |||
GQueue *stack; | |||
GList *symbols = NULL, *s; | |||
gsize cur, op1, op2; | |||
gchar logbuf[256]; | |||
gint r; | |||
stack = g_queue_new (); | |||
@@ -367,8 +369,15 @@ composites_foreach_callback (gpointer key, gpointer value, void *data) | |||
if (op1) { | |||
/* Remove all symbols that are in composite symbol */ | |||
s = g_list_first (symbols); | |||
r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key); | |||
while (s) { | |||
g_hash_table_remove (cd->metric_res->symbols, s->data); | |||
if (s->next) { | |||
r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data); | |||
} | |||
else { | |||
r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data); | |||
} | |||
s = g_list_next (s); | |||
} | |||
/* Add new symbol */ | |||
@@ -432,6 +441,8 @@ process_autolearn (struct statfile *st, struct worker_task *task, GTree * tokens | |||
if (check_autolearn (st->autolearn, task)) { | |||
if (tokens) { | |||
/* Take care of subject */ | |||
tokenize_subject (task, &tokens); | |||
msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename); | |||
/* Get or create statfile */ | |||
@@ -527,6 +538,8 @@ classifiers_callback (gpointer value, void *arg) | |||
return; | |||
} | |||
/* Take care of subject */ | |||
tokenize_subject (task, &tokens); | |||
cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task); | |||
/* Autolearning */ |
@@ -222,6 +222,44 @@ tokenize_headers (memory_pool_t * pool, struct worker_task *task, GTree ** tree) | |||
return TRUE; | |||
} | |||
void | |||
tokenize_subject (struct worker_task *task, GTree ** tree) | |||
{ | |||
f_str_t subject, subject_name; | |||
const gchar *sub; | |||
token_node_t *new = NULL; | |||
if (*tree == NULL) { | |||
*tree = g_tree_new (token_node_compare_func); | |||
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, *tree); | |||
} | |||
subject_name.begin = "Subject:"; | |||
subject_name.len = sizeof ("Subject:") - 1; | |||
/* Try to use pre-defined subject */ | |||
if (task->subject != NULL) { | |||
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t)); | |||
subject.begin = task->subject; | |||
subject.len = strlen (task->subject); | |||
new->h1 = fstrhash (&subject_name) * primes[0]; | |||
new->h2 = fstrhash (&subject) * primes[1]; | |||
if (g_tree_lookup (*tree, new) == NULL) { | |||
g_tree_insert (*tree, new, new); | |||
} | |||
} | |||
if ((sub = g_mime_message_get_subject (task->message)) != NULL) { | |||
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t)); | |||
subject.begin = (gchar *)sub; | |||
subject.len = strlen (sub); | |||
new->h1 = fstrhash (&subject_name) * primes[0]; | |||
new->h2 = fstrhash (&subject) * primes[1]; | |||
if (g_tree_lookup (*tree, new) == NULL) { | |||
g_tree_insert (*tree, new, new); | |||
} | |||
} | |||
} | |||
/* | |||
* vi:ts=4 | |||
*/ |
@@ -40,6 +40,8 @@ int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t | |||
int tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **cur); | |||
/* Common tokenizer for headers */ | |||
int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur); | |||
/* Make tokens for a subject: adds subject-derived tokens to *tree, creating the tree first when *tree is NULL */ | |||
void tokenize_subject (struct worker_task *task, GTree ** tree); | |||
/* Array of all defined tokenizers */ | |||
extern struct tokenizer tokenizers[]; |