Browse Source

* Add Subject header to statistics

* Write log message about symbols that are removed when composite symbol is inserted
tags/0.3.7
Vsevolod Stakhov 13 years ago
parent
commit
6ae69bd5f7
4 changed files with 56 additions and 1 deletions
  1. 3
    1
      src/controller.c
  2. 13
    0
      src/filter.c
  3. 38
    0
      src/tokenizers/tokenizers.c
  4. 2
    0
      src/tokenizers/tokenizers.h

+ 3
- 1
src/controller.c View File

@@ -845,7 +845,9 @@ controller_read_socket (f_str_t * in, void *arg)
session->state = STATE_REPLY;
return TRUE;
}

/* Take care of subject */
tokenize_subject (task, &tokens);

/* Init classifier */
cls_ctx = session->learn_classifier->classifier->init_func (session->session_pool, session->learn_classifier);

+ 13
- 0
src/filter.c View File

@@ -319,6 +319,8 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
GQueue *stack;
GList *symbols = NULL, *s;
gsize cur, op1, op2;
gchar logbuf[256];
gint r;

stack = g_queue_new ();

@@ -367,8 +369,15 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
if (op1) {
/* Remove all symbols that are in composite symbol */
s = g_list_first (symbols);
r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key);
while (s) {
g_hash_table_remove (cd->metric_res->symbols, s->data);
if (s->next) {
r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data);
}
else {
r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data);
}
s = g_list_next (s);
}
/* Add new symbol */
@@ -432,6 +441,8 @@ process_autolearn (struct statfile *st, struct worker_task *task, GTree * tokens

if (check_autolearn (st->autolearn, task)) {
if (tokens) {
/* Take care of subject */
tokenize_subject (task, &tokens);
msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename);
/* Get or create statfile */
@@ -527,6 +538,8 @@ classifiers_callback (gpointer value, void *arg)
return;
}

/* Take care of subject */
tokenize_subject (task, &tokens);
cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);

/* Autolearning */

+ 38
- 0
src/tokenizers/tokenizers.c View File

@@ -222,6 +222,44 @@ tokenize_headers (memory_pool_t * pool, struct worker_task *task, GTree ** tree)
return TRUE;
}

void
tokenize_subject (struct worker_task *task, GTree ** tree)
{
f_str_t subject, subject_name;
const gchar *sub;
token_node_t *new = NULL;

if (*tree == NULL) {
*tree = g_tree_new (token_node_compare_func);
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, *tree);
}

subject_name.begin = "Subject:";
subject_name.len = sizeof ("Subject:") - 1;

/* Try to use pre-defined subject */
if (task->subject != NULL) {
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
subject.begin = task->subject;
subject.len = strlen (task->subject);
new->h1 = fstrhash (&subject_name) * primes[0];
new->h2 = fstrhash (&subject) * primes[1];
if (g_tree_lookup (*tree, new) == NULL) {
g_tree_insert (*tree, new, new);
}
}
if ((sub = g_mime_message_get_subject (task->message)) != NULL) {
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
subject.begin = (gchar *)sub;
subject.len = strlen (sub);
new->h1 = fstrhash (&subject_name) * primes[0];
new->h2 = fstrhash (&subject) * primes[1];
if (g_tree_lookup (*tree, new) == NULL) {
g_tree_insert (*tree, new, new);
}
}
}

/*
* vi:ts=4
*/

+ 2
- 0
src/tokenizers/tokenizers.h View File

@@ -40,6 +40,8 @@ int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t
int tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **cur);
/* Common tokenizer for headers */
int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur);
/*
 * Make tokens for a subject.
 * Creates *tree when it is NULL, then adds a token for task->subject (if set)
 * and a token for the subject of task->message (if present); tokens already
 * in the tree are not inserted again.
 */
void tokenize_subject (struct worker_task *task, GTree ** tree);

/* Array of all defined tokenizers */
extern struct tokenizer tokenizers[];

Loading…
Cancel
Save