source.dussan.org Git - rspamd.git/commitdiff
* Add Subject header to statistics
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 24 Dec 2010 15:57:55 +0000 (18:57 +0300)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 24 Dec 2010 15:57:55 +0000 (18:57 +0300)
* Write log message about symbols that are removed when composite symbol is inserted

src/controller.c
src/filter.c
src/tokenizers/tokenizers.c
src/tokenizers/tokenizers.h

index 617821bf6af2027a974ec637293738150dbdb256..a39ec21cb31b6118ccc0706d55ed7ee894a243d4 100644 (file)
@@ -845,7 +845,9 @@ controller_read_socket (f_str_t * in, void *arg)
                        session->state = STATE_REPLY;
                        return TRUE;
                }
-       
+
+               /* Take care of subject */
+               tokenize_subject (task, &tokens);
 
                /* Init classifier */
                cls_ctx = session->learn_classifier->classifier->init_func (session->session_pool, session->learn_classifier);
index f47dd1a3ae6b8c240da6d0455fe44bbab0e4c30d..e1e5d06bc5aa877404832c159a170cd5bd172687 100644 (file)
@@ -319,6 +319,8 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
        GQueue                         *stack;
        GList                          *symbols = NULL, *s;
        gsize                           cur, op1, op2;
+       gchar                           logbuf[256];
+       gint                            r;
 
        stack = g_queue_new ();
 
@@ -367,8 +369,15 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
                if (op1) {
                        /* Remove all symbols that are in composite symbol */
                        s = g_list_first (symbols);
+                       r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key);
                        while (s) {
                                g_hash_table_remove (cd->metric_res->symbols, s->data);
+                               if (s->next) {
+                                       r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data);
+                               }
+                               else {
+                                       r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data);
+                               }
                                s = g_list_next (s);
                        }
                        /* Add new symbol */
@@ -432,6 +441,8 @@ process_autolearn (struct statfile *st, struct worker_task *task, GTree * tokens
 
        if (check_autolearn (st->autolearn, task)) {
                if (tokens) {
+                       /* Take care of subject */
+                       tokenize_subject (task, &tokens);
                        msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename);
                        
                        /* Get or create statfile */
@@ -527,6 +538,8 @@ classifiers_callback (gpointer value, void *arg)
                return;
        }
 
+       /* Take care of subject */
+       tokenize_subject (task, &tokens);
        cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
 
        /* Autolearning */
index 9ca690e4703285eae9b3a4626a8a3b40312da697..ab073a28c6faaf95536cdd6822893a1cc465e2de 100644 (file)
@@ -222,6 +222,44 @@ tokenize_headers (memory_pool_t * pool, struct worker_task *task, GTree ** tree)
        return TRUE;
 }
 
+void
+tokenize_subject (struct worker_task *task, GTree ** tree)
+{      /* Adds up to two "Subject:" tokens to *tree: one for task->subject, one for the subject GMime returns for task->message */
+       f_str_t                         subject, subject_name;
+       const gchar                    *sub;
+       token_node_t                   *new = NULL;
+       /* No tree supplied yet: create one and register g_tree_destroy for it as a destructor on task->task_pool */
+       if (*tree == NULL) {
+               *tree = g_tree_new (token_node_compare_func);
+               memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, *tree);
+       }
+       /* Header-name half of every token built below; len excludes the terminating NUL of the literal */
+       subject_name.begin = "Subject:";
+       subject_name.len = sizeof ("Subject:") - 1;
+
+       /* First candidate: the subject already stored in task->subject, if any */
+       if (task->subject != NULL) {
+               new = memory_pool_alloc (task->task_pool, sizeof (token_node_t)); /* allocated before the lookup, so also when the token is a duplicate */
+               subject.begin = task->subject;
+               subject.len = strlen (task->subject);
+               new->h1 = fstrhash (&subject_name) * primes[0]; /* h1: hash of the header name, scaled by primes[0] */
+               new->h2 = fstrhash (&subject) * primes[1]; /* h2: hash of the subject text, scaled by primes[1] */
+               if (g_tree_lookup (*tree, new) == NULL) { /* insert only when the lookup finds no matching node */
+                       g_tree_insert (*tree, new, new); /* the node serves as both key and value */
+               }
+       }
+       if ((sub = g_mime_message_get_subject (task->message)) != NULL) { /* second candidate: subject reported by GMime for task->message */
+               new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
+               subject.begin = (gchar *)sub; /* const qualifier is cast away to store the pointer in subject.begin */
+               subject.len = strlen (sub);
+               new->h1 = fstrhash (&subject_name) * primes[0];
+               new->h2 = fstrhash (&subject) * primes[1];
+               if (g_tree_lookup (*tree, new) == NULL) { /* same duplicate check as for the first candidate */
+                       g_tree_insert (*tree, new, new);
+               }
+       }
+}
+
 /*
  * vi:ts=4
  */
index 9a16e907c6078f331e6818f1a352a5ccdf76d76b..21e454e6bc38ca2e23e57c032d280aa6ef1660d5 100644 (file)
@@ -40,6 +40,8 @@ int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t
 int tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **cur);
 /* Common tokenizer for headers */
 int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur);
+/* Make tokens for a subject: adds a "Subject:" token for task->subject and for the MIME message subject to *tree, creating the tree when *tree is NULL */
+void tokenize_subject (struct worker_task *task, GTree ** tree);
 
 /* Array of all defined tokenizers */
 extern struct tokenizer tokenizers[];