]> source.dussan.org Git - rspamd.git/commitdiff
* Implement learning using classifiers and tokenizers API
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 8 Dec 2008 10:30:55 +0000 (13:30 +0300)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 8 Dec 2008 10:30:55 +0000 (13:30 +0300)
src/controller.c
src/main.h

index b85db1b2d25474a43e26e391ccdcd809ce268be3..fa2fa268f96c6ba6a9ea30a37d02f7b9cc3d767f 100644 (file)
@@ -23,6 +23,7 @@
 #include "cfg_file.h"
 #include "modules.h"
 #include "tokenizers/tokenizers.h"
+#include "classifiers/classifiers.h"
 
 #define CRLF "\r\n"
 
@@ -235,6 +236,9 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
                                session->learn_from = NULL;
                                session->learn_filename = NULL;
                                session->learn_tokenizer = get_tokenizer ("osb-text");
+                               session->learn_classifier = get_classifier ("winnow");
+                               /* By default learn positive */
+                               session->in_class = 1;
                                /* Get all arguments */
                                while (*cmd_args++) {
                                        arg = *cmd_args;
@@ -266,6 +270,21 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
                                                                        return;
                                                                }
                                                                break;
+                                                       case 'c':
+                                                               arg = *(cmd_args + 1);
+                                                               if (!arg || *arg == '\0' || (session->learn_classifier = get_classifier (arg)) == NULL) {
+                                                                       r = snprintf (out_buf, sizeof (out_buf), "classifier is not defined" CRLF, arg);
+                                                                       bufferevent_write (session->bev, out_buf, r);
+                                                                       return;
+                                                               }
+                                                               break;
+                                                       case 'n':
+                                                               session->in_class = 0;
+                                                               break;
+                                                       default:
+                                                               r = snprintf (out_buf, sizeof (out_buf), "tokenizer is not defined" CRLF, arg);
+                                                               bufferevent_write (session->bev, out_buf, r);
+                                                               return;
                                                }
                                        }
                                }
@@ -298,6 +317,7 @@ read_socket (struct bufferevent *bev, void *arg)
        int len, i;
        char *s, **params, *cmd, out_buf[128];
        GList *comp_list;
+       GTree *tokens;
 
        switch (session->state) {
                case STATE_COMMAND:
@@ -342,7 +362,14 @@ read_socket (struct bufferevent *bev, void *arg)
                                session->learn_buf->pos += i;
                                update_buf_size (session->learn_buf);
                                if (session->learn_buf->free == 0) {
-                                       /* XXX: require to insert real learning code here */
+                                       tokens = session->learn_tokenizer->tokenize_func (session->learn_tokenizer, session->session_pool, session->learn_buf->buf);
+                                       if (tokens == NULL) {
+                                               i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
+                                               bufferevent_write (bev, out_buf, i);
+                                               session->state = STATE_COMMAND;
+                                               return;
+                                       }
+                                       session->learn_classifier->learn_func (session->worker->srv->statfile_pool, session->learn_filename, tokens, session->in_class);
                                        session->worker->srv->stat->messages_learned ++;
                                        i = snprintf (out_buf, sizeof (out_buf), "learn ok" CRLF);
                                        bufferevent_write (bev, out_buf, i);
index 7f2a60c4cf9d984720dcf73ad1f9f3888e90c578..abe163b3c767150422187ccfc630c6756e37be92 100644 (file)
@@ -83,6 +83,7 @@ struct rspamd_worker {
 struct pidfh;
 struct config_file;
 struct tokenizer;
+struct classifier;
 
 /* Server statistics */
 struct rspamd_stat {
@@ -138,8 +139,10 @@ struct controller_session {
        char *learn_rcpt;
        char *learn_from;
        struct tokenizer *learn_tokenizer;
+       struct classifier *learn_classifier;
        char *learn_filename;
        f_str_buf_t *learn_buf;
+       int in_class;
 };
 
 /* Worker task structure */