diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-12-08 13:30:55 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-12-08 13:30:55 +0300 |
commit | c9df6177c0b5f8fb5bd2c6c4947c52e184b0b556 (patch) | |
tree | 7a352eccab4173ba0a23def11b5bf17d8ed46f34 /src | |
parent | 1162170387a535c21a63777c5d73ecbf706d0e02 (diff) | |
download | rspamd-c9df6177c0b5f8fb5bd2c6c4947c52e184b0b556.tar.gz rspamd-c9df6177c0b5f8fb5bd2c6c4947c52e184b0b556.zip |
* Implement learning using classifiers and tokenizers API
Diffstat (limited to 'src')
-rw-r--r-- | src/controller.c | 29 | ||||
-rw-r--r-- | src/main.h | 3 |
2 files changed, 31 insertions, 1 deletion
diff --git a/src/controller.c b/src/controller.c index b85db1b2d..fa2fa268f 100644 --- a/src/controller.c +++ b/src/controller.c @@ -23,6 +23,7 @@ #include "cfg_file.h" #include "modules.h" #include "tokenizers/tokenizers.h" +#include "classifiers/classifiers.h" #define CRLF "\r\n" @@ -235,6 +236,9 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control session->learn_from = NULL; session->learn_filename = NULL; session->learn_tokenizer = get_tokenizer ("osb-text"); + session->learn_classifier = get_classifier ("winnow"); + /* By default learn positive */ + session->in_class = 1; /* Get all arguments */ while (*cmd_args++) { arg = *cmd_args; @@ -266,6 +270,21 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control return; } break; + case 'c': + arg = *(cmd_args + 1); + if (!arg || *arg == '\0' || (session->learn_classifier = get_classifier (arg)) == NULL) { + r = snprintf (out_buf, sizeof (out_buf), "classifier is not defined" CRLF, arg); + bufferevent_write (session->bev, out_buf, r); + return; + } + break; + case 'n': + session->in_class = 0; + break; + default: + r = snprintf (out_buf, sizeof (out_buf), "tokenizer is not defined" CRLF, arg); + bufferevent_write (session->bev, out_buf, r); + return; } } } @@ -298,6 +317,7 @@ read_socket (struct bufferevent *bev, void *arg) int len, i; char *s, **params, *cmd, out_buf[128]; GList *comp_list; + GTree *tokens; switch (session->state) { case STATE_COMMAND: @@ -342,7 +362,14 @@ read_socket (struct bufferevent *bev, void *arg) session->learn_buf->pos += i; update_buf_size (session->learn_buf); if (session->learn_buf->free == 0) { - /* XXX: require to insert real learning code here */ + tokens = session->learn_tokenizer->tokenize_func (session->learn_tokenizer, session->session_pool, session->learn_buf->buf); + if (tokens == NULL) { + i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF); + bufferevent_write (bev, out_buf, i); + 
session->state = STATE_COMMAND; + return; + } + session->learn_classifier->learn_func (session->worker->srv->statfile_pool, session->learn_filename, tokens, session->in_class); session->worker->srv->stat->messages_learned ++; i = snprintf (out_buf, sizeof (out_buf), "learn ok" CRLF); bufferevent_write (bev, out_buf, i); diff --git a/src/main.h b/src/main.h index 7f2a60c4c..abe163b3c 100644 --- a/src/main.h +++ b/src/main.h @@ -83,6 +83,7 @@ struct rspamd_worker { struct pidfh; struct config_file; struct tokenizer; +struct classifier; /* Server statistics */ struct rspamd_stat { @@ -138,8 +139,10 @@ struct controller_session { char *learn_rcpt; char *learn_from; struct tokenizer *learn_tokenizer; + struct classifier *learn_classifier; char *learn_filename; f_str_buf_t *learn_buf; + int in_class; }; /* Worker task structure */ |