about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2008-12-08 13:30:55 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2008-12-08 13:30:55 +0300
commit c9df6177c0b5f8fb5bd2c6c4947c52e184b0b556 (patch)
tree 7a352eccab4173ba0a23def11b5bf17d8ed46f34
parent 1162170387a535c21a63777c5d73ecbf706d0e02 (diff)
download rspamd-c9df6177c0b5f8fb5bd2c6c4947c52e184b0b556.tar.gz
rspamd-c9df6177c0b5f8fb5bd2c6c4947c52e184b0b556.zip
* Implement learning using classifiers and tokenizers API
-rw-r--r-- src/controller.c 29
-rw-r--r-- src/main.h 3
2 files changed, 31 insertions, 1 deletion
diff --git a/src/controller.c b/src/controller.c
index b85db1b2d..fa2fa268f 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -23,6 +23,7 @@
#include "cfg_file.h"
#include "modules.h"
#include "tokenizers/tokenizers.h"
+#include "classifiers/classifiers.h"
#define CRLF "\r\n"
@@ -235,6 +236,9 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
session->learn_from = NULL;
session->learn_filename = NULL;
session->learn_tokenizer = get_tokenizer ("osb-text");
+ session->learn_classifier = get_classifier ("winnow");
+ /* By default learn positive */
+ session->in_class = 1;
/* Get all arguments */
while (*cmd_args++) {
arg = *cmd_args;
@@ -266,6 +270,21 @@ process_command (struct controller_command *cmd, char **cmd_args, struct control
return;
}
break;
+ case 'c':
+ arg = *(cmd_args + 1);
+ if (!arg || *arg == '\0' || (session->learn_classifier = get_classifier (arg)) == NULL) {
+ r = snprintf (out_buf, sizeof (out_buf), "classifier is not defined" CRLF, arg);
+ bufferevent_write (session->bev, out_buf, r);
+ return;
+ }
+ break;
+ case 'n':
+ session->in_class = 0;
+ break;
+ default:
+ r = snprintf (out_buf, sizeof (out_buf), "tokenizer is not defined" CRLF, arg);
+ bufferevent_write (session->bev, out_buf, r);
+ return;
}
}
}
@@ -298,6 +317,7 @@ read_socket (struct bufferevent *bev, void *arg)
int len, i;
char *s, **params, *cmd, out_buf[128];
GList *comp_list;
+ GTree *tokens;
switch (session->state) {
case STATE_COMMAND:
@@ -342,7 +362,14 @@ read_socket (struct bufferevent *bev, void *arg)
session->learn_buf->pos += i;
update_buf_size (session->learn_buf);
if (session->learn_buf->free == 0) {
- /* XXX: require to insert real learning code here */
+ tokens = session->learn_tokenizer->tokenize_func (session->learn_tokenizer, session->session_pool, session->learn_buf->buf);
+ if (tokens == NULL) {
+ i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
+ bufferevent_write (bev, out_buf, i);
+ session->state = STATE_COMMAND;
+ return;
+ }
+ session->learn_classifier->learn_func (session->worker->srv->statfile_pool, session->learn_filename, tokens, session->in_class);
session->worker->srv->stat->messages_learned ++;
i = snprintf (out_buf, sizeof (out_buf), "learn ok" CRLF);
bufferevent_write (bev, out_buf, i);
diff --git a/src/main.h b/src/main.h
index 7f2a60c4c..abe163b3c 100644
--- a/src/main.h
+++ b/src/main.h
@@ -83,6 +83,7 @@ struct rspamd_worker {
struct pidfh;
struct config_file;
struct tokenizer;
+struct classifier;
/* Server statistics */
struct rspamd_stat {
@@ -138,8 +139,10 @@ struct controller_session {
char *learn_rcpt;
char *learn_from;
struct tokenizer *learn_tokenizer;
+ struct classifier *learn_classifier;
char *learn_filename;
f_str_buf_t *learn_buf;
+ int in_class;
};
/* Worker task structure */