aboutsummaryrefslogtreecommitdiffstats
path: root/src/filter.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-03-31 20:06:25 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-03-31 20:06:25 +0400
commit e414be40592724a884b4900c7ab199ebeaf5e171 (patch)
tree 33f4f7abc0d9a975b4a2e124e6ee064d75529ff3 /src/filter.c
parent 33b5aa19d7ec8ff59601c8495a8bc7813b0e6939 (diff)
download rspamd-e414be40592724a884b4900c7ab199ebeaf5e171.tar.gz
rspamd-e414be40592724a884b4900c7ab199ebeaf5e171.zip
* Add ability to learn rspamd via worker (without password) 0.3.11
Diffstat (limited to 'src/filter.c')
-rw-r--r-- src/filter.c 109
1 files changed, 109 insertions, 0 deletions
diff --git a/src/filter.c b/src/filter.c
index ec7b5a5ed..df8e1a9e0 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -43,6 +43,12 @@
# include "lua/lua_common.h"
#endif
+/**
+ * Quark used as the GError domain by the g_set_error () calls in this file.
+ * @return quark registered for the static string "g-filter-error-quark"
+ */
+static inline GQuark
+filter_error_quark (void)
+{
+	const GQuark domain = g_quark_from_static_string ("g-filter-error-quark");
+
+	return domain;
+}
+
static void
insert_metric_result (struct worker_task *task, struct metric *metric, const gchar *symbol,
double flag, GList * opts, gboolean single)
@@ -799,6 +805,109 @@ check_metric_action (double score, double required_score, struct metric *metric)
}
}
+/**
+ * Learn a statfile from the message attached to a task.
+ *
+ * The classifier that owns the statfile symbol is looked up in the task
+ * config. If that classifier has a "header" option, only the values of that
+ * header are tokenized; otherwise every non-empty text part is tokenized.
+ * The subject is tokenized afterwards and all tokens are handed to the
+ * classifier's learn function.
+ *
+ * @param statfile symbol of the statfile to learn
+ * @param task task that carries the message, config and worker
+ * @param err return location for a GError describing the failure, may be NULL
+ * @return TRUE when the message was learned, FALSE otherwise
+ */
+gboolean
+learn_task (const gchar *statfile, struct worker_task *task, GError **err)
+{
+	GList *cur;
+	struct classifier_config *cl;
+	struct classifier_ctx *cls_ctx;
+	gchar *s;
+	f_str_t c;
+	GTree *tokens = NULL;
+	struct statfile *st;
+	stat_file_t *stf;
+	gdouble sum = 0;
+	struct mime_text_part *part;
+	GError *learn_err = NULL;
+	gboolean learned = FALSE;
+
+	/* Load classifier by symbol */
+	cl = g_hash_table_lookup (task->cfg->classifiers_symbols, statfile);
+	if (cl == NULL) {
+		g_set_error (err, filter_error_quark(), 1, "Statfile %s is not configured in any classifier", statfile);
+		return FALSE;
+	}
+
+	/* If classifier has 'header' option just classify header of this type */
+	if ((s = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+		cur = message_get_header (task->task_pool, task->message, s, FALSE);
+		if (cur) {
+			/* The returned list is released together with the task pool */
+			memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, cur);
+		}
+	}
+	else {
+		/* Classify message otherwise */
+		cur = g_list_first (task->text_parts);
+	}
+
+	/* Get tokens from each element */
+	while (cur) {
+		if (s != NULL) {
+			/* Header mode: list elements are NUL-terminated strings */
+			c.len = strlen (cur->data);
+			c.begin = cur->data;
+		}
+		else {
+			/* Message mode: list elements are text parts */
+			part = cur->data;
+			/* Skip empty parts */
+			if (part->is_empty) {
+				cur = g_list_next (cur);
+				continue;
+			}
+			c.begin = part->content->data;
+			c.len = part->content->len;
+		}
+		/* Get tokens */
+		if (!cl->tokenizer->tokenize_func (
+				cl->tokenizer, task->task_pool,
+				&c, &tokens)) {
+			g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
+			return FALSE;
+		}
+		cur = g_list_next (cur);
+	}
+
+	/* Handle messages without text */
+	if (tokens == NULL) {
+		g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data");
+		msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id);
+		return FALSE;
+	}
+
+	/* Take care of subject */
+	tokenize_subject (task, &tokens);
+
+	/* Init classifier */
+	cls_ctx = cl->classifier->init_func (
+			task->task_pool, cl);
+	/* Get or create statfile */
+	stf = get_statfile_by_symbol (task->worker->srv->statfile_pool,
+			cl, statfile, &st, TRUE);
+
+	/*
+	 * Learn. The classifier error is collected in a local GError because
+	 * callers are allowed to pass err == NULL, so *err must never be
+	 * inspected directly.
+	 */
+	if (stf != NULL) {
+		learned = cl->classifier->learn_func (
+				cls_ctx, task->worker->srv->statfile_pool,
+				statfile, tokens, TRUE, &sum,
+				1.0, &learn_err);
+	}
+	if (!learned) {
+		if (learn_err != NULL) {
+			msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, learn_err->message);
+			/* Log first: propagation passes ownership on, or frees the error when err is NULL */
+			g_propagate_error (err, learn_err);
+		}
+		else {
+			g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error");
+			msg_info ("learn failed for message <%s>, unknown learn error", task->message_id);
+		}
+		return FALSE;
+	}
+	/* Increase statistics */
+	task->worker->srv->stat->messages_learned++;
+
+	maybe_write_binlog (cl, st, stf, tokens);
+	msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f",
+			task->message_id, statfile, sum);
+	statfile_pool_plan_invalidate (task->worker->srv->statfile_pool,
+			DEFAULT_STATFILE_INVALIDATE_TIME,
+			DEFAULT_STATFILE_INVALIDATE_JITTER);
+
+	return TRUE;
+}
/*
* vi:ts=4