From a31ea02deb31a3ec805a795a656ca62065fc38b0 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 2 Jun 2015 12:37:22 +0100 Subject: [PATCH] Reorganize structure of filter components. --- src/libmime/filter.c | 418 ----------------------------------- src/libmime/filter.h | 44 ---- src/libserver/CMakeLists.txt | 1 + src/libserver/cfg_rcl.c | 4 +- src/libserver/composites.c | 335 ++++++++++++++++++++++++++++ src/libserver/composites.h | 50 +++++ src/libserver/task.c | 89 ++++++++ src/libserver/task.h | 12 + src/lua/lua_cfg_file.c | 1 + src/lua/lua_config.c | 1 + 10 files changed, 491 insertions(+), 464 deletions(-) create mode 100644 src/libserver/composites.c create mode 100644 src/libserver/composites.h diff --git a/src/libmime/filter.c b/src/libmime/filter.c index ddab0e81b..a9089818c 100644 --- a/src/libmime/filter.c +++ b/src/libmime/filter.c @@ -40,25 +40,6 @@ #define COMMON_PART_FACTOR 95 -static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len, - rspamd_mempool_t *pool, gpointer ud, GError **err); -static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom); -static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom); -static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom); - -const struct rspamd_atom_subr composite_expr_subr = { - .parse = rspamd_composite_expr_parse, - .process = rspamd_composite_expr_process, - .priority = rspamd_composite_expr_priority, - .destroy = rspamd_composite_expr_destroy -}; - -static inline GQuark -filter_error_quark (void) -{ - return g_quark_from_static_string ("g-filter-error-quark"); -} - struct metric_result * rspamd_create_metric_result (struct rspamd_task *task, const gchar *name) { @@ -343,392 +324,6 @@ check_metric_settings (struct rspamd_task *task, struct metric *metric, return FALSE; } -/* Return true if metric has score that is more than spam score for it */ -static gboolean -check_metric_is_spam (struct rspamd_task *task, struct metric *metric) -{ - struct metric_result *res; - double ms; - - /* Avoid concurrency while checking results */ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_lock (&result_mtx); -#else - G_LOCK (result_mtx); -#endif - res = g_hash_table_lookup (task->results, metric->name); - if (res) { -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&result_mtx); -#else - G_UNLOCK (result_mtx); -#endif - if (!check_metric_settings (task, metric, &ms)) { - ms = metric->actions[METRIC_ACTION_REJECT].score; - } - return (ms > 0 && res->score >= ms); - } - -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&result_mtx); -#else - G_UNLOCK (result_mtx); -#endif - - return FALSE; -} - -gint -rspamd_process_filters (struct rspamd_task *task) -{ - GList *cur; - struct metric *metric; - gpointer item = NULL; - - /* Insert default metric to be sure that it exists all the time */ - rspamd_create_metric_result (task, DEFAULT_METRIC); - if (task->settings) { - const ucl_object_t *wl; - - wl = ucl_object_find_key (task->settings, "whitelist"); - if (wl != NULL) { - msg_info ("<%s> is whitelisted", task->message_id); - task->flags |= RSPAMD_TASK_FLAG_SKIP; - return 0; - } - } - - /* Process metrics symbols */ - while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) { - /* Check reject actions */ - cur = task->cfg->metrics_list; - while (cur) { - metric = cur->data; - if (!(task->flags & RSPAMD_TASK_FLAG_PASS_ALL) && - metric->actions[METRIC_ACTION_REJECT].score > 0 && - check_metric_is_spam (task, metric)) { - msg_info ("<%s> has already scored more than %.2f, so do not " - "plan any more checks", task->message_id, - metric->actions[METRIC_ACTION_REJECT].score); - return 1; - } - cur = g_list_next (cur); - } - } - - if (rspamd_session_events_pending (task->s) != 0) { - task->state = WAIT_FILTER; - } - - return 1; -} - - -struct composites_data { - struct rspamd_task *task; - struct rspamd_composite *composite; - struct metric_result *metric_res; - GTree *symbols_to_remove; - guint8 *checked; -}; - -struct symbol_remove_data { - struct symbol *ms; - gboolean remove_weight; - gboolean remove_symbol; - GList *comp; -}; - - -/* - * Composites are just sequences of symbols - */ -static rspamd_expression_atom_t * -rspamd_composite_expr_parse (const gchar *line, gsize len, - rspamd_mempool_t *pool, gpointer ud, GError **err) -{ - gsize clen; - rspamd_expression_atom_t *res; - - clen = strcspn (line, ", \t()><+!|&\n"); - if (clen == 0) { - /* Invalid composite atom */ - g_set_error (err, filter_error_quark (), 100, "Invalid composite: %s", - line); - return NULL; - } - - res = rspamd_mempool_alloc0 (pool, sizeof (*res)); - res->len = clen; - res->str = line; - res->data = rspamd_mempool_alloc (pool, clen + 1); - rspamd_strlcpy (res->data, line, clen + 1); - - return res; -} - -static gint -rspamd_composite_process_single_symbol (struct composites_data *cd, - const gchar *sym, struct symbol **pms) -{ - struct symbol *ms = NULL; - gint rc = 0; - struct rspamd_composite *ncomp; - - if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) { - if ((ncomp = - g_hash_table_lookup (cd->task->cfg->composite_symbols, - sym)) != NULL) { - /* Set checked for this symbol to avoid cyclic references */ - if (isclr (cd->checked, ncomp->id * 2)) { - setbit (cd->checked, cd->composite->id * 2); - rc = rspamd_process_expression (ncomp->expr, - RSPAMD_EXPRESSION_FLAG_NOOPT, cd); - clrbit (cd->checked, cd->composite->id * 2); - - if (rc) { - setbit (cd->checked, ncomp->id * 2 + 1); - } - setbit (cd->checked, ncomp->id * 2); - - ms = g_hash_table_lookup (cd->metric_res->symbols, sym); - } - else { - /* - * XXX: in case of cyclic references this would return 0 - */ - rc = isset (cd->checked, ncomp->id * 2 + 1); - } - } - } - else { - rc = 1; - } - - *pms = ms; - return rc; -} - -static gint -rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom) -{ - struct composites_data *cd = (struct composites_data *)input; - const gchar *sym = atom->data; - struct symbol_remove_data *rd; - struct symbol *ms; - struct rspamd_symbols_group *gr; - struct rspamd_symbol_def *sdef; - gint rc = 0; - gchar t = '\0'; - - if (isset (cd->checked, cd->composite->id * 2)) { - /* We have already checked this composite, so just return its value */ - rc = isset (cd->checked, cd->composite->id * 2 + 1); - return rc; - } - - if (*sym == '~' || *sym == '-') { - t = *sym ++; - } - - if (strncmp (sym, "g:", 2) == 0) { - gr = g_hash_table_lookup (cd->task->cfg->symbols_groups, sym + 2); - - if (gr != NULL) { - LL_FOREACH (gr->symbols, sdef) { - rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms); - if (rc) { - break; - } - } - } - } - else { - rc = rspamd_composite_process_single_symbol (cd, sym, &ms); - } - - if (rc && ms) { - /* - * At this point we know that we need to do something about this symbol, - * however, we don't know whether we need to delete it unfortunately, - * that depends on the later decisions when the complete expression is - * evaluated. - */ - if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) { - rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd)); - rd->ms = ms; - - if (G_UNLIKELY (t == '~')) { - rd->remove_weight = FALSE; - rd->remove_symbol = TRUE; - } - else if (G_UNLIKELY (t == '-')) { - rd->remove_symbol = FALSE; - rd->remove_weight = FALSE; - } - else { - rd->remove_symbol = TRUE; - rd->remove_weight = TRUE; - } - - rd->comp = g_list_prepend (NULL, cd->composite); - g_tree_insert (cd->symbols_to_remove, - (gpointer)ms->name, - rd); - } - else { - /* - * XXX: what if we have different preferences regarding - * weight and symbol removal in different composites? - */ - rd->comp = g_list_prepend (rd->comp, cd->composite); - } - } - - return rc; -} - -/* - * We don't have preferences for composites - */ -static gint -rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) -{ - return 0; -} - -static void -rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) -{ - /* Composite atoms are destroyed just with the pool */ -} - -static gint -remove_compare_data (gconstpointer a, gconstpointer b) -{ - const gchar *ca = a, *cb = b; - - return strcmp (ca, cb); -} - -static void -composites_foreach_callback (gpointer key, gpointer value, void *data) -{ - struct composites_data *cd = data; - struct rspamd_composite *comp = value; - gint rc; - - cd->composite = comp; - - rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd); - - /* Checked bit */ - setbit (cd->checked, comp->id * 2); - - /* Result bit */ - if (rc) { - setbit (cd->checked, comp->id * 2 + 1); - rspamd_task_insert_result_single (cd->task, key, 1.0, NULL); - } - else { - clrbit (cd->checked, comp->id * 2 + 1); - } -} - - -static gboolean -composites_remove_symbols (gpointer key, gpointer value, gpointer data) -{ - struct composites_data *cd = data; - struct symbol_remove_data *rd = value; - GList *cur; - struct rspamd_composite *comp; - gboolean matched = FALSE; - - cur = rd->comp; - - /* - * XXX: actually, this is a weak assumption as we are unaware here about - * negate operation and so on. We need to parse AST directly and remove - * only those symbols that could be removed. - */ - while (cur) { - comp = cur->data; - - if (isset (cd->checked, comp->id * 2 + 1)) { - matched = TRUE; - break; - } - - cur = g_list_next (cur); - } - - g_list_free (rd->comp); - - if (matched) { - if (rd->remove_symbol) { - g_hash_table_remove (cd->metric_res->symbols, key); - } - if (rd->remove_weight) { - cd->metric_res->score -= rd->ms->score; - } - } - - return FALSE; -} - -static void -composites_metric_callback (gpointer key, gpointer value, gpointer data) -{ - struct rspamd_task *task = (struct rspamd_task *)data; - struct composites_data *cd = - rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); - struct metric_result *metric_res = (struct metric_result *)value; - - cd->task = task; - cd->metric_res = (struct metric_result *)metric_res; - cd->symbols_to_remove = g_tree_new (remove_compare_data); - cd->checked = - rspamd_mempool_alloc0 (task->task_pool, - NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); - - /* Process hash table */ - g_hash_table_foreach (task->cfg->composite_symbols, - composites_foreach_callback, - cd); - - /* Remove symbols that are in composites */ - g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); - /* Free list */ - g_tree_destroy (cd->symbols_to_remove); -} - -void -rspamd_make_composites (struct rspamd_task *task) -{ - g_hash_table_foreach (task->results, composites_metric_callback, task); -} - -struct classifiers_cbdata { - struct rspamd_task *task; - struct lua_locked_state *nL; -}; - - -void -rspamd_process_statistics (struct rspamd_task *task) -{ - if (RSPAMD_TASK_IS_SKIPPED (task)) { - return; - } - - /* TODO: handle err here */ - rspamd_stat_classify (task, task->cfg->lua_state, NULL); - - /* Process results */ - rspamd_make_composites (task); -} - static void insert_metric_header (gpointer metric_name, gpointer metric_value, gpointer data) @@ -910,16 +505,3 @@ rspamd_check_action_metric (struct rspamd_task *task, return METRIC_ACTION_NOACTION; } - -gboolean -rspamd_learn_task_spam (struct rspamd_classifier_config *cl, - struct rspamd_task *task, - gboolean is_spam, - GError **err) -{ - return rspamd_stat_learn (task, is_spam, task->cfg->lua_state, err); -} - -/* - * vi:ts=4 - */ diff --git a/src/libmime/filter.h b/src/libmime/filter.h index 67dc60010..d0a3d17ec 100644 --- a/src/libmime/filter.h +++ b/src/libmime/filter.h @@ -73,19 +73,6 @@ struct metric_result { double grow_factor; /**< current grow factor */ }; - -/** - * Subr for composite expressions - */ -extern const struct rspamd_atom_subr composite_expr_subr; -/** - * Composite structure - */ -struct rspamd_composite { - struct rspamd_expression *expr; - gint id; -}; - /** * Create or return existing result for the specified metric name * @param task task object @@ -95,19 +82,6 @@ struct rspamd_composite { struct metric_result * rspamd_create_metric_result (struct rspamd_task *task, const gchar *name); -/** - * Process all filters - * @param task worker's task that present message from user - * @return 0 - if there is non-finished tasks and 1 if processing is completed - */ -gint rspamd_process_filters (struct rspamd_task *task); - -/** - * Process message with statfiles - * @param task worker's task that present message from user - */ -void rspamd_process_statistics (struct rspamd_task *task); - /** * Insert a result to task * @param task worker's task that present message from user @@ -134,12 +108,6 @@ void rspamd_task_insert_result_single (struct rspamd_task *task, double flag, GList *opts); -/** - * Process all results and form composite metrics from existent metrics as it is defined in config - * @param task worker's task that present message from user - */ -void rspamd_make_composites (struct rspamd_task *task); - /** * Default consolidation function for metric, it get all symbols and multiply symbol * weight by some factor that is specified in config. Default factor is 1. @@ -152,18 +120,6 @@ double rspamd_factor_consolidation_func (struct rspamd_task *task, const gchar *unused); -/** - * Learn specified statfile with message in a task - * @param statfile symbol of statfile - * @param task worker's task object - * @param err pointer to GError - * @return true if learn succeed - */ -gboolean rspamd_learn_task_spam (struct rspamd_classifier_config *cl, - struct rspamd_task *task, - gboolean is_spam, - GError **err); - /* * Get action from a string */ diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index 4edbe054d..1f8df6c13 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -3,6 +3,7 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/buffer.c ${CMAKE_CURRENT_SOURCE_DIR}/cfg_utils.c ${CMAKE_CURRENT_SOURCE_DIR}/cfg_rcl.c + ${CMAKE_CURRENT_SOURCE_DIR}/composites.c ${CMAKE_CURRENT_SOURCE_DIR}/dkim.c ${CMAKE_CURRENT_SOURCE_DIR}/dns.c ${CMAKE_CURRENT_SOURCE_DIR}/dynamic_cfg.c diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 314be10d0..fc8ada749 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2013, Vsevolod Stakhov +/* Copyright (c) 2013-2015, Vsevolod Stakhov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,7 +28,7 @@ #include "cfg_file.h" #include "lua/lua_common.h" #include "expression.h" - +#include "composites.h" struct rspamd_rcl_default_handler_data { struct rspamd_rcl_struct_parser pd; diff --git a/src/libserver/composites.c b/src/libserver/composites.c new file mode 100644 index 000000000..b6d2fb863 --- /dev/null +++ b/src/libserver/composites.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "config.h" +#include "logger.h" +#include "expression.h" +#include "task.h" +#include "utlist.h" +#include "filter.h" +#include "composites.h" + +struct composites_data { + struct rspamd_task *task; + struct rspamd_composite *composite; + struct metric_result *metric_res; + GTree *symbols_to_remove; + guint8 *checked; +}; + +struct symbol_remove_data { + struct symbol *ms; + gboolean remove_weight; + gboolean remove_symbol; + GList *comp; +}; + +static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err); +static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom); +static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom); +static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom); + +const struct rspamd_atom_subr composite_expr_subr = { + .parse = rspamd_composite_expr_parse, + .process = rspamd_composite_expr_process, + .priority = rspamd_composite_expr_priority, + .destroy = rspamd_composite_expr_destroy +}; + +static GQuark +rspamd_composites_quark (void) +{ + return g_quark_from_static_string ("composites"); +} + +static rspamd_expression_atom_t * +rspamd_composite_expr_parse (const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err) +{ + gsize clen; + rspamd_expression_atom_t *res; + + /* + * Composites are just sequences of symbols + */ + clen = strcspn (line, ", \t()><+!|&\n"); + if (clen == 0) { + /* Invalid composite atom */ + g_set_error (err, rspamd_composites_quark (), 100, "Invalid composite: %s", + line); + return NULL; + } + + res = rspamd_mempool_alloc0 (pool, sizeof (*res)); + res->len = clen; + res->str = line; + res->data = rspamd_mempool_alloc (pool, clen + 1); + rspamd_strlcpy (res->data, line, clen + 1); + + return res; +} + +static gint +rspamd_composite_process_single_symbol (struct composites_data *cd, + const gchar *sym, struct symbol **pms) +{ + struct symbol *ms = NULL; + gint rc = 0; + struct rspamd_composite *ncomp; + + if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) { + if ((ncomp = + g_hash_table_lookup (cd->task->cfg->composite_symbols, + sym)) != NULL) { + /* Set checked for this symbol to avoid cyclic references */ + if (isclr (cd->checked, ncomp->id * 2)) { + setbit (cd->checked, cd->composite->id * 2); + rc = rspamd_process_expression (ncomp->expr, + RSPAMD_EXPRESSION_FLAG_NOOPT, cd); + clrbit (cd->checked, cd->composite->id * 2); + + if (rc) { + setbit (cd->checked, ncomp->id * 2 + 1); + } + setbit (cd->checked, ncomp->id * 2); + + ms = g_hash_table_lookup (cd->metric_res->symbols, sym); + } + else { + /* + * XXX: in case of cyclic references this would return 0 + */ + rc = isset (cd->checked, ncomp->id * 2 + 1); + } + } + } + else { + rc = 1; + } + + *pms = ms; + return rc; +} + +static gint +rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom) +{ + struct composites_data *cd = (struct composites_data *)input; + const gchar *sym = atom->data; + struct symbol_remove_data *rd; + struct symbol *ms; + struct rspamd_symbols_group *gr; + struct rspamd_symbol_def *sdef; + gint rc = 0; + gchar t = '\0'; + + if (isset (cd->checked, cd->composite->id * 2)) { + /* We have already checked this composite, so just return its value */ + rc = isset (cd->checked, cd->composite->id * 2 + 1); + return rc; + } + + if (*sym == '~' || *sym == '-') { + t = *sym ++; + } + + if (strncmp (sym, "g:", 2) == 0) { + gr = g_hash_table_lookup (cd->task->cfg->symbols_groups, sym + 2); + + if (gr != NULL) { + LL_FOREACH (gr->symbols, sdef) { + rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms); + if (rc) { + break; + } + } + } + } + else { + rc = rspamd_composite_process_single_symbol (cd, sym, &ms); + } + + if (rc && ms) { + /* + * At this point we know that we need to do something about this symbol, + * however, we don't know whether we need to delete it unfortunately, + * that depends on the later decisions when the complete expression is + * evaluated. + */ + if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) { + rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd)); + rd->ms = ms; + + if (G_UNLIKELY (t == '~')) { + rd->remove_weight = FALSE; + rd->remove_symbol = TRUE; + } + else if (G_UNLIKELY (t == '-')) { + rd->remove_symbol = FALSE; + rd->remove_weight = FALSE; + } + else { + rd->remove_symbol = TRUE; + rd->remove_weight = TRUE; + } + + rd->comp = g_list_prepend (NULL, cd->composite); + g_tree_insert (cd->symbols_to_remove, + (gpointer)ms->name, + rd); + } + else { + /* + * XXX: what if we have different preferences regarding + * weight and symbol removal in different composites? + */ + rd->comp = g_list_prepend (rd->comp, cd->composite); + } + } + + return rc; +} + +/* + * We don't have preferences for composites + */ +static gint +rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) +{ + return 0; +} + +static void +rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) +{ + /* Composite atoms are destroyed just with the pool */ +} + +static gint +remove_compare_data (gconstpointer a, gconstpointer b) +{ + const gchar *ca = a, *cb = b; + + return strcmp (ca, cb); +} + +static void +composites_foreach_callback (gpointer key, gpointer value, void *data) +{ + struct composites_data *cd = data; + struct rspamd_composite *comp = value; + gint rc; + + cd->composite = comp; + + rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd); + + /* Checked bit */ + setbit (cd->checked, comp->id * 2); + + /* Result bit */ + if (rc) { + setbit (cd->checked, comp->id * 2 + 1); + rspamd_task_insert_result_single (cd->task, key, 1.0, NULL); + } + else { + clrbit (cd->checked, comp->id * 2 + 1); + } +} + + +static gboolean +composites_remove_symbols (gpointer key, gpointer value, gpointer data) +{ + struct composites_data *cd = data; + struct symbol_remove_data *rd = value; + GList *cur; + struct rspamd_composite *comp; + gboolean matched = FALSE; + + cur = rd->comp; + + /* + * XXX: actually, this is a weak assumption as we are unaware here about + * negate operation and so on. We need to parse AST directly and remove + * only those symbols that could be removed. + */ + while (cur) { + comp = cur->data; + + if (isset (cd->checked, comp->id * 2 + 1)) { + matched = TRUE; + break; + } + + cur = g_list_next (cur); + } + + g_list_free (rd->comp); + + if (matched) { + if (rd->remove_symbol) { + g_hash_table_remove (cd->metric_res->symbols, key); + } + if (rd->remove_weight) { + cd->metric_res->score -= rd->ms->score; + } + } + + return FALSE; +} + +static void +composites_metric_callback (gpointer key, gpointer value, gpointer data) +{ + struct rspamd_task *task = (struct rspamd_task *)data; + struct composites_data *cd = + rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); + struct metric_result *metric_res = (struct metric_result *)value; + + cd->task = task; + cd->metric_res = (struct metric_result *)metric_res; + cd->symbols_to_remove = g_tree_new (remove_compare_data); + cd->checked = + rspamd_mempool_alloc0 (task->task_pool, + NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); + + /* Process hash table */ + g_hash_table_foreach (task->cfg->composite_symbols, + composites_foreach_callback, + cd); + + /* Remove symbols that are in composites */ + g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); + /* Free list */ + g_tree_destroy (cd->symbols_to_remove); +} + +void +rspamd_make_composites (struct rspamd_task *task) +{ + g_hash_table_foreach (task->results, composites_metric_callback, task); +} diff --git a/src/libserver/composites.h b/src/libserver/composites.h new file mode 100644 index 000000000..fdcfe8241 --- /dev/null +++ b/src/libserver/composites.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SRC_LIBSERVER_COMPOSITES_H_ +#define SRC_LIBSERVER_COMPOSITES_H_ + +#include "config.h" + +struct rspamd_task; + +/** + * Subr for composite expressions + */ +extern const struct rspamd_atom_subr composite_expr_subr; +/** + * Composite structure + */ +struct rspamd_composite { + struct rspamd_expression *expr; + gint id; +}; + +/** + * Process all results and form composite metrics from existent metrics as it is defined in config + * @param task worker's task that present message from user + */ +void rspamd_make_composites (struct rspamd_task *task); + +#endif /* SRC_LIBSERVER_COMPOSITES_H_ */ diff --git a/src/libserver/task.c b/src/libserver/task.c index ba4f0c1bd..70da55afe 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -492,3 +492,92 @@ rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re) return ret; } + +gboolean +rspamd_learn_task_spam (struct rspamd_classifier_config *cl, + struct rspamd_task *task, + gboolean is_spam, + GError **err) +{ + return rspamd_stat_learn (task, is_spam, task->cfg->lua_state, err); +} + +/* Return true if metric has score that is more than spam score for it */ +static gboolean +check_metric_is_spam (struct rspamd_task *task, struct metric *metric) +{ + struct metric_result *res; + double ms; + + /* Avoid concurrency while checking results */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_lock (&result_mtx); +#else + G_LOCK (result_mtx); +#endif + res = g_hash_table_lookup (task->results, metric->name); + if (res) { +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif + if (!check_metric_settings (task, metric, &ms)) { + ms = metric->actions[METRIC_ACTION_REJECT].score; + } + return (ms > 0 && res->score >= ms); + } + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif + + return FALSE; +} + +gint +rspamd_process_filters (struct rspamd_task *task) +{ + GList *cur; + struct metric *metric; + gpointer item = NULL; + + /* Insert default metric to be sure that it exists all the time */ + rspamd_create_metric_result (task, DEFAULT_METRIC); + if (task->settings) { + const ucl_object_t *wl; + + wl = ucl_object_find_key (task->settings, "whitelist"); + if (wl != NULL) { + msg_info ("<%s> is whitelisted", task->message_id); + task->flags |= RSPAMD_TASK_FLAG_SKIP; + return 0; + } + } + + /* Process metrics symbols */ + while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) { + /* Check reject actions */ + cur = task->cfg->metrics_list; + while (cur) { + metric = cur->data; + if (!(task->flags & RSPAMD_TASK_FLAG_PASS_ALL) && + metric->actions[METRIC_ACTION_REJECT].score > 0 && + check_metric_is_spam (task, metric)) { + msg_info ("<%s> has already scored more than %.2f, so do not " + "plan any more checks", task->message_id, + metric->actions[METRIC_ACTION_REJECT].score); + return 1; + } + cur = g_list_next (cur); + } + } + + if (rspamd_session_events_pending (task->s) != 0) { + task->state = WAIT_FILTER; + } + + return 1; +} diff --git a/src/libserver/task.h b/src/libserver/task.h index 5ce24176c..45e720eb8 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -239,4 +239,16 @@ guint rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re, */ guint rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re); +/** + * Learn specified statfile with message in a task + * @param statfile symbol of statfile + * @param task worker's task object + * @param err pointer to GError + * @return true if learn succeed + */ +gboolean rspamd_learn_task_spam (struct rspamd_classifier_config *cl, + struct rspamd_task *task, + gboolean is_spam, + GError **err); + #endif /* TASK_H_ */ diff --git a/src/lua/lua_cfg_file.c b/src/lua/lua_cfg_file.c index 047a5119b..6e49d9512 100644 --- a/src/lua/lua_cfg_file.c +++ b/src/lua/lua_cfg_file.c @@ -26,6 +26,7 @@ #include "symbols_cache.h" #include "expression.h" #include "filter.h" +#include "composites.h" #ifdef HAVE_SYS_UTSNAME_H #include #endif diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 410595d08..8262f51ea 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -28,6 +28,7 @@ #include "message.h" #include "radix.h" #include "expression.h" +#include "composites.h" #include "utlist.h" /*** -- 2.39.5