From: Vsevolod Stakhov Date: Thu, 15 Jul 2021 09:54:08 +0000 (+0100) Subject: [Rework] Composites: Start rework of the composites framework X-Git-Tag: 3.0~159 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b2c36feea701c6685d83b8c4e6282fe7e307609d;p=rspamd.git [Rework] Composites: Start rework of the composites framework --- diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index 189e9fe03..75fad36ac 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -3,7 +3,7 @@ ADD_SUBDIRECTORY(css) SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/cfg_utils.c ${CMAKE_CURRENT_SOURCE_DIR}/cfg_rcl.c - ${CMAKE_CURRENT_SOURCE_DIR}/composites.c + ${CMAKE_CURRENT_SOURCE_DIR}/composites/composites.cxx ${CMAKE_CURRENT_SOURCE_DIR}/dkim.c ${CMAKE_CURRENT_SOURCE_DIR}/dns.c ${CMAKE_CURRENT_SOURCE_DIR}/dynamic_cfg.c diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 73b9a3b1d..68b94abfe 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -20,7 +20,7 @@ #include "cfg_file.h" #include "lua/lua_common.h" #include "expression.h" -#include "composites.h" +#include "src/libserver/composites/composites.h" #include "libserver/worker_util.h" #include "unix-std.h" #include "cryptobox.h" diff --git a/src/libserver/composites.c b/src/libserver/composites.c deleted file mode 100644 index 6f3e8a7b0..000000000 --- a/src/libserver/composites.c +++ /dev/null @@ -1,989 +0,0 @@ -/*- - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "config.h" -#include "logger.h" -#include "expression.h" -#include "task.h" -#include "utlist.h" -#include "scan_result.h" -#include "composites.h" - -#include - -#define msg_err_composites(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ - "composites", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) -#define msg_warn_composites(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ - "composites", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) -#define msg_info_composites(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ - "composites", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) - -#define msg_debug_composites(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ - rspamd_composites_log_id, "composites", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) - -INIT_LOG_MODULE(composites) - -struct composites_data { - struct rspamd_task *task; - struct rspamd_composite *composite; - struct rspamd_scan_result *metric_res; - GHashTable *symbols_to_remove; - guint8 *checked; - struct composites_data *next; -}; - -struct rspamd_composite_option_match { - enum { - RSPAMD_COMPOSITE_OPTION_PLAIN, - RSPAMD_COMPOSITE_OPTION_RE - } type; - - union { - rspamd_regexp_t *re; - gchar *match; - } data; - struct rspamd_composite_option_match *prev, *next; -}; - -struct rspamd_composite_atom { - gchar *symbol; - enum { - ATOM_UNKNOWN, - ATOM_COMPOSITE, - ATOM_PLAIN - } comp_type; - - struct rspamd_composite *ncomp; /* underlying composite */ - struct rspamd_composite_option_match *opts; -}; - -enum rspamd_composite_action { - RSPAMD_COMPOSITE_UNTOUCH = 0, - RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1 << 0), - RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1 << 1), - RSPAMD_COMPOSITE_REMOVE_FORCED = (1 << 2) -}; - -struct symbol_remove_data { - const gchar *sym; - struct rspamd_composite *comp; - GNode *parent; - guint action; - struct symbol_remove_data *prev, *next; -}; - -static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len, - rspamd_mempool_t *pool, gpointer ud, GError **err); -static gdouble rspamd_composite_expr_process (void *ud, rspamd_expression_atom_t *atom); -static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom); -static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom); -static void composites_foreach_callback (gpointer key, gpointer value, void *data); - -const struct rspamd_atom_subr composite_expr_subr = { - .parse = rspamd_composite_expr_parse, - .process = rspamd_composite_expr_process, - .priority = rspamd_composite_expr_priority, - .destroy = rspamd_composite_expr_destroy -}; - -static GQuark -rspamd_composites_quark (void) -{ - return g_quark_from_static_string ("composites"); -} - -static rspamd_expression_atom_t * -rspamd_composite_expr_parse (const gchar *line, gsize len, - rspamd_mempool_t *pool, gpointer ud, GError **err) -{ - gsize clen = 0; - rspamd_expression_atom_t *res; - struct rspamd_composite_atom *atom; - const gchar *p, *end; - enum composite_expr_state { - comp_state_read_symbol = 0, - comp_state_read_obrace, - comp_state_read_option, - comp_state_read_regexp, - comp_state_read_regexp_end, - comp_state_read_comma, - comp_state_read_ebrace, - comp_state_read_end - } state = comp_state_read_symbol; - - end = line + len; - p = line; - - /* Find length of the atom using a reduced state machine */ - while (p < end) { - if (state == comp_state_read_end) { - break; - } - - switch (state) { - case comp_state_read_symbol: - clen = rspamd_memcspn (p, "[; \t()>comp_type = ATOM_UNKNOWN; - res = rspamd_mempool_alloc0 (pool, sizeof (*res)); - res->len = clen; - res->str = line; - - /* Full state machine to fill a composite atom */ - const gchar *opt_start = NULL; - - while (p < end) { - struct rspamd_composite_option_match *opt_match; - - if (state == comp_state_read_end) { - break; - } - - switch (state) { - case comp_state_read_symbol: - clen = rspamd_memcspn (p, "[; \t()>symbol = rspamd_mempool_alloc (pool, clen + 1); - rspamd_strlcpy (atom->symbol, line, clen + 1); - - break; - case comp_state_read_obrace: - p ++; - - if (*p == '/') { - opt_start = p; - p ++; /* Starting slash */ - state = comp_state_read_regexp; - } - else { - state = comp_state_read_option; - opt_start = p; - } - - break; - case comp_state_read_regexp: - if (*p == '\\' && p + 1 < end) { - /* Escaping */ - p ++; - } - else if (*p == '/') { - /* End of regexp, possible flags */ - state = comp_state_read_regexp_end; - } - p ++; - break; - case comp_state_read_option: - if (*p == ',' || *p == ']') { - opt_match = rspamd_mempool_alloc (pool, sizeof (*opt_match)); - /* Plain match */ - gchar *opt_buf; - gint opt_len = p - opt_start; - - opt_buf = rspamd_mempool_alloc (pool, opt_len + 1); - rspamd_strlcpy (opt_buf, opt_start, opt_len + 1); - - opt_match->data.match = opt_buf; - opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN; - - DL_APPEND (atom->opts, opt_match); - - if (*p == ',') { - p++; - state = comp_state_read_comma; - } - else { - state = comp_state_read_ebrace; - } - } - else { - p ++; - } - break; - case comp_state_read_regexp_end: - if (*p == ',' || *p == ']') { - opt_match = rspamd_mempool_alloc (pool, sizeof (*opt_match)); - /* Plain match */ - gchar *opt_buf; - gint opt_len = p - opt_start; - - opt_buf = rspamd_mempool_alloc (pool, opt_len + 1); - rspamd_strlcpy (opt_buf, opt_start, opt_len + 1); - - rspamd_regexp_t *re; - GError *re_err = NULL; - - re = rspamd_regexp_new (opt_buf, NULL, &re_err); - - if (re == NULL) { - msg_err_pool ("cannot create regexp from string %s: %e", - opt_buf, re_err); - - g_error_free (re_err); - } - else { - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t)rspamd_regexp_unref, - re); - opt_match->data.re = re; - opt_match->type = RSPAMD_COMPOSITE_OPTION_RE; - - DL_APPEND (atom->opts, opt_match); - } - - if (*p == ',') { - p++; - state = comp_state_read_comma; - } - else { - state = comp_state_read_ebrace; - } - } - else { - p ++; - } - break; - case comp_state_read_comma: - if (!g_ascii_isspace (*p)) { - if (*p == '/') { - state = comp_state_read_regexp; - opt_start = p; - } - else if (*p == ']') { - state = comp_state_read_ebrace; - } - else { - opt_start = p; - state = comp_state_read_option; - } - } - else { - /* Skip spaces after comma */ - p ++; - } - break; - case comp_state_read_ebrace: - p ++; - state = comp_state_read_end; - break; - case comp_state_read_end: - g_assert_not_reached (); - } - } - - res->data = atom; - - return res; -} - -static gdouble -rspamd_composite_process_single_symbol (struct composites_data *cd, - const gchar *sym, - struct rspamd_symbol_result **pms, - struct rspamd_composite_atom *atom) -{ - struct rspamd_symbol_result *ms = NULL; - gdouble rc = 0; - struct rspamd_task *task = cd->task; - - if ((ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res)) == NULL) { - msg_debug_composites ("not found symbol %s in composite %s", sym, - cd->composite->sym); - - if (atom->comp_type == ATOM_UNKNOWN) { - struct rspamd_composite *ncomp; - - if ((ncomp = - g_hash_table_lookup (cd->task->cfg->composite_symbols, - sym)) != NULL) { - atom->comp_type = ATOM_COMPOSITE; - atom->ncomp = ncomp; - } - else { - atom->comp_type = ATOM_PLAIN; - } - } - - if (atom->comp_type == ATOM_COMPOSITE) { - msg_debug_composites ("symbol %s for composite %s is another composite", - sym, cd->composite->sym); - - if (isclr (cd->checked, atom->ncomp->id * 2)) { - struct rspamd_composite *saved; - - msg_debug_composites ("composite dependency %s for %s is not checked", - sym, cd->composite->sym); - /* Set checked for this symbol to avoid cyclic references */ - setbit (cd->checked, cd->composite->id * 2); - saved = cd->composite; /* Save the current composite */ - composites_foreach_callback ((gpointer)atom->ncomp->sym, atom->ncomp, cd); - - /* Restore state */ - cd->composite = saved; - clrbit (cd->checked, cd->composite->id * 2); - - ms = rspamd_task_find_symbol_result (cd->task, sym, - cd->metric_res); - } - else { - /* - * XXX: in case of cyclic references this would return 0 - */ - if (isset (cd->checked, atom->ncomp->id * 2 + 1)) { - ms = rspamd_task_find_symbol_result (cd->task, sym, - cd->metric_res); - } - } - } - } - - if (ms) { - msg_debug_composites ("found symbol %s in composite %s, weight: %.3f", - sym, cd->composite->sym, ms->score); - - /* Now check options */ - struct rspamd_composite_option_match *cur_opt; - - DL_FOREACH (atom->opts, cur_opt) { - struct rspamd_symbol_option *opt; - bool found = false; - - DL_FOREACH (ms->opts_head, opt) { - if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) { - gsize mlen = strlen (cur_opt->data.match); - - if (opt->optlen == mlen && - memcmp (opt->option, cur_opt->data.match, mlen) == 0) { - - found = true; - - break; - } - } - else { - if (rspamd_regexp_search (cur_opt->data.re, - opt->option, opt->optlen, NULL, NULL, FALSE, NULL)) { - found = true; - - break; - } - } - } - - - if (!found) { - if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) { - msg_debug_composites ("symbol %s in composite %s misses required option %s", - sym, - cd->composite->sym, - cur_opt->data.match); - } - else { - msg_debug_composites ("symbol %s in composite %s failed to match regexp %s", - sym, - cd->composite->sym, - rspamd_regexp_get_pattern (cur_opt->data.re)); - } - - ms = NULL; - - break; - } - } - - if (ms) { - if (ms->score == 0) { - rc = 0.001; /* Distinguish from 0 */ - } - else { - rc = ms->score; - } - } - } - - *pms = ms; - return rc; -} - -static void -rspamd_composite_process_symbol_removal (rspamd_expression_atom_t *atom, - struct composites_data *cd, - struct rspamd_symbol_result *ms, - const gchar *beg) -{ - gchar t; - struct symbol_remove_data *rd, *nrd; - struct rspamd_task *task = cd->task; - - if (ms == NULL) { - return; - } - - /* - * At this point we know that we need to do something about this symbol, - * however, we don't know whether we need to delete it unfortunately, - * that depends on the later decisions when the complete expression is - * evaluated. - */ - rd = g_hash_table_lookup (cd->symbols_to_remove, ms->name); - - nrd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*nrd)); - nrd->sym = ms->name; - - /* By default remove symbols */ - switch (cd->composite->policy) { - case RSPAMD_COMPOSITE_POLICY_REMOVE_ALL: - default: - nrd->action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL|RSPAMD_COMPOSITE_REMOVE_WEIGHT); - break; - case RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL: - nrd->action = RSPAMD_COMPOSITE_REMOVE_SYMBOL; - break; - case RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT: - nrd->action = RSPAMD_COMPOSITE_REMOVE_WEIGHT; - break; - case RSPAMD_COMPOSITE_POLICY_LEAVE: - nrd->action = 0; - break; - } - - for (;;) { - t = *beg; - - if (t == '~') { - nrd->action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL; - } - else if (t == '-') { - nrd->action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT| - RSPAMD_COMPOSITE_REMOVE_SYMBOL); - } - else if (t == '^') { - nrd->action |= RSPAMD_COMPOSITE_REMOVE_FORCED; - } - else { - break; - } - - beg ++; - } - - nrd->comp = cd->composite; - nrd->parent = atom->parent; - - if (rd == NULL) { - DL_APPEND (rd, nrd); - g_hash_table_insert (cd->symbols_to_remove, (gpointer)ms->name, rd); - msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s", - cd->metric_res->name, - ms->name, nrd->action, - cd->composite->sym); - } - else { - DL_APPEND (rd, nrd); - msg_debug_composites ("%s: append symbol %s to removal: %d policy, from composite %s", - cd->metric_res->name, - ms->name, nrd->action, - cd->composite->sym); - } -} - -static gdouble -rspamd_composite_expr_process (void *ud, - rspamd_expression_atom_t *atom) -{ - static const double epsilon = 0.00001; - struct composites_data *cd = (struct composites_data *)ud; - const gchar *sym = NULL; - struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data; - - struct rspamd_symbol_result *ms = NULL; - struct rspamd_symbols_group *gr; - struct rspamd_symbol *sdef; - struct rspamd_task *task = cd->task; - GHashTableIter it; - gpointer k, v; - gdouble rc = 0, max = 0; - - if (isset (cd->checked, cd->composite->id * 2)) { - /* We have already checked this composite, so just return its value */ - if (isset (cd->checked, cd->composite->id * 2 + 1)) { - ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res); - } - - if (ms) { - if (ms->score == 0) { - rc = epsilon; /* Distinguish from 0 */ - } - else { - /* Treat negative and positive scores equally... */ - rc = fabs (ms->score); - } - } - - msg_debug_composites ("composite %s is already checked, result: %.2f", - cd->composite->sym, rc); - - return rc; - } - - sym = comp_atom->symbol; - guint slen = strlen (sym); - - while (*sym != '\0' && !g_ascii_isalnum (*sym)) { - sym ++; - slen --; - } - - if (slen > 2) { - if (G_UNLIKELY (memcmp (sym, "g:", 2) == 0)) { - gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 2); - - if (gr != NULL) { - g_hash_table_iter_init (&it, gr->symbols); - - while (g_hash_table_iter_next (&it, &k, &v)) { - sdef = v; - rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms, - comp_atom); - - if (rc) { - rspamd_composite_process_symbol_removal (atom, - cd, - ms, - comp_atom->symbol); - - if (fabs (rc) > max) { - max = fabs (rc); - } - } - } - } - - rc = max; - } - else if (G_UNLIKELY (memcmp (sym, "g+:", 3) == 0)) { - /* Group, positive symbols only */ - gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3); - - if (gr != NULL) { - g_hash_table_iter_init (&it, gr->symbols); - - while (g_hash_table_iter_next (&it, &k, &v)) { - sdef = v; - - if (sdef->score > 0) { - rc = rspamd_composite_process_single_symbol (cd, - sdef->name, - &ms, - comp_atom); - - if (rc) { - rspamd_composite_process_symbol_removal (atom, - cd, - ms, - comp_atom->symbol); - - if (fabs (rc) > max) { - max = fabs (rc); - } - } - } - } - - rc = max; - } - } - else if (G_UNLIKELY (memcmp (sym, "g-:", 3) == 0)) { - /* Group, negative symbols only */ - gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3); - - if (gr != NULL) { - g_hash_table_iter_init (&it, gr->symbols); - - while (g_hash_table_iter_next (&it, &k, &v)) { - sdef = v; - - if (sdef->score < 0) { - rc = rspamd_composite_process_single_symbol (cd, - sdef->name, - &ms, - comp_atom); - - if (rc) { - rspamd_composite_process_symbol_removal (atom, - cd, - ms, - comp_atom->symbol); - - if (fabs (rc) > max) { - max = fabs (rc); - } - } - } - } - - rc = max; - } - } - else { - rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom); - - if (rc) { - rspamd_composite_process_symbol_removal (atom, - cd, - ms, - comp_atom->symbol); - } - } - } - else { - rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom); - - if (rc) { - rspamd_composite_process_symbol_removal (atom, - cd, - ms, - comp_atom->symbol); - } - } - - msg_debug_composites ("%s: final result for composite %s is %.2f", - cd->metric_res->name, - cd->composite->sym, rc); - - return rc; -} - -/* - * We don't have preferences for composites - */ -static gint -rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) -{ - return 0; -} - -static void -rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) -{ - /* Composite atoms are destroyed just with the pool */ -} - - -static void -composites_foreach_callback (gpointer key, gpointer value, void *data) -{ - struct composites_data *cd = data; - struct rspamd_composite *comp = value; - struct rspamd_task *task; - gdouble rc; - - cd->composite = comp; - task = cd->task; - - if (!isset (cd->checked, cd->composite->id * 2)) { - if (rspamd_symcache_is_checked (cd->task, cd->task->cfg->cache, - key)) { - msg_debug_composites ("composite %s is checked in symcache but not " - "in composites bitfield", cd->composite->sym); - setbit (cd->checked, comp->id * 2); - clrbit (cd->checked, comp->id * 2 + 1); - } - else { - if (rspamd_task_find_symbol_result (cd->task, key, - cd->metric_res) != NULL) { - /* Already set, no need to check */ - msg_debug_composites ("composite %s is already in metric " - "in composites bitfield", cd->composite->sym); - setbit (cd->checked, comp->id * 2); - clrbit (cd->checked, comp->id * 2 + 1); - - return; - } - - rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, - cd); - - /* Checked bit */ - setbit (cd->checked, comp->id * 2); - - /* Result bit */ - if (rc != 0) { - setbit (cd->checked, comp->id * 2 + 1); - rspamd_task_insert_result_full (cd->task, key, 1.0, NULL, - RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res); - } - else { - clrbit (cd->checked, comp->id * 2 + 1); - } - } - } -} - - -static void -composites_remove_symbols (gpointer key, gpointer value, gpointer data) -{ - struct composites_data *cd = data; - struct rspamd_task *task; - struct symbol_remove_data *rd = value, *cur; - struct rspamd_symbol_result *ms; - gboolean skip = FALSE, - has_valid_op = FALSE, - want_remove_score = TRUE, - want_remove_symbol = TRUE, - want_forced = FALSE; - const gchar *disable_score_reason = "no policy", - *disable_symbol_reason = "no policy"; - GNode *par; - - task = cd->task; - - DL_FOREACH (rd, cur) { - if (!isset (cd->checked, cur->comp->id * 2 + 1)) { - continue; - } - /* - * First of all exclude all elements with any parent that is negation: - * !A || B -> here we can have both !A and B matched, but we do *NOT* - * want to remove symbol in that case - */ - par = cur->parent; - skip = FALSE; - - while (par) { - if (rspamd_expression_node_is_op (par, OP_NOT)) { - skip = TRUE; - break; - } - - par = par->parent; - } - - if (skip) { - continue; - } - - has_valid_op = TRUE; - /* - * Now we can try to remove symbols/scores - * - * We apply the following logic here: - * - if no composites would like to save score then we remove score - * - if no composites would like to save symbol then we remove symbol - */ - if (!want_forced) { - if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) { - want_remove_symbol = FALSE; - disable_symbol_reason = cur->comp->sym; - } - - if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) { - want_remove_score = FALSE; - disable_score_reason = cur->comp->sym; - } - - if (cur->action & RSPAMD_COMPOSITE_REMOVE_FORCED) { - want_forced = TRUE; - disable_symbol_reason = cur->comp->sym; - disable_score_reason = cur->comp->sym; - } - } - } - - ms = rspamd_task_find_symbol_result (task, rd->sym, cd->metric_res); - - if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) { - - if (want_remove_score || want_forced) { - msg_debug_composites ("%s: %s remove symbol weight for %s (was %.2f), " - "score removal affected by %s, symbol removal affected by %s", - cd->metric_res->name, - (want_forced ? "forced" : "normal"), key, ms->score, - disable_score_reason, disable_symbol_reason); - cd->metric_res->score -= ms->score; - ms->score = 0.0; - } - - if (want_remove_symbol || want_forced) { - ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED; - msg_debug_composites ("%s: %s remove symbol %s (score %.2f), " - "score removal affected by %s, symbol removal affected by %s", - cd->metric_res->name, - (want_forced ? "forced" : "normal"), key, ms->score, - disable_score_reason, disable_symbol_reason); - } - } -} - -static void -composites_metric_callback (struct rspamd_task *task) -{ - struct composites_data *cd, *first_cd = NULL; - struct rspamd_scan_result *mres; - - DL_FOREACH (task->result, mres) { - cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); - cd->task = task; - cd->metric_res = mres; - cd->symbols_to_remove = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - cd->checked = - rspamd_mempool_alloc0 (task->task_pool, - NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); - - /* Process hash table */ - rspamd_symcache_composites_foreach (task, - task->cfg->cache, - composites_foreach_callback, - cd); - LL_PREPEND (first_cd, cd); - } - - LL_REVERSE (first_cd); - - LL_FOREACH (first_cd, cd) { - /* Remove symbols that are in composites */ - g_hash_table_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); - /* Free list */ - g_hash_table_unref (cd->symbols_to_remove); - } -} - -void -rspamd_composites_process_task (struct rspamd_task *task) -{ - if (task->result && !RSPAMD_TASK_IS_SKIPPED (task)) { - composites_metric_callback (task); - } -} - - -enum rspamd_composite_policy -rspamd_composite_policy_from_str (const gchar *string) -{ - enum rspamd_composite_policy ret = RSPAMD_COMPOSITE_POLICY_UNKNOWN; - - if (strcmp (string, "remove") == 0 || strcmp (string, "remove_all") == 0 || - strcmp (string, "default") == 0) { - ret = RSPAMD_COMPOSITE_POLICY_REMOVE_ALL; - } - else if (strcmp (string, "remove_symbol") == 0) { - ret = RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL; - } - else if (strcmp (string, "remove_weight") == 0) { - ret = RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT; - } - else if (strcmp (string, "leave") == 0 || strcmp (string, "remove_none") == 0) { - ret = RSPAMD_COMPOSITE_POLICY_LEAVE; - } - - return ret; -} diff --git a/src/libserver/composites.h b/src/libserver/composites.h deleted file mode 100644 index bb7eb8994..000000000 --- a/src/libserver/composites.h +++ /dev/null @@ -1,63 +0,0 @@ -/*- - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef SRC_LIBSERVER_COMPOSITES_H_ -#define SRC_LIBSERVER_COMPOSITES_H_ - -#include "config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct rspamd_task; - -/** - * Subr for composite expressions - */ -extern const struct rspamd_atom_subr composite_expr_subr; - -enum rspamd_composite_policy { - RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0, - RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL, - RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT, - RSPAMD_COMPOSITE_POLICY_LEAVE, - RSPAMD_COMPOSITE_POLICY_UNKNOWN -}; - -/** - * Composite structure - */ -struct rspamd_composite { - const gchar *str_expr; - const gchar *sym; - struct rspamd_expression *expr; - gint id; - enum rspamd_composite_policy policy; -}; - -/** - * Process all results and form composite metrics from existent metrics as it is defined in config - * @param task worker's task that present message from user - */ -void rspamd_composites_process_task (struct rspamd_task *task); - -enum rspamd_composite_policy rspamd_composite_policy_from_str (const gchar *string); - -#ifdef __cplusplus -} -#endif - -#endif /* SRC_LIBSERVER_COMPOSITES_H_ */ diff --git a/src/libserver/composites/composites.cxx b/src/libserver/composites/composites.cxx new file mode 100644 index 000000000..0d9534681 --- /dev/null +++ b/src/libserver/composites/composites.cxx @@ -0,0 +1,1019 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "logger.h" +#include "expression.h" +#include "task.h" +#include "utlist.h" +#include "scan_result.h" +#include "composites.h" + +#include +#include +#include +#include "contrib/robin-hood/robin_hood.h" + +#define msg_err_composites(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ + "composites", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_warn_composites(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ + "composites", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_info_composites(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ + "composites", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) + +#define msg_debug_composites(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ + rspamd_composites_log_id, "composites", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) + +INIT_LOG_MODULE(composites) + + +namespace rspamd::composites { +static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line, gsize len, + rspamd_mempool_t *pool, + gpointer ud, GError **err); +static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom); +static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom); +static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom); +} + +const struct rspamd_atom_subr composite_expr_subr = { + .parse = rspamd::composites::rspamd_composite_expr_parse, + .process = rspamd::composites::rspamd_composite_expr_process, + .priority = rspamd::composites::rspamd_composite_expr_priority, + .destroy = rspamd::composites::rspamd_composite_expr_destroy +}; + +namespace rspamd::composites { + +enum class rspamd_composite_policy { + RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0, + RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL, + RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT, + RSPAMD_COMPOSITE_POLICY_LEAVE, + RSPAMD_COMPOSITE_POLICY_UNKNOWN +}; + +/** + * Static composites structure + */ +struct rspamd_composite { + std::string str_expr; + std::string sym; + struct rspamd_expression *expr; + gint id; + rspamd_composite_policy policy; +}; + +struct composites_data { + struct rspamd_task *task; + struct rspamd_composite *composite; + struct rspamd_scan_result *metric_res; + GHashTable *symbols_to_remove; + guint8 *checked; + struct composites_data *next; +}; + +struct rspamd_composite_option_match { + std::variant match; + + ~rspamd_composite_option_match() { + if (std::holds_alternative(match)) { + rspamd_regexp_unref(std::get(match)); + } + } +}; + +enum class rspamd_composite_atom_type { + ATOM_UNKNOWN, + ATOM_COMPOSITE, + ATOM_PLAIN +}; +struct rspamd_composite_atom { + std::string symbol; + rspamd_composite_atom_type comp_type; + struct rspamd_composite *ncomp; /* underlying composite */ + std::vector opts; +}; + +enum rspamd_composite_action : std::uint8_t { + RSPAMD_COMPOSITE_UNTOUCH = 0, + RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1u << 0), + RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1u << 1), + RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2) +}; + +struct symbol_remove_data { + const char *sym; + struct rspamd_composite *comp; + GNode *parent; + std::uint8_t action; + struct symbol_remove_data *prev, *next; +}; + +static GQuark +rspamd_composites_quark (void) +{ + return g_quark_from_static_string ("composites"); +} + +static rspamd_expression_atom_t * +rspamd_composite_expr_parse(const gchar *line, gsize len, + rspamd_mempool_t *pool, + gpointer ud, GError **err) +{ + gsize clen = 0; + rspamd_expression_atom_t *res; + struct rspamd_composite_atom *atom; + const gchar *p, *end; + enum composite_expr_state { + comp_state_read_symbol = 0, + comp_state_read_obrace, + comp_state_read_option, + comp_state_read_regexp, + comp_state_read_regexp_end, + comp_state_read_comma, + comp_state_read_ebrace, + comp_state_read_end + } state = comp_state_read_symbol; + + end = line + len; + p = line; + + /* Find length of the atom using a reduced state machine */ + while (p < end) { + if (state == comp_state_read_end) { + break; + } + + switch (state) { + case comp_state_read_symbol: + clen = rspamd_memcspn(p, "[; \t()>comp_type = ATOM_UNKNOWN; + res = rspamd_mempool_alloc0 (pool, sizeof(*res)); + res->len = clen; + res->str = line; + + /* Full state machine to fill a composite atom */ + const gchar *opt_start = NULL; + + while (p < end) { + struct rspamd_composite_option_match *opt_match; + + if (state == comp_state_read_end) { + break; + } + + switch (state) { + case comp_state_read_symbol: + clen = rspamd_memcspn(p, "[; \t()>symbol = rspamd_mempool_alloc (pool, clen + 1); + rspamd_strlcpy(atom->symbol, line, clen + 1); + + break; + case comp_state_read_obrace: + p++; + + if (*p == '/') { + opt_start = p; + p++; /* Starting slash */ + state = comp_state_read_regexp; + } + else { + state = comp_state_read_option; + opt_start = p; + } + + break; + case comp_state_read_regexp: + if (*p == '\\' && p + 1 < end) { + /* Escaping */ + p++; + } + else if (*p == '/') { + /* End of regexp, possible flags */ + state = comp_state_read_regexp_end; + } + p++; + break; + case comp_state_read_option: + if (*p == ',' || *p == ']') { + opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match)); + /* Plain match */ + gchar *opt_buf; + gint opt_len = p - opt_start; + + opt_buf = rspamd_mempool_alloc (pool, opt_len + 1); + rspamd_strlcpy(opt_buf, opt_start, opt_len + 1); + + opt_match->data.match = opt_buf; + opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN; + + DL_APPEND (atom->opts, opt_match); + + if (*p == ',') { + p++; + state = comp_state_read_comma; + } + else { + state = comp_state_read_ebrace; + } + } + else { + p++; + } + break; + case comp_state_read_regexp_end: + if (*p == ',' || *p == ']') { + opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match)); + /* Plain match */ + gchar *opt_buf; + gint opt_len = p - opt_start; + + opt_buf = rspamd_mempool_alloc (pool, opt_len + 1); + rspamd_strlcpy(opt_buf, opt_start, opt_len + 1); + + rspamd_regexp_t *re; + GError *re_err = NULL; + + re = rspamd_regexp_new(opt_buf, NULL, &re_err); + + if (re == NULL) { + msg_err_pool ("cannot create regexp from string %s: %e", + opt_buf, re_err); + + g_error_free(re_err); + } + else { + rspamd_mempool_add_destructor (pool, + (rspamd_mempool_destruct_t) rspamd_regexp_unref, + re); + opt_match->data.re = re; + opt_match->type = RSPAMD_COMPOSITE_OPTION_RE; + + DL_APPEND (atom->opts, opt_match); + } + + if (*p == ',') { + p++; + state = comp_state_read_comma; + } + else { + state = comp_state_read_ebrace; + } + } + else { + p++; + } + break; + case comp_state_read_comma: + if (!g_ascii_isspace (*p)) { + if (*p == '/') { + state = comp_state_read_regexp; + opt_start = p; + } + else if (*p == ']') { + state = comp_state_read_ebrace; + } + else { + opt_start = p; + state = comp_state_read_option; + } + } + else { + /* Skip spaces after comma */ + p++; + } + break; + case comp_state_read_ebrace: + p++; + state = comp_state_read_end; + break; + case comp_state_read_end: + g_assert_not_reached (); + } + } + + res->data = atom; + + return res; +} + +} + +static void composites_foreach_callback (gpointer key, gpointer value, void *data); + + +static gdouble +rspamd_composite_process_single_symbol (struct composites_data *cd, + const gchar *sym, + struct rspamd_symbol_result **pms, + struct rspamd_composite_atom *atom) +{ + struct rspamd_symbol_result *ms = NULL; + gdouble rc = 0; + struct rspamd_task *task = cd->task; + + if ((ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res)) == NULL) { + msg_debug_composites ("not found symbol %s in composite %s", sym, + cd->composite->sym); + + if (atom->comp_type == ATOM_UNKNOWN) { + struct rspamd_composite *ncomp; + + if ((ncomp = + g_hash_table_lookup (cd->task->cfg->composite_symbols, + sym)) != NULL) { + atom->comp_type = ATOM_COMPOSITE; + atom->ncomp = ncomp; + } + else { + atom->comp_type = ATOM_PLAIN; + } + } + + if (atom->comp_type == ATOM_COMPOSITE) { + msg_debug_composites ("symbol %s for composite %s is another composite", + sym, cd->composite->sym); + + if (isclr (cd->checked, atom->ncomp->id * 2)) { + struct rspamd_composite *saved; + + msg_debug_composites ("composite dependency %s for %s is not checked", + sym, cd->composite->sym); + /* Set checked for this symbol to avoid cyclic references */ + setbit (cd->checked, cd->composite->id * 2); + saved = cd->composite; /* Save the current composite */ + composites_foreach_callback ((gpointer)atom->ncomp->sym, atom->ncomp, cd); + + /* Restore state */ + cd->composite = saved; + clrbit (cd->checked, cd->composite->id * 2); + + ms = rspamd_task_find_symbol_result (cd->task, sym, + cd->metric_res); + } + else { + /* + * XXX: in case of cyclic references this would return 0 + */ + if (isset (cd->checked, atom->ncomp->id * 2 + 1)) { + ms = rspamd_task_find_symbol_result (cd->task, sym, + cd->metric_res); + } + } + } + } + + if (ms) { + msg_debug_composites ("found symbol %s in composite %s, weight: %.3f", + sym, cd->composite->sym, ms->score); + + /* Now check options */ + struct rspamd_composite_option_match *cur_opt; + + DL_FOREACH (atom->opts, cur_opt) { + struct rspamd_symbol_option *opt; + bool found = false; + + DL_FOREACH (ms->opts_head, opt) { + if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) { + gsize mlen = strlen (cur_opt->data.match); + + if (opt->optlen == mlen && + memcmp (opt->option, cur_opt->data.match, mlen) == 0) { + + found = true; + + break; + } + } + else { + if (rspamd_regexp_search (cur_opt->data.re, + opt->option, opt->optlen, NULL, NULL, FALSE, NULL)) { + found = true; + + break; + } + } + } + + + if (!found) { + if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) { + msg_debug_composites ("symbol %s in composite %s misses required option %s", + sym, + cd->composite->sym, + cur_opt->data.match); + } + else { + msg_debug_composites ("symbol %s in composite %s failed to match regexp %s", + sym, + cd->composite->sym, + rspamd_regexp_get_pattern (cur_opt->data.re)); + } + + ms = NULL; + + break; + } + } + + if (ms) { + if (ms->score == 0) { + rc = 0.001; /* Distinguish from 0 */ + } + else { + rc = ms->score; + } + } + } + + *pms = ms; + return rc; +} + +static void +rspamd_composite_process_symbol_removal (rspamd_expression_atom_t *atom, + struct composites_data *cd, + struct rspamd_symbol_result *ms, + const gchar *beg) +{ + gchar t; + struct symbol_remove_data *rd, *nrd; + struct rspamd_task *task = cd->task; + + if (ms == NULL) { + return; + } + + /* + * At this point we know that we need to do something about this symbol, + * however, we don't know whether we need to delete it unfortunately, + * that depends on the later decisions when the complete expression is + * evaluated. + */ + rd = g_hash_table_lookup (cd->symbols_to_remove, ms->name); + + nrd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*nrd)); + nrd->sym = ms->name; + + /* By default remove symbols */ + switch (cd->composite->policy) { + case RSPAMD_COMPOSITE_POLICY_REMOVE_ALL: + default: + nrd->action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL|RSPAMD_COMPOSITE_REMOVE_WEIGHT); + break; + case RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL: + nrd->action = RSPAMD_COMPOSITE_REMOVE_SYMBOL; + break; + case RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT: + nrd->action = RSPAMD_COMPOSITE_REMOVE_WEIGHT; + break; + case RSPAMD_COMPOSITE_POLICY_LEAVE: + nrd->action = 0; + break; + } + + for (;;) { + t = *beg; + + if (t == '~') { + nrd->action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL; + } + else if (t == '-') { + nrd->action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT| + RSPAMD_COMPOSITE_REMOVE_SYMBOL); + } + else if (t == '^') { + nrd->action |= RSPAMD_COMPOSITE_REMOVE_FORCED; + } + else { + break; + } + + beg ++; + } + + nrd->comp = cd->composite; + nrd->parent = atom->parent; + + if (rd == NULL) { + DL_APPEND (rd, nrd); + g_hash_table_insert (cd->symbols_to_remove, (gpointer)ms->name, rd); + msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s", + cd->metric_res->name, + ms->name, nrd->action, + cd->composite->sym); + } + else { + DL_APPEND (rd, nrd); + msg_debug_composites ("%s: append symbol %s to removal: %d policy, from composite %s", + cd->metric_res->name, + ms->name, nrd->action, + cd->composite->sym); + } +} + +static gdouble +rspamd_composite_expr_process (void *ud, + rspamd_expression_atom_t *atom) +{ + static const double epsilon = 0.00001; + struct composites_data *cd = (struct composites_data *)ud; + const gchar *sym = NULL; + struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data; + + struct rspamd_symbol_result *ms = NULL; + struct rspamd_symbols_group *gr; + struct rspamd_symbol *sdef; + struct rspamd_task *task = cd->task; + GHashTableIter it; + gpointer k, v; + gdouble rc = 0, max = 0; + + if (isset (cd->checked, cd->composite->id * 2)) { + /* We have already checked this composite, so just return its value */ + if (isset (cd->checked, cd->composite->id * 2 + 1)) { + ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res); + } + + if (ms) { + if (ms->score == 0) { + rc = epsilon; /* Distinguish from 0 */ + } + else { + /* Treat negative and positive scores equally... */ + rc = fabs (ms->score); + } + } + + msg_debug_composites ("composite %s is already checked, result: %.2f", + cd->composite->sym, rc); + + return rc; + } + + sym = comp_atom->symbol; + guint slen = strlen (sym); + + while (*sym != '\0' && !g_ascii_isalnum (*sym)) { + sym ++; + slen --; + } + + if (slen > 2) { + if (G_UNLIKELY (memcmp (sym, "g:", 2) == 0)) { + gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 2); + + if (gr != NULL) { + g_hash_table_iter_init (&it, gr->symbols); + + while (g_hash_table_iter_next (&it, &k, &v)) { + sdef = v; + rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms, + comp_atom); + + if (rc) { + rspamd_composite_process_symbol_removal (atom, + cd, + ms, + comp_atom->symbol); + + if (fabs (rc) > max) { + max = fabs (rc); + } + } + } + } + + rc = max; + } + else if (G_UNLIKELY (memcmp (sym, "g+:", 3) == 0)) { + /* Group, positive symbols only */ + gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3); + + if (gr != NULL) { + g_hash_table_iter_init (&it, gr->symbols); + + while (g_hash_table_iter_next (&it, &k, &v)) { + sdef = v; + + if (sdef->score > 0) { + rc = rspamd_composite_process_single_symbol (cd, + sdef->name, + &ms, + comp_atom); + + if (rc) { + rspamd_composite_process_symbol_removal (atom, + cd, + ms, + comp_atom->symbol); + + if (fabs (rc) > max) { + max = fabs (rc); + } + } + } + } + + rc = max; + } + } + else if (G_UNLIKELY (memcmp (sym, "g-:", 3) == 0)) { + /* Group, negative symbols only */ + gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3); + + if (gr != NULL) { + g_hash_table_iter_init (&it, gr->symbols); + + while (g_hash_table_iter_next (&it, &k, &v)) { + sdef = v; + + if (sdef->score < 0) { + rc = rspamd_composite_process_single_symbol (cd, + sdef->name, + &ms, + comp_atom); + + if (rc) { + rspamd_composite_process_symbol_removal (atom, + cd, + ms, + comp_atom->symbol); + + if (fabs (rc) > max) { + max = fabs (rc); + } + } + } + } + + rc = max; + } + } + else { + rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom); + + if (rc) { + rspamd_composite_process_symbol_removal (atom, + cd, + ms, + comp_atom->symbol); + } + } + } + else { + rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom); + + if (rc) { + rspamd_composite_process_symbol_removal (atom, + cd, + ms, + comp_atom->symbol); + } + } + + msg_debug_composites ("%s: final result for composite %s is %.2f", + cd->metric_res->name, + cd->composite->sym, rc); + + return rc; +} + +/* + * We don't have preferences for composites + */ +static gint +rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) +{ + return 0; +} + +static void +rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) +{ + /* Composite atoms are destroyed just with the pool */ +} + + +static void +composites_foreach_callback (gpointer key, gpointer value, void *data) +{ + struct composites_data *cd = data; + struct rspamd_composite *comp = value; + struct rspamd_task *task; + gdouble rc; + + cd->composite = comp; + task = cd->task; + + if (!isset (cd->checked, cd->composite->id * 2)) { + if (rspamd_symcache_is_checked (cd->task, cd->task->cfg->cache, + key)) { + msg_debug_composites ("composite %s is checked in symcache but not " + "in composites bitfield", cd->composite->sym); + setbit (cd->checked, comp->id * 2); + clrbit (cd->checked, comp->id * 2 + 1); + } + else { + if (rspamd_task_find_symbol_result (cd->task, key, + cd->metric_res) != NULL) { + /* Already set, no need to check */ + msg_debug_composites ("composite %s is already in metric " + "in composites bitfield", cd->composite->sym); + setbit (cd->checked, comp->id * 2); + clrbit (cd->checked, comp->id * 2 + 1); + + return; + } + + rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, + cd); + + /* Checked bit */ + setbit (cd->checked, comp->id * 2); + + /* Result bit */ + if (rc != 0) { + setbit (cd->checked, comp->id * 2 + 1); + rspamd_task_insert_result_full (cd->task, key, 1.0, NULL, + RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res); + } + else { + clrbit (cd->checked, comp->id * 2 + 1); + } + } + } +} + + +static void +composites_remove_symbols (gpointer key, gpointer value, gpointer data) +{ + struct composites_data *cd = data; + struct rspamd_task *task; + struct symbol_remove_data *rd = value, *cur; + struct rspamd_symbol_result *ms; + gboolean skip = FALSE, + has_valid_op = FALSE, + want_remove_score = TRUE, + want_remove_symbol = TRUE, + want_forced = FALSE; + const gchar *disable_score_reason = "no policy", + *disable_symbol_reason = "no policy"; + GNode *par; + + task = cd->task; + + DL_FOREACH (rd, cur) { + if (!isset (cd->checked, cur->comp->id * 2 + 1)) { + continue; + } + /* + * First of all exclude all elements with any parent that is negation: + * !A || B -> here we can have both !A and B matched, but we do *NOT* + * want to remove symbol in that case + */ + par = cur->parent; + skip = FALSE; + + while (par) { + if (rspamd_expression_node_is_op (par, OP_NOT)) { + skip = TRUE; + break; + } + + par = par->parent; + } + + if (skip) { + continue; + } + + has_valid_op = TRUE; + /* + * Now we can try to remove symbols/scores + * + * We apply the following logic here: + * - if no composites would like to save score then we remove score + * - if no composites would like to save symbol then we remove symbol + */ + if (!want_forced) { + if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) { + want_remove_symbol = FALSE; + disable_symbol_reason = cur->comp->sym; + } + + if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) { + want_remove_score = FALSE; + disable_score_reason = cur->comp->sym; + } + + if (cur->action & RSPAMD_COMPOSITE_REMOVE_FORCED) { + want_forced = TRUE; + disable_symbol_reason = cur->comp->sym; + disable_score_reason = cur->comp->sym; + } + } + } + + ms = rspamd_task_find_symbol_result (task, rd->sym, cd->metric_res); + + if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) { + + if (want_remove_score || want_forced) { + msg_debug_composites ("%s: %s remove symbol weight for %s (was %.2f), " + "score removal affected by %s, symbol removal affected by %s", + cd->metric_res->name, + (want_forced ? "forced" : "normal"), key, ms->score, + disable_score_reason, disable_symbol_reason); + cd->metric_res->score -= ms->score; + ms->score = 0.0; + } + + if (want_remove_symbol || want_forced) { + ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED; + msg_debug_composites ("%s: %s remove symbol %s (score %.2f), " + "score removal affected by %s, symbol removal affected by %s", + cd->metric_res->name, + (want_forced ? "forced" : "normal"), key, ms->score, + disable_score_reason, disable_symbol_reason); + } + } +} + +static void +composites_metric_callback (struct rspamd_task *task) +{ + struct composites_data *cd, *first_cd = NULL; + struct rspamd_scan_result *mres; + + DL_FOREACH (task->result, mres) { + cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); + cd->task = task; + cd->metric_res = mres; + cd->symbols_to_remove = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cd->checked = + rspamd_mempool_alloc0 (task->task_pool, + NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); + + /* Process hash table */ + rspamd_symcache_composites_foreach (task, + task->cfg->cache, + composites_foreach_callback, + cd); + LL_PREPEND (first_cd, cd); + } + + LL_REVERSE (first_cd); + + LL_FOREACH (first_cd, cd) { + /* Remove symbols that are in composites */ + g_hash_table_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); + /* Free list */ + g_hash_table_unref (cd->symbols_to_remove); + } +} + +void +rspamd_composites_process_task (struct rspamd_task *task) +{ + if (task->result && !RSPAMD_TASK_IS_SKIPPED (task)) { + composites_metric_callback (task); + } +} + + +enum rspamd_composite_policy +rspamd_composite_policy_from_str (const gchar *string) +{ + enum rspamd_composite_policy ret = RSPAMD_COMPOSITE_POLICY_UNKNOWN; + + if (strcmp (string, "remove") == 0 || strcmp (string, "remove_all") == 0 || + strcmp (string, "default") == 0) { + ret = RSPAMD_COMPOSITE_POLICY_REMOVE_ALL; + } + else if (strcmp (string, "remove_symbol") == 0) { + ret = RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL; + } + else if (strcmp (string, "remove_weight") == 0) { + ret = RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT; + } + else if (strcmp (string, "leave") == 0 || strcmp (string, "remove_none") == 0) { + ret = RSPAMD_COMPOSITE_POLICY_LEAVE; + } + + return ret; +} diff --git a/src/libserver/composites/composites.h b/src/libserver/composites/composites.h new file mode 100644 index 000000000..d39863b88 --- /dev/null +++ b/src/libserver/composites/composites.h @@ -0,0 +1,44 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBSERVER_COMPOSITES_H_ +#define SRC_LIBSERVER_COMPOSITES_H_ + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_task; + +/** + * Subr for composite expressions + */ +extern const struct rspamd_atom_subr composite_expr_subr; + +/** + * Process all results and form composite metrics from existent metrics as it is defined in config + * @param task worker's task that present message from user + */ +void rspamd_composites_process_task (struct rspamd_task *task); + +enum rspamd_composite_policy rspamd_composite_policy_from_str (const gchar *string); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBSERVER_COMPOSITES_H_ */ diff --git a/src/libserver/task.c b/src/libserver/task.c index aae374c21..c9f3fb627 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -21,7 +21,7 @@ #include "message.h" #include "lua/lua_common.h" #include "email_addr.h" -#include "composites.h" +#include "src/libserver/composites/composites.h" #include "stat_api.h" #include "unix-std.h" #include "utlist.h" diff --git a/src/lua/lua_cfg_file.c b/src/lua/lua_cfg_file.c index 68acdd368..af8964b32 100644 --- a/src/lua/lua_cfg_file.c +++ b/src/lua/lua_cfg_file.c @@ -15,7 +15,7 @@ */ #include "lua_common.h" #include "expression.h" -#include "composites.h" +#include "src/libserver/composites/composites.h" #ifdef HAVE_SYS_UTSNAME_H #endif diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 2631f1cec..c2f5efb42 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -16,7 +16,7 @@ #include "lua_common.h" #include "libmime/message.h" #include "libutil/expression.h" -#include "libserver/composites.h" +#include "src/libserver/composites/composites.h" #include "libserver/cfg_file_private.h" #include "libmime/lang_detection.h" #include "lua/lua_map.h"