diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-15 17:10:03 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-15 17:10:03 +0100 |
commit | f87014f894470620f39b93f390413be9555c1a0b (patch) | |
tree | 38ab5e435a78acde70ff5a387c85ba9f35fe2670 | |
parent | 5f80e2e32e9e82c769b3f28be29cff4f62d9ecce (diff) | |
download | rspamd-f87014f894470620f39b93f390413be9555c1a0b.tar.gz rspamd-f87014f894470620f39b93f390413be9555c1a0b.zip |
[Rework] Composites: Rewrite the composites logic
-rw-r--r-- | src/libmime/scan_result.c | 2 | ||||
-rw-r--r-- | src/libmime/scan_result.h | 2 | ||||
-rw-r--r-- | src/libserver/composites/composites.cxx | 696 |
3 files changed, 314 insertions, 386 deletions
diff --git a/src/libmime/scan_result.c b/src/libmime/scan_result.c index e7fab8c6d..16ec9b0c5 100644 --- a/src/libmime/scan_result.c +++ b/src/libmime/scan_result.c @@ -921,7 +921,7 @@ rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym, result = task->result; } - k = kh_get (rspamd_symbols_hash, result->symbols, sym); + k = kh_get(rspamd_symbols_hash, result->symbols, sym); if (k != kh_end (result->symbols)) { res = kh_value (result->symbols, k); diff --git a/src/libmime/scan_result.h b/src/libmime/scan_result.h index b7a548be0..c8bacf3e8 100644 --- a/src/libmime/scan_result.h +++ b/src/libmime/scan_result.h @@ -42,7 +42,7 @@ struct rspamd_symbol_result { struct rspamd_symbol *sym; /**< symbol configuration */ gssize opts_len; /**< total size of all options (negative if truncated option is added) */ guint nshots; - enum rspamd_symbol_result_flags flags; + int flags; struct rspamd_symbol_result *next; }; diff --git a/src/libserver/composites/composites.cxx b/src/libserver/composites/composites.cxx index 0d9534681..56ca554b3 100644 --- a/src/libserver/composites/composites.cxx +++ b/src/libserver/composites/composites.cxx @@ -1,5 +1,5 @@ /*- - * Copyright 2016 Vsevolod Stakhov + * Copyright 2021 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,6 +54,7 @@ static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line, static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom); static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom); static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom); +static void composites_foreach_callback(gpointer key, gpointer value, void *data); } const struct rspamd_atom_subr composite_expr_subr = { @@ -65,6 +66,8 @@ const struct rspamd_atom_subr composite_expr_subr = { namespace rspamd::composites { +static constexpr const double epsilon = 0.00001; + enum class rspamd_composite_policy { RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0, RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL, @@ -84,23 +87,72 @@ struct rspamd_composite { rspamd_composite_policy policy; }; +struct symbol_remove_data { + const char *sym; + struct rspamd_composite *comp; + GNode *parent; + std::uint8_t action; +}; + struct composites_data { struct rspamd_task *task; struct rspamd_composite *composite; struct rspamd_scan_result *metric_res; - GHashTable *symbols_to_remove; - guint8 *checked; - struct composites_data *next; + robin_hood::unordered_flat_map<std::string_view, + std::vector<symbol_remove_data>> symbols_to_remove; + std::vector<bool> checked; + + explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres) : + task(task), composite(nullptr), metric_res(mres) { + checked.resize(g_hash_table_size(task->cfg->composite_symbols) * 2); + } }; struct rspamd_composite_option_match { - std::variant<rspamd_regexp_t *, std::string> match; + std::variant<rspamd_regexp_t *, std::string_view> match; - ~rspamd_composite_option_match() { + explicit rspamd_composite_option_match(const char *start, std::size_t len) + { + match = std::string_view{start, len}; + } + + explicit rspamd_composite_option_match(rspamd_regexp_t *re) + { + match = re; + } + + ~rspamd_composite_option_match() + { if (std::holds_alternative<rspamd_regexp_t *>(match)) { rspamd_regexp_unref(std::get<rspamd_regexp_t *>(match)); } } + + auto math_opt(const std::string_view &data) const -> bool + { + return std::visit([&](auto arg) -> bool { + if constexpr (std::is_same_v<decltype(arg), std::string_view>) { + return data == arg; + } + else { + return rspamd_regexp_search(arg, + data.data(), data.size(), + nullptr, nullptr, false, nullptr); + } + }, match); + } + + auto get_pat() const -> std::string_view + { + return std::visit([&](auto arg) -> std::string_view { + if constexpr (std::is_same_v<decltype(arg), std::string_view>) { + return std::string_view(arg); + } + else { + return std::string_view(rspamd_regexp_get_pattern(arg)); + } + }, match); + } }; enum class rspamd_composite_atom_type { @@ -108,9 +160,10 @@ enum class rspamd_composite_atom_type { ATOM_COMPOSITE, ATOM_PLAIN }; + struct rspamd_composite_atom { std::string symbol; - rspamd_composite_atom_type comp_type; + rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN; struct rspamd_composite *ncomp; /* underlying composite */ std::vector<rspamd_composite_option_match> opts; }; @@ -122,18 +175,18 @@ enum rspamd_composite_action : std::uint8_t { RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2) }; -struct symbol_remove_data { - const char *sym; - struct rspamd_composite *comp; - GNode *parent; - std::uint8_t action; - struct symbol_remove_data *prev, *next; -}; - static GQuark -rspamd_composites_quark (void) +rspamd_composites_quark(void) { - return g_quark_from_static_string ("composites"); + return g_quark_from_static_string("composites"); +} + +static auto +rspamd_composite_atom_dtor(void *ptr) +{ + auto *atom = reinterpret_cast<rspamd_expression_atom_t *>(ptr); + + delete atom; } static rspamd_expression_atom_t * @@ -142,8 +195,6 @@ rspamd_composite_expr_parse(const gchar *line, gsize len, gpointer ud, GError **err) { gsize clen = 0; - rspamd_expression_atom_t *res; - struct rspamd_composite_atom *atom; const gchar *p, *end; enum composite_expr_state { comp_state_read_symbol = 0, @@ -249,18 +300,15 @@ rspamd_composite_expr_parse(const gchar *line, gsize len, p = line; state = comp_state_read_symbol; - atom = rspamd_mempool_alloc0 (pool, sizeof(*atom)); - atom->comp_type = ATOM_UNKNOWN; - res = rspamd_mempool_alloc0 (pool, sizeof(*res)); + auto *atom = new rspamd_composite_atom; + auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t); res->len = clen; res->str = line; /* Full state machine to fill a composite atom */ - const gchar *opt_start = NULL; + const gchar *opt_start = nullptr; while (p < end) { - struct rspamd_composite_option_match *opt_match; - if (state == comp_state_read_end) { break; } @@ -277,9 +325,7 @@ rspamd_composite_expr_parse(const gchar *line, gsize len, state = comp_state_read_end; } - atom->symbol = rspamd_mempool_alloc (pool, clen + 1); - rspamd_strlcpy(atom->symbol, line, clen + 1); - + atom->symbol = std::string{line, clen}; break; case comp_state_read_obrace: p++; @@ -308,18 +354,12 @@ rspamd_composite_expr_parse(const gchar *line, gsize len, break; case comp_state_read_option: if (*p == ',' || *p == ']') { - opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match)); - /* Plain match */ - gchar *opt_buf; + /* Plain match, copy option to ensure string_view validity */ gint opt_len = p - opt_start; - - opt_buf = rspamd_mempool_alloc (pool, opt_len + 1); + auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1); rspamd_strlcpy(opt_buf, opt_start, opt_len + 1); - - opt_match->data.match = opt_buf; - opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN; - - DL_APPEND (atom->opts, opt_match); + opt_buf = g_strstrip(opt_buf); + atom->opts.emplace_back(opt_buf, strlen(opt_buf)); if (*p == ',') { p++; @@ -335,33 +375,20 @@ rspamd_composite_expr_parse(const gchar *line, gsize len, break; case comp_state_read_regexp_end: if (*p == ',' || *p == ']') { - opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match)); - /* Plain match */ - gchar *opt_buf; - gint opt_len = p - opt_start; - - opt_buf = rspamd_mempool_alloc (pool, opt_len + 1); - rspamd_strlcpy(opt_buf, opt_start, opt_len + 1); - + auto opt_len = p - opt_start; rspamd_regexp_t *re; - GError *re_err = NULL; + GError *re_err = nullptr; - re = rspamd_regexp_new(opt_buf, NULL, &re_err); + re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err); - if (re == NULL) { - msg_err_pool ("cannot create regexp from string %s: %e", - opt_buf, re_err); + if (re == nullptr) { + msg_err_pool ("cannot create regexp from string %*s: %e", + opt_len, opt_start, re_err); g_error_free(re_err); } else { - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t) rspamd_regexp_unref, - re); - opt_match->data.re = re; - opt_match->type = RSPAMD_COMPOSITE_OPTION_RE; - - DL_APPEND (atom->opts, opt_match); + atom->opts.emplace_back(re); } if (*p == ',') { @@ -409,66 +436,137 @@ rspamd_composite_expr_parse(const gchar *line, gsize len, return res; } -} +static auto +process_symbol_removal(rspamd_expression_atom_t *atom, + struct composites_data *cd, + struct rspamd_symbol_result *ms, + const std::string &beg) -> void +{ + struct rspamd_task *task = cd->task; -static void composites_foreach_callback (gpointer key, gpointer value, void *data); + if (ms == nullptr) { + return; + } + /* + * At this point we know that we need to do something about this symbol, + * however, we don't know whether we need to delete it unfortunately, + * that depends on the later decisions when the complete expression is + * evaluated. + */ + auto rd_it = cd->symbols_to_remove.find(ms->name); + + auto fill_removal_structure = [&](symbol_remove_data &nrd) { + nrd.sym = ms->name; + + /* By default remove symbols */ + switch (cd->composite->policy) { + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL: + default: + nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT); + break; + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL: + nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL; + break; + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT: + nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT; + break; + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE: + nrd.action = 0; + break; + } + + for (auto t : beg) { + if (t == '~') { + nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL; + } + else if (t == '-') { + nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT | + RSPAMD_COMPOSITE_REMOVE_SYMBOL); + } + else if (t == '^') { + nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED; + } + else { + break; + } + } + + nrd.comp = cd->composite; + nrd.parent = atom->parent; + }; + + if (rd_it != cd->symbols_to_remove.end()) { + fill_removal_structure(rd_it->second.emplace_back()); + msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s", + cd->metric_res->name, + ms->name, rd_it->second.back().action, + cd->composite->sym.c_str()); + } + else { + std::vector<symbol_remove_data> nrd; + fill_removal_structure(nrd.emplace_back()); + msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s", + cd->metric_res->name, + ms->name, nrd.front().action, + cd->composite->sym.c_str()); + cd->symbols_to_remove[ms->name] = std::move(nrd); + } +} -static gdouble -rspamd_composite_process_single_symbol (struct composites_data *cd, - const gchar *sym, - struct rspamd_symbol_result **pms, - struct rspamd_composite_atom *atom) +static auto +process_single_symbol(struct composites_data *cd, + const gchar *sym, + struct rspamd_symbol_result **pms, + struct rspamd_composite_atom *atom) -> double { - struct rspamd_symbol_result *ms = NULL; + struct rspamd_symbol_result *ms = nullptr; gdouble rc = 0; struct rspamd_task *task = cd->task; - if ((ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res)) == NULL) { + if ((ms = rspamd_task_find_symbol_result(cd->task, sym, cd->metric_res)) == nullptr) { msg_debug_composites ("not found symbol %s in composite %s", sym, - cd->composite->sym); + cd->composite->sym.c_str()); - if (atom->comp_type == ATOM_UNKNOWN) { + if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) { struct rspamd_composite *ncomp; if ((ncomp = - g_hash_table_lookup (cd->task->cfg->composite_symbols, + g_hash_table_lookup(cd->task->cfg->composite_symbols, sym)) != NULL) { - atom->comp_type = ATOM_COMPOSITE; + atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE; atom->ncomp = ncomp; } else { - atom->comp_type = ATOM_PLAIN; + atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN; } } - if (atom->comp_type == ATOM_COMPOSITE) { + if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) { msg_debug_composites ("symbol %s for composite %s is another composite", - sym, cd->composite->sym); - - if (isclr (cd->checked, atom->ncomp->id * 2)) { - struct rspamd_composite *saved; + sym, cd->composite->sym.c_str()); - msg_debug_composites ("composite dependency %s for %s is not checked", - sym, cd->composite->sym); + if (!cd->checked[atom->ncomp->id * 2]) { + msg_debug_composites("composite dependency %s for %s is not checked", + sym, cd->composite->sym.c_str()); /* Set checked for this symbol to avoid cyclic references */ - setbit (cd->checked, cd->composite->id * 2); - saved = cd->composite; /* Save the current composite */ - composites_foreach_callback ((gpointer)atom->ncomp->sym, atom->ncomp, cd); - + cd->checked[cd->composite->id * 2] = true; + auto *saved = cd->composite; /* Save the current composite */ + composites_foreach_callback((gpointer)atom->ncomp->sym.c_str(), + (gpointer)atom->ncomp, (gpointer)cd); /* Restore state */ cd->composite = saved; - clrbit (cd->checked, cd->composite->id * 2); + cd->checked[cd->composite->id * 2] = false; - ms = rspamd_task_find_symbol_result (cd->task, sym, + ms = rspamd_task_find_symbol_result(cd->task, sym, cd->metric_res); } else { /* * XXX: in case of cyclic references this would return 0 */ - if (isset (cd->checked, atom->ncomp->id * 2 + 1)) { - ms = rspamd_task_find_symbol_result (cd->task, sym, + if (cd->checked[atom->ncomp->id * 2 + 1]) { + ms = rspamd_task_find_symbol_result(cd->task, sym, cd->metric_res); } } @@ -476,54 +574,28 @@ rspamd_composite_process_single_symbol (struct composites_data *cd, } if (ms) { - msg_debug_composites ("found symbol %s in composite %s, weight: %.3f", - sym, cd->composite->sym, ms->score); + msg_debug_composites("found symbol %s in composite %s, weight: %.3f", + sym, cd->composite->sym.c_str(), ms->score); /* Now check options */ - struct rspamd_composite_option_match *cur_opt; - - DL_FOREACH (atom->opts, cur_opt) { + for (const auto &cur_opt : atom->opts) { struct rspamd_symbol_option *opt; - bool found = false; + auto found = false; DL_FOREACH (ms->opts_head, opt) { - if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) { - gsize mlen = strlen (cur_opt->data.match); - - if (opt->optlen == mlen && - memcmp (opt->option, cur_opt->data.match, mlen) == 0) { - - found = true; - - break; - } - } - else { - if (rspamd_regexp_search (cur_opt->data.re, - opt->option, opt->optlen, NULL, NULL, FALSE, NULL)) { - found = true; - - break; - } + if (cur_opt.math_opt({opt->option, opt->optlen})) { + found = true; + break; } } - if (!found) { - if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) { - msg_debug_composites ("symbol %s in composite %s misses required option %s", - sym, - cd->composite->sym, - cur_opt->data.match); - } - else { - msg_debug_composites ("symbol %s in composite %s failed to match regexp %s", - sym, - cd->composite->sym, - rspamd_regexp_get_pattern (cur_opt->data.re)); - } - - ms = NULL; + auto pat = cur_opt.get_pat(); + msg_debug_composites ("symbol %s in composite %s misses required option %*s", + sym, + cd->composite->sym.c_str(), + (int) pat.size(), pat.data()); + ms = nullptr; break; } @@ -543,109 +615,21 @@ rspamd_composite_process_single_symbol (struct composites_data *cd, return rc; } -static void -rspamd_composite_process_symbol_removal (rspamd_expression_atom_t *atom, - struct composites_data *cd, - struct rspamd_symbol_result *ms, - const gchar *beg) -{ - gchar t; - struct symbol_remove_data *rd, *nrd; - struct rspamd_task *task = cd->task; - - if (ms == NULL) { - return; - } - - /* - * At this point we know that we need to do something about this symbol, - * however, we don't know whether we need to delete it unfortunately, - * that depends on the later decisions when the complete expression is - * evaluated. - */ - rd = g_hash_table_lookup (cd->symbols_to_remove, ms->name); - - nrd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*nrd)); - nrd->sym = ms->name; - - /* By default remove symbols */ - switch (cd->composite->policy) { - case RSPAMD_COMPOSITE_POLICY_REMOVE_ALL: - default: - nrd->action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL|RSPAMD_COMPOSITE_REMOVE_WEIGHT); - break; - case RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL: - nrd->action = RSPAMD_COMPOSITE_REMOVE_SYMBOL; - break; - case RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT: - nrd->action = RSPAMD_COMPOSITE_REMOVE_WEIGHT; - break; - case RSPAMD_COMPOSITE_POLICY_LEAVE: - nrd->action = 0; - break; - } - - for (;;) { - t = *beg; - - if (t == '~') { - nrd->action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL; - } - else if (t == '-') { - nrd->action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT| - RSPAMD_COMPOSITE_REMOVE_SYMBOL); - } - else if (t == '^') { - nrd->action |= RSPAMD_COMPOSITE_REMOVE_FORCED; - } - else { - break; - } - - beg ++; - } - - nrd->comp = cd->composite; - nrd->parent = atom->parent; - - if (rd == NULL) { - DL_APPEND (rd, nrd); - g_hash_table_insert (cd->symbols_to_remove, (gpointer)ms->name, rd); - msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s", - cd->metric_res->name, - ms->name, nrd->action, - cd->composite->sym); - } - else { - DL_APPEND (rd, nrd); - msg_debug_composites ("%s: append symbol %s to removal: %d policy, from composite %s", - cd->metric_res->name, - ms->name, nrd->action, - cd->composite->sym); - } -} - -static gdouble -rspamd_composite_expr_process (void *ud, - rspamd_expression_atom_t *atom) +static auto +rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double { - static const double epsilon = 0.00001; - struct composites_data *cd = (struct composites_data *)ud; - const gchar *sym = NULL; - struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data; + struct composites_data *cd = (struct composites_data *) ud; + struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data; struct rspamd_symbol_result *ms = NULL; - struct rspamd_symbols_group *gr; - struct rspamd_symbol *sdef; struct rspamd_task *task = cd->task; - GHashTableIter it; - gpointer k, v; - gdouble rc = 0, max = 0; + gdouble rc = 0; - if (isset (cd->checked, cd->composite->id * 2)) { + if (cd->checked[cd->composite->id * 2]) { /* We have already checked this composite, so just return its value */ - if (isset (cd->checked, cd->composite->id * 2 + 1)) { - ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res); + if (cd->checked[cd->composite->id * 2 + 1]) { + ms = rspamd_task_find_symbol_result(cd->task, comp_atom->symbol.c_str(), + cd->metric_res); } if (ms) { @@ -654,120 +638,72 @@ rspamd_composite_expr_process (void *ud, } else { /* Treat negative and positive scores equally... */ - rc = fabs (ms->score); + rc = fabs(ms->score); } } - msg_debug_composites ("composite %s is already checked, result: %.2f", - cd->composite->sym, rc); + msg_debug_composites("composite %s is already checked, result: %.2f", + cd->composite->sym.c_str(), rc); return rc; } - sym = comp_atom->symbol; - guint slen = strlen (sym); + /* Note: sym is zero terminated as it is a view on std::string */ + auto sym = std::string_view{comp_atom->symbol}; + auto group_process_functor = [&](auto cond, int sub_start) -> double { + auto max = 0.; + GHashTableIter it; + gpointer k, v; + struct rspamd_symbols_group *gr; - while (*sym != '\0' && !g_ascii_isalnum (*sym)) { - sym ++; - slen --; - } + gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups, + sym.substr(sub_start).data()); - if (slen > 2) { - if (G_UNLIKELY (memcmp (sym, "g:", 2) == 0)) { - gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 2); + if (gr != nullptr) { + g_hash_table_iter_init(&it, gr->symbols); - if (gr != NULL) { - g_hash_table_iter_init (&it, gr->symbols); + while (g_hash_table_iter_next(&it, &k, &v)) { + auto *sdef = (rspamd_symbol *) v; - while (g_hash_table_iter_next (&it, &k, &v)) { - sdef = v; - rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms, + if (cond(sdef->score)) { + rc = process_single_symbol(cd, + sdef->name, + &ms, comp_atom); - if (rc) { - rspamd_composite_process_symbol_removal (atom, + if (fabs(rc) > epsilon) { + process_symbol_removal(atom, cd, ms, comp_atom->symbol); - if (fabs (rc) > max) { - max = fabs (rc); + if (fabs(rc) > max) { + max = fabs(rc); } } } } - - rc = max; } - else if (G_UNLIKELY (memcmp (sym, "g+:", 3) == 0)) { - /* Group, positive symbols only */ - gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3); - - if (gr != NULL) { - g_hash_table_iter_init (&it, gr->symbols); - - while (g_hash_table_iter_next (&it, &k, &v)) { - sdef = v; - if (sdef->score > 0) { - rc = rspamd_composite_process_single_symbol (cd, - sdef->name, - &ms, - comp_atom); - - if (rc) { - rspamd_composite_process_symbol_removal (atom, - cd, - ms, - comp_atom->symbol); - - if (fabs (rc) > max) { - max = fabs (rc); - } - } - } - } + return max; + }; - rc = max; - } + if (sym.size() > 2) { + if (sym.substr(0, 2) == "g:") { + rc = group_process_functor([](auto _) { return true; }, 2); } - else if (G_UNLIKELY (memcmp (sym, "g-:", 3) == 0)) { - /* Group, negative symbols only */ - gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3); - - if (gr != NULL) { - g_hash_table_iter_init (&it, gr->symbols); - - while (g_hash_table_iter_next (&it, &k, &v)) { - sdef = v; - - if (sdef->score < 0) { - rc = rspamd_composite_process_single_symbol (cd, - sdef->name, - &ms, - comp_atom); - - if (rc) { - rspamd_composite_process_symbol_removal (atom, - cd, - ms, - comp_atom->symbol); - - if (fabs (rc) > max) { - max = fabs (rc); - } - } - } - } - - rc = max; - } + else if (sym.substr(0, 3) == "g+:") { + /* Group, positive symbols only */ + rc = group_process_functor([](auto sc) { return sc > 0.; }, 3); + } + else if (sym.substr(0, 3) == "g-:") { + rc = group_process_functor([](auto sc) { return sc < 0.; }, 3); } else { - rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom); + rc = process_single_symbol(cd, sym.data(), &ms, comp_atom); if (rc) { - rspamd_composite_process_symbol_removal (atom, + process_symbol_removal(atom, cd, ms, comp_atom->symbol); @@ -775,10 +711,10 @@ rspamd_composite_expr_process (void *ud, } } else { - rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom); + rc = process_single_symbol(cd, sym.data(), &ms, comp_atom); - if (rc) { - rspamd_composite_process_symbol_removal (atom, + if (fabs(rc) > epsilon) { + process_symbol_removal(atom, cd, ms, comp_atom->symbol); @@ -787,7 +723,7 @@ rspamd_composite_expr_process (void *ud, msg_debug_composites ("%s: final result for composite %s is %.2f", cd->metric_res->name, - cd->composite->sym, rc); + cd->composite->sym.c_str(), rc); return rc; } @@ -796,89 +732,85 @@ rspamd_composite_expr_process (void *ud, * We don't have preferences for composites */ static gint -rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) +rspamd_composite_expr_priority(rspamd_expression_atom_t *atom) { return 0; } static void -rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) +rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom) { - /* Composite atoms are destroyed just with the pool */ + rspamd_composite_atom_dtor(atom->data); } - static void -composites_foreach_callback (gpointer key, gpointer value, void *data) +composites_foreach_callback(gpointer key, gpointer value, void *data) { - struct composites_data *cd = data; - struct rspamd_composite *comp = value; + auto *cd = (struct composites_data *) data; + auto *comp = (struct rspamd_composite *) value; + auto *str_key = (const gchar *)key; struct rspamd_task *task; gdouble rc; cd->composite = comp; task = cd->task; - if (!isset (cd->checked, cd->composite->id * 2)) { - if (rspamd_symcache_is_checked (cd->task, cd->task->cfg->cache, - key)) { + if (!cd->checked[cd->composite->id * 2]) { + if (rspamd_symcache_is_checked(cd->task, cd->task->cfg->cache, + str_key)) { msg_debug_composites ("composite %s is checked in symcache but not " - "in composites bitfield", cd->composite->sym); - setbit (cd->checked, comp->id * 2); - clrbit (cd->checked, comp->id * 2 + 1); + "in composites bitfield", cd->composite->sym.c_str()); + cd->checked[comp->id * 2] = true; + cd->checked[comp->id * 2 + 1] = false; } else { - if (rspamd_task_find_symbol_result (cd->task, key, - cd->metric_res) != NULL) { + if (rspamd_task_find_symbol_result(cd->task, str_key, + cd->metric_res) != nullptr) { /* Already set, no need to check */ msg_debug_composites ("composite %s is already in metric " - "in composites bitfield", cd->composite->sym); - setbit (cd->checked, comp->id * 2); - clrbit (cd->checked, comp->id * 2 + 1); + "in composites bitfield", cd->composite->sym.c_str()); + cd->checked[comp->id * 2] = true; + cd->checked[comp->id * 2 + 1] = true; return; } - rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, + rc = rspamd_process_expression(comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd); /* Checked bit */ - setbit (cd->checked, comp->id * 2); + cd->checked[comp->id * 2] = true; /* Result bit */ - if (rc != 0) { - setbit (cd->checked, comp->id * 2 + 1); - rspamd_task_insert_result_full (cd->task, key, 1.0, NULL, + if (fabs(rc) > epsilon) { + cd->checked[comp->id * 2 + 1] = true; + rspamd_task_insert_result_full(cd->task, str_key, 1.0, NULL, RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res); } else { - clrbit (cd->checked, comp->id * 2 + 1); + cd->checked[comp->id * 2 + 1] = false; } } } } -static void -composites_remove_symbols (gpointer key, gpointer value, gpointer data) +static auto +remove_symbols(const composites_data &cd, const std::vector<symbol_remove_data> &rd) -> void { - struct composites_data *cd = data; - struct rspamd_task *task; - struct symbol_remove_data *rd = value, *cur; - struct rspamd_symbol_result *ms; + struct rspamd_task *task = cd.task; gboolean skip = FALSE, has_valid_op = FALSE, want_remove_score = TRUE, want_remove_symbol = TRUE, want_forced = FALSE; const gchar *disable_score_reason = "no policy", - *disable_symbol_reason = "no policy"; - GNode *par; + *disable_symbol_reason = "no policy"; - task = cd->task; + task = cd.task; - DL_FOREACH (rd, cur) { - if (!isset (cd->checked, cur->comp->id * 2 + 1)) { + for (const auto &cur : rd) { + if (!cd.checked[cur.comp->id * 2 + 1]) { continue; } /* @@ -886,11 +818,11 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data) * !A || B -> here we can have both !A and B matched, but we do *NOT* * want to remove symbol in that case */ - par = cur->parent; + auto *par = cur.parent; skip = FALSE; while (par) { - if (rspamd_expression_node_is_op (par, OP_NOT)) { + if (rspamd_expression_node_is_op(par, OP_NOT)) { skip = TRUE; break; } @@ -911,35 +843,35 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data) * - if no composites would like to save symbol then we remove symbol */ if (!want_forced) { - if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) { + if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) { want_remove_symbol = FALSE; - disable_symbol_reason = cur->comp->sym; + disable_symbol_reason = cur.comp->sym.c_str(); } - if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) { + if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) { want_remove_score = FALSE; - disable_score_reason = cur->comp->sym; + disable_score_reason = cur.comp->sym.c_str(); } - if (cur->action & RSPAMD_COMPOSITE_REMOVE_FORCED) { + if (cur.action & RSPAMD_COMPOSITE_REMOVE_FORCED) { want_forced = TRUE; - disable_symbol_reason = cur->comp->sym; - disable_score_reason = cur->comp->sym; + disable_symbol_reason = cur.comp->sym.c_str(); + disable_score_reason = cur.comp->sym.c_str(); } } } - ms = rspamd_task_find_symbol_result (task, rd->sym, cd->metric_res); + auto *ms = rspamd_task_find_symbol_result(task, rd.front().sym, cd.metric_res); if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) { if (want_remove_score || want_forced) { msg_debug_composites ("%s: %s remove symbol weight for %s (was %.2f), " - "score removal affected by %s, symbol removal affected by %s", - cd->metric_res->name, - (want_forced ? "forced" : "normal"), key, ms->score, + "score removal affected by %s, symbol removal affected by %s", + cd.metric_res->name, + (want_forced ? "forced" : "normal"), rd.front().sym, ms->score, disable_score_reason, disable_symbol_reason); - cd->metric_res->score -= ms->score; + cd.metric_res->score -= ms->score; ms->score = 0.0; } @@ -947,51 +879,47 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data) ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED; msg_debug_composites ("%s: %s remove symbol %s (score %.2f), " "score removal affected by %s, symbol removal affected by %s", - cd->metric_res->name, - (want_forced ? "forced" : "normal"), key, ms->score, + cd.metric_res->name, + (want_forced ? "forced" : "normal"), rd.front().sym, ms->score, disable_score_reason, disable_symbol_reason); } } } static void -composites_metric_callback (struct rspamd_task *task) +composites_metric_callback(struct rspamd_task *task) { - struct composites_data *cd, *first_cd = NULL; + std::vector<composites_data> comp_data_vec; struct rspamd_scan_result *mres; + comp_data_vec.reserve(1); + DL_FOREACH (task->result, mres) { - cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); - cd->task = task; - cd->metric_res = mres; - cd->symbols_to_remove = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - cd->checked = - rspamd_mempool_alloc0 (task->task_pool, - NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); - - /* Process hash table */ - rspamd_symcache_composites_foreach (task, + auto &cd = comp_data_vec.emplace_back(task, mres); + + /* Process metric result */ + rspamd_symcache_composites_foreach(task, task->cfg->cache, composites_foreach_callback, - cd); - LL_PREPEND (first_cd, cd); + &cd); } - LL_REVERSE (first_cd); - - LL_FOREACH (first_cd, cd) { + for (const auto &cd : comp_data_vec) { /* Remove symbols that are in composites */ - g_hash_table_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); - /* Free list */ - g_hash_table_unref (cd->symbols_to_remove); + for (const auto &srd_it : cd.symbols_to_remove) { + remove_symbols(cd, srd_it.second); + } } } +} + + void rspamd_composites_process_task (struct rspamd_task *task) { if (task->result && !RSPAMD_TASK_IS_SKIPPED (task)) { - composites_metric_callback (task); + rspamd::composites::composites_metric_callback(task); } } |