aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2021-07-15 17:10:03 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2021-07-15 17:10:03 +0100
commitf87014f894470620f39b93f390413be9555c1a0b (patch)
tree38ab5e435a78acde70ff5a387c85ba9f35fe2670
parent5f80e2e32e9e82c769b3f28be29cff4f62d9ecce (diff)
downloadrspamd-f87014f894470620f39b93f390413be9555c1a0b.tar.gz
rspamd-f87014f894470620f39b93f390413be9555c1a0b.zip
[Rework] Composites: Rewrite the composites logic
-rw-r--r--src/libmime/scan_result.c2
-rw-r--r--src/libmime/scan_result.h2
-rw-r--r--src/libserver/composites/composites.cxx696
3 files changed, 314 insertions, 386 deletions
diff --git a/src/libmime/scan_result.c b/src/libmime/scan_result.c
index e7fab8c6d..16ec9b0c5 100644
--- a/src/libmime/scan_result.c
+++ b/src/libmime/scan_result.c
@@ -921,7 +921,7 @@ rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym,
result = task->result;
}
- k = kh_get (rspamd_symbols_hash, result->symbols, sym);
+ k = kh_get(rspamd_symbols_hash, result->symbols, sym);
if (k != kh_end (result->symbols)) {
res = kh_value (result->symbols, k);
diff --git a/src/libmime/scan_result.h b/src/libmime/scan_result.h
index b7a548be0..c8bacf3e8 100644
--- a/src/libmime/scan_result.h
+++ b/src/libmime/scan_result.h
@@ -42,7 +42,7 @@ struct rspamd_symbol_result {
struct rspamd_symbol *sym; /**< symbol configuration */
gssize opts_len; /**< total size of all options (negative if truncated option is added) */
guint nshots;
- enum rspamd_symbol_result_flags flags;
+ int flags;
struct rspamd_symbol_result *next;
};
diff --git a/src/libserver/composites/composites.cxx b/src/libserver/composites/composites.cxx
index 0d9534681..56ca554b3 100644
--- a/src/libserver/composites/composites.cxx
+++ b/src/libserver/composites/composites.cxx
@@ -1,5 +1,5 @@
/*-
- * Copyright 2016 Vsevolod Stakhov
+ * Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -54,6 +54,7 @@ static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line,
static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
+static void composites_foreach_callback(gpointer key, gpointer value, void *data);
}
const struct rspamd_atom_subr composite_expr_subr = {
@@ -65,6 +66,8 @@ const struct rspamd_atom_subr composite_expr_subr = {
namespace rspamd::composites {
+static constexpr const double epsilon = 0.00001;
+
enum class rspamd_composite_policy {
RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0,
RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL,
@@ -84,23 +87,72 @@ struct rspamd_composite {
rspamd_composite_policy policy;
};
+struct symbol_remove_data {
+ const char *sym;
+ struct rspamd_composite *comp;
+ GNode *parent;
+ std::uint8_t action;
+};
+
struct composites_data {
struct rspamd_task *task;
struct rspamd_composite *composite;
struct rspamd_scan_result *metric_res;
- GHashTable *symbols_to_remove;
- guint8 *checked;
- struct composites_data *next;
+ robin_hood::unordered_flat_map<std::string_view,
+ std::vector<symbol_remove_data>> symbols_to_remove;
+ std::vector<bool> checked;
+
+ explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres) :
+ task(task), composite(nullptr), metric_res(mres) {
+ checked.resize(g_hash_table_size(task->cfg->composite_symbols) * 2);
+ }
};
struct rspamd_composite_option_match {
- std::variant<rspamd_regexp_t *, std::string> match;
+ std::variant<rspamd_regexp_t *, std::string_view> match;
- ~rspamd_composite_option_match() {
+ explicit rspamd_composite_option_match(const char *start, std::size_t len)
+ {
+ match = std::string_view{start, len};
+ }
+
+ explicit rspamd_composite_option_match(rspamd_regexp_t *re)
+ {
+ match = re;
+ }
+
+ ~rspamd_composite_option_match()
+ {
if (std::holds_alternative<rspamd_regexp_t *>(match)) {
rspamd_regexp_unref(std::get<rspamd_regexp_t *>(match));
}
}
+
+ auto math_opt(const std::string_view &data) const -> bool
+ {
+ return std::visit([&](auto arg) -> bool {
+ if constexpr (std::is_same_v<decltype(arg), std::string_view>) {
+ return data == arg;
+ }
+ else {
+ return rspamd_regexp_search(arg,
+ data.data(), data.size(),
+ nullptr, nullptr, false, nullptr);
+ }
+ }, match);
+ }
+
+ auto get_pat() const -> std::string_view
+ {
+ return std::visit([&](auto arg) -> std::string_view {
+ if constexpr (std::is_same_v<decltype(arg), std::string_view>) {
+ return std::string_view(arg);
+ }
+ else {
+ return std::string_view(rspamd_regexp_get_pattern(arg));
+ }
+ }, match);
+ }
};
enum class rspamd_composite_atom_type {
@@ -108,9 +160,10 @@ enum class rspamd_composite_atom_type {
ATOM_COMPOSITE,
ATOM_PLAIN
};
+
struct rspamd_composite_atom {
std::string symbol;
- rspamd_composite_atom_type comp_type;
+ rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN;
struct rspamd_composite *ncomp; /* underlying composite */
std::vector<rspamd_composite_option_match> opts;
};
@@ -122,18 +175,18 @@ enum rspamd_composite_action : std::uint8_t {
RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2)
};
-struct symbol_remove_data {
- const char *sym;
- struct rspamd_composite *comp;
- GNode *parent;
- std::uint8_t action;
- struct symbol_remove_data *prev, *next;
-};
-
static GQuark
-rspamd_composites_quark (void)
+rspamd_composites_quark(void)
{
- return g_quark_from_static_string ("composites");
+ return g_quark_from_static_string("composites");
+}
+
+static auto
+rspamd_composite_atom_dtor(void *ptr)
+{
+ auto *atom = reinterpret_cast<rspamd_expression_atom_t *>(ptr);
+
+ delete atom;
}
static rspamd_expression_atom_t *
@@ -142,8 +195,6 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
gpointer ud, GError **err)
{
gsize clen = 0;
- rspamd_expression_atom_t *res;
- struct rspamd_composite_atom *atom;
const gchar *p, *end;
enum composite_expr_state {
comp_state_read_symbol = 0,
@@ -249,18 +300,15 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
p = line;
state = comp_state_read_symbol;
- atom = rspamd_mempool_alloc0 (pool, sizeof(*atom));
- atom->comp_type = ATOM_UNKNOWN;
- res = rspamd_mempool_alloc0 (pool, sizeof(*res));
+ auto *atom = new rspamd_composite_atom;
+ auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t);
res->len = clen;
res->str = line;
/* Full state machine to fill a composite atom */
- const gchar *opt_start = NULL;
+ const gchar *opt_start = nullptr;
while (p < end) {
- struct rspamd_composite_option_match *opt_match;
-
if (state == comp_state_read_end) {
break;
}
@@ -277,9 +325,7 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
state = comp_state_read_end;
}
- atom->symbol = rspamd_mempool_alloc (pool, clen + 1);
- rspamd_strlcpy(atom->symbol, line, clen + 1);
-
+ atom->symbol = std::string{line, clen};
break;
case comp_state_read_obrace:
p++;
@@ -308,18 +354,12 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
break;
case comp_state_read_option:
if (*p == ',' || *p == ']') {
- opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
- /* Plain match */
- gchar *opt_buf;
+ /* Plain match, copy option to ensure string_view validity */
gint opt_len = p - opt_start;
-
- opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
+ auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1);
rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
-
- opt_match->data.match = opt_buf;
- opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN;
-
- DL_APPEND (atom->opts, opt_match);
+ opt_buf = g_strstrip(opt_buf);
+ atom->opts.emplace_back(opt_buf, strlen(opt_buf));
if (*p == ',') {
p++;
@@ -335,33 +375,20 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
break;
case comp_state_read_regexp_end:
if (*p == ',' || *p == ']') {
- opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
- /* Plain match */
- gchar *opt_buf;
- gint opt_len = p - opt_start;
-
- opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
- rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
-
+ auto opt_len = p - opt_start;
rspamd_regexp_t *re;
- GError *re_err = NULL;
+ GError *re_err = nullptr;
- re = rspamd_regexp_new(opt_buf, NULL, &re_err);
+ re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err);
- if (re == NULL) {
- msg_err_pool ("cannot create regexp from string %s: %e",
- opt_buf, re_err);
+ if (re == nullptr) {
+ msg_err_pool ("cannot create regexp from string %*s: %e",
+ opt_len, opt_start, re_err);
g_error_free(re_err);
}
else {
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t) rspamd_regexp_unref,
- re);
- opt_match->data.re = re;
- opt_match->type = RSPAMD_COMPOSITE_OPTION_RE;
-
- DL_APPEND (atom->opts, opt_match);
+ atom->opts.emplace_back(re);
}
if (*p == ',') {
@@ -409,66 +436,137 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
return res;
}
-}
+static auto
+process_symbol_removal(rspamd_expression_atom_t *atom,
+ struct composites_data *cd,
+ struct rspamd_symbol_result *ms,
+ const std::string &beg) -> void
+{
+ struct rspamd_task *task = cd->task;
-static void composites_foreach_callback (gpointer key, gpointer value, void *data);
+ if (ms == nullptr) {
+ return;
+ }
+ /*
+ * At this point we know that we need to do something about this symbol,
+ * however, we don't know whether we need to delete it unfortunately,
+ * that depends on the later decisions when the complete expression is
+ * evaluated.
+ */
+ auto rd_it = cd->symbols_to_remove.find(ms->name);
+
+ auto fill_removal_structure = [&](symbol_remove_data &nrd) {
+ nrd.sym = ms->name;
+
+ /* By default remove symbols */
+ switch (cd->composite->policy) {
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
+ default:
+ nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT);
+ break;
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
+ nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+ break;
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
+ nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
+ break;
+ case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE:
+ nrd.action = 0;
+ break;
+ }
+
+ for (auto t : beg) {
+ if (t == '~') {
+ nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+ }
+ else if (t == '-') {
+ nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT |
+ RSPAMD_COMPOSITE_REMOVE_SYMBOL);
+ }
+ else if (t == '^') {
+ nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
+ }
+ else {
+ break;
+ }
+ }
+
+ nrd.comp = cd->composite;
+ nrd.parent = atom->parent;
+ };
+
+ if (rd_it != cd->symbols_to_remove.end()) {
+ fill_removal_structure(rd_it->second.emplace_back());
+ msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
+ cd->metric_res->name,
+ ms->name, rd_it->second.back().action,
+ cd->composite->sym.c_str());
+ }
+ else {
+ std::vector<symbol_remove_data> nrd;
+ fill_removal_structure(nrd.emplace_back());
+ msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
+ cd->metric_res->name,
+ ms->name, nrd.front().action,
+ cd->composite->sym.c_str());
+ cd->symbols_to_remove[ms->name] = std::move(nrd);
+ }
+}
-static gdouble
-rspamd_composite_process_single_symbol (struct composites_data *cd,
- const gchar *sym,
- struct rspamd_symbol_result **pms,
- struct rspamd_composite_atom *atom)
+static auto
+process_single_symbol(struct composites_data *cd,
+ const gchar *sym,
+ struct rspamd_symbol_result **pms,
+ struct rspamd_composite_atom *atom) -> double
{
- struct rspamd_symbol_result *ms = NULL;
+ struct rspamd_symbol_result *ms = nullptr;
gdouble rc = 0;
struct rspamd_task *task = cd->task;
- if ((ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res)) == NULL) {
+ if ((ms = rspamd_task_find_symbol_result(cd->task, sym, cd->metric_res)) == nullptr) {
msg_debug_composites ("not found symbol %s in composite %s", sym,
- cd->composite->sym);
+ cd->composite->sym.c_str());
- if (atom->comp_type == ATOM_UNKNOWN) {
+ if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) {
struct rspamd_composite *ncomp;
if ((ncomp =
- g_hash_table_lookup (cd->task->cfg->composite_symbols,
+ g_hash_table_lookup(cd->task->cfg->composite_symbols,
sym)) != NULL) {
- atom->comp_type = ATOM_COMPOSITE;
+ atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE;
atom->ncomp = ncomp;
}
else {
- atom->comp_type = ATOM_PLAIN;
+ atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN;
}
}
- if (atom->comp_type == ATOM_COMPOSITE) {
+ if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) {
msg_debug_composites ("symbol %s for composite %s is another composite",
- sym, cd->composite->sym);
-
- if (isclr (cd->checked, atom->ncomp->id * 2)) {
- struct rspamd_composite *saved;
+ sym, cd->composite->sym.c_str());
- msg_debug_composites ("composite dependency %s for %s is not checked",
- sym, cd->composite->sym);
+ if (!cd->checked[atom->ncomp->id * 2]) {
+ msg_debug_composites("composite dependency %s for %s is not checked",
+ sym, cd->composite->sym.c_str());
/* Set checked for this symbol to avoid cyclic references */
- setbit (cd->checked, cd->composite->id * 2);
- saved = cd->composite; /* Save the current composite */
- composites_foreach_callback ((gpointer)atom->ncomp->sym, atom->ncomp, cd);
-
+ cd->checked[cd->composite->id * 2] = true;
+ auto *saved = cd->composite; /* Save the current composite */
+ composites_foreach_callback((gpointer)atom->ncomp->sym.c_str(),
+ (gpointer)atom->ncomp, (gpointer)cd);
/* Restore state */
cd->composite = saved;
- clrbit (cd->checked, cd->composite->id * 2);
+ cd->checked[cd->composite->id * 2] = false;
- ms = rspamd_task_find_symbol_result (cd->task, sym,
+ ms = rspamd_task_find_symbol_result(cd->task, sym,
cd->metric_res);
}
else {
/*
* XXX: in case of cyclic references this would return 0
*/
- if (isset (cd->checked, atom->ncomp->id * 2 + 1)) {
- ms = rspamd_task_find_symbol_result (cd->task, sym,
+ if (cd->checked[atom->ncomp->id * 2 + 1]) {
+ ms = rspamd_task_find_symbol_result(cd->task, sym,
cd->metric_res);
}
}
@@ -476,54 +574,28 @@ rspamd_composite_process_single_symbol (struct composites_data *cd,
}
if (ms) {
- msg_debug_composites ("found symbol %s in composite %s, weight: %.3f",
- sym, cd->composite->sym, ms->score);
+ msg_debug_composites("found symbol %s in composite %s, weight: %.3f",
+ sym, cd->composite->sym.c_str(), ms->score);
/* Now check options */
- struct rspamd_composite_option_match *cur_opt;
-
- DL_FOREACH (atom->opts, cur_opt) {
+ for (const auto &cur_opt : atom->opts) {
struct rspamd_symbol_option *opt;
- bool found = false;
+ auto found = false;
DL_FOREACH (ms->opts_head, opt) {
- if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
- gsize mlen = strlen (cur_opt->data.match);
-
- if (opt->optlen == mlen &&
- memcmp (opt->option, cur_opt->data.match, mlen) == 0) {
-
- found = true;
-
- break;
- }
- }
- else {
- if (rspamd_regexp_search (cur_opt->data.re,
- opt->option, opt->optlen, NULL, NULL, FALSE, NULL)) {
- found = true;
-
- break;
- }
+ if (cur_opt.math_opt({opt->option, opt->optlen})) {
+ found = true;
+ break;
}
}
-
if (!found) {
- if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
- msg_debug_composites ("symbol %s in composite %s misses required option %s",
- sym,
- cd->composite->sym,
- cur_opt->data.match);
- }
- else {
- msg_debug_composites ("symbol %s in composite %s failed to match regexp %s",
- sym,
- cd->composite->sym,
- rspamd_regexp_get_pattern (cur_opt->data.re));
- }
-
- ms = NULL;
+ auto pat = cur_opt.get_pat();
+ msg_debug_composites ("symbol %s in composite %s misses required option %*s",
+ sym,
+ cd->composite->sym.c_str(),
+ (int) pat.size(), pat.data());
+ ms = nullptr;
break;
}
@@ -543,109 +615,21 @@ rspamd_composite_process_single_symbol (struct composites_data *cd,
return rc;
}
-static void
-rspamd_composite_process_symbol_removal (rspamd_expression_atom_t *atom,
- struct composites_data *cd,
- struct rspamd_symbol_result *ms,
- const gchar *beg)
-{
- gchar t;
- struct symbol_remove_data *rd, *nrd;
- struct rspamd_task *task = cd->task;
-
- if (ms == NULL) {
- return;
- }
-
- /*
- * At this point we know that we need to do something about this symbol,
- * however, we don't know whether we need to delete it unfortunately,
- * that depends on the later decisions when the complete expression is
- * evaluated.
- */
- rd = g_hash_table_lookup (cd->symbols_to_remove, ms->name);
-
- nrd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*nrd));
- nrd->sym = ms->name;
-
- /* By default remove symbols */
- switch (cd->composite->policy) {
- case RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
- default:
- nrd->action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL|RSPAMD_COMPOSITE_REMOVE_WEIGHT);
- break;
- case RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
- nrd->action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
- break;
- case RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
- nrd->action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
- break;
- case RSPAMD_COMPOSITE_POLICY_LEAVE:
- nrd->action = 0;
- break;
- }
-
- for (;;) {
- t = *beg;
-
- if (t == '~') {
- nrd->action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
- }
- else if (t == '-') {
- nrd->action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT|
- RSPAMD_COMPOSITE_REMOVE_SYMBOL);
- }
- else if (t == '^') {
- nrd->action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
- }
- else {
- break;
- }
-
- beg ++;
- }
-
- nrd->comp = cd->composite;
- nrd->parent = atom->parent;
-
- if (rd == NULL) {
- DL_APPEND (rd, nrd);
- g_hash_table_insert (cd->symbols_to_remove, (gpointer)ms->name, rd);
- msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
- cd->metric_res->name,
- ms->name, nrd->action,
- cd->composite->sym);
- }
- else {
- DL_APPEND (rd, nrd);
- msg_debug_composites ("%s: append symbol %s to removal: %d policy, from composite %s",
- cd->metric_res->name,
- ms->name, nrd->action,
- cd->composite->sym);
- }
-}
-
-static gdouble
-rspamd_composite_expr_process (void *ud,
- rspamd_expression_atom_t *atom)
+static auto
+rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double
{
- static const double epsilon = 0.00001;
- struct composites_data *cd = (struct composites_data *)ud;
- const gchar *sym = NULL;
- struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data;
+ struct composites_data *cd = (struct composites_data *) ud;
+ struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data;
struct rspamd_symbol_result *ms = NULL;
- struct rspamd_symbols_group *gr;
- struct rspamd_symbol *sdef;
struct rspamd_task *task = cd->task;
- GHashTableIter it;
- gpointer k, v;
- gdouble rc = 0, max = 0;
+ gdouble rc = 0;
- if (isset (cd->checked, cd->composite->id * 2)) {
+ if (cd->checked[cd->composite->id * 2]) {
/* We have already checked this composite, so just return its value */
- if (isset (cd->checked, cd->composite->id * 2 + 1)) {
- ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res);
+ if (cd->checked[cd->composite->id * 2 + 1]) {
+ ms = rspamd_task_find_symbol_result(cd->task, comp_atom->symbol.c_str(),
+ cd->metric_res);
}
if (ms) {
@@ -654,120 +638,72 @@ rspamd_composite_expr_process (void *ud,
}
else {
/* Treat negative and positive scores equally... */
- rc = fabs (ms->score);
+ rc = fabs(ms->score);
}
}
- msg_debug_composites ("composite %s is already checked, result: %.2f",
- cd->composite->sym, rc);
+ msg_debug_composites("composite %s is already checked, result: %.2f",
+ cd->composite->sym.c_str(), rc);
return rc;
}
- sym = comp_atom->symbol;
- guint slen = strlen (sym);
+ /* Note: sym is zero terminated as it is a view on std::string */
+ auto sym = std::string_view{comp_atom->symbol};
+ auto group_process_functor = [&](auto cond, int sub_start) -> double {
+ auto max = 0.;
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_symbols_group *gr;
- while (*sym != '\0' && !g_ascii_isalnum (*sym)) {
- sym ++;
- slen --;
- }
+ gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups,
+ sym.substr(sub_start).data());
- if (slen > 2) {
- if (G_UNLIKELY (memcmp (sym, "g:", 2) == 0)) {
- gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 2);
+ if (gr != nullptr) {
+ g_hash_table_iter_init(&it, gr->symbols);
- if (gr != NULL) {
- g_hash_table_iter_init (&it, gr->symbols);
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ auto *sdef = (rspamd_symbol *) v;
- while (g_hash_table_iter_next (&it, &k, &v)) {
- sdef = v;
- rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms,
+ if (cond(sdef->score)) {
+ rc = process_single_symbol(cd,
+ sdef->name,
+ &ms,
comp_atom);
- if (rc) {
- rspamd_composite_process_symbol_removal (atom,
+ if (fabs(rc) > epsilon) {
+ process_symbol_removal(atom,
cd,
ms,
comp_atom->symbol);
- if (fabs (rc) > max) {
- max = fabs (rc);
+ if (fabs(rc) > max) {
+ max = fabs(rc);
}
}
}
}
-
- rc = max;
}
- else if (G_UNLIKELY (memcmp (sym, "g+:", 3) == 0)) {
- /* Group, positive symbols only */
- gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3);
-
- if (gr != NULL) {
- g_hash_table_iter_init (&it, gr->symbols);
-
- while (g_hash_table_iter_next (&it, &k, &v)) {
- sdef = v;
- if (sdef->score > 0) {
- rc = rspamd_composite_process_single_symbol (cd,
- sdef->name,
- &ms,
- comp_atom);
-
- if (rc) {
- rspamd_composite_process_symbol_removal (atom,
- cd,
- ms,
- comp_atom->symbol);
-
- if (fabs (rc) > max) {
- max = fabs (rc);
- }
- }
- }
- }
+ return max;
+ };
- rc = max;
- }
+ if (sym.size() > 2) {
+ if (sym.substr(0, 2) == "g:") {
+ rc = group_process_functor([](auto _) { return true; }, 2);
}
- else if (G_UNLIKELY (memcmp (sym, "g-:", 3) == 0)) {
- /* Group, negative symbols only */
- gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3);
-
- if (gr != NULL) {
- g_hash_table_iter_init (&it, gr->symbols);
-
- while (g_hash_table_iter_next (&it, &k, &v)) {
- sdef = v;
-
- if (sdef->score < 0) {
- rc = rspamd_composite_process_single_symbol (cd,
- sdef->name,
- &ms,
- comp_atom);
-
- if (rc) {
- rspamd_composite_process_symbol_removal (atom,
- cd,
- ms,
- comp_atom->symbol);
-
- if (fabs (rc) > max) {
- max = fabs (rc);
- }
- }
- }
- }
-
- rc = max;
- }
+ else if (sym.substr(0, 3) == "g+:") {
+ /* Group, positive symbols only */
+ rc = group_process_functor([](auto sc) { return sc > 0.; }, 3);
+ }
+ else if (sym.substr(0, 3) == "g-:") {
+ rc = group_process_functor([](auto sc) { return sc < 0.; }, 3);
}
else {
- rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom);
+ rc = process_single_symbol(cd, sym.data(), &ms, comp_atom);
if (rc) {
- rspamd_composite_process_symbol_removal (atom,
+ process_symbol_removal(atom,
cd,
ms,
comp_atom->symbol);
@@ -775,10 +711,10 @@ rspamd_composite_expr_process (void *ud,
}
}
else {
- rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom);
+ rc = process_single_symbol(cd, sym.data(), &ms, comp_atom);
- if (rc) {
- rspamd_composite_process_symbol_removal (atom,
+ if (fabs(rc) > epsilon) {
+ process_symbol_removal(atom,
cd,
ms,
comp_atom->symbol);
@@ -787,7 +723,7 @@ rspamd_composite_expr_process (void *ud,
msg_debug_composites ("%s: final result for composite %s is %.2f",
cd->metric_res->name,
- cd->composite->sym, rc);
+ cd->composite->sym.c_str(), rc);
return rc;
}
@@ -796,89 +732,85 @@ rspamd_composite_expr_process (void *ud,
* We don't have preferences for composites
*/
static gint
-rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
+rspamd_composite_expr_priority(rspamd_expression_atom_t *atom)
{
return 0;
}
static void
-rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
+rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom)
{
- /* Composite atoms are destroyed just with the pool */
+ rspamd_composite_atom_dtor(atom->data);
}
-
static void
-composites_foreach_callback (gpointer key, gpointer value, void *data)
+composites_foreach_callback(gpointer key, gpointer value, void *data)
{
- struct composites_data *cd = data;
- struct rspamd_composite *comp = value;
+ auto *cd = (struct composites_data *) data;
+ auto *comp = (struct rspamd_composite *) value;
+ auto *str_key = (const gchar *)key;
struct rspamd_task *task;
gdouble rc;
cd->composite = comp;
task = cd->task;
- if (!isset (cd->checked, cd->composite->id * 2)) {
- if (rspamd_symcache_is_checked (cd->task, cd->task->cfg->cache,
- key)) {
+ if (!cd->checked[cd->composite->id * 2]) {
+ if (rspamd_symcache_is_checked(cd->task, cd->task->cfg->cache,
+ str_key)) {
msg_debug_composites ("composite %s is checked in symcache but not "
- "in composites bitfield", cd->composite->sym);
- setbit (cd->checked, comp->id * 2);
- clrbit (cd->checked, comp->id * 2 + 1);
+ "in composites bitfield", cd->composite->sym.c_str());
+ cd->checked[comp->id * 2] = true;
+ cd->checked[comp->id * 2 + 1] = false;
}
else {
- if (rspamd_task_find_symbol_result (cd->task, key,
- cd->metric_res) != NULL) {
+ if (rspamd_task_find_symbol_result(cd->task, str_key,
+ cd->metric_res) != nullptr) {
/* Already set, no need to check */
msg_debug_composites ("composite %s is already in metric "
- "in composites bitfield", cd->composite->sym);
- setbit (cd->checked, comp->id * 2);
- clrbit (cd->checked, comp->id * 2 + 1);
+ "in composites bitfield", cd->composite->sym.c_str());
+ cd->checked[comp->id * 2] = true;
+ cd->checked[comp->id * 2 + 1] = true;
return;
}
- rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
+ rc = rspamd_process_expression(comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
cd);
/* Checked bit */
- setbit (cd->checked, comp->id * 2);
+ cd->checked[comp->id * 2] = true;
/* Result bit */
- if (rc != 0) {
- setbit (cd->checked, comp->id * 2 + 1);
- rspamd_task_insert_result_full (cd->task, key, 1.0, NULL,
+ if (fabs(rc) > epsilon) {
+ cd->checked[comp->id * 2 + 1] = true;
+ rspamd_task_insert_result_full(cd->task, str_key, 1.0, NULL,
RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res);
}
else {
- clrbit (cd->checked, comp->id * 2 + 1);
+ cd->checked[comp->id * 2 + 1] = false;
}
}
}
}
-static void
-composites_remove_symbols (gpointer key, gpointer value, gpointer data)
+static auto
+remove_symbols(const composites_data &cd, const std::vector<symbol_remove_data> &rd) -> void
{
- struct composites_data *cd = data;
- struct rspamd_task *task;
- struct symbol_remove_data *rd = value, *cur;
- struct rspamd_symbol_result *ms;
+ struct rspamd_task *task = cd.task;
gboolean skip = FALSE,
has_valid_op = FALSE,
want_remove_score = TRUE,
want_remove_symbol = TRUE,
want_forced = FALSE;
const gchar *disable_score_reason = "no policy",
- *disable_symbol_reason = "no policy";
- GNode *par;
+ *disable_symbol_reason = "no policy";
- task = cd->task;
+ task = cd.task;
- DL_FOREACH (rd, cur) {
- if (!isset (cd->checked, cur->comp->id * 2 + 1)) {
+ for (const auto &cur : rd) {
+ if (!cd.checked[cur.comp->id * 2 + 1]) {
continue;
}
/*
@@ -886,11 +818,11 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data)
* !A || B -> here we can have both !A and B matched, but we do *NOT*
* want to remove symbol in that case
*/
- par = cur->parent;
+ auto *par = cur.parent;
skip = FALSE;
while (par) {
- if (rspamd_expression_node_is_op (par, OP_NOT)) {
+ if (rspamd_expression_node_is_op(par, OP_NOT)) {
skip = TRUE;
break;
}
@@ -911,35 +843,35 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data)
* - if no composites would like to save symbol then we remove symbol
*/
if (!want_forced) {
- if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
+ if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
want_remove_symbol = FALSE;
- disable_symbol_reason = cur->comp->sym;
+ disable_symbol_reason = cur.comp->sym.c_str();
}
- if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
+ if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
want_remove_score = FALSE;
- disable_score_reason = cur->comp->sym;
+ disable_score_reason = cur.comp->sym.c_str();
}
- if (cur->action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
+ if (cur.action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
want_forced = TRUE;
- disable_symbol_reason = cur->comp->sym;
- disable_score_reason = cur->comp->sym;
+ disable_symbol_reason = cur.comp->sym.c_str();
+ disable_score_reason = cur.comp->sym.c_str();
}
}
}
- ms = rspamd_task_find_symbol_result (task, rd->sym, cd->metric_res);
+ auto *ms = rspamd_task_find_symbol_result(task, rd.front().sym, cd.metric_res);
if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
if (want_remove_score || want_forced) {
msg_debug_composites ("%s: %s remove symbol weight for %s (was %.2f), "
- "score removal affected by %s, symbol removal affected by %s",
- cd->metric_res->name,
- (want_forced ? "forced" : "normal"), key, ms->score,
+ "score removal affected by %s, symbol removal affected by %s",
+ cd.metric_res->name,
+ (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
disable_score_reason, disable_symbol_reason);
- cd->metric_res->score -= ms->score;
+ cd.metric_res->score -= ms->score;
ms->score = 0.0;
}
@@ -947,51 +879,47 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data)
ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED;
msg_debug_composites ("%s: %s remove symbol %s (score %.2f), "
"score removal affected by %s, symbol removal affected by %s",
- cd->metric_res->name,
- (want_forced ? "forced" : "normal"), key, ms->score,
+ cd.metric_res->name,
+ (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
disable_score_reason, disable_symbol_reason);
}
}
}
static void
-composites_metric_callback (struct rspamd_task *task)
+composites_metric_callback(struct rspamd_task *task)
{
- struct composites_data *cd, *first_cd = NULL;
+ std::vector<composites_data> comp_data_vec;
struct rspamd_scan_result *mres;
+ comp_data_vec.reserve(1);
+
DL_FOREACH (task->result, mres) {
- cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data));
- cd->task = task;
- cd->metric_res = mres;
- cd->symbols_to_remove = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
- cd->checked =
- rspamd_mempool_alloc0 (task->task_pool,
- NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2));
-
- /* Process hash table */
- rspamd_symcache_composites_foreach (task,
+ auto &cd = comp_data_vec.emplace_back(task, mres);
+
+ /* Process metric result */
+ rspamd_symcache_composites_foreach(task,
task->cfg->cache,
composites_foreach_callback,
- cd);
- LL_PREPEND (first_cd, cd);
+ &cd);
}
- LL_REVERSE (first_cd);
-
- LL_FOREACH (first_cd, cd) {
+ for (const auto &cd : comp_data_vec) {
/* Remove symbols that are in composites */
- g_hash_table_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
- /* Free list */
- g_hash_table_unref (cd->symbols_to_remove);
+ for (const auto &srd_it : cd.symbols_to_remove) {
+ remove_symbols(cd, srd_it.second);
+ }
}
}
+}
+
+
void
rspamd_composites_process_task (struct rspamd_task *task)
{
if (task->result && !RSPAMD_TASK_IS_SKIPPED (task)) {
- composites_metric_callback (task);
+ rspamd::composites::composites_metric_callback(task);
}
}