]> source.dussan.org Git - rspamd.git/commitdiff
[Rework] Composites: Rewrite the composites logic
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 15 Jul 2021 16:10:03 +0000 (17:10 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 15 Jul 2021 16:10:03 +0000 (17:10 +0100)
src/libmime/scan_result.c
src/libmime/scan_result.h
src/libserver/composites/composites.cxx

index e7fab8c6dda9379130a3da19424a73c69f0e1851..16ec9b0c59386d6c5d31d695c0705cdebfca3e86 100644 (file)
@@ -921,7 +921,7 @@ rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym,
                result = task->result;
        }
 
-       k = kh_get (rspamd_symbols_hash, result->symbols, sym);
+       k = kh_get(rspamd_symbols_hash, result->symbols, sym);
 
        if (k != kh_end (result->symbols)) {
                res = kh_value (result->symbols, k);
index b7a548be04a5d41851be03bea82eb1ca67b36c13..c8bacf3e86768ac5a3084ec7b00d98ccc20e24d3 100644 (file)
@@ -42,7 +42,7 @@ struct rspamd_symbol_result {
        struct rspamd_symbol *sym;                     /**< symbol configuration                                        */
        gssize opts_len;                               /**< total size of all options (negative if truncated option is added) */
        guint nshots;
-       enum rspamd_symbol_result_flags flags;
+       int flags;
        struct rspamd_symbol_result *next;
 };
 
index 0d95346815b43248e7443a69391b7bb9b6c7b8d1..56ca554b3603bd4d76332f856850020c8c4bdade 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright 2016 Vsevolod Stakhov
+ * Copyright 2021 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -54,6 +54,7 @@ static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line,
 static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
 static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
 static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
+static void composites_foreach_callback(gpointer key, gpointer value, void *data);
 }
 
 const struct rspamd_atom_subr composite_expr_subr = {
@@ -65,6 +66,8 @@ const struct rspamd_atom_subr composite_expr_subr = {
 
 namespace rspamd::composites {
 
+static constexpr const double epsilon = 0.00001;
+
 enum class rspamd_composite_policy {
        RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0,
        RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL,
@@ -84,23 +87,72 @@ struct rspamd_composite {
        rspamd_composite_policy policy;
 };
 
+struct symbol_remove_data {
+       const char *sym;
+       struct rspamd_composite *comp;
+       GNode *parent;
+       std::uint8_t action;
+};
+
 struct composites_data {
        struct rspamd_task *task;
        struct rspamd_composite *composite;
        struct rspamd_scan_result *metric_res;
-       GHashTable *symbols_to_remove;
-       guint8 *checked;
-       struct composites_data *next;
+       robin_hood::unordered_flat_map<std::string_view,
+                       std::vector<symbol_remove_data>> symbols_to_remove;
+       std::vector<bool> checked;
+
+       explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres) :
+                       task(task), composite(nullptr), metric_res(mres) {
+               checked.resize(g_hash_table_size(task->cfg->composite_symbols) * 2);
+       }
 };
 
 struct rspamd_composite_option_match {
-       std::variant<rspamd_regexp_t *, std::string> match;
+       std::variant<rspamd_regexp_t *, std::string_view> match;
 
-       ~rspamd_composite_option_match() {
+       explicit rspamd_composite_option_match(const char *start, std::size_t len)
+       {
+               match = std::string_view{start, len};
+       }
+
+       explicit rspamd_composite_option_match(rspamd_regexp_t *re)
+       {
+               match = re;
+       }
+
+       ~rspamd_composite_option_match()
+       {
                if (std::holds_alternative<rspamd_regexp_t *>(match)) {
                        rspamd_regexp_unref(std::get<rspamd_regexp_t *>(match));
                }
        }
+
+       auto math_opt(const std::string_view &data) const -> bool
+       {
+               return std::visit([&](auto arg) -> bool {
+                       if constexpr (std::is_same_v<decltype(arg), std::string_view>) {
+                               return data == arg;
+                       }
+                       else {
+                               return rspamd_regexp_search(arg,
+                                               data.data(), data.size(),
+                                               nullptr, nullptr, false, nullptr);
+                       }
+               }, match);
+       }
+
+       auto get_pat() const -> std::string_view
+       {
+               return std::visit([&](auto arg) -> std::string_view {
+                       if constexpr (std::is_same_v<decltype(arg), std::string_view>) {
+                               return std::string_view(arg);
+                       }
+                       else {
+                               return std::string_view(rspamd_regexp_get_pattern(arg));
+                       }
+               }, match);
+       }
 };
 
 enum class rspamd_composite_atom_type {
@@ -108,9 +160,10 @@ enum class rspamd_composite_atom_type {
        ATOM_COMPOSITE,
        ATOM_PLAIN
 };
+
 struct rspamd_composite_atom {
        std::string symbol;
-       rspamd_composite_atom_type comp_type;
+       rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN;
        struct rspamd_composite *ncomp; /* underlying composite */
        std::vector<rspamd_composite_option_match> opts;
 };
@@ -122,18 +175,18 @@ enum rspamd_composite_action : std::uint8_t {
        RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2)
 };
 
-struct symbol_remove_data {
-       const char *sym;
-       struct rspamd_composite *comp;
-       GNode *parent;
-       std::uint8_t action;
-       struct symbol_remove_data *prev, *next;
-};
-
 static GQuark
-rspamd_composites_quark (void)
+rspamd_composites_quark(void)
 {
-       return g_quark_from_static_string ("composites");
+       return g_quark_from_static_string("composites");
+}
+
+static auto
+rspamd_composite_atom_dtor(void *ptr)
+{
+       auto *atom = reinterpret_cast<rspamd_expression_atom_t *>(ptr);
+
+       delete atom;
 }
 
 static rspamd_expression_atom_t *
@@ -142,8 +195,6 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
                                                        gpointer ud, GError **err)
 {
        gsize clen = 0;
-       rspamd_expression_atom_t *res;
-       struct rspamd_composite_atom *atom;
        const gchar *p, *end;
        enum composite_expr_state {
                comp_state_read_symbol = 0,
@@ -249,18 +300,15 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
        p = line;
        state = comp_state_read_symbol;
 
-       atom = rspamd_mempool_alloc0 (pool, sizeof(*atom));
-       atom->comp_type = ATOM_UNKNOWN;
-       res = rspamd_mempool_alloc0 (pool, sizeof(*res));
+       auto *atom = new rspamd_composite_atom;
+       auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t);
        res->len = clen;
        res->str = line;
 
        /* Full state machine to fill a composite atom */
-       const gchar *opt_start = NULL;
+       const gchar *opt_start = nullptr;
 
        while (p < end) {
-               struct rspamd_composite_option_match *opt_match;
-
                if (state == comp_state_read_end) {
                        break;
                }
@@ -277,9 +325,7 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
                                state = comp_state_read_end;
                        }
 
-                       atom->symbol = rspamd_mempool_alloc (pool, clen + 1);
-                       rspamd_strlcpy(atom->symbol, line, clen + 1);
-
+                       atom->symbol = std::string{line, clen};
                        break;
                case comp_state_read_obrace:
                        p++;
@@ -308,18 +354,12 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
                        break;
                case comp_state_read_option:
                        if (*p == ',' || *p == ']') {
-                               opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
-                               /* Plain match */
-                               gchar *opt_buf;
+                               /* Plain match, copy option to ensure string_view validity */
                                gint opt_len = p - opt_start;
-
-                               opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
+                               auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1);
                                rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
-
-                               opt_match->data.match = opt_buf;
-                               opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN;
-
-                               DL_APPEND (atom->opts, opt_match);
+                               opt_buf = g_strstrip(opt_buf);
+                               atom->opts.emplace_back(opt_buf, strlen(opt_buf));
 
                                if (*p == ',') {
                                        p++;
@@ -335,33 +375,20 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
                        break;
                case comp_state_read_regexp_end:
                        if (*p == ',' || *p == ']') {
-                               opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
-                               /* Plain match */
-                               gchar *opt_buf;
-                               gint opt_len = p - opt_start;
-
-                               opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
-                               rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
-
+                               auto opt_len = p - opt_start;
                                rspamd_regexp_t *re;
-                               GError *re_err = NULL;
+                               GError *re_err = nullptr;
 
-                               re = rspamd_regexp_new(opt_buf, NULL, &re_err);
+                               re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err);
 
-                               if (re == NULL) {
-                                       msg_err_pool ("cannot create regexp from string %s: %e",
-                                                       opt_buf, re_err);
+                               if (re == nullptr) {
+                                       msg_err_pool ("cannot create regexp from string %*s: %e",
+                                                       opt_len, opt_start, re_err);
 
                                        g_error_free(re_err);
                                }
                                else {
-                                       rspamd_mempool_add_destructor (pool,
-                                                       (rspamd_mempool_destruct_t) rspamd_regexp_unref,
-                                                       re);
-                                       opt_match->data.re = re;
-                                       opt_match->type = RSPAMD_COMPOSITE_OPTION_RE;
-
-                                       DL_APPEND (atom->opts, opt_match);
+                                       atom->opts.emplace_back(re);
                                }
 
                                if (*p == ',') {
@@ -409,66 +436,137 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
        return res;
 }
 
-}
+static auto
+process_symbol_removal(rspamd_expression_atom_t *atom,
+                                          struct composites_data *cd,
+                                          struct rspamd_symbol_result *ms,
+                                          const std::string &beg) -> void
+{
+       struct rspamd_task *task = cd->task;
 
-static void composites_foreach_callback (gpointer key, gpointer value, void *data);
+       if (ms == nullptr) {
+               return;
+       }
 
+       /*
+        * At this point we know that we need to do something about this symbol,
+        * however, we don't know whether we need to delete it unfortunately,
+        * that depends on the later decisions when the complete expression is
+        * evaluated.
+        */
+       auto rd_it = cd->symbols_to_remove.find(ms->name);
+
+       auto fill_removal_structure = [&](symbol_remove_data &nrd) {
+               nrd.sym = ms->name;
+
+               /* By default remove symbols */
+               switch (cd->composite->policy) {
+               case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
+               default:
+                       nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT);
+                       break;
+               case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
+                       nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+                       break;
+               case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
+                       nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
+                       break;
+               case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE:
+                       nrd.action = 0;
+                       break;
+               }
+
+               for (auto t : beg) {
+                       if (t == '~') {
+                               nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+                       }
+                       else if (t == '-') {
+                               nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT |
+                                                               RSPAMD_COMPOSITE_REMOVE_SYMBOL);
+                       }
+                       else if (t == '^') {
+                               nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
+                       }
+                       else {
+                               break;
+                       }
+               }
+
+               nrd.comp = cd->composite;
+               nrd.parent = atom->parent;
+       };
+
+       if (rd_it != cd->symbols_to_remove.end()) {
+               fill_removal_structure(rd_it->second.emplace_back());
+               msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
+                               cd->metric_res->name,
+                               ms->name, rd_it->second.back().action,
+                               cd->composite->sym.c_str());
+       }
+       else {
+               std::vector<symbol_remove_data> nrd;
+               fill_removal_structure(nrd.emplace_back());
+               msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
+                               cd->metric_res->name,
+                               ms->name, nrd.front().action,
+                               cd->composite->sym.c_str());
+               cd->symbols_to_remove[ms->name] = std::move(nrd);
+       }
+}
 
-static gdouble
-rspamd_composite_process_single_symbol (struct composites_data *cd,
-                                                                               const gchar *sym,
-                                                                               struct rspamd_symbol_result **pms,
-                                                                               struct rspamd_composite_atom *atom)
+static auto
+process_single_symbol(struct composites_data *cd,
+                                         const gchar *sym,
+                                         struct rspamd_symbol_result **pms,
+                                         struct rspamd_composite_atom *atom) -> double
 {
-       struct rspamd_symbol_result *ms = NULL;
+       struct rspamd_symbol_result *ms = nullptr;
        gdouble rc = 0;
        struct rspamd_task *task = cd->task;
 
-       if ((ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res)) == NULL) {
+       if ((ms = rspamd_task_find_symbol_result(cd->task, sym, cd->metric_res)) == nullptr) {
                msg_debug_composites ("not found symbol %s in composite %s", sym,
-                               cd->composite->sym);
+                               cd->composite->sym.c_str());
 
-               if (atom->comp_type == ATOM_UNKNOWN) {
+               if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) {
                        struct rspamd_composite *ncomp;
 
                        if ((ncomp =
-                                                g_hash_table_lookup (cd->task->cfg->composite_symbols,
+                                                g_hash_table_lookup(cd->task->cfg->composite_symbols,
                                                                 sym)) != NULL) {
-                               atom->comp_type = ATOM_COMPOSITE;
+                               atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE;
                                atom->ncomp = ncomp;
                        }
                        else {
-                               atom->comp_type = ATOM_PLAIN;
+                               atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN;
                        }
                }
 
-               if (atom->comp_type == ATOM_COMPOSITE) {
+               if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) {
                        msg_debug_composites ("symbol %s for composite %s is another composite",
-                                       sym, cd->composite->sym);
-
-                       if (isclr (cd->checked, atom->ncomp->id * 2)) {
-                               struct rspamd_composite *saved;
+                                       sym, cd->composite->sym.c_str());
 
-                               msg_debug_composites ("composite dependency %s for %s is not checked",
-                                               sym, cd->composite->sym);
+                       if (!cd->checked[atom->ncomp->id * 2]) {
+                               msg_debug_composites("composite dependency %s for %s is not checked",
+                                               sym, cd->composite->sym.c_str());
                                /* Set checked for this symbol to avoid cyclic references */
-                               setbit (cd->checked, cd->composite->id * 2);
-                               saved = cd->composite; /* Save the current composite */
-                               composites_foreach_callback ((gpointer)atom->ncomp->sym, atom->ncomp, cd);
-
+                               cd->checked[cd->composite->id * 2] = true;
+                               auto *saved = cd->composite; /* Save the current composite */
+                               composites_foreach_callback((gpointer)atom->ncomp->sym.c_str(),
+                                               (gpointer)atom->ncomp, (gpointer)cd);
                                /* Restore state */
                                cd->composite = saved;
-                               clrbit (cd->checked, cd->composite->id * 2);
+                               cd->checked[cd->composite->id * 2] = false;
 
-                               ms = rspamd_task_find_symbol_result (cd->task, sym,
+                               ms = rspamd_task_find_symbol_result(cd->task, sym,
                                                cd->metric_res);
                        }
                        else {
                                /*
                                 * XXX: in case of cyclic references this would return 0
                                 */
-                               if (isset (cd->checked, atom->ncomp->id * 2 + 1)) {
-                                       ms = rspamd_task_find_symbol_result (cd->task, sym,
+                               if (cd->checked[atom->ncomp->id * 2 + 1]) {
+                                       ms = rspamd_task_find_symbol_result(cd->task, sym,
                                                        cd->metric_res);
                                }
                        }
@@ -476,54 +574,28 @@ rspamd_composite_process_single_symbol (struct composites_data *cd,
        }
 
        if (ms) {
-               msg_debug_composites ("found symbol %s in composite %s, weight: %.3f",
-                               sym, cd->composite->sym, ms->score);
+               msg_debug_composites("found symbol %s in composite %s, weight: %.3f",
+                               sym, cd->composite->sym.c_str(), ms->score);
 
                /* Now check options */
-               struct rspamd_composite_option_match *cur_opt;
-
-               DL_FOREACH (atom->opts, cur_opt) {
+               for (const auto &cur_opt : atom->opts) {
                        struct rspamd_symbol_option *opt;
-                       bool found = false;
+                       auto found = false;
 
                        DL_FOREACH (ms->opts_head, opt) {
-                               if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
-                                       gsize mlen = strlen (cur_opt->data.match);
-
-                                       if (opt->optlen == mlen &&
-                                               memcmp (opt->option, cur_opt->data.match, mlen) == 0) {
-
-                                               found = true;
-
-                                               break;
-                                       }
-                               }
-                               else {
-                                       if (rspamd_regexp_search (cur_opt->data.re,
-                                                       opt->option, opt->optlen, NULL, NULL, FALSE, NULL)) {
-                                               found = true;
-
-                                               break;
-                                       }
+                               if (cur_opt.math_opt({opt->option, opt->optlen})) {
+                                       found = true;
+                                       break;
                                }
                        }
 
-
                        if (!found) {
-                               if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
-                                       msg_debug_composites ("symbol %s in composite %s misses required option %s",
-                                                       sym,
-                                                       cd->composite->sym,
-                                                       cur_opt->data.match);
-                               }
-                               else {
-                                       msg_debug_composites ("symbol %s in composite %s failed to match regexp %s",
-                                                       sym,
-                                                       cd->composite->sym,
-                                                       rspamd_regexp_get_pattern (cur_opt->data.re));
-                               }
-
-                               ms = NULL;
+                               auto pat = cur_opt.get_pat();
+                               msg_debug_composites ("symbol %s in composite %s misses required option %*s",
+                                               sym,
+                                               cd->composite->sym.c_str(),
+                                               (int) pat.size(), pat.data());
+                               ms = nullptr;
 
                                break;
                        }
@@ -543,109 +615,21 @@ rspamd_composite_process_single_symbol (struct composites_data *cd,
        return rc;
 }
 
-static void
-rspamd_composite_process_symbol_removal (rspamd_expression_atom_t *atom,
-                                                                                struct composites_data *cd,
-                                                                                struct rspamd_symbol_result *ms,
-                                                                                const gchar *beg)
-{
-       gchar t;
-       struct symbol_remove_data *rd, *nrd;
-       struct rspamd_task *task = cd->task;
-
-       if (ms == NULL) {
-               return;
-       }
-
-       /*
-        * At this point we know that we need to do something about this symbol,
-        * however, we don't know whether we need to delete it unfortunately,
-        * that depends on the later decisions when the complete expression is
-        * evaluated.
-        */
-       rd = g_hash_table_lookup (cd->symbols_to_remove, ms->name);
-
-       nrd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*nrd));
-       nrd->sym = ms->name;
-
-       /* By default remove symbols */
-       switch (cd->composite->policy) {
-       case RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
-       default:
-               nrd->action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL|RSPAMD_COMPOSITE_REMOVE_WEIGHT);
-               break;
-       case RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
-               nrd->action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
-               break;
-       case RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
-               nrd->action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
-               break;
-       case RSPAMD_COMPOSITE_POLICY_LEAVE:
-               nrd->action = 0;
-               break;
-       }
-
-       for (;;) {
-               t = *beg;
-
-               if (t == '~') {
-                       nrd->action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
-               }
-               else if (t == '-') {
-                       nrd->action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT|
-                                                        RSPAMD_COMPOSITE_REMOVE_SYMBOL);
-               }
-               else if (t == '^') {
-                       nrd->action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
-               }
-               else {
-                       break;
-               }
-
-               beg ++;
-       }
-
-       nrd->comp = cd->composite;
-       nrd->parent = atom->parent;
-
-       if (rd == NULL) {
-               DL_APPEND (rd, nrd);
-               g_hash_table_insert (cd->symbols_to_remove, (gpointer)ms->name, rd);
-               msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
-                               cd->metric_res->name,
-                               ms->name, nrd->action,
-                               cd->composite->sym);
-       }
-       else {
-               DL_APPEND (rd, nrd);
-               msg_debug_composites ("%s: append symbol %s to removal: %d policy, from composite %s",
-                               cd->metric_res->name,
-                               ms->name, nrd->action,
-                               cd->composite->sym);
-       }
-}
-
-static gdouble
-rspamd_composite_expr_process (void *ud,
-               rspamd_expression_atom_t *atom)
+static auto
+rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double
 {
-       static const double epsilon = 0.00001;
-       struct composites_data *cd = (struct composites_data *)ud;
-       const gchar *sym = NULL;
-       struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data;
+       struct composites_data *cd = (struct composites_data *) ud;
+       struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data;
 
        struct rspamd_symbol_result *ms = NULL;
-       struct rspamd_symbols_group *gr;
-       struct rspamd_symbol *sdef;
        struct rspamd_task *task = cd->task;
-       GHashTableIter it;
-       gpointer k, v;
-       gdouble rc = 0, max = 0;
+       gdouble rc = 0;
 
-       if (isset (cd->checked, cd->composite->id * 2)) {
+       if (cd->checked[cd->composite->id * 2]) {
                /* We have already checked this composite, so just return its value */
-               if (isset (cd->checked, cd->composite->id * 2 + 1)) {
-                       ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res);
+               if (cd->checked[cd->composite->id * 2 + 1]) {
+                       ms = rspamd_task_find_symbol_result(cd->task, comp_atom->symbol.c_str(),
+                                       cd->metric_res);
                }
 
                if (ms) {
@@ -654,120 +638,72 @@ rspamd_composite_expr_process (void *ud,
                        }
                        else {
                                /* Treat negative and positive scores equally... */
-                               rc = fabs (ms->score);
+                               rc = fabs(ms->score);
                        }
                }
 
-               msg_debug_composites ("composite %s is already checked, result: %.2f",
-                               cd->composite->sym, rc);
+               msg_debug_composites("composite %s is already checked, result: %.2f",
+                               cd->composite->sym.c_str(), rc);
 
                return rc;
        }
 
-       sym = comp_atom->symbol;
-       guint slen = strlen (sym);
+       /* Note: sym is zero terminated as it is a view on std::string */
+       auto sym = std::string_view{comp_atom->symbol};
+       auto group_process_functor = [&](auto cond, int sub_start) -> double {
+               auto max = 0.;
+               GHashTableIter it;
+               gpointer k, v;
+               struct rspamd_symbols_group *gr;
 
-       while (*sym != '\0' && !g_ascii_isalnum (*sym)) {
-               sym ++;
-               slen --;
-       }
+               gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups,
+                               sym.substr(sub_start).data());
 
-       if (slen > 2) {
-               if (G_UNLIKELY (memcmp (sym, "g:", 2) == 0)) {
-                       gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 2);
+               if (gr != nullptr) {
+                       g_hash_table_iter_init(&it, gr->symbols);
 
-                       if (gr != NULL) {
-                               g_hash_table_iter_init (&it, gr->symbols);
+                       while (g_hash_table_iter_next(&it, &k, &v)) {
+                               auto *sdef = (rspamd_symbol *) v;
 
-                               while (g_hash_table_iter_next (&it, &k, &v)) {
-                                       sdef = v;
-                                       rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms,
+                               if (cond(sdef->score)) {
+                                       rc = process_single_symbol(cd,
+                                                       sdef->name,
+                                                       &ms,
                                                        comp_atom);
 
-                                       if (rc) {
-                                               rspamd_composite_process_symbol_removal (atom,
+                                       if (fabs(rc) > epsilon) {
+                                               process_symbol_removal(atom,
                                                                cd,
                                                                ms,
                                                                comp_atom->symbol);
 
-                                               if (fabs (rc) > max) {
-                                                       max = fabs (rc);
+                                               if (fabs(rc) > max) {
+                                                       max = fabs(rc);
                                                }
                                        }
                                }
                        }
-
-                       rc = max;
                }
-               else if (G_UNLIKELY (memcmp (sym, "g+:", 3) == 0)) {
-                       /* Group, positive symbols only */
-                       gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3);
-
-                       if (gr != NULL) {
-                               g_hash_table_iter_init (&it, gr->symbols);
-
-                               while (g_hash_table_iter_next (&it, &k, &v)) {
-                                       sdef = v;
 
-                                       if (sdef->score > 0) {
-                                               rc = rspamd_composite_process_single_symbol (cd,
-                                                               sdef->name,
-                                                               &ms,
-                                                               comp_atom);
-
-                                               if (rc) {
-                                                       rspamd_composite_process_symbol_removal (atom,
-                                                                       cd,
-                                                                       ms,
-                                                                       comp_atom->symbol);
-
-                                                       if (fabs (rc) > max) {
-                                                               max = fabs (rc);
-                                                       }
-                                               }
-                                       }
-                               }
+               return max;
+       };
 
-                               rc = max;
-                       }
+       if (sym.size() > 2) {
+               if (sym.substr(0, 2) == "g:") {
+                       rc = group_process_functor([](auto _) { return true; }, 2);
                }
-               else if (G_UNLIKELY (memcmp (sym, "g-:", 3) == 0)) {
-                       /* Group, negative symbols only */
-                       gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 3);
-
-                       if (gr != NULL) {
-                               g_hash_table_iter_init (&it, gr->symbols);
-
-                               while (g_hash_table_iter_next (&it, &k, &v)) {
-                                       sdef = v;
-
-                                       if (sdef->score < 0) {
-                                               rc = rspamd_composite_process_single_symbol (cd,
-                                                               sdef->name,
-                                                               &ms,
-                                                               comp_atom);
-
-                                               if (rc) {
-                                                       rspamd_composite_process_symbol_removal (atom,
-                                                                       cd,
-                                                                       ms,
-                                                                       comp_atom->symbol);
-
-                                                       if (fabs (rc) > max) {
-                                                               max = fabs (rc);
-                                                       }
-                                               }
-                                       }
-                               }
-
-                               rc = max;
-                       }
+               else if (sym.substr(0, 3) == "g+:") {
+                       /* Group, positive symbols only */
+                       rc = group_process_functor([](auto sc) { return sc > 0.; }, 3);
+               }
+               else if (sym.substr(0, 3) == "g-:") {
+                       rc = group_process_functor([](auto sc) { return sc < 0.; }, 3);
                }
                else {
-                       rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom);
+                       rc = process_single_symbol(cd, sym.data(), &ms, comp_atom);
 
                        if (rc) {
-                               rspamd_composite_process_symbol_removal (atom,
+                               process_symbol_removal(atom,
                                                cd,
                                                ms,
                                                comp_atom->symbol);
@@ -775,10 +711,10 @@ rspamd_composite_expr_process (void *ud,
                }
        }
        else {
-               rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom);
+               rc = process_single_symbol(cd, sym.data(), &ms, comp_atom);
 
-               if (rc) {
-                       rspamd_composite_process_symbol_removal (atom,
+               if (fabs(rc) > epsilon) {
+                       process_symbol_removal(atom,
                                        cd,
                                        ms,
                                        comp_atom->symbol);
@@ -787,7 +723,7 @@ rspamd_composite_expr_process (void *ud,
 
        msg_debug_composites ("%s: final result for composite %s is %.2f",
                        cd->metric_res->name,
-                       cd->composite->sym, rc);
+                       cd->composite->sym.c_str(), rc);
 
        return rc;
 }
@@ -796,89 +732,85 @@ rspamd_composite_expr_process (void *ud,
  * We don't have preferences for composites
  */
 static gint
-rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
+rspamd_composite_expr_priority(rspamd_expression_atom_t *atom)
 {
        return 0;
 }
 
 static void
-rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
+rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom)
 {
-       /* Composite atoms are destroyed just with the pool */
+       rspamd_composite_atom_dtor(atom->data);
 }
 
-
 static void
-composites_foreach_callback (gpointer key, gpointer value, void *data)
+composites_foreach_callback(gpointer key, gpointer value, void *data)
 {
-       struct composites_data *cd = data;
-       struct rspamd_composite *comp = value;
+       auto *cd = (struct composites_data *) data;
+       auto *comp = (struct rspamd_composite *) value;
+       auto *str_key = (const gchar *)key;
        struct rspamd_task *task;
        gdouble rc;
 
        cd->composite = comp;
        task = cd->task;
 
-       if (!isset (cd->checked, cd->composite->id * 2)) {
-               if (rspamd_symcache_is_checked (cd->task, cd->task->cfg->cache,
-                               key)) {
+       if (!cd->checked[cd->composite->id * 2]) {
+               if (rspamd_symcache_is_checked(cd->task, cd->task->cfg->cache,
+                               str_key)) {
                        msg_debug_composites ("composite %s is checked in symcache but not "
-                                       "in composites bitfield", cd->composite->sym);
-                       setbit (cd->checked, comp->id * 2);
-                       clrbit (cd->checked, comp->id * 2 + 1);
+                                                                 "in composites bitfield", cd->composite->sym.c_str());
+                       cd->checked[comp->id * 2] = true;
+                       cd->checked[comp->id * 2 + 1] = false;
                }
                else {
-                       if (rspamd_task_find_symbol_result (cd->task, key,
-                                       cd->metric_res) != NULL) {
+                       if (rspamd_task_find_symbol_result(cd->task, str_key,
+                                       cd->metric_res) != nullptr) {
                                /* Already set, no need to check */
                                msg_debug_composites ("composite %s is already in metric "
-                                               "in composites bitfield", cd->composite->sym);
-                               setbit (cd->checked, comp->id * 2);
-                               clrbit (cd->checked, comp->id * 2 + 1);
+                                                                         "in composites bitfield", cd->composite->sym.c_str());
+                               cd->checked[comp->id * 2] = true;
+                               cd->checked[comp->id * 2 + 1] = true;
 
                                return;
                        }
 
-                       rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
+                       rc = rspamd_process_expression(comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
                                        cd);
 
                        /* Checked bit */
-                       setbit (cd->checked, comp->id * 2);
+                       cd->checked[comp->id * 2] = true;
 
                        /* Result bit */
-                       if (rc != 0) {
-                               setbit (cd->checked, comp->id * 2 + 1);
-                               rspamd_task_insert_result_full (cd->task, key, 1.0, NULL,
+                       if (fabs(rc) > epsilon) {
+                               cd->checked[comp->id * 2 + 1] = true;
+                               rspamd_task_insert_result_full(cd->task, str_key, 1.0, NULL,
                                                RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res);
                        }
                        else {
-                               clrbit (cd->checked, comp->id * 2 + 1);
+                               cd->checked[comp->id * 2 + 1] = false;
                        }
                }
        }
 }
 
 
-static void
-composites_remove_symbols (gpointer key, gpointer value, gpointer data)
+static auto
+remove_symbols(const composites_data &cd, const std::vector<symbol_remove_data> &rd) -> void
 {
-       struct composites_data *cd = data;
-       struct rspamd_task *task;
-       struct symbol_remove_data *rd = value, *cur;
-       struct rspamd_symbol_result *ms;
+       struct rspamd_task *task = cd.task;
        gboolean skip = FALSE,
                        has_valid_op = FALSE,
                        want_remove_score = TRUE,
                        want_remove_symbol = TRUE,
                        want_forced = FALSE;
        const gchar *disable_score_reason = "no policy",
-               *disable_symbol_reason = "no policy";
-       GNode *par;
+                       *disable_symbol_reason = "no policy";
 
-       task = cd->task;
+       task = cd.task;
 
-       DL_FOREACH (rd, cur) {
-               if (!isset (cd->checked, cur->comp->id * 2 + 1)) {
+       for (const auto &cur : rd) {
+               if (!cd.checked[cur.comp->id * 2 + 1]) {
                        continue;
                }
                /*
@@ -886,11 +818,11 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data)
                 * !A || B -> here we can have both !A and B matched, but we do *NOT*
                 * want to remove symbol in that case
                 */
-               par = cur->parent;
+               auto *par = cur.parent;
                skip = FALSE;
 
                while (par) {
-                       if (rspamd_expression_node_is_op (par, OP_NOT)) {
+                       if (rspamd_expression_node_is_op(par, OP_NOT)) {
                                skip = TRUE;
                                break;
                        }
@@ -911,35 +843,35 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data)
                 * - if no composites would like to save symbol then we remove symbol
                 */
                if (!want_forced) {
-                       if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
+                       if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
                                want_remove_symbol = FALSE;
-                               disable_symbol_reason = cur->comp->sym;
+                               disable_symbol_reason = cur.comp->sym.c_str();
                        }
 
-                       if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
+                       if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
                                want_remove_score = FALSE;
-                               disable_score_reason = cur->comp->sym;
+                               disable_score_reason = cur.comp->sym.c_str();
                        }
 
-                       if (cur->action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
+                       if (cur.action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
                                want_forced = TRUE;
-                               disable_symbol_reason = cur->comp->sym;
-                               disable_score_reason = cur->comp->sym;
+                               disable_symbol_reason = cur.comp->sym.c_str();
+                               disable_score_reason = cur.comp->sym.c_str();
                        }
                }
        }
 
-       ms = rspamd_task_find_symbol_result (task, rd->sym, cd->metric_res);
+       auto *ms = rspamd_task_find_symbol_result(task, rd.front().sym, cd.metric_res);
 
        if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
 
                if (want_remove_score || want_forced) {
                        msg_debug_composites ("%s: %s remove symbol weight for %s (was %.2f), "
-                                                "score removal affected by %s, symbol removal affected by %s",
-                                       cd->metric_res->name,
-                                       (want_forced ? "forced" : "normal"), key, ms->score,
+                                                                 "score removal affected by %s, symbol removal affected by %s",
+                                       cd.metric_res->name,
+                                       (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
                                        disable_score_reason, disable_symbol_reason);
-                       cd->metric_res->score -= ms->score;
+                       cd.metric_res->score -= ms->score;
                        ms->score = 0.0;
                }
 
@@ -947,51 +879,47 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data)
                        ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED;
                        msg_debug_composites ("%s: %s remove symbol %s (score %.2f), "
                                                                  "score removal affected by %s, symbol removal affected by %s",
-                                       cd->metric_res->name,
-                                       (want_forced ? "forced" : "normal"), key, ms->score,
+                                       cd.metric_res->name,
+                                       (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
                                        disable_score_reason, disable_symbol_reason);
                }
        }
 }
 
 static void
-composites_metric_callback (struct rspamd_task *task)
+composites_metric_callback(struct rspamd_task *task)
 {
-       struct composites_data *cd, *first_cd = NULL;
+       std::vector<composites_data> comp_data_vec;
        struct rspamd_scan_result *mres;
 
+       comp_data_vec.reserve(1);
+
        DL_FOREACH (task->result, mres) {
-               cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data));
-               cd->task = task;
-               cd->metric_res = mres;
-               cd->symbols_to_remove = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
-               cd->checked =
-                               rspamd_mempool_alloc0 (task->task_pool,
-                                               NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2));
-
-               /* Process hash table */
-               rspamd_symcache_composites_foreach (task,
+               auto &cd = comp_data_vec.emplace_back(task, mres);
+
+               /* Process metric result */
+               rspamd_symcache_composites_foreach(task,
                                task->cfg->cache,
                                composites_foreach_callback,
-                               cd);
-               LL_PREPEND (first_cd, cd);
+                               &cd);
        }
 
-       LL_REVERSE (first_cd);
-
-       LL_FOREACH (first_cd, cd) {
+       for (const auto &cd : comp_data_vec) {
                /* Remove symbols that are in composites */
-               g_hash_table_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
-               /* Free list */
-               g_hash_table_unref (cd->symbols_to_remove);
+               for (const auto &srd_it : cd.symbols_to_remove) {
+                       remove_symbols(cd, srd_it.second);
+               }
        }
 }
 
+}
+
+
 void
 rspamd_composites_process_task (struct rspamd_task *task)
 {
        if (task->result && !RSPAMD_TASK_IS_SKIPPED (task)) {
-               composites_metric_callback (task);
+               rspamd::composites::composites_metric_callback(task);
        }
 }