123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989 |
- /*-
- * Copyright 2021 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "config.h"
- #include "logger.h"
- #include "expression.h"
- #include "task.h"
- #include "utlist.h"
- #include "scan_result.h"
- #include "composites.h"
-
- #include <cmath>
- #include <vector>
- #include <variant>
- #include "libutil/cxx/util.hxx"
- #include "contrib/ankerl/unordered_dense.h"
-
- #include "composites_internal.hxx"
-
- #define msg_err_composites(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
- "composites", task->task_pool->tag.uid, \
- RSPAMD_LOG_FUNC, \
- __VA_ARGS__)
- #define msg_warn_composites(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
- "composites", task->task_pool->tag.uid, \
- RSPAMD_LOG_FUNC, \
- __VA_ARGS__)
- #define msg_info_composites(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
- "composites", task->task_pool->tag.uid, \
- RSPAMD_LOG_FUNC, \
- __VA_ARGS__)
-
- #define msg_debug_composites(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \
- rspamd_composites_log_id, "composites", task->task_pool->tag.uid, \
- RSPAMD_LOG_FUNC, \
- __VA_ARGS__)
-
- INIT_LOG_MODULE(composites)
-
-
- namespace rspamd::composites {
- static rspamd_expression_atom_t *rspamd_composite_expr_parse(const char *line, gsize len,
- rspamd_mempool_t *pool,
- gpointer ud, GError **err);
- static double rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
- static int rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
- static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
- static void composites_foreach_callback(gpointer key, gpointer value, void *data);
-
- const struct rspamd_atom_subr composite_expr_subr = {
- .parse = rspamd::composites::rspamd_composite_expr_parse,
- .process = rspamd::composites::rspamd_composite_expr_process,
- .priority = rspamd::composites::rspamd_composite_expr_priority,
- .destroy = rspamd::composites::rspamd_composite_expr_destroy};
- }// namespace rspamd::composites
-
- namespace rspamd::composites {
-
- static constexpr const double epsilon = 0.00001;
-
- struct symbol_remove_data {
- const char *sym;
- struct rspamd_composite *comp;
- GNode *parent;
- std::uint8_t action;
- };
-
- struct composites_data {
- struct rspamd_task *task;
- struct rspamd_composite *composite;
- struct rspamd_scan_result *metric_res;
- ankerl::unordered_dense::map<std::string_view,
- std::vector<symbol_remove_data>>
- symbols_to_remove;
- std::vector<bool> checked;
-
- explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres)
- : task(task), composite(nullptr), metric_res(mres)
- {
- checked.resize(rspamd_composites_manager_nelts(task->cfg->composites_manager) * 2,
- false);
- }
- };
-
- struct rspamd_composite_option_match {
- rspamd_regexp_t *re;
- std::string match;
-
- explicit rspamd_composite_option_match(const char *start, std::size_t len) noexcept
- : re(nullptr), match(start, len)
- {
- }
-
- explicit rspamd_composite_option_match(rspamd_regexp_t *re) noexcept
- : re(rspamd_regexp_ref(re))
- {
- }
-
- rspamd_composite_option_match(const rspamd_composite_option_match &other) noexcept
- {
- if (other.re) {
- re = rspamd_regexp_ref(other.re);
- }
- else {
- match = other.match;
- re = nullptr;
- }
- }
- rspamd_composite_option_match &operator=(const rspamd_composite_option_match &other) noexcept
- {
- if (other.re) {
- if (re) {
- rspamd_regexp_unref(re);
- }
- re = rspamd_regexp_ref(other.re);
- }
- else {
- if (re) {
- rspamd_regexp_unref(re);
- }
- re = nullptr;
- match = other.match;
- }
-
- return *this;
- }
-
- rspamd_composite_option_match(rspamd_composite_option_match &&other) noexcept
- {
- if (other.re) {
- re = other.re;
- other.re = nullptr;
- }
- else {
- re = nullptr;
- match = std::move(other.match);
- }
- }
- rspamd_composite_option_match &operator=(rspamd_composite_option_match &&other) noexcept
- {
- if (other.re) {
- if (re) {
- rspamd_regexp_unref(re);
- }
- re = other.re;
- other.re = nullptr;
- }
- else {
- if (re) {
- rspamd_regexp_unref(re);
- }
- re = nullptr;
- match = std::move(other.match);
- }
-
- return *this;
- }
-
- ~rspamd_composite_option_match()
- {
- if (re) {
- rspamd_regexp_unref(re);
- }
- }
-
- auto match_opt(const std::string_view &data) const -> bool
- {
- if (re) {
- return rspamd_regexp_search(re,
- data.data(), data.size(),
- nullptr, nullptr, false, nullptr);
- }
- else {
- return data == match;
- }
- }
-
- auto get_pat() const -> std::string_view
- {
- if (re) {
- return std::string_view(rspamd_regexp_get_pattern(re));
- }
- else {
- return match;
- }
- }
- };
-
- enum class rspamd_composite_atom_type {
- ATOM_UNKNOWN,
- ATOM_COMPOSITE,
- ATOM_PLAIN
- };
-
- struct rspamd_composite_atom {
- std::string symbol;
- std::string_view norm_symbol;
- rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN;
- const struct rspamd_composite *ncomp; /* underlying composite */
- std::vector<rspamd_composite_option_match> opts;
- };
-
- enum rspamd_composite_action : std::uint8_t {
- RSPAMD_COMPOSITE_UNTOUCH = 0,
- RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1u << 0),
- RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1u << 1),
- RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2)
- };
-
- static GQuark
- rspamd_composites_quark(void)
- {
- return g_quark_from_static_string("composites");
- }
-
- static auto
- rspamd_composite_atom_dtor(void *ptr)
- {
- auto *atom = reinterpret_cast<rspamd_composite_atom *>(ptr);
-
- delete atom;
- }
-
- static rspamd_expression_atom_t *
- rspamd_composite_expr_parse(const char *line, gsize len,
- rspamd_mempool_t *pool,
- gpointer ud, GError **err)
- {
- gsize clen = 0;
- const char *p, *end;
- enum composite_expr_state {
- comp_state_read_symbol = 0,
- comp_state_read_obrace,
- comp_state_read_option,
- comp_state_read_regexp,
- comp_state_read_regexp_end,
- comp_state_read_comma,
- comp_state_read_ebrace,
- comp_state_read_end
- } state = comp_state_read_symbol;
-
- end = line + len;
- p = line;
-
- /* Find length of the atom using a reduced state machine */
- while (p < end) {
- if (state == comp_state_read_end) {
- break;
- }
-
- switch (state) {
- case comp_state_read_symbol:
- clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
- p += clen;
-
- if (*p == '[') {
- state = comp_state_read_obrace;
- }
- else {
- state = comp_state_read_end;
- }
- break;
- case comp_state_read_obrace:
- p++;
-
- if (*p == '/') {
- p++;
- state = comp_state_read_regexp;
- }
- else {
- state = comp_state_read_option;
- }
- break;
- case comp_state_read_regexp:
- if (*p == '\\' && p + 1 < end) {
- /* Escaping */
- p++;
- }
- else if (*p == '/') {
- /* End of regexp, possible flags */
- state = comp_state_read_regexp_end;
- }
- p++;
- break;
- case comp_state_read_option:
- case comp_state_read_regexp_end:
- if (*p == ',') {
- p++;
- state = comp_state_read_comma;
- }
- else if (*p == ']') {
- state = comp_state_read_ebrace;
- }
- else {
- p++;
- }
- break;
- case comp_state_read_comma:
- if (!g_ascii_isspace(*p)) {
- if (*p == '/') {
- state = comp_state_read_regexp;
- }
- else if (*p == ']') {
- state = comp_state_read_ebrace;
- }
- else {
- state = comp_state_read_option;
- }
- }
- else {
- /* Skip spaces after comma */
- p++;
- }
- break;
- case comp_state_read_ebrace:
- p++;
- state = comp_state_read_end;
- break;
- case comp_state_read_end:
- g_assert_not_reached();
- }
- }
-
- if (state != comp_state_read_end) {
- g_set_error(err, rspamd_composites_quark(), 100, "invalid composite: %s;"
- "parser stopped in state %d",
- line, state);
- return NULL;
- }
-
- clen = p - line;
- p = line;
- state = comp_state_read_symbol;
-
- auto *atom = new rspamd_composite_atom;
- auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t);
- res->len = clen;
- res->str = line;
-
- /* Full state machine to fill a composite atom */
- const char *opt_start = nullptr;
-
- while (p < end) {
- if (state == comp_state_read_end) {
- break;
- }
-
- switch (state) {
- case comp_state_read_symbol: {
- clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
- p += clen;
-
- if (*p == '[') {
- state = comp_state_read_obrace;
- }
- else {
- state = comp_state_read_end;
- }
-
- atom->symbol = std::string{line, clen};
- auto norm_start = std::find_if(atom->symbol.begin(), atom->symbol.end(),
- [](char c) { return g_ascii_isalnum(c); });
- if (norm_start == atom->symbol.end()) {
- msg_err_pool("invalid composite atom: %s", atom->symbol.c_str());
- }
- atom->norm_symbol = make_string_view_from_it(norm_start, atom->symbol.end());
- break;
- }
- case comp_state_read_obrace:
- p++;
-
- if (*p == '/') {
- opt_start = p;
- p++; /* Starting slash */
- state = comp_state_read_regexp;
- }
- else {
- state = comp_state_read_option;
- opt_start = p;
- }
-
- break;
- case comp_state_read_regexp:
- if (*p == '\\' && p + 1 < end) {
- /* Escaping */
- p++;
- }
- else if (*p == '/') {
- /* End of regexp, possible flags */
- state = comp_state_read_regexp_end;
- }
- p++;
- break;
- case comp_state_read_option:
- if (*p == ',' || *p == ']') {
- /* Plain match, copy option to ensure string_view validity */
- int opt_len = p - opt_start;
- auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1);
- rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
- opt_buf = g_strstrip(opt_buf);
- atom->opts.emplace_back(opt_buf, strlen(opt_buf));
-
- if (*p == ',') {
- p++;
- state = comp_state_read_comma;
- }
- else {
- state = comp_state_read_ebrace;
- }
- }
- else {
- p++;
- }
- break;
- case comp_state_read_regexp_end:
- if (*p == ',' || *p == ']') {
- auto opt_len = p - opt_start;
- rspamd_regexp_t *re;
- GError *re_err = nullptr;
-
- re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err);
-
- if (re == nullptr) {
- msg_err_pool("cannot create regexp from string %*s: %e",
- opt_len, opt_start, re_err);
-
- g_error_free(re_err);
- }
- else {
- atom->opts.emplace_back(re);
- rspamd_regexp_unref(re);
- }
-
- if (*p == ',') {
- p++;
- state = comp_state_read_comma;
- }
- else {
- state = comp_state_read_ebrace;
- }
- }
- else {
- p++;
- }
- break;
- case comp_state_read_comma:
- if (!g_ascii_isspace(*p)) {
- if (*p == '/') {
- state = comp_state_read_regexp;
- opt_start = p;
- }
- else if (*p == ']') {
- state = comp_state_read_ebrace;
- }
- else {
- opt_start = p;
- state = comp_state_read_option;
- }
- }
- else {
- /* Skip spaces after comma */
- p++;
- }
- break;
- case comp_state_read_ebrace:
- p++;
- state = comp_state_read_end;
- break;
- case comp_state_read_end:
- g_assert_not_reached();
- }
- }
-
- res->data = atom;
-
- return res;
- }
-
- static auto
- process_symbol_removal(rspamd_expression_atom_t *atom,
- struct composites_data *cd,
- struct rspamd_symbol_result *ms,
- const std::string &beg) -> void
- {
- struct rspamd_task *task = cd->task;
-
- if (ms == nullptr) {
- return;
- }
-
- /*
- * At this point we know that we need to do something about this symbol,
- * however, we don't know whether we need to delete it unfortunately,
- * that depends on the later decisions when the complete expression is
- * evaluated.
- */
- auto rd_it = cd->symbols_to_remove.find(ms->name);
-
- auto fill_removal_structure = [&](symbol_remove_data &nrd) {
- nrd.sym = ms->name;
-
- /* By default remove symbols */
- switch (cd->composite->policy) {
- case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
- default:
- nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT);
- break;
- case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
- nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
- break;
- case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
- nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
- break;
- case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE:
- nrd.action = 0;
- break;
- }
-
- for (auto t: beg) {
- if (t == '~') {
- nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
- }
- else if (t == '-') {
- nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT |
- RSPAMD_COMPOSITE_REMOVE_SYMBOL);
- }
- else if (t == '^') {
- nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
- }
- else {
- break;
- }
- }
-
- nrd.comp = cd->composite;
- nrd.parent = atom->parent;
- };
-
- if (rd_it != cd->symbols_to_remove.end()) {
- fill_removal_structure(rd_it->second.emplace_back());
- msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s",
- cd->metric_res->name,
- ms->name, rd_it->second.back().action,
- cd->composite->sym.c_str());
- }
- else {
- std::vector<symbol_remove_data> nrd;
- fill_removal_structure(nrd.emplace_back());
- msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s",
- cd->metric_res->name,
- ms->name, nrd.front().action,
- cd->composite->sym.c_str());
- cd->symbols_to_remove[ms->name] = std::move(nrd);
- }
- }
-
- static auto
- process_single_symbol(struct composites_data *cd,
- std::string_view sym,
- struct rspamd_symbol_result **pms,
- struct rspamd_composite_atom *atom) -> double
- {
- struct rspamd_symbol_result *ms = nullptr;
- double rc = 0;
- struct rspamd_task *task = cd->task;
-
- if ((ms = rspamd_task_find_symbol_result(cd->task, sym.data(), cd->metric_res)) == nullptr) {
- msg_debug_composites("not found symbol %s in composite %s", sym.data(),
- cd->composite->sym.c_str());
-
- if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) {
- const struct rspamd_composite *ncomp;
-
- if ((ncomp = COMPOSITE_MANAGER_FROM_PTR(task->cfg->composites_manager)->find(sym)) != NULL) {
- atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE;
- atom->ncomp = ncomp;
- }
- else {
- atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN;
- }
- }
-
- if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) {
- msg_debug_composites("symbol %s for composite %s is another composite",
- sym.data(), cd->composite->sym.c_str());
-
- if (!cd->checked[atom->ncomp->id * 2]) {
- msg_debug_composites("composite dependency %s for %s is not checked",
- sym.data(), cd->composite->sym.c_str());
- /* Set checked for this symbol to avoid cyclic references */
- cd->checked[cd->composite->id * 2] = true;
- auto *saved = cd->composite; /* Save the current composite */
- composites_foreach_callback((gpointer) atom->ncomp->sym.c_str(),
- (gpointer) atom->ncomp, (gpointer) cd);
- /* Restore state */
- cd->composite = saved;
- cd->checked[cd->composite->id * 2] = false;
-
- ms = rspamd_task_find_symbol_result(cd->task, sym.data(),
- cd->metric_res);
- }
- else {
- /*
- * XXX: in case of cyclic references this would return 0
- */
- if (cd->checked[atom->ncomp->id * 2 + 1]) {
- ms = rspamd_task_find_symbol_result(cd->task, sym.data(),
- cd->metric_res);
- }
- }
- }
- }
-
- if (ms) {
- msg_debug_composites("found symbol %s in composite %s, weight: %.3f",
- sym.data(), cd->composite->sym.c_str(), ms->score);
-
- /* Now check options */
- for (const auto &cur_opt: atom->opts) {
- struct rspamd_symbol_option *opt;
- auto found = false;
-
- DL_FOREACH(ms->opts_head, opt)
- {
- if (cur_opt.match_opt({opt->option, opt->optlen})) {
- found = true;
- break;
- }
- }
-
- if (!found) {
- auto pat = cur_opt.get_pat();
- msg_debug_composites("symbol %s in composite %s misses required option %*s",
- sym.data(),
- cd->composite->sym.c_str(),
- (int) pat.size(), pat.data());
- ms = nullptr;
-
- break;
- }
- }
-
- if (ms) {
- if (ms->score == 0) {
- rc = epsilon * 16.0; /* Distinguish from 0 */
- }
- else {
- rc = ms->score;
- }
- }
- }
-
- *pms = ms;
- return rc;
- }
-
- static auto
- rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double
- {
- struct composites_data *cd = (struct composites_data *) ud;
- struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data;
-
- struct rspamd_symbol_result *ms = NULL;
- struct rspamd_task *task = cd->task;
- double rc = 0;
-
- if (cd->checked[cd->composite->id * 2]) {
- /* We have already checked this composite, so just return its value */
- if (cd->checked[cd->composite->id * 2 + 1]) {
- ms = rspamd_task_find_symbol_result(cd->task,
- comp_atom->norm_symbol.data(),
- cd->metric_res);
- }
-
- if (ms) {
- if (ms->score == 0) {
- rc = epsilon; /* Distinguish from 0 */
- }
- else {
- /* Treat negative and positive scores equally... */
- rc = fabs(ms->score);
- }
- }
-
- msg_debug_composites("composite %s is already checked, result: %.2f",
- cd->composite->sym.c_str(), rc);
-
- return rc;
- }
-
- /* Note: sym is zero terminated as it is a view on std::string */
- auto sym = comp_atom->norm_symbol;
- auto group_process_functor = [&](auto cond, int sub_start) -> double {
- auto max = 0.;
- GHashTableIter it;
- gpointer k, v;
- struct rspamd_symbols_group *gr;
-
- gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups,
- sym.substr(sub_start).data());
-
- if (gr != nullptr) {
- g_hash_table_iter_init(&it, gr->symbols);
-
- while (g_hash_table_iter_next(&it, &k, &v)) {
- auto *sdef = (rspamd_symbol *) v;
-
- if (cond(sdef->score)) {
- rc = process_single_symbol(cd,
- std::string_view(sdef->name),
- &ms,
- comp_atom);
-
- if (fabs(rc) > epsilon) {
- process_symbol_removal(atom,
- cd,
- ms,
- comp_atom->symbol);
-
- if (fabs(rc) > max) {
- max = fabs(rc);
- }
- }
- }
- }
- }
-
- return max;
- };
-
- if (sym.size() > 2) {
- if (sym.substr(0, 2) == "g:") {
- rc = group_process_functor([](auto _) { return true; }, 2);
- }
- else if (sym.substr(0, 3) == "g+:") {
- /* Group, positive symbols only */
- rc = group_process_functor([](auto sc) { return sc > 0.; }, 3);
- }
- else if (sym.substr(0, 3) == "g-:") {
- rc = group_process_functor([](auto sc) { return sc < 0.; }, 3);
- }
- else {
- rc = process_single_symbol(cd, sym, &ms, comp_atom);
-
- if (fabs(rc) > epsilon) {
- process_symbol_removal(atom,
- cd,
- ms,
- comp_atom->symbol);
- }
- }
- }
- else {
- rc = process_single_symbol(cd, sym, &ms, comp_atom);
-
- if (fabs(rc) > epsilon) {
- process_symbol_removal(atom,
- cd,
- ms,
- comp_atom->symbol);
- }
- }
-
- msg_debug_composites("%s: result for atom %s in composite %s is %.4f",
- cd->metric_res->name,
- comp_atom->norm_symbol.data(),
- cd->composite->sym.c_str(), rc);
-
- return rc;
- }
-
- /*
- * We don't have preferences for composites
- */
- static int
- rspamd_composite_expr_priority(rspamd_expression_atom_t *atom)
- {
- return 0;
- }
-
- static void
- rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom)
- {
- rspamd_composite_atom_dtor(atom->data);
- }
-
- static void
- composites_foreach_callback(gpointer key, gpointer value, void *data)
- {
- auto *cd = (struct composites_data *) data;
- auto *comp = (struct rspamd_composite *) value;
- auto *str_key = (const char *) key;
- struct rspamd_task *task;
- double rc;
-
- cd->composite = comp;
- task = cd->task;
-
- msg_debug_composites("process composite %s", str_key);
-
- if (!cd->checked[cd->composite->id * 2]) {
- if (rspamd_symcache_is_checked(cd->task, cd->task->cfg->cache,
- str_key)) {
- msg_debug_composites("composite %s is checked in symcache but not "
- "in composites bitfield",
- cd->composite->sym.c_str());
- cd->checked[comp->id * 2] = true;
- cd->checked[comp->id * 2 + 1] = false;
- }
- else {
- if (rspamd_task_find_symbol_result(cd->task, str_key,
- cd->metric_res) != nullptr) {
- /* Already set, no need to check */
- msg_debug_composites("composite %s is already in metric "
- "in composites bitfield",
- cd->composite->sym.c_str());
- cd->checked[comp->id * 2] = true;
- cd->checked[comp->id * 2 + 1] = true;
-
- return;
- }
-
- msg_debug_composites("%s: start processing composite %s",
- cd->metric_res->name,
- cd->composite->sym.c_str());
-
- rc = rspamd_process_expression(comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
- cd);
-
- /* Checked bit */
- cd->checked[comp->id * 2] = true;
-
- msg_debug_composites("%s: final result for composite %s is %.4f",
- cd->metric_res->name,
- cd->composite->sym.c_str(), rc);
-
- /* Result bit */
- if (fabs(rc) > epsilon) {
- cd->checked[comp->id * 2 + 1] = true;
- rspamd_task_insert_result_full(cd->task, str_key, 1.0, NULL,
- RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res);
- }
- else {
- cd->checked[comp->id * 2 + 1] = false;
- }
- }
- }
- }
-
-
- static auto
- remove_symbols(const composites_data &cd, const std::vector<symbol_remove_data> &rd) -> void
- {
- struct rspamd_task *task = cd.task;
- gboolean skip = FALSE,
- has_valid_op = FALSE,
- want_remove_score = TRUE,
- want_remove_symbol = TRUE,
- want_forced = FALSE;
- const char *disable_score_reason = "no policy",
- *disable_symbol_reason = "no policy";
-
- task = cd.task;
-
- for (const auto &cur: rd) {
- if (!cd.checked[cur.comp->id * 2 + 1]) {
- continue;
- }
- /*
- * First of all exclude all elements with any parent that is negation:
- * !A || B -> here we can have both !A and B matched, but we do *NOT*
- * want to remove symbol in that case
- */
- auto *par = cur.parent;
- skip = FALSE;
-
- while (par) {
- if (rspamd_expression_node_is_op(par, OP_NOT)) {
- skip = TRUE;
- break;
- }
-
- par = par->parent;
- }
-
- if (skip) {
- continue;
- }
-
- has_valid_op = TRUE;
- /*
- * Now we can try to remove symbols/scores
- *
- * We apply the following logic here:
- * - if no composites would like to save score then we remove score
- * - if no composites would like to save symbol then we remove symbol
- */
- if (!want_forced) {
- if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
- want_remove_symbol = FALSE;
- disable_symbol_reason = cur.comp->sym.c_str();
- }
-
- if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
- want_remove_score = FALSE;
- disable_score_reason = cur.comp->sym.c_str();
- }
-
- if (cur.action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
- want_forced = TRUE;
- disable_symbol_reason = cur.comp->sym.c_str();
- disable_score_reason = cur.comp->sym.c_str();
- }
- }
- }
-
- auto *ms = rspamd_task_find_symbol_result(task, rd.front().sym, cd.metric_res);
-
- if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
-
- if (want_remove_score || want_forced) {
- msg_debug_composites("%s: %s remove symbol weight for %s (was %.2f), "
- "score removal affected by %s, symbol removal affected by %s",
- cd.metric_res->name,
- (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
- disable_score_reason, disable_symbol_reason);
- cd.metric_res->score -= ms->score;
- ms->score = 0.0;
- }
-
- if (want_remove_symbol || want_forced) {
- ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED;
- msg_debug_composites("%s: %s remove symbol %s (score %.2f), "
- "score removal affected by %s, symbol removal affected by %s",
- cd.metric_res->name,
- (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
- disable_score_reason, disable_symbol_reason);
- }
- }
- }
-
- static void
- composites_metric_callback(struct rspamd_task *task)
- {
- std::vector<composites_data> comp_data_vec;
- struct rspamd_scan_result *mres;
-
- comp_data_vec.reserve(1);
-
- DL_FOREACH(task->result, mres)
- {
- auto &cd = comp_data_vec.emplace_back(task, mres);
-
- /* Process metric result */
- rspamd_symcache_composites_foreach(task,
- task->cfg->cache,
- composites_foreach_callback,
- &cd);
- }
-
- for (const auto &cd: comp_data_vec) {
- /* Remove symbols that are in composites */
- for (const auto &srd_it: cd.symbols_to_remove) {
- remove_symbols(cd, srd_it.second);
- }
- }
- }
-
- }// namespace rspamd::composites
-
-
- void rspamd_composites_process_task(struct rspamd_task *task)
- {
- if (task->result && !RSPAMD_TASK_IS_SKIPPED(task)) {
- rspamd::composites::composites_metric_callback(task);
- }
- }
|