about summary refs log tree commit diff stats
path: root/src/libserver/composites
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-15 10:54:08 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-15 10:54:08 +0100
commit b2c36feea701c6685d83b8c4e6282fe7e307609d (patch)
tree d28781581fb80bcf97b631749067e1e8bfac76b4 /src/libserver/composites
parent acaf4fe66e6b09938437ab992c015da1a9ac9eaf (diff)
download rspamd-b2c36feea701c6685d83b8c4e6282fe7e307609d.tar.gz
rspamd-b2c36feea701c6685d83b8c4e6282fe7e307609d.zip
[Rework] Composites: Start rework of the composites framework
Diffstat (limited to 'src/libserver/composites')
-rw-r--r-- src/libserver/composites/composites.cxx 1019
-rw-r--r-- src/libserver/composites/composites.h 44
2 files changed, 1063 insertions, 0 deletions
diff --git a/src/libserver/composites/composites.cxx b/src/libserver/composites/composites.cxx
new file mode 100644
index 000000000..0d9534681
--- /dev/null
+++ b/src/libserver/composites/composites.cxx
@@ -0,0 +1,1019 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "logger.h"
+#include "expression.h"
+#include "task.h"
+#include "utlist.h"
+#include "scan_result.h"
+#include "composites.h"
+
+#include <cmath>
+#include <utility>
+#include <variant>
+#include <vector>
+#include "contrib/robin-hood/robin_hood.h"
+
+/*
+ * Logging shortcuts for the "composites" module. Every macro below reads
+ * `task->task_pool->tag.uid`, so a variable named `task` must be visible
+ * wherever one of them is expanded.
+ */
+#define msg_err_composites(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
+ "composites", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_composites(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
+ "composites", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_composites(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
+ "composites", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+/*
+ * Debug output goes through rspamd_conditional_debug_fast; it additionally
+ * reads `task->from_addr` and `rspamd_composites_log_id` (presumably defined
+ * by INIT_LOG_MODULE below -- TODO confirm).
+ */
+#define msg_debug_composites(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
+ rspamd_composites_log_id, "composites", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(composites)
+
+
+/*
+ * Forward declarations of the four atom handlers; they are referenced by
+ * composite_expr_subr right below, before their definitions appear.
+ */
+namespace rspamd::composites {
+static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line, gsize len,
+ rspamd_mempool_t *pool,
+ gpointer ud, GError **err);
+static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
+static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
+static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
+}
+
+/*
+ * Handler table for composite atoms: it binds the parse, process, priority
+ * and destroy callbacks declared above into a single rspamd_atom_subr value
+ * (exported through composites.h).
+ */
+const struct rspamd_atom_subr composite_expr_subr = {
+ .parse = rspamd::composites::rspamd_composite_expr_parse,
+ .process = rspamd::composites::rspamd_composite_expr_process,
+ .priority = rspamd::composites::rspamd_composite_expr_priority,
+ .destroy = rspamd::composites::rspamd_composite_expr_destroy
+};
+
+namespace rspamd::composites {
+
+/*
+ * What a composite does with the symbols that made it match; translated into
+ * rspamd_composite_action bits by rspamd_composite_process_symbol_removal.
+ */
+enum class rspamd_composite_policy {
+ /* remove both the symbol and its weight (also used for any unlisted value) */
+ RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0,
+ /* request removal of the symbol only */
+ RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL,
+ /* request removal of the weight only */
+ RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT,
+ /* request nothing: action bits start as 0 */
+ RSPAMD_COMPOSITE_POLICY_LEAVE,
+ /* returned by rspamd_composite_policy_from_str for unrecognised names */
+ RSPAMD_COMPOSITE_POLICY_UNKNOWN
+};
+
+/**
+ * Static composites structure
+ */
+struct rspamd_composite {
+ /* presumably the textual form of the expression -- not read in this file */
+ std::string str_expr;
+ /* name of the composite symbol, used in log messages and as lookup key */
+ std::string sym;
+ /* expression evaluated with rspamd_process_expression */
+ struct rspamd_expression *expr;
+ /* index of the composite: bits id * 2 and id * 2 + 1 of composites_data::checked */
+ gint id;
+ /* default removal policy for the symbols matched by this composite */
+ rspamd_composite_policy policy;
+};
+
+/*
+ * Evaluation state for one scan result; one instance is created per element
+ * of task->result by composites_metric_callback.
+ */
+struct composites_data {
+ /* task being processed */
+ struct rspamd_task *task;
+ /* composite that is currently being evaluated */
+ struct rspamd_composite *composite;
+ /* scan result the symbols are looked up in and inserted to */
+ struct rspamd_scan_result *metric_res;
+ /* symbol name -> list of symbol_remove_data requests */
+ GHashTable *symbols_to_remove;
+ /* bitfield, two bits per composite: id * 2 = checked, id * 2 + 1 = matched */
+ guint8 *checked;
+ /* linkage for the list built in composites_metric_callback */
+ struct composites_data *next;
+};
+
+/*
+ * A single option constraint of a composite atom: either a regular expression
+ * or a plain string that a symbol option has to be equal to.
+ *
+ * The object owns one reference to the regexp and releases it on destruction.
+ * Copying is disabled because a copy would hold the same pointer without an
+ * extra reference and the regexp would be released twice (e.g. when the
+ * std::vector holding these objects reallocates); moving transfers ownership.
+ */
+struct rspamd_composite_option_match {
+	std::variant<rspamd_regexp_t *, std::string> match;
+
+	/* Holds a null regexp, nothing is released on destruction */
+	rspamd_composite_option_match() = default;
+
+	/* Takes over the caller's reference to `re` */
+	rspamd_composite_option_match(rspamd_regexp_t *re) : match(re) {}
+
+	/* Plain string match */
+	rspamd_composite_option_match(std::string str) : match(std::move(str)) {}
+
+	rspamd_composite_option_match(const rspamd_composite_option_match &) = delete;
+	rspamd_composite_option_match &operator=(const rspamd_composite_option_match &) = delete;
+
+	rspamd_composite_option_match(rspamd_composite_option_match &&other) noexcept
+			: match(std::move(other.match)) {
+		other.forget_regexp();
+	}
+
+	rspamd_composite_option_match &operator=(rspamd_composite_option_match &&other) noexcept {
+		if (this != &other) {
+			release_regexp();
+			match = std::move(other.match);
+			other.forget_regexp();
+		}
+
+		return *this;
+	}
+
+	~rspamd_composite_option_match() {
+		release_regexp();
+	}
+
+private:
+	/* Drop our reference to the regexp, if any */
+	void release_regexp() noexcept {
+		if (std::holds_alternative<rspamd_regexp_t *>(match)) {
+			auto *re = std::get<rspamd_regexp_t *>(match);
+
+			if (re != nullptr) {
+				rspamd_regexp_unref(re);
+			}
+		}
+	}
+
+	/* A moved-from pointer keeps its value, so reset it explicitly */
+	void forget_regexp() noexcept {
+		if (std::holds_alternative<rspamd_regexp_t *>(match)) {
+			match = static_cast<rspamd_regexp_t *>(nullptr);
+		}
+	}
+};
+
+/*
+ * Kind of symbol an atom refers to; resolved on the first lookup miss in
+ * rspamd_composite_process_single_symbol.
+ */
+enum class rspamd_composite_atom_type {
+ /* not resolved yet */
+ ATOM_UNKNOWN,
+ /* the symbol names another composite */
+ ATOM_COMPOSITE,
+ /* the symbol is not a known composite */
+ ATOM_PLAIN
+};
+/* Parsed atom of a composite expression: a symbol plus optional option matchers */
+struct rspamd_composite_atom {
+ /* symbol text as written in the expression, including any leading prefix characters */
+ std::string symbol;
+ /* whether the symbol is another composite, see rspamd_composite_atom_type */
+ rspamd_composite_atom_type comp_type;
+ struct rspamd_composite *ncomp; /* underlying composite */
+ /* options that all have to be matched by the symbol result */
+ std::vector<rspamd_composite_option_match> opts;
+};
+
+/* Bit flags stored in symbol_remove_data::action */
+enum rspamd_composite_action : std::uint8_t {
+ /* no bits set: leave the symbol as is */
+ RSPAMD_COMPOSITE_UNTOUCH = 0,
+ /* mark the symbol result as ignored */
+ RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1u << 0),
+ /* subtract the symbol score from the result and zero it */
+ RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1u << 1),
+ /* set by the '^' prefix: remove regardless of what other composites request */
+ RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2)
+};
+
+/*
+ * One removal request for a symbol, produced by a single atom of a single
+ * composite; requests for the same symbol are chained in a doubly linked list.
+ */
+struct symbol_remove_data {
+ /* name of the symbol to act upon */
+ const char *sym;
+ /* composite that issued the request */
+ struct rspamd_composite *comp;
+ /* expression node of the atom; its ancestors are checked for OP_NOT */
+ GNode *parent;
+ /* combination of rspamd_composite_action bits */
+ std::uint8_t action;
+ /* utlist DL_* linkage */
+ struct symbol_remove_data *prev, *next;
+};
+
+static GQuark
+rspamd_composites_quark (void)
+{
+ return g_quark_from_static_string ("composites");
+}
+
+/**
+ * Parse a single composite atom located at the start of `line`.
+ *
+ * An atom is a symbol name that may be followed by a bracketed, comma
+ * separated list of options; every option is either plain text or a
+ * `/regexp/` with optional trailing flags, e.g. `SYM[opt, /re/i]`.
+ * The input is scanned twice: the first pass only validates the syntax and
+ * measures the atom, the second pass fills the atom structure.
+ *
+ * @param line start of the atom text
+ * @param len number of bytes readable at `line`
+ * @param pool memory pool used for the atom, its strings and regexp destructors
+ * @param ud not used by this parser
+ * @param err filled (domain "composites", code 100) if the atom is not terminated properly
+ * @return atom allocated from `pool`, or NULL if the syntax is invalid
+ *
+ * NOTE(review): the second pass still fills the atom through the C-style
+ * interface (mempool-backed `symbol`, `DL_APPEND` on `opts`, `data`/`type`
+ * members of the option) whereas the structures declared in this file use
+ * std::string / std::vector / std::variant; this has to be reconciled.
+ */
+static rspamd_expression_atom_t *
+rspamd_composite_expr_parse(const gchar *line, gsize len,
+		rspamd_mempool_t *pool,
+		gpointer ud, GError **err)
+{
+	gsize clen = 0;
+	rspamd_expression_atom_t *res;
+	struct rspamd_composite_atom *atom;
+	const gchar *p, *end;
+	enum composite_expr_state {
+		comp_state_read_symbol = 0,
+		comp_state_read_obrace,
+		comp_state_read_option,
+		comp_state_read_regexp,
+		comp_state_read_regexp_end,
+		comp_state_read_comma,
+		comp_state_read_ebrace,
+		comp_state_read_end
+	} state = comp_state_read_symbol;
+
+	end = line + len;
+	p = line;
+
+	/* Find length of the atom using a reduced state machine */
+	while (p < end) {
+		if (state == comp_state_read_end) {
+			break;
+		}
+
+		switch (state) {
+		case comp_state_read_symbol:
+			clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
+			p += clen;
+
+			/* p can be equal to end here: never look past the input */
+			if (p < end && *p == '[') {
+				state = comp_state_read_obrace;
+			}
+			else {
+				state = comp_state_read_end;
+			}
+			break;
+		case comp_state_read_obrace:
+			p++;
+
+			if (p < end && *p == '/') {
+				p++;
+				state = comp_state_read_regexp;
+			}
+			else {
+				state = comp_state_read_option;
+			}
+			break;
+		case comp_state_read_regexp:
+			if (*p == '\\' && p + 1 < end) {
+				/* Escaping */
+				p++;
+			}
+			else if (*p == '/') {
+				/* End of regexp, possible flags */
+				state = comp_state_read_regexp_end;
+			}
+			p++;
+			break;
+		case comp_state_read_option:
+		case comp_state_read_regexp_end:
+			if (*p == ',') {
+				p++;
+				state = comp_state_read_comma;
+			}
+			else if (*p == ']') {
+				state = comp_state_read_ebrace;
+			}
+			else {
+				p++;
+			}
+			break;
+		case comp_state_read_comma:
+			if (!g_ascii_isspace (*p)) {
+				if (*p == '/') {
+					/*
+					 * Skip the opening slash, otherwise the regexp state
+					 * takes it for the closing one and a ',' or ']' inside
+					 * of the regexp terminates the option prematurely
+					 */
+					p++;
+					state = comp_state_read_regexp;
+				}
+				else if (*p == ']') {
+					state = comp_state_read_ebrace;
+				}
+				else {
+					state = comp_state_read_option;
+				}
+			}
+			else {
+				/* Skip spaces after comma */
+				p++;
+			}
+			break;
+		case comp_state_read_ebrace:
+			p++;
+			state = comp_state_read_end;
+			break;
+		case comp_state_read_end:
+			g_assert_not_reached ();
+		}
+	}
+
+	if (state != comp_state_read_end) {
+		g_set_error(err, rspamd_composites_quark(), 100, "invalid composite: %s;"
+				"parser stopped in state %d",
+				line, state);
+		return NULL;
+	}
+
+	clen = p - line;
+	p = line;
+	state = comp_state_read_symbol;
+
+	atom = rspamd_mempool_alloc0 (pool, sizeof(*atom));
+	atom->comp_type = ATOM_UNKNOWN;
+	res = rspamd_mempool_alloc0 (pool, sizeof(*res));
+	res->len = clen;
+	res->str = line;
+
+	/* Full state machine to fill a composite atom */
+	const gchar *opt_start = NULL;
+
+	while (p < end) {
+		struct rspamd_composite_option_match *opt_match;
+
+		if (state == comp_state_read_end) {
+			break;
+		}
+
+		switch (state) {
+		case comp_state_read_symbol:
+			clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
+			p += clen;
+
+			if (p < end && *p == '[') {
+				state = comp_state_read_obrace;
+			}
+			else {
+				state = comp_state_read_end;
+			}
+
+			atom->symbol = rspamd_mempool_alloc (pool, clen + 1);
+			rspamd_strlcpy(atom->symbol, line, clen + 1);
+
+			break;
+		case comp_state_read_obrace:
+			p++;
+
+			if (p < end && *p == '/') {
+				opt_start = p;
+				p++; /* Starting slash */
+				state = comp_state_read_regexp;
+			}
+			else {
+				state = comp_state_read_option;
+				opt_start = p;
+			}
+
+			break;
+		case comp_state_read_regexp:
+			if (*p == '\\' && p + 1 < end) {
+				/* Escaping */
+				p++;
+			}
+			else if (*p == '/') {
+				/* End of regexp, possible flags */
+				state = comp_state_read_regexp_end;
+			}
+			p++;
+			break;
+		case comp_state_read_option:
+			if (*p == ',' || *p == ']') {
+				opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
+				/* Plain match */
+				gchar *opt_buf;
+				gint opt_len = p - opt_start;
+
+				opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
+				rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
+
+				opt_match->data.match = opt_buf;
+				opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN;
+
+				DL_APPEND (atom->opts, opt_match);
+
+				if (*p == ',') {
+					p++;
+					state = comp_state_read_comma;
+				}
+				else {
+					state = comp_state_read_ebrace;
+				}
+			}
+			else {
+				p++;
+			}
+			break;
+		case comp_state_read_regexp_end:
+			if (*p == ',' || *p == ']') {
+				opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
+				/* Regexp match: the buffer holds "/pattern/flags" */
+				gchar *opt_buf;
+				gint opt_len = p - opt_start;
+
+				opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
+				rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
+
+				rspamd_regexp_t *re;
+				GError *re_err = NULL;
+
+				re = rspamd_regexp_new(opt_buf, NULL, &re_err);
+
+				if (re == NULL) {
+					/* The broken option is logged and skipped, parsing goes on */
+					msg_err_pool ("cannot create regexp from string %s: %e",
+							opt_buf, re_err);
+
+					g_error_free(re_err);
+				}
+				else {
+					rspamd_mempool_add_destructor (pool,
+							(rspamd_mempool_destruct_t) rspamd_regexp_unref,
+							re);
+					opt_match->data.re = re;
+					opt_match->type = RSPAMD_COMPOSITE_OPTION_RE;
+
+					DL_APPEND (atom->opts, opt_match);
+				}
+
+				if (*p == ',') {
+					p++;
+					state = comp_state_read_comma;
+				}
+				else {
+					state = comp_state_read_ebrace;
+				}
+			}
+			else {
+				p++;
+			}
+			break;
+		case comp_state_read_comma:
+			if (!g_ascii_isspace (*p)) {
+				if (*p == '/') {
+					state = comp_state_read_regexp;
+					opt_start = p;
+					p++; /* Starting slash, same as after the opening brace */
+				}
+				else if (*p == ']') {
+					state = comp_state_read_ebrace;
+				}
+				else {
+					opt_start = p;
+					state = comp_state_read_option;
+				}
+			}
+			else {
+				/* Skip spaces after comma */
+				p++;
+			}
+			break;
+		case comp_state_read_ebrace:
+			p++;
+			state = comp_state_read_end;
+			break;
+		case comp_state_read_end:
+			g_assert_not_reached ();
+		}
+	}
+
+	res->data = atom;
+
+	return res;
+}
+
+}
+
+/* Declared early: rspamd_composite_process_single_symbol calls it for nested composites */
+static void composites_foreach_callback (gpointer key, gpointer value, void *data);
+
+
+/*
+ * Look up one symbol in the current scan result and check it against the
+ * options required by the atom.
+ *
+ * If the symbol is absent and names another composite, that composite is
+ * evaluated first (via composites_foreach_callback) and the lookup is retried.
+ *
+ * @param cd evaluation state; cd->composite is saved and restored around the
+ * nested evaluation
+ * @param sym symbol name to look up
+ * @param pms out: the matched symbol result, or NULL if the symbol is absent
+ * or one of the atom options did not match
+ * @param atom atom being processed; its comp_type/ncomp are resolved on the
+ * first miss
+ * @return 0 when nothing matched, 0.001 when the symbol matched with a zero
+ * score, otherwise the symbol score
+ */
+static gdouble
+rspamd_composite_process_single_symbol (struct composites_data *cd,
+ const gchar *sym,
+ struct rspamd_symbol_result **pms,
+ struct rspamd_composite_atom *atom)
+{
+ struct rspamd_symbol_result *ms = NULL;
+ gdouble rc = 0;
+ struct rspamd_task *task = cd->task;
+
+ if ((ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res)) == NULL) {
+ msg_debug_composites ("not found symbol %s in composite %s", sym,
+ cd->composite->sym);
+
+ /* Classify the atom once: composite or plain symbol */
+ if (atom->comp_type == ATOM_UNKNOWN) {
+ struct rspamd_composite *ncomp;
+
+ if ((ncomp =
+ g_hash_table_lookup (cd->task->cfg->composite_symbols,
+ sym)) != NULL) {
+ atom->comp_type = ATOM_COMPOSITE;
+ atom->ncomp = ncomp;
+ }
+ else {
+ atom->comp_type = ATOM_PLAIN;
+ }
+ }
+
+ if (atom->comp_type == ATOM_COMPOSITE) {
+ msg_debug_composites ("symbol %s for composite %s is another composite",
+ sym, cd->composite->sym);
+
+ if (isclr (cd->checked, atom->ncomp->id * 2)) {
+ struct rspamd_composite *saved;
+
+ msg_debug_composites ("composite dependency %s for %s is not checked",
+ sym, cd->composite->sym);
+ /* Set checked for this symbol to avoid cyclic references */
+ setbit (cd->checked, cd->composite->id * 2);
+ saved = cd->composite; /* Save the current composite */
+ composites_foreach_callback ((gpointer)atom->ncomp->sym, atom->ncomp, cd);
+
+ /* Restore state */
+ cd->composite = saved;
+ clrbit (cd->checked, cd->composite->id * 2);
+
+ /* The nested composite may have inserted its symbol by now */
+ ms = rspamd_task_find_symbol_result (cd->task, sym,
+ cd->metric_res);
+ }
+ else {
+ /*
+ * XXX: in case of cyclic references this would return 0
+ */
+ if (isset (cd->checked, atom->ncomp->id * 2 + 1)) {
+ ms = rspamd_task_find_symbol_result (cd->task, sym,
+ cd->metric_res);
+ }
+ }
+ }
+ }
+
+ if (ms) {
+ msg_debug_composites ("found symbol %s in composite %s, weight: %.3f",
+ sym, cd->composite->sym, ms->score);
+
+ /* Now check options */
+ struct rspamd_composite_option_match *cur_opt;
+
+ /* Every atom option must be matched by at least one symbol option */
+ DL_FOREACH (atom->opts, cur_opt) {
+ struct rspamd_symbol_option *opt;
+ bool found = false;
+
+ DL_FOREACH (ms->opts_head, opt) {
+ if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
+ gsize mlen = strlen (cur_opt->data.match);
+
+ if (opt->optlen == mlen &&
+ memcmp (opt->option, cur_opt->data.match, mlen) == 0) {
+
+ found = true;
+
+ break;
+ }
+ }
+ else {
+ if (rspamd_regexp_search (cur_opt->data.re,
+ opt->option, opt->optlen, NULL, NULL, FALSE, NULL)) {
+ found = true;
+
+ break;
+ }
+ }
+ }
+
+
+ if (!found) {
+ if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
+ msg_debug_composites ("symbol %s in composite %s misses required option %s",
+ sym,
+ cd->composite->sym,
+ cur_opt->data.match);
+ }
+ else {
+ msg_debug_composites ("symbol %s in composite %s failed to match regexp %s",
+ sym,
+ cd->composite->sym,
+ rspamd_regexp_get_pattern (cur_opt->data.re));
+ }
+
+ /* A single unmatched option discards the whole symbol */
+ ms = NULL;
+
+ break;
+ }
+ }
+
+ if (ms) {
+ if (ms->score == 0) {
+ rc = 0.001; /* Distinguish from 0 */
+ }
+ else {
+ rc = ms->score;
+ }
+ }
+ }
+
+ *pms = ms;
+ return rc;
+}
+
+/*
+ * Record a removal request for a symbol that has matched inside a composite.
+ *
+ * The request starts from the policy of the current composite and is then
+ * adjusted by the prefix characters found at `beg`: '~' keeps the symbol,
+ * '-' keeps both symbol and weight, '^' forces the removal. Whether anything
+ * is really removed is decided later, once all composites are evaluated.
+ *
+ * @param atom expression atom, its parent node is stored in the request
+ * @param cd evaluation state that owns the `symbols_to_remove` table
+ * @param ms matched symbol result, nothing is done if it is NULL
+ * @param beg atom symbol text including the optional prefix characters
+ */
+static void
+rspamd_composite_process_symbol_removal (rspamd_expression_atom_t *atom,
+		struct composites_data *cd,
+		struct rspamd_symbol_result *ms,
+		const gchar *beg)
+{
+	struct symbol_remove_data *existing, *entry;
+	struct rspamd_task *task = cd->task;
+	const gchar *prefix;
+
+	if (ms == NULL) {
+		return;
+	}
+
+	existing = g_hash_table_lookup (cd->symbols_to_remove, ms->name);
+
+	entry = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*entry));
+	entry->sym = ms->name;
+	entry->comp = cd->composite;
+	entry->parent = atom->parent;
+
+	/* Initial action comes from the composite policy, removal is the default */
+	if (cd->composite->policy == RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL) {
+		entry->action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+	}
+	else if (cd->composite->policy == RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT) {
+		entry->action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
+	}
+	else if (cd->composite->policy == RSPAMD_COMPOSITE_POLICY_LEAVE) {
+		entry->action = 0;
+	}
+	else {
+		entry->action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL|RSPAMD_COMPOSITE_REMOVE_WEIGHT);
+	}
+
+	/* Apply all leading modifiers, stop at the first ordinary character */
+	prefix = beg;
+
+	while (*prefix == '~' || *prefix == '-' || *prefix == '^') {
+		switch (*prefix) {
+		case '~':
+			entry->action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+			break;
+		case '-':
+			entry->action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT|
+					RSPAMD_COMPOSITE_REMOVE_SYMBOL);
+			break;
+		default:
+			/* '^' */
+			entry->action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
+			break;
+		}
+
+		prefix ++;
+	}
+
+	if (existing != NULL) {
+		/* The list head is already stored in the table */
+		DL_APPEND (existing, entry);
+		msg_debug_composites ("%s: append symbol %s to removal: %d policy, from composite %s",
+				cd->metric_res->name,
+				ms->name, entry->action,
+				cd->composite->sym);
+
+		return;
+	}
+
+	/* First request for this symbol: the new entry becomes the list head */
+	DL_APPEND (existing, entry);
+	g_hash_table_insert (cd->symbols_to_remove, (gpointer)ms->name, existing);
+	msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
+			cd->metric_res->name,
+			ms->name, entry->action,
+			cd->composite->sym);
+}
+
+/*
+ * Evaluate all symbols of a group for the given atom.
+ *
+ * @param cd evaluation state
+ * @param atom expression atom (passed through to the removal bookkeeping)
+ * @param comp_atom composite specific part of the atom
+ * @param group_name name of the group to look up in the task configuration
+ * @param score_sign 0 to use all symbols, > 0 to use symbols with a positive
+ * configured score only, < 0 to use symbols with a negative one only
+ * @return the largest absolute value returned for a group member, 0 if the
+ * group does not exist or nothing has matched
+ */
+static gdouble
+rspamd_composite_process_group (struct composites_data *cd,
+		rspamd_expression_atom_t *atom,
+		struct rspamd_composite_atom *comp_atom,
+		const gchar *group_name,
+		gint score_sign)
+{
+	struct rspamd_symbols_group *gr;
+	struct rspamd_symbol_result *ms = NULL;
+	GHashTableIter it;
+	gpointer k, v;
+	gdouble max = 0;
+
+	gr = (struct rspamd_symbols_group *)g_hash_table_lookup (cd->task->cfg->groups,
+			group_name);
+
+	if (gr == NULL) {
+		return 0;
+	}
+
+	g_hash_table_iter_init (&it, gr->symbols);
+
+	while (g_hash_table_iter_next (&it, &k, &v)) {
+		struct rspamd_symbol *sdef = (struct rspamd_symbol *)v;
+		gdouble rc;
+
+		if ((score_sign > 0 && !(sdef->score > 0)) ||
+			(score_sign < 0 && !(sdef->score < 0))) {
+			/* Filtered out by the sign of the configured score */
+			continue;
+		}
+
+		rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms,
+				comp_atom);
+
+		if (rc) {
+			rspamd_composite_process_symbol_removal (atom,
+					cd,
+					ms,
+					comp_atom->symbol);
+
+			if (fabs (rc) > max) {
+				max = fabs (rc);
+			}
+		}
+	}
+
+	return max;
+}
+
+/*
+ * Expression callback: compute the value of one atom of a composite.
+ *
+ * The atom symbol may start with modifier characters (anything that is not
+ * alphanumeric), they are skipped to get the real symbol name. The name is
+ * either a plain symbol or a group reference: "g:NAME" (all symbols of the
+ * group), "g+:NAME" (positive score symbols) or "g-:NAME" (negative ones).
+ *
+ * @param ud struct composites_data of the current evaluation
+ * @param atom atom produced by rspamd_composite_expr_parse
+ * @return 0 if the atom has not matched, non-zero otherwise
+ */
+static gdouble
+rspamd_composite_expr_process (void *ud,
+		rspamd_expression_atom_t *atom)
+{
+	static const double epsilon = 0.00001;
+	struct composites_data *cd = (struct composites_data *)ud;
+	struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data;
+	struct rspamd_symbol_result *ms = NULL;
+	struct rspamd_task *task = cd->task;
+	const gchar *sym = comp_atom->symbol;
+	guint slen;
+	gdouble rc = 0;
+
+	/*
+	 * Skip the modifiers first: the name is needed by every branch below,
+	 * including the `already checked` one that used to look up a NULL name
+	 */
+	while (*sym != '\0' && !g_ascii_isalnum (*sym)) {
+		sym ++;
+	}
+
+	slen = strlen (sym);
+
+	if (isset (cd->checked, cd->composite->id * 2)) {
+		/* We have already checked this composite, so just return its value */
+		if (isset (cd->checked, cd->composite->id * 2 + 1)) {
+			ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res);
+		}
+
+		if (ms) {
+			if (ms->score == 0) {
+				rc = epsilon; /* Distinguish from 0 */
+			}
+			else {
+				/* Treat negative and positive scores equally... */
+				rc = fabs (ms->score);
+			}
+		}
+
+		msg_debug_composites ("composite %s is already checked, result: %.2f",
+				cd->composite->sym, rc);
+
+		return rc;
+	}
+
+	if (slen > 2 && G_UNLIKELY (memcmp (sym, "g:", 2) == 0)) {
+		/* Group, all symbols */
+		rc = rspamd_composite_process_group (cd, atom, comp_atom, sym + 2, 0);
+	}
+	else if (slen > 2 && G_UNLIKELY (memcmp (sym, "g+:", 3) == 0)) {
+		/* Group, positive symbols only */
+		rc = rspamd_composite_process_group (cd, atom, comp_atom, sym + 3, 1);
+	}
+	else if (slen > 2 && G_UNLIKELY (memcmp (sym, "g-:", 3) == 0)) {
+		/* Group, negative symbols only */
+		rc = rspamd_composite_process_group (cd, atom, comp_atom, sym + 3, -1);
+	}
+	else {
+		rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom);
+
+		if (rc) {
+			rspamd_composite_process_symbol_removal (atom,
+					cd,
+					ms,
+					comp_atom->symbol);
+		}
+	}
+
+	msg_debug_composites ("%s: final result for composite %s is %.2f",
+			cd->metric_res->name,
+			cd->composite->sym, rc);
+
+	return rc;
+}
+
+/*
+ * Priority callback of the atom handlers table: composite atoms are all
+ * equal, so the same value is reported for each of them.
+ */
+static gint
+rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
+{
+	const gint no_preference = 0;
+
+	return no_preference;
+}
+
+/*
+ * Destroy callback of the atom handlers table; intentionally empty, as the
+ * parser allocates atoms from a memory pool.
+ */
+static void
+rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
+{
+ /* Composite atoms are destroyed just with the pool */
+}
+
+
+/*
+ * Evaluate a single composite unless its `checked` bit is already set.
+ *
+ * Two bits of cd->checked are kept per composite: bit id * 2 says that the
+ * composite has been processed, bit id * 2 + 1 that it has matched. When the
+ * expression yields a non-zero value the composite symbol is inserted into
+ * the scan result with the weight of 1.0.
+ *
+ * @param key composite symbol name
+ * @param value struct rspamd_composite to evaluate
+ * @param data struct composites_data with the evaluation state
+ */
+static void
+composites_foreach_callback (gpointer key, gpointer value, void *data)
+{
+	struct composites_data *cd = data;
+	struct rspamd_composite *comp = value;
+	struct rspamd_task *task = cd->task;
+	gdouble result;
+
+	cd->composite = comp;
+
+	if (isset (cd->checked, comp->id * 2)) {
+		/* Nothing to do, has been processed before */
+		return;
+	}
+
+	if (rspamd_symcache_is_checked (task, task->cfg->cache, key)) {
+		msg_debug_composites ("composite %s is checked in symcache but not "
+				"in composites bitfield", comp->sym);
+		setbit (cd->checked, comp->id * 2);
+		clrbit (cd->checked, comp->id * 2 + 1);
+
+		return;
+	}
+
+	if (rspamd_task_find_symbol_result (task, key, cd->metric_res) != NULL) {
+		/* The symbol is present in the result already, skip evaluation */
+		msg_debug_composites ("composite %s is already in metric "
+				"in composites bitfield", comp->sym);
+		setbit (cd->checked, comp->id * 2);
+		clrbit (cd->checked, comp->id * 2 + 1);
+
+		return;
+	}
+
+	result = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
+			cd);
+
+	/* Processed bit */
+	setbit (cd->checked, comp->id * 2);
+
+	/* Matched bit */
+	if (result == 0) {
+		clrbit (cd->checked, comp->id * 2 + 1);
+
+		return;
+	}
+
+	setbit (cd->checked, comp->id * 2 + 1);
+	rspamd_task_insert_result_full (task, key, 1.0, NULL,
+			RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res);
+}
+
+
+/*
+ * Apply the removal requests collected for one symbol.
+ *
+ * Requests of composites that have not matched are skipped, the same is true
+ * for requests issued from under a negation. From the remaining ones the
+ * final decision is built: the score is removed unless some composite wants
+ * to keep it, likewise for the symbol itself; a forced request overrides both
+ * and stops further processing of the requests.
+ *
+ * @param key symbol name (used for logging)
+ * @param value head of the struct symbol_remove_data list for this symbol
+ * @param data struct composites_data with the evaluation state
+ */
+static void
+composites_remove_symbols (gpointer key, gpointer value, gpointer data)
+{
+	struct composites_data *cd = data;
+	struct rspamd_task *task = cd->task;
+	struct symbol_remove_data *rd = value, *cur;
+	struct rspamd_symbol_result *ms;
+	gboolean has_valid_op = FALSE,
+			want_remove_score = TRUE,
+			want_remove_symbol = TRUE,
+			want_forced = FALSE;
+	const gchar *disable_score_reason = "no policy",
+			*disable_symbol_reason = "no policy";
+
+	DL_FOREACH (rd, cur) {
+		GNode *node;
+		gboolean negated = FALSE;
+
+		if (!isset (cd->checked, cur->comp->id * 2 + 1)) {
+			/* This composite has not matched */
+			continue;
+		}
+
+		/*
+		 * Ignore atoms that have a negation among their ancestors:
+		 * for `!A || B` both !A and B can be matched, but A must *NOT* be
+		 * removed in that case
+		 */
+		for (node = cur->parent; node != NULL; node = node->parent) {
+			if (rspamd_expression_node_is_op (node, OP_NOT)) {
+				negated = TRUE;
+				break;
+			}
+		}
+
+		if (negated) {
+			continue;
+		}
+
+		has_valid_op = TRUE;
+
+		if (want_forced) {
+			/* A forced request has been seen, the decision is final */
+			continue;
+		}
+
+		/*
+		 * - if no composite wants to keep the score, the score is removed
+		 * - if no composite wants to keep the symbol, the symbol is removed
+		 */
+		if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
+			want_remove_symbol = FALSE;
+			disable_symbol_reason = cur->comp->sym;
+		}
+
+		if (!(cur->action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
+			want_remove_score = FALSE;
+			disable_score_reason = cur->comp->sym;
+		}
+
+		if (cur->action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
+			want_forced = TRUE;
+			disable_symbol_reason = cur->comp->sym;
+			disable_score_reason = cur->comp->sym;
+		}
+	}
+
+	ms = rspamd_task_find_symbol_result (task, rd->sym, cd->metric_res);
+
+	if (!has_valid_op || ms == NULL || (ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
+		return;
+	}
+
+	if (want_remove_score || want_forced) {
+		msg_debug_composites ("%s: %s remove symbol weight for %s (was %.2f), "
+				"score removal affected by %s, symbol removal affected by %s",
+				cd->metric_res->name,
+				(want_forced ? "forced" : "normal"), key, ms->score,
+				disable_score_reason, disable_symbol_reason);
+		cd->metric_res->score -= ms->score;
+		ms->score = 0.0;
+	}
+
+	if (want_remove_symbol || want_forced) {
+		ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED;
+		msg_debug_composites ("%s: %s remove symbol %s (score %.2f), "
+				"score removal affected by %s, symbol removal affected by %s",
+				cd->metric_res->name,
+				(want_forced ? "forced" : "normal"), key, ms->score,
+				disable_score_reason, disable_symbol_reason);
+	}
+}
+
+/*
+ * Run the composites over every scan result of the task.
+ *
+ * Phase one evaluates all composites for each result and collects the
+ * removal requests; phase two walks the results in their original order,
+ * applies the requests and releases the request tables.
+ */
+static void
+composites_metric_callback (struct rspamd_task *task)
+{
+	struct composites_data *cd, *pending = NULL;
+	struct rspamd_scan_result *mres;
+
+	DL_FOREACH (task->result, mres) {
+		/* Two bits per composite: processed and matched */
+		guint nbits = g_hash_table_size (task->cfg->composite_symbols) * 2;
+
+		cd = rspamd_mempool_alloc (task->task_pool, sizeof (*cd));
+		cd->task = task;
+		cd->metric_res = mres;
+		cd->symbols_to_remove = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+		cd->checked = rspamd_mempool_alloc0 (task->task_pool, NBYTES (nbits));
+
+		/* Evaluate every composite against this result */
+		rspamd_symcache_composites_foreach (task,
+				task->cfg->cache,
+				composites_foreach_callback,
+				cd);
+		LL_PREPEND (pending, cd);
+	}
+
+	/* Prepending has reversed the order, restore it */
+	LL_REVERSE (pending);
+
+	LL_FOREACH (pending, cd) {
+		/* Remove symbols that are in composites */
+		g_hash_table_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
+		/* The table is not needed any longer */
+		g_hash_table_unref (cd->symbols_to_remove);
+	}
+}
+
+/*
+ * Public entry point: process composites for a task, unless the task has no
+ * scan result or is marked as skipped.
+ */
+void
+rspamd_composites_process_task (struct rspamd_task *task)
+{
+	if (!task->result || RSPAMD_TASK_IS_SKIPPED (task)) {
+		return;
+	}
+
+	composites_metric_callback (task);
+}
+
+
+/*
+ * Translate a textual policy name into a composite policy value.
+ *
+ * Recognised names: "remove", "remove_all", "default" (remove all),
+ * "remove_symbol", "remove_weight", "leave" and "remove_none" (leave).
+ *
+ * @param string policy name, may be NULL
+ * @return the matching policy or RSPAMD_COMPOSITE_POLICY_UNKNOWN if the name
+ * is NULL or not recognised
+ */
+enum rspamd_composite_policy
+rspamd_composite_policy_from_str (const gchar *string)
+{
+	enum rspamd_composite_policy ret = RSPAMD_COMPOSITE_POLICY_UNKNOWN;
+
+	if (string == NULL) {
+		/* strcmp must not be called with NULL, report as unknown */
+		return ret;
+	}
+
+	if (strcmp (string, "remove") == 0 || strcmp (string, "remove_all") == 0 ||
+			strcmp (string, "default") == 0) {
+		ret = RSPAMD_COMPOSITE_POLICY_REMOVE_ALL;
+	}
+	else if (strcmp (string, "remove_symbol") == 0) {
+		ret = RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL;
+	}
+	else if (strcmp (string, "remove_weight") == 0) {
+		ret = RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT;
+	}
+	else if (strcmp (string, "leave") == 0 || strcmp (string, "remove_none") == 0) {
+		ret = RSPAMD_COMPOSITE_POLICY_LEAVE;
+	}
+
+	return ret;
+}
diff --git a/src/libserver/composites/composites.h b/src/libserver/composites/composites.h
new file mode 100644
index 000000000..d39863b88
--- /dev/null
+++ b/src/libserver/composites/composites.h
@@ -0,0 +1,44 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBSERVER_COMPOSITES_H_
+#define SRC_LIBSERVER_COMPOSITES_H_
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+
+/**
+ * Atom handlers (parse, process, priority, destroy) for composite expressions
+ */
+extern const struct rspamd_atom_subr composite_expr_subr;
+
+/**
+ * Process all scan results of a task and insert the composite symbols that
+ * have matched, as defined in the configuration
+ * @param task task to process
+ */
+void rspamd_composites_process_task (struct rspamd_task *task);
+
+/**
+ * Convert a textual policy name to a composite policy value
+ * @param string policy name: "remove", "remove_all", "default",
+ * "remove_symbol", "remove_weight", "leave" or "remove_none"
+ * @return the corresponding policy or RSPAMD_COMPOSITE_POLICY_UNKNOWN for any other name
+ */
+enum rspamd_composite_policy rspamd_composite_policy_from_str (const gchar *string);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBSERVER_COMPOSITES_H_ */