From: Vsevolod Stakhov Date: Thu, 17 Mar 2016 14:14:04 +0000 (+0000) Subject: [Feature] Rework composite rules application X-Git-Tag: 1.2.0~13 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=4502a6e8dfa44915e26774002b5cd10e537ce27d;p=rspamd.git [Feature] Rework composite rules application First of all exclude all elements with any parent that is negation: `!A || B` -> here we can have both !A and B matched, but we do *NOT* want to remove symbol in that case. We apply the following logic subsequently: - if no composites would like to save score then we remove score - if no composites would like to save symbol then we remove symbol --- diff --git a/src/libserver/composites.c b/src/libserver/composites.c index 38305b658..f18b1b560 100644 --- a/src/libserver/composites.c +++ b/src/libserver/composites.c @@ -25,15 +25,17 @@ struct composites_data { struct rspamd_task *task; struct rspamd_composite *composite; struct metric_result *metric_res; - GTree *symbols_to_remove; + GHashTable *symbols_to_remove; guint8 *checked; }; struct symbol_remove_data { struct symbol *ms; + struct rspamd_composite *comp; + GNode *parent; gboolean remove_weight; gboolean remove_symbol; - GList *comp; + struct symbol_remove_data *prev, *next; }; static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len, @@ -129,7 +131,7 @@ rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom) { struct composites_data *cd = (struct composites_data *)input; const gchar *sym = atom->data; - struct symbol_remove_data *rd; + struct symbol_remove_data *rd, *nrd; struct symbol *ms; struct rspamd_symbols_group *gr; struct rspamd_symbol_def *sdef; @@ -178,34 +180,35 @@ rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom) * that depends on the later decisions when the complete expression is * evaluated. */ - if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) { - rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd)); - rd->ms = ms; + rd = g_hash_table_lookup (cd->symbols_to_remove, ms->name); - if (G_UNLIKELY (t == '~')) { - rd->remove_weight = FALSE; - rd->remove_symbol = TRUE; - } - else if (G_UNLIKELY (t == '-')) { - rd->remove_symbol = FALSE; - rd->remove_weight = FALSE; - } - else { - rd->remove_symbol = TRUE; - rd->remove_weight = TRUE; - } + nrd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*nrd)); + nrd->ms = ms; + + if (G_UNLIKELY (t == '~')) { + nrd->remove_weight = FALSE; + nrd->remove_symbol = TRUE; + } + else if (G_UNLIKELY (t == '-')) { + nrd->remove_symbol = FALSE; + nrd->remove_weight = FALSE; + } + else { + nrd->remove_symbol = TRUE; + nrd->remove_weight = TRUE; + } - rd->comp = g_list_prepend (NULL, cd->composite); - g_tree_insert (cd->symbols_to_remove, - (gpointer)ms->name, - rd); + nrd->comp = cd->composite; + nrd->parent = atom->parent; + + if (rd == NULL) { + DL_APPEND (rd, nrd); + g_hash_table_insert (cd->symbols_to_remove, + (gpointer)ms->name, + rd); } else { - /* - * XXX: what if we have different preferences regarding - * weight and symbol removal in different composites? - */ - rd->comp = g_list_prepend (rd->comp, cd->composite); + DL_APPEND (rd, nrd); } } @@ -227,13 +230,6 @@ rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) /* Composite atoms are destroyed just with the pool */ } -static gint -remove_compare_data (gconstpointer a, gconstpointer b) -{ - const gchar *ca = a, *cb = b; - - return strcmp (ca, cb); -} static void composites_foreach_callback (gpointer key, gpointer value, void *data) @@ -260,45 +256,65 @@ composites_foreach_callback (gpointer key, gpointer value, void *data) } -static gboolean +static void composites_remove_symbols (gpointer key, gpointer value, gpointer data) { struct composites_data *cd = data; - struct symbol_remove_data *rd = value; - GList *cur; - struct rspamd_composite *comp; - gboolean matched = FALSE; + struct symbol_remove_data *rd = value, *cur; + gboolean skip = FALSE, has_valid_op = FALSE, + want_remove_score = TRUE, want_remove_symbol = TRUE; + GNode *par; + + DL_FOREACH (rd, cur) { + if (!isset (cd->checked, cur->comp->id * 2 + 1)) { + continue; + } + /* + * First of all exclude all elements with any parent that is negation: + * !A || B -> here we can have both !A and B matched, but we do *NOT* + * want to remove symbol in that case + */ + par = cur->parent; + skip = FALSE; - cur = rd->comp; + while (par) { + if (rspamd_expression_node_is_op (par, OP_NOT)) { + skip = TRUE; + break; + } - /* - * XXX: actually, this is a weak assumption as we are unaware here about - * negate operation and so on. We need to parse AST directly and remove - * only those symbols that could be removed. - */ - while (cur) { - comp = cur->data; + par = par->parent; + } - if (isset (cd->checked, comp->id * 2 + 1)) { - matched = TRUE; - break; + if (skip) { + continue; } - cur = g_list_next (cur); - } + has_valid_op = TRUE; + /* + * Now we can try to remove symbols/scores + * + * We apply the following logic here: + * - if no composites would like to save score then we remove score + * - if no composites would like to save symbol then we remove symbol + */ + if (!cur->remove_symbol) { + want_remove_symbol = FALSE; + } - g_list_free (rd->comp); + if (!cur->remove_weight) { + want_remove_score = FALSE; + } + } - if (matched) { - if (rd->remove_symbol) { + if (has_valid_op) { + if (want_remove_symbol) { g_hash_table_remove (cd->metric_res->symbols, key); } - if (rd->remove_weight) { + if (want_remove_score) { cd->metric_res->score -= rd->ms->score; } } - - return FALSE; } static void @@ -311,7 +327,7 @@ composites_metric_callback (gpointer key, gpointer value, gpointer data) cd->task = task; cd->metric_res = (struct metric_result *)metric_res; - cd->symbols_to_remove = g_tree_new (remove_compare_data); + cd->symbols_to_remove = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); cd->checked = rspamd_mempool_alloc0 (task->task_pool, NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); @@ -322,9 +338,9 @@ composites_metric_callback (gpointer key, gpointer value, gpointer data) cd); /* Remove symbols that are in composites */ - g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); + g_hash_table_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); /* Free list */ - g_tree_destroy (cd->symbols_to_remove); + g_hash_table_unref (cd->symbols_to_remove); } void