diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-06-16 20:11:54 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-06-16 20:11:54 +0100 |
commit | 986426d29fa6d1e520e6dad10740337d6caacb4c (patch) | |
tree | 4197e5f71e47f01dd1f2db3a7ec9fc6bf7e81704 | |
parent | 9a0a491315d573fe19eae5187ab7463ac996aa03 (diff) | |
download | rspamd-986426d29fa6d1e520e6dad10740337d6caacb4c.tar.gz rspamd-986426d29fa6d1e520e6dad10740337d6caacb4c.zip |
[Rework] Modernize logging for expressions
-rw-r--r-- | src/libutil/expression.c | 103 | ||||
-rw-r--r-- | test/lua/unit/expressions.lua | 1 |
2 files changed, 79 insertions, 25 deletions
diff --git a/src/libutil/expression.c b/src/libutil/expression.c index 9b166ee25..b7dc0cc96 100644 --- a/src/libutil/expression.c +++ b/src/libutil/expression.c @@ -20,6 +20,8 @@ #include "util.h" #include "utlist.h" #include "ottery.h" +#include "libserver/logger.h" +#include "libcryptobox/cryptobox.h" #include <math.h> #define RSPAMD_EXPR_FLAG_NEGATE (1 << 0) @@ -35,6 +37,17 @@ enum rspamd_expression_elt_type { ELT_LIMIT }; +enum rspamd_expression_op_flag { + RSPAMD_EXPRESSION_UNARY = 1u << 0u, + RSPAMD_EXPRESSION_BINARY = 1u << 1u, + RSPAMD_EXPRESSION_NARY = 1u << 2u +}; + +struct rspamd_expression_operation { + enum rspamd_expression_op op; + guint logical_priority; +}; + struct rspamd_expression_elt { enum rspamd_expression_elt_type type; union { @@ -53,6 +66,7 @@ struct rspamd_expression { GArray *expressions; GPtrArray *expression_stack; GNode *ast; + gchar *log_id; guint next_resort; guint evals; }; @@ -65,6 +79,13 @@ struct rspamd_expr_process_data { rspamd_expression_process_cb process_closure; }; +#define msg_debug_expression(...) rspamd_conditional_debug_fast (NULL, NULL, \ + rspamd_expression_log_id, "expression", e->log_id, \ + G_STRFUNC, \ + __VA_ARGS__) + +INIT_LOG_MODULE(expression) + static GQuark rspamd_expr_quark (void) { @@ -389,6 +410,9 @@ rspamd_expression_destroy (struct rspamd_expression *expr) if (expr->ast) { g_node_destroy (expr->ast); } + if (expr->log_id) { + g_free (expr->log_id); + } g_free (expr); } @@ -632,6 +656,9 @@ rspamd_parse_expression (const gchar *line, gsize len, e->subr = subr; e->evals = 0; e->next_resort = ottery_rand_range (MAX_RESORT_EVALS) + MIN_RESORT_EVALS; + e->log_id = g_malloc0 (RSPAMD_LOG_ID_LEN + 1); + guint64 h = rspamd_cryptobox_fast_hash (line, len, 0xdeadbabe); + rspamd_snprintf (e->log_id, RSPAMD_LOG_ID_LEN + 1, "%xL", h); /* Shunting-yard algorithm */ while (p < end) { @@ -959,6 +986,9 @@ err: return FALSE; } +/* + * Node optimizer function: skip nodes that are not relevant + */ static gboolean rspamd_ast_node_done (struct rspamd_expression_elt *elt, struct rspamd_expression_elt *parelt, gdouble acc, gdouble lim) @@ -1008,10 +1038,10 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt, break; case OP_MULT: case OP_AND: - ret = !acc; + ret = acc == 0; break; case OP_OR: - ret = !!acc; + ret = acc != 0; break; default: g_assert (0); @@ -1023,45 +1053,67 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt, static gdouble rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val, - gdouble acc, gdouble lim, gboolean first_elt) + gdouble acc, gdouble lim) { gdouble ret = val; g_assert (elt->type == ELT_OP); + if (isnan (acc)) { + switch (elt->p.op) { + case OP_PLUS: + case OP_MINUS: + case OP_OR: + case OP_AND: + case OP_MULT: + case OP_DIVIDE: + /* Hack for arithmetics */ + if (!isnan (lim)) { + acc = lim; + } + else { + return val; + } + break; + default: + acc = val; + break; + } + } + switch (elt->p.op) { case OP_NOT: ret = fabs (val) > DOUBLE_EPSILON ? 0.0 : 1.0; break; case OP_PLUS: - ret = first_elt ? (val) : (acc + val); + ret = acc + val; break; case OP_MULT: - ret = first_elt ? (val) : (acc * val); + ret = acc * val; break; case OP_MINUS: - ret = first_elt ? (val) : (acc - val); + ret = acc - val; break; case OP_DIVIDE: - ret = first_elt ? (val) : (acc / val); + ret = acc / val; break; case OP_GE: - ret = first_elt ? (val >= lim) : (acc >= lim); + ret = acc >= lim; break; case OP_GT: - ret = first_elt ? (val > lim) : (acc > lim); + ret = acc > lim; break; case OP_LE: - ret = first_elt ? (val <= lim) : (acc <= lim); + ret = acc <= lim; break; case OP_LT: - ret = first_elt ? (val < lim) : (acc < lim); + ret = acc < lim; break; case OP_AND: - ret = first_elt ? (val) : (acc * val); + ret = (acc * val); break; case OP_OR: - ret = first_elt ? (val) : (acc + val); + ret = (acc + val); break; default: g_assert (0); @@ -1072,17 +1124,18 @@ rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val, } static gdouble -rspamd_ast_process_node (struct rspamd_expression *expr, GNode *node, +rspamd_ast_process_node (struct rspamd_expression *e, GNode *node, struct rspamd_expr_process_data *process_data) { struct rspamd_expression_elt *elt, *celt, *parelt = NULL; GNode *cld; - gdouble acc = NAN, lim = 0; + gdouble acc = NAN, lim = NAN; gdouble t1, t2, val; gboolean calc_ticks = FALSE; elt = node->data; + switch (elt->type) { case ELT_ATOM: if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) { @@ -1093,7 +1146,7 @@ rspamd_ast_process_node (struct rspamd_expression *expr, GNode *node, * of some shifted address to provide some sort of jittering for * ticks evaluation */ - if ((expr->evals & 0x1F) == (GPOINTER_TO_UINT (node) >> 4 & 0x1F)) { + if ((e->evals & 0x1F) == (GPOINTER_TO_UINT (node) >> 4 & 0x1F)) { calc_ticks = TRUE; t1 = rspamd_get_ticks (TRUE); } @@ -1111,17 +1164,19 @@ rspamd_ast_process_node (struct rspamd_expression *expr, GNode *node, if (calc_ticks) { t2 = rspamd_get_ticks (TRUE); elt->p.atom->avg_ticks += ((t2 - t1) - elt->p.atom->avg_ticks) / - (expr->evals); + (e->evals); } elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED; } acc = elt->value; + msg_debug_expression ("atom: elt=%s; acc=%.1f", elt->p.atom->str, acc); break; case ELT_LIMIT: acc = elt->p.lim; + msg_debug_expression ("limit: lim=%.1f; acc=%.1f;", elt->p.lim, acc); break; case ELT_OP: g_assert (node->children != NULL); @@ -1145,17 +1200,15 @@ rspamd_ast_process_node (struct rspamd_expression *expr, GNode *node, continue; } - val = rspamd_ast_process_node (expr, cld, process_data); - - if (isnan (acc)) { - acc = rspamd_ast_do_op (elt, val, 0, lim, TRUE); - } - else { - acc = rspamd_ast_do_op (elt, val, acc, lim, FALSE); - } + val = rspamd_ast_process_node (e, cld, process_data); + msg_debug_expression ("before op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val); + acc = rspamd_ast_do_op (elt, val, acc, lim); + msg_debug_expression ("after op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val); + /* Check if we need to process further */ if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) { if (rspamd_ast_node_done (elt, parelt, acc, lim)) { + msg_debug_expression ("optimizer: done"); return acc; } } diff --git a/test/lua/unit/expressions.lua b/test/lua/unit/expressions.lua index 34dc8e13a..8633560ba 100644 --- a/test/lua/unit/expressions.lua +++ b/test/lua/unit/expressions.lua @@ -76,6 +76,7 @@ context("Rspamd expressions", function() end -- Expression is destroyed when the corresponding pool is destroyed cases = { + {'(E) && ((B + B + B + B) >= 1)', 0}, {'A & B | !C', 0}, {'A & (!B | C)', 1}, {'A + B + C + D + E + F >= 2', 1}, |