|
|
@@ -22,17 +22,21 @@ |
|
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include <expression.h> |
|
|
|
#include "config.h" |
|
|
|
#include "expression.h" |
|
|
|
#include "regexp.h" |
|
|
|
|
|
|
|
/*
 * Operators understood by the expression parser.
 *
 * OP_INVALID is deliberately zero: operators travel through the operator
 * stack packed into pointers, and an empty stack is reported as a value
 * that converts back to 0, i.e. OP_INVALID.
 */
enum rspamd_expression_op {
	OP_INVALID = 0,
	OP_PLUS, /* || or + */
	OP_MULT, /* && or * */
	OP_NOT, /* ! */
	OP_LT, /* < */
	OP_GT, /* > */
	OP_LE, /* <= */
	OP_GE, /* >= */
	OP_OBRACE, /* ( */
	OP_CBRACE /* ) */
};
|
|
|
|
|
|
|
struct rspamd_expression_elt { |
|
|
@@ -55,20 +59,26 @@ struct rspamd_expression_elt { |
|
|
|
struct rspamd_expression { |
|
|
|
struct rspamd_atom_subr *subr; |
|
|
|
GArray *expressions; |
|
|
|
GPtrArray *expression_stack; |
|
|
|
GArray *expression_stack; |
|
|
|
}; |
|
|
|
|
|
|
|
static GQuark |
|
|
|
rspamd_expr_quark (void) |
|
|
|
{ |
|
|
|
return g_quark_from_static_string ("rspamd-expression"); |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
|
rspamd_expr_stack_push (struct rspamd_expression *expr, |
|
|
|
struct rspamd_expression_elt *elt) |
|
|
|
gpointer elt) |
|
|
|
{ |
|
|
|
g_ptr_array_add (expr->expression_stack, elt); |
|
|
|
g_array_append_val (expr->expression_stack, elt); |
|
|
|
} |
|
|
|
|
|
|
|
static struct rspamd_expression_elt * |
|
|
|
static gpointer |
|
|
|
rspamd_expr_stack_pop (struct rspamd_expression *expr) |
|
|
|
{ |
|
|
|
struct rspamd_expression_elt *e; |
|
|
|
gpointer e; |
|
|
|
gint idx; |
|
|
|
|
|
|
|
if (expr->expression_stack->len == 0) { |
|
|
@@ -76,18 +86,191 @@ rspamd_expr_stack_pop (struct rspamd_expression *expr) |
|
|
|
} |
|
|
|
|
|
|
|
idx = expr->expression_stack->len - 1; |
|
|
|
e = g_ptr_array_index (expr->expression_stack, idx); |
|
|
|
g_ptr_array_remove_index_fast (expr->expression_stack, idx); |
|
|
|
e = g_array_index (expr->expression_stack, gpointer, idx); |
|
|
|
g_array_remove_index_fast (expr->expression_stack, idx); |
|
|
|
|
|
|
|
return e; |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return operation priority |
|
|
|
*/ |
|
|
|
static gint |
|
|
|
rspamd_expr_logic_priority (enum rspamd_expression_op op) |
|
|
|
{ |
|
|
|
gint ret = 0; |
|
|
|
|
|
|
|
switch (op) { |
|
|
|
case OP_NOT: |
|
|
|
ret = 5; |
|
|
|
break; |
|
|
|
case OP_MULT: |
|
|
|
ret = 4; |
|
|
|
break; |
|
|
|
case OP_PLUS: |
|
|
|
ret = 3; |
|
|
|
break; |
|
|
|
case OP_GE: |
|
|
|
case OP_GT: |
|
|
|
case OP_LE: |
|
|
|
case OP_LT: |
|
|
|
ret = 2; |
|
|
|
break; |
|
|
|
case OP_OBRACE: |
|
|
|
case OP_CBRACE: |
|
|
|
ret = 1; |
|
|
|
break; |
|
|
|
case OP_INVALID: |
|
|
|
ret = -1; |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return FALSE if symbol is not operation symbol (operand) |
|
|
|
* Return TRUE if symbol is operation symbol |
|
|
|
*/ |
|
|
|
static gboolean |
|
|
|
rspamd_expr_is_operation_symbol (gchar a) |
|
|
|
{ |
|
|
|
switch (a) { |
|
|
|
case '!': |
|
|
|
case '&': |
|
|
|
case '|': |
|
|
|
case '(': |
|
|
|
case ')': |
|
|
|
case '>': |
|
|
|
case '<': |
|
|
|
return TRUE; |
|
|
|
} |
|
|
|
|
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
|
|
|
|
/* Return character representation of operation */ |
|
|
|
static enum rspamd_expression_op |
|
|
|
rspamd_expr_str_to_op (const gchar *a, const gchar *end, const gchar **next) |
|
|
|
{ |
|
|
|
enum rspamd_expression_op op = OP_INVALID; |
|
|
|
|
|
|
|
g_assert (a < end); |
|
|
|
|
|
|
|
switch (*a) { |
|
|
|
case '!': |
|
|
|
case '&': |
|
|
|
case '|': |
|
|
|
case '+': |
|
|
|
case '*': |
|
|
|
case '(': |
|
|
|
case ')': { |
|
|
|
if (a < end - 1) { |
|
|
|
if ((a[0] == '&' && a[1] == '&') || |
|
|
|
(a[0] == '|' && a[1] == '|')) { |
|
|
|
*next = a + 2; |
|
|
|
} |
|
|
|
else { |
|
|
|
*next = a + 1; |
|
|
|
} |
|
|
|
} |
|
|
|
/* XXX: not especially effective */ |
|
|
|
switch (*a) { |
|
|
|
case '!': |
|
|
|
op = OP_NOT; |
|
|
|
break; |
|
|
|
case '&': |
|
|
|
case '*': |
|
|
|
op = OP_MULT; |
|
|
|
break; |
|
|
|
case '|': |
|
|
|
case '+': |
|
|
|
op = OP_PLUS; |
|
|
|
break; |
|
|
|
case ')': |
|
|
|
op = OP_CBRACE; |
|
|
|
break; |
|
|
|
case '(': |
|
|
|
op = OP_OBRACE; |
|
|
|
break; |
|
|
|
default: |
|
|
|
op = OP_INVALID; |
|
|
|
break; |
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
case 'O': |
|
|
|
case 'o': |
|
|
|
if ((gulong)(end - a) >= sizeof ("or") && |
|
|
|
g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) { |
|
|
|
*next = a + sizeof ("or") - 1; |
|
|
|
op = OP_PLUS; |
|
|
|
} |
|
|
|
break; |
|
|
|
case 'A': |
|
|
|
case 'a': |
|
|
|
if ((gulong)(end - a) >= sizeof ("and") && |
|
|
|
g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) { |
|
|
|
*next = a + sizeof ("and") - 1; |
|
|
|
op = OP_MULT; |
|
|
|
} |
|
|
|
break; |
|
|
|
case 'N': |
|
|
|
case 'n': |
|
|
|
if ((gulong)(end - a) >= sizeof ("not") && |
|
|
|
g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) { |
|
|
|
*next = a + sizeof ("not") - 1; |
|
|
|
op = OP_NOT; |
|
|
|
} |
|
|
|
break; |
|
|
|
case '>': |
|
|
|
if (a < end - 1 && a[1] == '=') { |
|
|
|
*next = a + 2; |
|
|
|
op = OP_GE; |
|
|
|
} |
|
|
|
else { |
|
|
|
*next = a + 1; |
|
|
|
op = OP_GT; |
|
|
|
} |
|
|
|
break; |
|
|
|
case '<': |
|
|
|
if (a < end - 1 && a[1] == '=') { |
|
|
|
*next = a + 2; |
|
|
|
op = OP_LE; |
|
|
|
} |
|
|
|
else { |
|
|
|
*next = a + 1; |
|
|
|
op = OP_LT; |
|
|
|
} |
|
|
|
break; |
|
|
|
default: |
|
|
|
op = OP_INVALID; |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
return op; |
|
|
|
} |
|
|
|
|
|
|
|
gboolean |
|
|
|
rspamd_parse_expression (const gchar *line, gsize len, |
|
|
|
struct rspamd_atom_subr *subr, gpointer subr_data, |
|
|
|
rspamd_mempool_t *pool, GError **err, |
|
|
|
struct rspamd_expression **target) |
|
|
|
{ |
|
|
|
struct rspamd_expression *e; |
|
|
|
struct rspamd_expression_elt elt; |
|
|
|
rspamd_expression_atom_t *atom; |
|
|
|
rspamd_regexp_t *num_re; |
|
|
|
enum rspamd_expression_op op, op_stack; |
|
|
|
const gchar *p, *c, *end; |
|
|
|
|
|
|
|
enum { |
|
|
|
PARSE_ATOM = 0, |
|
|
|
PARSE_OP, |
|
|
|
PARSE_LIM, |
|
|
|
SKIP_SPACES |
|
|
|
} state; |
|
|
|
|
|
|
|
g_assert (line != NULL); |
|
|
|
g_assert (subr != NULL && subr->parse != NULL); |
|
|
|
|
|
|
@@ -95,6 +278,203 @@ rspamd_parse_expression (const gchar *line, gsize len, |
|
|
|
len = strlen (line); |
|
|
|
} |
|
|
|
|
|
|
|
num_re = rspamd_regexp_cache_create (NULL, "/^\\d+\\s*[><]/", NULL, NULL); |
|
|
|
|
|
|
|
p = line; |
|
|
|
c = line; |
|
|
|
end = line + len; |
|
|
|
e = g_slice_alloc (sizeof (*e)); |
|
|
|
e->expressions = g_array_new (FALSE, FALSE, |
|
|
|
sizeof (struct rspamd_expression_elt)); |
|
|
|
e->expression_stack = g_array_sized_new (FALSE, FALSE, sizeof (gpointer), 32); |
|
|
|
|
|
|
|
/* Shunting-yard algorithm */ |
|
|
|
while (p < end) { |
|
|
|
switch (state) { |
|
|
|
case PARSE_ATOM: |
|
|
|
if (g_ascii_isspace (*p)) { |
|
|
|
state = SKIP_SPACES; |
|
|
|
} |
|
|
|
else if (rspamd_expr_is_operation_symbol (*p)) { |
|
|
|
state = PARSE_ATOM; |
|
|
|
} |
|
|
|
else { |
|
|
|
/* |
|
|
|
* First of all, we check some pre-conditions: |
|
|
|
* 1) if we have 'and ' or 'or ' or 'not ' strings, they are op |
|
|
|
* 2) if we have full numeric string, then we check for the following: |
|
|
|
* ^\d+\s*[><]$ |
|
|
|
*/ |
|
|
|
if ((gulong)(end - p) > sizeof ("and ") && |
|
|
|
(g_ascii_strncasecmp (p, "and ", sizeof ("and ") - 1) == 0 || |
|
|
|
g_ascii_strncasecmp (p, "not ", sizeof ("not ") - 1) == 0 )) { |
|
|
|
state = PARSE_OP; |
|
|
|
} |
|
|
|
else if ((gulong)(end - p) > sizeof ("or ") && |
|
|
|
g_ascii_strncasecmp (p, "or ", sizeof ("or ") - 1) == 0) { |
|
|
|
state = PARSE_OP; |
|
|
|
} |
|
|
|
else if (rspamd_regexp_search (num_re, p, end - p, NULL, NULL, |
|
|
|
FALSE)) { |
|
|
|
c = p; |
|
|
|
state = PARSE_LIM; |
|
|
|
} |
|
|
|
else { |
|
|
|
/* Try to parse atom */ |
|
|
|
atom = subr->parse (p, end - p, pool, subr_data, err); |
|
|
|
if (atom == NULL) { |
|
|
|
/* We couldn't parse the atom, so go out */ |
|
|
|
goto err; |
|
|
|
} |
|
|
|
g_assert (atom->len != 0); |
|
|
|
p = p + atom->len; |
|
|
|
|
|
|
|
/* Push to output */ |
|
|
|
elt.type = ELT_ATOM; |
|
|
|
elt.p.atom = atom; |
|
|
|
g_array_append_val (e->expressions, elt); |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
case PARSE_LIM: |
|
|
|
if (g_ascii_isdigit (*p)) { |
|
|
|
p ++; |
|
|
|
} |
|
|
|
else { |
|
|
|
if (p - c > 0) { |
|
|
|
elt.type = ELT_LIMIT; |
|
|
|
elt.p.lim.val = strtoul (c, NULL, 10); |
|
|
|
g_array_append_val (e->expressions, elt); |
|
|
|
c = p; |
|
|
|
state = SKIP_SPACES; |
|
|
|
} |
|
|
|
else { |
|
|
|
g_set_error (err, rspamd_expr_quark(), 400, "Empty number"); |
|
|
|
goto err; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
case PARSE_OP: |
|
|
|
op = rspamd_expr_str_to_op (p, end, &p); |
|
|
|
if (op == OP_INVALID) { |
|
|
|
g_set_error (err, rspamd_expr_quark(), 500, "Bad operator %c", |
|
|
|
*p); |
|
|
|
goto err; |
|
|
|
} |
|
|
|
else if (op == OP_OBRACE) { |
|
|
|
/* |
|
|
|
* If the token is a left parenthesis, then push it onto |
|
|
|
* the stack. |
|
|
|
*/ |
|
|
|
rspamd_expr_stack_push (e, GINT_TO_POINTER (op)); |
|
|
|
} |
|
|
|
else if (op == OP_CBRACE) { |
|
|
|
/* |
|
|
|
* Until the token at the top of the stack is a left |
|
|
|
* parenthesis, pop operators off the stack onto the |
|
|
|
* output queue. |
|
|
|
* |
|
|
|
* Pop the left parenthesis from the stack, |
|
|
|
* but not onto the output queue. |
|
|
|
* |
|
|
|
* If the stack runs out without finding a left parenthesis, |
|
|
|
* then there are mismatched parentheses. |
|
|
|
*/ |
|
|
|
do { |
|
|
|
op = GPOINTER_TO_INT (rspamd_expr_stack_pop (e)); |
|
|
|
|
|
|
|
if (op == OP_INVALID) { |
|
|
|
g_set_error (err, rspamd_expr_quark(), 600, |
|
|
|
"Braces mismatch"); |
|
|
|
goto err; |
|
|
|
} |
|
|
|
|
|
|
|
if (op != OP_OBRACE) { |
|
|
|
elt.type = ELT_OP; |
|
|
|
elt.p.op = op; |
|
|
|
g_array_append_val (e->expressions, elt); |
|
|
|
} |
|
|
|
|
|
|
|
} while (op != OP_OBRACE); |
|
|
|
} |
|
|
|
else { |
|
|
|
/* |
|
|
|
* While there is an operator token, o2, at the top of |
|
|
|
* the operator stack, and either: |
|
|
|
* |
|
|
|
* - o1 is left-associative and its precedence is less than |
|
|
|
* or equal to that of o2, or |
|
|
|
* - o1 is right associative, and has precedence less than |
|
|
|
* that of o2, |
|
|
|
* |
|
|
|
* then pop o2 off the operator stack, onto the output queue; |
|
|
|
* |
|
|
|
* push o1 onto the operator stack. |
|
|
|
*/ |
|
|
|
|
|
|
|
for (;;) { |
|
|
|
op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e)); |
|
|
|
|
|
|
|
if (op_stack == OP_INVALID) { |
|
|
|
/* Stack is empty */ |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
/* We ignore associativity for now */ |
|
|
|
if (op_stack != OP_OBRACE && |
|
|
|
rspamd_expr_logic_priority (op) <= |
|
|
|
rspamd_expr_logic_priority(op_stack)) { |
|
|
|
elt.type = ELT_OP; |
|
|
|
elt.p.op = op_stack; |
|
|
|
g_array_append_val (e->expressions, elt); |
|
|
|
} |
|
|
|
else { |
|
|
|
/* Push op_stack back */ |
|
|
|
rspamd_expr_stack_push (e, GINT_TO_POINTER (op_stack)); |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Push new operator itself */ |
|
|
|
rspamd_expr_stack_push (e, GINT_TO_POINTER (op)); |
|
|
|
} |
|
|
|
|
|
|
|
break; |
|
|
|
case SKIP_SPACES: |
|
|
|
if (g_ascii_isspace (*p)) { |
|
|
|
p ++; |
|
|
|
} |
|
|
|
else if (rspamd_expr_is_operation_symbol (*p)) { |
|
|
|
state = PARSE_OP; |
|
|
|
} |
|
|
|
else { |
|
|
|
state = PARSE_ATOM; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Now we process the stack and push operators to the output */ |
|
|
|
while ((op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e))) |
|
|
|
!= OP_INVALID) { |
|
|
|
if (op_stack != OP_OBRACE) { |
|
|
|
elt.type = ELT_OP; |
|
|
|
elt.p.op = op_stack; |
|
|
|
g_array_append_val (e->expressions, elt); |
|
|
|
} |
|
|
|
else { |
|
|
|
g_set_error (err, rspamd_expr_quark(), 600, |
|
|
|
"Braces mismatch"); |
|
|
|
goto err; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (*target) { |
|
|
|
*target = e; |
|
|
|
} |
|
|
|
|
|
|
|
return TRUE; |
|
|
|
|
|
|
|
err: |
|
|
|
return FALSE; |
|
|
|
} |
|
|
|
|