aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-23 16:18:00 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-23 16:18:00 +0400
commit140910a0f6c3dc857f5b949bd9caff91188e78b0 (patch)
tree5cf5e6c746330bb82b4dee06bfb25fec82d68db1
parentbf47a84713c6705326b0eb02e9fab7a061f86695 (diff)
downloadrspamd-140910a0f6c3dc857f5b949bd9caff91188e78b0.tar.gz
rspamd-140910a0f6c3dc857f5b949bd9caff91188e78b0.zip
* Fix expression parser: make it recursive and allow expressions inside function's arguments
* Rewrite functions interface and implement arguments parsing
-rw-r--r--src/expressions.c188
-rw-r--r--src/expressions.h11
-rw-r--r--src/plugins/regexp.c41
3 files changed, 176 insertions, 64 deletions
diff --git a/src/expressions.c b/src/expressions.c
index cd92e979b..c115ad85c 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -248,6 +248,27 @@ insert_expression (memory_pool_t *pool, struct expression **head, int type, char
}
}
+static struct expression*
+maybe_parse_expression (memory_pool_t *pool, char *line)
+{
+ struct expression *expr;
+ char *p = line;
+
+ while (*p) {
+ if (is_operation_symbol (*p)) {
+ return parse_expression (pool, line);
+ }
+ p ++;
+ }
+
+ expr = memory_pool_alloc (pool, sizeof (struct expression));
+ expr->type = EXPR_STR;
+ expr->content.operand = memory_pool_strdup (pool, line);
+ expr->next = NULL;
+
+ return expr;
+}
+
/*
* Make inverse polish record for specified expression
* Memory is allocated from given pool
@@ -258,10 +279,11 @@ parse_expression (memory_pool_t *pool, char *line)
struct expression *expr = NULL;
struct expression_stack *stack = NULL;
struct expression_function *func = NULL, *old;
- struct expression_argument *arg;
+ struct expression *arg;
GQueue *function_stack;
char *p, *c, *str, op;
gboolean in_regexp = FALSE;
+ int brackets = 0;
enum {
SKIP_SPACES,
@@ -303,7 +325,7 @@ parse_expression (memory_pool_t *pool, char *line)
return NULL;
}
/* Pop all operators from stack to nearest '(' or to head */
- while (stack->op != '(') {
+ while (stack && stack->op != '(') {
op = delete_expression_stack (&stack);
if (op != '(') {
insert_expression (pool, &expr, EXPR_OPERATION, op, NULL);
@@ -416,22 +438,14 @@ parse_expression (memory_pool_t *pool, char *line)
}
if (!in_regexp) {
/* Append argument to list */
- if (*p == ',' || *p == ')') {
- arg = memory_pool_alloc (pool, sizeof (struct expression_argument));
- if (*(p - 1) != ')') {
- /* Not a function argument */
- str = memory_pool_alloc (pool, p - c + 1);
- g_strlcpy (str, c, (p - c + 1));
- g_strstrip (str);
- arg->type = EXPRESSION_ARGUMENT_NORMAL;
- arg->data = str;
- func->args = g_list_append (func->args, arg);
- }
- else {
- arg->type = EXPRESSION_ARGUMENT_FUNCTION;
- arg->data = old;
- func->args = g_list_append (func->args, arg);
- }
+ if (*p == ',' || (*p == ')' && brackets == 0)) {
+ arg = memory_pool_alloc (pool, sizeof (struct expression));
+ str = memory_pool_alloc (pool, p - c + 1);
+ g_strlcpy (str, c, (p - c + 1));
+ g_strstrip (str);
+ /* Recursive call */
+ arg = maybe_parse_expression (pool, str);
+ func->args = g_list_append (func->args, arg);
/* Pop function */
if (*p == ')') {
/* Last function in chain, goto skipping spaces state */
@@ -443,16 +457,11 @@ parse_expression (memory_pool_t *pool, char *line)
}
c = p + 1;
}
- if (*p == '(') {
- /* Push current function to stack */
- g_queue_push_tail (function_stack, func);
- func = memory_pool_alloc (pool, sizeof (struct expression_function));
- func->name = memory_pool_alloc (pool, p - c + 1);
- func->args = NULL;
- g_strlcpy (func->name, c, (p - c + 1));
- g_strstrip (func->name);
- state = READ_FUNCTION_ARGUMENT;
- c = p + 1;
+ else if (*p == '(') {
+ brackets ++;
+ }
+ else if (*p == ')') {
+ brackets --;
}
}
else if (*p == '/' && *(p - 1) != '\\') {
@@ -684,6 +693,95 @@ call_expression_function (struct expression_function *func, struct worker_task *
return selected->func (task, func->args);
}
+struct expression_argument *
+get_function_arg (struct expression *expr, struct worker_task *task, gboolean want_string)
+{
+ GQueue *stack;
+ gsize cur, op1, op2;
+ struct expression_argument *res;
+ struct expression *it;
+
+ if (expr == NULL) {
+ msg_warn ("get_function_arg: NULL expression passed");
+ return NULL;
+ }
+ if (expr->next == NULL) {
+ res = memory_pool_alloc (task->task_pool, sizeof (struct expression_argument));
+ if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR) {
+ res->type = EXPRESSION_ARGUMENT_NORMAL;
+ res->data = expr->content.operand;
+ }
+ else if (expr->type == EXPR_FUNCTION && !want_string) {
+ res->type = EXPRESSION_ARGUMENT_BOOL;
+ cur = call_expression_function (expr->content.operand, task);
+ res->data = GSIZE_TO_POINTER (cur);
+ }
+ else {
+ msg_warn ("get_function_arg: cannot parse argument: it contains operator or bool expression that is not wanted");
+ return NULL;
+ }
+ return res;
+ }
+ else if (!want_string) {
+ res = memory_pool_alloc (task->task_pool, sizeof (struct expression_argument));
+ res->type = EXPRESSION_ARGUMENT_BOOL;
+ stack = g_queue_new ();
+ it = expr;
+
+ while (it) {
+ if (it->type == EXPR_REGEXP || it->type == EXPR_STR) {
+ g_queue_free (stack);
+ msg_warn ("get_function_arg: cannot parse function arguments that contains regexps or strings");
+ return NULL;
+ } else if (it->type == EXPR_FUNCTION) {
+ cur = (gsize)call_expression_function ((struct expression_function *)it->content.operand, task);
+ msg_debug ("get_function_arg: function %s returned %s", ((struct expression_function *)it->content.operand)->name,
+ cur ? "true" : "false");
+ } else if (it->type == EXPR_OPERATION) {
+ if (g_queue_is_empty (stack)) {
+ /* Queue has no operands for operation, exiting */
+ msg_warn ("get_function_arg: invalid expression");
+ g_queue_free (stack);
+ return NULL;
+ }
+ switch (it->content.operation) {
+ case '!':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op1 = !op1;
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
+ break;
+ case '&':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
+ case '|':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
+ default:
+ it = it->next;
+ continue;
+ }
+ }
+ if (it) {
+ it = it->next;
+ }
+ }
+ if (!g_queue_is_empty (stack)) {
+ res->data = g_queue_pop_head (stack);
+ }
+ else {
+ res->data = GSIZE_TO_POINTER (FALSE);
+ }
+
+ return res;
+ }
+
+ msg_warn ("get_function_arg: invalid expression argument");
+
+ return NULL;
+}
+
void
register_expression_function (const char *name, rspamd_internal_func_t func)
{
@@ -713,8 +811,8 @@ rspamd_compare_encoding (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
- if (arg->type == EXPRESSION_ARGUMENT_FUNCTION) {
+ arg = get_function_arg (args->data, task, TRUE);
+ if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
msg_warn ("rspamd_compare_encoding: invalid argument to function is passed");
return FALSE;
}
@@ -733,8 +831,8 @@ rspamd_header_exists (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
- if (arg->type == EXPRESSION_ARGUMENT_FUNCTION) {
+ arg = get_function_arg (args->data, task, TRUE);
+ if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
msg_warn ("rspamd_header_exists: invalid argument to function is passed");
return FALSE;
}
@@ -767,7 +865,7 @@ rspamd_parts_distance (struct worker_task *task, GList *args)
}
else {
errno = 0;
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
threshold = strtoul ((char *)arg->data, NULL, 10);
if (errno != 0) {
msg_info ("rspamd_parts_distance: bad numeric value for threshold \"%s\", assume it 100", (char *)args->data);
@@ -811,14 +909,14 @@ rspamd_content_type_compare_param (struct worker_task *task, GList *args)
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_name = arg->data;
args = g_list_next (args);
if (args == NULL) {
msg_warn ("rspamd_content_type_compare_param: too few params to function");
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_pattern = arg->data;
part = g_mime_message_get_mime_part (task->message);
@@ -875,7 +973,7 @@ rspamd_content_type_has_param (struct worker_task *task, GList *args)
msg_warn ("rspamd_content_type_compare_param: no parameters to function");
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_name = arg->data;
part = g_mime_message_get_mime_part (task->message);
if (part) {
@@ -916,7 +1014,7 @@ rspamd_content_type_is_subtype (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_pattern = arg->data;
part = g_mime_message_get_mime_part (task->message);
if (part) {
@@ -974,7 +1072,7 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_pattern = arg->data;
part = g_mime_message_get_mime_part (task->message);
@@ -1042,7 +1140,7 @@ rspamd_recipients_distance (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
errno = 0;
threshold = strtod ((char *)arg->data, NULL);
if (errno != 0) {
@@ -1310,7 +1408,7 @@ rspamd_has_content_part (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_type = arg->data;
args = args->next;
if (args) {
@@ -1333,15 +1431,15 @@ rspamd_has_content_part_len (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_type = arg->data;
args = args->next;
if (args) {
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_subtype = arg->data;
args = args->next;
if (args) {
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
errno = 0;
min = strtoul (arg->data, NULL, 10);
if (errno != 0) {
@@ -1350,7 +1448,7 @@ rspamd_has_content_part_len (struct worker_task *task, GList *args)
}
args = args->next;
if (args) {
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
max = strtoul (arg->data, NULL, 10);
if (errno != 0) {
msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': %s", (char *)arg->data, strerror (errno));
@@ -1375,7 +1473,7 @@ rspamd_compare_transfer_encoding (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
enc_req = g_mime_part_encoding_from_string (arg->data);
#ifndef GMIME24
if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
diff --git a/src/expressions.h b/src/expressions.h
index 501974112..5debe87b2 100644
--- a/src/expressions.h
+++ b/src/expressions.h
@@ -24,7 +24,7 @@ struct expression_function {
struct expression_argument {
enum {
EXPRESSION_ARGUMENT_NORMAL,
- EXPRESSION_ARGUMENT_FUNCTION
+ EXPRESSION_ARGUMENT_BOOL,
} type; /**< type of argument (text or other function) */
void *data; /**< pointer to its data */
};
@@ -104,4 +104,13 @@ void task_cache_add (struct worker_task *task, void *pointer, int32_t result);
*/
int32_t task_cache_check (struct worker_task *task, void *pointer);
+/**
+ * Parse and return a single function argument for a function (may recurse)
+ * @param expr expression structure that represents function's argument
+ * @param task task object
+ * @param want_string return NULL if argument is not a string
+ * @return expression argument structure or NULL if failed
+ */
+struct expression_argument *get_function_arg (struct expression *expr, struct worker_task *task, gboolean want_string);
+
#endif
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 6af883943..4de5984c9 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -38,8 +38,6 @@
struct regexp_module_item {
struct expression *expr;
- int regexp_number;
- int op_number;
char *symbol;
};
@@ -96,10 +94,6 @@ read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, c
msg_warn ("read_regexp_expression: cannot parse regexp, skip expression %s = \"%s\"", symbol, line);
return FALSE;
}
- chain->regexp_number ++;
- }
- else {
- chain->op_number ++;
}
cur = cur->next;
}
@@ -331,12 +325,12 @@ optimize_regexp_expression (struct expression **e, GQueue *stack, gboolean res)
return ret;
}
-static void
-process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
+static gboolean
+process_regexp_expression (struct expression *expr, struct worker_task *task)
{
GQueue *stack;
gsize cur, op1, op2;
- struct expression *it = item->expr;
+ struct expression *it = expr;
gboolean try_optimize = TRUE;
stack = g_queue_new ();
@@ -365,7 +359,7 @@ process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
if (g_queue_is_empty (stack)) {
/* Queue has no operands for operation, exiting */
g_queue_free (stack);
- return;
+ return FALSE;
}
try_optimize = TRUE;
switch (it->content.operation) {
@@ -394,12 +388,21 @@ process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
if (!g_queue_is_empty (stack)) {
op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
if (op1) {
- /* Add symbol to results */
- insert_result (task, regexp_module_ctx->metric, item->symbol, op1, NULL);
+ return TRUE;
}
}
g_queue_free (stack);
+
+ return FALSE;
+}
+
+static void
+process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
+{
+ if (process_regexp_expression (item->expr, task)) {
+ insert_result (task, regexp_module_ctx->metric, item->symbol, 1, NULL);
+ }
}
static int
@@ -429,14 +432,14 @@ rspamd_regexp_match_number (struct worker_task *task, GList *args)
return FALSE;
}
- arg = args->data;
+ arg = get_function_arg (args->data, task, TRUE);
param_count = strtoul (arg->data, NULL, 10);
- cur = g_list_next (args);
+ cur = args->next;
while (cur) {
- arg = args->data;
- if (arg->type == EXPRESSION_ARGUMENT_FUNCTION) {
- if (call_expression_function ((struct expression_function *)arg->data, task)) {
+ arg = get_function_arg (cur->data, task, FALSE);
+ if (arg && arg->type == EXPRESSION_ARGUMENT_BOOL) {
+ if ((gboolean)GPOINTER_TO_SIZE (arg->data)) {
res ++;
}
}
@@ -456,7 +459,9 @@ rspamd_regexp_match_number (struct worker_task *task, GList *args)
}
re_cache_add (param_pattern, re);
}
- res += process_regexp (re, task);
+ if (process_regexp_expression (cur->data, task)) {
+ res ++;
+ }
if (res >= param_count) {
return TRUE;
}