]> source.dussan.org Git - rspamd.git/commitdiff
* Fix expression parser: make it recursive and allow expressions inside function...
author: Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 23 Apr 2009 12:18:00 +0000 (16:18 +0400)
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 23 Apr 2009 12:18:00 +0000 (16:18 +0400)
* Rewrite functions interface and implement arguments parsing

src/expressions.c
src/expressions.h
src/plugins/regexp.c

index cd92e979b8495b9e0140d5f85b275de172157ffb..c115ad85ca97c2e09e4a67a2a9121f281a2e36f4 100644 (file)
@@ -248,6 +248,27 @@ insert_expression (memory_pool_t *pool, struct expression **head, int type, char
        }
 }
 
+/*
+ * Turn the text of one function argument into an expression list.
+ * If any character of the line satisfies is_operation_symbol (), the whole
+ * line is handed to parse_expression (); otherwise a single EXPR_STR node
+ * holding a pool-allocated copy of the line is returned.
+ * All memory is taken from the given pool.
+ */
+static struct expression*
+maybe_parse_expression (memory_pool_t *pool, char *line)
+{
+       struct expression *node;
+       char *cursor;
+
+       /* Look for the first operator symbol; one hit is enough to recurse */
+       for (cursor = line; *cursor != '\0'; cursor ++) {
+               if (is_operation_symbol (*cursor)) {
+                       return parse_expression (pool, line);
+               }
+       }
+
+       /* No operators found: wrap the text into a standalone string node */
+       node = memory_pool_alloc (pool, sizeof (struct expression));
+       node->next = NULL;
+       node->type = EXPR_STR;
+       node->content.operand = memory_pool_strdup (pool, line);
+
+       return node;
+}
+
 /*
  * Make inverse polish record for specified expression
  * Memory is allocated from given pool
@@ -258,10 +279,11 @@ parse_expression (memory_pool_t *pool, char *line)
        struct expression *expr = NULL;
        struct expression_stack *stack = NULL;
        struct expression_function *func = NULL, *old;
-       struct expression_argument *arg;
+       struct expression *arg;
        GQueue *function_stack;
        char *p, *c, *str, op;
        gboolean in_regexp = FALSE;
+       int brackets = 0;
 
        enum {
                SKIP_SPACES,
@@ -303,7 +325,7 @@ parse_expression (memory_pool_t *pool, char *line)
                                                return NULL;
                                        }
                                        /* Pop all operators from stack to nearest '(' or to head */
-                                       while (stack->op != '(') {
+                                       while (stack && stack->op != '(') {
                                                op = delete_expression_stack (&stack);
                                                if (op != '(') {
                                                        insert_expression (pool, &expr, EXPR_OPERATION, op, NULL);
@@ -416,22 +438,14 @@ parse_expression (memory_pool_t *pool, char *line)
                                }
                                if (!in_regexp) {
                                        /* Append argument to list */
-                                       if (*p == ',' || *p == ')') {
-                                               arg = memory_pool_alloc (pool, sizeof (struct expression_argument));
-                                               if (*(p - 1) != ')') {
-                                                       /* Not a function argument */
-                                                       str = memory_pool_alloc (pool, p - c + 1);
-                                                       g_strlcpy (str, c, (p - c + 1));
-                                                       g_strstrip (str);
-                                                       arg->type = EXPRESSION_ARGUMENT_NORMAL;
-                                                       arg->data = str;
-                                                       func->args = g_list_append (func->args, arg);
-                                               }
-                                               else {
-                                                       arg->type = EXPRESSION_ARGUMENT_FUNCTION;
-                                                       arg->data = old;
-                                                       func->args = g_list_append (func->args, arg);
-                                               }
+                                       if (*p == ',' || (*p == ')' && brackets == 0)) {
+                                               arg = memory_pool_alloc (pool, sizeof (struct expression));
+                                               str = memory_pool_alloc (pool, p - c + 1);
+                                               g_strlcpy (str, c, (p - c + 1));
+                                               g_strstrip (str);
+                                               /* Recursive call */
+                                               arg = maybe_parse_expression (pool, str);
+                                               func->args = g_list_append (func->args, arg);
                                                /* Pop function */
                                                if (*p == ')') {
                                                        /* Last function in chain, goto skipping spaces state */
@@ -443,16 +457,11 @@ parse_expression (memory_pool_t *pool, char *line)
                                                }
                                                c = p + 1;
                                        }
-                                       if (*p == '(') {
-                                               /* Push current function to stack */
-                                               g_queue_push_tail (function_stack, func);
-                                               func = memory_pool_alloc (pool, sizeof (struct expression_function));
-                                               func->name = memory_pool_alloc (pool, p - c + 1);
-                                               func->args = NULL;
-                                               g_strlcpy (func->name, c, (p - c + 1));
-                                               g_strstrip (func->name);
-                                               state = READ_FUNCTION_ARGUMENT;
-                                               c = p + 1;
+                                       else if (*p == '(') {
+                                               brackets ++;
+                                       }
+                                       else if (*p == ')') {
+                                               brackets --;
                                        }
                                }
                                else if (*p == '/' && *(p - 1) != '\\') {
@@ -684,6 +693,95 @@ call_expression_function (struct expression_function *func, struct worker_task *
        return selected->func (task, func->args);
 }
 
+/*
+ * Evaluate one function argument that was stored as an expression list.
+ *
+ * A single-node expression of type EXPR_REGEXP or EXPR_STR yields an
+ * EXPRESSION_ARGUMENT_NORMAL result whose data is the node's operand.
+ * A single EXPR_FUNCTION node is called and yields an
+ * EXPRESSION_ARGUMENT_BOOL result, but only when want_string is FALSE.
+ * A multi-node expression is treated as an inverse polish record made of
+ * function calls and the operators '!', '&' and '|'; it is evaluated to an
+ * EXPRESSION_ARGUMENT_BOOL result, again only when want_string is FALSE.
+ *
+ * The result structure is allocated from task->task_pool.
+ * NULL is returned (after a warning) when expr is NULL, when the expression
+ * does not fit the requested kind, or when it is malformed.
+ */
+struct expression_argument *
+get_function_arg (struct expression *expr, struct worker_task *task, gboolean want_string)
+{
+       GQueue *stack;
+       gsize cur, op1, op2;
+       struct expression_argument *res;
+       struct expression *it;
+
+       if (expr == NULL) {
+               msg_warn ("get_function_arg: NULL expression passed");
+               return NULL;
+       }
+       if (expr->next == NULL) {
+               /* Single node: plain operand or a lone function call */
+               res = memory_pool_alloc (task->task_pool, sizeof (struct expression_argument));
+               if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR) {
+                       res->type = EXPRESSION_ARGUMENT_NORMAL;
+                       res->data = expr->content.operand;
+               }
+               else if (expr->type == EXPR_FUNCTION && !want_string) {
+                       res->type = EXPRESSION_ARGUMENT_BOOL;
+                       cur = call_expression_function (expr->content.operand, task);
+                       res->data = GSIZE_TO_POINTER (cur);
+               }
+               else {
+                       msg_warn ("get_function_arg: cannot parse argument: it contains operator or bool expression that is not wanted");
+                       return NULL;
+               }
+               return res;
+       }
+       else if (!want_string) {
+               stack = g_queue_new ();
+
+               for (it = expr; it != NULL; it = it->next) {
+                       if (it->type == EXPR_REGEXP || it->type == EXPR_STR) {
+                               g_queue_free (stack);
+                               msg_warn ("get_function_arg: cannot parse function arguments that contains regexps or strings");
+                               return NULL;
+                       }
+                       else if (it->type == EXPR_FUNCTION) {
+                               cur = (gsize)call_expression_function ((struct expression_function *)it->content.operand, task);
+                               msg_debug ("get_function_arg: function %s returned %s", ((struct expression_function *)it->content.operand)->name,
+                                                                                                                               cur ? "true" : "false");
+                               /* The call result is an operand: without this push no operator ever finds anything on the stack */
+                               g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
+                       }
+                       else if (it->type == EXPR_OPERATION) {
+                               if (g_queue_is_empty (stack)) {
+                                       /* Queue has no operands for operation, exiting */
+                                       msg_warn ("get_function_arg: invalid expression");
+                                       g_queue_free (stack);
+                                       return NULL;
+                               }
+                               switch (it->content.operation) {
+                                       case '!':
+                                               op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                                               op1 = !op1;
+                                               g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
+                                               break;
+                                       case '&':
+                                               op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                                               op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                                               g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
+                                               /* Must not fall through into '|': that would pop two more operands */
+                                               break;
+                                       case '|':
+                                               op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                                               op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                                               g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
+                                               break;
+                                       default:
+                                               /* Unknown operators are skipped */
+                                               break;
+                               }
+                       }
+               }
+
+               /* Allocate the result only once the expression is known to be valid */
+               res = memory_pool_alloc (task->task_pool, sizeof (struct expression_argument));
+               res->type = EXPRESSION_ARGUMENT_BOOL;
+               if (!g_queue_is_empty (stack)) {
+                       res->data = g_queue_pop_head (stack);
+               }
+               else {
+                       res->data = GSIZE_TO_POINTER (FALSE);
+               }
+               /* The queue is heap-allocated by GLib, not by the pool, so release it explicitly */
+               g_queue_free (stack);
+
+               return res;
+       }
+
+       /* Multi-node expression while a plain string was requested */
+       msg_warn ("get_function_arg: invalid expression argument");
+
+       return NULL;
+}
+
 void
 register_expression_function (const char *name, rspamd_internal_func_t func)
 {
@@ -713,8 +811,8 @@ rspamd_compare_encoding (struct worker_task *task, GList *args)
                return FALSE;
        }
 
-       arg = args->data;
-       if (arg->type == EXPRESSION_ARGUMENT_FUNCTION) {
+       arg = get_function_arg (args->data, task, TRUE);
+       if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
                msg_warn ("rspamd_compare_encoding: invalid argument to function is passed");
                return FALSE;
        }
@@ -733,8 +831,8 @@ rspamd_header_exists (struct worker_task *task, GList *args)
                return FALSE;
        }
 
-       arg = args->data;
-       if (arg->type == EXPRESSION_ARGUMENT_FUNCTION) {
+       arg = get_function_arg (args->data, task, TRUE);
+       if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
                msg_warn ("rspamd_header_exists: invalid argument to function is passed");
                return FALSE;
        }
@@ -767,7 +865,7 @@ rspamd_parts_distance (struct worker_task *task, GList *args)
        }
        else {
                errno = 0;
-               arg = args->data;
+               arg = get_function_arg (args->data, task, TRUE);
                threshold = strtoul ((char *)arg->data, NULL, 10);
                if (errno != 0) {
                        msg_info ("rspamd_parts_distance: bad numeric value for threshold \"%s\", assume it 100", (char *)args->data);
@@ -811,14 +909,14 @@ rspamd_content_type_compare_param (struct worker_task *task, GList *args)
                msg_warn ("rspamd_content_type_compare_param: no parameters to function");
                return FALSE;
        }
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_name = arg->data;
        args = g_list_next (args);
        if (args == NULL) {
                msg_warn ("rspamd_content_type_compare_param: too few params to function");
                return FALSE;
        }
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_pattern = arg->data;
        
        part = g_mime_message_get_mime_part (task->message);
@@ -875,7 +973,7 @@ rspamd_content_type_has_param (struct worker_task *task, GList *args)
                msg_warn ("rspamd_content_type_compare_param: no parameters to function");
                return FALSE;
        }
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_name = arg->data;
        part = g_mime_message_get_mime_part (task->message);
        if (part) {
@@ -916,7 +1014,7 @@ rspamd_content_type_is_subtype (struct worker_task *task, GList *args)
                return FALSE;
        }
        
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_pattern = arg->data;
        part = g_mime_message_get_mime_part (task->message);
        if (part) {
@@ -974,7 +1072,7 @@ rspamd_content_type_is_type (struct worker_task *task, GList *args)
                return FALSE;
        }
        
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_pattern = arg->data;
 
        part = g_mime_message_get_mime_part (task->message);
@@ -1042,7 +1140,7 @@ rspamd_recipients_distance (struct worker_task *task, GList *args)
                return FALSE;
        }
        
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        errno = 0;
        threshold = strtod ((char *)arg->data, NULL);
        if (errno != 0) {
@@ -1310,7 +1408,7 @@ rspamd_has_content_part (struct worker_task *task, GList *args)
                return FALSE;
        }
        
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_type = arg->data;
        args = args->next;
        if (args) {
@@ -1333,15 +1431,15 @@ rspamd_has_content_part_len (struct worker_task *task, GList *args)
                return FALSE;
        }
        
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_type = arg->data;
        args = args->next;
        if (args) {
-               arg = args->data;
+               arg = get_function_arg (args->data, task, TRUE);
                param_subtype = arg->data;
                args = args->next;
                if (args) {
-                       arg = args->data;
+                       arg = get_function_arg (args->data, task, TRUE);
                        errno = 0;
                        min = strtoul (arg->data, NULL, 10);
                        if (errno != 0) {
@@ -1350,7 +1448,7 @@ rspamd_has_content_part_len (struct worker_task *task, GList *args)
                        }
                        args = args->next;
                        if (args) {
-                               arg = args->data;
+                               arg = get_function_arg (args->data, task, TRUE);
                                max = strtoul (arg->data, NULL, 10);
                                if (errno != 0) {
                                        msg_warn ("rspamd_has_content_part_len: invalid numeric value '%s': %s", (char *)arg->data, strerror (errno));
@@ -1375,7 +1473,7 @@ rspamd_compare_transfer_encoding (struct worker_task *task, GList *args)
                return FALSE;
        }
        
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        enc_req = g_mime_part_encoding_from_string (arg->data);
 #ifndef GMIME24
        if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
index 501974112741d845a4b3179f3fb9fe1867027d68..5debe87b2237cfaf3c6ac38a4fa0a02291a6ce5f 100644 (file)
@@ -24,7 +24,7 @@ struct expression_function {
 struct expression_argument {
        enum {
                EXPRESSION_ARGUMENT_NORMAL,
-               EXPRESSION_ARGUMENT_FUNCTION
+               EXPRESSION_ARGUMENT_BOOL,
        } type;                                                                                                         /**< type of argument (text or other function)          */
        void *data;                                                                                                     /**< pointer to its data                                                        */
 };
@@ -104,4 +104,13 @@ void task_cache_add (struct worker_task *task, void *pointer, int32_t result);
  */
 int32_t task_cache_check (struct worker_task *task, void *pointer);
 
+/**
+ * Parse and return a single function argument for a function (may recurse)
+ * @param expr expression structure that represents function's argument
+ * @param task task object
+ * @param want_string return NULL if argument is not a string
+ * @return expression argument structure or NULL if failed
+ */
+struct expression_argument *get_function_arg (struct expression *expr, struct worker_task *task, gboolean want_string);
+
 #endif
index 6af883943259b764436d4936d384aaefd255076a..4de5984c9ef57376418893cf7f3eb6584dfb4edd 100644 (file)
@@ -38,8 +38,6 @@
 
 struct regexp_module_item {
        struct expression *expr;
-       int regexp_number;
-       int op_number;
        char *symbol;
 };
 
@@ -96,10 +94,6 @@ read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, c
                                msg_warn ("read_regexp_expression: cannot parse regexp, skip expression %s = \"%s\"", symbol, line);
                                return FALSE;
                        }
-                       chain->regexp_number ++;
-               }
-               else {
-                       chain->op_number ++;
                }
                cur = cur->next;
        }
@@ -331,12 +325,12 @@ optimize_regexp_expression (struct expression **e, GQueue *stack, gboolean res)
        return ret;
 }
 
-static void
-process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
+static gboolean
+process_regexp_expression (struct expression *expr, struct worker_task *task)
 {
        GQueue *stack;
        gsize cur, op1, op2;
-       struct expression *it = item->expr;
+       struct expression *it = expr;
        gboolean try_optimize = TRUE;
        
        stack = g_queue_new ();
@@ -365,7 +359,7 @@ process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
                        if (g_queue_is_empty (stack)) {
                                /* Queue has no operands for operation, exiting */
                                g_queue_free (stack);
-                               return;
+                               return FALSE;
                        }
                        try_optimize = TRUE;
                        switch (it->content.operation) {
@@ -394,12 +388,21 @@ process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
        if (!g_queue_is_empty (stack)) {
                op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
                if (op1) {
-                       /* Add symbol to results */
-                       insert_result (task, regexp_module_ctx->metric, item->symbol, op1, NULL);
+                       return TRUE;
                }
        }
 
        g_queue_free (stack);
+
+       return FALSE;
+}
+
+/*
+ * Evaluate the expression of a regexp module item for the given task and,
+ * when process_regexp_expression () reports TRUE, insert the item's symbol
+ * into the module's metric via insert_result () with the value 1.
+ */
+static void
+process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
+{
+       if (!process_regexp_expression (item->expr, task)) {
+               /* Expression did not match: nothing to record */
+               return;
+       }
+
+       insert_result (task, regexp_module_ctx->metric, item->symbol, 1, NULL);
+}
 
 static int
@@ -429,14 +432,14 @@ rspamd_regexp_match_number (struct worker_task *task, GList *args)
                return FALSE;
        }
        
-       arg = args->data;
+       arg = get_function_arg (args->data, task, TRUE);
        param_count = strtoul (arg->data, NULL, 10);
        
-       cur = g_list_next (args);
+       cur = args->next;
        while (cur) {
-               arg = args->data;
-               if (arg->type == EXPRESSION_ARGUMENT_FUNCTION) {
-                       if (call_expression_function ((struct expression_function *)arg->data, task)) {
+               arg = get_function_arg (cur->data, task, FALSE);
+               if (arg && arg->type == EXPRESSION_ARGUMENT_BOOL) {
+                       if ((gboolean)GPOINTER_TO_SIZE (arg->data)) {
                                res ++;
                        }
                }
@@ -456,7 +459,9 @@ rspamd_regexp_match_number (struct worker_task *task, GList *args)
                                }
                                re_cache_add (param_pattern, re);
                        }
-                       res += process_regexp (re, task);
+                       if (process_regexp_expression (cur->data, task)) {
+                               res ++;
+                       }
                        if (res >= param_count) {
                                return TRUE;
                        }