diff options
-rw-r--r-- | src/libmime/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/libmime/expressions.c | 1582 | ||||
-rw-r--r-- | src/libmime/expressions.h | 144 | ||||
-rw-r--r-- | src/libmime/filter.c | 298 | ||||
-rw-r--r-- | src/libmime/filter.h | 7 | ||||
-rw-r--r-- | src/libmime/mime_expressions.c | 2234 | ||||
-rw-r--r-- | src/libmime/mime_expressions.h | 49 | ||||
-rw-r--r-- | src/libserver/cfg_file.h | 24 | ||||
-rw-r--r-- | src/libserver/cfg_rcl.c | 14 | ||||
-rw-r--r-- | src/libserver/task.c | 34 | ||||
-rw-r--r-- | src/libserver/task.h | 19 | ||||
-rw-r--r-- | src/libutil/regexp.c | 4 | ||||
-rw-r--r-- | src/libutil/regexp.h | 22 | ||||
-rw-r--r-- | src/lua/lua_cfg_file.c | 16 | ||||
-rw-r--r-- | src/lua/lua_common.c | 1 | ||||
-rw-r--r-- | src/lua/lua_config.c | 114 | ||||
-rw-r--r-- | src/lua/lua_regexp.c | 1 | ||||
-rw-r--r-- | src/lua/lua_task.c | 51 | ||||
-rw-r--r-- | src/plugins/chartable.c | 1 | ||||
-rw-r--r-- | src/plugins/dkim_check.c | 1 | ||||
-rw-r--r-- | src/plugins/fuzzy_check.c | 1 | ||||
-rw-r--r-- | src/plugins/regexp.c | 1863 | ||||
-rw-r--r-- | src/plugins/spf.c | 1 | ||||
-rw-r--r-- | src/plugins/surbl.c | 1 |
24 files changed, 2553 insertions, 3931 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt index 36de02c82..bb678d2f9 100644 --- a/src/libmime/CMakeLists.txt +++ b/src/libmime/CMakeLists.txt @@ -1,6 +1,6 @@ # Librspamd mime SET(LIBRSPAMDMIMESRC - ${CMAKE_CURRENT_SOURCE_DIR}/expressions.c + ${CMAKE_CURRENT_SOURCE_DIR}/mime_expressions.c ${CMAKE_CURRENT_SOURCE_DIR}/filter.c ${CMAKE_CURRENT_SOURCE_DIR}/images.c ${CMAKE_CURRENT_SOURCE_DIR}/message.c diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c deleted file mode 100644 index 547cc0d58..000000000 --- a/src/libmime/expressions.c +++ /dev/null @@ -1,1582 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "util.h" -#include "cfg_file.h" -#include "main.h" -#include "message.h" -#include "fuzzy.h" -#include "expressions.h" -#include "html.h" -#include "lua/lua_common.h" -#include "diff.h" - -gboolean rspamd_compare_encoding (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_header_exists (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_parts_distance (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_recipients_distance (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_has_only_html_part (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_is_html_balanced (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_has_html_tag (struct rspamd_task *task, - GList * args, - void *unused); -gboolean rspamd_has_fake_html (struct rspamd_task *task, - GList * args, - void *unused); - -/* - * List of internal functions of rspamd - * Sorted by name to use bsearch - */ -static struct _fl { - const gchar *name; - rspamd_internal_func_t func; - void *user_data; -} rspamd_functions_list[] = { - {"compare_encoding", rspamd_compare_encoding, NULL}, - {"compare_parts_distance", rspamd_parts_distance, NULL}, - {"compare_recipients_distance", rspamd_recipients_distance, NULL}, - {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, - {"has_fake_html", rspamd_has_fake_html, NULL}, - {"has_html_tag", rspamd_has_html_tag, NULL}, - {"has_only_html_part", rspamd_has_only_html_part, NULL}, - {"header_exists", rspamd_header_exists, NULL}, - {"is_html_balanced", rspamd_is_html_balanced, NULL}, - {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL} -}; - -static struct _fl *list_ptr = &rspamd_functions_list[0]; -static guint32 functions_number = sizeof (rspamd_functions_list) / - sizeof (struct _fl); -static gboolean list_allocated = FALSE; - -/* Bsearch routine */ -static gint -fl_cmp (const void *s1, const void *s2) -{ - struct _fl *fl1 = (struct _fl *)s1; - struct _fl *fl2 = (struct _fl *)s2; - return strcmp (fl1->name, fl2->name); -} - -/* Cache for regular expressions that are used in functions */ -void * -re_cache_check (const gchar *line, rspamd_mempool_t *pool) -{ - GHashTable *re_cache; - - re_cache = rspamd_mempool_get_variable (pool, "re_cache"); - - if (re_cache == NULL) { - re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_set_variable (pool, "re_cache", re_cache, - (rspamd_mempool_destruct_t)g_hash_table_destroy); - return NULL; - } - return g_hash_table_lookup (re_cache, line); -} - -void -re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool) -{ - GHashTable *re_cache; - - re_cache = rspamd_mempool_get_variable (pool, "re_cache"); - - if (re_cache == NULL) { - re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_set_variable (pool, "re_cache", re_cache, - (rspamd_mempool_destruct_t)g_hash_table_destroy); - } - - g_hash_table_insert (re_cache, (gpointer)line, pointer); -} - -void -re_cache_del (const gchar *line, rspamd_mempool_t *pool) -{ - GHashTable *re_cache; - - re_cache = rspamd_mempool_get_variable (pool, "re_cache"); - - if (re_cache != NULL) { - g_hash_table_remove (re_cache, line); - } - -} - -/* - * Functions for parsing expressions - */ -struct expression_stack { - gchar op; - struct expression_stack *next; -}; - -/* - * Push operand or operator to stack - */ -static struct expression_stack * -push_expression_stack (rspamd_mempool_t * pool, - struct expression_stack *head, - gchar op) -{ - struct expression_stack *new; - new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack)); - new->op = op; - new->next = head; - return new; -} - -/* - * Delete symbol from stack, return pointer to operand or operator (casted to void* ) - */ -static gchar -delete_expression_stack (struct expression_stack **head) -{ - struct expression_stack *cur; - gchar res; - - if (*head == NULL) - return 0; - - cur = *head; - res = cur->op; - - *head = cur->next; - return res; -} - -/* - * Return operation priority - */ -static gint -logic_priority (gchar a) -{ - switch (a) { - case '!': - return 3; - case '|': - case '&': - return 2; - case '(': - return 1; - default: - return 0; - } -} - -/* - * Return FALSE if symbol is not operation symbol (operand) - * Return TRUE if symbol is operation symbol - */ -static gboolean -is_operation_symbol (gchar *a) -{ - switch (*a) { - case '!': - case '&': - case '|': - case '(': - case ')': - return TRUE; - case 'O': - case 'o': - if (g_ascii_strncasecmp (a, "or", - sizeof ("or") - 1) == 0 && g_ascii_isspace (a[2])) { - return TRUE; - } - break; - case 'A': - case 'a': - if (g_ascii_strncasecmp (a, "and", - sizeof ("and") - 1) == 0 && g_ascii_isspace (a[3])) { - return TRUE; - } - break; - case 'N': - case 'n': - if (g_ascii_strncasecmp (a, "not", - sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) { - return TRUE; - } - break; - } - - return FALSE; -} - -/* Return character representation of operation */ -static gchar -op_to_char (gchar *a, gchar **next) -{ - switch (*a) { - case '!': - case '&': - case '|': - case '(': - case ')': - if ((a[0] == '&' && a[1] == '&') || - (a[0] == '|' && a[1] == '|')) { - *next = a + 2; - } - else { - *next = a + 1; - } - return *a; - case 'O': - case 'o': - if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) { - *next = a + sizeof ("or") - 1; - return '|'; - } - break; - case 'A': - case 'a': - if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) { - *next = a + sizeof ("and") - 1; - return '&'; - } - break; - case 'N': - case 'n': - if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) { - *next = a + sizeof ("not") - 1; - return '!'; - } - break; - } - - return '\0'; -} - -/* - * Return TRUE if symbol can be regexp flag - */ -static gboolean -is_regexp_flag (gchar a) -{ - switch (a) { - case 'i': - case 'm': - case 'x': - case 's': - case 'u': - case 'o': - case 'r': - case 'H': - case 'M': - case 'P': - case 'U': - case 'X': - case 'T': - case 'S': - return TRUE; - default: - return FALSE; - } -} - -static void -insert_expression (rspamd_mempool_t * pool, - struct expression **head, - gint type, - gchar op, - void *operand, - const gchar *orig) -{ - struct expression *new, *cur; - - new = rspamd_mempool_alloc (pool, sizeof (struct expression)); - new->type = type; - new->orig = orig; - if (new->type != EXPR_OPERATION) { - new->content.operand = operand; - } - else { - new->content.operation = op; - } - new->next = NULL; - - if (!*head) { - *head = new; - } - else { - cur = *head; - while (cur->next) { - cur = cur->next; - } - cur->next = new; - } -} - -static struct expression * -maybe_parse_expression (rspamd_mempool_t * pool, gchar *line) -{ - struct expression *expr; - gchar *p = line; - - while (*p) { - if (is_operation_symbol (p)) { - return parse_expression (pool, line); - } - p++; - } - - expr = rspamd_mempool_alloc (pool, sizeof (struct expression)); - expr->type = EXPR_STR; - expr->content.operand = rspamd_mempool_strdup (pool, line); - expr->next = NULL; - - return expr; -} - -/* - * Make inverse polish record for specified expression - * Memory is allocated from given pool - */ -struct expression * -parse_expression (rspamd_mempool_t * pool, gchar *line) -{ - struct expression *expr = NULL; - struct expression_stack *stack = NULL; - struct expression_function *func = NULL; - struct expression *arg; - GQueue *function_stack; - gchar *p, *c, *str, op, newop, *copy, *next; - gboolean in_regexp = FALSE; - gint brackets = 0; - - enum { - SKIP_SPACES, - READ_OPERATOR, - READ_REGEXP, - READ_REGEXP_FLAGS, - READ_FUNCTION, - READ_FUNCTION_ARGUMENT, - } state = SKIP_SPACES; - - if (line == NULL || pool == NULL) { - return NULL; - } - - msg_debug ("parsing expression {{ %s }}", line); - - function_stack = g_queue_new (); - copy = rspamd_mempool_strdup (pool, line); - p = line; - c = p; - while (*p) { - switch (state) { - case SKIP_SPACES: - if (!g_ascii_isspace (*p)) { - if (is_operation_symbol (p)) { - state = READ_OPERATOR; - } - else if (*p == '/') { - c = ++p; - state = READ_REGEXP; - } - else { - c = p; - state = READ_FUNCTION; - } - } - else { - p++; - } - break; - case READ_OPERATOR: - if (*p == ')') { - if (stack == NULL) { - return NULL; - } - /* Pop all operators from stack to nearest '(' or to head */ - while (stack && stack->op != '(') { - op = delete_expression_stack (&stack); - if (op != '(') { - insert_expression (pool, - &expr, - EXPR_OPERATION, - op, - NULL, - copy); - } - } - if (stack) { - /* Remove open brace itself */ - delete_expression_stack (&stack); - } - } - else if (*p == '(') { - /* Push it to stack */ - stack = push_expression_stack (pool, stack, *p); - } - else { - if (stack == NULL) { - newop = op_to_char (p, &next); - if (newop != '\0') { - stack = push_expression_stack (pool, stack, newop); - p = next; - state = SKIP_SPACES; - continue; - } - } - /* Check priority of logic operation */ - else { - newop = op_to_char (p, &next); - if (newop != '\0') { - if (logic_priority (stack->op) < - logic_priority (newop)) { - stack = push_expression_stack (pool, stack, newop); - } - else { - /* Pop all operations that have higher priority than this one */ - while ((stack != NULL) && - (logic_priority (stack->op) >= - logic_priority (newop))) { - op = delete_expression_stack (&stack); - if (op != '(') { - insert_expression (pool, - &expr, - EXPR_OPERATION, - op, - NULL, - copy); - } - } - stack = push_expression_stack (pool, stack, newop); - } - } - p = next; - state = SKIP_SPACES; - continue; - } - } - p++; - state = SKIP_SPACES; - break; - - case READ_REGEXP: - if (*p == '/' && *(p - 1) != '\\') { - if (*(p + 1)) { - p++; - } - state = READ_REGEXP_FLAGS; - } - else { - p++; - } - break; - - case READ_REGEXP_FLAGS: - if (!is_regexp_flag (*p) || *(p + 1) == '\0') { - if (c != p) { - if ((is_regexp_flag (*p) || *p == - '/') && *(p + 1) == '\0') { - p++; - } - str = rspamd_mempool_alloc (pool, p - c + 2); - rspamd_strlcpy (str, c - 1, (p - c + 2)); - g_strstrip (str); - msg_debug ("found regexp: %s", str); - if (strlen (str) > 0) { - insert_expression (pool, - &expr, - EXPR_REGEXP, - 0, - str, - copy); - } - } - c = p; - state = SKIP_SPACES; - } - else { - p++; - } - break; - - case READ_FUNCTION: - if (*p == '/') { - /* In fact it is regexp */ - state = READ_REGEXP; - c++; - p++; - } - else if (*p == '(') { - func = - rspamd_mempool_alloc (pool, - sizeof (struct expression_function)); - func->name = rspamd_mempool_alloc (pool, p - c + 1); - func->args = NULL; - rspamd_strlcpy (func->name, c, (p - c + 1)); - g_strstrip (func->name); - state = READ_FUNCTION_ARGUMENT; - g_queue_push_tail (function_stack, func); - insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy); - c = ++p; - } - else if (is_operation_symbol (p)) { - /* In fact it is not function, but symbol */ - if (c != p) { - str = rspamd_mempool_alloc (pool, p - c + 1); - rspamd_strlcpy (str, c, (p - c + 1)); - g_strstrip (str); - if (strlen (str) > 0) { - insert_expression (pool, &expr, EXPR_STR, 0, str, copy); - } - } - state = READ_OPERATOR; - } - else if (*(p + 1) == '\0') { - /* In fact it is not function, but symbol */ - p++; - if (c != p) { - str = rspamd_mempool_alloc (pool, p - c + 1); - rspamd_strlcpy (str, c, (p - c + 1)); - g_strstrip (str); - if (strlen (str) > 0) { - insert_expression (pool, &expr, EXPR_STR, 0, str, copy); - } - } - state = SKIP_SPACES; - } - else { - p++; - } - break; - - case READ_FUNCTION_ARGUMENT: - if (*p == '/' && !in_regexp) { - in_regexp = TRUE; - p++; - } - if (!in_regexp) { - /* Append argument to list */ - if (*p == ',' || (*p == ')' && brackets == 0)) { - arg = NULL; - str = rspamd_mempool_alloc (pool, p - c + 1); - rspamd_strlcpy (str, c, (p - c + 1)); - g_strstrip (str); - /* Recursive call */ - arg = maybe_parse_expression (pool, str); - func->args = g_list_append (func->args, arg); - /* Pop function */ - if (*p == ')') { - /* Last function in chain, goto skipping spaces state */ - func = g_queue_pop_tail (function_stack); - if (g_queue_get_length (function_stack) == 0) { - state = SKIP_SPACES; - } - } - c = p + 1; - } - else if (*p == '(') { - brackets++; - } - else if (*p == ')') { - brackets--; - } - } - else if (*p == '/' && *(p - 1) != '\\') { - in_regexp = FALSE; - } - p++; - break; - } - } - - g_queue_free (function_stack); - if (state != SKIP_SPACES) { - /* In fact we got bad expression */ - msg_warn ("expression \"%s\" is invalid", line); - return NULL; - } - /* Pop everything from stack */ - while (stack != NULL) { - op = delete_expression_stack (&stack); - if (op != '(') { - insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); - } - } - - return expr; -} - -/* - * Rspamd regexp utility functions - */ -struct rspamd_regexp_element * -parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode) -{ - const gchar *begin, *end, *p, *src, *start; - gchar *dbegin, *dend; - struct rspamd_regexp_element *result; - rspamd_regexp_t *re; - GError *err = NULL; - GString *re_flags; - - if (line == NULL) { - msg_err ("cannot parse NULL line"); - return NULL; - } - - if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) { - return ((struct rspamd_regexp_element *)rspamd_regexp_get_ud (re)); - } - - src = line; - result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_element)); - /* Skip whitespaces */ - while (g_ascii_isspace (*line)) { - line++; - } - if (*line == '\0') { - msg_warn ("got empty regexp"); - return NULL; - } - start = line; - /* First try to find header name */ - begin = strchr (line, '/'); - if (begin != NULL) { - p = begin; - end = NULL; - while (p != line) { - if (*p == '=') { - end = p; - break; - } - p--; - } - if (end) { - result->header = rspamd_mempool_alloc (pool, end - line + 1); - rspamd_strlcpy (result->header, line, end - line + 1); - result->type = REGEXP_HEADER; - line = end; - } - } - else { - result->header = rspamd_mempool_strdup (pool, line); - result->type = REGEXP_HEADER; - line = start; - } - /* Find begin of regexp */ - while (*line && *line != '/') { - line++; - } - if (*line != '\0') { - begin = line + 1; - } - else if (result->header == NULL) { - /* Assume that line without // is just a header name */ - result->header = rspamd_mempool_strdup (pool, line); - result->type = REGEXP_HEADER; - return result; - } - else { - /* We got header name earlier but have not found // expression, so it is invalid regexp */ - msg_warn ( - "got no header name (eg. header=) but without corresponding regexp, %s", - src); - return NULL; - } - /* Find end */ - end = begin; - while (*end && (*end != '/' || *(end - 1) == '\\')) { - end++; - } - if (end == begin || *end != '/') { - msg_warn ("no trailing / in regexp %s", src); - return NULL; - } - /* Parse flags */ - p = end + 1; - re_flags = g_string_sized_new (32); - while (p != NULL) { - switch (*p) { - case 'i': - case 'm': - case 's': - case 'x': - case 'u': - case 'O': - case 'r': - g_string_append_c (re_flags, *p); - p++; - break; - case 'o': - p++; - break; - /* Type flags */ - case 'H': - if (result->type == REGEXP_NONE) { - result->type = REGEXP_HEADER; - } - p++; - break; - case 'M': - if (result->type == REGEXP_NONE) { - result->type = REGEXP_MESSAGE; - } - p++; - break; - case 'P': - if (result->type == REGEXP_NONE) { - result->type = REGEXP_MIME; - } - p++; - break; - case 'U': - if (result->type == REGEXP_NONE) { - result->type = REGEXP_URL; - } - p++; - break; - case 'X': - if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) { - result->type = REGEXP_RAW_HEADER; - } - p++; - break; - case 'T': - result->is_test = TRUE; - p++; - break; - case 'S': - result->is_strong = TRUE; - p++; - break; - /* Stop flags parsing */ - default: - p = NULL; - break; - } - } - - result->regexp_text = rspamd_mempool_strdup (pool, start); - dbegin = result->regexp_text + (begin - start); - dend = result->regexp_text + (end - start); - *dend = '\0'; - - if (raw_mode) { - g_string_append_c (re_flags, 'r'); - } - - result->regexp = rspamd_regexp_new (dbegin, re_flags->str, - &err); - - g_string_free (re_flags, TRUE); - - if (result->regexp == NULL || err != NULL) { - msg_warn ("could not read regexp: %s while reading regexp %s", - err ? err->message : "unknown error", - src); - return NULL; - } - - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t) rspamd_regexp_unref, - (void *)result->regexp); - - rspamd_regexp_set_ud (result->regexp, result); - - rspamd_regexp_cache_insert (NULL, line, NULL, result->regexp); - - *dend = '/'; - - return result; -} - -gboolean -call_expression_function (struct expression_function * func, - struct rspamd_task * task, - lua_State *L) -{ - struct _fl *selected, key; - - key.name = func->name; - - selected = bsearch (&key, - list_ptr, - functions_number, - sizeof (struct _fl), - fl_cmp); - if (selected == NULL) { - /* Try to check lua function */ - return FALSE; - } - - return selected->func (task, func->args, selected->user_data); -} - -struct expression_argument * -get_function_arg (struct expression *expr, - struct rspamd_task *task, - gboolean want_string) -{ - GQueue *stack; - gsize cur, op1, op2; - struct expression_argument *res; - struct expression *it; - - if (expr == NULL) { - msg_warn ("NULL expression passed"); - return NULL; - } - if (expr->next == NULL) { - res = - rspamd_mempool_alloc (task->task_pool, - sizeof (struct expression_argument)); - if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type == - EXPR_REGEXP_PARSED) { - res->type = EXPRESSION_ARGUMENT_NORMAL; - res->data = expr->content.operand; - } - else if (expr->type == EXPR_FUNCTION && !want_string) { - res->type = EXPRESSION_ARGUMENT_BOOL; - cur = call_expression_function (expr->content.operand, task, NULL); - res->data = GSIZE_TO_POINTER (cur); - } - else { - msg_warn ( - "cannot parse argument: it contains operator or bool expression that is not wanted"); - return NULL; - } - return res; - } - else if (!want_string) { - res = - rspamd_mempool_alloc (task->task_pool, - sizeof (struct expression_argument)); - res->type = EXPRESSION_ARGUMENT_BOOL; - stack = g_queue_new (); - it = expr; - - while (it) { - if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED || - it->type == EXPR_STR) { - g_queue_free (stack); - res->type = EXPRESSION_ARGUMENT_EXPR; - res->data = expr; - return res; - } - else if (it->type == EXPR_FUNCTION) { - cur = - (gsize) call_expression_function ((struct - expression_function - *)it->content.operand, task, NULL); - debug_task ("function %s returned %s", - ((struct expression_function *)it->content.operand)->name, - cur ? "true" : "false"); - } - else if (it->type == EXPR_OPERATION) { - if (g_queue_is_empty (stack)) { - /* Queue has no operands for operation, exiting */ - debug_task ("invalid expression"); - g_queue_free (stack); - return NULL; - } - switch (it->content.operation) { - case '!': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op1 = !op1; - g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); - break; - case '&': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); - break; - case '|': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); - break; - default: - it = it->next; - continue; - } - } - if (it) { - it = it->next; - } - } - if (!g_queue_is_empty (stack)) { - res->data = g_queue_pop_head (stack); - } - else { - res->data = GSIZE_TO_POINTER (FALSE); - } - - return res; - } - - msg_warn ("invalid expression argument"); - - return NULL; -} - -void -register_expression_function (const gchar *name, - rspamd_internal_func_t func, - void *user_data) -{ - static struct _fl *new; - - functions_number++; - - new = g_new (struct _fl, functions_number); - memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl)); - if (list_allocated) { - g_free (list_ptr); - } - - list_allocated = TRUE; - new[functions_number - 1].name = name; - new[functions_number - 1].func = func; - new[functions_number - 1].user_data = user_data; - qsort (new, functions_number, sizeof (struct _fl), fl_cmp); - list_ptr = new; -} - -gboolean -rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused) -{ - struct expression_argument *arg; - - if (args == NULL || task == NULL) { - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - if (arg->type == EXPRESSION_ARGUMENT_BOOL) { - msg_warn ("invalid argument to function is passed"); - return FALSE; - } - - /* XXX: really write this function */ - return TRUE; -} - -gboolean -rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused) -{ - struct expression_argument *arg; - GList *headerlist; - - if (args == NULL || task == NULL) { - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { - msg_warn ("invalid argument to function is passed"); - return FALSE; - } - - debug_task ("try to get header %s", (gchar *)arg->data); - headerlist = message_get_header (task, - (gchar *)arg->data, - FALSE); - if (headerlist) { - return TRUE; - } - return FALSE; -} - -/* - * This function is designed to find difference between text/html and text/plain parts - * It takes one argument: difference threshold, if we have two text parts, compare - * its hashes and check for threshold, if value is greater than threshold, return TRUE - * and return FALSE otherwise. - */ -gboolean -rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused) -{ - gint threshold, threshold2 = -1, diff; - struct mime_text_part *p1, *p2; - GList *cur; - struct expression_argument *arg; - GMimeObject *parent; - const GMimeContentType *ct; - gint *pdiff; - - if (args == NULL) { - debug_task ("no threshold is specified, assume it 100"); - threshold = 100; - } - else { - errno = 0; - arg = get_function_arg (args->data, task, TRUE); - threshold = strtoul ((gchar *)arg->data, NULL, 10); - if (errno != 0) { - msg_info ("bad numeric value for threshold \"%s\", assume it 100", - (gchar *)args->data); - threshold = 100; - } - if (args->next) { - arg = get_function_arg (args->next->data, task, TRUE); - errno = 0; - threshold2 = strtoul ((gchar *)arg->data, NULL, 10); - if (errno != 0) { - msg_info ("bad numeric value for threshold \"%s\", ignore it", - (gchar *)arg->data); - threshold2 = -1; - } - } - } - - if ((pdiff = - rspamd_mempool_get_variable (task->task_pool, - "parts_distance")) != NULL) { - diff = *pdiff; - if (diff != -1) { - if (threshold2 > 0) { - if (diff >= - MIN (threshold, - threshold2) && diff < MAX (threshold, threshold2)) { - return TRUE; - } - } - else { - if (diff <= threshold) { - return TRUE; - } - } - return FALSE; - } - else { - return FALSE; - } - } - - if (g_list_length (task->text_parts) == 2) { - cur = g_list_first (task->text_parts); - p1 = cur->data; - cur = g_list_next (cur); - pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint)); - *pdiff = -1; - - if (cur == NULL) { - msg_info ("bad parts list"); - return FALSE; - } - p2 = cur->data; - /* First of all check parent object */ - if (p1->parent && p1->parent == p2->parent) { - parent = p1->parent; - ct = g_mime_object_get_content_type (parent); -#ifndef GMIME24 - if (ct == NULL || - !g_mime_content_type_is_type (ct, "multipart", "alternative")) { -#else - if (ct == NULL || - !g_mime_content_type_is_type ((GMimeContentType *)ct, - "multipart", "alternative")) { -#endif - debug_task ( - "two parts are not belong to multipart/alternative container, skip check"); - rspamd_mempool_set_variable (task->task_pool, - "parts_distance", - pdiff, - NULL); - return FALSE; - } - } - else { - debug_task ( - "message contains two parts but they are in different multi-parts"); - rspamd_mempool_set_variable (task->task_pool, - "parts_distance", - pdiff, - NULL); - return FALSE; - } - if (!p1->is_empty && !p2->is_empty) { - if (p1->diff_str != NULL && p2->diff_str != NULL) { - diff = rspamd_diff_distance_normalized (p1->diff_str, - p2->diff_str); - } - else { - diff = rspamd_fuzzy_compare_parts (p1, p2); - } - debug_task ( - "got likeliness between parts of %d%%, threshold is %d%%", - diff, - threshold); - *pdiff = diff; - rspamd_mempool_set_variable (task->task_pool, - "parts_distance", - pdiff, - NULL); - if (threshold2 > 0) { - if (diff >= - MIN (threshold, - threshold2) && diff < MAX (threshold, threshold2)) { - return TRUE; - } - } - else { - if (diff <= threshold) { - return TRUE; - } - } - } - else if ((p1->is_empty && - !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { - /* Empty and non empty parts are different */ - *pdiff = 0; - rspamd_mempool_set_variable (task->task_pool, - "parts_distance", - pdiff, - NULL); - return TRUE; - } - } - else { - debug_task ( - "message has too many text parts, so do not try to compare them with each other"); - rspamd_mempool_set_variable (task->task_pool, - "parts_distance", - pdiff, - NULL); - return FALSE; - } - - rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, - NULL); - return FALSE; -} - -struct addr_list { - const gchar *name; - const gchar *addr; -}; - -#define COMPARE_RCPT_LEN 3 -#define MIN_RCPT_TO_COMPARE 7 - -gboolean -rspamd_recipients_distance (struct rspamd_task *task, GList * args, - void *unused) -{ - struct expression_argument *arg; - InternetAddressList *cur; - double threshold; - struct addr_list *ar; - gchar *c; - gint num, i, j, hits = 0, total = 0; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - errno = 0; - threshold = strtod ((gchar *)arg->data, NULL); - if (errno != 0) { - msg_warn ("invalid numeric value '%s': %s", - (gchar *)arg->data, - strerror (errno)); - return FALSE; - } - - if (!task->rcpt_mime) { - return FALSE; - } - num = internet_address_list_length (task->rcpt_mime); - if (num < MIN_RCPT_TO_COMPARE) { - return FALSE; - } - ar = - rspamd_mempool_alloc0 (task->task_pool, num * - sizeof (struct addr_list)); - - /* Fill array */ - cur = task->rcpt_mime; -#ifdef GMIME24 - for (i = 0; i < num; i++) { - InternetAddress *iaelt = - internet_address_list_get_address(cur, i); - InternetAddressMailbox *iamb = - INTERNET_ADDRESS_IS_MAILBOX(iaelt) ? - INTERNET_ADDRESS_MAILBOX (iaelt) : NULL; - if (iamb) { - ar[i].name = internet_address_mailbox_get_addr (iamb); - if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { - ar[i].addr = c + 1; - } - } - } -#else - InternetAddress *addr; - i = 0; - while (cur) { - addr = internet_address_list_get_address (cur); - if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { - ar[i].name = rspamd_mempool_strdup (task->task_pool, - internet_address_get_addr (addr)); - if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { - *c = '\0'; - ar[i].addr = c + 1; - } - cur = internet_address_list_next (cur); - i++; - } - else { - cur = internet_address_list_next (cur); - } - } -#endif - - /* Cycle all elements in array */ - for (i = 0; i < num; i++) { - for (j = i + 1; j < num; j++) { - if (ar[i].name && ar[j].name && - g_ascii_strncasecmp (ar[i].name, ar[j].name, - COMPARE_RCPT_LEN) == 0) { - /* Common name part */ - hits++; - } - else if (ar[i].addr && ar[j].addr && - g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) { - /* Common address part, but different name */ - hits++; - } - total++; - } - } - - if ((double)(hits * num / 2.) / (double)total >= threshold) { - return TRUE; - } - - return FALSE; -} - -gboolean -rspamd_has_only_html_part (struct rspamd_task * task, GList * args, - void *unused) -{ - struct mime_text_part *p; - GList *cur; - gboolean res = FALSE; - - cur = g_list_first (task->text_parts); - while (cur) { - p = cur->data; - if (p->is_html) { - res = TRUE; - } - else { - res = FALSE; - break; - } - cur = g_list_next (cur); - } - - return res; -} - -static gboolean -is_recipient_list_sorted (const InternetAddressList * ia) -{ - const InternetAddressList *cur; - InternetAddress *addr; - gboolean res = TRUE; - struct addr_list current = { NULL, NULL }, previous = { - NULL, NULL - }; -#ifdef GMIME24 - gint num, i; -#endif - - /* Do not check to short address lists */ - if (internet_address_list_length ((InternetAddressList *)ia) < - MIN_RCPT_TO_COMPARE) { - return FALSE; - } -#ifdef GMIME24 - num = internet_address_list_length ((InternetAddressList *)ia); - cur = ia; - for (i = 0; i < num; i++) { - addr = - internet_address_list_get_address ((InternetAddressList *)cur, i); - current.addr = (gchar *)internet_address_get_name (addr); - if (previous.addr != NULL) { - if (current.addr && - g_ascii_strcasecmp (current.addr, previous.addr) < 0) { - res = FALSE; - break; - } - } - previous.addr = current.addr; - } -#else - cur = ia; - while (cur) { - addr = internet_address_list_get_address (cur); - if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { - current.addr = internet_address_get_addr (addr); - if (previous.addr != NULL) { - if (current.addr && - g_ascii_strcasecmp (current.addr, previous.addr) < 0) { - res = FALSE; - break; - } - } - previous.addr = current.addr; - } - cur = internet_address_list_next (cur); - } -#endif - - return res; -} - -gboolean -rspamd_is_recipients_sorted (struct rspamd_task * task, - GList * args, - void *unused) -{ - /* Check all types of addresses */ - if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, - GMIME_RECIPIENT_TYPE_TO)) == TRUE) { - return TRUE; - } - if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, - GMIME_RECIPIENT_TYPE_BCC)) == TRUE) { - return TRUE; - } - if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, - GMIME_RECIPIENT_TYPE_CC)) == TRUE) { - return TRUE; - } - - return FALSE; -} - -gboolean -rspamd_compare_transfer_encoding (struct rspamd_task * task, - GList * args, - void *unused) -{ - GMimeObject *part; -#ifndef GMIME24 - GMimePartEncodingType enc_req, part_enc; -#else - GMimeContentEncoding enc_req, part_enc; -#endif - struct expression_argument *arg; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); -#ifndef GMIME24 - enc_req = g_mime_part_encoding_from_string (arg->data); - if (enc_req == GMIME_PART_ENCODING_DEFAULT) { -#else - enc_req = g_mime_content_encoding_from_string (arg->data); - if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) { -#endif - msg_warn ("bad encoding type: %s", (gchar *)arg->data); - return FALSE; - } - - part = g_mime_message_get_mime_part (task->message); - if (part) { - if (GMIME_IS_PART (part)) { -#ifndef GMIME24 - part_enc = g_mime_part_get_encoding (GMIME_PART (part)); - if (part_enc == GMIME_PART_ENCODING_DEFAULT) { - /* Assume 7bit as default transfer encoding */ - part_enc = GMIME_PART_ENCODING_7BIT; - } -#else - part_enc = g_mime_part_get_content_encoding (GMIME_PART (part)); - if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) { - /* Assume 7bit as default transfer encoding */ - part_enc = GMIME_CONTENT_ENCODING_7BIT; - } -#endif - - - debug_task ("got encoding in part: %d and compare with %d", - (gint)part_enc, - (gint)enc_req); -#ifndef GMIME24 - g_object_unref (part); -#endif - - return part_enc == enc_req; - } -#ifndef GMIME24 - g_object_unref (part); -#endif - } - - return FALSE; -} - -gboolean -rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused) -{ - struct mime_text_part *p; - GList *cur; - gboolean res = TRUE; - - cur = g_list_first (task->text_parts); - while (cur) { - p = cur->data; - if (!p->is_empty && p->is_html) { - if (p->is_balanced) { - res = TRUE; - } - else { - res = FALSE; - break; - } - } - cur = g_list_next (cur); - } - - return res; - -} - -struct html_callback_data { - struct html_tag *tag; - gboolean *res; -}; - -static gboolean -search_html_node_callback (GNode * node, gpointer data) -{ - struct html_callback_data *cd = data; - struct html_node *nd; - - nd = node->data; - if (nd) { - if (nd->tag == cd->tag) { - *cd->res = TRUE; - return TRUE; - } - } - - return FALSE; -} - -gboolean -rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused) -{ - struct mime_text_part *p; - GList *cur; - struct expression_argument *arg; - struct html_tag *tag; - gboolean res = FALSE; - struct html_callback_data cd; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - tag = get_tag_by_name (arg->data); - if (tag == NULL) { - msg_warn ("unknown tag type passed as argument: %s", - (gchar *)arg->data); - return FALSE; - } - - cur = g_list_first (task->text_parts); - cd.res = &res; - cd.tag = tag; - - while (cur && res == FALSE) { - p = cur->data; - if (!p->is_empty && p->is_html && p->html_nodes) { - g_node_traverse (p->html_nodes, - G_PRE_ORDER, - G_TRAVERSE_ALL, - -1, - search_html_node_callback, - &cd); - } - cur = g_list_next (cur); - } - - return res; - -} - -gboolean -rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused) -{ - struct mime_text_part *p; - GList *cur; - gboolean res = FALSE; - - cur = g_list_first (task->text_parts); - - while (cur && res == FALSE) { - p = cur->data; - if (!p->is_empty && p->is_html && p->html_nodes == NULL) { - res = TRUE; - } - cur = g_list_next (cur); - } - - return res; - -} - - -/* - * vi:ts=4 - */ diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h deleted file mode 100644 index 469cc690d..000000000 --- a/src/libmime/expressions.h +++ /dev/null @@ -1,144 +0,0 @@ -/** - * @file expressions.h - * Rspamd expressions API - */ - -#ifndef RSPAMD_EXPRESSIONS_H -#define RSPAMD_EXPRESSIONS_H - -#include "config.h" -#include <lua.h> - -struct rspamd_task; -struct rspamd_regexp_element; - -/** - * Rspamd expression function - */ -struct expression_function { - gchar *name; /**< name of function */ - GList *args; /**< its args */ -}; - -/** - * Function's argument - */ -struct expression_argument { - enum { - EXPRESSION_ARGUMENT_NORMAL, - EXPRESSION_ARGUMENT_BOOL, - EXPRESSION_ARGUMENT_EXPR, - } type; /**< type of argument (text or other function) */ - void *data; /**< pointer to its data */ -}; - -/** - * Logic expression - */ -struct expression { - enum { - EXPR_REGEXP, - EXPR_OPERATION, - EXPR_FUNCTION, - EXPR_STR, - EXPR_REGEXP_PARSED, - } type; /**< expression type */ - union { - void *operand; - gchar operation; - } content; /**< union for storing operand or operation code */ - const gchar *orig; /**< original line */ - struct expression *next; /**< chain link */ -}; - -typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args, - void *user_data); - -/** - * Parse regexp line to regexp structure - * @param pool memory pool to use - * @param line incoming line - * @return regexp structure or NULL in case of error - */ -struct rspamd_regexp_element * parse_regexp (rspamd_mempool_t *pool, - const gchar *line, - gboolean raw_mode); - -/** - * Parse composites line to composites structure (eg. "SYMBOL1&SYMBOL2|!SYMBOL3") - * @param pool memory pool to use - * @param line incoming line - * @return expression structure or NULL in case of error - */ -struct expression * parse_expression (rspamd_mempool_t *pool, gchar *line); - -/** - * Call specified fucntion and return boolean result - * @param func function to call - * @param task task object - * @param L lua specific state - * @return TRUE or FALSE depending on function result - */ -gboolean call_expression_function (struct expression_function *func, - struct rspamd_task *task, - lua_State *L); - -/** - * Register specified function to rspamd internal functions list - * @param name name of function - * @param func pointer to function - */ -void register_expression_function (const gchar *name, - rspamd_internal_func_t func, - void *user_data); - -/** - * Add regexp to regexp cache - * @param line symbolic representation - * @param pointer regexp data - */ -void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool); - -/** - * Check regexp in cache - * @param line symbolic representation - * @return pointer to regexp data or NULL if regexp is not found - */ -void * re_cache_check (const gchar *line, rspamd_mempool_t *pool); - -/** - * Remove regexp from regexp cache - * @param line symbolic representation - */ -void re_cache_del (const gchar *line, rspamd_mempool_t *pool); - -/** - * Add regexp to regexp task cache - * @param task task object - * @param pointer regexp data - * @param result numeric result of this regexp - */ -void task_cache_add (struct rspamd_task *task, - struct rspamd_regexp_element *re, - gint32 result); - -/** - * Check regexp in cache - * @param task task object - * @param pointer regexp data - * @return numeric result if value exists or -1 if not - */ -gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re); - -/** - * Parse and return a single function argument for a function (may recurse) - * @param expr expression structure that represents function's argument - * @param task task object - * @param want_string return NULL if argument is not a string - * @return expression argument structure or NULL if failed - */ -struct expression_argument * get_function_arg (struct expression *expr, - struct rspamd_task *task, - gboolean want_string); - -#endif diff --git a/src/libmime/filter.c b/src/libmime/filter.c index 7736ba4cf..e107bc56b 100644 --- a/src/libmime/filter.c +++ b/src/libmime/filter.c @@ -29,7 +29,7 @@ #include "message.h" #include "cfg_file.h" #include "util.h" -#include "expressions.h" +#include "expression.h" #include "diff.h" #include "libstat/stat_api.h" @@ -54,6 +54,19 @@ #define BITSPERBYTE (8 * sizeof (gchar)) #define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) +static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err); +static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom); +static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom); +static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom); + +const struct rspamd_atom_subr composite_expr_subr = { + .parse = rspamd_composite_expr_parse, + .process = rspamd_composite_expr_process, + .priority = rspamd_composite_expr_priority, + .destroy = rspamd_composite_expr_destroy +}; + static inline GQuark filter_error_quark (void) { @@ -398,6 +411,7 @@ rspamd_process_filters (struct rspamd_task *task) struct composites_data { struct rspamd_task *task; + struct rspamd_composite *composite; struct metric_result *metric_res; GTree *symbols_to_remove; guint8 *checked; @@ -409,186 +423,150 @@ struct symbol_remove_data { gboolean remove_symbol; }; -static gint -remove_compare_data (gconstpointer a, gconstpointer b) + +/* + * Composites are just sequences of symbols + */ +static rspamd_expression_atom_t * +rspamd_composite_expr_parse (const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err) { - const gchar *ca = a, *cb = b; + gsize clen; + rspamd_expression_atom_t *res; + + clen = strcspn (line, ", \t(+!|&\n"); + if (clen == 0) { + /* Invalid composite atom */ + g_set_error (err, filter_error_quark (), 100, "Invalid composite: %s", + line); + return NULL; + } - return strcmp (ca, cb); -} + res = rspamd_mempool_alloc0 (pool, sizeof (*res)); + res->len = clen; + res->data = rspamd_mempool_strdup (pool, line); -static void -composites_foreach_callback (gpointer key, gpointer value, void *data) + return res; +} +static gint +rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom) { - struct composites_data *cd = (struct composites_data *)data; - struct rspamd_composite *composite = value, *ncomp; - struct expression *expr; - GQueue *stack; - GList *symbols = NULL, *s; - gsize cur, op1, op2; - gchar logbuf[256], *sym, *check_sym; - gint r; - struct symbol *ms; + struct composites_data *cd = (struct composites_data *)input; + const gchar *sym = atom->str; + struct rspamd_composite *ncomp; struct symbol_remove_data *rd; + struct symbol *ms; + gint rc = 0; + gchar t; - - expr = composite->expr; - if (isset (cd->checked, composite->id)) { - /* Symbol was already checked */ - return; + if (isset (cd->checked, cd->composite->id * 2)) { + /* We have already checked this composite, so just return its value */ + rc = isset (cd->checked, cd->composite->id * 2 + 1); + return rc; } - stack = g_queue_new (); + if (*sym == '~' || *sym == '-') { + t = *sym ++; + } - while (expr) { - if (expr->type == EXPR_STR) { - /* Find corresponding symbol */ - sym = expr->content.operand; - if (*sym == '~' || *sym == '-') { - sym++; - } - if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) { - cur = 0; - if ((ncomp = - g_hash_table_lookup (cd->task->cfg->composite_symbols, - sym)) != NULL) { - /* Set checked for this symbol to avoid cyclic references */ - if (isclr (cd->checked, ncomp->id)) { - setbit (cd->checked, composite->id); - composites_foreach_callback (sym, ncomp, cd); - if (g_hash_table_lookup (cd->metric_res->symbols, - sym) != NULL) { - cur = 1; - } - } - } + if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) { + if ((ncomp = + g_hash_table_lookup (cd->task->cfg->composite_symbols, + sym)) != NULL) { + /* Set checked for this symbol to avoid cyclic references */ + if (isclr (cd->checked, ncomp->id * 2)) { + setbit (cd->checked, cd->composite->id * 2); + rc = rspamd_process_expression (ncomp->expr, cd); + clrbit (cd->checked, cd->composite->id * 2); + ms = g_hash_table_lookup (cd->metric_res->symbols, sym); } else { - cur = 1; - symbols = g_list_prepend (symbols, expr->content.operand); + /* + * XXX: in case of cyclic references this would return 0 + */ + rc = isset (cd->checked, ncomp->id * 2 + 1); } - g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); + } + } + else { + rc = 1; + } + + if (rc && ms) { + /* + * At this point we know that we need to do something about this symbol, + * however, we don't know whether we need to delete it unfortunately, + * that depends on the later decisions when the complete expression is + * evaluated. + */ + rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*cd)); + rd->ms = ms; + if (G_UNLIKELY (t == '~')) { + rd->remove_weight = FALSE; + rd->remove_symbol = TRUE; + } + else if (G_UNLIKELY (t == '-')) { + rd->remove_symbol = FALSE; + rd->remove_weight = FALSE; } else { - if (g_queue_is_empty (stack)) { - /* Queue has no operands for operation, exiting */ - g_list_free (symbols); - g_queue_free (stack); - setbit (cd->checked, composite->id); - return; - } - switch (expr->content.operation) { - case '!': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op1 = !op1; - g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); - break; - case '&': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); - break; - case '|': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); - break; - default: - expr = expr->next; - continue; - } + rd->remove_symbol = TRUE; + rd->remove_weight = TRUE; } - expr = expr->next; - } - if (!g_queue_is_empty (stack)) { - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - if (op1) { - /* Remove all symbols that are in composite symbol */ - s = g_list_first (symbols); - r = rspamd_snprintf (logbuf, - sizeof (logbuf), - "<%s>, insert symbol %s instead of symbols: ", - cd->task->message_id, - key); - while (s) { - sym = s->data; - if (*sym == '~' || *sym == '-') { - check_sym = sym + 1; - } - else { - check_sym = sym; - } - ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym); - - if (ms == NULL) { - /* Try to process other composites */ - if ((ncomp = - g_hash_table_lookup (cd->task->cfg->composite_symbols, - check_sym)) != NULL) { - /* Set checked for this symbol to avoid cyclic references */ - if (isclr (cd->checked, ncomp->id)) { - setbit (cd->checked, composite->id); - composites_foreach_callback (check_sym, ncomp, cd); - ms = g_hash_table_lookup (cd->metric_res->symbols, - check_sym); - } - } - } - - if (ms != NULL) { - rd = - rspamd_mempool_alloc (cd->task->task_pool, - sizeof (struct symbol_remove_data)); - rd->ms = ms; - if (G_UNLIKELY (*sym == '~')) { - rd->remove_weight = FALSE; - rd->remove_symbol = TRUE; - } - else if (G_UNLIKELY (*sym == '-')) { - rd->remove_symbol = FALSE; - rd->remove_weight = FALSE; - } - else { - rd->remove_symbol = TRUE; - rd->remove_weight = TRUE; - } - if (!g_tree_lookup (cd->symbols_to_remove, ms->name)) { - g_tree_insert (cd->symbols_to_remove, - (gpointer)ms->name, - rd); - } - } - else { - - } - - if (s->next) { - r += rspamd_snprintf (logbuf + r, - sizeof (logbuf) - r, - "%s, ", - s->data); - } - else { - r += rspamd_snprintf (logbuf + r, - sizeof (logbuf) - r, - "%s", - s->data); - } - s = g_list_next (s); - } - /* Add new symbol */ - rspamd_task_insert_result_single (cd->task, key, 1.0, NULL); - msg_info ("%s", logbuf); + if (!g_tree_lookup (cd->symbols_to_remove, ms->name)) { + g_tree_insert (cd->symbols_to_remove, + (gpointer)ms->name, + rd); } } - setbit (cd->checked, composite->id); - g_queue_free (stack); - g_list_free (symbols); + return rc; +} - return; +/* + * We don't have preferences for composites + */ +static gint +rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) +{ + return 0; } +static void +rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) +{ + /* Composite atoms are destroyed just with the pool */ +} + +static gint +remove_compare_data (gconstpointer a, gconstpointer b) +{ + const gchar *ca = a, *cb = b; + + return strcmp (ca, cb); +} + +static void +composites_foreach_callback (gpointer key, gpointer value, void *data) +{ + struct composites_data *cd = data; + struct rspamd_composite *comp = value; + gint rc; + + cd->composite = comp; + + rc = rspamd_process_expression (comp->expr, cd); + + /* Checked bit */ + setbit (cd->checked, comp->id * 2); + /* Result bit */ + if (rc) { + setbit (cd->checked, comp->id * 2 + 1); + } + else { + clrbit (cd->checked, comp->id * 2 + 1); + } +} static gboolean diff --git a/src/libmime/filter.h b/src/libmime/filter.h index de324caf7..3eef17525 100644 --- a/src/libmime/filter.h +++ b/src/libmime/filter.h @@ -71,11 +71,16 @@ struct metric_result { double grow_factor; /**< current grow factor */ }; + +/** + * Subr for composite expressions + */ +extern const struct rspamd_atom_subr composite_expr_subr; /** * Composite structure */ struct rspamd_composite { - struct expression *expr; + struct rspamd_expression *expr; gint id; }; diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c new file mode 100644 index 000000000..841610480 --- /dev/null +++ b/src/libmime/mime_expressions.c @@ -0,0 +1,2234 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "util.h" +#include "cfg_file.h" +#include "main.h" +#include "message.h" +#include "fuzzy.h" +#include "mime_expressions.h" +#include "html.h" +#include "lua/lua_common.h" +#include "diff.h" + +gboolean rspamd_compare_encoding (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_header_exists (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_parts_distance (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_recipients_distance (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_has_only_html_part (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_is_html_balanced (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_has_html_tag (struct rspamd_task *task, + GArray * args, + void *unused); +gboolean rspamd_has_fake_html (struct rspamd_task *task, + GArray * args, + void *unused); +static gboolean rspamd_raw_header_exists (struct rspamd_task *task, + GArray * args, + void *unused); +static gboolean rspamd_check_smtp_data (struct rspamd_task *task, + GArray * args, + void *unused); +static gboolean rspamd_content_type_is_type (struct rspamd_task * task, + GArray * args, + void *unused); +static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task, + GArray * args, + void *unused); +static gboolean rspamd_content_type_has_param (struct rspamd_task * task, + GArray * args, + void *unused); +static gboolean rspamd_content_type_compare_param (struct rspamd_task * task, + GArray * args, + void *unused); +static gboolean rspamd_has_content_part (struct rspamd_task *task, + GArray * args, + void *unused); +static gboolean rspamd_has_content_part_len (struct rspamd_task *task, + GArray * args, + void *unused); + +static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err); +static gint rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom); +static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom); +static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom); + +/** + * Regexp type: /H - header, /M - mime, /U - url /X - raw header + */ +enum rspamd_regexp_type { + REGEXP_NONE = 0, + REGEXP_HEADER, + REGEXP_MIME, + REGEXP_MESSAGE, + REGEXP_URL, + REGEXP_RAW_HEADER +}; + +/** + * Regexp structure + */ +struct rspamd_regexp_atom { + enum rspamd_regexp_type type; /**< regexp type */ + gchar *regexp_text; /**< regexp text representation */ + rspamd_regexp_t *regexp; /**< regexp structure */ + gchar *header; /**< header name for header regexps */ + gboolean is_test; /**< true if this expression must be tested */ + gboolean is_strong; /**< true if headers search must be case sensitive */ + gboolean is_multiple; /**< true if we need to match all inclusions of atom */ +}; + +/** + * Rspamd expression function + */ +struct rspamd_function_atom { + gchar *name; /**< name of function */ + GArray *args; /**< its args */ +}; + +struct rspamd_mime_atom { + gchar *str; + union { + struct rspamd_regexp_atom *re; + struct rspamd_function_atom *func; + } d; + gboolean is_function; +}; + +/* + * List of internal functions of rspamd + * Sorted by name to use bsearch + */ +static struct _fl { + const gchar *name; + rspamd_internal_func_t func; + void *user_data; +} rspamd_functions_list[] = { + {"check_smtp_data", rspamd_check_smtp_data, NULL}, + {"compare_encoding", rspamd_compare_encoding, NULL}, + {"compare_parts_distance", rspamd_parts_distance, NULL}, + {"compare_recipients_distance", rspamd_recipients_distance, NULL}, + {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, + {"content_type_compare_param", rspamd_content_type_compare_param, NULL}, + {"content_type_has_param", rspamd_content_type_has_param, NULL}, + {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL}, + {"content_type_is_type", rspamd_content_type_is_type, NULL}, + {"has_content_part", rspamd_has_content_part, NULL}, + {"has_content_part_len", rspamd_has_content_part_len, NULL}, + {"has_fake_html", rspamd_has_fake_html, NULL}, + {"has_html_tag", rspamd_has_html_tag, NULL}, + {"has_only_html_part", rspamd_has_only_html_part, NULL}, + {"header_exists", rspamd_header_exists, NULL}, + {"is_html_balanced", rspamd_is_html_balanced, NULL}, + {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}, + {"raw_header_exists", rspamd_raw_header_exists, NULL} +}; + +const struct rspamd_atom_subr mime_expr_subr = { + .parse = rspamd_mime_expr_parse, + .process = rspamd_mime_expr_process, + .priority = rspamd_mime_expr_priority, + .destroy = rspamd_mime_expr_destroy +}; + +static struct _fl *list_ptr = &rspamd_functions_list[0]; +static guint32 functions_number = sizeof (rspamd_functions_list) / + sizeof (struct _fl); +static gboolean list_allocated = FALSE; +static guint max_re_data = 0; + +/* Bsearch routine */ +static gint +fl_cmp (const void *s1, const void *s2) +{ + struct _fl *fl1 = (struct _fl *)s1; + struct _fl *fl2 = (struct _fl *)s2; + return strcmp (fl1->name, fl2->name); +} + +static GQuark +rspamd_mime_expr_quark (void) +{ + return g_quark_from_static_string ("mime-expressions"); +} + +/* + * Rspamd regexp utility functions + */ +static struct rspamd_regexp_atom * +rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line) +{ + const gchar *begin, *end, *p, *src, *start; + gchar *dbegin, *dend; + struct rspamd_regexp_atom *result; + rspamd_regexp_t *re; + GError *err = NULL; + GString *re_flags; + + if (line == NULL) { + msg_err ("cannot parse NULL line"); + return NULL; + } + + if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) { + return ((struct rspamd_regexp_atom *)rspamd_regexp_get_ud (re)); + } + + src = line; + result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom)); + /* Skip whitespaces */ + while (g_ascii_isspace (*line)) { + line++; + } + if (*line == '\0') { + msg_warn ("got empty regexp"); + return NULL; + } + start = line; + /* First try to find header name */ + begin = strchr (line, '/'); + if (begin != NULL) { + p = begin; + end = NULL; + while (p != line) { + if (*p == '=') { + end = p; + break; + } + p--; + } + if (end) { + result->header = rspamd_mempool_alloc (pool, end - line + 1); + rspamd_strlcpy (result->header, line, end - line + 1); + result->type = REGEXP_HEADER; + line = end; + } + } + else { + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + line = start; + } + /* Find begin of regexp */ + while (*line && *line != '/') { + line++; + } + if (*line != '\0') { + begin = line + 1; + } + else if (result->header == NULL) { + /* Assume that line without // is just a header name */ + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + return result; + } + else { + /* We got header name earlier but have not found // expression, so it is invalid regexp */ + msg_warn ( + "got no header name (eg. header=) but without corresponding regexp, %s", + src); + return NULL; + } + /* Find end */ + end = begin; + while (*end && (*end != '/' || *(end - 1) == '\\')) { + end++; + } + if (end == begin || *end != '/') { + msg_warn ("no trailing / in regexp %s", src); + return NULL; + } + /* Parse flags */ + p = end + 1; + re_flags = g_string_sized_new (32); + while (p != NULL) { + switch (*p) { + case 'i': + case 'm': + case 's': + case 'x': + case 'u': + case 'O': + case 'r': + g_string_append_c (re_flags, *p); + p++; + break; + case 'o': + p++; + break; + /* Type flags */ + case 'H': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_HEADER; + } + p++; + break; + case 'M': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_MESSAGE; + } + p++; + break; + case 'P': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_MIME; + } + p++; + break; + case 'U': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_URL; + } + p++; + break; + case 'X': + if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) { + result->type = REGEXP_RAW_HEADER; + } + p++; + break; + case 'T': + result->is_test = TRUE; + p++; + break; + case 'S': + result->is_strong = TRUE; + p++; + break; + case 'A': + result->is_multiple = TRUE; + p++; + break; + /* Stop flags parsing */ + default: + p = NULL; + break; + } + } + + result->regexp_text = rspamd_mempool_strdup (pool, start); + dbegin = result->regexp_text + (begin - start); + dend = result->regexp_text + (end - start); + *dend = '\0'; + + result->regexp = rspamd_regexp_new (dbegin, re_flags->str, + &err); + + g_string_free (re_flags, TRUE); + + if (result->regexp == NULL || err != NULL) { + msg_warn ("could not read regexp: %s while reading regexp %s", + err ? err->message : "unknown error", + src); + return NULL; + } + + rspamd_mempool_add_destructor (pool, + (rspamd_mempool_destruct_t) rspamd_regexp_unref, + (void *)result->regexp); + + rspamd_regexp_set_ud (result->regexp, result); + + rspamd_regexp_cache_insert (NULL, line, NULL, result->regexp); + + *dend = '/'; + + return result; +} + +struct rspamd_function_atom * +rspamd_mime_expr_parse_function_atom (const gchar *input) +{ + const gchar *obrace, *ebrace, *p, *c; + gchar t, *databuf; + struct rspamd_function_atom *res; + struct expression_argument arg; + GError *err = NULL; + enum { + start_read_argument = 0, + in_string, + in_regexp, + got_backslash, + got_comma + } state, prev_state = 0; + + obrace = strchr (input, '('); + ebrace = strrchr (input, ')'); + + g_assert (obrace != NULL && ebrace != NULL); + + res = g_slice_alloc0 (sizeof (*res)); + res->name = g_malloc (obrace - input + 1); + rspamd_strlcpy (res->name, input, obrace - input + 1); + res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument)); + + p = obrace + 1; + c = p; + state = start_read_argument; + + /* Read arguments */ + while (p <= ebrace) { + t = *p; + switch (state) { + case start_read_argument: + if (t == '/') { + state = in_regexp; + c = p; + } + else if (!g_ascii_isspace (t)) { + state = in_string; + c = p; + } + p ++; + break; + case in_regexp: + if (t == '\\') { + state = got_backslash; + prev_state = in_regexp; + } + else if (t == ',' || p == ebrace) { + databuf = g_malloc (p - c + 1); + rspamd_strlcpy (databuf, c, p - c + 1); + arg.type = EXPRESSION_ARGUMENT_REGEXP; + arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err); + + if (arg.data == NULL) { + /* Fallback to string */ + msg_warn ("cannot parse slashed argument %s as regexp: %s", + databuf, err->message); + g_error_free (err); + arg.type = EXPRESSION_ARGUMENT_NORMAL; + arg.data = databuf; + } + else { + g_free (databuf); + } + + g_array_append_val (res->args, arg); + } + p ++; + break; + case in_string: + if (t == '\\') { + state = got_backslash; + prev_state = in_string; + } + else if (t == ',' || p == ebrace) { + databuf = g_malloc (p - c + 1); + rspamd_strlcpy (databuf, c, p - c + 1); + arg.type = EXPRESSION_ARGUMENT_NORMAL; + arg.data = databuf; + g_array_append_val (res->args, arg); + } + p ++; + break; + case got_backslash: + state = prev_state; + p ++; + break; + case got_comma: + state = start_read_argument; + break; + } + } + + return res; +} + +static rspamd_expression_atom_t * +rspamd_mime_expr_parse (const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err) +{ + rspamd_expression_atom_t *a = NULL; + struct rspamd_mime_atom *mime_atom = NULL; + const gchar *p, *end; + gchar t; + gboolean is_function = FALSE; + enum { + in_header = 0, + got_slash, + in_regexp, + got_backslash, + got_second_slash, + in_flags, + got_obrace, + in_function, + got_ebrace, + end_atom, + bad_atom + } state = 0, prev_state = 0; + + p = line; + end = p + len; + + while (p < end) { + t = *p; + + switch (state) { + case in_header: + if (t == '/') { + /* Regexp */ + state = got_slash; + } + else if (t == '(') { + /* Function */ + state = got_obrace; + } + else if (g_ascii_isspace (t)) { + state = bad_atom; + } + p ++; + break; + case got_slash: + state = in_regexp; + break; + case in_regexp: + if (t == '\\') { + state = got_backslash; + prev_state = in_regexp; + } + else if (t == '/') { + state = got_second_slash; + } + p ++; + break; + case got_second_slash: + state = in_flags; + break; + case in_flags: + if (!g_ascii_isalpha (t)) { + state = end_atom; + } + else { + p ++; + } + break; + case got_backslash: + state = prev_state; + p ++; + break; + case got_obrace: + state = in_function; + is_function = TRUE; + break; + case in_function: + if (t == '\\') { + state = got_backslash; + prev_state = in_function; + } + else if (t == ')') { + state = got_ebrace; + } + p ++; + break; + case got_ebrace: + state = end_atom; + break; + case bad_atom: + g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse" + " mime atom '%*.s' when reading symbol '%c'", (gint)len, line, t); + return NULL; + case end_atom: + goto set; + } + } +set: + + if (p - line == 0 || (state != got_ebrace || state != got_second_slash || + state != in_flags)) { + g_set_error (err, rspamd_mime_expr_quark(), 200, "inclomplete or empty" + " mime atom"); + return NULL; + } + + mime_atom = g_slice_alloc (sizeof (*mime_atom)); + mime_atom->is_function = is_function; + mime_atom->str = g_malloc (p - line + 1); + rspamd_strlcpy (mime_atom->str, line, p - line + 1); + + if (!is_function) { + mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool, + mime_atom->str); + if (mime_atom->d.re == NULL) { + g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse regexp '%s'", + mime_atom->str); + goto err; + } + } + else { + mime_atom->d.func = rspamd_mime_expr_parse_function_atom (mime_atom->str); + if (mime_atom->d.func == NULL) { + g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'", + mime_atom->str); + goto err; + } + } + + a = rspamd_mempool_alloc (pool, sizeof (*a)); + a->len = p - line; + a->priority = 0; + a->data = mime_atom; + + return a; + +err: + if (mime_atom != NULL) { + g_free (mime_atom->str); + g_slice_free1 (sizeof (*mime_atom), mime_atom); + } + + return NULL; +} + +static gint +rspamd_mime_regexp_element_process (struct rspamd_task *task, + struct rspamd_regexp_atom *re, const gchar *data, gsize len, + gboolean raw) +{ + guint r = 0; + const gchar *start = NULL, *end = NULL; + + if ((r = rspamd_task_re_cache_check (task, re->regexp_text)) != + RSPAMD_TASK_CACHE_NO_VALUE) { + debug_task ("regexp /%s/ is found in cache, result: %d", + re->regexp_text, r); + return r; + } + + if (len == 0) { + len = strlen (data); + } + + if (max_re_data != 0 && len > max_re_data) { + msg_info ("<%s> skip data of size %Hud", + task->message_id, + len); + + return 0; + } + + while (rspamd_regexp_search (re->regexp, data, len, &start, &end, raw)) { + if (G_UNLIKELY (re->is_test)) { + msg_info ( + "process test regexp %s for header %s with value '%s' returned TRUE", + re->regexp_text, + re->header, + data); + } + r++; + + if (!re->is_multiple) { + break; + } + } + + if (r > 0) { + rspamd_task_re_cache_add (task, re->regexp_text, r); + } + + return r; +} + +struct url_regexp_param { + struct rspamd_task *task; + rspamd_regexp_t *regexp; + struct rspamd_regexp_atom *re; + gboolean found; +}; + +static gboolean +tree_url_callback (gpointer key, gpointer value, void *data) +{ + struct url_regexp_param *param = data; + struct rspamd_url *url = value; + + if (rspamd_mime_regexp_element_process (param->task, param->re, + struri (url), 0, FALSE)) { + param->found = TRUE; + return TRUE; + } + else if (G_UNLIKELY (param->re->is_test)) { + msg_info ("process test regexp %s for url %s returned FALSE", + struri (url)); + } + + return FALSE; +} + +static gint +rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re, + struct rspamd_task *task) +{ + guint8 *ct; + gsize clen; + gboolean raw = FALSE; + const gchar *in; + + GList *cur, *headerlist; + rspamd_regexp_t *regexp; + struct url_regexp_param callback_param = { + .task = task, + .re = re, + .found = FALSE + }; + struct mime_text_part *part; + struct raw_header *rh; + + if (re == NULL) { + msg_info ("invalid regexp passed"); + return 0; + } + + callback_param.regexp = re->regexp; + + + switch (re->type) { + case REGEXP_NONE: + msg_warn ("bad error detected: %s has invalid regexp type", + re->regexp_text); + break; + case REGEXP_HEADER: + case REGEXP_RAW_HEADER: + /* Check header's name */ + if (re->header == NULL) { + msg_info ("header regexp without header name: '%s'", + re->regexp_text); + rspamd_task_re_cache_add (task, re->regexp_text, 0); + return 0; + } + debug_task ("checking %s header regexp: %s = %s", + re->type == REGEXP_RAW_HEADER ? "raw" : "decoded", + re->header, + re->regexp_text); + + /* Get list of specified headers */ + headerlist = message_get_header (task, + re->header, + re->is_strong); + if (headerlist == NULL) { + /* Header is not found */ + if (G_UNLIKELY (re->is_test)) { + msg_info ( + "process test regexp %s for header %s returned FALSE: no header found", + re->regexp_text, + re->header); + } + rspamd_task_re_cache_add (task, re->regexp_text, 0); + return 0; + } + else { + /* Check whether we have regexp for it */ + if (re->regexp == NULL) { + debug_task ("regexp contains only header and it is found %s", + re->header); + rspamd_task_re_cache_add (task, re->regexp_text, 1); + return 1; + } + /* Iterate through headers */ + cur = headerlist; + while (cur) { + rh = cur->data; + debug_task ("found header \"%s\" with value \"%s\"", + re->header, rh->decoded); + regexp = re->regexp; + + if (re->type == REGEXP_RAW_HEADER) { + in = rh->value; + raw = TRUE; + } + else { + in = rh->decoded; + /* Validate input */ + if (!in || !g_utf8_validate (in, -1, NULL)) { + cur = g_list_next (cur); + continue; + } + } + + /* Match re */ + if (in && rspamd_mime_regexp_element_process (task, re, in, + strlen (in), raw)) { + + return 1; + } + + cur = g_list_next (cur); + } + + rspamd_task_re_cache_add (task, re->regexp_text, 0); + } + break; + case REGEXP_MIME: + debug_task ("checking mime regexp: %s", re->regexp_text); + /* Iterate throught text parts */ + cur = g_list_first (task->text_parts); + while (cur) { + part = (struct mime_text_part *)cur->data; + /* Skip empty parts */ + if (part->is_empty) { + cur = g_list_next (cur); + continue; + } + + /* Check raw flags */ + if (part->is_raw) { + raw = TRUE; + } + /* Select data for regexp */ + if (raw) { + ct = part->orig->data; + clen = part->orig->len; + } + else { + ct = part->content->data; + clen = part->content->len; + } + /* If we have limit, apply regexp so much times as we can */ + if (rspamd_mime_regexp_element_process (task, re, ct, clen, raw)) { + return 1; + } + cur = g_list_next (cur); + } + rspamd_task_re_cache_add (task, re->regexp_text, 0); + break; + case REGEXP_MESSAGE: + debug_task ("checking message regexp: %s", re->regexp_text); + raw = TRUE; + ct = (guint8 *)task->msg.start; + clen = task->msg.len; + + if (rspamd_mime_regexp_element_process (task, re, ct, clen, raw)) { + return 1; + } + rspamd_task_re_cache_add (task, re->regexp_text, 0); + break; + case REGEXP_URL: + debug_task ("checking url regexp: %s", re->regexp_text); + regexp = re->regexp; + callback_param.task = task; + callback_param.regexp = regexp; + callback_param.re = re; + callback_param.found = FALSE; + if (task->urls) { + g_tree_foreach (task->urls, tree_url_callback, &callback_param); + } + if (task->emails && callback_param.found == FALSE) { + g_tree_foreach (task->emails, tree_url_callback, &callback_param); + } + if (callback_param.found == FALSE) { + rspamd_task_re_cache_add (task, re->regexp_text, 0); + } + break; + default: + msg_warn ("bad error detected: %p is not a valid regexp object", re); + break; + } + + return 0; +} + + +static gint +rspamd_mime_expr_priority (rspamd_expression_atom_t *atom) +{ + /* TODO: implement priorities for mime expressions */ + return 0; +} + +static void +rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom) +{ + struct rspamd_mime_atom *mime_atom = atom->data; + guint i; + struct expression_argument *arg; + + if (mime_atom) { + if (mime_atom->is_function) { + /* Need to cleanup arguments */ + for (i = 0; i < mime_atom->d.func->args->len; i ++) { + arg = &g_array_index (mime_atom->d.func->args, + struct expression_argument, i); + + if (arg->type == EXPRESSION_ARGUMENT_NORMAL) { + g_free (arg->data); + } + } + g_array_free (mime_atom->d.func->args, TRUE); + } + /* XXX: regexp shouldn't be special */ + g_slice_free1 (sizeof (*mime_atom), mime_atom); + } +} + +static gboolean +rspamd_mime_expr_process_function (struct rspamd_function_atom * func, + struct rspamd_task * task, + lua_State *L) +{ + struct _fl *selected, key; + + key.name = func->name; + + selected = bsearch (&key, + list_ptr, + functions_number, + sizeof (struct _fl), + fl_cmp); + if (selected == NULL) { + /* Try to check lua function */ + return FALSE; + } + + return selected->func (task, func->args, selected->user_data); +} + +static gint +rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom) +{ + struct rspamd_task *task = input; + struct rspamd_mime_atom *mime_atom; + gint ret = 0; + + g_assert (task != NULL); + g_assert (atom != NULL); + + mime_atom = atom->data; + + if (!mime_atom->is_function) { + ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task); + } + else { + ret = rspamd_mime_expr_process_function (mime_atom->d.func, task, + task->cfg->lua_state); + } + + return ret; +} + +void +register_expression_function (const gchar *name, + rspamd_internal_func_t func, + void *user_data) +{ + static struct _fl *new; + + functions_number++; + + new = g_new (struct _fl, functions_number); + memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl)); + if (list_allocated) { + g_free (list_ptr); + } + + list_allocated = TRUE; + new[functions_number - 1].name = name; + new[functions_number - 1].func = func; + new[functions_number - 1].user_data = user_data; + qsort (new, functions_number, sizeof (struct _fl), fl_cmp); + list_ptr = new; +} + +gboolean +rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused) +{ + struct expression_argument *arg; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + /* XXX: really write this function */ + return TRUE; +} + +gboolean +rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused) +{ + struct expression_argument *arg; + GList *headerlist; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + debug_task ("try to get header %s", (gchar *)arg->data); + headerlist = message_get_header (task, + (gchar *)arg->data, + FALSE); + if (headerlist) { + return TRUE; + } + return FALSE; +} + +/* + * This function is designed to find difference between text/html and text/plain parts + * It takes one argument: difference threshold, if we have two text parts, compare + * its hashes and check for threshold, if value is greater than threshold, return TRUE + * and return FALSE otherwise. + */ +gboolean +rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) +{ + gint threshold, threshold2 = -1, diff; + struct mime_text_part *p1, *p2; + GList *cur; + struct expression_argument *arg; + GMimeObject *parent; + const GMimeContentType *ct; + gint *pdiff; + + if (args == NULL || args->len == 0) { + debug_task ("no threshold is specified, assume it 100"); + threshold = 100; + } + else { + errno = 0; + arg = &g_array_index (args, struct expression_argument, 0); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + threshold = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", assume it 100", + (gchar *)arg->data); + threshold = 100; + } + if (args->len == 1) { + arg = &g_array_index (args, struct expression_argument, 1); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + errno = 0; + threshold2 = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", ignore it", + (gchar *)arg->data); + threshold2 = -1; + } + } + } + + if ((pdiff = + rspamd_mempool_get_variable (task->task_pool, + "parts_distance")) != NULL) { + diff = *pdiff; + if (diff != -1) { + if (threshold2 > 0) { + if (diff >= + MIN (threshold, + threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + return FALSE; + } + else { + return FALSE; + } + } + + if (g_list_length (task->text_parts) == 2) { + cur = g_list_first (task->text_parts); + p1 = cur->data; + cur = g_list_next (cur); + pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint)); + *pdiff = -1; + + if (cur == NULL) { + msg_info ("bad parts list"); + return FALSE; + } + p2 = cur->data; + /* First of all check parent object */ + if (p1->parent && p1->parent == p2->parent) { + parent = p1->parent; + ct = g_mime_object_get_content_type (parent); +#ifndef GMIME24 + if (ct == NULL || + !g_mime_content_type_is_type (ct, "multipart", "alternative")) { +#else + if (ct == NULL || + !g_mime_content_type_is_type ((GMimeContentType *)ct, + "multipart", "alternative")) { +#endif + debug_task ( + "two parts are not belong to multipart/alternative container, skip check"); + rspamd_mempool_set_variable (task->task_pool, + "parts_distance", + pdiff, + NULL); + return FALSE; + } + } + else { + debug_task ( + "message contains two parts but they are in different multi-parts"); + rspamd_mempool_set_variable (task->task_pool, + "parts_distance", + pdiff, + NULL); + return FALSE; + } + if (!p1->is_empty && !p2->is_empty) { + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = rspamd_diff_distance_normalized (p1->diff_str, + p2->diff_str); + } + else { + diff = rspamd_fuzzy_compare_parts (p1, p2); + } + debug_task ( + "got likeliness between parts of %d%%, threshold is %d%%", + diff, + threshold); + *pdiff = diff; + rspamd_mempool_set_variable (task->task_pool, + "parts_distance", + pdiff, + NULL); + if (threshold2 > 0) { + if (diff >= + MIN (threshold, + threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + } + else if ((p1->is_empty && + !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { + /* Empty and non empty parts are different */ + *pdiff = 0; + rspamd_mempool_set_variable (task->task_pool, + "parts_distance", + pdiff, + NULL); + return TRUE; + } + } + else { + debug_task ( + "message has too many text parts, so do not try to compare them with each other"); + rspamd_mempool_set_variable (task->task_pool, + "parts_distance", + pdiff, + NULL); + return FALSE; + } + + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, + NULL); + return FALSE; +} + +struct addr_list { + const gchar *name; + const gchar *addr; +}; + +#define COMPARE_RCPT_LEN 3 +#define MIN_RCPT_TO_COMPARE 7 + +gboolean +rspamd_recipients_distance (struct rspamd_task *task, GArray * args, + void *unused) +{ + struct expression_argument *arg; + InternetAddressList *cur; + double threshold; + struct addr_list *ar; + gchar *c; + gint num, i, j, hits = 0, total = 0; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + errno = 0; + threshold = strtod ((gchar *)arg->data, NULL); + + if (errno != 0) { + msg_warn ("invalid numeric value '%s': %s", + (gchar *)arg->data, + strerror (errno)); + return FALSE; + } + + if (!task->rcpt_mime) { + return FALSE; + } + + num = internet_address_list_length (task->rcpt_mime); + + if (num < MIN_RCPT_TO_COMPARE) { + return FALSE; + } + ar = + rspamd_mempool_alloc0 (task->task_pool, num * + sizeof (struct addr_list)); + + /* Fill array */ + cur = task->rcpt_mime; +#ifdef GMIME24 + for (i = 0; i < num; i++) { + InternetAddress *iaelt = + internet_address_list_get_address(cur, i); + InternetAddressMailbox *iamb = + INTERNET_ADDRESS_IS_MAILBOX(iaelt) ? + INTERNET_ADDRESS_MAILBOX (iaelt) : NULL; + if (iamb) { + ar[i].name = internet_address_mailbox_get_addr (iamb); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + ar[i].addr = c + 1; + } + } + } +#else + InternetAddress *addr; + i = 0; + while (cur) { + addr = internet_address_list_get_address (cur); + if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + ar[i].name = rspamd_mempool_strdup (task->task_pool, + internet_address_get_addr (addr)); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + *c = '\0'; + ar[i].addr = c + 1; + } + cur = internet_address_list_next (cur); + i++; + } + else { + cur = internet_address_list_next (cur); + } + } +#endif + + /* Cycle all elements in array */ + for (i = 0; i < num; i++) { + for (j = i + 1; j < num; j++) { + if (ar[i].name && ar[j].name && + g_ascii_strncasecmp (ar[i].name, ar[j].name, + COMPARE_RCPT_LEN) == 0) { + /* Common name part */ + hits++; + } + else if (ar[i].addr && ar[j].addr && + g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) { + /* Common address part, but different name */ + hits++; + } + total++; + } + } + + if ((double)(hits * num / 2.) / (double)total >= threshold) { + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_has_only_html_part (struct rspamd_task * task, GArray * args, + void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (p->is_html) { + res = TRUE; + } + else { + res = FALSE; + break; + } + cur = g_list_next (cur); + } + + return res; +} + +static gboolean +is_recipient_list_sorted (const InternetAddressList * ia) +{ + const InternetAddressList *cur; + InternetAddress *addr; + gboolean res = TRUE; + struct addr_list current = { NULL, NULL }, previous = { + NULL, NULL + }; +#ifdef GMIME24 + gint num, i; +#endif + + /* Do not check to short address lists */ + if (internet_address_list_length ((InternetAddressList *)ia) < + MIN_RCPT_TO_COMPARE) { + return FALSE; + } +#ifdef GMIME24 + num = internet_address_list_length ((InternetAddressList *)ia); + cur = ia; + for (i = 0; i < num; i++) { + addr = + internet_address_list_get_address ((InternetAddressList *)cur, i); + current.addr = (gchar *)internet_address_get_name (addr); + if (previous.addr != NULL) { + if (current.addr && + g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } +#else + cur = ia; + while (cur) { + addr = internet_address_list_get_address (cur); + if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + current.addr = internet_address_get_addr (addr); + if (previous.addr != NULL) { + if (current.addr && + g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } + cur = internet_address_list_next (cur); + } +#endif + + return res; +} + +gboolean +rspamd_is_recipients_sorted (struct rspamd_task * task, + GArray * args, + void *unused) +{ + /* Check all types of addresses */ + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, + GMIME_RECIPIENT_TYPE_TO)) == TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, + GMIME_RECIPIENT_TYPE_BCC)) == TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, + GMIME_RECIPIENT_TYPE_CC)) == TRUE) { + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_compare_transfer_encoding (struct rspamd_task * task, + GArray * args, + void *unused) +{ + GMimeObject *part; +#ifndef GMIME24 + GMimePartEncodingType enc_req, part_enc; +#else + GMimeContentEncoding enc_req, part_enc; +#endif + struct expression_argument *arg; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + +#ifndef GMIME24 + enc_req = g_mime_part_encoding_from_string (arg->data); + if (enc_req == GMIME_PART_ENCODING_DEFAULT) { +#else + enc_req = g_mime_content_encoding_from_string (arg->data); + if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) { +#endif + msg_warn ("bad encoding type: %s", (gchar *)arg->data); + return FALSE; + } + + part = g_mime_message_get_mime_part (task->message); + if (part) { + if (GMIME_IS_PART (part)) { +#ifndef GMIME24 + part_enc = g_mime_part_get_encoding (GMIME_PART (part)); + if (part_enc == GMIME_PART_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_PART_ENCODING_7BIT; + } +#else + part_enc = g_mime_part_get_content_encoding (GMIME_PART (part)); + if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_CONTENT_ENCODING_7BIT; + } +#endif + + + debug_task ("got encoding in part: %d and compare with %d", + (gint)part_enc, + (gint)enc_req); +#ifndef GMIME24 + g_object_unref (part); +#endif + + return part_enc == enc_req; + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + + return FALSE; +} + +gboolean +rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = TRUE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (!p->is_empty && p->is_html) { + if (p->is_balanced) { + res = TRUE; + } + else { + res = FALSE; + break; + } + } + cur = g_list_next (cur); + } + + return res; + +} + +struct html_callback_data { + struct html_tag *tag; + gboolean *res; +}; + +static gboolean +search_html_node_callback (GNode * node, gpointer data) +{ + struct html_callback_data *cd = data; + struct html_node *nd; + + nd = node->data; + if (nd) { + if (nd->tag == cd->tag) { + *cd->res = TRUE; + return TRUE; + } + } + + return FALSE; +} + +gboolean +rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + struct expression_argument *arg; + struct html_tag *tag; + gboolean res = FALSE; + struct html_callback_data cd; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + tag = get_tag_by_name (arg->data); + if (tag == NULL) { + msg_warn ("unknown tag type passed as argument: %s", + (gchar *)arg->data); + return FALSE; + } + + cur = g_list_first (task->text_parts); + cd.res = &res; + cd.tag = tag; + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes) { + g_node_traverse (p->html_nodes, + G_PRE_ORDER, + G_TRAVERSE_ALL, + -1, + search_html_node_callback, + &cd); + } + cur = g_list_next (cur); + } + + return res; + +} + +gboolean +rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes == NULL) { + res = TRUE; + } + cur = g_list_next (cur); + } + + return res; + +} + +static gboolean +rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused) +{ + struct expression_argument *arg; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + return g_hash_table_lookup (task->raw_headers, arg->data) != NULL; +} + +static gboolean +match_smtp_data (struct rspamd_task *task, + struct expression_argument *arg, + const gchar *what) +{ + rspamd_regexp_t *re; + gint r; + + if (arg->type == EXPRESSION_ARGUMENT_REGEXP) { + /* This is a regexp */ + re = arg->data; + if (re == NULL) { + msg_warn ("cannot compile regexp for function"); + return FALSE; + } + + if ((r = rspamd_task_re_cache_check (task, + rspamd_regexp_get_pattern (re))) == -1) { + r = rspamd_regexp_search (re, what, 0, NULL, NULL, FALSE); + rspamd_task_re_cache_add (task, rspamd_regexp_get_pattern (re), r); + } + return r; + } + else if (arg->type == EXPRESSION_ARGUMENT_NORMAL && + g_ascii_strcasecmp (arg->data, what) == 0) { + return TRUE; + } + + return FALSE; +} + +static gboolean +rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused) +{ + struct expression_argument *arg; + InternetAddressList *ia = NULL; + const gchar *type, *what = NULL; + gint i, ialen; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + + if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) { + msg_warn ("no parameters to function"); + return FALSE; + } + else { + type = arg->data; + switch (*type) { + case 'f': + case 'F': + if (g_ascii_strcasecmp (type, "from") == 0) { + what = rspamd_task_get_sender (task); + } + else { + msg_warn ("bad argument to function: %s", type); + return FALSE; + } + break; + case 'h': + case 'H': + if (g_ascii_strcasecmp (type, "helo") == 0) { + what = task->helo; + } + else { + msg_warn ("bad argument to function: %s", type); + return FALSE; + } + break; + case 'u': + case 'U': + if (g_ascii_strcasecmp (type, "user") == 0) { + what = task->user; + } + else { + msg_warn ("bad argument to function: %s", type); + return FALSE; + } + break; + case 's': + case 'S': + if (g_ascii_strcasecmp (type, "subject") == 0) { + what = task->subject; + } + else { + msg_warn ("bad argument to function: %s", type); + return FALSE; + } + break; + case 'r': + case 'R': + if (g_ascii_strcasecmp (type, "rcpt") == 0) { + ia = task->rcpt_mime; + } + else { + msg_warn ("bad argument to function: %s", type); + return FALSE; + } + break; + default: + msg_warn ("bad argument to function: %s", type); + return FALSE; + } + } + + if (what == NULL && ia == NULL) { + /* Not enough data so regexp would NOT be found anyway */ + return FALSE; + } + + /* We would process only one more argument, others are ignored */ + if (args->len >= 2) { + arg = &g_array_index (args, struct expression_argument, 1); + if (arg) { + if (what != NULL) { + return match_smtp_data (task, arg, what); + } + else { + if (ia != NULL) { + ialen = internet_address_list_length(ia); + for (i = 0; i < ialen; i ++) { + InternetAddress *iaelt = + internet_address_list_get_address(ia, i); + InternetAddressMailbox *iamb = + INTERNET_ADDRESS_IS_MAILBOX(iaelt) ? + INTERNET_ADDRESS_MAILBOX (iaelt) : NULL; + if (iamb && + match_smtp_data (task, arg, + internet_address_mailbox_get_addr(iamb))) { + return TRUE; + } + } + } + } + } + } + + return FALSE; +} + +static gboolean +rspamd_content_type_compare_param (struct rspamd_task * task, + GArray * args, + void *unused) +{ + const gchar *param_name; + const gchar *param_data; + rspamd_regexp_t *re; + struct expression_argument *arg, *arg1, *arg_pattern; + GMimeObject *part; + GMimeContentType *ct; + gint r; + gboolean recursive = FALSE, result = FALSE; + GList *cur = NULL; + struct mime_part *cur_part; + + if (args == NULL || args->len < 2) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); + param_name = arg->data; + arg_pattern = &g_array_index (args, struct expression_argument, 1); + + + part = g_mime_message_get_mime_part (task->message); + if (part) { + ct = (GMimeContentType *)g_mime_object_get_content_type (part); + if (args->len >= 3) { + arg1 = &g_array_index (args, struct expression_argument, 2); + if (g_ascii_strncasecmp (arg1->data, "true", + sizeof ("true") - 1) == 0) { + recursive = TRUE; + } + } + else { + /* + * If user did not specify argument, let's assume that he wants + * recursive search if mime part is multipart/mixed + */ + if (g_mime_content_type_is_type (ct, "multipart", "*")) { + recursive = TRUE; + } + } + + if (recursive) { + cur = task->parts; + } + +#ifndef GMIME24 + g_object_unref (part); +#endif + for (;; ) { + if ((param_data = + g_mime_content_type_get_parameter ((GMimeContentType *)ct, + param_name)) == NULL) { + result = FALSE; + } + else { + if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) { + re = arg_pattern->data; + + if ((r = rspamd_task_re_cache_check (task, + rspamd_regexp_get_pattern (re))) == -1) { + r = rspamd_regexp_search (re, param_data, 0, + NULL, NULL, FALSE); + rspamd_task_re_cache_add (task, + rspamd_regexp_get_pattern (re), r); + } + } + else { + /* Just do strcasecmp */ + if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) { + return TRUE; + } + } + } + /* Get next part */ + if (!recursive) { + return result; + } + else if (cur != NULL) { + cur_part = cur->data; + if (cur_part->type != NULL) { + ct = cur_part->type; + } + cur = g_list_next (cur); + } + else { + /* All is done */ + return result; + } + } + } + + return FALSE; +} + +static gboolean +rspamd_content_type_has_param (struct rspamd_task * task, + GArray * args, + void *unused) +{ + gchar *param_name; + const gchar *param_data; + struct expression_argument *arg, *arg1; + GMimeObject *part; + GMimeContentType *ct; + gboolean recursive = FALSE, result = FALSE; + GList *cur = NULL; + struct mime_part *cur_part; + + if (args == NULL || args->len < 1) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = &g_array_index (args, struct expression_argument, 0); + g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); + param_name = arg->data; + + part = g_mime_message_get_mime_part (task->message); + if (part) { + ct = (GMimeContentType *)g_mime_object_get_content_type (part); + if (args->len >= 2) { + arg1 = &g_array_index (args, struct expression_argument, 2); + if (g_ascii_strncasecmp (arg1->data, "true", + sizeof ("true") - 1) == 0) { + recursive = TRUE; + } + } + else { + /* + * If user did not specify argument, let's assume that he wants + * recursive search if mime part is multipart/mixed + */ + if (g_mime_content_type_is_type (ct, "multipart", "*")) { + recursive = TRUE; + } + } + + if (recursive) { + cur = task->parts; + } + +#ifndef GMIME24 + g_object_unref (part); +#endif + for (;; ) { + if ((param_data = + g_mime_content_type_get_parameter ((GMimeContentType *)ct, + param_name)) != NULL) { + return TRUE; + } + /* Get next part */ + if (!recursive) { + return result; + } + else if (cur != NULL) { + cur_part = cur->data; + if (cur_part->type != NULL) { + ct = cur_part->type; + } + cur = g_list_next (cur); + } + else { + /* All is done */ + return result; + } + } + + } + + return TRUE; +} + +static gboolean +rspamd_content_type_check (struct rspamd_task *task, + GArray * args, + gboolean check_subtype) +{ + const gchar *param_data; + rspamd_regexp_t *re; + struct expression_argument *arg1, *arg_pattern; + GMimeObject *part; + GMimeContentType *ct; + gint r; + gboolean recursive = FALSE, result = FALSE; + GList *cur = NULL; + struct mime_part *cur_part; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + arg_pattern = &g_array_index (args, struct expression_argument, 1); + + part = g_mime_message_get_mime_part (task->message); + if (part) { + ct = (GMimeContentType *)g_mime_object_get_content_type (part); + if (args->len >= 2) { + arg1 = &g_array_index (args, struct expression_argument, 2); + if (g_ascii_strncasecmp (arg1->data, "true", + sizeof ("true") - 1) == 0) { + recursive = TRUE; + } + } + else { + /* + * If user did not specify argument, let's assume that he wants + * recursive search if mime part is multipart/mixed + */ + if (g_mime_content_type_is_type (ct, "multipart", "*")) { + recursive = TRUE; + } + } + + if (recursive) { + cur = task->parts; + } + +#ifndef GMIME24 + g_object_unref (part); +#endif + for (;;) { + + if (check_subtype) { + param_data = ct->subtype; + } + else { + param_data = ct->type; + } + + if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) { + re = arg_pattern->data; + + if ((r = rspamd_task_re_cache_check (task, + rspamd_regexp_get_pattern (re))) == -1) { + r = rspamd_regexp_search (re, param_data, 0, + NULL, NULL, FALSE); + rspamd_task_re_cache_add (task, + rspamd_regexp_get_pattern (re), r); + } + } + else { + /* Just do strcasecmp */ + if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) { + return TRUE; + } + } + /* Get next part */ + if (!recursive) { + return result; + } + else if (cur != NULL) { + cur_part = cur->data; + if (cur_part->type != NULL) { + ct = cur_part->type; + } + cur = g_list_next (cur); + } + else { + /* All is done */ + return result; + } + } + + } + + return FALSE; +} + +static gboolean +rspamd_content_type_is_type (struct rspamd_task * task, + GArray * args, + void *unused) +{ + return rspamd_content_type_check (task, args, FALSE); +} + +static gboolean +rspamd_content_type_is_subtype (struct rspamd_task * task, + GArray * args, + void *unused) +{ + return rspamd_content_type_check (task, args, TRUE); +} + +static gboolean +compare_subtype (struct rspamd_task *task, GMimeContentType * ct, + struct expression_argument *subtype) +{ + rspamd_regexp_t *re; + gint r = 0; + + if (subtype == NULL || ct == NULL) { + msg_warn ("invalid parameters passed"); + return FALSE; + } + if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) { + re = subtype->data; + + if ((r = rspamd_task_re_cache_check (task, + rspamd_regexp_get_pattern (re))) == -1) { + r = rspamd_regexp_search (re, ct->subtype, 0, + NULL, NULL, FALSE); + rspamd_task_re_cache_add (task, + rspamd_regexp_get_pattern (re), r); + } + } + else { + /* Just do strcasecmp */ + if (ct->subtype && g_ascii_strcasecmp (ct->subtype, subtype->data) == 0) { + return TRUE; + } + } + + return r; +} + +static gboolean +compare_len (struct mime_part *part, guint min, guint max) +{ + if (min == 0 && max == 0) { + return TRUE; + } + + if (min == 0) { + return part->content->len <= max; + } + else if (max == 0) { + return part->content->len >= min; + } + else { + return part->content->len >= min && part->content->len <= max; + } +} + +static gboolean +common_has_content_part (struct rspamd_task * task, + struct expression_argument *param_type, + struct expression_argument *param_subtype, + gint min_len, + gint max_len) +{ + rspamd_regexp_t *re; + struct mime_part *part; + GList *cur; + GMimeContentType *ct; + gint r; + + cur = g_list_first (task->parts); + while (cur) { + part = cur->data; + ct = part->type; + if (ct == NULL) { + cur = g_list_next (cur); + continue; + } + + if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) { + re = param_type->data; + + if ((r = rspamd_task_re_cache_check (task, + rspamd_regexp_get_pattern (re))) == -1) { + r = rspamd_regexp_search (re, ct->type, 0, + NULL, NULL, FALSE); + /* Also check subtype and length of the part */ + if (r && param_subtype) { + r = compare_len (part, min_len, max_len) && + compare_subtype (task, ct, param_subtype); + } + rspamd_task_re_cache_add (task, + rspamd_regexp_get_pattern (re), r); + } + } + else { + /* Just do strcasecmp */ + if (ct->type && g_ascii_strcasecmp (ct->type, param_type->data) == 0) { + if (param_subtype) { + if (compare_subtype (task, ct, param_subtype)) { + if (compare_len (part, min_len, max_len)) { + return TRUE; + } + } + } + else { + if (compare_len (part, min_len, max_len)) { + return TRUE; + } + } + } + } + cur = g_list_next (cur); + } + + return FALSE; +} + +static gboolean +rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused) +{ + struct expression_argument *param_type = NULL, *param_subtype = NULL; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + param_type = &g_array_index (args, struct expression_argument, 0); + if (args->len >= 2) { + param_subtype = &g_array_index (args, struct expression_argument, 1); + } + + return common_has_content_part (task, param_type, param_subtype, 0, 0); +} + +static gboolean +rspamd_has_content_part_len (struct rspamd_task * task, + GArray * args, + void *unused) +{ + struct expression_argument *param_type = NULL, *param_subtype = NULL; + gint min = 0, max = 0; + struct expression_argument *arg; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + param_type = &g_array_index (args, struct expression_argument, 0); + + if (args->len >= 2) { + param_subtype = &g_array_index (args, struct expression_argument, 1); + + if (args->len >= 3) { + arg = &g_array_index (args, struct expression_argument, 2); + errno = 0; + min = strtoul (arg->data, NULL, 10); + g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); + + if (errno != 0) { + msg_warn ("invalid numeric value '%s': %s", + (gchar *)arg->data, + strerror (errno)); + return FALSE; + } + + if (args) { + arg = &g_array_index (args, struct expression_argument, 3); + g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); + max = strtoul (arg->data, NULL, 10); + + if (errno != 0) { + msg_warn ("invalid numeric value '%s': %s", + (gchar *)arg->data, + strerror (errno)); + return FALSE; + } + } + } + } + + return common_has_content_part (task, param_type, param_subtype, min, max); +} + +guint +rspamd_mime_expression_set_re_limit (guint limit) +{ + guint ret = max_re_data; + + max_re_data = limit; + return ret; +} diff --git a/src/libmime/mime_expressions.h b/src/libmime/mime_expressions.h new file mode 100644 index 000000000..41e8b33ac --- /dev/null +++ b/src/libmime/mime_expressions.h @@ -0,0 +1,49 @@ +/** + * @file expressions.h + * Rspamd expressions API + */ + +#ifndef RSPAMD_EXPRESSIONS_H +#define RSPAMD_EXPRESSIONS_H + +#include "config.h" +#include "expression.h" + +struct rspamd_task; + +extern const struct rspamd_atom_subr mime_expr_subr; + +/** + * Function's argument + */ +struct expression_argument { + enum { + EXPRESSION_ARGUMENT_NORMAL = 0, + EXPRESSION_ARGUMENT_BOOL, + EXPRESSION_ARGUMENT_REGEXP + } type; /**< type of argument (text or other function) */ + void *data; /**< pointer to its data */ +}; + + +typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, + GArray *args, void *user_data); + + +/** + * Register specified function to rspamd internal functions list + * @param name name of function + * @param func pointer to function + */ +void register_expression_function (const gchar *name, + rspamd_internal_func_t func, + void *user_data); + +/** + * Set global limit of regexp data size to be processed + * @param limit new limit in bytes + * @return old limit value + */ +guint rspamd_mime_expression_set_re_limit (guint limit); + +#endif diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index 08b70f5c9..06232cff8 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -43,18 +43,6 @@ enum rspamd_cred_type { }; /** - * Regexp type: /H - header, /M - mime, /U - url /X - raw header - */ -enum rspamd_regexp_type { - REGEXP_NONE = 0, - REGEXP_HEADER, - REGEXP_MIME, - REGEXP_MESSAGE, - REGEXP_URL, - REGEXP_RAW_HEADER -}; - -/** * Logging type */ enum rspamd_log_type { @@ -64,18 +52,6 @@ enum rspamd_log_type { }; /** - * Regexp structure - */ -struct rspamd_regexp_element { - enum rspamd_regexp_type type; /**< regexp type */ - gchar *regexp_text; /**< regexp text representation */ - rspamd_regexp_t *regexp; /**< regexp structure */ - gchar *header; /**< header name for header regexps */ - gboolean is_test; /**< true if this expression must be tested */ - gboolean is_strong; /**< true if headers search must be case sensitive */ -}; - -/** * script module list item */ struct script_module { diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index 69d11f756..3cb252ae6 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -27,7 +27,7 @@ #include "utlist.h" #include "cfg_file.h" #include "lua/lua_common.h" -#include "expressions.h" +#include "expression.h" struct rspamd_rcl_default_handler_data { @@ -1000,7 +1000,7 @@ rspamd_rcl_composite_handler (struct rspamd_config *cfg, GError **err) { const ucl_object_t *val; - struct expression *expr; + struct rspamd_expression *expr; struct rspamd_composite *composite; const gchar *composite_name, *composite_expression; gboolean new = TRUE; @@ -1028,14 +1028,8 @@ rspamd_rcl_composite_handler (struct rspamd_config *cfg, return FALSE; } - if ((expr = - parse_expression (cfg->cfg_pool, - (gchar *)composite_expression)) == NULL) { - g_set_error (err, - CFG_RCL_ERROR, - EINVAL, - "cannot parse composite expression: %s", - composite_expression); + if (!rspamd_parse_expression (composite_expression, 0, &composite_expr_subr, + NULL, cfg->cfg_pool, err, &expr)) { return FALSE; } diff --git a/src/libserver/task.c b/src/libserver/task.c index f6eeef2b1..9ac66b65c 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -450,3 +450,37 @@ rspamd_task_add_sender (struct rspamd_task *task, const gchar *sender) return FALSE; } + + +guint +rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re, + guint value) +{ + guint ret = RSPAMD_TASK_CACHE_NO_VALUE; + gpointer p; + + p = g_hash_table_lookup (task->re_cache, re); + + if (p != NULL) { + ret = GPOINTER_TO_INT (p); + } + + g_hash_table_insert (task->re_cache, (gpointer)re, GINT_TO_POINTER (value)); + + return ret; +} + +guint +rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re) +{ + guint ret = RSPAMD_TASK_CACHE_NO_VALUE; + gpointer p; + + p = g_hash_table_lookup (task->re_cache, re); + + if (p != NULL) { + ret = GPOINTER_TO_INT (p); + } + + return ret; +} diff --git a/src/libserver/task.h b/src/libserver/task.h index 799182f01..21df26535 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -218,5 +218,24 @@ gboolean rspamd_task_add_recipient (struct rspamd_task *task, const gchar *rcpt) */ gboolean rspamd_task_add_sender (struct rspamd_task *task, const gchar *sender); +#define RSPAMD_TASK_CACHE_NO_VALUE ((guint)-1) + +/** + * Add or replace the value to the task cache of regular expressions results + * @param task task object + * @param re text value of regexp + * @param value value to add + * @return previous value of element or RSPAMD_TASK_CACHE_NO_VALUE + */ +guint rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re, + guint value); + +/** + * Check for cached result of re inside cache + * @param task task object + * @param re text value of regexp + * @return the current value of element or RSPAMD_TASK_CACHE_NO_VALUE + */ +guint rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re); #endif /* TASK_H_ */ diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c index b8bd4454c..bf1a1762d 100644 --- a/src/libutil/regexp.c +++ b/src/libutil/regexp.c @@ -436,7 +436,7 @@ rspamd_regexp_get_ud (rspamd_regexp_t *re) return re->ud; } -static gboolean +gboolean rspamd_regexp_equal (gconstpointer a, gconstpointer b) { const guchar *ia = a, *ib = b; @@ -444,7 +444,7 @@ rspamd_regexp_equal (gconstpointer a, gconstpointer b) return (memcmp (ia, ib, sizeof (regexp_id_t)) == 0); } -static guint32 +guint32 rspamd_regexp_hash (gconstpointer a) { const guchar *ia = a; diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h index 3c08de71a..fc236c1b3 100644 --- a/src/libutil/regexp.h +++ b/src/libutil/regexp.h @@ -90,6 +90,13 @@ void rspamd_regexp_set_ud (rspamd_regexp_t *re, gpointer ud); gpointer rspamd_regexp_get_ud (rspamd_regexp_t *re); /** + * Get regexp ID suitable for hashing + * @param re + * @return + */ +gpointer rspamd_regexp_get_id (rspamd_regexp_t *re); + +/** * Get pattern for the specified regexp object * @param re * @return @@ -152,6 +159,21 @@ gboolean rspamd_regexp_cache_remove (struct rspamd_regexp_cache *cache, void rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache); /** + * Return the value for regexp hash based on its ID + * @param a + * @return + */ +guint32 rspamd_regexp_hash (gconstpointer a); + +/** + * Compare two regexp objects based on theirs ID + * @param a + * @param b + * @return + */ +gboolean rspamd_regexp_equal (gconstpointer a, gconstpointer b); + +/** * Initialize superglobal regexp cache and library */ void rspamd_regexp_library_init (void); diff --git a/src/lua/lua_cfg_file.c b/src/lua/lua_cfg_file.c index c29b4b2b1..c73173f97 100644 --- a/src/lua/lua_cfg_file.c +++ b/src/lua/lua_cfg_file.c @@ -23,8 +23,9 @@ */ #include "lua_common.h" -#include "expressions.h" #include "symbols_cache.h" +#include "expression.h" +#include "filter.h" #ifdef HAVE_SYS_UTSNAME_H #include <sys/utsname.h> #endif @@ -140,9 +141,10 @@ rspamd_lua_post_load_config (struct rspamd_config *cfg) lua_State *L = cfg->lua_state; const gchar *name, *val; gchar *sym; - struct expression *expr, *old_expr; + struct rspamd_expression *expr, *old_expr; ucl_object_t *obj; gsize keylen; + GError *err = NULL; /* First check all module options that may be overriden in 'config' global */ lua_getglobal (L, "config"); @@ -193,10 +195,12 @@ rspamd_lua_post_load_config (struct rspamd_config *cfg) if (name != NULL && lua_isstring (L, -1)) { val = lua_tostring (L, -1); sym = rspamd_mempool_strdup (cfg->cfg_pool, name); - if ((expr = - parse_expression (cfg->cfg_pool, - rspamd_mempool_strdup (cfg->cfg_pool, val))) == NULL) { - msg_err ("cannot parse composite expression: %s", val); + if (!rspamd_parse_expression (val, 0, &composite_expr_subr, NULL, + cfg->cfg_pool, &err, &expr)) { + msg_err ("cannot parse composite expression '%s': %s", val, + err->message); + g_error_free (err); + err = NULL; continue; } /* Now check hash table for this composite */ diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 63d34e4e6..d970325d1 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -23,7 +23,6 @@ */ #include "lua_common.h" -#include "expressions.h" /* Lua module init function */ #define MODULE_INIT_FUNC "module_init" diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 5116fff93..6f77612fb 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -24,11 +24,11 @@ #include "lua_common.h" -#include "expressions.h" #include "map.h" #include "message.h" #include "radix.h" #include "trie.h" +#include "expression.h" /*** * This module is used to configure rspamd and is normally available as global @@ -70,27 +70,6 @@ LUA_FUNCTION_DEF (config, get_all_opt); */ LUA_FUNCTION_DEF (config, get_mempool); /*** - * @method rspamd_config:register_function(name, callback) - * Registers new rspamd function that could be used in symbols expressions - * @param {string} name name of function - * @param {function} callback callback to be called - * @example - -local function lua_header_exists(task, hname) - if task:get_raw_header(hname) then - return true - end - - return false -end - -rspamd_config:register_function('lua_header_exists', lua_header_exists) - --- Further in configuration it would be possible to define symbols like: --- HAS_CONTENT_TYPE = 'lua_header_exists(Content-Type)' - */ -LUA_FUNCTION_DEF (config, register_function); -/*** * @method rspamd_config:add_radix_map(mapline[, description]) * Creates new dynamic map of IP/mask addresses. * @param {string} mapline URL for a map @@ -314,7 +293,6 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF (config, get_module_opt), LUA_INTERFACE_DEF (config, get_mempool), LUA_INTERFACE_DEF (config, get_all_opt), - LUA_INTERFACE_DEF (config, register_function), LUA_INTERFACE_DEF (config, add_radix_map), LUA_INTERFACE_DEF (config, radix_from_config), LUA_INTERFACE_DEF (config, add_hash_map), @@ -532,90 +510,6 @@ lua_destroy_cfg_symbol (gpointer ud) } } -static gboolean -lua_config_function_callback (struct rspamd_task *task, - GList *args, - void *user_data) -{ - struct lua_callback_data *cd = user_data; - struct rspamd_task **ptask; - gint i = 1; - struct expression_argument *arg; - GList *cur; - gboolean res = FALSE; - - if (cd->cb_is_ref) { - lua_rawgeti (cd->L, LUA_REGISTRYINDEX, cd->callback.ref); - } - else { - lua_getglobal (cd->L, cd->callback.name); - } - ptask = lua_newuserdata (cd->L, sizeof (struct rspamd_task *)); - rspamd_lua_setclass (cd->L, "rspamd{task}", -1); - *ptask = task; - /* Now push all arguments */ - cur = args; - while (cur) { - arg = get_function_arg (cur->data, task, TRUE); - lua_pushstring (cd->L, (const gchar *)arg->data); - cur = g_list_next (cur); - i++; - } - - if (lua_pcall (cd->L, i, 1, 0) != 0) { - msg_info ("error processing symbol %s: call to %s failed: %s", - cd->symbol, - cd->cb_is_ref ? "local function" : - cd->callback.name, - lua_tostring (cd->L, -1)); - } - else { - if (lua_isboolean (cd->L, 1)) { - res = lua_toboolean (cd->L, 1); - } - lua_pop (cd->L, 1); - } - - return res; -} - -static gint -lua_config_register_function (lua_State *L) -{ - struct rspamd_config *cfg = lua_check_config (L); - gchar *name; - struct lua_callback_data *cd; - - if (cfg) { - name = rspamd_mempool_strdup (cfg->cfg_pool, luaL_checkstring (L, 2)); - cd = - rspamd_mempool_alloc (cfg->cfg_pool, - sizeof (struct lua_callback_data)); - - if (lua_type (L, 3) == LUA_TSTRING) { - cd->callback.name = rspamd_mempool_strdup (cfg->cfg_pool, - luaL_checkstring (L, 3)); - cd->cb_is_ref = FALSE; - } - else { - lua_pushvalue (L, 3); - /* Get a reference */ - cd->callback.ref = luaL_ref (L, LUA_REGISTRYINDEX); - cd->cb_is_ref = TRUE; - } - if (name) { - cd->L = L; - cd->symbol = name; - register_expression_function (name, lua_config_function_callback, - cd); - } - rspamd_mempool_add_destructor (cfg->cfg_pool, - (rspamd_mempool_destruct_t)lua_destroy_cfg_symbol, - cd); - } - return 1; -} - static gint lua_config_register_module_option (lua_State *L) { @@ -1238,7 +1132,7 @@ static gint lua_config_add_composite (lua_State * L) { struct rspamd_config *cfg = lua_check_config (L); - struct expression *expr; + struct rspamd_expression *expr; gchar *name; const gchar *expr_str; struct rspamd_composite *composite; @@ -1249,8 +1143,8 @@ lua_config_add_composite (lua_State * L) expr_str = luaL_checkstring (L, 3); if (name && expr_str) { - expr = parse_expression (cfg->cfg_pool, (gchar *)expr_str); - if (expr == NULL) { + if (!rspamd_parse_expression (expr_str, 0, &composite_expr_subr, + NULL, cfg->cfg_pool, NULL, &expr)) { msg_err ("cannot parse composite expression %s", expr_str); } else { diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c index 6fa0de772..58073b3bc 100644 --- a/src/lua/lua_regexp.c +++ b/src/lua/lua_regexp.c @@ -22,7 +22,6 @@ */ #include "lua_common.h" -#include "expressions.h" #include "regexp.h" /*** diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index e74d7e71c..b15d3e181 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -25,7 +25,6 @@ #include "lua_common.h" #include "message.h" -#include "expressions.h" #include "protocol.h" #include "filter.h" #include "dns.h" @@ -273,15 +272,7 @@ LUA_FUNCTION_DEF (task, get_resolver); * Increment number of DNS requests for the task. Is used just for logging purposes. */ LUA_FUNCTION_DEF (task, inc_dns_req); -/*** - * @method task:call_rspamd_function(function[, param, param...]) - * Calls rspamd expression function `func` with the specified parameters. - * It returns the boolean result of function invocation. - * @param {string} function name of internal or registered lua function to call - * @param {list of strings} params parameters for a function - * @return {bool} true or false returned by expression function - */ -LUA_FUNCTION_DEF (task, call_rspamd_function); + /*** * @method task:get_recipients([type]) * Return SMTP or MIME recipients for a task. This function returns list of internet addresses each one is a table with the following structure: @@ -459,7 +450,6 @@ static const struct luaL_reg tasklib_m[] = { LUA_INTERFACE_DEF (task, get_received_headers), LUA_INTERFACE_DEF (task, get_resolver), LUA_INTERFACE_DEF (task, inc_dns_req), - LUA_INTERFACE_DEF (task, call_rspamd_function), LUA_INTERFACE_DEF (task, get_recipients), LUA_INTERFACE_DEF (task, get_from), LUA_INTERFACE_DEF (task, get_user), @@ -1255,45 +1245,6 @@ lua_task_inc_dns_req (lua_State *L) return 0; } -static gint -lua_task_call_rspamd_function (lua_State * L) -{ - struct rspamd_task *task = lua_check_task (L, 1); - struct expression_function f; - gint i, top; - gboolean res; - gchar *arg; - - if (task) { - f.name = (gchar *)luaL_checkstring (L, 2); - if (f.name) { - f.args = NULL; - top = lua_gettop (L); - /* Get arguments after function name */ - for (i = 3; i <= top; i++) { - arg = (gchar *)luaL_checkstring (L, i); - if (arg != NULL) { - f.args = g_list_prepend (f.args, arg); - } - } - res = call_expression_function (&f, task, L); - lua_pushboolean (L, res); - if (f.args) { - g_list_free (f.args); - } - - return 1; - } - } - - lua_pushnil (L); - - return 1; - -} - - - static gboolean lua_push_internet_address (lua_State *L, InternetAddress *ia) { diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index 90a76e50a..002a1d05b 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -33,7 +33,6 @@ #include "config.h" #include "libmime/message.h" -#include "libmime/expressions.h" #include "main.h" #define DEFAULT_SYMBOL "R_CHARSET_MIXED" diff --git a/src/plugins/dkim_check.c b/src/plugins/dkim_check.c index f37a465a1..0b710ce63 100644 --- a/src/plugins/dkim_check.c +++ b/src/plugins/dkim_check.c @@ -39,7 +39,6 @@ #include "config.h" #include "libmime/message.h" -#include "libmime/expressions.h" #include "libserver/dkim.h" #include "libutil/hash.h" #include "libutil/map.h" diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 9fb62187f..a6bbacaf6 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -41,7 +41,6 @@ #include "config.h" #include "libmime/message.h" -#include "libmime/expressions.h" #include "libutil/map.h" #include "libmime/images.h" #include "fuzzy_storage.h" diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index b96fcca31..a140d616c 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -29,72 +29,25 @@ #include "config.h" #include "libmime/message.h" -#include "libmime/expressions.h" +#include "expression.h" +#include "mime_expressions.h" #include "libutil/map.h" #include "lua/lua_common.h" #include "main.h" -#define DEFAULT_STATFILE_PREFIX "./" - struct regexp_module_item { - struct expression *expr; + struct rspamd_expression *expr; const gchar *symbol; - guint32 avg_time; struct ucl_lua_funcdata *lua_function; }; struct regexp_ctx { - gchar *statfile_prefix; - rspamd_mempool_t *regexp_pool; gsize max_size; - gsize max_threads; - GThreadPool *workers; -}; - -/* Lua regexp module for checking rspamd regexps */ -LUA_FUNCTION_DEF (regexp, match); - -static const struct luaL_reg regexplib_m[] = { - LUA_INTERFACE_DEF (regexp, match), - {"__tostring", rspamd_lua_class_tostring}, - {NULL, NULL} }; static struct regexp_ctx *regexp_module_ctx = NULL; -static GMutex *workers_mtx = NULL; -static void process_regexp_item_threaded (gpointer data, gpointer user_data); -static gboolean rspamd_regexp_match_number (struct rspamd_task *task, - GList * args, - void *unused); -static gboolean rspamd_raw_header_exists (struct rspamd_task *task, - GList * args, - void *unused); -static gboolean rspamd_check_smtp_data (struct rspamd_task *task, - GList * args, - void *unused); -static gboolean rspamd_regexp_occurs_number (struct rspamd_task *task, - GList * args, - void *unused); -static gboolean rspamd_content_type_is_type (struct rspamd_task * task, - GList * args, - void *unused); -static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task, - GList * args, - void *unused); -static gboolean rspamd_content_type_has_param (struct rspamd_task * task, - GList * args, - void *unused); -static gboolean rspamd_content_type_compare_param (struct rspamd_task * task, - GList * args, - void *unused); -static gboolean rspamd_has_content_part (struct rspamd_task *task, - GList * args, - void *unused); -static gboolean rspamd_has_content_part_len (struct rspamd_task *task, - GList * args, - void *unused); static void process_regexp_item (struct rspamd_task *task, void *user_data); @@ -111,106 +64,6 @@ module_t regexp_module = { NULL }; -/* Task cache functions */ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) -static GStaticMutex task_cache_mtx = G_STATIC_MUTEX_INIT; -#else -G_LOCK_DEFINE (task_cache_mtx); -#endif - -void -task_cache_add (struct rspamd_task *task, - struct rspamd_regexp_element *re, - gint32 result) -{ - if (result == 0) { - result = -1; - } - /* Avoid concurrenting inserting of results */ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_lock (&task_cache_mtx); -#else - G_LOCK (task_cache_mtx); -#endif - g_hash_table_insert (task->re_cache, re->regexp_text, - GINT_TO_POINTER (result)); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&task_cache_mtx); -#else - G_UNLOCK (task_cache_mtx); -#endif -} - -gint32 -task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re) -{ - gpointer res; - gint32 r; - -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_lock (&task_cache_mtx); -#else - G_LOCK (task_cache_mtx); -#endif - if ((res = g_hash_table_lookup (task->re_cache, re->regexp_text)) != NULL) { - r = GPOINTER_TO_INT (res); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&task_cache_mtx); -#else - G_UNLOCK (task_cache_mtx); -#endif - if (r == -1) { - return 0; - } - return 1; - } -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&task_cache_mtx); -#else - G_UNLOCK (task_cache_mtx); -#endif - return -1; -} - - -static gint -luaopen_regexp (lua_State * L) -{ - luaL_register (L, "rspamd_regexp", regexplib_m); - - return 1; -} - -/* - * Utility functions for matching exact number of regexps - */ -typedef gboolean (*int_compare_func) (gint a, gint b); -static gboolean -op_equal (gint a, gint b) -{ - return a == b; -} -static gboolean -op_more (gint a, gint b) -{ - return a > b; -} -static gboolean -op_less (gint a, gint b) -{ - return a < b; -} -static gboolean -op_more_equal (gint a, gint b) -{ - return a >= b; -} -static gboolean -op_less_equal (gint a, gint b) -{ - return a <= b; -} - /* Process regexp expression */ static gboolean read_regexp_expression (rspamd_mempool_t * pool, @@ -219,30 +72,14 @@ read_regexp_expression (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode) { - struct expression *e, *cur; + struct rspamd_expression *e = NULL; - e = parse_expression (pool, (gchar *)line); + /* XXX: Implement atoms parsing */ if (e == NULL) { msg_warn ("%s = \"%s\" is invalid regexp expression", symbol, line); return FALSE; } chain->expr = e; - cur = e; - while (cur) { - if (cur->type == EXPR_REGEXP) { - cur->content.operand = parse_regexp (pool, - cur->content.operand, - raw_mode); - if (cur->content.operand == NULL) { - msg_warn ("cannot parse regexp, skip expression %s = \"%s\"", - symbol, - line); - return FALSE; - } - cur->type = EXPR_REGEXP_PARSED; - } - cur = cur->next; - } return TRUE; } @@ -256,41 +93,8 @@ regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx) regexp_module_ctx->regexp_pool = rspamd_mempool_new ( rspamd_mempool_suggest_size ()); - regexp_module_ctx->workers = NULL; *ctx = (struct module_ctx *)regexp_module_ctx; - register_expression_function ("regexp_match_number", - rspamd_regexp_match_number, - NULL); - register_expression_function ("regexp_occurs_number", - rspamd_regexp_occurs_number, - NULL); - register_expression_function ("raw_header_exists", - rspamd_raw_header_exists, - NULL); - register_expression_function ("check_smtp_data", - rspamd_check_smtp_data, - NULL); - register_expression_function ("content_type_is_type", - rspamd_content_type_is_type, - NULL); - register_expression_function ("content_type_is_subtype", - rspamd_content_type_is_subtype, - NULL); - register_expression_function ("content_type_has_param", - rspamd_content_type_has_param, - NULL); - register_expression_function ("content_type_compare_param", - rspamd_content_type_compare_param, - NULL); - register_expression_function ("has_content_part", - rspamd_has_content_part, - NULL); - register_expression_function ("has_content_part_len", - rspamd_has_content_part_len, - NULL); - - (void)luaopen_regexp (cfg->lua_state); return 0; } @@ -310,8 +114,6 @@ regexp_module_config (struct rspamd_config *cfg) } regexp_module_ctx->max_size = 0; - regexp_module_ctx->max_threads = 0; - regexp_module_ctx->workers = NULL; while ((value = ucl_iterate_object (sec, &it, true)) != NULL) { if (g_ascii_strncasecmp (ucl_object_key (value), "max_size", @@ -320,7 +122,7 @@ regexp_module_config (struct rspamd_config *cfg) } else if (g_ascii_strncasecmp (ucl_object_key (value), "max_threads", sizeof ("max_threads") - 1) == 0) { - regexp_module_ctx->max_threads = ucl_obj_toint (value); + msg_warn ("regexp module is now single threaded, max_threads is ignored"); } else if (value->type == UCL_STRING) { cur_item = rspamd_mempool_alloc0 (regexp_module_ctx->regexp_pool, @@ -367,640 +169,48 @@ regexp_module_reconfig (struct rspamd_config *cfg) return regexp_module_config (cfg); } -struct url_regexp_param { - struct rspamd_task *task; - rspamd_regexp_t *regexp; - struct rspamd_regexp_element *re; - gboolean found; -}; - -static gboolean -tree_url_callback (gpointer key, gpointer value, void *data) -{ - struct url_regexp_param *param = data; - struct rspamd_url *url = value; - - if (rspamd_regexp_search (param->regexp, struri (url), 0, NULL, NULL, FALSE) - == TRUE) { - if (G_UNLIKELY (param->re->is_test)) { - msg_info ("process test regexp %s for url %s returned TRUE", - struri (url)); - } - task_cache_add (param->task, param->re, 1); - param->found = TRUE; - return TRUE; - } - else if (G_UNLIKELY (param->re->is_test)) { - msg_info ("process test regexp %s for url %s returned FALSE", - struri (url)); - } - - return FALSE; -} - -static gsize -process_regexp (struct rspamd_regexp_element *re, - struct rspamd_task *task, - const gchar *additional, - gint limit, - int_compare_func f) -{ - guint8 *ct; - gsize clen; - gint r, passed = 0; - gboolean matched = FALSE, raw = FALSE; - const gchar *in, *start, *end; - - GList *cur, *headerlist; - rspamd_regexp_t *regexp; - struct url_regexp_param callback_param = { - .task = task, - .re = re, - .found = FALSE - }; - struct mime_text_part *part; - struct raw_header *rh; - - if (re == NULL) { - msg_info ("invalid regexp passed"); - return 0; - } - - callback_param.regexp = re->regexp; - if ((r = task_cache_check (task, re)) != -1) { - debug_task ("regexp /%s/ is found in cache, result: %d", - re->regexp_text, - r); - return r == 1; - } - - if (additional != NULL) { - /* We have additional parameter defined, so ignore type of regexp expression and use it for parsing */ - if (G_UNLIKELY (re->is_test)) { - msg_info ("process test regexp %s with test %s", - re->regexp_text, - additional); - } - if (rspamd_regexp_search (re->regexp, additional, 0, NULL, NULL, - FALSE) == TRUE) { - if (G_UNLIKELY (re->is_test)) { - msg_info ("result of regexp %s is true", re->regexp_text); - } - task_cache_add (task, re, 1); - return 1; - } - else { - task_cache_add (task, re, 0); - return 0; - } - } - - switch (re->type) { - case REGEXP_NONE: - msg_warn ("bad error detected: %s has invalid regexp type", - re->regexp_text); - break; - case REGEXP_HEADER: - case REGEXP_RAW_HEADER: - /* Check header's name */ - if (re->header == NULL) { - msg_info ("header regexp without header name: '%s'", - re->regexp_text); - task_cache_add (task, re, 0); - return 0; - } - debug_task ("checking %s header regexp: %s = %s", - re->type == REGEXP_RAW_HEADER ? "raw" : "decoded", - re->header, - re->regexp_text); - - /* Get list of specified headers */ - headerlist = message_get_header (task, - re->header, - re->is_strong); - if (headerlist == NULL) { - /* Header is not found */ - if (G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for header %s returned FALSE: no header found", - re->regexp_text, - re->header); - } - task_cache_add (task, re, 0); - return 0; - } - else { - /* Check whether we have regexp for it */ - if (re->regexp == NULL) { - debug_task ("regexp contains only header and it is found %s", - re->header); - task_cache_add (task, re, 1); - return 1; - } - /* Iterate throught headers */ - cur = headerlist; - while (cur) { - rh = cur->data; - debug_task ("found header \"%s\" with value \"%s\"", - re->header, rh->decoded); - regexp = re->regexp; - - if (re->type == REGEXP_RAW_HEADER) { - in = rh->value; - raw = TRUE; - } - else { - in = rh->decoded; - /* Validate input */ - if (!in || !g_utf8_validate (in, -1, NULL)) { - cur = g_list_next (cur); - continue; - } - } - - /* Match re */ - if (in && - rspamd_regexp_search (regexp, in, 0, NULL, NULL, raw)) { - if (G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for header %s with value '%s' returned TRUE", - re->regexp_text, - re->header, - in); - } - if (f != NULL && limit > 1) { - /* If we have limit count, increase passed count and compare with limit */ - if (f (++passed, limit)) { - task_cache_add (task, re, 1); - return 1; - } - } - else { - task_cache_add (task, re, 1); - return 1; - } - } - else if (G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for header %s with value '%s' returned FALSE", - re->regexp_text, - re->header, - in); - } - cur = g_list_next (cur); - } - task_cache_add (task, re, 0); - return 0; - } - break; - case REGEXP_MIME: - debug_task ("checking mime regexp: %s", re->regexp_text); - /* Iterate throught text parts */ - cur = g_list_first (task->text_parts); - while (cur) { - part = (struct mime_text_part *)cur->data; - /* Skip empty parts */ - if (part->is_empty) { - cur = g_list_next (cur); - continue; - } - /* Skip too large parts */ - if (regexp_module_ctx->max_size != 0 && part->content->len > - regexp_module_ctx->max_size) { - msg_info ("<%s> skip part of size %Hud", - task->message_id, - part->content->len); - cur = g_list_next (cur); - continue; - } - - regexp = re->regexp; - - /* Check raw flags */ - if (part->is_raw) { - raw = TRUE; - } - /* Select data for regexp */ - if (raw) { - ct = part->orig->data; - clen = part->orig->len; - } - else { - ct = part->content->data; - clen = part->content->len; - } - /* If we have limit, apply regexp so much times as we can */ - if (f != NULL && limit > 1) { - end = 0; - start = NULL; - end = NULL; - while ((matched = - rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) { - if (G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for mime part of length %d returned TRUE", - re->regexp_text, - (gint)clen, - end); - } - if (f (++passed, limit)) { - task_cache_add (task, re, 1); - return 1; - } - } - } - else { - if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) { - if (G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for mime part of length %d returned TRUE", - re->regexp_text, - (gint)clen); - } - task_cache_add (task, re, 1); - return 1; - } - - } - if (!matched && G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for mime part of length %d returned FALSE", - re->regexp_text, - (gint)clen); - } - cur = g_list_next (cur); - } - task_cache_add (task, re, 0); - break; - case REGEXP_MESSAGE: - debug_task ("checking message regexp: %s", re->regexp_text); - raw = TRUE; - regexp = re->regexp; - ct = (guint8 *)task->msg.start; - clen = task->msg.len; - - if (regexp_module_ctx->max_size != 0 && clen > - regexp_module_ctx->max_size) { - msg_info ("<%s> skip message of size %Hz", task->message_id, clen); - return 0; - } - /* If we have limit, apply regexp so much times as we can */ - if (f != NULL && limit > 1) { - start = end = NULL; - while ((matched = - rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) { - if (G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for mime part of length %d returned TRUE", - re->regexp_text, - (gint)clen); - } - if (f (++passed, limit)) { - task_cache_add (task, re, 1); - return 1; - } - } - } - else { - if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) { - if (G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for message part of length %d returned TRUE", - re->regexp_text, - (gint)clen); - } - task_cache_add (task, re, 1); - return 1; - } - - } - if (!matched && G_UNLIKELY (re->is_test)) { - msg_info ( - "process test regexp %s for message part of length %d returned FALSE", - re->regexp_text, - (gint)clen); - } - task_cache_add (task, re, 0); - break; - case REGEXP_URL: - debug_task ("checking url regexp: %s", re->regexp_text); - if (f != NULL && limit > 1) { - /*XXX: add support of it */ - msg_warn ("numbered matches are not supported for url regexp"); - } - regexp = re->regexp; - callback_param.task = task; - callback_param.regexp = regexp; - callback_param.re = re; - callback_param.found = FALSE; - if (task->urls) { - g_tree_foreach (task->urls, tree_url_callback, &callback_param); - } - if (task->emails && callback_param.found == FALSE) { - g_tree_foreach (task->emails, tree_url_callback, &callback_param); - } - if (callback_param.found == FALSE) { - task_cache_add (task, re, 0); - } - break; - default: - msg_warn ("bad error detected: %p is not a valid regexp object", re); - break; - } - - /* Not reached */ - return 0; -} - -static gboolean -maybe_call_lua_function (const gchar *name, - struct rspamd_task *task, - lua_State *L) -{ - struct rspamd_task **ptask; - gboolean res; - - lua_getglobal (L, name); - if (lua_isfunction (L, -1)) { - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); - rspamd_lua_setclass (L, "rspamd{task}", -1); - *ptask = task; - /* Call function */ - if (lua_pcall (L, 1, 1, 0) != 0) { - msg_info ("call to %s failed: %s", (gchar *)name, - lua_tostring (L, -1)); - return FALSE; - } - res = lua_toboolean (L, -1); - lua_pop (L, 1); - return res; - } - else { - lua_pop (L, 1); - } - return FALSE; -} - -static gboolean -optimize_regexp_expression (struct expression **e, GQueue * stack, gboolean res) -{ - struct expression *it = (*e)->next; - gboolean ret = FALSE, is_nearest = TRUE; - gint skip_level = 0; - - /* Skip nearest logical operators from optimization */ - if (!it || (it->type == EXPR_OPERATION && it->content.operation != '!')) { - g_queue_push_head (stack, GSIZE_TO_POINTER (res)); - return ret; - } - - while (it) { - /* Find first operation for this iterator */ - if (it->type == EXPR_OPERATION) { - /* If this operation is just ! just inverse res and check for further operators */ - if (it->content.operation == '!') { - if (is_nearest) { - msg_debug ("found '!' operator, inversing result"); - res = !res; - *e = it; - } - it = it->next; - continue; - } - else { - skip_level--; - } - /* Check whether we found corresponding operator for this operand */ - if (skip_level <= 0) { - if (it->content.operation == '|' && res == TRUE) { - msg_debug ("found '|' and previous expression is true"); - *e = it; - ret = TRUE; - } - else if (it->content.operation == '&' && res == FALSE) { - msg_debug ("found '&' and previous expression is false"); - *e = it; - ret = TRUE; - } - break; - } - } - else { - is_nearest = FALSE; - skip_level++; - } - it = it->next; - } - - g_queue_push_head (stack, GSIZE_TO_POINTER (res)); - - return ret; -} - -static gboolean -process_regexp_expression (struct expression *expr, - const gchar *symbol, - struct rspamd_task *task, - const gchar *additional, - struct lua_locked_state *nL) -{ - GQueue *stack; - gsize cur, op1, op2; - struct expression *it = expr; - struct rspamd_regexp_element *re; - gboolean try_optimize = TRUE; - - stack = g_queue_new (); - - while (it) { - if (it->type == EXPR_REGEXP_PARSED) { - /* Find corresponding symbol */ - cur = process_regexp ((struct rspamd_regexp_element *)it->content.operand, - task, - additional, - 0, - NULL); - debug_task ("regexp %s found", cur ? "is" : "is not"); - if (try_optimize) { - try_optimize = optimize_regexp_expression (&it, stack, cur); - } - else { - g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); - } - } - else if (it->type == EXPR_FUNCTION) { - if (nL) { - rspamd_mutex_lock (nL->m); - cur = - (gsize) call_expression_function ((struct - expression_function - *)it->content.operand, task, nL->L); - rspamd_mutex_unlock (nL->m); - } - else { - cur = - (gsize) call_expression_function ((struct - expression_function - *)it->content.operand, task, task->cfg->lua_state); - } - debug_task ("function %s returned %s", - ((struct expression_function *)it->content.operand)->name, - cur ? "true" : "false"); - if (try_optimize) { - try_optimize = optimize_regexp_expression (&it, stack, cur); - } - else { - g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); - } - } - else if (it->type == EXPR_STR) { - /* This may be lua function, try to call it */ - if (nL) { - rspamd_mutex_lock (nL->m); - cur = maybe_call_lua_function ( - (const gchar *)it->content.operand, - task, - nL->L); - rspamd_mutex_unlock (nL->m); - } - else { - cur = maybe_call_lua_function ( - (const gchar *)it->content.operand, - task, - task->cfg->lua_state); - } - debug_task ("function %s returned %s", - (const gchar *)it->content.operand, - cur ? "true" : "false"); - if (try_optimize) { - try_optimize = optimize_regexp_expression (&it, stack, cur); - } - else { - g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); - } - } - else if (it->type == EXPR_REGEXP) { - /* Compile regexp if it is not parsed */ - if (it->content.operand == NULL) { - it = it->next; - continue; - } - re = parse_regexp (task->cfg->cfg_pool, - it->content.operand, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot parse regexp, skip expression"); - g_queue_free (stack); - return FALSE; - } - it->content.operand = re; - it->type = EXPR_REGEXP_PARSED; - /* Continue with this regexp once again */ - continue; - } - else if (it->type == EXPR_OPERATION) { - if (g_queue_is_empty (stack)) { - /* Queue has no operands for operation, exiting */ - msg_warn ( - "regexp expression seems to be invalid: empty stack while reading operation"); - g_queue_free (stack); - return FALSE; - } - debug_task ("got operation %c", it->content.operation); - switch (it->content.operation) { - case '!': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op1 = !op1; - try_optimize = optimize_regexp_expression (&it, stack, op1); - break; - case '&': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - try_optimize = optimize_regexp_expression (&it, - stack, - op1 && op2); - break; - case '|': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - try_optimize = optimize_regexp_expression (&it, - stack, - op1 || op2); - break; - default: - it = it->next; - continue; - } - } - if (it) { - it = it->next; - } - } - if (!g_queue_is_empty (stack)) { - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - if (op1) { - g_queue_free (stack); - return TRUE; - } - } - else { - msg_warn ( - "regexp expression seems to be invalid: empty stack at the end of expression, symbol %s", - symbol); - } - - g_queue_free (stack); - - return FALSE; -} - -/* Call custom lua function in rspamd expression */ -static gboolean -rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data, - struct rspamd_task *task, GList *args, gboolean *res) +static gboolean rspamd_lua_call_expression_func( + struct ucl_lua_funcdata *lua_data, struct rspamd_task *task, + GArray *args, gboolean *res) { lua_State *L = lua_data->L; struct rspamd_task **ptask; - GList *cur; struct expression_argument *arg; - int nargs = 1, pop = 0; + gint pop = 0, i; lua_rawgeti (L, LUA_REGISTRYINDEX, lua_data->idx); /* Now we got function in top of stack */ - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); + ptask = lua_newuserdata (L, sizeof(struct rspamd_task *)); rspamd_lua_setclass (L, "rspamd{task}", -1); *ptask = task; /* Now push all arguments */ - cur = args; - while (cur) { - arg = get_function_arg (cur->data, task, FALSE); + for (i = 0; i < args->len; i ++) { + arg = &g_array_index (args, struct expression_argument, i); if (arg) { switch (arg->type) { case EXPRESSION_ARGUMENT_NORMAL: - lua_pushstring (L, (const gchar *)arg->data); + lua_pushstring (L, (const gchar *) arg->data); break; case EXPRESSION_ARGUMENT_BOOL: - lua_pushboolean (L, (gboolean) GPOINTER_TO_SIZE (arg->data)); + lua_pushboolean (L, (gboolean) GPOINTER_TO_SIZE(arg->data)); break; default: - msg_err ("cannot pass custom params to lua function"); + msg_err("cannot pass custom params to lua function"); return FALSE; } } - nargs++; - cur = g_list_next (cur); } - if (lua_pcall (L, nargs, 1, 0) != 0) { - msg_info ("call to lua function failed: %s", lua_tostring (L, -1)); + if (lua_pcall (L, args->len, 1, 0) != 0) { + msg_info("call to lua function failed: %s", lua_tostring (L, -1)); return FALSE; } pop++; if (!lua_isboolean (L, -1)) { lua_pop (L, pop); - msg_info ("lua function must return a boolean"); + msg_info("lua function must return a boolean"); return FALSE; } *res = lua_toboolean (L, -1); @@ -1009,1042 +219,27 @@ rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data, return TRUE; } -struct regexp_threaded_ud { - struct regexp_module_item *item; - struct rspamd_task *task; -}; - -static void -process_regexp_item_threaded (gpointer data, gpointer user_data) -{ - struct regexp_threaded_ud *ud = data; - struct lua_locked_state *nL = user_data; - - /* Process expression */ - if (process_regexp_expression (ud->item->expr, ud->item->symbol, ud->task, - NULL, nL)) { - g_mutex_lock (workers_mtx); - rspamd_task_insert_result (ud->task, ud->item->symbol, 1, NULL); - g_mutex_unlock (workers_mtx); - } - remove_async_thread (ud->task->s); -} static void process_regexp_item (struct rspamd_task *task, void *user_data) { struct regexp_module_item *item = user_data; gboolean res = FALSE; - struct regexp_threaded_ud *thr_ud; - GError *err = NULL; - struct lua_locked_state *nL; - - if (!item->lua_function && regexp_module_ctx->max_threads > 1) { - if (regexp_module_ctx->workers == NULL) { -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) -# if GLIB_MINOR_VERSION > 20 - if (!g_thread_get_initialized ()) { - g_thread_init (NULL); - } -# else - g_thread_init (NULL); -# endif - workers_mtx = g_mutex_new (); -#else - workers_mtx = rspamd_mempool_alloc (regexp_module_ctx->regexp_pool, - sizeof (GMutex)); - g_mutex_init (workers_mtx); -#endif - nL = rspamd_init_lua_locked (task->cfg); - luaopen_regexp (nL->L); - regexp_module_ctx->workers = g_thread_pool_new ( - process_regexp_item_threaded, - nL, - regexp_module_ctx->max_threads, - TRUE, - &err); - if (err != NULL) { - msg_err ("thread pool creation failed: %s", err->message); - regexp_module_ctx->max_threads = 0; - return; - } - } - thr_ud = - rspamd_mempool_alloc (task->task_pool, - sizeof (struct regexp_threaded_ud)); - thr_ud->item = item; - thr_ud->task = task; - - - register_async_thread (task->s); - g_thread_pool_push (regexp_module_ctx->workers, thr_ud, &err); - if (err != NULL) { - msg_err ("error pushing task to the regexp thread pool: %s", - err->message); - remove_async_thread (task->s); - } - } - else { - /* Non-threaded version */ - if (item->lua_function) { - /* Just call function */ - res = FALSE; - if (!rspamd_lua_call_expression_func (item->lua_function, task, NULL, + /* Non-threaded version */ + if (item->lua_function) { + /* Just call function */ + res = FALSE; + if (!rspamd_lua_call_expression_func (item->lua_function, task, NULL, &res)) { - msg_err ("error occurred when checking symbol %s", item->symbol); - } - if (res) { - rspamd_task_insert_result (task, item->symbol, 1, NULL); - } - } - else { - /* Process expression */ - if (process_regexp_expression (item->expr, item->symbol, task, NULL, - NULL)) { - rspamd_task_insert_result (task, item->symbol, 1, NULL); - } - } - } -} - -static gboolean -rspamd_regexp_match_number (struct rspamd_task *task, GList * args, - void *unused) -{ - gint param_count, res = 0; - struct expression_argument *arg; - GList *cur; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - param_count = strtoul (arg->data, NULL, 10); - - cur = args->next; - while (cur) { - arg = get_function_arg (cur->data, task, FALSE); - if (arg && arg->type == EXPRESSION_ARGUMENT_BOOL) { - if ((gboolean) GPOINTER_TO_SIZE (arg->data)) { - res++; - } - } - else { - if (process_regexp_expression (cur->data, "regexp_match_number", - task, NULL, NULL)) { - res++; - } - if (res >= param_count) { - return TRUE; - } - } - cur = g_list_next (cur); - } - - return res >= param_count; -} - -static gboolean -rspamd_regexp_occurs_number (struct rspamd_task *task, - GList * args, - void *unused) -{ - gint limit; - struct expression_argument *arg; - struct rspamd_regexp_element *re; - gchar *param, *err_str, op; - int_compare_func f = NULL; - - if (args == NULL || args->next == NULL) { - msg_warn ("wrong number of parameters to function, must be 2"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - if ((re = re_cache_check (arg->data, task->cfg->cfg_pool)) == NULL) { - re = parse_regexp (task->cfg->cfg_pool, arg->data, task->cfg->raw_mode); - if (!re) { - msg_err ("cannot parse given regexp: %s", (gchar *)arg->data); - return FALSE; - } - } - - arg = get_function_arg (args->next->data, task, TRUE); - param = arg->data; - op = *param; - if (g_ascii_isdigit (op)) { - op = '='; - } - else { - param++; - } - switch (op) { - case '>': - if (*param == '=') { - f = op_more_equal; - param++; - } - else { - f = op_more; - } - break; - case '<': - if (*param == '=') { - f = op_less_equal; - param++; - } - else { - f = op_less; - } - break; - case '=': - f = op_equal; - break; - default: - msg_err ( - "wrong operation character: %c, assumed '=', '>', '<', '>=', '<=' or empty op", - op); - return FALSE; - } - - limit = strtoul (param, &err_str, 10); - if (*err_str != 0) { - msg_err ("wrong numeric: %s at position: %s", param, err_str); - return FALSE; - } - - return process_regexp (re, task, NULL, limit, f); -} -static gboolean -rspamd_raw_header_exists (struct rspamd_task *task, GList * args, void *unused) -{ - struct expression_argument *arg; - - if (args == NULL || task == NULL) { - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { - msg_warn ("invalid argument to function is passed"); - return FALSE; - } - - return g_hash_table_lookup (task->raw_headers, arg->data) != NULL; -} - -static gboolean -match_smtp_data (struct rspamd_task *task, - const gchar *re_text, - const gchar *what) -{ - struct rspamd_regexp_element *re; - gint r; - - if (*re_text == '/') { - /* This is a regexp */ - re = parse_regexp (task->cfg->cfg_pool, - (gchar *)re_text, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot compile regexp for function"); - return FALSE; + msg_err ("error occurred when checking symbol %s", item->symbol); } - - if ((r = task_cache_check (task, re)) == -1) { - if (rspamd_regexp_search (re->regexp, what, 0, NULL, NULL, FALSE)) { - task_cache_add (task, re, 1); - return TRUE; - } - task_cache_add (task, re, 0); - } - else { - return r == 1; + if (res) { + rspamd_task_insert_result (task, item->symbol, 1, NULL); } } - else if (g_ascii_strcasecmp (re_text, what) == 0) { - return TRUE; - } - - return FALSE; -} - -static gboolean -rspamd_check_smtp_data (struct rspamd_task *task, GList * args, void *unused) -{ - struct expression_argument *arg; - InternetAddressList *ia = NULL; - const gchar *type, *what = NULL; - GList *cur; - gint i, ialen; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - - if (!arg || !arg->data) { - msg_warn ("no parameters to function"); - return FALSE; - } else { - type = arg->data; - switch (*type) { - case 'f': - case 'F': - if (g_ascii_strcasecmp (type, "from") == 0) { - what = rspamd_task_get_sender (task); - } - else { - msg_warn ("bad argument to function: %s", type); - return FALSE; - } - break; - case 'h': - case 'H': - if (g_ascii_strcasecmp (type, "helo") == 0) { - what = task->helo; - } - else { - msg_warn ("bad argument to function: %s", type); - return FALSE; - } - break; - case 'u': - case 'U': - if (g_ascii_strcasecmp (type, "user") == 0) { - what = task->user; - } - else { - msg_warn ("bad argument to function: %s", type); - return FALSE; - } - break; - case 's': - case 'S': - if (g_ascii_strcasecmp (type, "subject") == 0) { - what = task->subject; - } - else { - msg_warn ("bad argument to function: %s", type); - return FALSE; - } - break; - case 'r': - case 'R': - if (g_ascii_strcasecmp (type, "rcpt") == 0) { - ia = task->rcpt_mime; - } - else { - msg_warn ("bad argument to function: %s", type); - return FALSE; - } - break; - default: - msg_warn ("bad argument to function: %s", type); - return FALSE; - } - } - - if (what == NULL && ia == NULL) { - /* Not enough data so regexp would NOT be found anyway */ - return FALSE; - } - - /* We would process only one more argument, others are ignored */ - cur = args->next; - if (cur) { - arg = get_function_arg (cur->data, task, FALSE); - if (arg && arg->type == EXPRESSION_ARGUMENT_NORMAL) { - if (what != NULL) { - return match_smtp_data (task, arg->data, what); - } - else { - if (ia != NULL) { - ialen = internet_address_list_length(ia); - for (i = 0; i < ialen; i ++) { - InternetAddress *iaelt = - internet_address_list_get_address(ia, i); - InternetAddressMailbox *iamb = - INTERNET_ADDRESS_IS_MAILBOX(iaelt) ? - INTERNET_ADDRESS_MAILBOX (iaelt) : NULL; - if (iamb && - match_smtp_data (task, arg->data, - internet_address_mailbox_get_addr(iamb))) { - return TRUE; - } - } - } - } - } - else if (arg != NULL) { - if (what != NULL) { - if (process_regexp_expression (arg->data, - "regexp_check_smtp_data", task, what, NULL)) { - return TRUE; - } - } - else { - if (ia != NULL) { - ialen = internet_address_list_length(ia); - for (i = 0; i < ialen; i ++) { - InternetAddress *iaelt = - internet_address_list_get_address(ia, i); - InternetAddressMailbox *iamb = - INTERNET_ADDRESS_IS_MAILBOX(iaelt) ? - INTERNET_ADDRESS_MAILBOX (iaelt) : NULL; - if (iamb && - process_regexp_expression (arg->data, - "regexp_check_smtp_data", task, - internet_address_mailbox_get_addr(iamb), - NULL)) { - return TRUE; - } - } - } - } - } - } - - return FALSE; -} - -/* Lua part */ -static gint -lua_regexp_match (lua_State *L) -{ - void *ud = luaL_checkudata (L, 1, "rspamd{task}"); - struct rspamd_task *task; - const gchar *re_text; - struct rspamd_regexp_element *re; - gint r = 0; - - luaL_argcheck (L, ud != NULL, 1, "'task' expected"); - task = ud ? *((struct rspamd_task **)ud) : NULL; - re_text = luaL_checkstring (L, 2); - - /* This is a regexp */ - if (task != NULL) { - re = parse_regexp (task->cfg->cfg_pool, - (gchar *)re_text, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot compile regexp for function"); - return FALSE; - } - r = process_regexp (re, task, NULL, 0, NULL); - } - lua_pushboolean (L, r == 1); - - return 1; -} - -static gboolean -rspamd_content_type_compare_param (struct rspamd_task * task, - GList * args, - void *unused) -{ - gchar *param_name, *param_pattern; - const gchar *param_data; - struct rspamd_regexp_element *re; - struct expression_argument *arg, *arg1; - GMimeObject *part; - GMimeContentType *ct; - gint r; - gboolean recursive = FALSE, result = FALSE; - GList *cur = NULL; - struct mime_part *cur_part; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - arg = get_function_arg (args->data, task, TRUE); - param_name = arg->data; - args = g_list_next (args); - if (args == NULL) { - msg_warn ("too few params to function"); - return FALSE; - } - arg = get_function_arg (args->data, task, TRUE); - param_pattern = arg->data; - - - part = g_mime_message_get_mime_part (task->message); - if (part) { - ct = (GMimeContentType *)g_mime_object_get_content_type (part); - if (args->next) { - args = g_list_next (args); - arg1 = get_function_arg (args->data, task, TRUE); - if (g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { - recursive = TRUE; - } - } - else { - /* - * If user did not specify argument, let's assume that he wants - * recursive search if mime part is multipart/mixed - */ - if (g_mime_content_type_is_type (ct, "multipart", "*")) { - recursive = TRUE; - } - } - - if (recursive) { - cur = task->parts; - } - -#ifndef GMIME24 - g_object_unref (part); -#endif - for (;; ) { - if ((param_data = - g_mime_content_type_get_parameter ((GMimeContentType *)ct, - param_name)) == NULL) { - result = FALSE; - } - else { - if (*param_pattern == '/') { - re = parse_regexp (task->cfg->cfg_pool, - param_pattern, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot compile regexp for function"); - return FALSE; - } - if ((r = task_cache_check (task, re)) == -1) { - if (rspamd_regexp_search (re->regexp, param_data, 0, - NULL, NULL, FALSE) == TRUE) { - task_cache_add (task, re, 1); - return TRUE; - } - task_cache_add (task, re, 0); - } - else { - - } - } - else { - /* Just do strcasecmp */ - if (g_ascii_strcasecmp (param_data, param_pattern) == 0) { - return TRUE; - } - } - } - /* Get next part */ - if (!recursive) { - return result; - } - else if (cur != NULL) { - cur_part = cur->data; - if (cur_part->type != NULL) { - ct = cur_part->type; - } - cur = g_list_next (cur); - } - else { - /* All is done */ - return result; - } - } - - } - - return FALSE; -} - -static gboolean -rspamd_content_type_has_param (struct rspamd_task * task, - GList * args, - void *unused) -{ - gchar *param_name; - const gchar *param_data; - struct expression_argument *arg, *arg1; - GMimeObject *part; - GMimeContentType *ct; - gboolean recursive = FALSE, result = FALSE; - GList *cur = NULL; - struct mime_part *cur_part; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; + /* Process expression */ + /* XXX: add this function */ } - arg = get_function_arg (args->data, task, TRUE); - param_name = arg->data; - - part = g_mime_message_get_mime_part (task->message); - if (part) { - ct = (GMimeContentType *)g_mime_object_get_content_type (part); - if (args->next) { - args = g_list_next (args); - arg1 = get_function_arg (args->data, task, TRUE); - if (g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { - recursive = TRUE; - } - } - else { - /* - * If user did not specify argument, let's assume that he wants - * recursive search if mime part is multipart/mixed - */ - if (g_mime_content_type_is_type (ct, "multipart", "*")) { - recursive = TRUE; - } - } - - if (recursive) { - cur = task->parts; - } - -#ifndef GMIME24 - g_object_unref (part); -#endif - for (;; ) { - if ((param_data = - g_mime_content_type_get_parameter ((GMimeContentType *)ct, - param_name)) != NULL) { - return TRUE; - } - /* Get next part */ - if (!recursive) { - return result; - } - else if (cur != NULL) { - cur_part = cur->data; - if (cur_part->type != NULL) { - ct = cur_part->type; - } - cur = g_list_next (cur); - } - else { - /* All is done */ - return result; - } - } - - } - - return TRUE; -} - -static gboolean -rspamd_content_type_is_subtype (struct rspamd_task *task, - GList * args, - void *unused) -{ - gchar *param_pattern; - struct rspamd_regexp_element *re; - struct expression_argument *arg, *arg1; - GMimeObject *part; - GMimeContentType *ct; - gint r; - gboolean recursive = FALSE, result = FALSE; - GList *cur = NULL; - struct mime_part *cur_part; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - arg = get_function_arg (args->data, task, TRUE); - param_pattern = arg->data; - - part = g_mime_message_get_mime_part (task->message); - if (part) { - ct = (GMimeContentType *)g_mime_object_get_content_type (part); - if (args->next) { - args = g_list_next (args); - arg1 = get_function_arg (args->data, task, TRUE); - if (g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { - recursive = TRUE; - } - } - else { - /* - * If user did not specify argument, let's assume that he wants - * recursive search if mime part is multipart/mixed - */ - if (g_mime_content_type_is_type (ct, "multipart", "*")) { - recursive = TRUE; - } - } - - if (recursive) { - cur = task->parts; - } - -#ifndef GMIME24 - g_object_unref (part); -#endif - for (;; ) { - if (*param_pattern == '/') { - re = parse_regexp (task->cfg->cfg_pool, - param_pattern, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot compile regexp for function"); - return FALSE; - } - if ((r = task_cache_check (task, re)) == -1) { - if (rspamd_regexp_search (re->regexp, ct->subtype, 0, - NULL, NULL, FALSE)) { - task_cache_add (task, re, 1); - return TRUE; - } - task_cache_add (task, re, 0); - } - else { - - } - } - else { - /* Just do strcasecmp */ - if (g_ascii_strcasecmp (ct->subtype, param_pattern) == 0) { - return TRUE; - } - } - /* Get next part */ - if (!recursive) { - return result; - } - else if (cur != NULL) { - cur_part = cur->data; - if (cur_part->type != NULL) { - ct = cur_part->type; - } - cur = g_list_next (cur); - } - else { - /* All is done */ - return result; - } - } - - } - - return FALSE; -} - -static gboolean -rspamd_content_type_is_type (struct rspamd_task * task, - GList * args, - void *unused) -{ - gchar *param_pattern; - struct rspamd_regexp_element *re; - struct expression_argument *arg, *arg1; - GMimeObject *part; - GMimeContentType *ct; - gint r; - gboolean recursive = FALSE, result = FALSE; - GList *cur = NULL; - struct mime_part *cur_part; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - arg = get_function_arg (args->data, task, TRUE); - param_pattern = arg->data; - - - part = g_mime_message_get_mime_part (task->message); - if (part) { - ct = (GMimeContentType *)g_mime_object_get_content_type (part); - if (args->next) { - args = g_list_next (args); - arg1 = get_function_arg (args->data, task, TRUE); - if (g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { - recursive = TRUE; - } - } - else { - /* - * If user did not specify argument, let's assume that he wants - * recursive search if mime part is multipart/mixed - */ - if (g_mime_content_type_is_type (ct, "multipart", "*")) { - recursive = TRUE; - } - } - - if (recursive) { - cur = task->parts; - } - -#ifndef GMIME24 - g_object_unref (part); -#endif - for (;; ) { - if (*param_pattern == '/') { - re = parse_regexp (task->cfg->cfg_pool, - param_pattern, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot compile regexp for function"); - return FALSE; - } - if ((r = task_cache_check (task, re)) == -1) { - if (rspamd_regexp_search (re->regexp, ct->type, 0, - NULL, NULL, FALSE) == TRUE) { - task_cache_add (task, re, 1); - return TRUE; - } - task_cache_add (task, re, 0); - } - else { - - } - } - else { - /* Just do strcasecmp */ - if (g_ascii_strcasecmp (ct->type, param_pattern) == 0) { - return TRUE; - } - } - /* Get next part */ - if (!recursive) { - return result; - } - else if (cur != NULL) { - cur_part = cur->data; - if (cur_part->type != NULL) { - ct = cur_part->type; - } - cur = g_list_next (cur); - } - else { - /* All is done */ - return result; - } - } - - } - - return FALSE; -} - -static gboolean -compare_subtype (struct rspamd_task *task, GMimeContentType * ct, - gchar *subtype) -{ - struct rspamd_regexp_element *re; - gint r; - - if (subtype == NULL || ct == NULL) { - msg_warn ("invalid parameters passed"); - return FALSE; - } - if (*subtype == '/') { - re = parse_regexp (task->cfg->cfg_pool, subtype, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot compile regexp for function"); - return FALSE; - } - if ((r = task_cache_check (task, re)) == -1) { - if (rspamd_regexp_search (re->regexp, subtype, 0, - NULL, NULL, FALSE) == TRUE) { - task_cache_add (task, re, 1); - return TRUE; - } - task_cache_add (task, re, 0); - } - else { - return r == 1; - } - } - else { - /* Just do strcasecmp */ - if (ct->subtype && g_ascii_strcasecmp (ct->subtype, subtype) == 0) { - return TRUE; - } - } - - return FALSE; -} - -static gboolean -compare_len (struct mime_part *part, guint min, guint max) -{ - if (min == 0 && max == 0) { - return TRUE; - } - - if (min == 0) { - return part->content->len <= max; - } - else if (max == 0) { - return part->content->len >= min; - } - else { - return part->content->len >= min && part->content->len <= max; - } -} - -static gboolean -common_has_content_part (struct rspamd_task * task, - gchar *param_type, - gchar *param_subtype, - gint min_len, - gint max_len) -{ - struct rspamd_regexp_element *re; - struct mime_part *part; - GList *cur; - GMimeContentType *ct; - gint r; - - cur = g_list_first (task->parts); - while (cur) { - part = cur->data; - ct = part->type; - if (ct == NULL) { - cur = g_list_next (cur); - continue; - } - - if (*param_type == '/') { - re = parse_regexp (task->cfg->cfg_pool, - param_type, - task->cfg->raw_mode); - if (re == NULL) { - msg_warn ("cannot compile regexp for function"); - cur = g_list_next (cur); - continue; - } - if ((r = task_cache_check (task, re)) == -1) { - if (ct->type && - rspamd_regexp_search (re->regexp, ct->type, 0, - NULL, NULL, TRUE)) { - if (param_subtype) { - if (compare_subtype (task, ct, param_subtype)) { - if (compare_len (part, min_len, max_len)) { - return TRUE; - } - } - } - else { - if (compare_len (part, min_len, max_len)) { - return TRUE; - } - } - task_cache_add (task, re, 1); - } - else { - task_cache_add (task, re, 0); - } - } - else { - if (r == 1) { - if (compare_subtype (task, ct, param_subtype)) { - if (compare_len (part, min_len, max_len)) { - return TRUE; - } - } - } - } - } - else { - /* Just do strcasecmp */ - if (ct->type && g_ascii_strcasecmp (ct->type, param_type) == 0) { - if (param_subtype) { - if (compare_subtype (task, ct, param_subtype)) { - if (compare_len (part, min_len, max_len)) { - return TRUE; - } - } - } - else { - if (compare_len (part, min_len, max_len)) { - return TRUE; - } - } - } - } - cur = g_list_next (cur); - } - - return FALSE; -} - -static gboolean -rspamd_has_content_part (struct rspamd_task * task, GList * args, void *unused) -{ - gchar *param_type = NULL, *param_subtype = NULL; - struct expression_argument *arg; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - param_type = arg->data; - args = args->next; - if (args) { - arg = args->data; - param_subtype = arg->data; - } - - return common_has_content_part (task, param_type, param_subtype, 0, 0); -} - -static gboolean -rspamd_has_content_part_len (struct rspamd_task * task, - GList * args, - void *unused) -{ - gchar *param_type = NULL, *param_subtype = NULL; - gint min = 0, max = 0; - struct expression_argument *arg; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - param_type = arg->data; - args = args->next; - if (args) { - arg = get_function_arg (args->data, task, TRUE); - param_subtype = arg->data; - args = args->next; - if (args) { - arg = get_function_arg (args->data, task, TRUE); - errno = 0; - min = strtoul (arg->data, NULL, 10); - if (errno != 0) { - msg_warn ("invalid numeric value '%s': %s", - (gchar *)arg->data, - strerror (errno)); - return FALSE; - } - args = args->next; - if (args) { - arg = get_function_arg (args->data, task, TRUE); - max = strtoul (arg->data, NULL, 10); - if (errno != 0) { - msg_warn ("invalid numeric value '%s': %s", - (gchar *)arg->data, - strerror (errno)); - return FALSE; - } - } - } - } - - return common_has_content_part (task, param_type, param_subtype, min, max); } diff --git a/src/plugins/spf.c b/src/plugins/spf.c index 908e097ab..14c9e0b42 100644 --- a/src/plugins/spf.c +++ b/src/plugins/spf.c @@ -34,7 +34,6 @@ #include "config.h" #include "libmime/message.h" -#include "libmime/expressions.h" #include "libserver/spf.h" #include "libutil/hash.h" #include "libutil/map.h" diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 09e99dec3..0401ff932 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -43,7 +43,6 @@ #include "config.h" #include "libmime/message.h" -#include "libmime/expressions.h" #include "libutil/hash.h" #include "libutil/map.h" #include "main.h" |