diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
commit | 61555065f3d1c8badcc9573691232f1b6e42988c (patch) | |
tree | 563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libmime/expressions.c | |
parent | ad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff) | |
download | rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip |
Rework project structure, remove trash files.
Diffstat (limited to 'src/libmime/expressions.c')
-rw-r--r-- | src/libmime/expressions.c | 1452 |
1 files changed, 1452 insertions, 0 deletions
diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c new file mode 100644 index 000000000..5d19626bb --- /dev/null +++ b/src/libmime/expressions.c @@ -0,0 +1,1452 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "util.h" +#include "cfg_file.h" +#include "main.h" +#include "message.h" +#include "fuzzy.h" +#include "expressions.h" +#include "html.h" +#include "lua/lua_common.h" +#include "diff.h" + +gboolean rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_header_exists (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_parts_distance (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_only_html_part (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_is_html_balanced (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_html_tag (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_fake_html (struct rspamd_task *task, GList * args, void *unused); + +/* + * List of internal functions of rspamd + * Sorted by name to use bsearch + */ +static struct _fl { + const gchar *name; + rspamd_internal_func_t func; + void *user_data; +} rspamd_functions_list[] = { + {"compare_encoding", rspamd_compare_encoding, NULL}, + {"compare_parts_distance", rspamd_parts_distance, NULL}, + {"compare_recipients_distance", rspamd_recipients_distance, NULL}, + {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, + {"has_fake_html", rspamd_has_fake_html, NULL}, + {"has_html_tag", rspamd_has_html_tag, NULL}, + {"has_only_html_part", rspamd_has_only_html_part, NULL}, + {"header_exists", rspamd_header_exists, NULL}, + {"is_html_balanced", rspamd_is_html_balanced, NULL}, + {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL} +}; + +static struct _fl *list_ptr = &rspamd_functions_list[0]; +static guint32 functions_number = sizeof (rspamd_functions_list) / sizeof (struct _fl); +static gboolean list_allocated = FALSE; + +/* Bsearch routine */ +static gint +fl_cmp (const void *s1, const void *s2) +{ + struct _fl *fl1 = (struct _fl *)s1; + struct _fl *fl2 = (struct _fl *)s2; + return strcmp (fl1->name, fl2->name); +} + +/* Cache for regular expressions that are used in functions */ +void * +re_cache_check (const gchar *line, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache == NULL) { + re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); + return NULL; + } + return g_hash_table_lookup (re_cache, line); +} + +void +re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache == NULL) { + re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); + } + + g_hash_table_insert (re_cache, (gpointer)line, pointer); +} + +void +re_cache_del (const gchar *line, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache != NULL) { + g_hash_table_remove (re_cache, line); + } + +} + +/* + * Functions for parsing expressions + */ +struct expression_stack { + gchar op; + struct expression_stack *next; +}; + +/* + * Push operand or operator to stack + */ +static struct expression_stack * +push_expression_stack (rspamd_mempool_t * pool, struct expression_stack *head, gchar op) +{ + struct expression_stack *new; + new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack)); + new->op = op; + new->next = head; + return new; +} + +/* + * Delete symbol from stack, return pointer to operand or operator (casted to void* ) + */ +static gchar +delete_expression_stack (struct expression_stack **head) +{ + struct expression_stack *cur; + gchar res; + + if (*head == NULL) + return 0; + + cur = *head; + res = cur->op; + + *head = cur->next; + return res; +} + +/* + * Return operation priority + */ +static gint +logic_priority (gchar a) +{ + switch (a) { + case '!': + return 3; + case '|': + case '&': + return 2; + case '(': + return 1; + default: + return 0; + } +} + +/* + * Return FALSE if symbol is not operation symbol (operand) + * Return TRUE if symbol is operation symbol + */ +static gboolean +is_operation_symbol (gchar *a) +{ + switch (*a) { + case '!': + case '&': + case '|': + case '(': + case ')': + return TRUE; + case 'O': + case 'o': + if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0&& g_ascii_isspace (a[2])) { + return TRUE; + } + break; + case 'A': + case 'a': + if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0&& g_ascii_isspace (a[3])) { + return TRUE; + } + break; + case 'N': + case 'n': + if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) { + return TRUE; + } + break; + } + + return FALSE; +} + +/* Return character representation of operation */ +static gchar +op_to_char (gchar *a, gchar **next) +{ + switch (*a) { + case '!': + case '&': + case '|': + case '(': + case ')': + *next = a + 1; + return *a; + case 'O': + case 'o': + if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) { + *next = a + sizeof ("or") - 1; + return '|'; + } + break; + case 'A': + case 'a': + if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) { + *next = a + sizeof ("and") - 1; + return '&'; + } + break; + case 'N': + case 'n': + if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) { + *next = a + sizeof ("not") - 1; + return '!'; + } + break; + } + + return '\0'; +} + +/* + * Return TRUE if symbol can be regexp flag + */ +static gboolean +is_regexp_flag (gchar a) +{ + switch (a) { + case 'i': + case 'm': + case 'x': + case 's': + case 'u': + case 'o': + case 'r': + case 'H': + case 'M': + case 'P': + case 'U': + case 'X': + case 'T': + case 'S': + return TRUE; + default: + return FALSE; + } +} + +static void +insert_expression (rspamd_mempool_t * pool, struct expression **head, gint type, gchar op, void *operand, const gchar *orig) +{ + struct expression *new, *cur; + + new = rspamd_mempool_alloc (pool, sizeof (struct expression)); + new->type = type; + new->orig = orig; + if (new->type != EXPR_OPERATION) { + new->content.operand = operand; + } + else { + new->content.operation = op; + } + new->next = NULL; + + if (!*head) { + *head = new; + } + else { + cur = *head; + while (cur->next) { + cur = cur->next; + } + cur->next = new; + } +} + +static struct expression * +maybe_parse_expression (rspamd_mempool_t * pool, gchar *line) +{ + struct expression *expr; + gchar *p = line; + + while (*p) { + if (is_operation_symbol (p)) { + return parse_expression (pool, line); + } + p++; + } + + expr = rspamd_mempool_alloc (pool, sizeof (struct expression)); + expr->type = EXPR_STR; + expr->content.operand = rspamd_mempool_strdup (pool, line); + expr->next = NULL; + + return expr; +} + +/* + * Make inverse polish record for specified expression + * Memory is allocated from given pool + */ +struct expression * +parse_expression (rspamd_mempool_t * pool, gchar *line) +{ + struct expression *expr = NULL; + struct expression_stack *stack = NULL; + struct expression_function *func = NULL; + struct expression *arg; + GQueue *function_stack; + gchar *p, *c, *str, op, newop, *copy, *next; + gboolean in_regexp = FALSE; + gint brackets = 0; + + enum { + SKIP_SPACES, + READ_OPERATOR, + READ_REGEXP, + READ_REGEXP_FLAGS, + READ_FUNCTION, + READ_FUNCTION_ARGUMENT, + } state = SKIP_SPACES; + + if (line == NULL || pool == NULL) { + return NULL; + } + + msg_debug ("parsing expression {{ %s }}", line); + + function_stack = g_queue_new (); + copy = rspamd_mempool_strdup (pool, line); + p = line; + c = p; + while (*p) { + switch (state) { + case SKIP_SPACES: + if (!g_ascii_isspace (*p)) { + if (is_operation_symbol (p)) { + state = READ_OPERATOR; + } + else if (*p == '/') { + c = ++p; + state = READ_REGEXP; + } + else { + c = p; + state = READ_FUNCTION; + } + } + else { + p++; + } + break; + case READ_OPERATOR: + if (*p == ')') { + if (stack == NULL) { + return NULL; + } + /* Pop all operators from stack to nearest '(' or to head */ + while (stack && stack->op != '(') { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + if (stack) { + op = delete_expression_stack (&stack); + } + } + else if (*p == '(') { + /* Push it to stack */ + stack = push_expression_stack (pool, stack, *p); + } + else { + if (stack == NULL) { + newop = op_to_char (p, &next); + if (newop != '\0') { + stack = push_expression_stack (pool, stack, newop); + p = next; + state = SKIP_SPACES; + continue; + } + } + /* Check priority of logic operation */ + else { + newop = op_to_char (p, &next); + if (newop != '\0') { + if (logic_priority (stack->op) < logic_priority (newop)) { + stack = push_expression_stack (pool, stack, newop); + } + else { + /* Pop all operations that have higher priority than this one */ + while ((stack != NULL) && (logic_priority (stack->op) >= logic_priority (newop))) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + stack = push_expression_stack (pool, stack, newop); + } + } + p = next; + state = SKIP_SPACES; + continue; + } + } + p++; + state = SKIP_SPACES; + break; + + case READ_REGEXP: + if (*p == '/' && *(p - 1) != '\\') { + if (*(p + 1)) { + p++; + } + state = READ_REGEXP_FLAGS; + } + else { + p++; + } + break; + + case READ_REGEXP_FLAGS: + if (!is_regexp_flag (*p) || *(p + 1) == '\0') { + if (c != p) { + if ((is_regexp_flag (*p) || *p == '/') && *(p + 1) == '\0') { + p++; + } + str = rspamd_mempool_alloc (pool, p - c + 2); + rspamd_strlcpy (str, c - 1, (p - c + 2)); + g_strstrip (str); + msg_debug ("found regexp: %s", str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_REGEXP, 0, str, copy); + } + } + c = p; + state = SKIP_SPACES; + } + else { + p++; + } + break; + + case READ_FUNCTION: + if (*p == '/') { + /* In fact it is regexp */ + state = READ_REGEXP; + c++; + p++; + } + else if (*p == '(') { + func = rspamd_mempool_alloc (pool, sizeof (struct expression_function)); + func->name = rspamd_mempool_alloc (pool, p - c + 1); + func->args = NULL; + rspamd_strlcpy (func->name, c, (p - c + 1)); + g_strstrip (func->name); + state = READ_FUNCTION_ARGUMENT; + g_queue_push_tail (function_stack, func); + insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy); + c = ++p; + } + else if (is_operation_symbol (p)) { + /* In fact it is not function, but symbol */ + if (c != p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_STR, 0, str, copy); + } + } + state = READ_OPERATOR; + } + else if (*(p + 1) == '\0') { + /* In fact it is not function, but symbol */ + p++; + if (c != p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_STR, 0, str, copy); + } + } + state = SKIP_SPACES; + } + else { + p++; + } + break; + + case READ_FUNCTION_ARGUMENT: + if (*p == '/' && !in_regexp) { + in_regexp = TRUE; + p++; + } + if (!in_regexp) { + /* Append argument to list */ + if (*p == ',' || (*p == ')' && brackets == 0)) { + arg = NULL; + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + /* Recursive call */ + arg = maybe_parse_expression (pool, str); + func->args = g_list_append (func->args, arg); + /* Pop function */ + if (*p == ')') { + /* Last function in chain, goto skipping spaces state */ + func = g_queue_pop_tail (function_stack); + if (g_queue_get_length (function_stack) == 0) { + state = SKIP_SPACES; + } + } + c = p + 1; + } + else if (*p == '(') { + brackets++; + } + else if (*p == ')') { + brackets--; + } + } + else if (*p == '/' && *(p - 1) != '\\') { + in_regexp = FALSE; + } + p++; + break; + } + } + + g_queue_free (function_stack); + if (state != SKIP_SPACES) { + /* In fact we got bad expression */ + msg_warn ("expression \"%s\" is invalid", line); + return NULL; + } + /* Pop everything from stack */ + while (stack != NULL) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + + return expr; +} + +/* + * Rspamd regexp utility functions + */ +struct rspamd_regexp * +parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode) +{ + const gchar *begin, *end, *p, *src, *start; + gchar *dbegin, *dend; + struct rspamd_regexp *result, *check; + gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE; + GError *err = NULL; + + if (line == NULL) { + msg_err ("cannot parse NULL line"); + return NULL; + } + + src = line; + result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp)); + /* Skip whitespaces */ + while (g_ascii_isspace (*line)) { + line++; + } + if (*line == '\0') { + msg_warn ("got empty regexp"); + return NULL; + } + start = line; + /* First try to find header name */ + begin = strchr (line, '/'); + if (begin != NULL) { + p = begin; + end = NULL; + while (p != line) { + if (*p == '=') { + end = p; + break; + } + p --; + } + if (end) { + result->header = rspamd_mempool_alloc (pool, end - line + 1); + rspamd_strlcpy (result->header, line, end - line + 1); + result->type = REGEXP_HEADER; + line = end; + } + } + else { + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + line = start; + } + /* Find begin of regexp */ + while (*line && *line != '/') { + line++; + } + if (*line != '\0') { + begin = line + 1; + } + else if (result->header == NULL) { + /* Assume that line without // is just a header name */ + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + return result; + } + else { + /* We got header name earlier but have not found // expression, so it is invalid regexp */ + msg_warn ("got no header name (eg. header=) but without corresponding regexp, %s", src); + return NULL; + } + /* Find end */ + end = begin; + while (*end && (*end != '/' || *(end - 1) == '\\')) { + end++; + } + if (end == begin || *end != '/') { + msg_warn ("no trailing / in regexp %s", src); + return NULL; + } + /* Parse flags */ + p = end + 1; + while (p != NULL) { + switch (*p) { + case 'i': + regexp_flags |= G_REGEX_CASELESS; + p++; + break; + case 'm': + regexp_flags |= G_REGEX_MULTILINE; + p++; + break; + case 's': + regexp_flags |= G_REGEX_DOTALL; + p++; + break; + case 'x': + regexp_flags |= G_REGEX_EXTENDED; + p++; + break; + case 'u': + regexp_flags |= G_REGEX_UNGREEDY; + p++; + break; + case 'o': + regexp_flags |= G_REGEX_OPTIMIZE; + p++; + break; + case 'r': + regexp_flags |= G_REGEX_RAW; + result->is_raw = TRUE; + p++; + break; + /* Type flags */ + case 'H': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_HEADER; + } + p++; + break; + case 'M': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_MESSAGE; + } + p++; + break; + case 'P': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_MIME; + } + p++; + break; + case 'U': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_URL; + } + p++; + break; + case 'X': + if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) { + result->type = REGEXP_RAW_HEADER; + } + p++; + break; + case 'T': + result->is_test = TRUE; + p ++; + break; + case 'S': + result->is_strong = TRUE; + p ++; + break; + /* Stop flags parsing */ + default: + p = NULL; + break; + } + } + + result->regexp_text = rspamd_mempool_strdup (pool, start); + dbegin = result->regexp_text + (begin - start); + dend = result->regexp_text + (end - start); + *dend = '\0'; + + if (raw_mode) { + regexp_flags |= G_REGEX_RAW; + } + + /* Avoid multiply regexp structures for similar regexps */ + if ((check = (struct rspamd_regexp *)re_cache_check (result->regexp_text, pool)) != NULL) { + /* Additional check for headers */ + if (result->type == REGEXP_HEADER || result->type == REGEXP_RAW_HEADER) { + if (result->header && check->header) { + if (strcmp (result->header, check->header) == 0) { + return check; + } + } + } + else { + return check; + } + } + result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err); + if ((regexp_flags & G_REGEX_RAW) != 0) { + result->raw_regexp = result->regexp; + } + else { + result->raw_regexp = g_regex_new (dbegin, regexp_flags | G_REGEX_RAW, 0, &err); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->raw_regexp); + } + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->regexp); + + *dend = '/'; + + if (result->regexp == NULL || err != NULL) { + msg_warn ("could not read regexp: %s while reading regexp %s", err->message, src); + return NULL; + } + + if (result->raw_regexp == NULL || err != NULL) { + msg_warn ("could not read raw regexp: %s while reading regexp %s", err->message, src); + return NULL; + } + + /* Add to cache for further usage */ + re_cache_add (result->regexp_text, result, pool); + return result; +} + +gboolean +call_expression_function (struct expression_function * func, struct rspamd_task * task, lua_State *L) +{ + struct _fl *selected, key; + + key.name = func->name; + + selected = bsearch (&key, list_ptr, functions_number, sizeof (struct _fl), fl_cmp); + if (selected == NULL) { + /* Try to check lua function */ + return FALSE; + } + + return selected->func (task, func->args, selected->user_data); +} + +struct expression_argument * +get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string) +{ + GQueue *stack; + gsize cur, op1, op2; + struct expression_argument *res; + struct expression *it; + + if (expr == NULL) { + msg_warn ("NULL expression passed"); + return NULL; + } + if (expr->next == NULL) { + res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); + if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type == EXPR_REGEXP_PARSED) { + res->type = EXPRESSION_ARGUMENT_NORMAL; + res->data = expr->content.operand; + } + else if (expr->type == EXPR_FUNCTION && !want_string) { + res->type = EXPRESSION_ARGUMENT_BOOL; + cur = call_expression_function (expr->content.operand, task, NULL); + res->data = GSIZE_TO_POINTER (cur); + } + else { + msg_warn ("cannot parse argument: it contains operator or bool expression that is not wanted"); + return NULL; + } + return res; + } + else if (!want_string) { + res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); + res->type = EXPRESSION_ARGUMENT_BOOL; + stack = g_queue_new (); + it = expr; + + while (it) { + if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED || it->type == EXPR_STR) { + g_queue_free (stack); + res->type = EXPRESSION_ARGUMENT_EXPR; + res->data = expr; + return res; + } + else if (it->type == EXPR_FUNCTION) { + cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task, NULL); + debug_task ("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false"); + } + else if (it->type == EXPR_OPERATION) { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + debug_task ("invalid expression"); + g_queue_free (stack); + return NULL; + } + switch (it->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + break; + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + break; + default: + it = it->next; + continue; + } + } + if (it) { + it = it->next; + } + } + if (!g_queue_is_empty (stack)) { + res->data = g_queue_pop_head (stack); + } + else { + res->data = GSIZE_TO_POINTER (FALSE); + } + + return res; + } + + msg_warn ("invalid expression argument"); + + return NULL; +} + +void +register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data) +{ + static struct _fl *new; + + functions_number++; + + new = g_new (struct _fl, functions_number); + memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl)); + if (list_allocated) { + g_free (list_ptr); + } + + list_allocated = TRUE; + new[functions_number - 1].name = name; + new[functions_number - 1].func = func; + new[functions_number - 1].user_data = user_data; + qsort (new, functions_number, sizeof (struct _fl), fl_cmp); + list_ptr = new; +} + +gboolean +rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused) +{ + struct expression_argument *arg; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + if (arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + /* XXX: really write this function */ + return TRUE; +} + +gboolean +rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused) +{ + struct expression_argument *arg; + GList *headerlist; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + debug_task ("try to get header %s", (gchar *)arg->data); + headerlist = message_get_header (task->task_pool, task->message, (gchar *)arg->data, FALSE); + if (headerlist) { + g_list_free (headerlist); + return TRUE; + } + return FALSE; +} + +/* + * This function is designed to find difference between text/html and text/plain parts + * It takes one argument: difference threshold, if we have two text parts, compare + * its hashes and check for threshold, if value is greater than threshold, return TRUE + * and return FALSE otherwise. + */ +gboolean +rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused) +{ + gint threshold, threshold2 = -1, diff; + struct mime_text_part *p1, *p2; + GList *cur; + struct expression_argument *arg; + GMimeObject *parent; + const GMimeContentType *ct; + gint *pdiff; + + if (args == NULL) { + debug_task ("no threshold is specified, assume it 100"); + threshold = 100; + } + else { + errno = 0; + arg = get_function_arg (args->data, task, TRUE); + threshold = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", assume it 100", (gchar *)args->data); + threshold = 100; + } + if (args->next) { + arg = get_function_arg (args->next->data, task, TRUE); + errno = 0; + threshold2 = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", ignore it", (gchar *)arg->data); + threshold2 = -1; + } + } + } + + if ((pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance")) != NULL) { + diff = *pdiff; + if (diff != -1) { + if (threshold2 > 0) { + if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + return FALSE; + } + else { + return FALSE; + } + } + + if (g_list_length (task->text_parts) == 2) { + cur = g_list_first (task->text_parts); + p1 = cur->data; + cur = g_list_next (cur); + pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint)); + *pdiff = -1; + + if (cur == NULL) { + msg_info ("bad parts list"); + return FALSE; + } + p2 = cur->data; + /* First of all check parent object */ + if (p1->parent && p1->parent == p2->parent) { + parent = p1->parent; + ct = g_mime_object_get_content_type (parent); +#ifndef GMIME24 + if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) { +#else + if (ct == NULL || ! g_mime_content_type_is_type ((GMimeContentType *)ct, "multipart", "alternative")) { +#endif + debug_task ("two parts are not belong to multipart/alternative container, skip check"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + } + else { + debug_task ("message contains two parts but they are in different multi-parts"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + if (!p1->is_empty && !p2->is_empty) { + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance_normalized (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold); + *pdiff = diff; + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + if (threshold2 > 0) { + if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + } + else if ((p1->is_empty && !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { + /* Empty and non empty parts are different */ + *pdiff = 0; + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return TRUE; + } + } + else { + debug_task ("message has too many text parts, so do not try to compare them with each other"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; +} + +struct addr_list { + const gchar *name; + const gchar *addr; +}; + +#define COMPARE_RCPT_LEN 3 +#define MIN_RCPT_TO_COMPARE 7 + +gboolean +rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused) +{ + struct expression_argument *arg; + InternetAddressList *cur; + InternetAddress *addr; + double threshold; + struct addr_list *ar; + gchar *c; + gint num, i, j, hits = 0, total = 0; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + errno = 0; + threshold = strtod ((gchar *)arg->data, NULL); + if (errno != 0) { + msg_warn ("invalid numeric value '%s': %s", (gchar *)arg->data, strerror (errno)); + return FALSE; + } + + if (!task->rcpts) { + return FALSE; + } + num = internet_address_list_length (task->rcpts); + if (num < MIN_RCPT_TO_COMPARE) { + return FALSE; + } + ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list)); + + /* Fill array */ + cur = task->rcpts; +#ifdef GMIME24 + for (i = 0; i < num; i ++) { + addr = internet_address_list_get_address (cur, i); + ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_name (addr)); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + *c = '\0'; + ar[i].addr = c + 1; + } + } +#else + i = 0; + while (cur) { + addr = internet_address_list_get_address (cur); + if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_addr (addr)); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + *c = '\0'; + ar[i].addr = c + 1; + } + cur = internet_address_list_next (cur); + i++; + } + else { + cur = internet_address_list_next (cur); + } + } +#endif + + /* Cycle all elements in array */ + for (i = 0; i < num; i++) { + for (j = i + 1; j < num; j++) { + if (ar[i].name && ar[j].name && g_ascii_strncasecmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) { + /* Common name part */ + hits++; + } + else if (ar[i].addr && ar[j].addr && g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) { + /* Common address part, but different name */ + hits++; + } + total++; + } + } + + if ((double)(hits * num / 2.) / (double)total >= threshold) { + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_has_only_html_part (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (p->is_html) { + res = TRUE; + } + else { + res = FALSE; + break; + } + cur = g_list_next (cur); + } + + return res; +} + +static gboolean +is_recipient_list_sorted (const InternetAddressList * ia) +{ + const InternetAddressList *cur; + InternetAddress *addr; + gboolean res = TRUE; + struct addr_list current = { NULL, NULL }, previous = { + NULL, NULL}; +#ifdef GMIME24 + gint num, i; +#endif + + /* Do not check to short address lists */ + if (internet_address_list_length ((InternetAddressList *)ia) < MIN_RCPT_TO_COMPARE) { + return FALSE; + } +#ifdef GMIME24 + num = internet_address_list_length ((InternetAddressList *)ia); + cur = ia; + for (i = 0; i < num; i ++) { + addr = internet_address_list_get_address ((InternetAddressList *)cur, i); + current.addr = (gchar *)internet_address_get_name (addr); + if (previous.addr != NULL) { + if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } +#else + cur = ia; + while (cur) { + addr = internet_address_list_get_address (cur); + if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + current.addr = internet_address_get_addr (addr); + if (previous.addr != NULL) { + if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } + cur = internet_address_list_next (cur); + } +#endif + + return res; +} + +gboolean +rspamd_is_recipients_sorted (struct rspamd_task * task, GList * args, void *unused) +{ + /* Check all types of addresses */ + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_TO)) == TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_BCC)) == TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_CC)) == TRUE) { + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_compare_transfer_encoding (struct rspamd_task * task, GList * args, void *unused) +{ + GMimeObject *part; +#ifndef GMIME24 + GMimePartEncodingType enc_req, part_enc; +#else + GMimeContentEncoding enc_req, part_enc; +#endif + struct expression_argument *arg; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); +#ifndef GMIME24 + enc_req = g_mime_part_encoding_from_string (arg->data); + if (enc_req == GMIME_PART_ENCODING_DEFAULT) { +#else + enc_req = g_mime_content_encoding_from_string (arg->data); + if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) { +#endif + msg_warn ("bad encoding type: %s", (gchar *)arg->data); + return FALSE; + } + + part = g_mime_message_get_mime_part (task->message); + if (part) { + if (GMIME_IS_PART (part)) { +#ifndef GMIME24 + part_enc = g_mime_part_get_encoding (GMIME_PART (part)); + if (part_enc == GMIME_PART_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_PART_ENCODING_7BIT; + } +#else + part_enc = g_mime_part_get_content_encoding (GMIME_PART (part)); + if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_CONTENT_ENCODING_7BIT; + } +#endif + + + debug_task ("got encoding in part: %d and compare with %d", (gint)part_enc, (gint)enc_req); +#ifndef GMIME24 + g_object_unref (part); +#endif + + return part_enc == enc_req; + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + + return FALSE; +} + +gboolean +rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = TRUE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (!p->is_empty && p->is_html) { + if (p->is_balanced) { + res = TRUE; + } + else { + res = FALSE; + break; + } + } + cur = g_list_next (cur); + } + + return res; + +} + +struct html_callback_data { + struct html_tag *tag; + gboolean *res; +}; + +static gboolean +search_html_node_callback (GNode * node, gpointer data) +{ + struct html_callback_data *cd = data; + struct html_node *nd; + + nd = node->data; + if (nd) { + if (nd->tag == cd->tag) { + *cd->res = TRUE; + return TRUE; + } + } + + return FALSE; +} + +gboolean +rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + struct expression_argument *arg; + struct html_tag *tag; + gboolean res = FALSE; + struct html_callback_data cd; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + tag = get_tag_by_name (arg->data); + if (tag == NULL) { + msg_warn ("unknown tag type passed as argument: %s", (gchar *)arg->data); + return FALSE; + } + + cur = g_list_first (task->text_parts); + cd.res = &res; + cd.tag = tag; + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes) { + g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, -1, search_html_node_callback, &cd); + } + cur = g_list_next (cur); + } + + return res; + +} + +gboolean +rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes == NULL) { + res = TRUE; + } + cur = g_list_next (cur); + } + + return res; + +} + + +/* + * vi:ts=4 + */ |