+++ /dev/null
-/*
- * Copyright (c) 2009-2012, Vsevolod Stakhov
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "util.h"
-#include "cfg_file.h"
-#include "main.h"
-#include "message.h"
-#include "fuzzy.h"
-#include "expressions.h"
-#include "html.h"
-#include "lua/lua_common.h"
-#include "diff.h"
-
-gboolean rspamd_compare_encoding (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_header_exists (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_parts_distance (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_recipients_distance (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_has_only_html_part (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_is_html_balanced (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_has_html_tag (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_has_fake_html (struct rspamd_task *task,
- GList * args,
- void *unused);
-
-/*
- * List of internal functions of rspamd
- * Sorted by name to use bsearch
- */
-static struct _fl {
- const gchar *name;
- rspamd_internal_func_t func;
- void *user_data;
-} rspamd_functions_list[] = {
- {"compare_encoding", rspamd_compare_encoding, NULL},
- {"compare_parts_distance", rspamd_parts_distance, NULL},
- {"compare_recipients_distance", rspamd_recipients_distance, NULL},
- {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
- {"has_fake_html", rspamd_has_fake_html, NULL},
- {"has_html_tag", rspamd_has_html_tag, NULL},
- {"has_only_html_part", rspamd_has_only_html_part, NULL},
- {"header_exists", rspamd_header_exists, NULL},
- {"is_html_balanced", rspamd_is_html_balanced, NULL},
- {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}
-};
-
-static struct _fl *list_ptr = &rspamd_functions_list[0];
-static guint32 functions_number = sizeof (rspamd_functions_list) /
- sizeof (struct _fl);
-static gboolean list_allocated = FALSE;
-
-/* Bsearch routine */
-static gint
-fl_cmp (const void *s1, const void *s2)
-{
- struct _fl *fl1 = (struct _fl *)s1;
- struct _fl *fl2 = (struct _fl *)s2;
- return strcmp (fl1->name, fl2->name);
-}
-
-/* Cache for regular expressions that are used in functions */
-void *
-re_cache_check (const gchar *line, rspamd_mempool_t *pool)
-{
- GHashTable *re_cache;
-
- re_cache = rspamd_mempool_get_variable (pool, "re_cache");
-
- if (re_cache == NULL) {
- re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
- rspamd_mempool_set_variable (pool, "re_cache", re_cache,
- (rspamd_mempool_destruct_t)g_hash_table_destroy);
- return NULL;
- }
- return g_hash_table_lookup (re_cache, line);
-}
-
-void
-re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool)
-{
- GHashTable *re_cache;
-
- re_cache = rspamd_mempool_get_variable (pool, "re_cache");
-
- if (re_cache == NULL) {
- re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
- rspamd_mempool_set_variable (pool, "re_cache", re_cache,
- (rspamd_mempool_destruct_t)g_hash_table_destroy);
- }
-
- g_hash_table_insert (re_cache, (gpointer)line, pointer);
-}
-
-void
-re_cache_del (const gchar *line, rspamd_mempool_t *pool)
-{
- GHashTable *re_cache;
-
- re_cache = rspamd_mempool_get_variable (pool, "re_cache");
-
- if (re_cache != NULL) {
- g_hash_table_remove (re_cache, line);
- }
-
-}
-
-/*
- * Functions for parsing expressions
- */
-struct expression_stack {
- gchar op;
- struct expression_stack *next;
-};
-
-/*
- * Push operand or operator to stack
- */
-static struct expression_stack *
-push_expression_stack (rspamd_mempool_t * pool,
- struct expression_stack *head,
- gchar op)
-{
- struct expression_stack *new;
- new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack));
- new->op = op;
- new->next = head;
- return new;
-}
-
-/*
- * Delete symbol from stack, return pointer to operand or operator (casted to void* )
- */
-static gchar
-delete_expression_stack (struct expression_stack **head)
-{
- struct expression_stack *cur;
- gchar res;
-
- if (*head == NULL)
- return 0;
-
- cur = *head;
- res = cur->op;
-
- *head = cur->next;
- return res;
-}
-
-/*
- * Return operation priority
- */
-static gint
-logic_priority (gchar a)
-{
- switch (a) {
- case '!':
- return 3;
- case '|':
- case '&':
- return 2;
- case '(':
- return 1;
- default:
- return 0;
- }
-}
-
-/*
- * Return FALSE if symbol is not operation symbol (operand)
- * Return TRUE if symbol is operation symbol
- */
-static gboolean
-is_operation_symbol (gchar *a)
-{
- switch (*a) {
- case '!':
- case '&':
- case '|':
- case '(':
- case ')':
- return TRUE;
- case 'O':
- case 'o':
- if (g_ascii_strncasecmp (a, "or",
- sizeof ("or") - 1) == 0 && g_ascii_isspace (a[2])) {
- return TRUE;
- }
- break;
- case 'A':
- case 'a':
- if (g_ascii_strncasecmp (a, "and",
- sizeof ("and") - 1) == 0 && g_ascii_isspace (a[3])) {
- return TRUE;
- }
- break;
- case 'N':
- case 'n':
- if (g_ascii_strncasecmp (a, "not",
- sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) {
- return TRUE;
- }
- break;
- }
-
- return FALSE;
-}
-
-/* Return character representation of operation */
-static gchar
-op_to_char (gchar *a, gchar **next)
-{
- switch (*a) {
- case '!':
- case '&':
- case '|':
- case '(':
- case ')':
- if ((a[0] == '&' && a[1] == '&') ||
- (a[0] == '|' && a[1] == '|')) {
- *next = a + 2;
- }
- else {
- *next = a + 1;
- }
- return *a;
- case 'O':
- case 'o':
- if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
- *next = a + sizeof ("or") - 1;
- return '|';
- }
- break;
- case 'A':
- case 'a':
- if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
- *next = a + sizeof ("and") - 1;
- return '&';
- }
- break;
- case 'N':
- case 'n':
- if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
- *next = a + sizeof ("not") - 1;
- return '!';
- }
- break;
- }
-
- return '\0';
-}
-
-/*
- * Return TRUE if symbol can be regexp flag
- */
-static gboolean
-is_regexp_flag (gchar a)
-{
- switch (a) {
- case 'i':
- case 'm':
- case 'x':
- case 's':
- case 'u':
- case 'o':
- case 'r':
- case 'H':
- case 'M':
- case 'P':
- case 'U':
- case 'X':
- case 'T':
- case 'S':
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static void
-insert_expression (rspamd_mempool_t * pool,
- struct expression **head,
- gint type,
- gchar op,
- void *operand,
- const gchar *orig)
-{
- struct expression *new, *cur;
-
- new = rspamd_mempool_alloc (pool, sizeof (struct expression));
- new->type = type;
- new->orig = orig;
- if (new->type != EXPR_OPERATION) {
- new->content.operand = operand;
- }
- else {
- new->content.operation = op;
- }
- new->next = NULL;
-
- if (!*head) {
- *head = new;
- }
- else {
- cur = *head;
- while (cur->next) {
- cur = cur->next;
- }
- cur->next = new;
- }
-}
-
-static struct expression *
-maybe_parse_expression (rspamd_mempool_t * pool, gchar *line)
-{
- struct expression *expr;
- gchar *p = line;
-
- while (*p) {
- if (is_operation_symbol (p)) {
- return parse_expression (pool, line);
- }
- p++;
- }
-
- expr = rspamd_mempool_alloc (pool, sizeof (struct expression));
- expr->type = EXPR_STR;
- expr->content.operand = rspamd_mempool_strdup (pool, line);
- expr->next = NULL;
-
- return expr;
-}
-
-/*
- * Make inverse polish record for specified expression
- * Memory is allocated from given pool
- */
-struct expression *
-parse_expression (rspamd_mempool_t * pool, gchar *line)
-{
- struct expression *expr = NULL;
- struct expression_stack *stack = NULL;
- struct expression_function *func = NULL;
- struct expression *arg;
- GQueue *function_stack;
- gchar *p, *c, *str, op, newop, *copy, *next;
- gboolean in_regexp = FALSE;
- gint brackets = 0;
-
- enum {
- SKIP_SPACES,
- READ_OPERATOR,
- READ_REGEXP,
- READ_REGEXP_FLAGS,
- READ_FUNCTION,
- READ_FUNCTION_ARGUMENT,
- } state = SKIP_SPACES;
-
- if (line == NULL || pool == NULL) {
- return NULL;
- }
-
- msg_debug ("parsing expression {{ %s }}", line);
-
- function_stack = g_queue_new ();
- copy = rspamd_mempool_strdup (pool, line);
- p = line;
- c = p;
- while (*p) {
- switch (state) {
- case SKIP_SPACES:
- if (!g_ascii_isspace (*p)) {
- if (is_operation_symbol (p)) {
- state = READ_OPERATOR;
- }
- else if (*p == '/') {
- c = ++p;
- state = READ_REGEXP;
- }
- else {
- c = p;
- state = READ_FUNCTION;
- }
- }
- else {
- p++;
- }
- break;
- case READ_OPERATOR:
- if (*p == ')') {
- if (stack == NULL) {
- return NULL;
- }
- /* Pop all operators from stack to nearest '(' or to head */
- while (stack && stack->op != '(') {
- op = delete_expression_stack (&stack);
- if (op != '(') {
- insert_expression (pool,
- &expr,
- EXPR_OPERATION,
- op,
- NULL,
- copy);
- }
- }
- if (stack) {
- /* Remove open brace itself */
- delete_expression_stack (&stack);
- }
- }
- else if (*p == '(') {
- /* Push it to stack */
- stack = push_expression_stack (pool, stack, *p);
- }
- else {
- if (stack == NULL) {
- newop = op_to_char (p, &next);
- if (newop != '\0') {
- stack = push_expression_stack (pool, stack, newop);
- p = next;
- state = SKIP_SPACES;
- continue;
- }
- }
- /* Check priority of logic operation */
- else {
- newop = op_to_char (p, &next);
- if (newop != '\0') {
- if (logic_priority (stack->op) <
- logic_priority (newop)) {
- stack = push_expression_stack (pool, stack, newop);
- }
- else {
- /* Pop all operations that have higher priority than this one */
- while ((stack != NULL) &&
- (logic_priority (stack->op) >=
- logic_priority (newop))) {
- op = delete_expression_stack (&stack);
- if (op != '(') {
- insert_expression (pool,
- &expr,
- EXPR_OPERATION,
- op,
- NULL,
- copy);
- }
- }
- stack = push_expression_stack (pool, stack, newop);
- }
- }
- p = next;
- state = SKIP_SPACES;
- continue;
- }
- }
- p++;
- state = SKIP_SPACES;
- break;
-
- case READ_REGEXP:
- if (*p == '/' && *(p - 1) != '\\') {
- if (*(p + 1)) {
- p++;
- }
- state = READ_REGEXP_FLAGS;
- }
- else {
- p++;
- }
- break;
-
- case READ_REGEXP_FLAGS:
- if (!is_regexp_flag (*p) || *(p + 1) == '\0') {
- if (c != p) {
- if ((is_regexp_flag (*p) || *p ==
- '/') && *(p + 1) == '\0') {
- p++;
- }
- str = rspamd_mempool_alloc (pool, p - c + 2);
- rspamd_strlcpy (str, c - 1, (p - c + 2));
- g_strstrip (str);
- msg_debug ("found regexp: %s", str);
- if (strlen (str) > 0) {
- insert_expression (pool,
- &expr,
- EXPR_REGEXP,
- 0,
- str,
- copy);
- }
- }
- c = p;
- state = SKIP_SPACES;
- }
- else {
- p++;
- }
- break;
-
- case READ_FUNCTION:
- if (*p == '/') {
- /* In fact it is regexp */
- state = READ_REGEXP;
- c++;
- p++;
- }
- else if (*p == '(') {
- func =
- rspamd_mempool_alloc (pool,
- sizeof (struct expression_function));
- func->name = rspamd_mempool_alloc (pool, p - c + 1);
- func->args = NULL;
- rspamd_strlcpy (func->name, c, (p - c + 1));
- g_strstrip (func->name);
- state = READ_FUNCTION_ARGUMENT;
- g_queue_push_tail (function_stack, func);
- insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy);
- c = ++p;
- }
- else if (is_operation_symbol (p)) {
- /* In fact it is not function, but symbol */
- if (c != p) {
- str = rspamd_mempool_alloc (pool, p - c + 1);
- rspamd_strlcpy (str, c, (p - c + 1));
- g_strstrip (str);
- if (strlen (str) > 0) {
- insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
- }
- }
- state = READ_OPERATOR;
- }
- else if (*(p + 1) == '\0') {
- /* In fact it is not function, but symbol */
- p++;
- if (c != p) {
- str = rspamd_mempool_alloc (pool, p - c + 1);
- rspamd_strlcpy (str, c, (p - c + 1));
- g_strstrip (str);
- if (strlen (str) > 0) {
- insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
- }
- }
- state = SKIP_SPACES;
- }
- else {
- p++;
- }
- break;
-
- case READ_FUNCTION_ARGUMENT:
- if (*p == '/' && !in_regexp) {
- in_regexp = TRUE;
- p++;
- }
- if (!in_regexp) {
- /* Append argument to list */
- if (*p == ',' || (*p == ')' && brackets == 0)) {
- arg = NULL;
- str = rspamd_mempool_alloc (pool, p - c + 1);
- rspamd_strlcpy (str, c, (p - c + 1));
- g_strstrip (str);
- /* Recursive call */
- arg = maybe_parse_expression (pool, str);
- func->args = g_list_append (func->args, arg);
- /* Pop function */
- if (*p == ')') {
- /* Last function in chain, goto skipping spaces state */
- func = g_queue_pop_tail (function_stack);
- if (g_queue_get_length (function_stack) == 0) {
- state = SKIP_SPACES;
- }
- }
- c = p + 1;
- }
- else if (*p == '(') {
- brackets++;
- }
- else if (*p == ')') {
- brackets--;
- }
- }
- else if (*p == '/' && *(p - 1) != '\\') {
- in_regexp = FALSE;
- }
- p++;
- break;
- }
- }
-
- g_queue_free (function_stack);
- if (state != SKIP_SPACES) {
- /* In fact we got bad expression */
- msg_warn ("expression \"%s\" is invalid", line);
- return NULL;
- }
- /* Pop everything from stack */
- while (stack != NULL) {
- op = delete_expression_stack (&stack);
- if (op != '(') {
- insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
- }
- }
-
- return expr;
-}
-
-/*
- * Rspamd regexp utility functions
- */
-struct rspamd_regexp_element *
-parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
-{
- const gchar *begin, *end, *p, *src, *start;
- gchar *dbegin, *dend;
- struct rspamd_regexp_element *result;
- rspamd_regexp_t *re;
- GError *err = NULL;
- GString *re_flags;
-
- if (line == NULL) {
- msg_err ("cannot parse NULL line");
- return NULL;
- }
-
- if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) {
- return ((struct rspamd_regexp_element *)rspamd_regexp_get_ud (re));
- }
-
- src = line;
- result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_element));
- /* Skip whitespaces */
- while (g_ascii_isspace (*line)) {
- line++;
- }
- if (*line == '\0') {
- msg_warn ("got empty regexp");
- return NULL;
- }
- start = line;
- /* First try to find header name */
- begin = strchr (line, '/');
- if (begin != NULL) {
- p = begin;
- end = NULL;
- while (p != line) {
- if (*p == '=') {
- end = p;
- break;
- }
- p--;
- }
- if (end) {
- result->header = rspamd_mempool_alloc (pool, end - line + 1);
- rspamd_strlcpy (result->header, line, end - line + 1);
- result->type = REGEXP_HEADER;
- line = end;
- }
- }
- else {
- result->header = rspamd_mempool_strdup (pool, line);
- result->type = REGEXP_HEADER;
- line = start;
- }
- /* Find begin of regexp */
- while (*line && *line != '/') {
- line++;
- }
- if (*line != '\0') {
- begin = line + 1;
- }
- else if (result->header == NULL) {
- /* Assume that line without // is just a header name */
- result->header = rspamd_mempool_strdup (pool, line);
- result->type = REGEXP_HEADER;
- return result;
- }
- else {
- /* We got header name earlier but have not found // expression, so it is invalid regexp */
- msg_warn (
- "got no header name (eg. header=) but without corresponding regexp, %s",
- src);
- return NULL;
- }
- /* Find end */
- end = begin;
- while (*end && (*end != '/' || *(end - 1) == '\\')) {
- end++;
- }
- if (end == begin || *end != '/') {
- msg_warn ("no trailing / in regexp %s", src);
- return NULL;
- }
- /* Parse flags */
- p = end + 1;
- re_flags = g_string_sized_new (32);
- while (p != NULL) {
- switch (*p) {
- case 'i':
- case 'm':
- case 's':
- case 'x':
- case 'u':
- case 'O':
- case 'r':
- g_string_append_c (re_flags, *p);
- p++;
- break;
- case 'o':
- p++;
- break;
- /* Type flags */
- case 'H':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_HEADER;
- }
- p++;
- break;
- case 'M':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_MESSAGE;
- }
- p++;
- break;
- case 'P':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_MIME;
- }
- p++;
- break;
- case 'U':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_URL;
- }
- p++;
- break;
- case 'X':
- if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
- result->type = REGEXP_RAW_HEADER;
- }
- p++;
- break;
- case 'T':
- result->is_test = TRUE;
- p++;
- break;
- case 'S':
- result->is_strong = TRUE;
- p++;
- break;
- /* Stop flags parsing */
- default:
- p = NULL;
- break;
- }
- }
-
- result->regexp_text = rspamd_mempool_strdup (pool, start);
- dbegin = result->regexp_text + (begin - start);
- dend = result->regexp_text + (end - start);
- *dend = '\0';
-
- if (raw_mode) {
- g_string_append_c (re_flags, 'r');
- }
-
- result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
- &err);
-
- g_string_free (re_flags, TRUE);
-
- if (result->regexp == NULL || err != NULL) {
- msg_warn ("could not read regexp: %s while reading regexp %s",
- err ? err->message : "unknown error",
- src);
- return NULL;
- }
-
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t) rspamd_regexp_unref,
- (void *)result->regexp);
-
- rspamd_regexp_set_ud (result->regexp, result);
-
- rspamd_regexp_cache_insert (NULL, line, NULL, result->regexp);
-
- *dend = '/';
-
- return result;
-}
-
-gboolean
-call_expression_function (struct expression_function * func,
- struct rspamd_task * task,
- lua_State *L)
-{
- struct _fl *selected, key;
-
- key.name = func->name;
-
- selected = bsearch (&key,
- list_ptr,
- functions_number,
- sizeof (struct _fl),
- fl_cmp);
- if (selected == NULL) {
- /* Try to check lua function */
- return FALSE;
- }
-
- return selected->func (task, func->args, selected->user_data);
-}
-
-struct expression_argument *
-get_function_arg (struct expression *expr,
- struct rspamd_task *task,
- gboolean want_string)
-{
- GQueue *stack;
- gsize cur, op1, op2;
- struct expression_argument *res;
- struct expression *it;
-
- if (expr == NULL) {
- msg_warn ("NULL expression passed");
- return NULL;
- }
- if (expr->next == NULL) {
- res =
- rspamd_mempool_alloc (task->task_pool,
- sizeof (struct expression_argument));
- if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type ==
- EXPR_REGEXP_PARSED) {
- res->type = EXPRESSION_ARGUMENT_NORMAL;
- res->data = expr->content.operand;
- }
- else if (expr->type == EXPR_FUNCTION && !want_string) {
- res->type = EXPRESSION_ARGUMENT_BOOL;
- cur = call_expression_function (expr->content.operand, task, NULL);
- res->data = GSIZE_TO_POINTER (cur);
- }
- else {
- msg_warn (
- "cannot parse argument: it contains operator or bool expression that is not wanted");
- return NULL;
- }
- return res;
- }
- else if (!want_string) {
- res =
- rspamd_mempool_alloc (task->task_pool,
- sizeof (struct expression_argument));
- res->type = EXPRESSION_ARGUMENT_BOOL;
- stack = g_queue_new ();
- it = expr;
-
- while (it) {
- if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED ||
- it->type == EXPR_STR) {
- g_queue_free (stack);
- res->type = EXPRESSION_ARGUMENT_EXPR;
- res->data = expr;
- return res;
- }
- else if (it->type == EXPR_FUNCTION) {
- cur =
- (gsize) call_expression_function ((struct
- expression_function
- *)it->content.operand, task, NULL);
- debug_task ("function %s returned %s",
- ((struct expression_function *)it->content.operand)->name,
- cur ? "true" : "false");
- }
- else if (it->type == EXPR_OPERATION) {
- if (g_queue_is_empty (stack)) {
- /* Queue has no operands for operation, exiting */
- debug_task ("invalid expression");
- g_queue_free (stack);
- return NULL;
- }
- switch (it->content.operation) {
- case '!':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op1 = !op1;
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
- break;
- case '&':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
- break;
- case '|':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
- break;
- default:
- it = it->next;
- continue;
- }
- }
- if (it) {
- it = it->next;
- }
- }
- if (!g_queue_is_empty (stack)) {
- res->data = g_queue_pop_head (stack);
- }
- else {
- res->data = GSIZE_TO_POINTER (FALSE);
- }
-
- return res;
- }
-
- msg_warn ("invalid expression argument");
-
- return NULL;
-}
-
-void
-register_expression_function (const gchar *name,
- rspamd_internal_func_t func,
- void *user_data)
-{
- static struct _fl *new;
-
- functions_number++;
-
- new = g_new (struct _fl, functions_number);
- memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
- if (list_allocated) {
- g_free (list_ptr);
- }
-
- list_allocated = TRUE;
- new[functions_number - 1].name = name;
- new[functions_number - 1].func = func;
- new[functions_number - 1].user_data = user_data;
- qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
- list_ptr = new;
-}
-
-gboolean
-rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused)
-{
- struct expression_argument *arg;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
- msg_warn ("invalid argument to function is passed");
- return FALSE;
- }
-
- /* XXX: really write this function */
- return TRUE;
-}
-
-gboolean
-rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused)
-{
- struct expression_argument *arg;
- GList *headerlist;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
- msg_warn ("invalid argument to function is passed");
- return FALSE;
- }
-
- debug_task ("try to get header %s", (gchar *)arg->data);
- headerlist = message_get_header (task,
- (gchar *)arg->data,
- FALSE);
- if (headerlist) {
- return TRUE;
- }
- return FALSE;
-}
-
-/*
- * This function is designed to find difference between text/html and text/plain parts
- * It takes one argument: difference threshold, if we have two text parts, compare
- * its hashes and check for threshold, if value is greater than threshold, return TRUE
- * and return FALSE otherwise.
- */
-gboolean
-rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused)
-{
- gint threshold, threshold2 = -1, diff;
- struct mime_text_part *p1, *p2;
- GList *cur;
- struct expression_argument *arg;
- GMimeObject *parent;
- const GMimeContentType *ct;
- gint *pdiff;
-
- if (args == NULL) {
- debug_task ("no threshold is specified, assume it 100");
- threshold = 100;
- }
- else {
- errno = 0;
- arg = get_function_arg (args->data, task, TRUE);
- threshold = strtoul ((gchar *)arg->data, NULL, 10);
- if (errno != 0) {
- msg_info ("bad numeric value for threshold \"%s\", assume it 100",
- (gchar *)args->data);
- threshold = 100;
- }
- if (args->next) {
- arg = get_function_arg (args->next->data, task, TRUE);
- errno = 0;
- threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
- if (errno != 0) {
- msg_info ("bad numeric value for threshold \"%s\", ignore it",
- (gchar *)arg->data);
- threshold2 = -1;
- }
- }
- }
-
- if ((pdiff =
- rspamd_mempool_get_variable (task->task_pool,
- "parts_distance")) != NULL) {
- diff = *pdiff;
- if (diff != -1) {
- if (threshold2 > 0) {
- if (diff >=
- MIN (threshold,
- threshold2) && diff < MAX (threshold, threshold2)) {
- return TRUE;
- }
- }
- else {
- if (diff <= threshold) {
- return TRUE;
- }
- }
- return FALSE;
- }
- else {
- return FALSE;
- }
- }
-
- if (g_list_length (task->text_parts) == 2) {
- cur = g_list_first (task->text_parts);
- p1 = cur->data;
- cur = g_list_next (cur);
- pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
- *pdiff = -1;
-
- if (cur == NULL) {
- msg_info ("bad parts list");
- return FALSE;
- }
- p2 = cur->data;
- /* First of all check parent object */
- if (p1->parent && p1->parent == p2->parent) {
- parent = p1->parent;
- ct = g_mime_object_get_content_type (parent);
-#ifndef GMIME24
- if (ct == NULL ||
- !g_mime_content_type_is_type (ct, "multipart", "alternative")) {
-#else
- if (ct == NULL ||
- !g_mime_content_type_is_type ((GMimeContentType *)ct,
- "multipart", "alternative")) {
-#endif
- debug_task (
- "two parts are not belong to multipart/alternative container, skip check");
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return FALSE;
- }
- }
- else {
- debug_task (
- "message contains two parts but they are in different multi-parts");
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return FALSE;
- }
- if (!p1->is_empty && !p2->is_empty) {
- if (p1->diff_str != NULL && p2->diff_str != NULL) {
- diff = rspamd_diff_distance_normalized (p1->diff_str,
- p2->diff_str);
- }
- else {
- diff = rspamd_fuzzy_compare_parts (p1, p2);
- }
- debug_task (
- "got likeliness between parts of %d%%, threshold is %d%%",
- diff,
- threshold);
- *pdiff = diff;
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- if (threshold2 > 0) {
- if (diff >=
- MIN (threshold,
- threshold2) && diff < MAX (threshold, threshold2)) {
- return TRUE;
- }
- }
- else {
- if (diff <= threshold) {
- return TRUE;
- }
- }
- }
- else if ((p1->is_empty &&
- !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
- /* Empty and non empty parts are different */
- *pdiff = 0;
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return TRUE;
- }
- }
- else {
- debug_task (
- "message has too many text parts, so do not try to compare them with each other");
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return FALSE;
- }
-
- rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff,
- NULL);
- return FALSE;
-}
-
-struct addr_list {
- const gchar *name;
- const gchar *addr;
-};
-
-#define COMPARE_RCPT_LEN 3
-#define MIN_RCPT_TO_COMPARE 7
-
-gboolean
-rspamd_recipients_distance (struct rspamd_task *task, GList * args,
- void *unused)
-{
- struct expression_argument *arg;
- InternetAddressList *cur;
- double threshold;
- struct addr_list *ar;
- gchar *c;
- gint num, i, j, hits = 0, total = 0;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- errno = 0;
- threshold = strtod ((gchar *)arg->data, NULL);
- if (errno != 0) {
- msg_warn ("invalid numeric value '%s': %s",
- (gchar *)arg->data,
- strerror (errno));
- return FALSE;
- }
-
- if (!task->rcpt_mime) {
- return FALSE;
- }
- num = internet_address_list_length (task->rcpt_mime);
- if (num < MIN_RCPT_TO_COMPARE) {
- return FALSE;
- }
- ar =
- rspamd_mempool_alloc0 (task->task_pool, num *
- sizeof (struct addr_list));
-
- /* Fill array */
- cur = task->rcpt_mime;
-#ifdef GMIME24
- for (i = 0; i < num; i++) {
- InternetAddress *iaelt =
- internet_address_list_get_address(cur, i);
- InternetAddressMailbox *iamb =
- INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
- INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
- if (iamb) {
- ar[i].name = internet_address_mailbox_get_addr (iamb);
- if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
- ar[i].addr = c + 1;
- }
- }
- }
-#else
- InternetAddress *addr;
- i = 0;
- while (cur) {
- addr = internet_address_list_get_address (cur);
- if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
- ar[i].name = rspamd_mempool_strdup (task->task_pool,
- internet_address_get_addr (addr));
- if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
- *c = '\0';
- ar[i].addr = c + 1;
- }
- cur = internet_address_list_next (cur);
- i++;
- }
- else {
- cur = internet_address_list_next (cur);
- }
- }
-#endif
-
- /* Cycle all elements in array */
- for (i = 0; i < num; i++) {
- for (j = i + 1; j < num; j++) {
- if (ar[i].name && ar[j].name &&
- g_ascii_strncasecmp (ar[i].name, ar[j].name,
- COMPARE_RCPT_LEN) == 0) {
- /* Common name part */
- hits++;
- }
- else if (ar[i].addr && ar[j].addr &&
- g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
- /* Common address part, but different name */
- hits++;
- }
- total++;
- }
- }
-
- if ((double)(hits * num / 2.) / (double)total >= threshold) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_has_only_html_part (struct rspamd_task * task, GList * args,
- void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- gboolean res = FALSE;
-
- cur = g_list_first (task->text_parts);
- while (cur) {
- p = cur->data;
- if (p->is_html) {
- res = TRUE;
- }
- else {
- res = FALSE;
- break;
- }
- cur = g_list_next (cur);
- }
-
- return res;
-}
-
-static gboolean
-is_recipient_list_sorted (const InternetAddressList * ia)
-{
- const InternetAddressList *cur;
- InternetAddress *addr;
- gboolean res = TRUE;
- struct addr_list current = { NULL, NULL }, previous = {
- NULL, NULL
- };
-#ifdef GMIME24
- gint num, i;
-#endif
-
- /* Do not check to short address lists */
- if (internet_address_list_length ((InternetAddressList *)ia) <
- MIN_RCPT_TO_COMPARE) {
- return FALSE;
- }
-#ifdef GMIME24
- num = internet_address_list_length ((InternetAddressList *)ia);
- cur = ia;
- for (i = 0; i < num; i++) {
- addr =
- internet_address_list_get_address ((InternetAddressList *)cur, i);
- current.addr = (gchar *)internet_address_get_name (addr);
- if (previous.addr != NULL) {
- if (current.addr &&
- g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
- res = FALSE;
- break;
- }
- }
- previous.addr = current.addr;
- }
-#else
- cur = ia;
- while (cur) {
- addr = internet_address_list_get_address (cur);
- if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
- current.addr = internet_address_get_addr (addr);
- if (previous.addr != NULL) {
- if (current.addr &&
- g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
- res = FALSE;
- break;
- }
- }
- previous.addr = current.addr;
- }
- cur = internet_address_list_next (cur);
- }
-#endif
-
- return res;
-}
-
-gboolean
-rspamd_is_recipients_sorted (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- /* Check all types of addresses */
- if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
- GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
- return TRUE;
- }
- if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
- GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
- return TRUE;
- }
- if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
- GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_compare_transfer_encoding (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- GMimeObject *part;
-#ifndef GMIME24
- GMimePartEncodingType enc_req, part_enc;
-#else
- GMimeContentEncoding enc_req, part_enc;
-#endif
- struct expression_argument *arg;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
-#ifndef GMIME24
- enc_req = g_mime_part_encoding_from_string (arg->data);
- if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
-#else
- enc_req = g_mime_content_encoding_from_string (arg->data);
- if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
-#endif
- msg_warn ("bad encoding type: %s", (gchar *)arg->data);
- return FALSE;
- }
-
- part = g_mime_message_get_mime_part (task->message);
- if (part) {
- if (GMIME_IS_PART (part)) {
-#ifndef GMIME24
- part_enc = g_mime_part_get_encoding (GMIME_PART (part));
- if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
- /* Assume 7bit as default transfer encoding */
- part_enc = GMIME_PART_ENCODING_7BIT;
- }
-#else
- part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
- if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
- /* Assume 7bit as default transfer encoding */
- part_enc = GMIME_CONTENT_ENCODING_7BIT;
- }
-#endif
-
-
- debug_task ("got encoding in part: %d and compare with %d",
- (gint)part_enc,
- (gint)enc_req);
-#ifndef GMIME24
- g_object_unref (part);
-#endif
-
- return part_enc == enc_req;
- }
-#ifndef GMIME24
- g_object_unref (part);
-#endif
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- gboolean res = TRUE;
-
- cur = g_list_first (task->text_parts);
- while (cur) {
- p = cur->data;
- if (!p->is_empty && p->is_html) {
- if (p->is_balanced) {
- res = TRUE;
- }
- else {
- res = FALSE;
- break;
- }
- }
- cur = g_list_next (cur);
- }
-
- return res;
-
-}
-
-struct html_callback_data {
- struct html_tag *tag;
- gboolean *res;
-};
-
-static gboolean
-search_html_node_callback (GNode * node, gpointer data)
-{
- struct html_callback_data *cd = data;
- struct html_node *nd;
-
- nd = node->data;
- if (nd) {
- if (nd->tag == cd->tag) {
- *cd->res = TRUE;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- struct expression_argument *arg;
- struct html_tag *tag;
- gboolean res = FALSE;
- struct html_callback_data cd;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- tag = get_tag_by_name (arg->data);
- if (tag == NULL) {
- msg_warn ("unknown tag type passed as argument: %s",
- (gchar *)arg->data);
- return FALSE;
- }
-
- cur = g_list_first (task->text_parts);
- cd.res = &res;
- cd.tag = tag;
-
- while (cur && res == FALSE) {
- p = cur->data;
- if (!p->is_empty && p->is_html && p->html_nodes) {
- g_node_traverse (p->html_nodes,
- G_PRE_ORDER,
- G_TRAVERSE_ALL,
- -1,
- search_html_node_callback,
- &cd);
- }
- cur = g_list_next (cur);
- }
-
- return res;
-
-}
-
-gboolean
-rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- gboolean res = FALSE;
-
- cur = g_list_first (task->text_parts);
-
- while (cur && res == FALSE) {
- p = cur->data;
- if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
- res = TRUE;
- }
- cur = g_list_next (cur);
- }
-
- return res;
-
-}
-
-
-/*
- * vi:ts=4
- */
--- /dev/null
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "main.h"
+#include "message.h"
+#include "fuzzy.h"
+#include "mime_expressions.h"
+#include "html.h"
+#include "lua/lua_common.h"
+#include "diff.h"
+
+gboolean rspamd_compare_encoding (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_header_exists (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_parts_distance (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_recipients_distance (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_has_only_html_part (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_is_html_balanced (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_has_html_tag (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+gboolean rspamd_has_fake_html (struct rspamd_task *task,
+ GList * args,
+ void *unused);
+
+/**
+ * Regexp type: /H - header, /M - mime, /U - url /X - raw header
+ */
+enum rspamd_regexp_type {
+ REGEXP_NONE = 0,
+ REGEXP_HEADER,
+ REGEXP_MIME,
+ REGEXP_MESSAGE,
+ REGEXP_URL,
+ REGEXP_RAW_HEADER
+};
+
+/**
+ * Regexp structure
+ */
+struct rspamd_regexp_atom {
+ enum rspamd_regexp_type type; /**< regexp type */
+ gchar *regexp_text; /**< regexp text representation */
+ rspamd_regexp_t *regexp; /**< regexp structure */
+ gchar *header; /**< header name for header regexps */
+ gboolean is_test; /**< true if this expression must be tested */
+ gboolean is_strong; /**< true if headers search must be case sensitive */
+};
+
+/**
+ * Rspamd expression function
+ */
+struct rspamd_function_atom {
+ gchar *name; /**< name of function */
+ GList *args; /**< its args */
+};
+
+struct rspamd_mime_atom {
+
+};
+
+/*
+ * List of internal functions of rspamd
+ * Sorted by name to use bsearch
+ */
+static struct _fl {
+ const gchar *name;
+ rspamd_internal_func_t func;
+ void *user_data;
+} rspamd_functions_list[] = {
+ {"compare_encoding", rspamd_compare_encoding, NULL},
+ {"compare_parts_distance", rspamd_parts_distance, NULL},
+ {"compare_recipients_distance", rspamd_recipients_distance, NULL},
+ {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
+ {"has_fake_html", rspamd_has_fake_html, NULL},
+ {"has_html_tag", rspamd_has_html_tag, NULL},
+ {"has_only_html_part", rspamd_has_only_html_part, NULL},
+ {"header_exists", rspamd_header_exists, NULL},
+ {"is_html_balanced", rspamd_is_html_balanced, NULL},
+ {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}
+};
+
+static struct _fl *list_ptr = &rspamd_functions_list[0];
+static guint32 functions_number = sizeof (rspamd_functions_list) /
+ sizeof (struct _fl);
+static gboolean list_allocated = FALSE;
+
+/* Bsearch routine */
+static gint
+fl_cmp (const void *s1, const void *s2)
+{
+ struct _fl *fl1 = (struct _fl *)s1;
+ struct _fl *fl2 = (struct _fl *)s2;
+ return strcmp (fl1->name, fl2->name);
+}
+
+/*
+ * Rspamd regexp utility functions
+ */
+static struct rspamd_regexp_atom *
+rspamd_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line)
+{
+ const gchar *begin, *end, *p, *src, *start;
+ gchar *dbegin, *dend;
+ struct rspamd_regexp_atom *result;
+ rspamd_regexp_t *re;
+ GError *err = NULL;
+ GString *re_flags;
+
+ if (line == NULL) {
+ msg_err ("cannot parse NULL line");
+ return NULL;
+ }
+
+ if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) {
+ return ((struct rspamd_regexp_element *)rspamd_regexp_get_ud (re));
+ }
+
+ src = line;
+ result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_element));
+ /* Skip whitespaces */
+ while (g_ascii_isspace (*line)) {
+ line++;
+ }
+ if (*line == '\0') {
+ msg_warn ("got empty regexp");
+ return NULL;
+ }
+ start = line;
+ /* First try to find header name */
+ begin = strchr (line, '/');
+ if (begin != NULL) {
+ p = begin;
+ end = NULL;
+ while (p != line) {
+ if (*p == '=') {
+ end = p;
+ break;
+ }
+ p--;
+ }
+ if (end) {
+ result->header = rspamd_mempool_alloc (pool, end - line + 1);
+ rspamd_strlcpy (result->header, line, end - line + 1);
+ result->type = REGEXP_HEADER;
+ line = end;
+ }
+ }
+ else {
+ result->header = rspamd_mempool_strdup (pool, line);
+ result->type = REGEXP_HEADER;
+ line = start;
+ }
+ /* Find begin of regexp */
+ while (*line && *line != '/') {
+ line++;
+ }
+ if (*line != '\0') {
+ begin = line + 1;
+ }
+ else if (result->header == NULL) {
+ /* Assume that line without // is just a header name */
+ result->header = rspamd_mempool_strdup (pool, line);
+ result->type = REGEXP_HEADER;
+ return result;
+ }
+ else {
+ /* We got header name earlier but have not found // expression, so it is invalid regexp */
+ msg_warn (
+ "got no header name (eg. header=) but without corresponding regexp, %s",
+ src);
+ return NULL;
+ }
+ /* Find end */
+ end = begin;
+ while (*end && (*end != '/' || *(end - 1) == '\\')) {
+ end++;
+ }
+ if (end == begin || *end != '/') {
+ msg_warn ("no trailing / in regexp %s", src);
+ return NULL;
+ }
+ /* Parse flags */
+ p = end + 1;
+ re_flags = g_string_sized_new (32);
+ while (p != NULL) {
+ switch (*p) {
+ case 'i':
+ case 'm':
+ case 's':
+ case 'x':
+ case 'u':
+ case 'O':
+ case 'r':
+ g_string_append_c (re_flags, *p);
+ p++;
+ break;
+ case 'o':
+ p++;
+ break;
+ /* Type flags */
+ case 'H':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_HEADER;
+ }
+ p++;
+ break;
+ case 'M':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_MESSAGE;
+ }
+ p++;
+ break;
+ case 'P':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_MIME;
+ }
+ p++;
+ break;
+ case 'U':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_URL;
+ }
+ p++;
+ break;
+ case 'X':
+ if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+ result->type = REGEXP_RAW_HEADER;
+ }
+ p++;
+ break;
+ case 'T':
+ result->is_test = TRUE;
+ p++;
+ break;
+ case 'S':
+ result->is_strong = TRUE;
+ p++;
+ break;
+ /* Stop flags parsing */
+ default:
+ p = NULL;
+ break;
+ }
+ }
+
+ result->regexp_text = rspamd_mempool_strdup (pool, start);
+ dbegin = result->regexp_text + (begin - start);
+ dend = result->regexp_text + (end - start);
+ *dend = '\0';
+
+ result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
+ &err);
+
+ g_string_free (re_flags, TRUE);
+
+ if (result->regexp == NULL || err != NULL) {
+ msg_warn ("could not read regexp: %s while reading regexp %s",
+ err ? err->message : "unknown error",
+ src);
+ return NULL;
+ }
+
+ rspamd_mempool_add_destructor (pool,
+ (rspamd_mempool_destruct_t) rspamd_regexp_unref,
+ (void *)result->regexp);
+
+ rspamd_regexp_set_ud (result->regexp, result);
+
+ rspamd_regexp_cache_insert (NULL, line, NULL, result->regexp);
+
+ *dend = '/';
+
+ return result;
+}
+
+gboolean
+call_expression_function (struct expression_function * func,
+ struct rspamd_task * task,
+ lua_State *L)
+{
+ struct _fl *selected, key;
+
+ key.name = func->name;
+
+ selected = bsearch (&key,
+ list_ptr,
+ functions_number,
+ sizeof (struct _fl),
+ fl_cmp);
+ if (selected == NULL) {
+ /* Try to check lua function */
+ return FALSE;
+ }
+
+ return selected->func (task, func->args, selected->user_data);
+}
+
+void
+register_expression_function (const gchar *name,
+ rspamd_internal_func_t func,
+ void *user_data)
+{
+ static struct _fl *new;
+
+ functions_number++;
+
+ new = g_new (struct _fl, functions_number);
+ memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
+ if (list_allocated) {
+ g_free (list_ptr);
+ }
+
+ list_allocated = TRUE;
+ new[functions_number - 1].name = name;
+ new[functions_number - 1].func = func;
+ new[functions_number - 1].user_data = user_data;
+ qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
+ list_ptr = new;
+}
+
+gboolean
+rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused)
+{
+ struct expression_argument *arg;
+
+ if (args == NULL || task == NULL) {
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
+ msg_warn ("invalid argument to function is passed");
+ return FALSE;
+ }
+
+ /* XXX: really write this function */
+ return TRUE;
+}
+
+gboolean
+rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct expression_argument *arg;
+ GList *headerlist;
+
+ if (args == NULL || task == NULL) {
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+ msg_warn ("invalid argument to function is passed");
+ return FALSE;
+ }
+
+ debug_task ("try to get header %s", (gchar *)arg->data);
+ headerlist = message_get_header (task,
+ (gchar *)arg->data,
+ FALSE);
+ if (headerlist) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
+ * This function is designed to find difference between text/html and text/plain parts
+ * It takes one argument: difference threshold, if we have two text parts, compare
+ * its hashes and check for threshold, if value is greater than threshold, return TRUE
+ * and return FALSE otherwise.
+ */
+gboolean
+rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused)
+{
+ gint threshold, threshold2 = -1, diff;
+ struct mime_text_part *p1, *p2;
+ GList *cur;
+ struct expression_argument *arg;
+ GMimeObject *parent;
+ const GMimeContentType *ct;
+ gint *pdiff;
+
+ if (args == NULL) {
+ debug_task ("no threshold is specified, assume it 100");
+ threshold = 100;
+ }
+ else {
+ errno = 0;
+ arg = get_function_arg (args->data, task, TRUE);
+ threshold = strtoul ((gchar *)arg->data, NULL, 10);
+ if (errno != 0) {
+ msg_info ("bad numeric value for threshold \"%s\", assume it 100",
+ (gchar *)args->data);
+ threshold = 100;
+ }
+ if (args->next) {
+ arg = get_function_arg (args->next->data, task, TRUE);
+ errno = 0;
+ threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
+ if (errno != 0) {
+ msg_info ("bad numeric value for threshold \"%s\", ignore it",
+ (gchar *)arg->data);
+ threshold2 = -1;
+ }
+ }
+ }
+
+ if ((pdiff =
+ rspamd_mempool_get_variable (task->task_pool,
+ "parts_distance")) != NULL) {
+ diff = *pdiff;
+ if (diff != -1) {
+ if (threshold2 > 0) {
+ if (diff >=
+ MIN (threshold,
+ threshold2) && diff < MAX (threshold, threshold2)) {
+ return TRUE;
+ }
+ }
+ else {
+ if (diff <= threshold) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+ }
+ else {
+ return FALSE;
+ }
+ }
+
+ if (g_list_length (task->text_parts) == 2) {
+ cur = g_list_first (task->text_parts);
+ p1 = cur->data;
+ cur = g_list_next (cur);
+ pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
+ *pdiff = -1;
+
+ if (cur == NULL) {
+ msg_info ("bad parts list");
+ return FALSE;
+ }
+ p2 = cur->data;
+ /* First of all check parent object */
+ if (p1->parent && p1->parent == p2->parent) {
+ parent = p1->parent;
+ ct = g_mime_object_get_content_type (parent);
+#ifndef GMIME24
+ if (ct == NULL ||
+ !g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+#else
+ if (ct == NULL ||
+ !g_mime_content_type_is_type ((GMimeContentType *)ct,
+ "multipart", "alternative")) {
+#endif
+ debug_task (
+ "two parts are not belong to multipart/alternative container, skip check");
+ rspamd_mempool_set_variable (task->task_pool,
+ "parts_distance",
+ pdiff,
+ NULL);
+ return FALSE;
+ }
+ }
+ else {
+ debug_task (
+ "message contains two parts but they are in different multi-parts");
+ rspamd_mempool_set_variable (task->task_pool,
+ "parts_distance",
+ pdiff,
+ NULL);
+ return FALSE;
+ }
+ if (!p1->is_empty && !p2->is_empty) {
+ if (p1->diff_str != NULL && p2->diff_str != NULL) {
+ diff = rspamd_diff_distance_normalized (p1->diff_str,
+ p2->diff_str);
+ }
+ else {
+ diff = rspamd_fuzzy_compare_parts (p1, p2);
+ }
+ debug_task (
+ "got likeliness between parts of %d%%, threshold is %d%%",
+ diff,
+ threshold);
+ *pdiff = diff;
+ rspamd_mempool_set_variable (task->task_pool,
+ "parts_distance",
+ pdiff,
+ NULL);
+ if (threshold2 > 0) {
+ if (diff >=
+ MIN (threshold,
+ threshold2) && diff < MAX (threshold, threshold2)) {
+ return TRUE;
+ }
+ }
+ else {
+ if (diff <= threshold) {
+ return TRUE;
+ }
+ }
+ }
+ else if ((p1->is_empty &&
+ !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+ /* Empty and non empty parts are different */
+ *pdiff = 0;
+ rspamd_mempool_set_variable (task->task_pool,
+ "parts_distance",
+ pdiff,
+ NULL);
+ return TRUE;
+ }
+ }
+ else {
+ debug_task (
+ "message has too many text parts, so do not try to compare them with each other");
+ rspamd_mempool_set_variable (task->task_pool,
+ "parts_distance",
+ pdiff,
+ NULL);
+ return FALSE;
+ }
+
+ rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff,
+ NULL);
+ return FALSE;
+}
+
+struct addr_list {
+ const gchar *name;
+ const gchar *addr;
+};
+
+#define COMPARE_RCPT_LEN 3
+#define MIN_RCPT_TO_COMPARE 7
+
+gboolean
+rspamd_recipients_distance (struct rspamd_task *task, GList * args,
+ void *unused)
+{
+ struct expression_argument *arg;
+ InternetAddressList *cur;
+ double threshold;
+ struct addr_list *ar;
+ gchar *c;
+ gint num, i, j, hits = 0, total = 0;
+
+ if (args == NULL) {
+ msg_warn ("no parameters to function");
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ errno = 0;
+ threshold = strtod ((gchar *)arg->data, NULL);
+ if (errno != 0) {
+ msg_warn ("invalid numeric value '%s': %s",
+ (gchar *)arg->data,
+ strerror (errno));
+ return FALSE;
+ }
+
+ if (!task->rcpt_mime) {
+ return FALSE;
+ }
+ num = internet_address_list_length (task->rcpt_mime);
+ if (num < MIN_RCPT_TO_COMPARE) {
+ return FALSE;
+ }
+ ar =
+ rspamd_mempool_alloc0 (task->task_pool, num *
+ sizeof (struct addr_list));
+
+ /* Fill array */
+ cur = task->rcpt_mime;
+#ifdef GMIME24
+ for (i = 0; i < num; i++) {
+ InternetAddress *iaelt =
+ internet_address_list_get_address(cur, i);
+ InternetAddressMailbox *iamb =
+ INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
+ INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
+ if (iamb) {
+ ar[i].name = internet_address_mailbox_get_addr (iamb);
+ if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+ ar[i].addr = c + 1;
+ }
+ }
+ }
+#else
+ InternetAddress *addr;
+ i = 0;
+ while (cur) {
+ addr = internet_address_list_get_address (cur);
+ if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+ ar[i].name = rspamd_mempool_strdup (task->task_pool,
+ internet_address_get_addr (addr));
+ if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+ *c = '\0';
+ ar[i].addr = c + 1;
+ }
+ cur = internet_address_list_next (cur);
+ i++;
+ }
+ else {
+ cur = internet_address_list_next (cur);
+ }
+ }
+#endif
+
+ /* Cycle all elements in array */
+ for (i = 0; i < num; i++) {
+ for (j = i + 1; j < num; j++) {
+ if (ar[i].name && ar[j].name &&
+ g_ascii_strncasecmp (ar[i].name, ar[j].name,
+ COMPARE_RCPT_LEN) == 0) {
+ /* Common name part */
+ hits++;
+ }
+ else if (ar[i].addr && ar[j].addr &&
+ g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
+ /* Common address part, but different name */
+ hits++;
+ }
+ total++;
+ }
+ }
+
+ if ((double)(hits * num / 2.) / (double)total >= threshold) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_has_only_html_part (struct rspamd_task * task, GList * args,
+ void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ gboolean res = FALSE;
+
+ cur = g_list_first (task->text_parts);
+ while (cur) {
+ p = cur->data;
+ if (p->is_html) {
+ res = TRUE;
+ }
+ else {
+ res = FALSE;
+ break;
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+}
+
+static gboolean
+is_recipient_list_sorted (const InternetAddressList * ia)
+{
+ const InternetAddressList *cur;
+ InternetAddress *addr;
+ gboolean res = TRUE;
+ struct addr_list current = { NULL, NULL }, previous = {
+ NULL, NULL
+ };
+#ifdef GMIME24
+ gint num, i;
+#endif
+
+ /* Do not check to short address lists */
+ if (internet_address_list_length ((InternetAddressList *)ia) <
+ MIN_RCPT_TO_COMPARE) {
+ return FALSE;
+ }
+#ifdef GMIME24
+ num = internet_address_list_length ((InternetAddressList *)ia);
+ cur = ia;
+ for (i = 0; i < num; i++) {
+ addr =
+ internet_address_list_get_address ((InternetAddressList *)cur, i);
+ current.addr = (gchar *)internet_address_get_name (addr);
+ if (previous.addr != NULL) {
+ if (current.addr &&
+ g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+ res = FALSE;
+ break;
+ }
+ }
+ previous.addr = current.addr;
+ }
+#else
+ cur = ia;
+ while (cur) {
+ addr = internet_address_list_get_address (cur);
+ if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+ current.addr = internet_address_get_addr (addr);
+ if (previous.addr != NULL) {
+ if (current.addr &&
+ g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+ res = FALSE;
+ break;
+ }
+ }
+ previous.addr = current.addr;
+ }
+ cur = internet_address_list_next (cur);
+ }
+#endif
+
+ return res;
+}
+
+gboolean
+rspamd_is_recipients_sorted (struct rspamd_task * task,
+ GList * args,
+ void *unused)
+{
+ /* Check all types of addresses */
+ if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
+ GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
+ return TRUE;
+ }
+ if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
+ GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
+ return TRUE;
+ }
+ if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
+ GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_compare_transfer_encoding (struct rspamd_task * task,
+ GList * args,
+ void *unused)
+{
+ GMimeObject *part;
+#ifndef GMIME24
+ GMimePartEncodingType enc_req, part_enc;
+#else
+ GMimeContentEncoding enc_req, part_enc;
+#endif
+ struct expression_argument *arg;
+
+ if (args == NULL) {
+ msg_warn ("no parameters to function");
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+#ifndef GMIME24
+ enc_req = g_mime_part_encoding_from_string (arg->data);
+ if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
+#else
+ enc_req = g_mime_content_encoding_from_string (arg->data);
+ if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
+#endif
+ msg_warn ("bad encoding type: %s", (gchar *)arg->data);
+ return FALSE;
+ }
+
+ part = g_mime_message_get_mime_part (task->message);
+ if (part) {
+ if (GMIME_IS_PART (part)) {
+#ifndef GMIME24
+ part_enc = g_mime_part_get_encoding (GMIME_PART (part));
+ if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
+ /* Assume 7bit as default transfer encoding */
+ part_enc = GMIME_PART_ENCODING_7BIT;
+ }
+#else
+ part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
+ if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
+ /* Assume 7bit as default transfer encoding */
+ part_enc = GMIME_CONTENT_ENCODING_7BIT;
+ }
+#endif
+
+
+ debug_task ("got encoding in part: %d and compare with %d",
+ (gint)part_enc,
+ (gint)enc_req);
+#ifndef GMIME24
+ g_object_unref (part);
+#endif
+
+ return part_enc == enc_req;
+ }
+#ifndef GMIME24
+ g_object_unref (part);
+#endif
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ gboolean res = TRUE;
+
+ cur = g_list_first (task->text_parts);
+ while (cur) {
+ p = cur->data;
+ if (!p->is_empty && p->is_html) {
+ if (p->is_balanced) {
+ res = TRUE;
+ }
+ else {
+ res = FALSE;
+ break;
+ }
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+
+}
+
+struct html_callback_data {
+ struct html_tag *tag;
+ gboolean *res;
+};
+
+static gboolean
+search_html_node_callback (GNode * node, gpointer data)
+{
+ struct html_callback_data *cd = data;
+ struct html_node *nd;
+
+ nd = node->data;
+ if (nd) {
+ if (nd->tag == cd->tag) {
+ *cd->res = TRUE;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ struct expression_argument *arg;
+ struct html_tag *tag;
+ gboolean res = FALSE;
+ struct html_callback_data cd;
+
+ if (args == NULL) {
+ msg_warn ("no parameters to function");
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ tag = get_tag_by_name (arg->data);
+ if (tag == NULL) {
+ msg_warn ("unknown tag type passed as argument: %s",
+ (gchar *)arg->data);
+ return FALSE;
+ }
+
+ cur = g_list_first (task->text_parts);
+ cd.res = &res;
+ cd.tag = tag;
+
+ while (cur && res == FALSE) {
+ p = cur->data;
+ if (!p->is_empty && p->is_html && p->html_nodes) {
+ g_node_traverse (p->html_nodes,
+ G_PRE_ORDER,
+ G_TRAVERSE_ALL,
+ -1,
+ search_html_node_callback,
+ &cd);
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+
+}
+
+gboolean
+rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ gboolean res = FALSE;
+
+ cur = g_list_first (task->text_parts);
+
+ while (cur && res == FALSE) {
+ p = cur->data;
+ if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
+ res = TRUE;
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+
+}