source.dussan.org Git - rspamd.git/commitdiff
Start removing old stuff.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 19 Mar 2015 15:07:22 +0000 (15:07 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 19 Mar 2015 15:07:22 +0000 (15:07 +0000)
src/libmime/expressions.c [deleted file]
src/libmime/expressions.h [deleted file]
src/libmime/mime_expressions.c [new file with mode: 0644]
src/libmime/mime_expressions.h [new file with mode: 0644]
src/libserver/cfg_file.h
src/lua/lua_task.c

diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c
deleted file mode 100644 (file)
index 547cc0d..0000000
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * Copyright (c) 2009-2012, Vsevolod Stakhov
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in the
- *       documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "util.h"
-#include "cfg_file.h"
-#include "main.h"
-#include "message.h"
-#include "fuzzy.h"
-#include "expressions.h"
-#include "html.h"
-#include "lua/lua_common.h"
-#include "diff.h"
-
-/*
- * Forward declarations of the built-in expression functions.
- * Every one of them takes the task, a GList of arguments and an opaque
- * pointer, and returns a boolean verdict.
- */
-gboolean rspamd_compare_encoding (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_header_exists (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_parts_distance (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_recipients_distance (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_has_only_html_part (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
-       GList *args, void *unused);
-gboolean rspamd_is_html_balanced (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_has_html_tag (struct rspamd_task *task, GList *args,
-       void *unused);
-gboolean rspamd_has_fake_html (struct rspamd_task *task, GList *args,
-       void *unused);
-
-/*
- * Table of the built-in expression functions.
- * Entries must stay ordered by name because lookups use bsearch.
- */
-static struct _fl {
-       const gchar *name;              /* name matched against the function name */
-       rspamd_internal_func_t func;    /* implementation to call */
-       void *user_data;                /* handed to func as its last argument */
-} rspamd_functions_list[] = {
-       { "compare_encoding", rspamd_compare_encoding, NULL },
-       { "compare_parts_distance", rspamd_parts_distance, NULL },
-       { "compare_recipients_distance", rspamd_recipients_distance, NULL },
-       { "compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL },
-       { "has_fake_html", rspamd_has_fake_html, NULL },
-       { "has_html_tag", rspamd_has_html_tag, NULL },
-       { "has_only_html_part", rspamd_has_only_html_part, NULL },
-       { "header_exists", rspamd_header_exists, NULL },
-       { "is_html_balanced", rspamd_is_html_balanced, NULL },
-       { "is_recipients_sorted", rspamd_is_recipients_sorted, NULL }
-};
-
-/* Start of the table that is currently searched and its element count */
-static struct _fl *list_ptr = rspamd_functions_list;
-static guint32 functions_number =
-       sizeof (rspamd_functions_list) / sizeof (rspamd_functions_list[0]);
-static gboolean list_allocated = FALSE;
-
-/* bsearch comparator: orders two function table entries by their names */
-static gint
-fl_cmp (const void *s1, const void *s2)
-{
-       const struct _fl *lhs = s1;
-       const struct _fl *rhs = s2;
-
-       return strcmp (lhs->name, rhs->name);
-}
-
-/*
- * Look up `line` in the "re_cache" hash table stored as a pool variable.
- * When the pool has no such table yet, an empty one is created and attached
- * to the pool (with g_hash_table_destroy as its destructor) and NULL is
- * returned, as nothing can be cached at that point.
- */
-void *
-re_cache_check (const gchar *line, rspamd_mempool_t *pool)
-{
-       GHashTable *cache = rspamd_mempool_get_variable (pool, "re_cache");
-
-       if (cache != NULL) {
-               return g_hash_table_lookup (cache, line);
-       }
-
-       cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
-       rspamd_mempool_set_variable (pool, "re_cache", cache,
-               (rspamd_mempool_destruct_t)g_hash_table_destroy);
-
-       return NULL;
-}
-
-/*
- * Store `pointer` under the key `line` in the pool's "re_cache" table,
- * creating and attaching the table on first use.
- * The key is inserted as is (not copied), so `line` must outlive the table.
- */
-void
-re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool)
-{
-       GHashTable *cache = rspamd_mempool_get_variable (pool, "re_cache");
-
-       if (!cache) {
-               cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
-               rspamd_mempool_set_variable (pool, "re_cache", cache,
-                       (rspamd_mempool_destruct_t)g_hash_table_destroy);
-       }
-
-       g_hash_table_insert (cache, (gpointer)line, pointer);
-}
-
-/*
- * Remove the entry stored under `line` from the pool's "re_cache" table.
- * Does nothing when the pool has no such table.
- */
-void
-re_cache_del (const gchar *line, rspamd_mempool_t *pool)
-{
-       GHashTable *cache = rspamd_mempool_get_variable (pool, "re_cache");
-
-       if (cache == NULL) {
-               return;
-       }
-
-       g_hash_table_remove (cache, line);
-}
-
-/*
- * Functions for parsing expressions
- *
- * expression_stack is a singly linked stack of operator characters that the
- * parser uses while converting an expression to inverse polish notation.
- */
-struct expression_stack {
-       gchar op; /* operator character held by this node */
-       struct expression_stack *next; /* node below this one, NULL at the bottom */
-};
-
-/*
- * Push operator `op` on top of the stack `head`.
- * The node is allocated from `pool`; the new top of the stack is returned.
- */
-static struct expression_stack *
-push_expression_stack (rspamd_mempool_t * pool,
-       struct expression_stack *head,
-       gchar op)
-{
-       struct expression_stack *node =
-               rspamd_mempool_alloc (pool, sizeof (*node));
-
-       node->next = head;
-       node->op = op;
-
-       return node;
-}
-
-/*
- * Pop the top node from the stack and return the operator character it held.
- * Returns 0 when the stack is empty. The node itself is only unlinked here,
- * it is not freed.
- */
-static gchar
-delete_expression_stack (struct expression_stack **head)
-{
-       struct expression_stack *top = *head;
-
-       if (top == NULL) {
-               return 0;
-       }
-
-       *head = top->next;
-
-       return top->op;
-}
-
-/*
- * Return the priority of a logic operation: '!' binds tighter than '&' and
- * '|', which in turn rank above '('. Any other character has priority 0.
- */
-static gint
-logic_priority (gchar a)
-{
-       if (a == '!') {
-               return 3;
-       }
-       if (a == '&' || a == '|') {
-               return 2;
-       }
-       if (a == '(') {
-               return 1;
-       }
-
-       return 0;
-}
-
-/*
- * Tell whether the text at `a` starts an operation.
- * Returns TRUE for '!', '&', '|', '(' and ')', and for the words "or", "and"
- * and "not" (case insensitive) when they are directly followed by a
- * whitespace character. Returns FALSE for anything else (an operand).
- */
-static gboolean
-is_operation_symbol (gchar *a)
-{
-       const gchar *word;
-       gsize len;
-
-       switch (*a) {
-       case '!':
-       case '&':
-       case '|':
-       case '(':
-       case ')':
-               return TRUE;
-       case 'O':
-       case 'o':
-               word = "or";
-               len = sizeof ("or") - 1;
-               break;
-       case 'A':
-       case 'a':
-               word = "and";
-               len = sizeof ("and") - 1;
-               break;
-       case 'N':
-       case 'n':
-               word = "not";
-               len = sizeof ("not") - 1;
-               break;
-       default:
-               return FALSE;
-       }
-
-       /* a[len] is only inspected once the whole word has matched */
-       if (g_ascii_strncasecmp (a, word, len) != 0) {
-               return FALSE;
-       }
-
-       return g_ascii_isspace (a[len]) ? TRUE : FALSE;
-}
-
-/*
- * Return the single character form of the operation found at `a` and store
- * the position right after it in `*next`.
- * "&&" and "||" are consumed as one operator; the words "or", "and" and "not"
- * (case insensitive) map to '|', '&' and '!'.
- * Returns '\0' and leaves `*next` untouched if no operation is recognised.
- */
-static gchar
-op_to_char (gchar *a, gchar **next)
-{
-       static const struct {
-               const gchar *word;
-               gsize len;
-               gchar op;
-       } keywords[] = {
-               { "or", sizeof ("or") - 1, '|' },
-               { "and", sizeof ("and") - 1, '&' },
-               { "not", sizeof ("not") - 1, '!' }
-       };
-       gsize i;
-
-       if (*a == '!' || *a == '(' || *a == ')') {
-               *next = a + 1;
-               return *a;
-       }
-
-       if (*a == '&' || *a == '|') {
-               /* A doubled symbol is the same operator written C style */
-               *next = a + ((a[1] == a[0]) ? 2 : 1);
-               return *a;
-       }
-
-       for (i = 0; i < sizeof (keywords) / sizeof (keywords[0]); i++) {
-               if (g_ascii_strncasecmp (a, keywords[i].word,
-                       keywords[i].len) == 0) {
-                       *next = a + keywords[i].len;
-                       return keywords[i].op;
-               }
-       }
-
-       return '\0';
-}
-
-/*
- * Return TRUE if the character is one of the letters accepted after the
- * closing '/' of a regexp: i m x s u o r H M P U X T S.
- */
-static gboolean
-is_regexp_flag (gchar a)
-{
-       static const gchar known_flags[] = "imxsuorHMPUXTS";
-
-       /* strchr also matches the terminating NUL, which is not a flag */
-       if (a == '\0') {
-               return FALSE;
-       }
-
-       return strchr (known_flags, a) != NULL ? TRUE : FALSE;
-}
-
-/*
- * Allocate a new expression node from `pool` and append it to the end of the
- * list `*head`.
- * For EXPR_OPERATION nodes `op` is stored, for every other type `operand` is
- * stored. `orig` is kept as a pointer, it is not copied.
- */
-static void
-insert_expression (rspamd_mempool_t * pool,
-       struct expression **head,
-       gint type,
-       gchar op,
-       void *operand,
-       const gchar *orig)
-{
-       struct expression *node, **link;
-
-       node = rspamd_mempool_alloc (pool, sizeof (struct expression));
-       node->type = type;
-       node->orig = orig;
-       node->next = NULL;
-
-       if (type == EXPR_OPERATION) {
-               node->content.operation = op;
-       }
-       else {
-               node->content.operand = operand;
-       }
-
-       /* Find the NULL link that ends the list and hook the node there */
-       link = head;
-       while (*link != NULL) {
-               link = &(*link)->next;
-       }
-       *link = node;
-}
-
-/*
- * Parse a function argument.
- * If any position of `line` looks like an operation (see is_operation_symbol)
- * the whole line is handed to parse_expression. Otherwise a single EXPR_STR
- * node holding a pool copy of the line is returned.
- */
-static struct expression *
-maybe_parse_expression (rspamd_mempool_t * pool, gchar *line)
-{
-       struct expression *expr;
-       gchar *p, *text;
-
-       for (p = line; *p != '\0'; p++) {
-               if (is_operation_symbol (p)) {
-                       return parse_expression (pool, line);
-               }
-       }
-
-       text = rspamd_mempool_strdup (pool, line);
-
-       expr = rspamd_mempool_alloc (pool, sizeof (struct expression));
-       expr->type = EXPR_STR;
-       expr->content.operand = text;
-       /*
-        * The node comes from rspamd_mempool_alloc (not the alloc0 variant), so
-        * presumably it is not zeroed: fill `orig` as well instead of leaving
-        * it indeterminate.
-        */
-       expr->orig = text;
-       expr->next = NULL;
-
-       return expr;
-}
-
-/*
- * Make inverse polish record for specified expression
- * Memory is allocated from given pool
- *
- * The line is scanned by a small state machine. Operands (regexps, function
- * calls and plain symbols) are appended to the result list as soon as they
- * are read; operators go through an operator stack and are appended according
- * to their priority (see logic_priority).
- *
- * Returns the head of the resulting list, or NULL when `line` or `pool` is
- * NULL, when a ')' is met while the operator stack is empty, or when the
- * input ends in the middle of a token.
- */
-struct expression *
-parse_expression (rspamd_mempool_t * pool, gchar *line)
-{
-       struct expression *expr = NULL;
-       struct expression_stack *stack = NULL;
-       struct expression_function *func = NULL;
-       struct expression *arg;
-       GQueue *function_stack;
-       gchar *p, *c, *str, op, newop, *copy, *next;
-       gboolean in_regexp = FALSE;
-       gint brackets = 0;
-
-       enum {
-               SKIP_SPACES,
-               READ_OPERATOR,
-               READ_REGEXP,
-               READ_REGEXP_FLAGS,
-               READ_FUNCTION,
-               READ_FUNCTION_ARGUMENT,
-       } state = SKIP_SPACES;
-
-       if (line == NULL || pool == NULL) {
-               return NULL;
-       }
-
-       msg_debug ("parsing expression {{ %s }}", line);
-
-       function_stack = g_queue_new ();
-       /* Every node produced below points to this copy through its orig field */
-       copy = rspamd_mempool_strdup (pool, line);
-       /* p is the read position, c marks the start of the current token */
-       p = line;
-       c = p;
-       while (*p) {
-               switch (state) {
-               case SKIP_SPACES:
-                       if (!g_ascii_isspace (*p)) {
-                               if (is_operation_symbol (p)) {
-                                       state = READ_OPERATOR;
-                               }
-                               else if (*p == '/') {
-                                       c = ++p;
-                                       state = READ_REGEXP;
-                               }
-                               else {
-                                       c = p;
-                                       state = READ_FUNCTION;
-                               }
-                       }
-                       else {
-                               p++;
-                       }
-                       break;
-               case READ_OPERATOR:
-                       if (*p == ')') {
-                               if (stack == NULL) {
-                                       /* Unbalanced ')': release the queue before giving up */
-                                       g_queue_free (function_stack);
-                                       return NULL;
-                               }
-                               /* Pop all operators from stack to nearest '(' or to head */
-                               while (stack && stack->op != '(') {
-                                       op = delete_expression_stack (&stack);
-                                       if (op != '(') {
-                                               insert_expression (pool,
-                                                       &expr,
-                                                       EXPR_OPERATION,
-                                                       op,
-                                                       NULL,
-                                                       copy);
-                                       }
-                               }
-                               if (stack) {
-                                       /* Remove open brace itself */
-                                       delete_expression_stack (&stack);
-                               }
-                       }
-                       else if (*p == '(') {
-                               /* Push it to stack */
-                               stack = push_expression_stack (pool, stack, *p);
-                       }
-                       else {
-                               if (stack == NULL) {
-                                       newop = op_to_char (p, &next);
-                                       if (newop != '\0') {
-                                               stack = push_expression_stack (pool, stack, newop);
-                                               p = next;
-                                               state = SKIP_SPACES;
-                                               continue;
-                                       }
-                               }
-                               /* Check priority of logic operation */
-                               else {
-                                       newop = op_to_char (p, &next);
-                                       if (newop != '\0') {
-                                               if (logic_priority (stack->op) <
-                                                       logic_priority (newop)) {
-                                                       stack = push_expression_stack (pool, stack, newop);
-                                               }
-                                               else {
-                                                       /* Pop all operations whose priority is not lower than this one */
-                                                       while ((stack != NULL) &&
-                                                               (logic_priority (stack->op) >=
-                                                               logic_priority (newop))) {
-                                                               op = delete_expression_stack (&stack);
-                                                               if (op != '(') {
-                                                                       insert_expression (pool,
-                                                                               &expr,
-                                                                               EXPR_OPERATION,
-                                                                               op,
-                                                                               NULL,
-                                                                               copy);
-                                                               }
-                                                       }
-                                                       stack = push_expression_stack (pool, stack, newop);
-                                               }
-                                       }
-                                       p = next;
-                                       state = SKIP_SPACES;
-                                       continue;
-                               }
-                       }
-                       p++;
-                       state = SKIP_SPACES;
-                       break;
-
-               case READ_REGEXP:
-                       /* An unescaped '/' closes the regexp body */
-                       if (*p == '/' && *(p - 1) != '\\') {
-                               if (*(p + 1)) {
-                                       p++;
-                               }
-                               state = READ_REGEXP_FLAGS;
-                       }
-                       else {
-                               p++;
-                       }
-                       break;
-
-               case READ_REGEXP_FLAGS:
-                       if (!is_regexp_flag (*p) || *(p + 1) == '\0') {
-                               if (c != p) {
-                                       if ((is_regexp_flag (*p) || *p ==
-                                               '/') && *(p + 1) == '\0') {
-                                               p++;
-                                       }
-                                       /* Copy starts at c - 1 so that the opening '/' is included */
-                                       str = rspamd_mempool_alloc (pool, p - c + 2);
-                                       rspamd_strlcpy (str, c - 1, (p - c + 2));
-                                       g_strstrip (str);
-                                       msg_debug ("found regexp: %s", str);
-                                       if (strlen (str) > 0) {
-                                               insert_expression (pool,
-                                                       &expr,
-                                                       EXPR_REGEXP,
-                                                       0,
-                                                       str,
-                                                       copy);
-                                       }
-                               }
-                               c = p;
-                               state = SKIP_SPACES;
-                       }
-                       else {
-                               p++;
-                       }
-                       break;
-
-               case READ_FUNCTION:
-                       if (*p == '/') {
-                               /* In fact it is regexp */
-                               state = READ_REGEXP;
-                               c++;
-                               p++;
-                       }
-                       else if (*p == '(') {
-                               func =
-                                       rspamd_mempool_alloc (pool,
-                                               sizeof (struct expression_function));
-                               func->name = rspamd_mempool_alloc (pool, p - c + 1);
-                               func->args = NULL;
-                               rspamd_strlcpy (func->name, c, (p - c + 1));
-                               g_strstrip (func->name);
-                               state = READ_FUNCTION_ARGUMENT;
-                               g_queue_push_tail (function_stack, func);
-                               insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy);
-                               c = ++p;
-                       }
-                       else if (is_operation_symbol (p)) {
-                               /* In fact it is not function, but symbol */
-                               if (c != p) {
-                                       str = rspamd_mempool_alloc (pool, p - c + 1);
-                                       rspamd_strlcpy (str, c, (p - c + 1));
-                                       g_strstrip (str);
-                                       if (strlen (str) > 0) {
-                                               insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
-                                       }
-                               }
-                               state = READ_OPERATOR;
-                       }
-                       else if (*(p + 1) == '\0') {
-                               /* In fact it is not function, but symbol */
-                               p++;
-                               if (c != p) {
-                                       str = rspamd_mempool_alloc (pool, p - c + 1);
-                                       rspamd_strlcpy (str, c, (p - c + 1));
-                                       g_strstrip (str);
-                                       if (strlen (str) > 0) {
-                                               insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
-                                       }
-                               }
-                               state = SKIP_SPACES;
-                       }
-                       else {
-                               p++;
-                       }
-                       break;
-
-               case READ_FUNCTION_ARGUMENT:
-                       if (*p == '/' && !in_regexp) {
-                               in_regexp = TRUE;
-                               p++;
-                       }
-                       if (!in_regexp) {
-                               /* Append argument to list */
-                               if (*p == ',' || (*p == ')' && brackets == 0)) {
-                                       arg = NULL;
-                                       str = rspamd_mempool_alloc (pool, p - c + 1);
-                                       rspamd_strlcpy (str, c, (p - c + 1));
-                                       g_strstrip (str);
-                                       /* Recursive call */
-                                       arg = maybe_parse_expression (pool, str);
-                                       func->args = g_list_append (func->args, arg);
-                                       /* Pop function */
-                                       if (*p == ')') {
-                                               /* Last function in chain, goto skipping spaces state */
-                                               func = g_queue_pop_tail (function_stack);
-                                               if (g_queue_get_length (function_stack) == 0) {
-                                                       state = SKIP_SPACES;
-                                               }
-                                       }
-                                       c = p + 1;
-                               }
-                               else if (*p == '(') {
-                                       brackets++;
-                               }
-                               else if (*p == ')') {
-                                       brackets--;
-                               }
-                       }
-                       else if (*p == '/' && *(p - 1) != '\\') {
-                               in_regexp = FALSE;
-                       }
-                       p++;
-                       break;
-               }
-       }
-
-       g_queue_free (function_stack);
-       if (state != SKIP_SPACES) {
-               /* In fact we got bad expression */
-               msg_warn ("expression \"%s\" is invalid", line);
-               return NULL;
-       }
-       /* Pop everything from stack */
-       while (stack != NULL) {
-               op = delete_expression_stack (&stack);
-               if (op != '(') {
-                       insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
-               }
-       }
-
-       return expr;
-}
-
-/*
- * Rspamd regexp utility functions
- */
-
-/*
- * Parse a regexp definition of the form "[header=]/pattern/flags".
- *
- * If the regexp cache already knows `line`, the element attached to the
- * cached regexp is returned. Otherwise a new element is allocated from
- * `pool`, the regexp is compiled, registered for unref with the pool and
- * inserted into the cache under the same `line`.
- *
- * Flags i, m, s, x, u, O and r are passed to rspamd_regexp_new, 'o' is
- * accepted and ignored, H/M/P/U/X select the regexp type, T and S set
- * is_test and is_strong. `raw_mode` appends the 'r' flag.
- *
- * Returns NULL on a NULL or empty line, on a missing or unterminated
- * /pattern/, and when the regexp cannot be compiled.
- */
-struct rspamd_regexp_element *
-parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
-{
-       const gchar *begin, *end, *p, *src, *start;
-       gchar *dbegin, *dend;
-       struct rspamd_regexp_element *result;
-       rspamd_regexp_t *re;
-       GError *err = NULL;
-       GString *re_flags;
-
-       if (line == NULL) {
-               msg_err ("cannot parse NULL line");
-               return NULL;
-       }
-
-       if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) {
-               return ((struct rspamd_regexp_element *)rspamd_regexp_get_ud (re));
-       }
-
-       /* Remember the untouched line: it is the cache key and is used in logs */
-       src = line;
-       result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_element));
-       /* Skip whitespaces */
-       while (g_ascii_isspace (*line)) {
-               line++;
-       }
-       if (*line == '\0') {
-               msg_warn ("got empty regexp");
-               return NULL;
-       }
-       start = line;
-       /* First try to find header name */
-       begin = strchr (line, '/');
-       if (begin != NULL) {
-               /* Look backwards from the first '/' for the '=' ending a header name */
-               p = begin;
-               end = NULL;
-               while (p != line) {
-                       if (*p == '=') {
-                               end = p;
-                               break;
-                       }
-                       p--;
-               }
-               if (end) {
-                       result->header = rspamd_mempool_alloc (pool, end - line + 1);
-                       rspamd_strlcpy (result->header, line, end - line + 1);
-                       result->type = REGEXP_HEADER;
-                       line = end;
-               }
-       }
-       else {
-               result->header = rspamd_mempool_strdup (pool, line);
-               result->type = REGEXP_HEADER;
-               line = start;
-       }
-       /* Find begin of regexp */
-       while (*line && *line != '/') {
-               line++;
-       }
-       if (*line != '\0') {
-               begin = line + 1;
-       }
-       else if (result->header == NULL) {
-               /*
-                * Assume that line without // is just a header name
-                * NOTE(review): header is always set above when the line has no '/',
-                * so this branch looks unreachable - confirm before relying on it.
-                */
-               result->header = rspamd_mempool_strdup (pool, line);
-               result->type = REGEXP_HEADER;
-               return result;
-       }
-       else {
-               /* We got header name earlier but have not found // expression, so it is invalid regexp */
-               msg_warn (
-                       "got no header name (eg. header=) but without corresponding regexp, %s",
-                       src);
-               return NULL;
-       }
-       /* Find end */
-       end = begin;
-       while (*end && (*end != '/' || *(end - 1) == '\\')) {
-               end++;
-       }
-       if (end == begin || *end != '/') {
-               msg_warn ("no trailing / in regexp %s", src);
-               return NULL;
-       }
-       /* Parse flags */
-       p = end + 1;
-       re_flags = g_string_sized_new (32);
-       while (p != NULL) {
-               switch (*p) {
-               case 'i':
-               case 'm':
-               case 's':
-               case 'x':
-               case 'u':
-               case 'O':
-               case 'r':
-                       g_string_append_c (re_flags, *p);
-                       p++;
-                       break;
-               case 'o':
-                       p++;
-                       break;
-               /* Type flags */
-               case 'H':
-                       if (result->type == REGEXP_NONE) {
-                               result->type = REGEXP_HEADER;
-                       }
-                       p++;
-                       break;
-               case 'M':
-                       if (result->type == REGEXP_NONE) {
-                               result->type = REGEXP_MESSAGE;
-                       }
-                       p++;
-                       break;
-               case 'P':
-                       if (result->type == REGEXP_NONE) {
-                               result->type = REGEXP_MIME;
-                       }
-                       p++;
-                       break;
-               case 'U':
-                       if (result->type == REGEXP_NONE) {
-                               result->type = REGEXP_URL;
-                       }
-                       p++;
-                       break;
-               case 'X':
-                       if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
-                               result->type = REGEXP_RAW_HEADER;
-                       }
-                       p++;
-                       break;
-               case 'T':
-                       result->is_test = TRUE;
-                       p++;
-                       break;
-               case 'S':
-                       result->is_strong = TRUE;
-                       p++;
-                       break;
-               /* Stop flags parsing */
-               default:
-                       p = NULL;
-                       break;
-               }
-       }
-
-       /*
-        * Keep a private copy of the definition and cut it at the closing '/'
-        * for the time of compilation, so that dbegin is the bare pattern.
-        */
-       result->regexp_text = rspamd_mempool_strdup (pool, start);
-       dbegin = result->regexp_text + (begin - start);
-       dend = result->regexp_text + (end - start);
-       *dend = '\0';
-
-       if (raw_mode) {
-               g_string_append_c (re_flags, 'r');
-       }
-
-       result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
-                       &err);
-
-       g_string_free (re_flags, TRUE);
-
-       if (result->regexp == NULL || err != NULL) {
-               msg_warn ("could not read regexp: %s while reading regexp %s",
-                               err ? err->message : "unknown error",
-                                               src);
-               /* The error object is ours to release once it has been reported */
-               if (err != NULL) {
-                       g_error_free (err);
-               }
-               return NULL;
-       }
-
-       rspamd_mempool_add_destructor (pool,
-               (rspamd_mempool_destruct_t) rspamd_regexp_unref,
-               (void *)result->regexp);
-
-       rspamd_regexp_set_ud (result->regexp, result);
-
-       /*
-        * Insert under the original line: this is the key the query at the top
-        * of the function uses, while `line` has been advanced to the first '/'.
-        */
-       rspamd_regexp_cache_insert (NULL, src, NULL, result->regexp);
-
-       /* Restore the closing '/' in the stored text */
-       *dend = '/';
-
-       return result;
-}
-
-/*
- * Call the built-in function named func->name with the task and func->args.
- * The name is looked up with bsearch in the table pointed to by list_ptr.
- * Returns the result of the function, or FALSE when the name is not in the
- * table. `L` is not used here.
- */
-gboolean
-call_expression_function (struct expression_function * func,
-       struct rspamd_task * task,
-       lua_State *L)
-{
-       struct _fl key, *found;
-
-       key.name = func->name;
-       found = bsearch (&key, list_ptr, functions_number, sizeof (struct _fl),
-                       fl_cmp);
-
-       if (found != NULL) {
-               return found->func (task, func->args, found->user_data);
-       }
-
-       /* Unknown name: nothing else is tried at this point */
-       return FALSE;
-}
-
-struct expression_argument *
-get_function_arg (struct expression *expr,
-       struct rspamd_task *task,
-       gboolean want_string)
-{
-       GQueue *stack;
-       gsize cur, op1, op2;
-       struct expression_argument *res;
-       struct expression *it;
-
-       if (expr == NULL) {
-               msg_warn ("NULL expression passed");
-               return NULL;
-       }
-       if (expr->next == NULL) {
-               res =
-                       rspamd_mempool_alloc (task->task_pool,
-                               sizeof (struct expression_argument));
-               if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type ==
-                       EXPR_REGEXP_PARSED) {
-                       res->type = EXPRESSION_ARGUMENT_NORMAL;
-                       res->data = expr->content.operand;
-               }
-               else if (expr->type == EXPR_FUNCTION && !want_string) {
-                       res->type = EXPRESSION_ARGUMENT_BOOL;
-                       cur = call_expression_function (expr->content.operand, task, NULL);
-                       res->data = GSIZE_TO_POINTER (cur);
-               }
-               else {
-                       msg_warn (
-                               "cannot parse argument: it contains operator or bool expression that is not wanted");
-                       return NULL;
-               }
-               return res;
-       }
-       else if (!want_string) {
-               res =
-                       rspamd_mempool_alloc (task->task_pool,
-                               sizeof (struct expression_argument));
-               res->type = EXPRESSION_ARGUMENT_BOOL;
-               stack = g_queue_new ();
-               it = expr;
-
-               while (it) {
-                       if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED ||
-                               it->type == EXPR_STR) {
-                               g_queue_free (stack);
-                               res->type = EXPRESSION_ARGUMENT_EXPR;
-                               res->data = expr;
-                               return res;
-                       }
-                       else if (it->type == EXPR_FUNCTION) {
-                               cur =
-                                       (gsize) call_expression_function ((struct
-                                               expression_function
-                                               *)it->content.operand, task, NULL);
-                               debug_task ("function %s returned %s",
-                                       ((struct expression_function *)it->content.operand)->name,
-                                       cur ? "true" : "false");
-                       }
-                       else if (it->type == EXPR_OPERATION) {
-                               if (g_queue_is_empty (stack)) {
-                                       /* Queue has no operands for operation, exiting */
-                                       debug_task ("invalid expression");
-                                       g_queue_free (stack);
-                                       return NULL;
-                               }
-                               switch (it->content.operation) {
-                               case '!':
-                                       op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
-                                       op1 = !op1;
-                                       g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
-                                       break;
-                               case '&':
-                                       op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
-                                       op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
-                                       g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
-                                       break;
-                               case '|':
-                                       op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
-                                       op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
-                                       g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
-                                       break;
-                               default:
-                                       it = it->next;
-                                       continue;
-                               }
-                       }
-                       if (it) {
-                               it = it->next;
-                       }
-               }
-               if (!g_queue_is_empty (stack)) {
-                       res->data = g_queue_pop_head (stack);
-               }
-               else {
-                       res->data = GSIZE_TO_POINTER (FALSE);
-               }
-
-               return res;
-       }
-
-       msg_warn ("invalid expression argument");
-
-       return NULL;
-}
-
-void
-register_expression_function (const gchar *name,
-       rspamd_internal_func_t func,
-       void *user_data)
-{
-       static struct _fl *new;
-
-       functions_number++;
-
-       new = g_new (struct _fl, functions_number);
-       memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
-       if (list_allocated) {
-               g_free (list_ptr);
-       }
-
-       list_allocated = TRUE;
-       new[functions_number - 1].name = name;
-       new[functions_number - 1].func = func;
-       new[functions_number - 1].user_data = user_data;
-       qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
-       list_ptr = new;
-}
-
-gboolean
-rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused)
-{
-       struct expression_argument *arg;
-
-       if (args == NULL || task == NULL) {
-               return FALSE;
-       }
-
-       arg = get_function_arg (args->data, task, TRUE);
-       if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
-               msg_warn ("invalid argument to function is passed");
-               return FALSE;
-       }
-
-       /* XXX: really write this function */
-       return TRUE;
-}
-
-gboolean
-rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused)
-{
-       struct expression_argument *arg;
-       GList *headerlist;
-
-       if (args == NULL || task == NULL) {
-               return FALSE;
-       }
-
-       arg = get_function_arg (args->data, task, TRUE);
-       if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
-               msg_warn ("invalid argument to function is passed");
-               return FALSE;
-       }
-
-       debug_task ("try to get header %s", (gchar *)arg->data);
-       headerlist = message_get_header (task,
-                       (gchar *)arg->data,
-                       FALSE);
-       if (headerlist) {
-               return TRUE;
-       }
-       return FALSE;
-}
-
-/*
- * This function is designed to find difference between text/html and text/plain parts
- * It takes one argument: difference threshold, if we have two text parts, compare
- * its hashes and check for threshold, if value is greater than threshold, return TRUE
- * and return FALSE otherwise.
- */
-gboolean
-rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused)
-{
-       gint threshold, threshold2 = -1, diff;
-       struct mime_text_part *p1, *p2;
-       GList *cur;
-       struct expression_argument *arg;
-       GMimeObject *parent;
-       const GMimeContentType *ct;
-       gint *pdiff;
-
-       if (args == NULL) {
-               debug_task ("no threshold is specified, assume it 100");
-               threshold = 100;
-       }
-       else {
-               errno = 0;
-               arg = get_function_arg (args->data, task, TRUE);
-               threshold = strtoul ((gchar *)arg->data, NULL, 10);
-               if (errno != 0) {
-                       msg_info ("bad numeric value for threshold \"%s\", assume it 100",
-                               (gchar *)args->data);
-                       threshold = 100;
-               }
-               if (args->next) {
-                       arg = get_function_arg (args->next->data, task, TRUE);
-                       errno = 0;
-                       threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
-                       if (errno != 0) {
-                               msg_info ("bad numeric value for threshold \"%s\", ignore it",
-                                       (gchar *)arg->data);
-                               threshold2 = -1;
-                       }
-               }
-       }
-
-       if ((pdiff =
-               rspamd_mempool_get_variable (task->task_pool,
-               "parts_distance")) != NULL) {
-               diff = *pdiff;
-               if (diff != -1) {
-                       if (threshold2 > 0) {
-                               if (diff >=
-                                       MIN (threshold,
-                                       threshold2) && diff < MAX (threshold, threshold2)) {
-                                       return TRUE;
-                               }
-                       }
-                       else {
-                               if (diff <= threshold) {
-                                       return TRUE;
-                               }
-                       }
-                       return FALSE;
-               }
-               else {
-                       return FALSE;
-               }
-       }
-
-       if (g_list_length (task->text_parts) == 2) {
-               cur = g_list_first (task->text_parts);
-               p1 = cur->data;
-               cur = g_list_next (cur);
-               pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
-               *pdiff = -1;
-
-               if (cur == NULL) {
-                       msg_info ("bad parts list");
-                       return FALSE;
-               }
-               p2 = cur->data;
-               /* First of all check parent object */
-               if (p1->parent && p1->parent == p2->parent) {
-                       parent = p1->parent;
-                       ct = g_mime_object_get_content_type (parent);
-#ifndef GMIME24
-                       if (ct == NULL ||
-                               !g_mime_content_type_is_type (ct, "multipart", "alternative")) {
-#else
-                       if (ct == NULL ||
-                               !g_mime_content_type_is_type ((GMimeContentType *)ct,
-                               "multipart", "alternative")) {
-#endif
-                               debug_task (
-                                       "two parts are not belong to multipart/alternative container, skip check");
-                               rspamd_mempool_set_variable (task->task_pool,
-                                       "parts_distance",
-                                       pdiff,
-                                       NULL);
-                               return FALSE;
-                       }
-               }
-               else {
-                       debug_task (
-                               "message contains two parts but they are in different multi-parts");
-                       rspamd_mempool_set_variable (task->task_pool,
-                               "parts_distance",
-                               pdiff,
-                               NULL);
-                       return FALSE;
-               }
-               if (!p1->is_empty && !p2->is_empty) {
-                       if (p1->diff_str != NULL && p2->diff_str != NULL) {
-                               diff = rspamd_diff_distance_normalized (p1->diff_str,
-                                               p2->diff_str);
-                       }
-                       else {
-                               diff = rspamd_fuzzy_compare_parts (p1, p2);
-                       }
-                       debug_task (
-                               "got likeliness between parts of %d%%, threshold is %d%%",
-                               diff,
-                               threshold);
-                       *pdiff = diff;
-                       rspamd_mempool_set_variable (task->task_pool,
-                               "parts_distance",
-                               pdiff,
-                               NULL);
-                       if (threshold2 > 0) {
-                               if (diff >=
-                                       MIN (threshold,
-                                       threshold2) && diff < MAX (threshold, threshold2)) {
-                                       return TRUE;
-                               }
-                       }
-                       else {
-                               if (diff <= threshold) {
-                                       return TRUE;
-                               }
-                       }
-               }
-               else if ((p1->is_empty &&
-                       !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
-                       /* Empty and non empty parts are different */
-                       *pdiff = 0;
-                       rspamd_mempool_set_variable (task->task_pool,
-                               "parts_distance",
-                               pdiff,
-                               NULL);
-                       return TRUE;
-               }
-       }
-       else {
-               debug_task (
-                       "message has too many text parts, so do not try to compare them with each other");
-               rspamd_mempool_set_variable (task->task_pool,
-                       "parts_distance",
-                       pdiff,
-                       NULL);
-               return FALSE;
-       }
-
-       rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff,
-               NULL);
-       return FALSE;
-}
-
-struct addr_list {
-       const gchar *name;
-       const gchar *addr;
-};
-
-#define COMPARE_RCPT_LEN 3
-#define MIN_RCPT_TO_COMPARE 7
-
-gboolean
-rspamd_recipients_distance (struct rspamd_task *task, GList * args,
-       void *unused)
-{
-       struct expression_argument *arg;
-       InternetAddressList *cur;
-       double threshold;
-       struct addr_list *ar;
-       gchar *c;
-       gint num, i, j, hits = 0, total = 0;
-
-       if (args == NULL) {
-               msg_warn ("no parameters to function");
-               return FALSE;
-       }
-
-       arg = get_function_arg (args->data, task, TRUE);
-       errno = 0;
-       threshold = strtod ((gchar *)arg->data, NULL);
-       if (errno != 0) {
-               msg_warn ("invalid numeric value '%s': %s",
-                       (gchar *)arg->data,
-                       strerror (errno));
-               return FALSE;
-       }
-
-       if (!task->rcpt_mime) {
-               return FALSE;
-       }
-       num = internet_address_list_length (task->rcpt_mime);
-       if (num < MIN_RCPT_TO_COMPARE) {
-               return FALSE;
-       }
-       ar =
-               rspamd_mempool_alloc0 (task->task_pool, num *
-                       sizeof (struct addr_list));
-
-       /* Fill array */
-       cur = task->rcpt_mime;
-#ifdef GMIME24
-       for (i = 0; i < num; i++) {
-               InternetAddress *iaelt =
-                       internet_address_list_get_address(cur, i);
-               InternetAddressMailbox *iamb =
-                       INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
-                       INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
-               if (iamb) {
-                       ar[i].name = internet_address_mailbox_get_addr (iamb);
-                       if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
-                               ar[i].addr = c + 1;
-                       }
-               }
-       }
-#else
-       InternetAddress *addr;
-       i = 0;
-       while (cur) {
-               addr = internet_address_list_get_address (cur);
-               if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
-                       ar[i].name = rspamd_mempool_strdup (task->task_pool,
-                                       internet_address_get_addr (addr));
-                       if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
-                               *c = '\0';
-                               ar[i].addr = c + 1;
-                       }
-                       cur = internet_address_list_next (cur);
-                       i++;
-               }
-               else {
-                       cur = internet_address_list_next (cur);
-               }
-       }
-#endif
-
-       /* Cycle all elements in array */
-       for (i = 0; i < num; i++) {
-               for (j = i + 1; j < num; j++) {
-                       if (ar[i].name && ar[j].name &&
-                               g_ascii_strncasecmp (ar[i].name, ar[j].name,
-                               COMPARE_RCPT_LEN) == 0) {
-                               /* Common name part */
-                               hits++;
-                       }
-                       else if (ar[i].addr && ar[j].addr &&
-                               g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
-                               /* Common address part, but different name */
-                               hits++;
-                       }
-                       total++;
-               }
-       }
-
-       if ((double)(hits * num / 2.) / (double)total >= threshold) {
-               return TRUE;
-       }
-
-       return FALSE;
-}
-
-gboolean
-rspamd_has_only_html_part (struct rspamd_task * task, GList * args,
-       void *unused)
-{
-       struct mime_text_part *p;
-       GList *cur;
-       gboolean res = FALSE;
-
-       cur = g_list_first (task->text_parts);
-       while (cur) {
-               p = cur->data;
-               if (p->is_html) {
-                       res = TRUE;
-               }
-               else {
-                       res = FALSE;
-                       break;
-               }
-               cur = g_list_next (cur);
-       }
-
-       return res;
-}
-
-static gboolean
-is_recipient_list_sorted (const InternetAddressList * ia)
-{
-       const InternetAddressList *cur;
-       InternetAddress *addr;
-       gboolean res = TRUE;
-       struct addr_list current = { NULL, NULL }, previous = {
-               NULL, NULL
-       };
-#ifdef GMIME24
-       gint num, i;
-#endif
-
-       /* Do not check to short address lists */
-       if (internet_address_list_length ((InternetAddressList *)ia) <
-               MIN_RCPT_TO_COMPARE) {
-               return FALSE;
-       }
-#ifdef GMIME24
-       num = internet_address_list_length ((InternetAddressList *)ia);
-       cur = ia;
-       for (i = 0; i < num; i++) {
-               addr =
-                       internet_address_list_get_address ((InternetAddressList *)cur, i);
-               current.addr = (gchar *)internet_address_get_name (addr);
-               if (previous.addr != NULL) {
-                       if (current.addr &&
-                               g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
-                               res = FALSE;
-                               break;
-                       }
-               }
-               previous.addr = current.addr;
-       }
-#else
-       cur = ia;
-       while (cur) {
-               addr = internet_address_list_get_address (cur);
-               if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
-                       current.addr = internet_address_get_addr (addr);
-                       if (previous.addr != NULL) {
-                               if (current.addr &&
-                                       g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
-                                       res = FALSE;
-                                       break;
-                               }
-                       }
-                       previous.addr = current.addr;
-               }
-               cur = internet_address_list_next (cur);
-       }
-#endif
-
-       return res;
-}
-
-gboolean
-rspamd_is_recipients_sorted (struct rspamd_task * task,
-       GList * args,
-       void *unused)
-{
-       /* Check all types of addresses */
-       if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
-               GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
-               return TRUE;
-       }
-       if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
-               GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
-               return TRUE;
-       }
-       if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
-               GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
-               return TRUE;
-       }
-
-       return FALSE;
-}
-
-gboolean
-rspamd_compare_transfer_encoding (struct rspamd_task * task,
-       GList * args,
-       void *unused)
-{
-       GMimeObject *part;
-#ifndef GMIME24
-       GMimePartEncodingType enc_req, part_enc;
-#else
-       GMimeContentEncoding enc_req, part_enc;
-#endif
-       struct expression_argument *arg;
-
-       if (args == NULL) {
-               msg_warn ("no parameters to function");
-               return FALSE;
-       }
-
-       arg = get_function_arg (args->data, task, TRUE);
-#ifndef GMIME24
-       enc_req = g_mime_part_encoding_from_string (arg->data);
-       if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
-#else
-       enc_req = g_mime_content_encoding_from_string (arg->data);
-       if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
-#endif
-               msg_warn ("bad encoding type: %s", (gchar *)arg->data);
-               return FALSE;
-       }
-
-       part = g_mime_message_get_mime_part (task->message);
-       if (part) {
-               if (GMIME_IS_PART (part)) {
-#ifndef GMIME24
-                       part_enc = g_mime_part_get_encoding (GMIME_PART (part));
-                       if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
-                               /* Assume 7bit as default transfer encoding */
-                               part_enc = GMIME_PART_ENCODING_7BIT;
-                       }
-#else
-                       part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
-                       if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
-                               /* Assume 7bit as default transfer encoding */
-                               part_enc = GMIME_CONTENT_ENCODING_7BIT;
-                       }
-#endif
-
-
-                       debug_task ("got encoding in part: %d and compare with %d",
-                               (gint)part_enc,
-                               (gint)enc_req);
-#ifndef GMIME24
-                       g_object_unref (part);
-#endif
-
-                       return part_enc == enc_req;
-               }
-#ifndef GMIME24
-               g_object_unref (part);
-#endif
-       }
-
-       return FALSE;
-}
-
-gboolean
-rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused)
-{
-       struct mime_text_part *p;
-       GList *cur;
-       gboolean res = TRUE;
-
-       cur = g_list_first (task->text_parts);
-       while (cur) {
-               p = cur->data;
-               if (!p->is_empty && p->is_html) {
-                       if (p->is_balanced) {
-                               res = TRUE;
-                       }
-                       else {
-                               res = FALSE;
-                               break;
-                       }
-               }
-               cur = g_list_next (cur);
-       }
-
-       return res;
-
-}
-
-struct html_callback_data {
-       struct html_tag *tag;
-       gboolean *res;
-};
-
-static gboolean
-search_html_node_callback (GNode * node, gpointer data)
-{
-       struct html_callback_data *cd = data;
-       struct html_node *nd;
-
-       nd = node->data;
-       if (nd) {
-               if (nd->tag == cd->tag) {
-                       *cd->res = TRUE;
-                       return TRUE;
-               }
-       }
-
-       return FALSE;
-}
-
-gboolean
-rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused)
-{
-       struct mime_text_part *p;
-       GList *cur;
-       struct expression_argument *arg;
-       struct html_tag *tag;
-       gboolean res = FALSE;
-       struct html_callback_data cd;
-
-       if (args == NULL) {
-               msg_warn ("no parameters to function");
-               return FALSE;
-       }
-
-       arg = get_function_arg (args->data, task, TRUE);
-       tag = get_tag_by_name (arg->data);
-       if (tag == NULL) {
-               msg_warn ("unknown tag type passed as argument: %s",
-                       (gchar *)arg->data);
-               return FALSE;
-       }
-
-       cur = g_list_first (task->text_parts);
-       cd.res = &res;
-       cd.tag = tag;
-
-       while (cur && res == FALSE) {
-               p = cur->data;
-               if (!p->is_empty && p->is_html && p->html_nodes) {
-                       g_node_traverse (p->html_nodes,
-                               G_PRE_ORDER,
-                               G_TRAVERSE_ALL,
-                               -1,
-                               search_html_node_callback,
-                               &cd);
-               }
-               cur = g_list_next (cur);
-       }
-
-       return res;
-
-}
-
-gboolean
-rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused)
-{
-       struct mime_text_part *p;
-       GList *cur;
-       gboolean res = FALSE;
-
-       cur = g_list_first (task->text_parts);
-
-       while (cur && res == FALSE) {
-               p = cur->data;
-               if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
-                       res = TRUE;
-               }
-               cur = g_list_next (cur);
-       }
-
-       return res;
-
-}
-
-
-/*
- * vi:ts=4
- */
diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h
deleted file mode 100644 (file)
index 469cc69..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * @file expressions.h
- * Rspamd expressions API
- */
-
-#ifndef RSPAMD_EXPRESSIONS_H
-#define RSPAMD_EXPRESSIONS_H
-
-#include "config.h"
-#include <lua.h>
-
-struct rspamd_task;
-struct rspamd_regexp_element;
-
-/**
- * Rspamd expression function
- */
-struct expression_function {
-       gchar *name;                                                    /**< name of function                                                           */
-       GList *args;                                                /**< its args                                                                               */
-};
-
-/**
- * Function's argument
- */
-struct expression_argument {
-       enum {
-               EXPRESSION_ARGUMENT_NORMAL,
-               EXPRESSION_ARGUMENT_BOOL,
-               EXPRESSION_ARGUMENT_EXPR,
-       } type;                                                     /**< type of argument (text or other function)              */
-       void *data;                                                 /**< pointer to its data                                                    */
-};
-
-/**
- * Logic expression
- */
-struct expression {
-       enum {
-               EXPR_REGEXP,
-               EXPR_OPERATION,
-               EXPR_FUNCTION,
-               EXPR_STR,
-               EXPR_REGEXP_PARSED,
-       } type;                                                     /**< expression type                                                                */
-       union {
-               void *operand;
-               gchar operation;
-       } content;                                                  /**< union for storing operand or operation code    */
-       const gchar *orig;                                          /**< original line                                                                  */
-       struct expression *next;                                    /**< chain link                                                                             */
-};
-
-typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args,
-       void *user_data);
-
-/**
- * Parse regexp line to regexp structure
- * @param pool memory pool to use
- * @param line incoming line
- * @return regexp structure or NULL in case of error
- */
-struct rspamd_regexp_element * parse_regexp (rspamd_mempool_t *pool,
-       const gchar *line,
-       gboolean raw_mode);
-
-/**
- * Parse composites line to composites structure (eg. "SYMBOL1&SYMBOL2|!SYMBOL3")
- * @param pool memory pool to use
- * @param line incoming line
- * @return expression structure or NULL in case of error
- */
-struct expression * parse_expression (rspamd_mempool_t *pool, gchar *line);
-
-/**
- * Call specified fucntion and return boolean result
- * @param func function to call
- * @param task task object
- * @param L lua specific state
- * @return TRUE or FALSE depending on function result
- */
-gboolean call_expression_function (struct expression_function *func,
-       struct rspamd_task *task,
-       lua_State *L);
-
-/**
- * Register specified function to rspamd internal functions list
- * @param name name of function
- * @param func pointer to function
- */
-void register_expression_function (const gchar *name,
-       rspamd_internal_func_t func,
-       void *user_data);
-
-/**
- * Add regexp to regexp cache
- * @param line symbolic representation
- * @param pointer regexp data
- */
-void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool);
-
-/**
- * Check regexp in cache
- * @param line symbolic representation
- * @return pointer to regexp data or NULL if regexp is not found
- */
-void * re_cache_check (const gchar *line, rspamd_mempool_t *pool);
-
-/**
- * Remove regexp from regexp cache
- * @param line symbolic representation
- */
-void re_cache_del (const gchar *line, rspamd_mempool_t *pool);
-
-/**
- * Add regexp to regexp task cache
- * @param task task object
- * @param pointer regexp data
- * @param result numeric result of this regexp
- */
-void task_cache_add (struct rspamd_task *task,
-       struct rspamd_regexp_element *re,
-       gint32 result);
-
-/**
- * Check regexp in cache
- * @param task task object
- * @param pointer regexp data
- * @return numeric result if value exists or -1 if not
- */
-gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re);
-
-/**
- * Parse and return a single function argument for a function (may recurse)
- * @param expr expression structure that represents function's argument
- * @param task task object
- * @param want_string return NULL if argument is not a string
- * @return expression argument structure or NULL if failed
- */
-struct expression_argument * get_function_arg (struct expression *expr,
-       struct rspamd_task *task,
-       gboolean want_string);
-
-#endif
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
new file mode 100644 (file)
index 0000000..779553e
--- /dev/null
@@ -0,0 +1,957 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "main.h"
+#include "message.h"
+#include "fuzzy.h"
+#include "mime_expressions.h"
+#include "html.h"
+#include "lua/lua_common.h"
+#include "diff.h"
+
+/*
+ * Forward declarations of the built-in expression functions that are
+ * registered by name in rspamd_functions_list below. All of them share the
+ * rspamd_internal_func_t signature: the task, a GList of arguments and an
+ * opaque user data pointer.
+ */
+gboolean rspamd_compare_encoding (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_header_exists (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_parts_distance (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_recipients_distance (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_has_only_html_part (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_is_html_balanced (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_has_html_tag (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+gboolean rspamd_has_fake_html (struct rspamd_task *task,
+       GList * args,
+       void *unused);
+
+/**
+ * Regexp type, selected by a flag that follows the closing slash:
+ * /H - header, /M - message, /P - mime, /U - url, /X - raw header.
+ * REGEXP_NONE means that no type has been assigned yet.
+ */
+enum rspamd_regexp_type {
+       REGEXP_NONE = 0,
+       REGEXP_HEADER,
+       REGEXP_MIME,
+       REGEXP_MESSAGE,
+       REGEXP_URL,
+       REGEXP_RAW_HEADER
+};
+
+/**
+ * Regexp structure: the parsed form of a `[header=]/pattern/flags` line
+ */
+struct rspamd_regexp_atom {
+       enum rspamd_regexp_type type;                   /**< regexp type, see enum rspamd_regexp_type */
+       gchar *regexp_text;                             /**< copy of the source line without leading whitespace */
+       rspamd_regexp_t *regexp;                        /**< compiled regexp object */
+       gchar *header;                                  /**< header name for header regexps */
+       gboolean is_test;                               /**< set by the T flag: this expression must be tested */
+       gboolean is_strong;                             /**< set by the S flag: headers search must be case sensitive */
+};
+
+/**
+ * Rspamd expression function: a function name together with the list of
+ * arguments it is called with
+ */
+struct rspamd_function_atom {
+       gchar *name;    /**< name of function */
+       GList *args;    /**< list of its arguments */
+};
+
+/*
+ * Placeholder: no members are defined yet.
+ * NOTE(review): an empty struct is a compiler extension in C, confirm that
+ * all supported compilers accept it or add the intended members.
+ */
+struct rspamd_mime_atom {
+
+};
+
+/*
+ * List of internal functions of rspamd
+ * Sorted by name to use bsearch: any entry added here by hand must keep the
+ * strcmp() ordering of the names, otherwise lookups will miss it.
+ */
+static struct _fl {
+       const gchar *name;
+       rspamd_internal_func_t func;
+       void *user_data;
+} rspamd_functions_list[] = {
+       {"compare_encoding", rspamd_compare_encoding, NULL},
+       {"compare_parts_distance", rspamd_parts_distance, NULL},
+       {"compare_recipients_distance", rspamd_recipients_distance, NULL},
+       {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
+       {"has_fake_html", rspamd_has_fake_html, NULL},
+       {"has_html_tag", rspamd_has_html_tag, NULL},
+       {"has_only_html_part", rspamd_has_only_html_part, NULL},
+       {"header_exists", rspamd_header_exists, NULL},
+       {"is_html_balanced", rspamd_is_html_balanced, NULL},
+       {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}
+};
+
+/* Currently active list: the static table above until a function is registered */
+static struct _fl *list_ptr = &rspamd_functions_list[0];
+/* Number of entries reachable through list_ptr */
+static guint32 functions_number = sizeof (rspamd_functions_list) /
+       sizeof (struct _fl);
+/* TRUE once list_ptr points to heap memory that must be released with g_free */
+static gboolean list_allocated = FALSE;
+
+/*
+ * Comparator for struct _fl entries: orders them by name using strcmp().
+ * Shared by the bsearch() lookup and the qsort() call that keeps the list
+ * ordered.
+ */
+static gint
+fl_cmp (const void *s1, const void *s2)
+{
+       const struct _fl *lhs = s1, *rhs = s2;
+
+       return strcmp (lhs->name, rhs->name);
+}
+
+/*
+ * Rspamd regexp utility functions
+ */
+
+/**
+ * Parse a regexp atom written as `[header=]/pattern/flags`.
+ *
+ * The regexp cache is consulted first, keyed by the unmodified source line;
+ * on a hit the atom stored as the regexp's user data is returned. Otherwise
+ * a new atom is allocated from the pool, the regexp is compiled, registered
+ * for release together with the pool and inserted into the cache under the
+ * same key.
+ *
+ * @param pool memory pool that owns the atom and its strings
+ * @param line textual representation of the atom
+ * @return parsed atom or NULL if the line is NULL, empty or malformed
+ */
+static struct rspamd_regexp_atom *
+rspamd_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line)
+{
+       const gchar *begin, *end, *p, *src, *start;
+       gchar *dbegin, *dend;
+       struct rspamd_regexp_atom *result;
+       rspamd_regexp_t *re;
+       GError *err = NULL;
+       GString *re_flags;
+
+       if (line == NULL) {
+               msg_err ("cannot parse NULL line");
+               return NULL;
+       }
+
+       /* The source line is the cache key for both the query and the insert */
+       src = line;
+
+       if ((re = rspamd_regexp_cache_query (NULL, src, NULL)) != NULL) {
+               return ((struct rspamd_regexp_atom *)rspamd_regexp_get_ud (re));
+       }
+
+       /* Skip whitespaces */
+       while (g_ascii_isspace (*line)) {
+               line++;
+       }
+       if (*line == '\0') {
+               msg_warn ("got empty regexp");
+               return NULL;
+       }
+
+       result = rspamd_mempool_alloc0 (pool, sizeof (*result));
+       start = line;
+       /* First try to find header name */
+       begin = strchr (line, '/');
+       if (begin != NULL) {
+               /* Walk back from the first slash looking for `header=` */
+               p = begin;
+               end = NULL;
+               while (p != line) {
+                       if (*p == '=') {
+                               end = p;
+                               break;
+                       }
+                       p--;
+               }
+               if (end) {
+                       result->header = rspamd_mempool_alloc (pool, end - line + 1);
+                       rspamd_strlcpy (result->header, line, end - line + 1);
+                       result->type = REGEXP_HEADER;
+                       line = end;
+               }
+       }
+       else {
+               result->header = rspamd_mempool_strdup (pool, line);
+               result->type = REGEXP_HEADER;
+               line = start;
+       }
+       /* Find begin of regexp */
+       while (*line && *line != '/') {
+               line++;
+       }
+       if (*line != '\0') {
+               begin = line + 1;
+       }
+       else if (result->header == NULL) {
+               /*
+                * Assume that line without // is just a header name
+                * NOTE(review): the slash-less branch above always sets the header,
+                * so this branch looks unreachable; confirm the intended behaviour.
+                */
+               result->header = rspamd_mempool_strdup (pool, line);
+               result->type = REGEXP_HEADER;
+               return result;
+       }
+       else {
+               /* We got header name earlier but have not found // expression, so it is invalid regexp */
+               msg_warn (
+                       "got no header name (eg. header=) but without corresponding regexp, %s",
+                       src);
+               return NULL;
+       }
+       /* Find end: the first slash that is not preceded by a backslash */
+       end = begin;
+       while (*end && (*end != '/' || *(end - 1) == '\\')) {
+               end++;
+       }
+       if (end == begin || *end != '/') {
+               msg_warn ("no trailing / in regexp %s", src);
+               return NULL;
+       }
+       /* Parse flags */
+       p = end + 1;
+       re_flags = g_string_sized_new (32);
+       while (p != NULL) {
+               switch (*p) {
+               /* Flags that are passed through to the regexp library */
+               case 'i':
+               case 'm':
+               case 's':
+               case 'x':
+               case 'u':
+               case 'O':
+               case 'r':
+                       g_string_append_c (re_flags, *p);
+                       p++;
+                       break;
+               /* Accepted but ignored */
+               case 'o':
+                       p++;
+                       break;
+               /* Type flags */
+               case 'H':
+                       if (result->type == REGEXP_NONE) {
+                               result->type = REGEXP_HEADER;
+                       }
+                       p++;
+                       break;
+               case 'M':
+                       if (result->type == REGEXP_NONE) {
+                               result->type = REGEXP_MESSAGE;
+                       }
+                       p++;
+                       break;
+               case 'P':
+                       if (result->type == REGEXP_NONE) {
+                               result->type = REGEXP_MIME;
+                       }
+                       p++;
+                       break;
+               case 'U':
+                       if (result->type == REGEXP_NONE) {
+                               result->type = REGEXP_URL;
+                       }
+                       p++;
+                       break;
+               case 'X':
+                       /* Raw header may also override the type implied by `header=` */
+                       if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+                               result->type = REGEXP_RAW_HEADER;
+                       }
+                       p++;
+                       break;
+               case 'T':
+                       result->is_test = TRUE;
+                       p++;
+                       break;
+               case 'S':
+                       result->is_strong = TRUE;
+                       p++;
+                       break;
+               /* Stop flags parsing */
+               default:
+                       p = NULL;
+                       break;
+               }
+       }
+
+       /* Temporarily cut the copy at the closing slash to isolate the pattern */
+       result->regexp_text = rspamd_mempool_strdup (pool, start);
+       dbegin = result->regexp_text + (begin - start);
+       dend = result->regexp_text + (end - start);
+       *dend = '\0';
+
+       result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
+                       &err);
+
+       g_string_free (re_flags, TRUE);
+
+       if (result->regexp == NULL || err != NULL) {
+               msg_warn ("could not read regexp: %s while reading regexp %s",
+                               err ? err->message : "unknown error",
+                                               src);
+               if (err != NULL) {
+                       g_error_free (err);
+               }
+               return NULL;
+       }
+
+       rspamd_mempool_add_destructor (pool,
+               (rspamd_mempool_destruct_t) rspamd_regexp_unref,
+               (void *)result->regexp);
+
+       rspamd_regexp_set_ud (result->regexp, result);
+
+       rspamd_regexp_cache_insert (NULL, src, NULL, result->regexp);
+
+       /* Restore the full text representation */
+       *dend = '/';
+
+       return result;
+}
+
+/**
+ * Look up an internal function by name and call it.
+ * @param func function description: its name and argument list are used
+ * @param task task object passed to the function
+ * @param L lua state (not used by this implementation)
+ * @return result of the function or FALSE if no function has this name
+ */
+gboolean
+call_expression_function (struct expression_function * func,
+       struct rspamd_task * task,
+       lua_State *L)
+{
+       struct _fl needle, *match;
+
+       needle.name = func->name;
+       match = bsearch (&needle, list_ptr, functions_number,
+                       sizeof (struct _fl), fl_cmp);
+
+       if (match != NULL) {
+               return match->func (task, func->args, match->user_data);
+       }
+
+       /* Try to check lua function */
+       return FALSE;
+}
+
+/**
+ * Register specified function in the list of internal functions.
+ *
+ * The list is copied into a new heap array that is one entry larger, the new
+ * entry is appended and the array is sorted again so that lookups by name
+ * keep working. The name is stored by pointer, not copied.
+ *
+ * @param name name of function, must not be NULL
+ * @param func pointer to function
+ * @param user_data opaque pointer passed to func on each call
+ */
+void
+register_expression_function (const gchar *name,
+       rspamd_internal_func_t func,
+       void *user_data)
+{
+       struct _fl *grown;
+
+       /* A NULL name would crash strcmp() inside fl_cmp */
+       g_return_if_fail (name != NULL);
+
+       grown = g_new (struct _fl, functions_number + 1);
+       memcpy (grown, list_ptr, functions_number * sizeof (struct _fl));
+
+       grown[functions_number].name = name;
+       grown[functions_number].func = func;
+       grown[functions_number].user_data = user_data;
+       functions_number++;
+
+       qsort (grown, functions_number, sizeof (struct _fl), fl_cmp);
+
+       /* The initial list is a static array and must never be freed */
+       if (list_allocated) {
+               g_free (list_ptr);
+       }
+
+       list_ptr = grown;
+       list_allocated = TRUE;
+}
+
+/*
+ * Stub: validates its first argument and then always returns TRUE, the real
+ * comparison is not implemented yet.
+ * Returns FALSE if the task or arguments are missing or the argument is not
+ * usable as a string.
+ */
+gboolean
+rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused)
+{
+       struct expression_argument *arg;
+
+       if (args == NULL || task == NULL) {
+               return FALSE;
+       }
+
+       arg = get_function_arg (args->data, task, TRUE);
+       /* arg may be NULL if the argument could not be evaluated */
+       if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+               msg_warn ("invalid argument to function is passed");
+               return FALSE;
+       }
+
+       /* XXX: really write this function */
+       return TRUE;
+}
+
+/*
+ * Check whether the message has a header with the name given as the first
+ * argument. Returns FALSE if the task, the argument list or the argument
+ * itself is not usable.
+ */
+gboolean
+rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused)
+{
+       struct expression_argument *hdr_arg;
+
+       if (task == NULL || args == NULL) {
+               return FALSE;
+       }
+
+       hdr_arg = get_function_arg (args->data, task, TRUE);
+
+       if (hdr_arg == NULL || hdr_arg->type == EXPRESSION_ARGUMENT_BOOL) {
+               msg_warn ("invalid argument to function is passed");
+               return FALSE;
+       }
+
+       debug_task ("try to get header %s", (gchar *)hdr_arg->data);
+
+       /* A non-empty list means that at least one such header was found */
+       return message_get_header (task, (gchar *)hdr_arg->data, FALSE) != NULL;
+}
+
+/*
+ * Check a computed distance against the configured thresholds: with two
+ * thresholds the value must lie in [min, max), with a single threshold it
+ * must not exceed it.
+ */
+static gboolean
+rspamd_parts_distance_match (gint diff, gint threshold, gint threshold2)
+{
+       if (threshold2 > 0) {
+               return diff >= MIN (threshold, threshold2) &&
+                       diff < MAX (threshold, threshold2);
+       }
+
+       return diff <= threshold;
+}
+
+/*
+ * This function is designed to find difference between text/html and text/plain parts
+ * It takes one or two arguments: a threshold (100 if omitted) and an optional
+ * second threshold that turns the check into a range check. If the message has
+ * exactly two text parts inside the same multipart/alternative container,
+ * they are compared and the result is checked against the threshold(s).
+ * The computed value is stored in the task pool variable "parts_distance"
+ * (-1 means "parts cannot be compared") and reused by later calls.
+ */
+gboolean
+rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused)
+{
+       gint threshold, threshold2 = -1, diff;
+       struct mime_text_part *p1, *p2;
+       GList *cur;
+       struct expression_argument *arg;
+       GMimeObject *parent;
+       const GMimeContentType *ct;
+       gint *pdiff;
+
+       if (task == NULL) {
+               return FALSE;
+       }
+
+       if (args == NULL) {
+               debug_task ("no threshold is specified, assume it 100");
+               threshold = 100;
+       }
+       else {
+               arg = get_function_arg (args->data, task, TRUE);
+               if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+                       msg_warn ("invalid argument to function is passed");
+                       return FALSE;
+               }
+               errno = 0;
+               threshold = strtoul ((gchar *)arg->data, NULL, 10);
+               if (errno != 0) {
+                       msg_info ("bad numeric value for threshold \"%s\", assume it 100",
+                               (gchar *)arg->data);
+                       threshold = 100;
+               }
+               if (args->next) {
+                       arg = get_function_arg (args->next->data, task, TRUE);
+                       if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+                               msg_warn ("invalid argument to function is passed");
+                               return FALSE;
+                       }
+                       errno = 0;
+                       threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
+                       if (errno != 0) {
+                               msg_info ("bad numeric value for threshold \"%s\", ignore it",
+                                       (gchar *)arg->data);
+                               threshold2 = -1;
+                       }
+               }
+       }
+
+       /* Reuse the distance computed by a previous call for this task */
+       pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance");
+       if (pdiff != NULL) {
+               diff = *pdiff;
+               if (diff == -1) {
+                       return FALSE;
+               }
+               return rspamd_parts_distance_match (diff, threshold, threshold2);
+       }
+
+       /* Nothing cached yet: -1 marks parts that cannot be compared */
+       pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
+       *pdiff = -1;
+
+       if (g_list_length (task->text_parts) != 2) {
+               debug_task (
+                       "message has too many text parts, so do not try to compare them with each other");
+               rspamd_mempool_set_variable (task->task_pool,
+                       "parts_distance",
+                       pdiff,
+                       NULL);
+               return FALSE;
+       }
+
+       cur = g_list_first (task->text_parts);
+       p1 = cur->data;
+       cur = g_list_next (cur);
+
+       if (cur == NULL) {
+               msg_info ("bad parts list");
+               return FALSE;
+       }
+       p2 = cur->data;
+       /* First of all check parent object */
+       if (p1->parent && p1->parent == p2->parent) {
+               parent = p1->parent;
+               ct = g_mime_object_get_content_type (parent);
+#ifndef GMIME24
+               if (ct == NULL ||
+                       !g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+#else
+               if (ct == NULL ||
+                       !g_mime_content_type_is_type ((GMimeContentType *)ct,
+                       "multipart", "alternative")) {
+#endif
+                       debug_task (
+                               "two parts are not belong to multipart/alternative container, skip check");
+                       rspamd_mempool_set_variable (task->task_pool,
+                               "parts_distance",
+                               pdiff,
+                               NULL);
+                       return FALSE;
+               }
+       }
+       else {
+               debug_task (
+                       "message contains two parts but they are in different multi-parts");
+               rspamd_mempool_set_variable (task->task_pool,
+                       "parts_distance",
+                       pdiff,
+                       NULL);
+               return FALSE;
+       }
+
+       if (!p1->is_empty && !p2->is_empty) {
+               if (p1->diff_str != NULL && p2->diff_str != NULL) {
+                       diff = rspamd_diff_distance_normalized (p1->diff_str,
+                                       p2->diff_str);
+               }
+               else {
+                       diff = rspamd_fuzzy_compare_parts (p1, p2);
+               }
+               debug_task (
+                       "got likeliness between parts of %d%%, threshold is %d%%",
+                       diff,
+                       threshold);
+               *pdiff = diff;
+               rspamd_mempool_set_variable (task->task_pool,
+                       "parts_distance",
+                       pdiff,
+                       NULL);
+               if (rspamd_parts_distance_match (diff, threshold, threshold2)) {
+                       return TRUE;
+               }
+       }
+       else if ((p1->is_empty &&
+               !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+               /* Empty and non empty parts are different */
+               *pdiff = 0;
+               rspamd_mempool_set_variable (task->task_pool,
+                       "parts_distance",
+                       pdiff,
+                       NULL);
+               return TRUE;
+       }
+
+       /* Either both parts are empty or the distance did not match */
+       rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff,
+               NULL);
+       return FALSE;
+}
+
+/* Pair of strings describing one recipient while recipients are compared */
+struct addr_list {
+       const gchar *name;      /* mailbox string as obtained from the address list */
+       const gchar *addr;      /* part of the mailbox string after '@', if any */
+};
+
+/* Number of leading characters of recipient names that are compared */
+#define COMPARE_RCPT_LEN 3
+/* Recipient lists shorter than this are not examined at all */
+#define MIN_RCPT_TO_COMPARE 7
+
+/*
+ * Estimate how similar the mime recipients of a message are to each other.
+ * Takes one argument: a numeric threshold. Every pair of recipients scores a
+ * hit if the first COMPARE_RCPT_LEN characters of their names match or, if
+ * they do not, their domains match (both case insensitively). Returns TRUE
+ * if hits * num / 2 / pairs >= threshold, where num is the number of
+ * recipients. Lists shorter than MIN_RCPT_TO_COMPARE are never matched.
+ */
+gboolean
+rspamd_recipients_distance (struct rspamd_task *task, GList * args,
+       void *unused)
+{
+       struct expression_argument *arg;
+       InternetAddressList *cur;
+       double threshold;
+       struct addr_list *ar;
+       gchar *c;
+       gint num, i, j, hits = 0, total = 0;
+
+       if (task == NULL) {
+               return FALSE;
+       }
+
+       if (args == NULL) {
+               msg_warn ("no parameters to function");
+               return FALSE;
+       }
+
+       arg = get_function_arg (args->data, task, TRUE);
+       /* arg may be NULL if the argument could not be evaluated */
+       if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+               msg_warn ("invalid argument to function is passed");
+               return FALSE;
+       }
+
+       errno = 0;
+       threshold = strtod ((gchar *)arg->data, NULL);
+       if (errno != 0) {
+               msg_warn ("invalid numeric value '%s': %s",
+                       (gchar *)arg->data,
+                       strerror (errno));
+               return FALSE;
+       }
+
+       if (!task->rcpt_mime) {
+               return FALSE;
+       }
+       num = internet_address_list_length (task->rcpt_mime);
+       if (num < MIN_RCPT_TO_COMPARE) {
+               return FALSE;
+       }
+       /* Zero filled, so entries that are skipped below keep NULL strings */
+       ar =
+               rspamd_mempool_alloc0 (task->task_pool, num *
+                       sizeof (struct addr_list));
+
+       /* Fill array */
+       cur = task->rcpt_mime;
+#ifdef GMIME24
+       for (i = 0; i < num; i++) {
+               InternetAddress *iaelt =
+                       internet_address_list_get_address(cur, i);
+               InternetAddressMailbox *iamb =
+                       INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
+                       INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
+               if (iamb) {
+                       ar[i].name = internet_address_mailbox_get_addr (iamb);
+                       if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+                               ar[i].addr = c + 1;
+                       }
+               }
+       }
+#else
+       InternetAddress *addr;
+       i = 0;
+       while (cur) {
+               addr = internet_address_list_get_address (cur);
+               if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+                       /* Work on a private copy as the string is split at '@' */
+                       ar[i].name = rspamd_mempool_strdup (task->task_pool,
+                                       internet_address_get_addr (addr));
+                       if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+                               *c = '\0';
+                               ar[i].addr = c + 1;
+                       }
+                       cur = internet_address_list_next (cur);
+                       i++;
+               }
+               else {
+                       cur = internet_address_list_next (cur);
+               }
+       }
+#endif
+
+       /* Cycle all elements in array */
+       for (i = 0; i < num; i++) {
+               for (j = i + 1; j < num; j++) {
+                       if (ar[i].name && ar[j].name &&
+                               g_ascii_strncasecmp (ar[i].name, ar[j].name,
+                               COMPARE_RCPT_LEN) == 0) {
+                               /* Common name part */
+                               hits++;
+                       }
+                       else if (ar[i].addr && ar[j].addr &&
+                               g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
+                               /* Common address part, but different name */
+                               hits++;
+                       }
+                       total++;
+               }
+       }
+
+       /* total is positive here because num >= MIN_RCPT_TO_COMPARE */
+       if ((double)(hits * num / 2.) / (double)total >= threshold) {
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
+/*
+ * Returns TRUE if the message has at least one text part and every text
+ * part is HTML, FALSE otherwise. Arguments are ignored.
+ */
+gboolean
+rspamd_has_only_html_part (struct rspamd_task * task, GList * args,
+       void *unused)
+{
+       GList *cur;
+       gboolean seen_html = FALSE;
+
+       for (cur = g_list_first (task->text_parts); cur != NULL;
+               cur = g_list_next (cur)) {
+               struct mime_text_part *part = cur->data;
+
+               if (!part->is_html) {
+                       /* A single non-HTML part is enough to fail */
+                       return FALSE;
+               }
+               seen_html = TRUE;
+       }
+
+       return seen_html;
+}
+
+/*
+ * Check whether an address list is in non-descending order (case
+ * insensitive comparison). Lists shorter than MIN_RCPT_TO_COMPARE always
+ * yield FALSE. With GMIME24 the value returned by internet_address_get_name
+ * is compared; otherwise the value of internet_address_get_addr is compared
+ * and only entries of type INTERNET_ADDRESS_NAME are considered.
+ */
+static gboolean
+is_recipient_list_sorted (const InternetAddressList * ia)
+{
+       const InternetAddressList *cur;
+       InternetAddress *addr;
+       gboolean res = TRUE;
+       struct addr_list current = { NULL, NULL }, previous = {
+               NULL, NULL
+       };
+#ifdef GMIME24
+       gint num, i;
+#endif
+
+       /* Do not check too short address lists */
+       if (internet_address_list_length ((InternetAddressList *)ia) <
+               MIN_RCPT_TO_COMPARE) {
+               return FALSE;
+       }
+#ifdef GMIME24
+       num = internet_address_list_length ((InternetAddressList *)ia);
+       cur = ia;
+       for (i = 0; i < num; i++) {
+               addr =
+                       internet_address_list_get_address ((InternetAddressList *)cur, i);
+               current.addr = (gchar *)internet_address_get_name (addr);
+               if (previous.addr != NULL) {
+                       /* An entry that sorts before its predecessor breaks the order */
+                       if (current.addr &&
+                               g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+                               res = FALSE;
+                               break;
+                       }
+               }
+               previous.addr = current.addr;
+       }
+#else
+       cur = ia;
+       while (cur) {
+               addr = internet_address_list_get_address (cur);
+               if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+                       current.addr = internet_address_get_addr (addr);
+                       if (previous.addr != NULL) {
+                               /* An entry that sorts before its predecessor breaks the order */
+                               if (current.addr &&
+                                       g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+                                       res = FALSE;
+                                       break;
+                               }
+                       }
+                       previous.addr = current.addr;
+               }
+               cur = internet_address_list_next (cur);
+       }
+#endif
+
+       return res;
+}
+
+/*
+ * Returns TRUE if any of the To, Bcc or Cc recipient lists of the message
+ * (checked in this order) is reported as sorted. Arguments are ignored.
+ */
+gboolean
+rspamd_is_recipients_sorted (struct rspamd_task * task,
+       GList * args,
+       void *unused)
+{
+       /* Check all types of addresses, stop at the first sorted list */
+       return is_recipient_list_sorted (
+                       g_mime_message_get_recipients (task->message,
+                       GMIME_RECIPIENT_TYPE_TO)) ||
+               is_recipient_list_sorted (
+                       g_mime_message_get_recipients (task->message,
+                       GMIME_RECIPIENT_TYPE_BCC)) ||
+               is_recipient_list_sorted (
+                       g_mime_message_get_recipients (task->message,
+                       GMIME_RECIPIENT_TYPE_CC));
+}
+
+/*
+ * Compare the transfer encoding of the top level mime part of the message
+ * with the encoding named by the first argument. A part without an explicit
+ * encoding is treated as 7bit. Returns FALSE if the argument is missing or
+ * is not a known encoding name, or if the top level object is not a plain
+ * part.
+ */
+gboolean
+rspamd_compare_transfer_encoding (struct rspamd_task * task,
+       GList * args,
+       void *unused)
+{
+       GMimeObject *part;
+#ifndef GMIME24
+       GMimePartEncodingType enc_req, part_enc;
+#else
+       GMimeContentEncoding enc_req, part_enc;
+#endif
+       struct expression_argument *arg;
+
+       if (args == NULL) {
+               msg_warn ("no parameters to function");
+               return FALSE;
+       }
+
+       arg = get_function_arg (args->data, task, TRUE);
+       /* arg may be NULL if the argument could not be evaluated */
+       if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+               msg_warn ("invalid argument to function is passed");
+               return FALSE;
+       }
+#ifndef GMIME24
+       enc_req = g_mime_part_encoding_from_string (arg->data);
+       if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
+#else
+       enc_req = g_mime_content_encoding_from_string (arg->data);
+       if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
+#endif
+               msg_warn ("bad encoding type: %s", (gchar *)arg->data);
+               return FALSE;
+       }
+
+       part = g_mime_message_get_mime_part (task->message);
+       if (part) {
+               if (GMIME_IS_PART (part)) {
+#ifndef GMIME24
+                       part_enc = g_mime_part_get_encoding (GMIME_PART (part));
+                       if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
+                               /* Assume 7bit as default transfer encoding */
+                               part_enc = GMIME_PART_ENCODING_7BIT;
+                       }
+#else
+                       part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
+                       if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
+                               /* Assume 7bit as default transfer encoding */
+                               part_enc = GMIME_CONTENT_ENCODING_7BIT;
+                       }
+#endif
+
+
+                       debug_task ("got encoding in part: %d and compare with %d",
+                               (gint)part_enc,
+                               (gint)enc_req);
+#ifndef GMIME24
+                       /* Without GMIME24 the reference to the part is dropped here */
+                       g_object_unref (part);
+#endif
+
+                       return part_enc == enc_req;
+               }
+#ifndef GMIME24
+               g_object_unref (part);
+#endif
+       }
+
+       return FALSE;
+}
+
+/*
+ * Returns FALSE if the message has a non-empty HTML text part that is not
+ * marked as balanced, TRUE otherwise (including messages without HTML
+ * parts). Arguments are ignored.
+ */
+gboolean
+rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused)
+{
+       GList *cur;
+
+       for (cur = g_list_first (task->text_parts); cur != NULL;
+               cur = g_list_next (cur)) {
+               struct mime_text_part *part = cur->data;
+
+               if (part->is_empty || !part->is_html) {
+                       /* Only non-empty HTML parts are of interest */
+                       continue;
+               }
+               if (!part->is_balanced) {
+                       return FALSE;
+               }
+       }
+
+       return TRUE;
+}
+
+/* Context passed to search_html_node_callback while HTML nodes are traversed */
+struct html_callback_data {
+       struct html_tag *tag;   /* tag that is being searched for */
+       gboolean *res;          /* set to TRUE by the callback when the tag is found */
+};
+
+/*
+ * GNode traversal callback: if the node carries the tag that is searched
+ * for, store TRUE in the result and return TRUE, otherwise return FALSE.
+ */
+static gboolean
+search_html_node_callback (GNode * node, gpointer data)
+{
+       struct html_callback_data *ctx = data;
+       struct html_node *cur_node = node->data;
+
+       if (cur_node == NULL || cur_node->tag != ctx->tag) {
+               return FALSE;
+       }
+
+       *ctx->res = TRUE;
+
+       return TRUE;
+}
+
+/*
+ * Check whether any non-empty HTML text part of the message contains the
+ * tag named by the first argument. Returns FALSE if the argument is missing
+ * or does not name a known tag.
+ */
+gboolean
+rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused)
+{
+       struct mime_text_part *p;
+       GList *cur;
+       struct expression_argument *arg;
+       struct html_tag *tag;
+       gboolean res = FALSE;
+       struct html_callback_data cd;
+
+       if (args == NULL) {
+               msg_warn ("no parameters to function");
+               return FALSE;
+       }
+
+       arg = get_function_arg (args->data, task, TRUE);
+       /* arg may be NULL if the argument could not be evaluated */
+       if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+               msg_warn ("invalid argument to function is passed");
+               return FALSE;
+       }
+
+       tag = get_tag_by_name (arg->data);
+       if (tag == NULL) {
+               msg_warn ("unknown tag type passed as argument: %s",
+                       (gchar *)arg->data);
+               return FALSE;
+       }
+
+       cur = g_list_first (task->text_parts);
+       cd.res = &res;
+       cd.tag = tag;
+
+       /* Stop at the first part where the callback has found the tag */
+       while (cur && res == FALSE) {
+               p = cur->data;
+               if (!p->is_empty && p->is_html && p->html_nodes) {
+                       g_node_traverse (p->html_nodes,
+                               G_PRE_ORDER,
+                               G_TRAVERSE_ALL,
+                               -1,
+                               search_html_node_callback,
+                               &cd);
+               }
+               cur = g_list_next (cur);
+       }
+
+       return res;
+
+}
+
+/*
+ * Returns TRUE if the message has a non-empty text part that is marked as
+ * HTML but has no HTML nodes. Arguments are ignored.
+ */
+gboolean
+rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused)
+{
+       GList *cur;
+
+       for (cur = g_list_first (task->text_parts); cur != NULL;
+               cur = g_list_next (cur)) {
+               struct mime_text_part *part = cur->data;
+
+               if (!part->is_empty && part->is_html && part->html_nodes == NULL) {
+                       return TRUE;
+               }
+       }
+
+       return FALSE;
+}
diff --git a/src/libmime/mime_expressions.h b/src/libmime/mime_expressions.h
new file mode 100644 (file)
index 0000000..343e912
--- /dev/null
@@ -0,0 +1,59 @@
+/**
+ * @file mime_expressions.h
+ * Rspamd expressions API
+ */
+
+#ifndef RSPAMD_EXPRESSIONS_H
+#define RSPAMD_EXPRESSIONS_H
+
+#include "config.h"
+#include <lua.h>
+
+struct rspamd_task;
+struct rspamd_regexp_element;
+
+/**
+ * Argument passed to an expression function
+ */
+struct expression_argument {
+       enum {
+               EXPRESSION_ARGUMENT_NORMAL,
+               EXPRESSION_ARGUMENT_BOOL,
+               EXPRESSION_ARGUMENT_EXPR,
+       } type;                                                     /**< type of argument (text or other function), one of EXPRESSION_ARGUMENT_* */
+       void *data;                                                 /**< pointer to the argument data; NOTE(review): presumably interpreted according to type - confirm */
+};
+
+
+/**
+ * Signature of an internal expression function: it receives a task, a list
+ * of arguments and an opaque user_data pointer, and returns a boolean.
+ * NOTE(review): user_data is presumably the pointer that was supplied to
+ * register_expression_function() - confirm against the implementation.
+ */
+typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args,
+       void *user_data);
+
+
+/**
+ * Register specified function to rspamd internal functions list
+ * @param name name of function
+ * @param func pointer to function
+ * @param user_data opaque pointer registered together with the function
+ * (NOTE(review): presumably handed back to func as its user_data
+ * argument - confirm against the implementation)
+ */
+void register_expression_function (const gchar *name,
+       rspamd_internal_func_t func,
+       void *user_data);
+
+/**
+ * Add regexp to regexp task cache
+ * @param task task object
+ * @param re regexp data
+ * @param result numeric result of this regexp
+ */
+void task_cache_add (struct rspamd_task *task,
+       struct rspamd_regexp_element *re,
+       gint32 result);
+
+/**
+ * Check regexp in cache
+ * @param task task object
+ * @param re regexp data
+ * @return numeric result if value exists or -1 if not
+ */
+gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re);
+
+#endif
index 08b70f5c927a9642b9009f9de35dcebff53e9640..06232cff89cc321f679cd411f2d31c5035eb95fc 100644 (file)
@@ -42,18 +42,6 @@ enum rspamd_cred_type {
        CRED_DELIVERY
 };
 
-/**
- * Regexp type: /H - header, /M - mime, /U - url /X - raw header
- */
-enum rspamd_regexp_type {
-       REGEXP_NONE = 0,
-       REGEXP_HEADER,
-       REGEXP_MIME,
-       REGEXP_MESSAGE,
-       REGEXP_URL,
-       REGEXP_RAW_HEADER
-};
-
 /**
  * Logging type
  */
@@ -63,18 +51,6 @@ enum rspamd_log_type {
        RSPAMD_LOG_FILE
 };
 
-/**
- * Regexp structure
- */
-struct rspamd_regexp_element {
-       enum rspamd_regexp_type type;                   /**< regexp type                                                                                */
-       gchar *regexp_text;                             /**< regexp text representation                                                 */
-       rspamd_regexp_t *regexp;                        /**< regexp structure                                                                   */
-       gchar *header;                                  /**< header name for header regexps                                             */
-       gboolean is_test;                               /**< true if this expression must be tested                             */
-       gboolean is_strong;                             /**< true if headers search must be case sensitive              */
-};
-
 /**
  * script module list item
  */
index e74d7e71c10b3be7b9912ff999b633c694ecdaf3..b15d3e1816e07efd1f463088e447582434b76cbf 100644 (file)
@@ -25,7 +25,6 @@
 
 #include "lua_common.h"
 #include "message.h"
-#include "expressions.h"
 #include "protocol.h"
 #include "filter.h"
 #include "dns.h"
@@ -273,15 +272,7 @@ LUA_FUNCTION_DEF (task, get_resolver);
  * Increment number of DNS requests for the task. Is used just for logging purposes.
  */
 LUA_FUNCTION_DEF (task, inc_dns_req);
-/***
- * @method task:call_rspamd_function(function[, param, param...])
- * Calls rspamd expression function `func` with the specified parameters.
- * It returns the boolean result of function invocation.
- * @param {string} function name of internal or registered lua function to call
- * @param {list of strings} params parameters for a function
- * @return {bool} true or false returned by expression function
- */
-LUA_FUNCTION_DEF (task, call_rspamd_function);
+
 /***
  * @method task:get_recipients([type])
  * Return SMTP or MIME recipients for a task. This function returns list of internet addresses each one is a table with the following structure:
@@ -459,7 +450,6 @@ static const struct luaL_reg tasklib_m[] = {
        LUA_INTERFACE_DEF (task, get_received_headers),
        LUA_INTERFACE_DEF (task, get_resolver),
        LUA_INTERFACE_DEF (task, inc_dns_req),
-       LUA_INTERFACE_DEF (task, call_rspamd_function),
        LUA_INTERFACE_DEF (task, get_recipients),
        LUA_INTERFACE_DEF (task, get_from),
        LUA_INTERFACE_DEF (task, get_user),
@@ -1255,45 +1245,6 @@ lua_task_inc_dns_req (lua_State *L)
        return 0;
 }
 
-static gint
-lua_task_call_rspamd_function (lua_State * L)
-{
-       struct rspamd_task *task = lua_check_task (L, 1);
-       struct expression_function f;
-       gint i, top;
-       gboolean res;
-       gchar *arg;
-
-       if (task) {
-               f.name = (gchar *)luaL_checkstring (L, 2);
-               if (f.name) {
-                       f.args = NULL;
-                       top = lua_gettop (L);
-                       /* Get arguments after function name */
-                       for (i = 3; i <= top; i++) {
-                               arg = (gchar *)luaL_checkstring (L, i);
-                               if (arg != NULL) {
-                                       f.args = g_list_prepend (f.args, arg);
-                               }
-                       }
-                       res = call_expression_function (&f, task, L);
-                       lua_pushboolean (L, res);
-                       if (f.args) {
-                               g_list_free (f.args);
-                       }
-
-                       return 1;
-               }
-       }
-
-       lua_pushnil (L);
-
-       return 1;
-
-}
-
-
-
 static gboolean
 lua_push_internet_address (lua_State *L, InternetAddress *ia)
 {