aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libmime/CMakeLists.txt2
-rw-r--r--src/libmime/expressions.c1582
-rw-r--r--src/libmime/expressions.h144
-rw-r--r--src/libmime/filter.c298
-rw-r--r--src/libmime/filter.h7
-rw-r--r--src/libmime/mime_expressions.c2234
-rw-r--r--src/libmime/mime_expressions.h49
-rw-r--r--src/libserver/cfg_file.h24
-rw-r--r--src/libserver/cfg_rcl.c14
-rw-r--r--src/libserver/task.c34
-rw-r--r--src/libserver/task.h19
-rw-r--r--src/libutil/regexp.c4
-rw-r--r--src/libutil/regexp.h22
-rw-r--r--src/lua/lua_cfg_file.c16
-rw-r--r--src/lua/lua_common.c1
-rw-r--r--src/lua/lua_config.c114
-rw-r--r--src/lua/lua_regexp.c1
-rw-r--r--src/lua/lua_task.c51
-rw-r--r--src/plugins/chartable.c1
-rw-r--r--src/plugins/dkim_check.c1
-rw-r--r--src/plugins/fuzzy_check.c1
-rw-r--r--src/plugins/regexp.c1863
-rw-r--r--src/plugins/spf.c1
-rw-r--r--src/plugins/surbl.c1
24 files changed, 2553 insertions, 3931 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
index 36de02c82..bb678d2f9 100644
--- a/src/libmime/CMakeLists.txt
+++ b/src/libmime/CMakeLists.txt
@@ -1,6 +1,6 @@
# Librspamd mime
SET(LIBRSPAMDMIMESRC
- ${CMAKE_CURRENT_SOURCE_DIR}/expressions.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/mime_expressions.c
${CMAKE_CURRENT_SOURCE_DIR}/filter.c
${CMAKE_CURRENT_SOURCE_DIR}/images.c
${CMAKE_CURRENT_SOURCE_DIR}/message.c
diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c
deleted file mode 100644
index 547cc0d58..000000000
--- a/src/libmime/expressions.c
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * Copyright (c) 2009-2012, Vsevolod Stakhov
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "util.h"
-#include "cfg_file.h"
-#include "main.h"
-#include "message.h"
-#include "fuzzy.h"
-#include "expressions.h"
-#include "html.h"
-#include "lua/lua_common.h"
-#include "diff.h"
-
-gboolean rspamd_compare_encoding (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_header_exists (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_parts_distance (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_recipients_distance (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_has_only_html_part (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_is_html_balanced (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_has_html_tag (struct rspamd_task *task,
- GList * args,
- void *unused);
-gboolean rspamd_has_fake_html (struct rspamd_task *task,
- GList * args,
- void *unused);
-
-/*
- * List of internal functions of rspamd
- * Sorted by name to use bsearch
- */
-static struct _fl {
- const gchar *name;
- rspamd_internal_func_t func;
- void *user_data;
-} rspamd_functions_list[] = {
- {"compare_encoding", rspamd_compare_encoding, NULL},
- {"compare_parts_distance", rspamd_parts_distance, NULL},
- {"compare_recipients_distance", rspamd_recipients_distance, NULL},
- {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
- {"has_fake_html", rspamd_has_fake_html, NULL},
- {"has_html_tag", rspamd_has_html_tag, NULL},
- {"has_only_html_part", rspamd_has_only_html_part, NULL},
- {"header_exists", rspamd_header_exists, NULL},
- {"is_html_balanced", rspamd_is_html_balanced, NULL},
- {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}
-};
-
-static struct _fl *list_ptr = &rspamd_functions_list[0];
-static guint32 functions_number = sizeof (rspamd_functions_list) /
- sizeof (struct _fl);
-static gboolean list_allocated = FALSE;
-
-/* Bsearch routine */
-static gint
-fl_cmp (const void *s1, const void *s2)
-{
- struct _fl *fl1 = (struct _fl *)s1;
- struct _fl *fl2 = (struct _fl *)s2;
- return strcmp (fl1->name, fl2->name);
-}
-
-/* Cache for regular expressions that are used in functions */
-void *
-re_cache_check (const gchar *line, rspamd_mempool_t *pool)
-{
- GHashTable *re_cache;
-
- re_cache = rspamd_mempool_get_variable (pool, "re_cache");
-
- if (re_cache == NULL) {
- re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
- rspamd_mempool_set_variable (pool, "re_cache", re_cache,
- (rspamd_mempool_destruct_t)g_hash_table_destroy);
- return NULL;
- }
- return g_hash_table_lookup (re_cache, line);
-}
-
-void
-re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool)
-{
- GHashTable *re_cache;
-
- re_cache = rspamd_mempool_get_variable (pool, "re_cache");
-
- if (re_cache == NULL) {
- re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
- rspamd_mempool_set_variable (pool, "re_cache", re_cache,
- (rspamd_mempool_destruct_t)g_hash_table_destroy);
- }
-
- g_hash_table_insert (re_cache, (gpointer)line, pointer);
-}
-
-void
-re_cache_del (const gchar *line, rspamd_mempool_t *pool)
-{
- GHashTable *re_cache;
-
- re_cache = rspamd_mempool_get_variable (pool, "re_cache");
-
- if (re_cache != NULL) {
- g_hash_table_remove (re_cache, line);
- }
-
-}
-
-/*
- * Functions for parsing expressions
- */
-struct expression_stack {
- gchar op;
- struct expression_stack *next;
-};
-
-/*
- * Push operand or operator to stack
- */
-static struct expression_stack *
-push_expression_stack (rspamd_mempool_t * pool,
- struct expression_stack *head,
- gchar op)
-{
- struct expression_stack *new;
- new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack));
- new->op = op;
- new->next = head;
- return new;
-}
-
-/*
- * Delete symbol from stack, return pointer to operand or operator (casted to void* )
- */
-static gchar
-delete_expression_stack (struct expression_stack **head)
-{
- struct expression_stack *cur;
- gchar res;
-
- if (*head == NULL)
- return 0;
-
- cur = *head;
- res = cur->op;
-
- *head = cur->next;
- return res;
-}
-
-/*
- * Return operation priority
- */
-static gint
-logic_priority (gchar a)
-{
- switch (a) {
- case '!':
- return 3;
- case '|':
- case '&':
- return 2;
- case '(':
- return 1;
- default:
- return 0;
- }
-}
-
-/*
- * Return FALSE if symbol is not operation symbol (operand)
- * Return TRUE if symbol is operation symbol
- */
-static gboolean
-is_operation_symbol (gchar *a)
-{
- switch (*a) {
- case '!':
- case '&':
- case '|':
- case '(':
- case ')':
- return TRUE;
- case 'O':
- case 'o':
- if (g_ascii_strncasecmp (a, "or",
- sizeof ("or") - 1) == 0 && g_ascii_isspace (a[2])) {
- return TRUE;
- }
- break;
- case 'A':
- case 'a':
- if (g_ascii_strncasecmp (a, "and",
- sizeof ("and") - 1) == 0 && g_ascii_isspace (a[3])) {
- return TRUE;
- }
- break;
- case 'N':
- case 'n':
- if (g_ascii_strncasecmp (a, "not",
- sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) {
- return TRUE;
- }
- break;
- }
-
- return FALSE;
-}
-
-/* Return character representation of operation */
-static gchar
-op_to_char (gchar *a, gchar **next)
-{
- switch (*a) {
- case '!':
- case '&':
- case '|':
- case '(':
- case ')':
- if ((a[0] == '&' && a[1] == '&') ||
- (a[0] == '|' && a[1] == '|')) {
- *next = a + 2;
- }
- else {
- *next = a + 1;
- }
- return *a;
- case 'O':
- case 'o':
- if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
- *next = a + sizeof ("or") - 1;
- return '|';
- }
- break;
- case 'A':
- case 'a':
- if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
- *next = a + sizeof ("and") - 1;
- return '&';
- }
- break;
- case 'N':
- case 'n':
- if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
- *next = a + sizeof ("not") - 1;
- return '!';
- }
- break;
- }
-
- return '\0';
-}
-
-/*
- * Return TRUE if symbol can be regexp flag
- */
-static gboolean
-is_regexp_flag (gchar a)
-{
- switch (a) {
- case 'i':
- case 'm':
- case 'x':
- case 's':
- case 'u':
- case 'o':
- case 'r':
- case 'H':
- case 'M':
- case 'P':
- case 'U':
- case 'X':
- case 'T':
- case 'S':
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static void
-insert_expression (rspamd_mempool_t * pool,
- struct expression **head,
- gint type,
- gchar op,
- void *operand,
- const gchar *orig)
-{
- struct expression *new, *cur;
-
- new = rspamd_mempool_alloc (pool, sizeof (struct expression));
- new->type = type;
- new->orig = orig;
- if (new->type != EXPR_OPERATION) {
- new->content.operand = operand;
- }
- else {
- new->content.operation = op;
- }
- new->next = NULL;
-
- if (!*head) {
- *head = new;
- }
- else {
- cur = *head;
- while (cur->next) {
- cur = cur->next;
- }
- cur->next = new;
- }
-}
-
-static struct expression *
-maybe_parse_expression (rspamd_mempool_t * pool, gchar *line)
-{
- struct expression *expr;
- gchar *p = line;
-
- while (*p) {
- if (is_operation_symbol (p)) {
- return parse_expression (pool, line);
- }
- p++;
- }
-
- expr = rspamd_mempool_alloc (pool, sizeof (struct expression));
- expr->type = EXPR_STR;
- expr->content.operand = rspamd_mempool_strdup (pool, line);
- expr->next = NULL;
-
- return expr;
-}
-
-/*
- * Make inverse polish record for specified expression
- * Memory is allocated from given pool
- */
-struct expression *
-parse_expression (rspamd_mempool_t * pool, gchar *line)
-{
- struct expression *expr = NULL;
- struct expression_stack *stack = NULL;
- struct expression_function *func = NULL;
- struct expression *arg;
- GQueue *function_stack;
- gchar *p, *c, *str, op, newop, *copy, *next;
- gboolean in_regexp = FALSE;
- gint brackets = 0;
-
- enum {
- SKIP_SPACES,
- READ_OPERATOR,
- READ_REGEXP,
- READ_REGEXP_FLAGS,
- READ_FUNCTION,
- READ_FUNCTION_ARGUMENT,
- } state = SKIP_SPACES;
-
- if (line == NULL || pool == NULL) {
- return NULL;
- }
-
- msg_debug ("parsing expression {{ %s }}", line);
-
- function_stack = g_queue_new ();
- copy = rspamd_mempool_strdup (pool, line);
- p = line;
- c = p;
- while (*p) {
- switch (state) {
- case SKIP_SPACES:
- if (!g_ascii_isspace (*p)) {
- if (is_operation_symbol (p)) {
- state = READ_OPERATOR;
- }
- else if (*p == '/') {
- c = ++p;
- state = READ_REGEXP;
- }
- else {
- c = p;
- state = READ_FUNCTION;
- }
- }
- else {
- p++;
- }
- break;
- case READ_OPERATOR:
- if (*p == ')') {
- if (stack == NULL) {
- return NULL;
- }
- /* Pop all operators from stack to nearest '(' or to head */
- while (stack && stack->op != '(') {
- op = delete_expression_stack (&stack);
- if (op != '(') {
- insert_expression (pool,
- &expr,
- EXPR_OPERATION,
- op,
- NULL,
- copy);
- }
- }
- if (stack) {
- /* Remove open brace itself */
- delete_expression_stack (&stack);
- }
- }
- else if (*p == '(') {
- /* Push it to stack */
- stack = push_expression_stack (pool, stack, *p);
- }
- else {
- if (stack == NULL) {
- newop = op_to_char (p, &next);
- if (newop != '\0') {
- stack = push_expression_stack (pool, stack, newop);
- p = next;
- state = SKIP_SPACES;
- continue;
- }
- }
- /* Check priority of logic operation */
- else {
- newop = op_to_char (p, &next);
- if (newop != '\0') {
- if (logic_priority (stack->op) <
- logic_priority (newop)) {
- stack = push_expression_stack (pool, stack, newop);
- }
- else {
- /* Pop all operations that have higher priority than this one */
- while ((stack != NULL) &&
- (logic_priority (stack->op) >=
- logic_priority (newop))) {
- op = delete_expression_stack (&stack);
- if (op != '(') {
- insert_expression (pool,
- &expr,
- EXPR_OPERATION,
- op,
- NULL,
- copy);
- }
- }
- stack = push_expression_stack (pool, stack, newop);
- }
- }
- p = next;
- state = SKIP_SPACES;
- continue;
- }
- }
- p++;
- state = SKIP_SPACES;
- break;
-
- case READ_REGEXP:
- if (*p == '/' && *(p - 1) != '\\') {
- if (*(p + 1)) {
- p++;
- }
- state = READ_REGEXP_FLAGS;
- }
- else {
- p++;
- }
- break;
-
- case READ_REGEXP_FLAGS:
- if (!is_regexp_flag (*p) || *(p + 1) == '\0') {
- if (c != p) {
- if ((is_regexp_flag (*p) || *p ==
- '/') && *(p + 1) == '\0') {
- p++;
- }
- str = rspamd_mempool_alloc (pool, p - c + 2);
- rspamd_strlcpy (str, c - 1, (p - c + 2));
- g_strstrip (str);
- msg_debug ("found regexp: %s", str);
- if (strlen (str) > 0) {
- insert_expression (pool,
- &expr,
- EXPR_REGEXP,
- 0,
- str,
- copy);
- }
- }
- c = p;
- state = SKIP_SPACES;
- }
- else {
- p++;
- }
- break;
-
- case READ_FUNCTION:
- if (*p == '/') {
- /* In fact it is regexp */
- state = READ_REGEXP;
- c++;
- p++;
- }
- else if (*p == '(') {
- func =
- rspamd_mempool_alloc (pool,
- sizeof (struct expression_function));
- func->name = rspamd_mempool_alloc (pool, p - c + 1);
- func->args = NULL;
- rspamd_strlcpy (func->name, c, (p - c + 1));
- g_strstrip (func->name);
- state = READ_FUNCTION_ARGUMENT;
- g_queue_push_tail (function_stack, func);
- insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy);
- c = ++p;
- }
- else if (is_operation_symbol (p)) {
- /* In fact it is not function, but symbol */
- if (c != p) {
- str = rspamd_mempool_alloc (pool, p - c + 1);
- rspamd_strlcpy (str, c, (p - c + 1));
- g_strstrip (str);
- if (strlen (str) > 0) {
- insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
- }
- }
- state = READ_OPERATOR;
- }
- else if (*(p + 1) == '\0') {
- /* In fact it is not function, but symbol */
- p++;
- if (c != p) {
- str = rspamd_mempool_alloc (pool, p - c + 1);
- rspamd_strlcpy (str, c, (p - c + 1));
- g_strstrip (str);
- if (strlen (str) > 0) {
- insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
- }
- }
- state = SKIP_SPACES;
- }
- else {
- p++;
- }
- break;
-
- case READ_FUNCTION_ARGUMENT:
- if (*p == '/' && !in_regexp) {
- in_regexp = TRUE;
- p++;
- }
- if (!in_regexp) {
- /* Append argument to list */
- if (*p == ',' || (*p == ')' && brackets == 0)) {
- arg = NULL;
- str = rspamd_mempool_alloc (pool, p - c + 1);
- rspamd_strlcpy (str, c, (p - c + 1));
- g_strstrip (str);
- /* Recursive call */
- arg = maybe_parse_expression (pool, str);
- func->args = g_list_append (func->args, arg);
- /* Pop function */
- if (*p == ')') {
- /* Last function in chain, goto skipping spaces state */
- func = g_queue_pop_tail (function_stack);
- if (g_queue_get_length (function_stack) == 0) {
- state = SKIP_SPACES;
- }
- }
- c = p + 1;
- }
- else if (*p == '(') {
- brackets++;
- }
- else if (*p == ')') {
- brackets--;
- }
- }
- else if (*p == '/' && *(p - 1) != '\\') {
- in_regexp = FALSE;
- }
- p++;
- break;
- }
- }
-
- g_queue_free (function_stack);
- if (state != SKIP_SPACES) {
- /* In fact we got bad expression */
- msg_warn ("expression \"%s\" is invalid", line);
- return NULL;
- }
- /* Pop everything from stack */
- while (stack != NULL) {
- op = delete_expression_stack (&stack);
- if (op != '(') {
- insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
- }
- }
-
- return expr;
-}
-
-/*
- * Rspamd regexp utility functions
- */
-struct rspamd_regexp_element *
-parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
-{
- const gchar *begin, *end, *p, *src, *start;
- gchar *dbegin, *dend;
- struct rspamd_regexp_element *result;
- rspamd_regexp_t *re;
- GError *err = NULL;
- GString *re_flags;
-
- if (line == NULL) {
- msg_err ("cannot parse NULL line");
- return NULL;
- }
-
- if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) {
- return ((struct rspamd_regexp_element *)rspamd_regexp_get_ud (re));
- }
-
- src = line;
- result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_element));
- /* Skip whitespaces */
- while (g_ascii_isspace (*line)) {
- line++;
- }
- if (*line == '\0') {
- msg_warn ("got empty regexp");
- return NULL;
- }
- start = line;
- /* First try to find header name */
- begin = strchr (line, '/');
- if (begin != NULL) {
- p = begin;
- end = NULL;
- while (p != line) {
- if (*p == '=') {
- end = p;
- break;
- }
- p--;
- }
- if (end) {
- result->header = rspamd_mempool_alloc (pool, end - line + 1);
- rspamd_strlcpy (result->header, line, end - line + 1);
- result->type = REGEXP_HEADER;
- line = end;
- }
- }
- else {
- result->header = rspamd_mempool_strdup (pool, line);
- result->type = REGEXP_HEADER;
- line = start;
- }
- /* Find begin of regexp */
- while (*line && *line != '/') {
- line++;
- }
- if (*line != '\0') {
- begin = line + 1;
- }
- else if (result->header == NULL) {
- /* Assume that line without // is just a header name */
- result->header = rspamd_mempool_strdup (pool, line);
- result->type = REGEXP_HEADER;
- return result;
- }
- else {
- /* We got header name earlier but have not found // expression, so it is invalid regexp */
- msg_warn (
- "got no header name (eg. header=) but without corresponding regexp, %s",
- src);
- return NULL;
- }
- /* Find end */
- end = begin;
- while (*end && (*end != '/' || *(end - 1) == '\\')) {
- end++;
- }
- if (end == begin || *end != '/') {
- msg_warn ("no trailing / in regexp %s", src);
- return NULL;
- }
- /* Parse flags */
- p = end + 1;
- re_flags = g_string_sized_new (32);
- while (p != NULL) {
- switch (*p) {
- case 'i':
- case 'm':
- case 's':
- case 'x':
- case 'u':
- case 'O':
- case 'r':
- g_string_append_c (re_flags, *p);
- p++;
- break;
- case 'o':
- p++;
- break;
- /* Type flags */
- case 'H':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_HEADER;
- }
- p++;
- break;
- case 'M':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_MESSAGE;
- }
- p++;
- break;
- case 'P':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_MIME;
- }
- p++;
- break;
- case 'U':
- if (result->type == REGEXP_NONE) {
- result->type = REGEXP_URL;
- }
- p++;
- break;
- case 'X':
- if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
- result->type = REGEXP_RAW_HEADER;
- }
- p++;
- break;
- case 'T':
- result->is_test = TRUE;
- p++;
- break;
- case 'S':
- result->is_strong = TRUE;
- p++;
- break;
- /* Stop flags parsing */
- default:
- p = NULL;
- break;
- }
- }
-
- result->regexp_text = rspamd_mempool_strdup (pool, start);
- dbegin = result->regexp_text + (begin - start);
- dend = result->regexp_text + (end - start);
- *dend = '\0';
-
- if (raw_mode) {
- g_string_append_c (re_flags, 'r');
- }
-
- result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
- &err);
-
- g_string_free (re_flags, TRUE);
-
- if (result->regexp == NULL || err != NULL) {
- msg_warn ("could not read regexp: %s while reading regexp %s",
- err ? err->message : "unknown error",
- src);
- return NULL;
- }
-
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t) rspamd_regexp_unref,
- (void *)result->regexp);
-
- rspamd_regexp_set_ud (result->regexp, result);
-
- rspamd_regexp_cache_insert (NULL, line, NULL, result->regexp);
-
- *dend = '/';
-
- return result;
-}
-
-gboolean
-call_expression_function (struct expression_function * func,
- struct rspamd_task * task,
- lua_State *L)
-{
- struct _fl *selected, key;
-
- key.name = func->name;
-
- selected = bsearch (&key,
- list_ptr,
- functions_number,
- sizeof (struct _fl),
- fl_cmp);
- if (selected == NULL) {
- /* Try to check lua function */
- return FALSE;
- }
-
- return selected->func (task, func->args, selected->user_data);
-}
-
-struct expression_argument *
-get_function_arg (struct expression *expr,
- struct rspamd_task *task,
- gboolean want_string)
-{
- GQueue *stack;
- gsize cur, op1, op2;
- struct expression_argument *res;
- struct expression *it;
-
- if (expr == NULL) {
- msg_warn ("NULL expression passed");
- return NULL;
- }
- if (expr->next == NULL) {
- res =
- rspamd_mempool_alloc (task->task_pool,
- sizeof (struct expression_argument));
- if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type ==
- EXPR_REGEXP_PARSED) {
- res->type = EXPRESSION_ARGUMENT_NORMAL;
- res->data = expr->content.operand;
- }
- else if (expr->type == EXPR_FUNCTION && !want_string) {
- res->type = EXPRESSION_ARGUMENT_BOOL;
- cur = call_expression_function (expr->content.operand, task, NULL);
- res->data = GSIZE_TO_POINTER (cur);
- }
- else {
- msg_warn (
- "cannot parse argument: it contains operator or bool expression that is not wanted");
- return NULL;
- }
- return res;
- }
- else if (!want_string) {
- res =
- rspamd_mempool_alloc (task->task_pool,
- sizeof (struct expression_argument));
- res->type = EXPRESSION_ARGUMENT_BOOL;
- stack = g_queue_new ();
- it = expr;
-
- while (it) {
- if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED ||
- it->type == EXPR_STR) {
- g_queue_free (stack);
- res->type = EXPRESSION_ARGUMENT_EXPR;
- res->data = expr;
- return res;
- }
- else if (it->type == EXPR_FUNCTION) {
- cur =
- (gsize) call_expression_function ((struct
- expression_function
- *)it->content.operand, task, NULL);
- debug_task ("function %s returned %s",
- ((struct expression_function *)it->content.operand)->name,
- cur ? "true" : "false");
- }
- else if (it->type == EXPR_OPERATION) {
- if (g_queue_is_empty (stack)) {
- /* Queue has no operands for operation, exiting */
- debug_task ("invalid expression");
- g_queue_free (stack);
- return NULL;
- }
- switch (it->content.operation) {
- case '!':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op1 = !op1;
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
- break;
- case '&':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
- break;
- case '|':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
- break;
- default:
- it = it->next;
- continue;
- }
- }
- if (it) {
- it = it->next;
- }
- }
- if (!g_queue_is_empty (stack)) {
- res->data = g_queue_pop_head (stack);
- }
- else {
- res->data = GSIZE_TO_POINTER (FALSE);
- }
-
- return res;
- }
-
- msg_warn ("invalid expression argument");
-
- return NULL;
-}
-
-void
-register_expression_function (const gchar *name,
- rspamd_internal_func_t func,
- void *user_data)
-{
- static struct _fl *new;
-
- functions_number++;
-
- new = g_new (struct _fl, functions_number);
- memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
- if (list_allocated) {
- g_free (list_ptr);
- }
-
- list_allocated = TRUE;
- new[functions_number - 1].name = name;
- new[functions_number - 1].func = func;
- new[functions_number - 1].user_data = user_data;
- qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
- list_ptr = new;
-}
-
-gboolean
-rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused)
-{
- struct expression_argument *arg;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
- msg_warn ("invalid argument to function is passed");
- return FALSE;
- }
-
- /* XXX: really write this function */
- return TRUE;
-}
-
-gboolean
-rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused)
-{
- struct expression_argument *arg;
- GList *headerlist;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
- msg_warn ("invalid argument to function is passed");
- return FALSE;
- }
-
- debug_task ("try to get header %s", (gchar *)arg->data);
- headerlist = message_get_header (task,
- (gchar *)arg->data,
- FALSE);
- if (headerlist) {
- return TRUE;
- }
- return FALSE;
-}
-
-/*
- * This function is designed to find difference between text/html and text/plain parts
- * It takes one argument: difference threshold, if we have two text parts, compare
- * its hashes and check for threshold, if value is greater than threshold, return TRUE
- * and return FALSE otherwise.
- */
-gboolean
-rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused)
-{
- gint threshold, threshold2 = -1, diff;
- struct mime_text_part *p1, *p2;
- GList *cur;
- struct expression_argument *arg;
- GMimeObject *parent;
- const GMimeContentType *ct;
- gint *pdiff;
-
- if (args == NULL) {
- debug_task ("no threshold is specified, assume it 100");
- threshold = 100;
- }
- else {
- errno = 0;
- arg = get_function_arg (args->data, task, TRUE);
- threshold = strtoul ((gchar *)arg->data, NULL, 10);
- if (errno != 0) {
- msg_info ("bad numeric value for threshold \"%s\", assume it 100",
- (gchar *)args->data);
- threshold = 100;
- }
- if (args->next) {
- arg = get_function_arg (args->next->data, task, TRUE);
- errno = 0;
- threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
- if (errno != 0) {
- msg_info ("bad numeric value for threshold \"%s\", ignore it",
- (gchar *)arg->data);
- threshold2 = -1;
- }
- }
- }
-
- if ((pdiff =
- rspamd_mempool_get_variable (task->task_pool,
- "parts_distance")) != NULL) {
- diff = *pdiff;
- if (diff != -1) {
- if (threshold2 > 0) {
- if (diff >=
- MIN (threshold,
- threshold2) && diff < MAX (threshold, threshold2)) {
- return TRUE;
- }
- }
- else {
- if (diff <= threshold) {
- return TRUE;
- }
- }
- return FALSE;
- }
- else {
- return FALSE;
- }
- }
-
- if (g_list_length (task->text_parts) == 2) {
- cur = g_list_first (task->text_parts);
- p1 = cur->data;
- cur = g_list_next (cur);
- pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
- *pdiff = -1;
-
- if (cur == NULL) {
- msg_info ("bad parts list");
- return FALSE;
- }
- p2 = cur->data;
- /* First of all check parent object */
- if (p1->parent && p1->parent == p2->parent) {
- parent = p1->parent;
- ct = g_mime_object_get_content_type (parent);
-#ifndef GMIME24
- if (ct == NULL ||
- !g_mime_content_type_is_type (ct, "multipart", "alternative")) {
-#else
- if (ct == NULL ||
- !g_mime_content_type_is_type ((GMimeContentType *)ct,
- "multipart", "alternative")) {
-#endif
- debug_task (
- "two parts are not belong to multipart/alternative container, skip check");
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return FALSE;
- }
- }
- else {
- debug_task (
- "message contains two parts but they are in different multi-parts");
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return FALSE;
- }
- if (!p1->is_empty && !p2->is_empty) {
- if (p1->diff_str != NULL && p2->diff_str != NULL) {
- diff = rspamd_diff_distance_normalized (p1->diff_str,
- p2->diff_str);
- }
- else {
- diff = rspamd_fuzzy_compare_parts (p1, p2);
- }
- debug_task (
- "got likeliness between parts of %d%%, threshold is %d%%",
- diff,
- threshold);
- *pdiff = diff;
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- if (threshold2 > 0) {
- if (diff >=
- MIN (threshold,
- threshold2) && diff < MAX (threshold, threshold2)) {
- return TRUE;
- }
- }
- else {
- if (diff <= threshold) {
- return TRUE;
- }
- }
- }
- else if ((p1->is_empty &&
- !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
- /* Empty and non empty parts are different */
- *pdiff = 0;
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return TRUE;
- }
- }
- else {
- debug_task (
- "message has too many text parts, so do not try to compare them with each other");
- rspamd_mempool_set_variable (task->task_pool,
- "parts_distance",
- pdiff,
- NULL);
- return FALSE;
- }
-
- rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff,
- NULL);
- return FALSE;
-}
-
-struct addr_list {
- const gchar *name;
- const gchar *addr;
-};
-
-#define COMPARE_RCPT_LEN 3
-#define MIN_RCPT_TO_COMPARE 7
-
-gboolean
-rspamd_recipients_distance (struct rspamd_task *task, GList * args,
- void *unused)
-{
- struct expression_argument *arg;
- InternetAddressList *cur;
- double threshold;
- struct addr_list *ar;
- gchar *c;
- gint num, i, j, hits = 0, total = 0;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- errno = 0;
- threshold = strtod ((gchar *)arg->data, NULL);
- if (errno != 0) {
- msg_warn ("invalid numeric value '%s': %s",
- (gchar *)arg->data,
- strerror (errno));
- return FALSE;
- }
-
- if (!task->rcpt_mime) {
- return FALSE;
- }
- num = internet_address_list_length (task->rcpt_mime);
- if (num < MIN_RCPT_TO_COMPARE) {
- return FALSE;
- }
- ar =
- rspamd_mempool_alloc0 (task->task_pool, num *
- sizeof (struct addr_list));
-
- /* Fill array */
- cur = task->rcpt_mime;
-#ifdef GMIME24
- for (i = 0; i < num; i++) {
- InternetAddress *iaelt =
- internet_address_list_get_address(cur, i);
- InternetAddressMailbox *iamb =
- INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
- INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
- if (iamb) {
- ar[i].name = internet_address_mailbox_get_addr (iamb);
- if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
- ar[i].addr = c + 1;
- }
- }
- }
-#else
- InternetAddress *addr;
- i = 0;
- while (cur) {
- addr = internet_address_list_get_address (cur);
- if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
- ar[i].name = rspamd_mempool_strdup (task->task_pool,
- internet_address_get_addr (addr));
- if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
- *c = '\0';
- ar[i].addr = c + 1;
- }
- cur = internet_address_list_next (cur);
- i++;
- }
- else {
- cur = internet_address_list_next (cur);
- }
- }
-#endif
-
- /* Cycle all elements in array */
- for (i = 0; i < num; i++) {
- for (j = i + 1; j < num; j++) {
- if (ar[i].name && ar[j].name &&
- g_ascii_strncasecmp (ar[i].name, ar[j].name,
- COMPARE_RCPT_LEN) == 0) {
- /* Common name part */
- hits++;
- }
- else if (ar[i].addr && ar[j].addr &&
- g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
- /* Common address part, but different name */
- hits++;
- }
- total++;
- }
- }
-
- if ((double)(hits * num / 2.) / (double)total >= threshold) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_has_only_html_part (struct rspamd_task * task, GList * args,
- void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- gboolean res = FALSE;
-
- cur = g_list_first (task->text_parts);
- while (cur) {
- p = cur->data;
- if (p->is_html) {
- res = TRUE;
- }
- else {
- res = FALSE;
- break;
- }
- cur = g_list_next (cur);
- }
-
- return res;
-}
-
-static gboolean
-is_recipient_list_sorted (const InternetAddressList * ia)
-{
- const InternetAddressList *cur;
- InternetAddress *addr;
- gboolean res = TRUE;
- struct addr_list current = { NULL, NULL }, previous = {
- NULL, NULL
- };
-#ifdef GMIME24
- gint num, i;
-#endif
-
- /* Do not check to short address lists */
- if (internet_address_list_length ((InternetAddressList *)ia) <
- MIN_RCPT_TO_COMPARE) {
- return FALSE;
- }
-#ifdef GMIME24
- num = internet_address_list_length ((InternetAddressList *)ia);
- cur = ia;
- for (i = 0; i < num; i++) {
- addr =
- internet_address_list_get_address ((InternetAddressList *)cur, i);
- current.addr = (gchar *)internet_address_get_name (addr);
- if (previous.addr != NULL) {
- if (current.addr &&
- g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
- res = FALSE;
- break;
- }
- }
- previous.addr = current.addr;
- }
-#else
- cur = ia;
- while (cur) {
- addr = internet_address_list_get_address (cur);
- if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
- current.addr = internet_address_get_addr (addr);
- if (previous.addr != NULL) {
- if (current.addr &&
- g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
- res = FALSE;
- break;
- }
- }
- previous.addr = current.addr;
- }
- cur = internet_address_list_next (cur);
- }
-#endif
-
- return res;
-}
-
-gboolean
-rspamd_is_recipients_sorted (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- /* Check all types of addresses */
- if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
- GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
- return TRUE;
- }
- if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
- GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
- return TRUE;
- }
- if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
- GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_compare_transfer_encoding (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- GMimeObject *part;
-#ifndef GMIME24
- GMimePartEncodingType enc_req, part_enc;
-#else
- GMimeContentEncoding enc_req, part_enc;
-#endif
- struct expression_argument *arg;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
-#ifndef GMIME24
- enc_req = g_mime_part_encoding_from_string (arg->data);
- if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
-#else
- enc_req = g_mime_content_encoding_from_string (arg->data);
- if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
-#endif
- msg_warn ("bad encoding type: %s", (gchar *)arg->data);
- return FALSE;
- }
-
- part = g_mime_message_get_mime_part (task->message);
- if (part) {
- if (GMIME_IS_PART (part)) {
-#ifndef GMIME24
- part_enc = g_mime_part_get_encoding (GMIME_PART (part));
- if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
- /* Assume 7bit as default transfer encoding */
- part_enc = GMIME_PART_ENCODING_7BIT;
- }
-#else
- part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
- if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
- /* Assume 7bit as default transfer encoding */
- part_enc = GMIME_CONTENT_ENCODING_7BIT;
- }
-#endif
-
-
- debug_task ("got encoding in part: %d and compare with %d",
- (gint)part_enc,
- (gint)enc_req);
-#ifndef GMIME24
- g_object_unref (part);
-#endif
-
- return part_enc == enc_req;
- }
-#ifndef GMIME24
- g_object_unref (part);
-#endif
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- gboolean res = TRUE;
-
- cur = g_list_first (task->text_parts);
- while (cur) {
- p = cur->data;
- if (!p->is_empty && p->is_html) {
- if (p->is_balanced) {
- res = TRUE;
- }
- else {
- res = FALSE;
- break;
- }
- }
- cur = g_list_next (cur);
- }
-
- return res;
-
-}
-
-struct html_callback_data {
- struct html_tag *tag;
- gboolean *res;
-};
-
-static gboolean
-search_html_node_callback (GNode * node, gpointer data)
-{
- struct html_callback_data *cd = data;
- struct html_node *nd;
-
- nd = node->data;
- if (nd) {
- if (nd->tag == cd->tag) {
- *cd->res = TRUE;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-gboolean
-rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- struct expression_argument *arg;
- struct html_tag *tag;
- gboolean res = FALSE;
- struct html_callback_data cd;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- tag = get_tag_by_name (arg->data);
- if (tag == NULL) {
- msg_warn ("unknown tag type passed as argument: %s",
- (gchar *)arg->data);
- return FALSE;
- }
-
- cur = g_list_first (task->text_parts);
- cd.res = &res;
- cd.tag = tag;
-
- while (cur && res == FALSE) {
- p = cur->data;
- if (!p->is_empty && p->is_html && p->html_nodes) {
- g_node_traverse (p->html_nodes,
- G_PRE_ORDER,
- G_TRAVERSE_ALL,
- -1,
- search_html_node_callback,
- &cd);
- }
- cur = g_list_next (cur);
- }
-
- return res;
-
-}
-
-gboolean
-rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused)
-{
- struct mime_text_part *p;
- GList *cur;
- gboolean res = FALSE;
-
- cur = g_list_first (task->text_parts);
-
- while (cur && res == FALSE) {
- p = cur->data;
- if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
- res = TRUE;
- }
- cur = g_list_next (cur);
- }
-
- return res;
-
-}
-
-
-/*
- * vi:ts=4
- */
diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h
deleted file mode 100644
index 469cc690d..000000000
--- a/src/libmime/expressions.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * @file expressions.h
- * Rspamd expressions API
- */
-
-#ifndef RSPAMD_EXPRESSIONS_H
-#define RSPAMD_EXPRESSIONS_H
-
-#include "config.h"
-#include <lua.h>
-
-struct rspamd_task;
-struct rspamd_regexp_element;
-
-/**
- * Rspamd expression function
- */
-struct expression_function {
- gchar *name; /**< name of function */
- GList *args; /**< its args */
-};
-
-/**
- * Function's argument
- */
-struct expression_argument {
- enum {
- EXPRESSION_ARGUMENT_NORMAL,
- EXPRESSION_ARGUMENT_BOOL,
- EXPRESSION_ARGUMENT_EXPR,
- } type; /**< type of argument (text or other function) */
- void *data; /**< pointer to its data */
-};
-
-/**
- * Logic expression
- */
-struct expression {
- enum {
- EXPR_REGEXP,
- EXPR_OPERATION,
- EXPR_FUNCTION,
- EXPR_STR,
- EXPR_REGEXP_PARSED,
- } type; /**< expression type */
- union {
- void *operand;
- gchar operation;
- } content; /**< union for storing operand or operation code */
- const gchar *orig; /**< original line */
- struct expression *next; /**< chain link */
-};
-
-typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args,
- void *user_data);
-
-/**
- * Parse regexp line to regexp structure
- * @param pool memory pool to use
- * @param line incoming line
- * @return regexp structure or NULL in case of error
- */
-struct rspamd_regexp_element * parse_regexp (rspamd_mempool_t *pool,
- const gchar *line,
- gboolean raw_mode);
-
-/**
- * Parse composites line to composites structure (eg. "SYMBOL1&SYMBOL2|!SYMBOL3")
- * @param pool memory pool to use
- * @param line incoming line
- * @return expression structure or NULL in case of error
- */
-struct expression * parse_expression (rspamd_mempool_t *pool, gchar *line);
-
-/**
- * Call specified fucntion and return boolean result
- * @param func function to call
- * @param task task object
- * @param L lua specific state
- * @return TRUE or FALSE depending on function result
- */
-gboolean call_expression_function (struct expression_function *func,
- struct rspamd_task *task,
- lua_State *L);
-
-/**
- * Register specified function to rspamd internal functions list
- * @param name name of function
- * @param func pointer to function
- */
-void register_expression_function (const gchar *name,
- rspamd_internal_func_t func,
- void *user_data);
-
-/**
- * Add regexp to regexp cache
- * @param line symbolic representation
- * @param pointer regexp data
- */
-void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool);
-
-/**
- * Check regexp in cache
- * @param line symbolic representation
- * @return pointer to regexp data or NULL if regexp is not found
- */
-void * re_cache_check (const gchar *line, rspamd_mempool_t *pool);
-
-/**
- * Remove regexp from regexp cache
- * @param line symbolic representation
- */
-void re_cache_del (const gchar *line, rspamd_mempool_t *pool);
-
-/**
- * Add regexp to regexp task cache
- * @param task task object
- * @param pointer regexp data
- * @param result numeric result of this regexp
- */
-void task_cache_add (struct rspamd_task *task,
- struct rspamd_regexp_element *re,
- gint32 result);
-
-/**
- * Check regexp in cache
- * @param task task object
- * @param pointer regexp data
- * @return numeric result if value exists or -1 if not
- */
-gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re);
-
-/**
- * Parse and return a single function argument for a function (may recurse)
- * @param expr expression structure that represents function's argument
- * @param task task object
- * @param want_string return NULL if argument is not a string
- * @return expression argument structure or NULL if failed
- */
-struct expression_argument * get_function_arg (struct expression *expr,
- struct rspamd_task *task,
- gboolean want_string);
-
-#endif
diff --git a/src/libmime/filter.c b/src/libmime/filter.c
index 7736ba4cf..e107bc56b 100644
--- a/src/libmime/filter.c
+++ b/src/libmime/filter.c
@@ -29,7 +29,7 @@
#include "message.h"
#include "cfg_file.h"
#include "util.h"
-#include "expressions.h"
+#include "expression.h"
#include "diff.h"
#include "libstat/stat_api.h"
@@ -54,6 +54,19 @@
#define BITSPERBYTE (8 * sizeof (gchar))
#define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE)
+static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len,
+ rspamd_mempool_t *pool, gpointer ud, GError **err);
+static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom);
+static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom);
+static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom);
+
+const struct rspamd_atom_subr composite_expr_subr = {
+ .parse = rspamd_composite_expr_parse,
+ .process = rspamd_composite_expr_process,
+ .priority = rspamd_composite_expr_priority,
+ .destroy = rspamd_composite_expr_destroy
+};
+
static inline GQuark
filter_error_quark (void)
{
@@ -398,6 +411,7 @@ rspamd_process_filters (struct rspamd_task *task)
struct composites_data {
struct rspamd_task *task;
+ struct rspamd_composite *composite;
struct metric_result *metric_res;
GTree *symbols_to_remove;
guint8 *checked;
@@ -409,186 +423,150 @@ struct symbol_remove_data {
gboolean remove_symbol;
};
-static gint
-remove_compare_data (gconstpointer a, gconstpointer b)
+
+/*
+ * Composites are just sequences of symbols.
+ *
+ * Parse one composite atom starting at `line`: the atom is the run of
+ * characters up to the first separator, brace or operator character.
+ * Returns an atom allocated from `pool` whose `len` is the number of consumed
+ * characters, or NULL (with `err` set) if no symbol name starts at `line`.
+ */
+static rspamd_expression_atom_t *
+rspamd_composite_expr_parse (const gchar *line, gsize len,
+ rspamd_mempool_t *pool, gpointer ud, GError **err)
 {
- const gchar *ca = a, *cb = b;
+ gsize clen;
+ gchar *sym;
+ rspamd_expression_atom_t *res;
+
+ /* A closing brace terminates a symbol name just like an opening one */
+ clen = strcspn (line, ", \t()+!|&\n");
+ if (clen == 0) {
+ /* Invalid composite atom */
+ g_set_error (err, filter_error_quark (), 100, "Invalid composite: %s",
+ line);
+ return NULL;
+ }
- return strcmp (ca, cb);
-}
+ /* Keep only the symbol name, not the remainder of the expression */
+ sym = rspamd_mempool_alloc (pool, clen + 1);
+ rspamd_strlcpy (sym, line, clen + 1);
+
+ res = rspamd_mempool_alloc0 (pool, sizeof (*res));
+ res->len = clen;
+ res->data = sym;
+ /* rspamd_composite_expr_process () reads the name via atom->str */
+ res->str = sym;
-static void
-composites_foreach_callback (gpointer key, gpointer value, void *data)
+ return res;
+}
+/*
+ * Evaluate a single composite atom (a symbol name, optionally prefixed with
+ * '~' or '-') against the symbols stored in cd->metric_res.
+ * Returns non-zero if the symbol is present or the nested composite it names
+ * evaluated to true, zero otherwise. A matched symbol is queued in
+ * cd->symbols_to_remove; the prefix selects what is to be dropped later:
+ * no prefix - symbol and weight, '~' - symbol only, '-' - nothing.
+ */
+static gint
+rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom)
 {
- struct composites_data *cd = (struct composites_data *)data;
- struct rspamd_composite *composite = value, *ncomp;
- struct expression *expr;
- GQueue *stack;
- GList *symbols = NULL, *s;
- gsize cur, op1, op2;
- gchar logbuf[256], *sym, *check_sym;
- gint r;
- struct symbol *ms;
+ struct composites_data *cd = (struct composites_data *)input;
+ const gchar *sym = atom->str;
+ struct rspamd_composite *ncomp;
 struct symbol_remove_data *rd;
+ struct symbol *ms;
+ gint rc = 0;
+ /* Prefix character of the atom; stays '\0' when there is no prefix */
+ gchar t = '\0';
-
- expr = composite->expr;
- if (isset (cd->checked, composite->id)) {
- /* Symbol was already checked */
- return;
+ if (isset (cd->checked, cd->composite->id * 2)) {
+ /* We have already checked this composite, so just return its value */
+ rc = isset (cd->checked, cd->composite->id * 2 + 1);
+ return rc;
 }
- stack = g_queue_new ();
+ if (*sym == '~' || *sym == '-') {
+ t = *sym ++;
+ }
- while (expr) {
- if (expr->type == EXPR_STR) {
- /* Find corresponding symbol */
- sym = expr->content.operand;
- if (*sym == '~' || *sym == '-') {
- sym++;
- }
- if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) {
- cur = 0;
- if ((ncomp =
- g_hash_table_lookup (cd->task->cfg->composite_symbols,
- sym)) != NULL) {
- /* Set checked for this symbol to avoid cyclic references */
- if (isclr (cd->checked, ncomp->id)) {
- setbit (cd->checked, composite->id);
- composites_foreach_callback (sym, ncomp, cd);
- if (g_hash_table_lookup (cd->metric_res->symbols,
- sym) != NULL) {
- cur = 1;
- }
- }
- }
+ if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) {
+ if ((ncomp =
+ g_hash_table_lookup (cd->task->cfg->composite_symbols,
+ sym)) != NULL) {
+ /* Set checked for this symbol to avoid cyclic references */
+ if (isclr (cd->checked, ncomp->id * 2)) {
+ setbit (cd->checked, cd->composite->id * 2);
+ rc = rspamd_process_expression (ncomp->expr, cd);
+ clrbit (cd->checked, cd->composite->id * 2);
+ ms = g_hash_table_lookup (cd->metric_res->symbols, sym);
 }
 else {
- cur = 1;
- symbols = g_list_prepend (symbols, expr->content.operand);
+ /*
+ * XXX: in case of cyclic references this would return 0
+ */
+ rc = isset (cd->checked, ncomp->id * 2 + 1);
 }
- g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
+ }
+ }
+ else {
+ rc = 1;
+ }
+
+ if (rc && ms) {
+ /*
+ * At this point we know that we need to do something about this symbol,
+ * however, we don't know whether we need to delete it unfortunately,
+ * that depends on the later decisions when the complete expression is
+ * evaluated.
+ */
+ /* Size the allocation by the object stored in it, not by cd */
+ rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd));
+ rd->ms = ms;
+ if (G_UNLIKELY (t == '~')) {
+ rd->remove_weight = FALSE;
+ rd->remove_symbol = TRUE;
+ }
+ else if (G_UNLIKELY (t == '-')) {
+ rd->remove_symbol = FALSE;
+ rd->remove_weight = FALSE;
 }
 else {
- if (g_queue_is_empty (stack)) {
- /* Queue has no operands for operation, exiting */
- g_list_free (symbols);
- g_queue_free (stack);
- setbit (cd->checked, composite->id);
- return;
- }
- switch (expr->content.operation) {
- case '!':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op1 = !op1;
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
- break;
- case '&':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
- break;
- case '|':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
- break;
- default:
- expr = expr->next;
- continue;
- }
+ rd->remove_symbol = TRUE;
+ rd->remove_weight = TRUE;
 }
- expr = expr->next;
- }
- if (!g_queue_is_empty (stack)) {
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- if (op1) {
- /* Remove all symbols that are in composite symbol */
- s = g_list_first (symbols);
- r = rspamd_snprintf (logbuf,
- sizeof (logbuf),
- "<%s>, insert symbol %s instead of symbols: ",
- cd->task->message_id,
- key);
- while (s) {
- sym = s->data;
- if (*sym == '~' || *sym == '-') {
- check_sym = sym + 1;
- }
- else {
- check_sym = sym;
- }
- ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym);
-
- if (ms == NULL) {
- /* Try to process other composites */
- if ((ncomp =
- g_hash_table_lookup (cd->task->cfg->composite_symbols,
- check_sym)) != NULL) {
- /* Set checked for this symbol to avoid cyclic references */
- if (isclr (cd->checked, ncomp->id)) {
- setbit (cd->checked, composite->id);
- composites_foreach_callback (check_sym, ncomp, cd);
- ms = g_hash_table_lookup (cd->metric_res->symbols,
- check_sym);
- }
- }
- }
-
- if (ms != NULL) {
- rd =
- rspamd_mempool_alloc (cd->task->task_pool,
- sizeof (struct symbol_remove_data));
- rd->ms = ms;
- if (G_UNLIKELY (*sym == '~')) {
- rd->remove_weight = FALSE;
- rd->remove_symbol = TRUE;
- }
- else if (G_UNLIKELY (*sym == '-')) {
- rd->remove_symbol = FALSE;
- rd->remove_weight = FALSE;
- }
- else {
- rd->remove_symbol = TRUE;
- rd->remove_weight = TRUE;
- }
- if (!g_tree_lookup (cd->symbols_to_remove, ms->name)) {
- g_tree_insert (cd->symbols_to_remove,
- (gpointer)ms->name,
- rd);
- }
- }
- else {
-
- }
-
- if (s->next) {
- r += rspamd_snprintf (logbuf + r,
- sizeof (logbuf) - r,
- "%s, ",
- s->data);
- }
- else {
- r += rspamd_snprintf (logbuf + r,
- sizeof (logbuf) - r,
- "%s",
- s->data);
- }
- s = g_list_next (s);
- }
- /* Add new symbol */
- rspamd_task_insert_result_single (cd->task, key, 1.0, NULL);
- msg_info ("%s", logbuf);
+ if (!g_tree_lookup (cd->symbols_to_remove, ms->name)) {
+ g_tree_insert (cd->symbols_to_remove,
+ (gpointer)ms->name,
+ rd);
 }
 }
- setbit (cd->checked, composite->id);
- g_queue_free (stack);
- g_list_free (symbols);
+ return rc;
+}
- return;
+/*
+ * Every composite atom has the same (zero) priority: there is nothing
+ * to prefer between them.
+ */
+static gint
+rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
+{
+	const gint no_preference = 0;
+
+	return no_preference;
 }
+/*
+ * Destructor callback for composite atoms: intentionally does nothing
+ */
+static void
+rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
+{
+	/* Nothing to release here: the atom goes away together with its pool */
+	(void)atom;
+}
+
+/*
+ * Comparator that orders two keys as NUL-terminated strings
+ */
+static gint
+remove_compare_data (gconstpointer a, gconstpointer b)
+{
+	return strcmp ((const gchar *)a, (const gchar *)b);
+}
+
+/*
+ * Per-composite callback: evaluate the expression of the composite passed in
+ * `value` and record the outcome in the `checked` bitmap of the
+ * composites_data passed in `data`. Each composite owns two adjacent bits:
+ * id * 2 means "already evaluated", id * 2 + 1 holds the result.
+ */
+static void
+composites_foreach_callback (gpointer key, gpointer value, void *data)
+{
+	struct composites_data *state = data;
+	struct rspamd_composite *current = value;
+	gint matched;
+
+	/* Atoms of the expression find the composite being evaluated here */
+	state->composite = current;
+	matched = rspamd_process_expression (current->expr, state);
+
+	setbit (state->checked, current->id * 2);
+
+	if (!matched) {
+		clrbit (state->checked, current->id * 2 + 1);
+	}
+	else {
+		setbit (state->checked, current->id * 2 + 1);
+	}
+}
static gboolean
diff --git a/src/libmime/filter.h b/src/libmime/filter.h
index de324caf7..3eef17525 100644
--- a/src/libmime/filter.h
+++ b/src/libmime/filter.h
@@ -71,11 +71,16 @@ struct metric_result {
double grow_factor; /**< current grow factor */
};
+
+/**
+ * Subr for composite expressions
+ */
+extern const struct rspamd_atom_subr composite_expr_subr;
/**
* Composite structure
*/
struct rspamd_composite {
- struct expression *expr;
+ struct rspamd_expression *expr;
gint id;
};
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
new file mode 100644
index 000000000..841610480
--- /dev/null
+++ b/src/libmime/mime_expressions.c
@@ -0,0 +1,2234 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "main.h"
+#include "message.h"
+#include "fuzzy.h"
+#include "mime_expressions.h"
+#include "html.h"
+#include "lua/lua_common.h"
+#include "diff.h"
+
+gboolean rspamd_compare_encoding (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_header_exists (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_parts_distance (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_recipients_distance (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_has_only_html_part (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_is_html_balanced (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_has_html_tag (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+gboolean rspamd_has_fake_html (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_has_content_part (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
+ GArray * args,
+ void *unused);
+
+static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
+ rspamd_mempool_t *pool, gpointer ud, GError **err);
+static gint rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom);
+static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
+static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
+
+/**
+ * Regexp type, selected by a flag letter after the closing slash:
+ * /H - header, /M - message, /P - mime, /U - url, /X - raw header.
+ * REGEXP_NONE means that no type has been selected.
+ */
+enum rspamd_regexp_type {
+ REGEXP_NONE = 0,
+ REGEXP_HEADER,
+ REGEXP_MIME,
+ REGEXP_MESSAGE,
+ REGEXP_URL,
+ REGEXP_RAW_HEADER
+};
+
+/**
+ * Regexp structure: one regexp atom as filled in by
+ * rspamd_mime_expr_parse_regexp_atom ()
+ */
+struct rspamd_regexp_atom {
+ enum rspamd_regexp_type type; /**< regexp type */
+ gchar *regexp_text; /**< regexp text representation */
+ rspamd_regexp_t *regexp; /**< regexp structure */
+ gchar *header; /**< header name for header regexps */
+ gboolean is_test; /**< true if this expression must be tested (the 'T' flag) */
+ gboolean is_strong; /**< true if headers search must be case sensitive (the 'S' flag) */
+ gboolean is_multiple; /**< true if we need to match all inclusions of atom (the 'A' flag) */
+};
+
+/**
+ * Rspamd expression function: a function call atom as built by
+ * rspamd_mime_expr_parse_function_atom ()
+ */
+struct rspamd_function_atom {
+ gchar *name; /**< name of function */
+ GArray *args; /**< its args, an array of struct expression_argument */
+};
+
+/**
+ * Mime expression atom: either a regexp or an internal function call
+ */
+struct rspamd_mime_atom {
+ gchar *str; /**< text of the atom as it appears in the expression */
+ union {
+ struct rspamd_regexp_atom *re; /**< valid if is_function is FALSE */
+ struct rspamd_function_atom *func; /**< valid if is_function is TRUE */
+ } d;
+ gboolean is_function; /**< selects the active member of d */
+};
+
+/*
+ * List of internal functions of rspamd
+ * Sorted by name to use bsearch: fl_cmp () compares names with strcmp (),
+ * so new entries must be inserted at their strcmp () position
+ */
+static struct _fl {
+ const gchar *name;
+ rspamd_internal_func_t func;
+ void *user_data;
+} rspamd_functions_list[] = {
+ {"check_smtp_data", rspamd_check_smtp_data, NULL},
+ {"compare_encoding", rspamd_compare_encoding, NULL},
+ {"compare_parts_distance", rspamd_parts_distance, NULL},
+ {"compare_recipients_distance", rspamd_recipients_distance, NULL},
+ {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
+ {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
+ {"content_type_has_param", rspamd_content_type_has_param, NULL},
+ {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
+ {"content_type_is_type", rspamd_content_type_is_type, NULL},
+ {"has_content_part", rspamd_has_content_part, NULL},
+ {"has_content_part_len", rspamd_has_content_part_len, NULL},
+ {"has_fake_html", rspamd_has_fake_html, NULL},
+ {"has_html_tag", rspamd_has_html_tag, NULL},
+ {"has_only_html_part", rspamd_has_only_html_part, NULL},
+ {"header_exists", rspamd_header_exists, NULL},
+ {"is_html_balanced", rspamd_is_html_balanced, NULL},
+ {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
+ {"raw_header_exists", rspamd_raw_header_exists, NULL}
+};
+
+/* Atom callbacks (parse, process, priority, destroy) for mime expressions */
+const struct rspamd_atom_subr mime_expr_subr = {
+ .parse = rspamd_mime_expr_parse,
+ .process = rspamd_mime_expr_process,
+ .priority = rspamd_mime_expr_priority,
+ .destroy = rspamd_mime_expr_destroy
+};
+
+/* Current functions table; initially the static rspamd_functions_list */
+static struct _fl *list_ptr = &rspamd_functions_list[0];
+/* Number of entries in the table that list_ptr points to */
+static guint32 functions_number = sizeof (rspamd_functions_list) /
+ sizeof (struct _fl);
+/* NOTE(review): presumably set once list_ptr is replaced by a heap copy - confirm */
+static gboolean list_allocated = FALSE;
+/* NOTE(review): not used in the visible part of the file - confirm its purpose */
+static guint max_re_data = 0;
+
+/* Comparator for bsearch: orders struct _fl entries by function name */
+static gint
+fl_cmp (const void *s1, const void *s2)
+{
+	const struct _fl *lhs = s1, *rhs = s2;
+	const gchar *lname = lhs->name;
+	const gchar *rname = rhs->name;
+
+	return strcmp (lname, rname);
+}
+
+/* Error domain for GError objects produced by the mime expressions code */
+static GQuark
+rspamd_mime_expr_quark (void)
+{
+	GQuark domain;
+
+	domain = g_quark_from_static_string ("mime-expressions");
+
+	return domain;
+}
+
+/*
+ * Rspamd regexp utility functions
+ */
+
+/**
+ * Parse a regexp atom of the form `[header=]/regexp/flags`.
+ * Flags that are passed to the regexp engine: i, m, s, x, u, O, r ('o' is
+ * accepted and ignored). Type flags: H, M, P, U, X. Other flags: T (test),
+ * S (strong), A (multiple). Parsing of flags stops at the first unknown
+ * character.
+ * Successfully parsed atoms are stored in the regexp cache under the complete
+ * source line, so parsing the same line again returns the same atom.
+ * @param pool memory pool used for the atom and its strings
+ * @param line text of the atom
+ * @return parsed atom or NULL in case of error (a warning is logged)
+ */
+static struct rspamd_regexp_atom *
+rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line)
+{
+	const gchar *begin, *end, *p, *src, *start;
+	gchar *dbegin, *dend;
+	struct rspamd_regexp_atom *result;
+	rspamd_regexp_t *re;
+	GError *err = NULL;
+	GString *re_flags;
+
+	if (line == NULL) {
+		msg_err ("cannot parse NULL line");
+		return NULL;
+	}
+
+	if ((re = rspamd_regexp_cache_query (NULL, line, NULL)) != NULL) {
+		return ((struct rspamd_regexp_atom *)rspamd_regexp_get_ud (re));
+	}
+
+	/* The unmodified line is the cache key and is used in log messages */
+	src = line;
+	result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
+	/* Skip whitespaces */
+	while (g_ascii_isspace (*line)) {
+		line++;
+	}
+	if (*line == '\0') {
+		msg_warn ("got empty regexp");
+		return NULL;
+	}
+	start = line;
+	/* First try to find header name */
+	begin = strchr (line, '/');
+	if (begin != NULL) {
+		/* Look backwards from the first slash for `header=` */
+		p = begin;
+		end = NULL;
+		while (p != line) {
+			if (*p == '=') {
+				end = p;
+				break;
+			}
+			p--;
+		}
+		if (end) {
+			result->header = rspamd_mempool_alloc (pool, end - line + 1);
+			rspamd_strlcpy (result->header, line, end - line + 1);
+			result->type = REGEXP_HEADER;
+			line = end;
+		}
+	}
+	else {
+		result->header = rspamd_mempool_strdup (pool, line);
+		result->type = REGEXP_HEADER;
+		line = start;
+	}
+	/* Find begin of regexp */
+	while (*line && *line != '/') {
+		line++;
+	}
+	if (*line != '\0') {
+		begin = line + 1;
+	}
+	else if (result->header == NULL) {
+		/* Assume that line without // is just a header name */
+		result->header = rspamd_mempool_strdup (pool, line);
+		result->type = REGEXP_HEADER;
+		return result;
+	}
+	else {
+		/* We got header name earlier but have not found // expression, so it is invalid regexp */
+		msg_warn (
+			"got no header name (eg. header=) but without corresponding regexp, %s",
+			src);
+		return NULL;
+	}
+	/* Find end: the first slash that is not preceded by a backslash */
+	end = begin;
+	while (*end && (*end != '/' || *(end - 1) == '\\')) {
+		end++;
+	}
+	if (end == begin || *end != '/') {
+		msg_warn ("no trailing / in regexp %s", src);
+		return NULL;
+	}
+	/* Parse flags */
+	p = end + 1;
+	re_flags = g_string_sized_new (32);
+	while (p != NULL) {
+		switch (*p) {
+		case 'i':
+		case 'm':
+		case 's':
+		case 'x':
+		case 'u':
+		case 'O':
+		case 'r':
+			g_string_append_c (re_flags, *p);
+			p++;
+			break;
+		case 'o':
+			p++;
+			break;
+		/* Type flags */
+		case 'H':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_HEADER;
+			}
+			p++;
+			break;
+		case 'M':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_MESSAGE;
+			}
+			p++;
+			break;
+		case 'P':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_MIME;
+			}
+			p++;
+			break;
+		case 'U':
+			if (result->type == REGEXP_NONE) {
+				result->type = REGEXP_URL;
+			}
+			p++;
+			break;
+		case 'X':
+			if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+				result->type = REGEXP_RAW_HEADER;
+			}
+			p++;
+			break;
+		case 'T':
+			result->is_test = TRUE;
+			p++;
+			break;
+		case 'S':
+			result->is_strong = TRUE;
+			p++;
+			break;
+		case 'A':
+			result->is_multiple = TRUE;
+			p++;
+			break;
+		/* Stop flags parsing */
+		default:
+			p = NULL;
+			break;
+		}
+	}
+
+	result->regexp_text = rspamd_mempool_strdup (pool, start);
+	dbegin = result->regexp_text + (begin - start);
+	dend = result->regexp_text + (end - start);
+	/* Temporarily cut the copy at the closing slash to get the bare pattern */
+	*dend = '\0';
+
+	result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
+			&err);
+
+	g_string_free (re_flags, TRUE);
+	*dend = '/';
+
+	if (result->regexp == NULL || err != NULL) {
+		msg_warn ("could not read regexp: %s while reading regexp %s",
+			err ? err->message : "unknown error",
+			src);
+		/* Do not leak the error object or a half-usable regexp */
+		if (err != NULL) {
+			g_error_free (err);
+		}
+		if (result->regexp != NULL) {
+			rspamd_regexp_unref (result->regexp);
+		}
+		return NULL;
+	}
+
+	rspamd_mempool_add_destructor (pool,
+		(rspamd_mempool_destruct_t) rspamd_regexp_unref,
+		(void *)result->regexp);
+
+	rspamd_regexp_set_ud (result->regexp, result);
+
+	/*
+	 * Use the same key as the query above: a key without the `header=`
+	 * prefix would be shared by atoms that differ only in the header name
+	 */
+	rspamd_regexp_cache_insert (NULL, src, NULL, result->regexp);
+
+	return result;
+}
+
+/**
+ * Parse a function call atom of the form `name(arg1, arg2, ...)`.
+ * The argument list lies between the first '(' and the last ')' of the input
+ * and is split on commas; a backslash makes the following character a part
+ * of the argument. An argument that starts with '/' is compiled via the regexp
+ * cache and falls back to a plain string if it cannot be compiled.
+ * The input must contain both braces, otherwise the assertion fails.
+ * @param input text of the atom
+ * @return new atom; the atom is allocated with g_slice_alloc0 (), its name
+ * and string arguments with g_malloc ()
+ */
+struct rspamd_function_atom *
+rspamd_mime_expr_parse_function_atom (const gchar *input)
+{
+	const gchar *obrace, *ebrace, *p, *c;
+	gchar t, *databuf;
+	struct rspamd_function_atom *res;
+	struct expression_argument arg;
+	GError *err = NULL;
+	enum {
+		start_read_argument = 0,
+		in_string,
+		in_regexp,
+		got_backslash,
+		got_comma
+	} state, prev_state = 0;
+
+	obrace = strchr (input, '(');
+	ebrace = strrchr (input, ')');
+
+	g_assert (obrace != NULL && ebrace != NULL);
+
+	res = g_slice_alloc0 (sizeof (*res));
+	res->name = g_malloc (obrace - input + 1);
+	rspamd_strlcpy (res->name, input, obrace - input + 1);
+	res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
+
+	p = obrace + 1;
+	c = p;
+	state = start_read_argument;
+
+	/* Read arguments */
+	while (p <= ebrace) {
+		t = *p;
+		switch (state) {
+		case start_read_argument:
+			if (t == '/') {
+				state = in_regexp;
+				c = p;
+			}
+			else if (!g_ascii_isspace (t)) {
+				state = in_string;
+				c = p;
+			}
+			p ++;
+			break;
+		case in_regexp:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_regexp;
+			}
+			else if (t == ',' || p == ebrace) {
+				databuf = g_malloc (p - c + 1);
+				rspamd_strlcpy (databuf, c, p - c + 1);
+				arg.type = EXPRESSION_ARGUMENT_REGEXP;
+				arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
+
+				if (arg.data == NULL) {
+					/* Fallback to string */
+					msg_warn ("cannot parse slashed argument %s as regexp: %s",
+						databuf, err ? err->message : "unknown error");
+					if (err != NULL) {
+						/* Reset err so that it can be reused for the next argument */
+						g_error_free (err);
+						err = NULL;
+					}
+					arg.type = EXPRESSION_ARGUMENT_NORMAL;
+					arg.data = databuf;
+				}
+				else {
+					g_free (databuf);
+				}
+
+				g_array_append_val (res->args, arg);
+				/* This argument is complete, the next one starts from scratch */
+				state = got_comma;
+			}
+			p ++;
+			break;
+		case in_string:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_string;
+			}
+			else if (t == ',' || p == ebrace) {
+				databuf = g_malloc (p - c + 1);
+				rspamd_strlcpy (databuf, c, p - c + 1);
+				arg.type = EXPRESSION_ARGUMENT_NORMAL;
+				arg.data = databuf;
+				g_array_append_val (res->args, arg);
+				/* This argument is complete, the next one starts from scratch */
+				state = got_comma;
+			}
+			p ++;
+			break;
+		case got_backslash:
+			/* Skip the escaped character and return to the interrupted state */
+			state = prev_state;
+			p ++;
+			break;
+		case got_comma:
+			/* The separator is already consumed, do not advance here */
+			state = start_read_argument;
+			break;
+		}
+	}
+
+	return res;
+}
+
+/**
+ * Parse a single mime atom from the beginning of `line`.
+ *
+ * Two shapes are recognised by the state machine below:
+ *  - a regexp atom: any leading text, then `/.../` followed by alphabetic
+ *    flags;
+ *  - a function atom: any leading text, then `(...)`.
+ * A backslash escapes the next character inside a regexp or a function
+ * argument list. Whitespace before the first `/` or `(` makes the atom
+ * invalid.
+ *
+ * @param line input text, not necessarily NUL terminated at `len`
+ * @param len number of bytes available in `line`
+ * @param pool memory pool used for the returned atom and passed to the
+ * regexp atom parser
+ * @param ud unused
+ * @param err set when NULL is returned
+ * @return new atom whose `len` is the number of bytes consumed and whose
+ * `data` is the parsed `struct rspamd_mime_atom`, or NULL on error
+ */
+static rspamd_expression_atom_t *
+rspamd_mime_expr_parse (const gchar *line, gsize len,
+		rspamd_mempool_t *pool, gpointer ud, GError **err)
+{
+	rspamd_expression_atom_t *a = NULL;
+	struct rspamd_mime_atom *mime_atom = NULL;
+	const gchar *p, *end;
+	gchar t;
+	gboolean is_function = FALSE;
+	enum {
+		in_header = 0,
+		got_slash,
+		in_regexp,
+		got_backslash,
+		got_second_slash,
+		in_flags,
+		got_obrace,
+		in_function,
+		got_ebrace,
+		end_atom,
+		bad_atom
+	} state = 0, prev_state = 0;
+
+	p = line;
+	end = p + len;
+
+	while (p < end) {
+		t = *p;
+
+		switch (state) {
+		case in_header:
+			if (t == '/') {
+				/* Regexp */
+				state = got_slash;
+			}
+			else if (t == '(') {
+				/* Function */
+				state = got_obrace;
+			}
+			else if (g_ascii_isspace (t)) {
+				state = bad_atom;
+			}
+			p ++;
+			break;
+		case got_slash:
+			state = in_regexp;
+			break;
+		case in_regexp:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_regexp;
+			}
+			else if (t == '/') {
+				state = got_second_slash;
+			}
+			p ++;
+			break;
+		case got_second_slash:
+			state = in_flags;
+			break;
+		case in_flags:
+			if (!g_ascii_isalpha (t)) {
+				state = end_atom;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case got_backslash:
+			/* Skip the escaped character and resume the interrupted state */
+			state = prev_state;
+			p ++;
+			break;
+		case got_obrace:
+			state = in_function;
+			is_function = TRUE;
+			break;
+		case in_function:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_function;
+			}
+			else if (t == ')') {
+				state = got_ebrace;
+			}
+			p ++;
+			break;
+		case got_ebrace:
+			state = end_atom;
+			break;
+		case bad_atom:
+			/* `%.*s` limits the output to `len` bytes of `line` */
+			g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
+					" mime atom '%.*s' when reading symbol '%c'", (gint)len, line, t);
+			return NULL;
+		case end_atom:
+			goto set;
+		}
+	}
+set:
+
+	/*
+	 * The atom is complete only if parsing stopped right after the closing
+	 * brace, right after the closing slash, inside the flags or at an
+	 * explicit end of atom; any other state means truncated input.
+	 */
+	if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
+			state != in_flags && state != end_atom)) {
+		g_set_error (err, rspamd_mime_expr_quark(), 200, "inclomplete or empty"
+				" mime atom");
+		return NULL;
+	}
+
+	mime_atom = g_slice_alloc (sizeof (*mime_atom));
+	mime_atom->is_function = is_function;
+	mime_atom->str = g_malloc (p - line + 1);
+	rspamd_strlcpy (mime_atom->str, line, p - line + 1);
+
+	if (!is_function) {
+		mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
+				mime_atom->str);
+		if (mime_atom->d.re == NULL) {
+			g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse regexp '%s'",
+					mime_atom->str);
+			goto err;
+		}
+	}
+	else {
+		mime_atom->d.func = rspamd_mime_expr_parse_function_atom (mime_atom->str);
+		if (mime_atom->d.func == NULL) {
+			g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'",
+					mime_atom->str);
+			goto err;
+		}
+	}
+
+	a = rspamd_mempool_alloc (pool, sizeof (*a));
+	a->len = p - line;
+	a->priority = 0;
+	a->data = mime_atom;
+
+	return a;
+
+err:
+	if (mime_atom != NULL) {
+		g_free (mime_atom->str);
+		g_slice_free1 (sizeof (*mime_atom), mime_atom);
+	}
+
+	return NULL;
+}
+
+/*
+ * Apply the regexp of atom `re` to a single chunk of data and count matches.
+ *
+ * The per-task regexp cache is consulted first (keyed by re->regexp_text);
+ * a cached value is returned as is. If `len` is 0 the length is taken with
+ * strlen(), so `data` must be NUL terminated in that case. Data longer than
+ * max_re_data (when that limit is non-zero) is skipped and 0 is returned.
+ *
+ * Returns the number of matches: at most 1 unless re->is_multiple is set.
+ * Only positive results are stored in the cache here.
+ */
+static gint
+rspamd_mime_regexp_element_process (struct rspamd_task *task,
+ struct rspamd_regexp_atom *re, const gchar *data, gsize len,
+ gboolean raw)
+{
+ guint r = 0;
+ const gchar *start = NULL, *end = NULL;
+
+ /* Fast path: result already known for this task */
+ if ((r = rspamd_task_re_cache_check (task, re->regexp_text)) !=
+ RSPAMD_TASK_CACHE_NO_VALUE) {
+ debug_task ("regexp /%s/ is found in cache, result: %d",
+ re->regexp_text, r);
+ return r;
+ }
+
+ /* Zero length means "NUL terminated string" */
+ if (len == 0) {
+ len = strlen (data);
+ }
+
+ if (max_re_data != 0 && len > max_re_data) {
+ msg_info ("<%s> skip data of size %Hud",
+ task->message_id,
+ len);
+
+ return 0;
+ }
+
+ /* start/end are passed back to the search on every iteration */
+ while (rspamd_regexp_search (re->regexp, data, len, &start, &end, raw)) {
+ if (G_UNLIKELY (re->is_test)) {
+ msg_info (
+ "process test regexp %s for header %s with value '%s' returned TRUE",
+ re->regexp_text,
+ re->header,
+ data);
+ }
+ r++;
+
+ /* A single hit is enough unless all matches must be counted */
+ if (!re->is_multiple) {
+ break;
+ }
+ }
+
+ /* Negative results are cached by the callers, not here */
+ if (r > 0) {
+ rspamd_task_re_cache_add (task, re->regexp_text, r);
+ }
+
+ return r;
+}
+
+/* State shared with tree_url_callback while walking the url/email trees */
+struct url_regexp_param {
+ struct rspamd_task *task; /* task being processed */
+ rspamd_regexp_t *regexp; /* compiled regexp of the atom */
+ struct rspamd_regexp_atom *re; /* atom being evaluated */
+ gboolean found; /* set to TRUE by the callback on the first match */
+};
+
+/*
+ * GTree traversal callback: match a single url against the regexp atom
+ * stored in `data` (struct url_regexp_param).
+ *
+ * Returns TRUE (stop traversal) and sets param->found on the first match,
+ * FALSE (continue traversal) otherwise.
+ */
+static gboolean
+tree_url_callback (gpointer key, gpointer value, void *data)
+{
+	struct url_regexp_param *param = data;
+	struct rspamd_url *url = value;
+
+	if (rspamd_mime_regexp_element_process (param->task, param->re,
+			struri (url), 0, FALSE)) {
+		param->found = TRUE;
+		return TRUE;
+	}
+	else if (G_UNLIKELY (param->re->is_test)) {
+		/* Both %s conversions need an argument: regexp text, then url */
+		msg_info ("process test regexp %s for url %s returned FALSE",
+				param->re->regexp_text,
+				struri (url));
+	}
+
+	return FALSE;
+}
+
+/*
+ * Evaluate a regexp atom against the part of the task selected by re->type:
+ * headers (decoded or raw), text parts, the whole message or urls/emails.
+ *
+ * Returns 1 when the regexp (or the bare header presence check) matches and
+ * 0 otherwise. Negative results are stored in the per-task regexp cache here;
+ * positive ones are stored by rspamd_mime_regexp_element_process.
+ */
+static gint
+rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
+		struct rspamd_task *task)
+{
+	guint8 *ct;
+	gsize clen;
+	gboolean raw = FALSE;
+	const gchar *in;
+	GList *cur, *headerlist;
+	struct url_regexp_param callback_param = {
+		.task = task,
+		.re = re,
+		.found = FALSE
+	};
+	struct mime_text_part *part;
+	struct raw_header *rh;
+
+	if (re == NULL) {
+		msg_info ("invalid regexp passed");
+		return 0;
+	}
+
+	callback_param.regexp = re->regexp;
+
+	switch (re->type) {
+	case REGEXP_NONE:
+		msg_warn ("bad error detected: %s has invalid regexp type",
+				re->regexp_text);
+		break;
+	case REGEXP_HEADER:
+	case REGEXP_RAW_HEADER:
+		/* Check header's name */
+		if (re->header == NULL) {
+			msg_info ("header regexp without header name: '%s'",
+					re->regexp_text);
+			rspamd_task_re_cache_add (task, re->regexp_text, 0);
+			return 0;
+		}
+		debug_task ("checking %s header regexp: %s = %s",
+				re->type == REGEXP_RAW_HEADER ? "raw" : "decoded",
+				re->header,
+				re->regexp_text);
+
+		/* Get list of specified headers */
+		headerlist = message_get_header (task,
+				re->header,
+				re->is_strong);
+		if (headerlist == NULL) {
+			/* Header is not found */
+			if (G_UNLIKELY (re->is_test)) {
+				msg_info (
+						"process test regexp %s for header %s returned FALSE: no header found",
+						re->regexp_text,
+						re->header);
+			}
+			rspamd_task_re_cache_add (task, re->regexp_text, 0);
+			return 0;
+		}
+
+		/* Check whether we have regexp for it */
+		if (re->regexp == NULL) {
+			debug_task ("regexp contains only header and it is found %s",
+					re->header);
+			rspamd_task_re_cache_add (task, re->regexp_text, 1);
+			return 1;
+		}
+
+		/* Iterate through headers */
+		for (cur = headerlist; cur != NULL; cur = g_list_next (cur)) {
+			rh = cur->data;
+			debug_task ("found header \"%s\" with value \"%s\"",
+					re->header, rh->decoded);
+
+			if (re->type == REGEXP_RAW_HEADER) {
+				in = rh->value;
+				raw = TRUE;
+			}
+			else {
+				in = rh->decoded;
+				/* Decoded values must be valid UTF-8 to be matched */
+				if (!in || !g_utf8_validate (in, -1, NULL)) {
+					continue;
+				}
+			}
+
+			/* Match re */
+			if (in && rspamd_mime_regexp_element_process (task, re, in,
+					strlen (in), raw)) {
+				return 1;
+			}
+		}
+
+		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		break;
+	case REGEXP_MIME:
+		debug_task ("checking mime regexp: %s", re->regexp_text);
+		/* Iterate through text parts */
+		for (cur = g_list_first (task->text_parts); cur != NULL;
+				cur = g_list_next (cur)) {
+			part = (struct mime_text_part *)cur->data;
+			/* Skip empty parts */
+			if (part->is_empty) {
+				continue;
+			}
+
+			/*
+			 * The raw flag is decided per part: a raw part must not force
+			 * raw matching of the parts that follow it.
+			 */
+			raw = part->is_raw ? TRUE : FALSE;
+
+			/* Select data for regexp */
+			if (raw) {
+				ct = part->orig->data;
+				clen = part->orig->len;
+			}
+			else {
+				ct = part->content->data;
+				clen = part->content->len;
+			}
+
+			if (rspamd_mime_regexp_element_process (task, re,
+					(const gchar *)ct, clen, raw)) {
+				return 1;
+			}
+		}
+		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		break;
+	case REGEXP_MESSAGE:
+		debug_task ("checking message regexp: %s", re->regexp_text);
+		raw = TRUE;
+		ct = (guint8 *)task->msg.start;
+		clen = task->msg.len;
+
+		if (rspamd_mime_regexp_element_process (task, re,
+				(const gchar *)ct, clen, raw)) {
+			return 1;
+		}
+		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		break;
+	case REGEXP_URL:
+		debug_task ("checking url regexp: %s", re->regexp_text);
+		if (task->urls) {
+			g_tree_foreach (task->urls, tree_url_callback, &callback_param);
+		}
+		if (task->emails && callback_param.found == FALSE) {
+			g_tree_foreach (task->emails, tree_url_callback, &callback_param);
+		}
+		if (callback_param.found) {
+			/* A match was found by the callback: report it to the caller */
+			return 1;
+		}
+		rspamd_task_re_cache_add (task, re->regexp_text, 0);
+		break;
+	default:
+		msg_warn ("bad error detected: %p is not a valid regexp object", re);
+		break;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Priority callback for mime atoms.
+ * Not implemented yet: every atom gets priority 0 and `atom` is ignored.
+ */
+static gint
+rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
+{
+ /* TODO: implement priorities for mime expressions */
+ return 0;
+}
+
+/*
+ * Destructor for a mime atom created by rspamd_mime_expr_parse.
+ *
+ * For function atoms the string arguments (EXPRESSION_ARGUMENT_NORMAL) are
+ * g_malloc'ed copies and are released here together with the argument array.
+ * The copy of the atom text (mime_atom->str) is g_malloc'ed by the parser and
+ * must be released as well, otherwise it leaks for every destroyed atom.
+ */
+static void
+rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
+{
+	struct rspamd_mime_atom *mime_atom = atom->data;
+	guint i;
+	struct expression_argument *arg;
+
+	if (mime_atom) {
+		if (mime_atom->is_function) {
+			/* Need to cleanup arguments */
+			for (i = 0; i < mime_atom->d.func->args->len; i ++) {
+				arg = &g_array_index (mime_atom->d.func->args,
+						struct expression_argument, i);
+
+				if (arg->type == EXPRESSION_ARGUMENT_NORMAL) {
+					g_free (arg->data);
+				}
+			}
+			g_array_free (mime_atom->d.func->args, TRUE);
+		}
+		/* XXX: regexp shouldn't be special */
+		/* Release the atom text last: nothing uses the atom after this point */
+		g_free (mime_atom->str);
+		g_slice_free1 (sizeof (*mime_atom), mime_atom);
+	}
+}
+
+/*
+ * Look up the internal function named by `func` in the sorted function
+ * table and call it with the task and the parsed arguments.
+ * Returns FALSE when no function with that name is registered.
+ */
+static gboolean
+rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
+		struct rspamd_task * task,
+		lua_State *L)
+{
+	struct _fl needle, *found;
+
+	/* Only the name takes part in the comparison */
+	needle.name = func->name;
+	found = bsearch (&needle, list_ptr, functions_number,
+			sizeof (struct _fl), fl_cmp);
+
+	if (found != NULL) {
+		return found->func (task, func->args, found->user_data);
+	}
+
+	/* Try to check lua function */
+	return FALSE;
+}
+
+/*
+ * Expression callback: evaluate one mime atom for the task given as `input`.
+ * Dispatches to the function or the regexp handler depending on atom kind.
+ */
+static gint
+rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom)
+{
+	struct rspamd_task *task = input;
+	struct rspamd_mime_atom *mime_atom;
+
+	g_assert (task != NULL);
+	g_assert (atom != NULL);
+
+	mime_atom = atom->data;
+
+	if (mime_atom->is_function) {
+		return rspamd_mime_expr_process_function (mime_atom->d.func, task,
+				task->cfg->lua_state);
+	}
+
+	return rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
+}
+
+/*
+ * Register an internal expression function under `name`.
+ *
+ * The function table is copied into a new array that is one element larger,
+ * the new entry is appended and the array is sorted again with fl_cmp so
+ * that bsearch can be used for lookups. The previous table is freed only if
+ * it was allocated by an earlier call (list_allocated).
+ */
+void
+register_expression_function (const gchar *name,
+		rspamd_internal_func_t func,
+		void *user_data)
+{
+	static struct _fl *grown;
+
+	functions_number++;
+
+	grown = g_new (struct _fl, functions_number);
+	memcpy (grown, list_ptr, (functions_number - 1) * sizeof (struct _fl));
+
+	/* Fill the freshly added slot */
+	grown[functions_number - 1].name = name;
+	grown[functions_number - 1].func = func;
+	grown[functions_number - 1].user_data = user_data;
+
+	if (list_allocated) {
+		g_free (list_ptr);
+	}
+	list_allocated = TRUE;
+
+	qsort (grown, functions_number, sizeof (struct _fl), fl_cmp);
+	list_ptr = grown;
+}
+
+/*
+ * Expression function: compare message encoding with the first argument.
+ * Currently a stub: returns TRUE whenever a valid string argument is given.
+ */
+gboolean
+rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	/* Do not read element 0 of an empty array */
+	arg = args->len > 0 ?
+			&g_array_index (args, struct expression_argument, 0) : NULL;
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	/* XXX: really write this function */
+	return TRUE;
+}
+
+/*
+ * Expression function: TRUE if the message has a header whose name is given
+ * as the first (string) argument.
+ */
+gboolean
+rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+	GList *headerlist;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	/* Do not read element 0 of an empty array */
+	arg = args->len > 0 ?
+			&g_array_index (args, struct expression_argument, 0) : NULL;
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	debug_task ("try to get header %s", (gchar *)arg->data);
+	headerlist = message_get_header (task,
+			(gchar *)arg->data,
+			FALSE);
+
+	return headerlist != NULL;
+}
+
+/*
+ * Check one distance value against the thresholds of rspamd_parts_distance:
+ * with a positive second threshold the value must lie in [min, max) of the
+ * two thresholds, otherwise it must be less than or equal to `threshold`.
+ */
+static gboolean
+rspamd_parts_distance_in_range (gint diff, gint threshold, gint threshold2)
+{
+	if (threshold2 > 0) {
+		return diff >= MIN (threshold, threshold2) &&
+				diff < MAX (threshold, threshold2);
+	}
+
+	return diff <= threshold;
+}
+
+/*
+ * This function is designed to find difference between text/html and text/plain parts
+ * It takes up to two arguments. With one argument (threshold, 100 if omitted)
+ * it returns TRUE if the computed value for the two text parts of a
+ * multipart/alternative container is less than or equal to the threshold.
+ * With two arguments it returns TRUE if the value lies in the half-open range
+ * between the smaller and the larger threshold. The computed value is stored
+ * in the task pool variable "parts_distance" (-1 when the parts cannot be
+ * compared) and reused by subsequent calls.
+ */
+gboolean
+rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
+{
+	gint threshold, threshold2 = -1, diff;
+	struct mime_text_part *p1, *p2;
+	GList *cur;
+	struct expression_argument *arg;
+	GMimeObject *parent;
+	const GMimeContentType *ct;
+	gint *pdiff;
+
+	if (args == NULL || args->len == 0) {
+		debug_task ("no threshold is specified, assume it 100");
+		threshold = 100;
+	}
+	else {
+		errno = 0;
+		arg = &g_array_index (args, struct expression_argument, 0);
+		if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+			msg_warn ("invalid argument to function is passed");
+			return FALSE;
+		}
+
+		threshold = strtoul ((gchar *)arg->data, NULL, 10);
+		if (errno != 0) {
+			msg_info ("bad numeric value for threshold \"%s\", assume it 100",
+					(gchar *)arg->data);
+			threshold = 100;
+		}
+		/* The second threshold lives at index 1, so two elements are needed */
+		if (args->len >= 2) {
+			arg = &g_array_index (args, struct expression_argument, 1);
+			if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+				msg_warn ("invalid argument to function is passed");
+				return FALSE;
+			}
+
+			errno = 0;
+			threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
+			if (errno != 0) {
+				msg_info ("bad numeric value for threshold \"%s\", ignore it",
+						(gchar *)arg->data);
+				threshold2 = -1;
+			}
+		}
+	}
+
+	/* Reuse the value computed by a previous call for this task */
+	if ((pdiff =
+			rspamd_mempool_get_variable (task->task_pool,
+			"parts_distance")) != NULL) {
+		diff = *pdiff;
+		if (diff != -1) {
+			return rspamd_parts_distance_in_range (diff, threshold,
+					threshold2);
+		}
+
+		return FALSE;
+	}
+
+	/*
+	 * Allocate the cached value before branching so that every path below
+	 * stores a valid pointer (-1 means "parts cannot be compared").
+	 */
+	pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
+	*pdiff = -1;
+
+	if (g_list_length (task->text_parts) == 2) {
+		cur = g_list_first (task->text_parts);
+		p1 = cur->data;
+		cur = g_list_next (cur);
+
+		if (cur == NULL) {
+			msg_info ("bad parts list");
+			return FALSE;
+		}
+		p2 = cur->data;
+		/* First of all check parent object */
+		if (p1->parent && p1->parent == p2->parent) {
+			parent = p1->parent;
+			ct = g_mime_object_get_content_type (parent);
+#ifndef GMIME24
+			if (ct == NULL ||
+					!g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+#else
+			if (ct == NULL ||
+					!g_mime_content_type_is_type ((GMimeContentType *)ct,
+					"multipart", "alternative")) {
+#endif
+				debug_task (
+						"two parts are not belong to multipart/alternative container, skip check");
+				rspamd_mempool_set_variable (task->task_pool,
+						"parts_distance",
+						pdiff,
+						NULL);
+				return FALSE;
+			}
+		}
+		else {
+			debug_task (
+					"message contains two parts but they are in different multi-parts");
+			rspamd_mempool_set_variable (task->task_pool,
+					"parts_distance",
+					pdiff,
+					NULL);
+			return FALSE;
+		}
+		if (!p1->is_empty && !p2->is_empty) {
+			if (p1->diff_str != NULL && p2->diff_str != NULL) {
+				diff = rspamd_diff_distance_normalized (p1->diff_str,
+						p2->diff_str);
+			}
+			else {
+				diff = rspamd_fuzzy_compare_parts (p1, p2);
+			}
+			debug_task (
+					"got likeliness between parts of %d%%, threshold is %d%%",
+					diff,
+					threshold);
+			*pdiff = diff;
+			rspamd_mempool_set_variable (task->task_pool,
+					"parts_distance",
+					pdiff,
+					NULL);
+			if (rspamd_parts_distance_in_range (diff, threshold,
+					threshold2)) {
+				return TRUE;
+			}
+		}
+		else if ((p1->is_empty &&
+				!p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+			/* Empty and non empty parts are different */
+			*pdiff = 0;
+			rspamd_mempool_set_variable (task->task_pool,
+					"parts_distance",
+					pdiff,
+					NULL);
+			return TRUE;
+		}
+	}
+	else {
+		debug_task (
+				"message has too many text parts, so do not try to compare them with each other");
+		rspamd_mempool_set_variable (task->task_pool,
+				"parts_distance",
+				pdiff,
+				NULL);
+		return FALSE;
+	}
+
+	rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff,
+			NULL);
+	return FALSE;
+}
+
+/*
+ * Pair of strings describing one recipient.
+ * In rspamd_recipients_distance `name` is the mailbox address (cut at '@'
+ * in the non-GMIME24 branch) and `addr` points just after the '@'.
+ */
+struct addr_list {
+ const gchar *name;
+ const gchar *addr;
+};
+
+/* Number of leading characters of `name` compared between recipients */
+#define COMPARE_RCPT_LEN 3
+/* Recipient lists shorter than this are not analysed */
+#define MIN_RCPT_TO_COMPARE 7
+
+/*
+ * Expression function: detect recipient lists made of similar addresses.
+ *
+ * The first argument is a numeric threshold. Every pair of recipients is
+ * compared: a pair is a hit when the first COMPARE_RCPT_LEN characters of
+ * the names are equal (case insensitive) or when the parts after '@' are
+ * equal. TRUE is returned when (hits * num / 2) / total_pairs >= threshold.
+ * Lists with fewer than MIN_RCPT_TO_COMPARE recipients are never reported.
+ */
+gboolean
+rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
+		void *unused)
+{
+	struct expression_argument *arg;
+	InternetAddressList *cur;
+	double threshold;
+	struct addr_list *ar;
+	gchar *c;
+	gint num, i, j, hits = 0, total = 0;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	/* Do not read element 0 of an empty array */
+	arg = args->len > 0 ?
+			&g_array_index (args, struct expression_argument, 0) : NULL;
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	errno = 0;
+	threshold = strtod ((gchar *)arg->data, NULL);
+
+	if (errno != 0) {
+		msg_warn ("invalid numeric value '%s': %s",
+				(gchar *)arg->data,
+				strerror (errno));
+		return FALSE;
+	}
+
+	if (!task->rcpt_mime) {
+		return FALSE;
+	}
+
+	num = internet_address_list_length (task->rcpt_mime);
+
+	if (num < MIN_RCPT_TO_COMPARE) {
+		return FALSE;
+	}
+	ar =
+			rspamd_mempool_alloc0 (task->task_pool, num *
+			sizeof (struct addr_list));
+
+	/* Fill array */
+	cur = task->rcpt_mime;
+#ifdef GMIME24
+	for (i = 0; i < num; i++) {
+		InternetAddress *iaelt =
+				internet_address_list_get_address(cur, i);
+		InternetAddressMailbox *iamb =
+				INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
+				INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
+		if (iamb) {
+			ar[i].name = internet_address_mailbox_get_addr (iamb);
+			if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+				ar[i].addr = c + 1;
+			}
+		}
+	}
+#else
+	InternetAddress *addr;
+	i = 0;
+	while (cur) {
+		addr = internet_address_list_get_address (cur);
+		if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+			ar[i].name = rspamd_mempool_strdup (task->task_pool,
+					internet_address_get_addr (addr));
+			if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+				/* Split the private copy into local part and domain */
+				*c = '\0';
+				ar[i].addr = c + 1;
+			}
+			cur = internet_address_list_next (cur);
+			i++;
+		}
+		else {
+			cur = internet_address_list_next (cur);
+		}
+	}
+#endif
+
+	/* Cycle all elements in array */
+	for (i = 0; i < num; i++) {
+		for (j = i + 1; j < num; j++) {
+			if (ar[i].name && ar[j].name &&
+					g_ascii_strncasecmp (ar[i].name, ar[j].name,
+					COMPARE_RCPT_LEN) == 0) {
+				/* Common name part */
+				hits++;
+			}
+			else if (ar[i].addr && ar[j].addr &&
+					g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
+				/* Common address part, but different name */
+				hits++;
+			}
+			total++;
+		}
+	}
+
+	if ((double)(hits * num / 2.) / (double)total >= threshold) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function: TRUE if the message has at least one text part and
+ * every text part is HTML. Arguments are ignored.
+ */
+gboolean
+rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
+		void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+	gboolean res = FALSE;
+
+	for (cur = g_list_first (task->text_parts); cur != NULL;
+			cur = g_list_next (cur)) {
+		p = cur->data;
+
+		if (!p->is_html) {
+			/* A single non-HTML part settles the answer */
+			return FALSE;
+		}
+
+		res = TRUE;
+	}
+
+	return res;
+}
+
+/*
+ * Check whether the entries of an address list are in ascending order
+ * (case-insensitive comparison of each entry with the previous one).
+ *
+ * Lists shorter than MIN_RCPT_TO_COMPARE always yield FALSE.
+ * With GMIME24 the value returned by internet_address_get_name is compared;
+ * otherwise internet_address_get_addr is used and only entries of type
+ * INTERNET_ADDRESS_NAME are considered.
+ */
+static gboolean
+is_recipient_list_sorted (const InternetAddressList * ia)
+{
+ const InternetAddressList *cur;
+ InternetAddress *addr;
+ gboolean res = TRUE;
+ struct addr_list current = { NULL, NULL }, previous = {
+ NULL, NULL
+ };
+#ifdef GMIME24
+ gint num, i;
+#endif
+
+ /* Do not check too short address lists */
+ if (internet_address_list_length ((InternetAddressList *)ia) <
+ MIN_RCPT_TO_COMPARE) {
+ return FALSE;
+ }
+#ifdef GMIME24
+ num = internet_address_list_length ((InternetAddressList *)ia);
+ cur = ia;
+ for (i = 0; i < num; i++) {
+ addr =
+ internet_address_list_get_address ((InternetAddressList *)cur, i);
+ current.addr = (gchar *)internet_address_get_name (addr);
+ if (previous.addr != NULL) {
+ /* An entry that sorts before its predecessor breaks the order */
+ if (current.addr &&
+ g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+ res = FALSE;
+ break;
+ }
+ }
+ previous.addr = current.addr;
+ }
+#else
+ cur = ia;
+ while (cur) {
+ addr = internet_address_list_get_address (cur);
+ if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+ current.addr = internet_address_get_addr (addr);
+ if (previous.addr != NULL) {
+ /* An entry that sorts before its predecessor breaks the order */
+ if (current.addr &&
+ g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+ res = FALSE;
+ break;
+ }
+ }
+ previous.addr = current.addr;
+ }
+ cur = internet_address_list_next (cur);
+ }
+#endif
+
+ return res;
+}
+
+/*
+ * Expression function: TRUE if any of the To, Bcc or Cc recipient lists
+ * (checked in that order) is sorted according to is_recipient_list_sorted.
+ * Arguments are ignored.
+ */
+gboolean
+rspamd_is_recipients_sorted (struct rspamd_task * task,
+		GArray * args,
+		void *unused)
+{
+	/* Check all types of addresses, stop at the first sorted list */
+	return is_recipient_list_sorted (g_mime_message_get_recipients (
+				task->message, GMIME_RECIPIENT_TYPE_TO)) == TRUE ||
+			is_recipient_list_sorted (g_mime_message_get_recipients (
+				task->message, GMIME_RECIPIENT_TYPE_BCC)) == TRUE ||
+			is_recipient_list_sorted (g_mime_message_get_recipients (
+				task->message, GMIME_RECIPIENT_TYPE_CC)) == TRUE;
+}
+
+/*
+ * Expression function: TRUE if the transfer encoding of the top-level mime
+ * part equals the encoding named by the first (string) argument.
+ * A part with the default encoding is treated as 7bit.
+ */
+gboolean
+rspamd_compare_transfer_encoding (struct rspamd_task * task,
+		GArray * args,
+		void *unused)
+{
+	GMimeObject *part;
+#ifndef GMIME24
+	GMimePartEncodingType enc_req, part_enc;
+#else
+	GMimeContentEncoding enc_req, part_enc;
+#endif
+	struct expression_argument *arg;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	/* Do not read element 0 of an empty array */
+	arg = args->len > 0 ?
+			&g_array_index (args, struct expression_argument, 0) : NULL;
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+#ifndef GMIME24
+	enc_req = g_mime_part_encoding_from_string (arg->data);
+	if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
+#else
+	enc_req = g_mime_content_encoding_from_string (arg->data);
+	if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
+#endif
+		msg_warn ("bad encoding type: %s", (gchar *)arg->data);
+		return FALSE;
+	}
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		if (GMIME_IS_PART (part)) {
+#ifndef GMIME24
+			part_enc = g_mime_part_get_encoding (GMIME_PART (part));
+			if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
+				/* Assume 7bit as default transfer encoding */
+				part_enc = GMIME_PART_ENCODING_7BIT;
+			}
+#else
+			part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
+			if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
+				/* Assume 7bit as default transfer encoding */
+				part_enc = GMIME_CONTENT_ENCODING_7BIT;
+			}
+#endif
+
+			debug_task ("got encoding in part: %d and compare with %d",
+					(gint)part_enc,
+					(gint)enc_req);
+#ifndef GMIME24
+			g_object_unref (part);
+#endif
+
+			return part_enc == enc_req;
+		}
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function: FALSE if any non-empty HTML text part is marked as
+ * not balanced, TRUE otherwise (including messages without HTML parts).
+ * Arguments are ignored.
+ */
+gboolean
+rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+
+	for (cur = g_list_first (task->text_parts); cur != NULL;
+			cur = g_list_next (cur)) {
+		p = cur->data;
+
+		/* Only non-empty HTML parts take part in the decision */
+		if (p->is_empty || !p->is_html) {
+			continue;
+		}
+
+		if (!p->is_balanced) {
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/* Arguments passed to search_html_node_callback through g_node_traverse */
+struct html_callback_data {
+ struct html_tag *tag; /* tag that is being searched for */
+ gboolean *res; /* set to TRUE by the callback when the tag is found */
+};
+
+/*
+ * GNode traversal callback: stop (return TRUE) and flag the result as soon
+ * as a node carrying the wanted tag is met; otherwise continue traversal.
+ */
+static gboolean
+search_html_node_callback (GNode * node, gpointer data)
+{
+	struct html_callback_data *cd = data;
+	struct html_node *nd = node->data;
+
+	if (nd == NULL || nd->tag != cd->tag) {
+		return FALSE;
+	}
+
+	*cd->res = TRUE;
+
+	return TRUE;
+}
+
+/*
+ * Expression function: TRUE if any non-empty HTML text part contains the
+ * tag named by the first (string) argument.
+ */
+gboolean
+rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+	struct expression_argument *arg;
+	struct html_tag *tag;
+	gboolean res = FALSE;
+	struct html_callback_data cd;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	/* Do not read element 0 of an empty array */
+	arg = args->len > 0 ?
+			&g_array_index (args, struct expression_argument, 0) : NULL;
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	tag = get_tag_by_name (arg->data);
+	if (tag == NULL) {
+		msg_warn ("unknown tag type passed as argument: %s",
+				(gchar *)arg->data);
+		return FALSE;
+	}
+
+	cur = g_list_first (task->text_parts);
+	cd.res = &res;
+	cd.tag = tag;
+
+	/* The callback sets `res` through cd.res, which ends the loop */
+	while (cur && res == FALSE) {
+		p = cur->data;
+		if (!p->is_empty && p->is_html && p->html_nodes) {
+			g_node_traverse (p->html_nodes,
+					G_PRE_ORDER,
+					G_TRAVERSE_ALL,
+					-1,
+					search_html_node_callback,
+					&cd);
+		}
+		cur = g_list_next (cur);
+	}
+
+	return res;
+
+}
+
+/*
+ * Expression function: TRUE if some non-empty text part is declared as HTML
+ * but has no parsed HTML nodes. Arguments are ignored.
+ */
+gboolean
+rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct mime_text_part *p;
+	GList *cur;
+
+	for (cur = g_list_first (task->text_parts); cur != NULL;
+			cur = g_list_next (cur)) {
+		p = cur->data;
+
+		if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+
+}
+
+/*
+ * Expression function: TRUE if the raw headers table of the task contains
+ * an entry for the name given as the first (string) argument.
+ */
+static gboolean
+rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	/* Do not read element 0 of an empty array */
+	arg = args->len > 0 ?
+			&g_array_index (args, struct expression_argument, 0) : NULL;
+	if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
+}
+
+/*
+ * Compare `what` with one expression argument.
+ *
+ * A regexp argument is searched in `what`; the result is looked up in and
+ * stored to the per-task regexp cache under the regexp pattern. A string
+ * argument is compared case-insensitively. Any other combination is FALSE.
+ */
+static gboolean
+match_smtp_data (struct rspamd_task *task,
+		struct expression_argument *arg,
+		const gchar *what)
+{
+	rspamd_regexp_t *re;
+	gint r;
+
+	if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
+		/* This is a regexp */
+		re = arg->data;
+		if (re == NULL) {
+			msg_warn ("cannot compile regexp for function");
+			return FALSE;
+		}
+
+		r = rspamd_task_re_cache_check (task, rspamd_regexp_get_pattern (re));
+		if (r == -1) {
+			/* Not cached yet: run the search and remember the outcome */
+			r = rspamd_regexp_search (re, what, 0, NULL, NULL, FALSE);
+			rspamd_task_re_cache_add (task, rspamd_regexp_get_pattern (re), r);
+		}
+		return r;
+	}
+
+	return arg->type == EXPRESSION_ARGUMENT_NORMAL &&
+			g_ascii_strcasecmp (arg->data, what) == 0;
+}
+
+/*
+ * Expression function: compare a piece of SMTP/message data with a pattern.
+ *
+ * The first argument selects the data ("from", "helo", "user", "subject" or
+ * "rcpt", case insensitive); the second one is a string or a regexp that is
+ * matched with match_smtp_data. For "rcpt" every mailbox of the recipient
+ * list is tried. Further arguments are ignored. FALSE is returned when the
+ * selected data is absent or no second argument is given.
+ */
+static gboolean
+rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
+{
+	struct expression_argument *arg;
+	InternetAddressList *ia = NULL;
+	const gchar *type, *what = NULL;
+	gint i, ialen;
+
+	if (args == NULL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	/* Do not read element 0 of an empty array */
+	arg = args->len > 0 ?
+			&g_array_index (args, struct expression_argument, 0) : NULL;
+
+	if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+	else {
+		type = arg->data;
+		switch (*type) {
+		case 'f':
+		case 'F':
+			if (g_ascii_strcasecmp (type, "from") == 0) {
+				what = rspamd_task_get_sender (task);
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 'h':
+		case 'H':
+			if (g_ascii_strcasecmp (type, "helo") == 0) {
+				what = task->helo;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 'u':
+		case 'U':
+			if (g_ascii_strcasecmp (type, "user") == 0) {
+				what = task->user;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 's':
+		case 'S':
+			if (g_ascii_strcasecmp (type, "subject") == 0) {
+				what = task->subject;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		case 'r':
+		case 'R':
+			if (g_ascii_strcasecmp (type, "rcpt") == 0) {
+				ia = task->rcpt_mime;
+			}
+			else {
+				msg_warn ("bad argument to function: %s", type);
+				return FALSE;
+			}
+			break;
+		default:
+			msg_warn ("bad argument to function: %s", type);
+			return FALSE;
+		}
+	}
+
+	if (what == NULL && ia == NULL) {
+		/* Not enough data so regexp would NOT be found anyway */
+		return FALSE;
+	}
+
+	/* We would process only one more argument, others are ignored */
+	if (args->len >= 2) {
+		arg = &g_array_index (args, struct expression_argument, 1);
+		if (arg) {
+			if (what != NULL) {
+				return match_smtp_data (task, arg, what);
+			}
+			else {
+				if (ia != NULL) {
+					ialen = internet_address_list_length(ia);
+					for (i = 0; i < ialen; i ++) {
+						InternetAddress *iaelt =
+								internet_address_list_get_address(ia, i);
+						InternetAddressMailbox *iamb =
+								INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
+								INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
+						if (iamb &&
+								match_smtp_data (task, arg,
+								internet_address_mailbox_get_addr(iamb))) {
+							return TRUE;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function: compare a Content-Type parameter with a pattern.
+ *
+ * Arguments: parameter name (string), pattern (string compared case
+ * insensitively, or regexp) and an optional flag; a flag starting with
+ * "true" enables the search through all mime parts of the task. Without
+ * the flag the search is recursive when the top-level part is multipart.
+ * Returns TRUE as soon as a content type with a matching parameter is found.
+ *
+ * NOTE(review): regexp results are cached per task by pattern only, so in
+ * the recursive case parts after the first one see the cached result.
+ */
+static gboolean
+rspamd_content_type_compare_param (struct rspamd_task * task,
+		GArray * args,
+		void *unused)
+{
+	const gchar *param_name;
+	const gchar *param_data;
+	rspamd_regexp_t *re;
+	struct expression_argument *arg, *arg1, *arg_pattern;
+	GMimeObject *part;
+	GMimeContentType *ct;
+	gint r;
+	gboolean recursive = FALSE, result = FALSE;
+	GList *cur = NULL;
+	struct mime_part *cur_part;
+
+	if (args == NULL || args->len < 2) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+	param_name = arg->data;
+	arg_pattern = &g_array_index (args, struct expression_argument, 1);
+
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		ct = (GMimeContentType *)g_mime_object_get_content_type (part);
+		if (args->len >= 3) {
+			arg1 = &g_array_index (args, struct expression_argument, 2);
+			if (g_ascii_strncasecmp (arg1->data, "true",
+					sizeof ("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (g_mime_content_type_is_type (ct, "multipart", "*")) {
+				recursive = TRUE;
+			}
+		}
+
+		if (recursive) {
+			cur = task->parts;
+		}
+
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+		for (;; ) {
+			if ((param_data =
+					g_mime_content_type_get_parameter ((GMimeContentType *)ct,
+					param_name)) == NULL) {
+				result = FALSE;
+			}
+			else {
+				if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
+					re = arg_pattern->data;
+
+					if ((r = rspamd_task_re_cache_check (task,
+							rspamd_regexp_get_pattern (re))) == -1) {
+						r = rspamd_regexp_search (re, param_data, 0,
+								NULL, NULL, FALSE);
+						rspamd_task_re_cache_add (task,
+								rspamd_regexp_get_pattern (re), r);
+					}
+
+					/* A regexp hit must be reported just like a string hit */
+					if (r > 0) {
+						return TRUE;
+					}
+				}
+				else {
+					/* Just do strcasecmp */
+					if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) {
+						return TRUE;
+					}
+				}
+			}
+			/* Get next part */
+			if (!recursive) {
+				return result;
+			}
+			else if (cur != NULL) {
+				cur_part = cur->data;
+				if (cur_part->type != NULL) {
+					ct = cur_part->type;
+				}
+				cur = g_list_next (cur);
+			}
+			else {
+				/* All is done */
+				return result;
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function: TRUE if a Content-Type has the named parameter.
+ *
+ * Arguments: parameter name (string) and an optional flag; a flag starting
+ * with "true" enables the search through all mime parts of the task.
+ * Without the flag the search is recursive when the top-level part is
+ * multipart. Returns FALSE when the message has no mime part.
+ */
+static gboolean
+rspamd_content_type_has_param (struct rspamd_task * task,
+		GArray * args,
+		void *unused)
+{
+	gchar *param_name;
+	const gchar *param_data;
+	struct expression_argument *arg, *arg1;
+	GMimeObject *part;
+	GMimeContentType *ct;
+	gboolean recursive = FALSE, result = FALSE;
+	GList *cur = NULL;
+	struct mime_part *cur_part;
+
+	if (args == NULL || args->len < 1) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index (args, struct expression_argument, 0);
+	g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+	param_name = arg->data;
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		ct = (GMimeContentType *)g_mime_object_get_content_type (part);
+		if (args->len >= 2) {
+			/* The optional flag is the second argument, i.e. index 1 */
+			arg1 = &g_array_index (args, struct expression_argument, 1);
+			if (g_ascii_strncasecmp (arg1->data, "true",
+					sizeof ("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (g_mime_content_type_is_type (ct, "multipart", "*")) {
+				recursive = TRUE;
+			}
+		}
+
+		if (recursive) {
+			cur = task->parts;
+		}
+
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+		for (;; ) {
+			if ((param_data =
+					g_mime_content_type_get_parameter ((GMimeContentType *)ct,
+					param_name)) != NULL) {
+				return TRUE;
+			}
+			/* Get next part */
+			if (!recursive) {
+				return result;
+			}
+			else if (cur != NULL) {
+				cur_part = cur->data;
+				if (cur_part->type != NULL) {
+					ct = cur_part->type;
+				}
+				cur = g_list_next (cur);
+			}
+			else {
+				/* All is done */
+				return result;
+			}
+		}
+
+	}
+
+	/* No mime part means there is no content type to inspect */
+	return FALSE;
+}
+
+/*
+ * Common implementation of the content type/subtype checks.
+ *
+ * Arguments: pattern (string compared case insensitively, or regexp) and an
+ * optional flag; a flag starting with "true" enables the search through all
+ * mime parts of the task. Without the flag the search is recursive when the
+ * top-level part is multipart. `check_subtype` selects whether the subtype
+ * or the type of each content type is compared with the pattern.
+ *
+ * NOTE(review): regexp results are cached per task by pattern only, so in
+ * the recursive case parts after the first one see the cached result.
+ */
+static gboolean
+rspamd_content_type_check (struct rspamd_task *task,
+		GArray * args,
+		gboolean check_subtype)
+{
+	const gchar *param_data;
+	rspamd_regexp_t *re;
+	struct expression_argument *arg1, *arg_pattern;
+	GMimeObject *part;
+	GMimeContentType *ct;
+	gint r;
+	gboolean recursive = FALSE, result = FALSE;
+	GList *cur = NULL;
+	struct mime_part *cur_part;
+
+	if (args == NULL || args->len < 1) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+	/* The pattern is the first argument, the optional flag the second */
+	arg_pattern = &g_array_index (args, struct expression_argument, 0);
+
+	part = g_mime_message_get_mime_part (task->message);
+	if (part) {
+		ct = (GMimeContentType *)g_mime_object_get_content_type (part);
+		if (args->len >= 2) {
+			arg1 = &g_array_index (args, struct expression_argument, 1);
+			if (g_ascii_strncasecmp (arg1->data, "true",
+					sizeof ("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (g_mime_content_type_is_type (ct, "multipart", "*")) {
+				recursive = TRUE;
+			}
+		}
+
+		if (recursive) {
+			cur = task->parts;
+		}
+
+#ifndef GMIME24
+		g_object_unref (part);
+#endif
+		for (;;) {
+
+			if (check_subtype) {
+				param_data = ct->subtype;
+			}
+			else {
+				param_data = ct->type;
+			}
+
+			if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
+				re = arg_pattern->data;
+
+				if ((r = rspamd_task_re_cache_check (task,
+						rspamd_regexp_get_pattern (re))) == -1) {
+					r = rspamd_regexp_search (re, param_data, 0,
+							NULL, NULL, FALSE);
+					rspamd_task_re_cache_add (task,
+							rspamd_regexp_get_pattern (re), r);
+				}
+
+				/* A regexp hit must be reported just like a string hit */
+				if (r > 0) {
+					return TRUE;
+				}
+			}
+			else {
+				/* Just do strcasecmp */
+				if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) {
+					return TRUE;
+				}
+			}
+			/* Get next part */
+			if (!recursive) {
+				return result;
+			}
+			else if (cur != NULL) {
+				cur_part = cur->data;
+				if (cur_part->type != NULL) {
+					ct = cur_part->type;
+				}
+				cur = g_list_next (cur);
+			}
+			else {
+				/* All is done */
+				return result;
+			}
+		}
+
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function: match the main type of the content type against the
+ * pattern in args. Delegates to rspamd_content_type_check; `unused` is
+ * ignored.
+ */
+static gboolean
+rspamd_content_type_is_type (struct rspamd_task * task, GArray * args,
+	void *unused)
+{
+	const gboolean check_subtype = FALSE;
+
+	return rspamd_content_type_check (task, args, check_subtype);
+}
+
+/*
+ * Expression function: match the subtype of the content type against the
+ * pattern in args. Delegates to rspamd_content_type_check; `unused` is
+ * ignored.
+ */
+static gboolean
+rspamd_content_type_is_subtype (struct rspamd_task * task, GArray * args,
+	void *unused)
+{
+	const gboolean check_subtype = TRUE;
+
+	return rspamd_content_type_check (task, args, check_subtype);
+}
+
+/*
+ * Check whether the subtype of `ct` matches `subtype`.
+ *
+ * `subtype` is either a compiled regexp (EXPRESSION_ARGUMENT_REGEXP) or a
+ * string compared case-insensitively. A content type without a subtype never
+ * matches.
+ *
+ * `task` is kept for signature compatibility. The task regexp cache is not
+ * used here: it is keyed by the pattern only, so a result obtained for one
+ * part would be reused for another part with a different subtype.
+ */
+static gboolean
+compare_subtype (struct rspamd_task *task, GMimeContentType * ct,
+	struct expression_argument *subtype)
+{
+	if (subtype == NULL || ct == NULL) {
+		msg_warn ("invalid parameters passed");
+		return FALSE;
+	}
+
+	if (ct->subtype == NULL) {
+		return FALSE;
+	}
+
+	if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
+		return rspamd_regexp_search (subtype->data, ct->subtype, 0,
+			NULL, NULL, FALSE);
+	}
+
+	/* Just do strcasecmp */
+	return g_ascii_strcasecmp (ct->subtype, subtype->data) == 0;
+}
+
+/*
+ * Check the content length of `part` against the [min, max] range.
+ * A bound equal to 0 is treated as "no bound"; the part is not inspected for
+ * a bound that is disabled.
+ */
+static gboolean
+compare_len (struct mime_part *part, guint min, guint max)
+{
+	gboolean min_ok, max_ok;
+
+	min_ok = (min == 0) || (part->content->len >= min);
+	max_ok = (max == 0) || (part->content->len <= max);
+
+	return min_ok && max_ok;
+}
+
+/*
+ * Look for a part in task->parts whose content type matches `param_type`,
+ * whose subtype matches `param_subtype` (when it is not NULL) and whose
+ * content length passes compare_len (min_len, max_len).
+ *
+ * `param_type` is either a compiled regexp (EXPRESSION_ARGUMENT_REGEXP) or a
+ * string compared case-insensitively. Parts without a content type, or with
+ * a content type that has no main type, are skipped.
+ *
+ * Returns TRUE for the first part that satisfies all conditions.
+ */
+static gboolean
+common_has_content_part (struct rspamd_task * task,
+	struct expression_argument *param_type,
+	struct expression_argument *param_subtype,
+	gint min_len,
+	gint max_len)
+{
+	struct mime_part *part;
+	GList *cur;
+	GMimeContentType *ct;
+	gboolean type_matched;
+
+	for (cur = g_list_first (task->parts); cur != NULL;
+		cur = g_list_next (cur)) {
+		part = cur->data;
+		ct = part->type;
+
+		if (ct == NULL || ct->type == NULL) {
+			continue;
+		}
+
+		/*
+		 * The task regexp cache is deliberately not used: it is keyed by
+		 * the pattern only, while the type matched here differs per part.
+		 */
+		if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
+			type_matched = rspamd_regexp_search (param_type->data, ct->type,
+				0, NULL, NULL, FALSE);
+		}
+		else {
+			/* Just do strcasecmp */
+			type_matched =
+				g_ascii_strcasecmp (ct->type, param_type->data) == 0;
+		}
+
+		if (!type_matched) {
+			continue;
+		}
+
+		/* Also check subtype and length of the part */
+		if (param_subtype != NULL &&
+			!compare_subtype (task, ct, param_subtype)) {
+			continue;
+		}
+
+		if (compare_len (part, min_len, max_len)) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function: check whether the task has a part of the given type.
+ *
+ * args[0] is the content type pattern, args[1] (optional) is the subtype
+ * pattern. No length restriction is applied. `unused` is ignored.
+ */
+static gboolean
+rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
+{
+	struct expression_argument *param_type = NULL, *param_subtype = NULL;
+
+	/* An empty array must be rejected too: element 0 is read below */
+	if (args == NULL || args->len < 1) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	param_type = &g_array_index (args, struct expression_argument, 0);
+	if (args->len >= 2) {
+		param_subtype = &g_array_index (args, struct expression_argument, 1);
+	}
+
+	return common_has_content_part (task, param_type, param_subtype, 0, 0);
+}
+
+/*
+ * Expression function: check whether the task has a part of the given type
+ * whose content length lies within the given bounds.
+ *
+ * args[0] is the content type pattern, args[1] (optional) the subtype
+ * pattern, args[2] (optional) the minimum length and args[3] (optional) the
+ * maximum length. Both lengths are decimal numbers; a missing bound is
+ * passed on as 0. `unused` is ignored.
+ *
+ * Returns FALSE if there are no arguments or a length cannot be converted.
+ */
+static gboolean
+rspamd_has_content_part_len (struct rspamd_task * task,
+	GArray * args,
+	void *unused)
+{
+	struct expression_argument *param_type = NULL, *param_subtype = NULL;
+	gint min = 0, max = 0;
+	struct expression_argument *arg;
+
+	if (args == NULL || args->len < 1) {
+		msg_warn ("no parameters to function");
+		return FALSE;
+	}
+
+	param_type = &g_array_index (args, struct expression_argument, 0);
+
+	if (args->len >= 2) {
+		param_subtype = &g_array_index (args, struct expression_argument, 1);
+	}
+
+	if (args->len >= 3) {
+		arg = &g_array_index (args, struct expression_argument, 2);
+		/* Validate the argument kind before its data is treated as text */
+		g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+		errno = 0;
+		min = strtoul (arg->data, NULL, 10);
+
+		if (errno != 0) {
+			msg_warn ("invalid numeric value '%s': %s",
+				(gchar *)arg->data,
+				strerror (errno));
+			return FALSE;
+		}
+	}
+
+	/* The upper bound is optional: only read it when it is really there */
+	if (args->len >= 4) {
+		arg = &g_array_index (args, struct expression_argument, 3);
+		g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
+		/* strtoul only sets errno on failure, so it must be cleared first */
+		errno = 0;
+		max = strtoul (arg->data, NULL, 10);
+
+		if (errno != 0) {
+			msg_warn ("invalid numeric value '%s': %s",
+				(gchar *)arg->data,
+				strerror (errno));
+			return FALSE;
+		}
+	}
+
+	return common_has_content_part (task, param_type, param_subtype, min, max);
+}
+
+/*
+ * Replace the global max_re_data limit with `limit` and hand back the value
+ * that was in effect before the call.
+ */
+guint
+rspamd_mime_expression_set_re_limit (guint limit)
+{
+	const guint previous = max_re_data;
+
+	max_re_data = limit;
+
+	return previous;
+}
diff --git a/src/libmime/mime_expressions.h b/src/libmime/mime_expressions.h
new file mode 100644
index 000000000..41e8b33ac
--- /dev/null
+++ b/src/libmime/mime_expressions.h
@@ -0,0 +1,49 @@
+/**
+ * @file mime_expressions.h
+ * Rspamd expressions API
+ */
+
+#ifndef RSPAMD_EXPRESSIONS_H
+#define RSPAMD_EXPRESSIONS_H
+
+#include "config.h"
+#include "expression.h"
+
+struct rspamd_task;
+
+extern const struct rspamd_atom_subr mime_expr_subr;
+
+/**
+ * A single argument passed to an expression function
+ */
+struct expression_argument {
+ enum {
+ EXPRESSION_ARGUMENT_NORMAL = 0,
+ EXPRESSION_ARGUMENT_BOOL,
+ EXPRESSION_ARGUMENT_REGEXP
+ } type; /**< kind of argument: normal (text), boolean or regexp */
+ void *data; /**< argument payload: read as a string for NORMAL and as rspamd_regexp_t * for REGEXP arguments; BOOL payload is not shown in this header */
+};
+
+
+/**
+ * Signature of an expression function: receives the task, the array of
+ * arguments (struct expression_argument elements) and an opaque user
+ * pointer, and returns the boolean result of the check
+ */
+typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *,
+ GArray *args, void *user_data);
+
+
+/**
+ * Register specified function to rspamd internal functions list
+ * @param name name of function
+ * @param func pointer to function
+ * @param user_data opaque pointer registered together with func; presumably
+ * handed to func as its user_data argument (TODO confirm in the implementation)
+ */
+void register_expression_function (const gchar *name,
+ rspamd_internal_func_t func,
+ void *user_data);
+
+/**
+ * Set global limit of regexp data size to be processed
+ * @param limit new limit in bytes
+ * @return old limit value
+ */
+guint rspamd_mime_expression_set_re_limit (guint limit);
+
+#endif
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 08b70f5c9..06232cff8 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -43,18 +43,6 @@ enum rspamd_cred_type {
};
/**
- * Regexp type: /H - header, /M - mime, /U - url /X - raw header
- */
-enum rspamd_regexp_type {
- REGEXP_NONE = 0,
- REGEXP_HEADER,
- REGEXP_MIME,
- REGEXP_MESSAGE,
- REGEXP_URL,
- REGEXP_RAW_HEADER
-};
-
-/**
* Logging type
*/
enum rspamd_log_type {
@@ -64,18 +52,6 @@ enum rspamd_log_type {
};
/**
- * Regexp structure
- */
-struct rspamd_regexp_element {
- enum rspamd_regexp_type type; /**< regexp type */
- gchar *regexp_text; /**< regexp text representation */
- rspamd_regexp_t *regexp; /**< regexp structure */
- gchar *header; /**< header name for header regexps */
- gboolean is_test; /**< true if this expression must be tested */
- gboolean is_strong; /**< true if headers search must be case sensitive */
-};
-
-/**
* script module list item
*/
struct script_module {
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 69d11f756..3cb252ae6 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -27,7 +27,7 @@
#include "utlist.h"
#include "cfg_file.h"
#include "lua/lua_common.h"
-#include "expressions.h"
+#include "expression.h"
struct rspamd_rcl_default_handler_data {
@@ -1000,7 +1000,7 @@ rspamd_rcl_composite_handler (struct rspamd_config *cfg,
GError **err)
{
const ucl_object_t *val;
- struct expression *expr;
+ struct rspamd_expression *expr;
struct rspamd_composite *composite;
const gchar *composite_name, *composite_expression;
gboolean new = TRUE;
@@ -1028,14 +1028,8 @@ rspamd_rcl_composite_handler (struct rspamd_config *cfg,
return FALSE;
}
- if ((expr =
- parse_expression (cfg->cfg_pool,
- (gchar *)composite_expression)) == NULL) {
- g_set_error (err,
- CFG_RCL_ERROR,
- EINVAL,
- "cannot parse composite expression: %s",
- composite_expression);
+ if (!rspamd_parse_expression (composite_expression, 0, &composite_expr_subr,
+ NULL, cfg->cfg_pool, err, &expr)) {
return FALSE;
}
diff --git a/src/libserver/task.c b/src/libserver/task.c
index f6eeef2b1..9ac66b65c 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -450,3 +450,37 @@ rspamd_task_add_sender (struct rspamd_task *task, const gchar *sender)
return FALSE;
}
+
+
+/*
+ * Store `value` under the regexp text `re` in task->re_cache and return the
+ * value stored before, or RSPAMD_TASK_CACHE_NO_VALUE if there was none.
+ * The `re` pointer itself is inserted as the key; it is not copied here.
+ *
+ * NOTE(review): values are stored with GINT_TO_POINTER, so a stored 0 is a
+ * NULL pointer and is later reported as "no value" by the lookup.
+ */
+guint
+rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re,
+	guint value)
+{
+	gpointer old;
+	guint previous;
+
+	old = g_hash_table_lookup (task->re_cache, re);
+	previous = (old != NULL) ?
+		(guint)GPOINTER_TO_INT (old) : RSPAMD_TASK_CACHE_NO_VALUE;
+
+	g_hash_table_insert (task->re_cache, (gpointer)re, GINT_TO_POINTER (value));
+
+	return previous;
+}
+
+/*
+ * Return the value cached in task->re_cache for the regexp text `re`, or
+ * RSPAMD_TASK_CACHE_NO_VALUE when the lookup yields NULL.
+ *
+ * NOTE(review): an entry whose stored value is 0 also yields NULL and is
+ * therefore reported as RSPAMD_TASK_CACHE_NO_VALUE.
+ */
+guint
+rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re)
+{
+	gpointer cached = g_hash_table_lookup (task->re_cache, re);
+
+	if (cached == NULL) {
+		return RSPAMD_TASK_CACHE_NO_VALUE;
+	}
+
+	return GPOINTER_TO_INT (cached);
+}
diff --git a/src/libserver/task.h b/src/libserver/task.h
index 799182f01..21df26535 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -218,5 +218,24 @@ gboolean rspamd_task_add_recipient (struct rspamd_task *task, const gchar *rcpt)
*/
gboolean rspamd_task_add_sender (struct rspamd_task *task, const gchar *sender);
+#define RSPAMD_TASK_CACHE_NO_VALUE ((guint)-1)
+
+/**
+ * Add or replace the value to the task cache of regular expressions results
+ * @param task task object
+ * @param re text value of regexp
+ * @param value value to add
+ * @return previous value of element or RSPAMD_TASK_CACHE_NO_VALUE
+ */
+guint rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re,
+ guint value);
+
+/**
+ * Check for cached result of re inside cache
+ * @param task task object
+ * @param re text value of regexp
+ * @return the current value of element or RSPAMD_TASK_CACHE_NO_VALUE
+ */
+guint rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re);
#endif /* TASK_H_ */
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index b8bd4454c..bf1a1762d 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -436,7 +436,7 @@ rspamd_regexp_get_ud (rspamd_regexp_t *re)
return re->ud;
}
-static gboolean
+gboolean
rspamd_regexp_equal (gconstpointer a, gconstpointer b)
{
const guchar *ia = a, *ib = b;
@@ -444,7 +444,7 @@ rspamd_regexp_equal (gconstpointer a, gconstpointer b)
return (memcmp (ia, ib, sizeof (regexp_id_t)) == 0);
}
-static guint32
+guint32
rspamd_regexp_hash (gconstpointer a)
{
const guchar *ia = a;
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
index 3c08de71a..fc236c1b3 100644
--- a/src/libutil/regexp.h
+++ b/src/libutil/regexp.h
@@ -90,6 +90,13 @@ void rspamd_regexp_set_ud (rspamd_regexp_t *re, gpointer ud);
gpointer rspamd_regexp_get_ud (rspamd_regexp_t *re);
/**
+ * Get regexp ID suitable for hashing
+ * @param re
+ * @return
+ */
+gpointer rspamd_regexp_get_id (rspamd_regexp_t *re);
+
+/**
* Get pattern for the specified regexp object
* @param re
* @return
@@ -152,6 +159,21 @@ gboolean rspamd_regexp_cache_remove (struct rspamd_regexp_cache *cache,
void rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache);
/**
+ * Return the value for regexp hash based on its ID
+ * @param a
+ * @return
+ */
+guint32 rspamd_regexp_hash (gconstpointer a);
+
+/**
+ * Compare two regexp objects based on their IDs
+ * @param a
+ * @param b
+ * @return
+ */
+gboolean rspamd_regexp_equal (gconstpointer a, gconstpointer b);
+
+/**
* Initialize superglobal regexp cache and library
*/
void rspamd_regexp_library_init (void);
diff --git a/src/lua/lua_cfg_file.c b/src/lua/lua_cfg_file.c
index c29b4b2b1..c73173f97 100644
--- a/src/lua/lua_cfg_file.c
+++ b/src/lua/lua_cfg_file.c
@@ -23,8 +23,9 @@
*/
#include "lua_common.h"
-#include "expressions.h"
#include "symbols_cache.h"
+#include "expression.h"
+#include "filter.h"
#ifdef HAVE_SYS_UTSNAME_H
#include <sys/utsname.h>
#endif
@@ -140,9 +141,10 @@ rspamd_lua_post_load_config (struct rspamd_config *cfg)
lua_State *L = cfg->lua_state;
const gchar *name, *val;
gchar *sym;
- struct expression *expr, *old_expr;
+ struct rspamd_expression *expr, *old_expr;
ucl_object_t *obj;
gsize keylen;
+ GError *err = NULL;
/* First check all module options that may be overriden in 'config' global */
lua_getglobal (L, "config");
@@ -193,10 +195,12 @@ rspamd_lua_post_load_config (struct rspamd_config *cfg)
if (name != NULL && lua_isstring (L, -1)) {
val = lua_tostring (L, -1);
sym = rspamd_mempool_strdup (cfg->cfg_pool, name);
- if ((expr =
- parse_expression (cfg->cfg_pool,
- rspamd_mempool_strdup (cfg->cfg_pool, val))) == NULL) {
- msg_err ("cannot parse composite expression: %s", val);
+ if (!rspamd_parse_expression (val, 0, &composite_expr_subr, NULL,
+ cfg->cfg_pool, &err, &expr)) {
+ msg_err ("cannot parse composite expression '%s': %s", val,
+ err->message);
+ g_error_free (err);
+ err = NULL;
continue;
}
/* Now check hash table for this composite */
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index 63d34e4e6..d970325d1 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -23,7 +23,6 @@
*/
#include "lua_common.h"
-#include "expressions.h"
/* Lua module init function */
#define MODULE_INIT_FUNC "module_init"
diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c
index 5116fff93..6f77612fb 100644
--- a/src/lua/lua_config.c
+++ b/src/lua/lua_config.c
@@ -24,11 +24,11 @@
#include "lua_common.h"
-#include "expressions.h"
#include "map.h"
#include "message.h"
#include "radix.h"
#include "trie.h"
+#include "expression.h"
/***
* This module is used to configure rspamd and is normally available as global
@@ -70,27 +70,6 @@ LUA_FUNCTION_DEF (config, get_all_opt);
*/
LUA_FUNCTION_DEF (config, get_mempool);
/***
- * @method rspamd_config:register_function(name, callback)
- * Registers new rspamd function that could be used in symbols expressions
- * @param {string} name name of function
- * @param {function} callback callback to be called
- * @example
-
-local function lua_header_exists(task, hname)
- if task:get_raw_header(hname) then
- return true
- end
-
- return false
-end
-
-rspamd_config:register_function('lua_header_exists', lua_header_exists)
-
--- Further in configuration it would be possible to define symbols like:
--- HAS_CONTENT_TYPE = 'lua_header_exists(Content-Type)'
- */
-LUA_FUNCTION_DEF (config, register_function);
-/***
* @method rspamd_config:add_radix_map(mapline[, description])
* Creates new dynamic map of IP/mask addresses.
* @param {string} mapline URL for a map
@@ -314,7 +293,6 @@ static const struct luaL_reg configlib_m[] = {
LUA_INTERFACE_DEF (config, get_module_opt),
LUA_INTERFACE_DEF (config, get_mempool),
LUA_INTERFACE_DEF (config, get_all_opt),
- LUA_INTERFACE_DEF (config, register_function),
LUA_INTERFACE_DEF (config, add_radix_map),
LUA_INTERFACE_DEF (config, radix_from_config),
LUA_INTERFACE_DEF (config, add_hash_map),
@@ -532,90 +510,6 @@ lua_destroy_cfg_symbol (gpointer ud)
}
}
-static gboolean
-lua_config_function_callback (struct rspamd_task *task,
- GList *args,
- void *user_data)
-{
- struct lua_callback_data *cd = user_data;
- struct rspamd_task **ptask;
- gint i = 1;
- struct expression_argument *arg;
- GList *cur;
- gboolean res = FALSE;
-
- if (cd->cb_is_ref) {
- lua_rawgeti (cd->L, LUA_REGISTRYINDEX, cd->callback.ref);
- }
- else {
- lua_getglobal (cd->L, cd->callback.name);
- }
- ptask = lua_newuserdata (cd->L, sizeof (struct rspamd_task *));
- rspamd_lua_setclass (cd->L, "rspamd{task}", -1);
- *ptask = task;
- /* Now push all arguments */
- cur = args;
- while (cur) {
- arg = get_function_arg (cur->data, task, TRUE);
- lua_pushstring (cd->L, (const gchar *)arg->data);
- cur = g_list_next (cur);
- i++;
- }
-
- if (lua_pcall (cd->L, i, 1, 0) != 0) {
- msg_info ("error processing symbol %s: call to %s failed: %s",
- cd->symbol,
- cd->cb_is_ref ? "local function" :
- cd->callback.name,
- lua_tostring (cd->L, -1));
- }
- else {
- if (lua_isboolean (cd->L, 1)) {
- res = lua_toboolean (cd->L, 1);
- }
- lua_pop (cd->L, 1);
- }
-
- return res;
-}
-
-static gint
-lua_config_register_function (lua_State *L)
-{
- struct rspamd_config *cfg = lua_check_config (L);
- gchar *name;
- struct lua_callback_data *cd;
-
- if (cfg) {
- name = rspamd_mempool_strdup (cfg->cfg_pool, luaL_checkstring (L, 2));
- cd =
- rspamd_mempool_alloc (cfg->cfg_pool,
- sizeof (struct lua_callback_data));
-
- if (lua_type (L, 3) == LUA_TSTRING) {
- cd->callback.name = rspamd_mempool_strdup (cfg->cfg_pool,
- luaL_checkstring (L, 3));
- cd->cb_is_ref = FALSE;
- }
- else {
- lua_pushvalue (L, 3);
- /* Get a reference */
- cd->callback.ref = luaL_ref (L, LUA_REGISTRYINDEX);
- cd->cb_is_ref = TRUE;
- }
- if (name) {
- cd->L = L;
- cd->symbol = name;
- register_expression_function (name, lua_config_function_callback,
- cd);
- }
- rspamd_mempool_add_destructor (cfg->cfg_pool,
- (rspamd_mempool_destruct_t)lua_destroy_cfg_symbol,
- cd);
- }
- return 1;
-}
-
static gint
lua_config_register_module_option (lua_State *L)
{
@@ -1238,7 +1132,7 @@ static gint
lua_config_add_composite (lua_State * L)
{
struct rspamd_config *cfg = lua_check_config (L);
- struct expression *expr;
+ struct rspamd_expression *expr;
gchar *name;
const gchar *expr_str;
struct rspamd_composite *composite;
@@ -1249,8 +1143,8 @@ lua_config_add_composite (lua_State * L)
expr_str = luaL_checkstring (L, 3);
if (name && expr_str) {
- expr = parse_expression (cfg->cfg_pool, (gchar *)expr_str);
- if (expr == NULL) {
+ if (!rspamd_parse_expression (expr_str, 0, &composite_expr_subr,
+ NULL, cfg->cfg_pool, NULL, &expr)) {
msg_err ("cannot parse composite expression %s", expr_str);
}
else {
diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c
index 6fa0de772..58073b3bc 100644
--- a/src/lua/lua_regexp.c
+++ b/src/lua/lua_regexp.c
@@ -22,7 +22,6 @@
*/
#include "lua_common.h"
-#include "expressions.h"
#include "regexp.h"
/***
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index e74d7e71c..b15d3e181 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -25,7 +25,6 @@
#include "lua_common.h"
#include "message.h"
-#include "expressions.h"
#include "protocol.h"
#include "filter.h"
#include "dns.h"
@@ -273,15 +272,7 @@ LUA_FUNCTION_DEF (task, get_resolver);
* Increment number of DNS requests for the task. Is used just for logging purposes.
*/
LUA_FUNCTION_DEF (task, inc_dns_req);
-/***
- * @method task:call_rspamd_function(function[, param, param...])
- * Calls rspamd expression function `func` with the specified parameters.
- * It returns the boolean result of function invocation.
- * @param {string} function name of internal or registered lua function to call
- * @param {list of strings} params parameters for a function
- * @return {bool} true or false returned by expression function
- */
-LUA_FUNCTION_DEF (task, call_rspamd_function);
+
/***
* @method task:get_recipients([type])
* Return SMTP or MIME recipients for a task. This function returns list of internet addresses each one is a table with the following structure:
@@ -459,7 +450,6 @@ static const struct luaL_reg tasklib_m[] = {
LUA_INTERFACE_DEF (task, get_received_headers),
LUA_INTERFACE_DEF (task, get_resolver),
LUA_INTERFACE_DEF (task, inc_dns_req),
- LUA_INTERFACE_DEF (task, call_rspamd_function),
LUA_INTERFACE_DEF (task, get_recipients),
LUA_INTERFACE_DEF (task, get_from),
LUA_INTERFACE_DEF (task, get_user),
@@ -1255,45 +1245,6 @@ lua_task_inc_dns_req (lua_State *L)
return 0;
}
-static gint
-lua_task_call_rspamd_function (lua_State * L)
-{
- struct rspamd_task *task = lua_check_task (L, 1);
- struct expression_function f;
- gint i, top;
- gboolean res;
- gchar *arg;
-
- if (task) {
- f.name = (gchar *)luaL_checkstring (L, 2);
- if (f.name) {
- f.args = NULL;
- top = lua_gettop (L);
- /* Get arguments after function name */
- for (i = 3; i <= top; i++) {
- arg = (gchar *)luaL_checkstring (L, i);
- if (arg != NULL) {
- f.args = g_list_prepend (f.args, arg);
- }
- }
- res = call_expression_function (&f, task, L);
- lua_pushboolean (L, res);
- if (f.args) {
- g_list_free (f.args);
- }
-
- return 1;
- }
- }
-
- lua_pushnil (L);
-
- return 1;
-
-}
-
-
-
static gboolean
lua_push_internet_address (lua_State *L, InternetAddress *ia)
{
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index 90a76e50a..002a1d05b 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -33,7 +33,6 @@
#include "config.h"
#include "libmime/message.h"
-#include "libmime/expressions.h"
#include "main.h"
#define DEFAULT_SYMBOL "R_CHARSET_MIXED"
diff --git a/src/plugins/dkim_check.c b/src/plugins/dkim_check.c
index f37a465a1..0b710ce63 100644
--- a/src/plugins/dkim_check.c
+++ b/src/plugins/dkim_check.c
@@ -39,7 +39,6 @@
#include "config.h"
#include "libmime/message.h"
-#include "libmime/expressions.h"
#include "libserver/dkim.h"
#include "libutil/hash.h"
#include "libutil/map.h"
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 9fb62187f..a6bbacaf6 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -41,7 +41,6 @@
#include "config.h"
#include "libmime/message.h"
-#include "libmime/expressions.h"
#include "libutil/map.h"
#include "libmime/images.h"
#include "fuzzy_storage.h"
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index b96fcca31..a140d616c 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -29,72 +29,25 @@
#include "config.h"
#include "libmime/message.h"
-#include "libmime/expressions.h"
+#include "expression.h"
+#include "mime_expressions.h"
#include "libutil/map.h"
#include "lua/lua_common.h"
#include "main.h"
-#define DEFAULT_STATFILE_PREFIX "./"
-
struct regexp_module_item {
- struct expression *expr;
+ struct rspamd_expression *expr;
const gchar *symbol;
- guint32 avg_time;
struct ucl_lua_funcdata *lua_function;
};
struct regexp_ctx {
- gchar *statfile_prefix;
-
rspamd_mempool_t *regexp_pool;
gsize max_size;
- gsize max_threads;
- GThreadPool *workers;
-};
-
-/* Lua regexp module for checking rspamd regexps */
-LUA_FUNCTION_DEF (regexp, match);
-
-static const struct luaL_reg regexplib_m[] = {
- LUA_INTERFACE_DEF (regexp, match),
- {"__tostring", rspamd_lua_class_tostring},
- {NULL, NULL}
};
static struct regexp_ctx *regexp_module_ctx = NULL;
-static GMutex *workers_mtx = NULL;
-static void process_regexp_item_threaded (gpointer data, gpointer user_data);
-static gboolean rspamd_regexp_match_number (struct rspamd_task *task,
- GList * args,
- void *unused);
-static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
- GList * args,
- void *unused);
-static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
- GList * args,
- void *unused);
-static gboolean rspamd_regexp_occurs_number (struct rspamd_task *task,
- GList * args,
- void *unused);
-static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
- GList * args,
- void *unused);
-static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
- GList * args,
- void *unused);
-static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
- GList * args,
- void *unused);
-static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
- GList * args,
- void *unused);
-static gboolean rspamd_has_content_part (struct rspamd_task *task,
- GList * args,
- void *unused);
-static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
- GList * args,
- void *unused);
static void process_regexp_item (struct rspamd_task *task, void *user_data);
@@ -111,106 +64,6 @@ module_t regexp_module = {
NULL
};
-/* Task cache functions */
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
-static GStaticMutex task_cache_mtx = G_STATIC_MUTEX_INIT;
-#else
-G_LOCK_DEFINE (task_cache_mtx);
-#endif
-
-void
-task_cache_add (struct rspamd_task *task,
- struct rspamd_regexp_element *re,
- gint32 result)
-{
- if (result == 0) {
- result = -1;
- }
- /* Avoid concurrenting inserting of results */
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
- g_static_mutex_lock (&task_cache_mtx);
-#else
- G_LOCK (task_cache_mtx);
-#endif
- g_hash_table_insert (task->re_cache, re->regexp_text,
- GINT_TO_POINTER (result));
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
- g_static_mutex_unlock (&task_cache_mtx);
-#else
- G_UNLOCK (task_cache_mtx);
-#endif
-}
-
-gint32
-task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re)
-{
- gpointer res;
- gint32 r;
-
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
- g_static_mutex_lock (&task_cache_mtx);
-#else
- G_LOCK (task_cache_mtx);
-#endif
- if ((res = g_hash_table_lookup (task->re_cache, re->regexp_text)) != NULL) {
- r = GPOINTER_TO_INT (res);
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
- g_static_mutex_unlock (&task_cache_mtx);
-#else
- G_UNLOCK (task_cache_mtx);
-#endif
- if (r == -1) {
- return 0;
- }
- return 1;
- }
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
- g_static_mutex_unlock (&task_cache_mtx);
-#else
- G_UNLOCK (task_cache_mtx);
-#endif
- return -1;
-}
-
-
-static gint
-luaopen_regexp (lua_State * L)
-{
- luaL_register (L, "rspamd_regexp", regexplib_m);
-
- return 1;
-}
-
-/*
- * Utility functions for matching exact number of regexps
- */
-typedef gboolean (*int_compare_func) (gint a, gint b);
-static gboolean
-op_equal (gint a, gint b)
-{
- return a == b;
-}
-static gboolean
-op_more (gint a, gint b)
-{
- return a > b;
-}
-static gboolean
-op_less (gint a, gint b)
-{
- return a < b;
-}
-static gboolean
-op_more_equal (gint a, gint b)
-{
- return a >= b;
-}
-static gboolean
-op_less_equal (gint a, gint b)
-{
- return a <= b;
-}
-
/* Process regexp expression */
static gboolean
read_regexp_expression (rspamd_mempool_t * pool,
@@ -219,30 +72,14 @@ read_regexp_expression (rspamd_mempool_t * pool,
const gchar *line,
gboolean raw_mode)
{
- struct expression *e, *cur;
+ struct rspamd_expression *e = NULL;
- e = parse_expression (pool, (gchar *)line);
+ /* XXX: Implement atoms parsing */
if (e == NULL) {
msg_warn ("%s = \"%s\" is invalid regexp expression", symbol, line);
return FALSE;
}
chain->expr = e;
- cur = e;
- while (cur) {
- if (cur->type == EXPR_REGEXP) {
- cur->content.operand = parse_regexp (pool,
- cur->content.operand,
- raw_mode);
- if (cur->content.operand == NULL) {
- msg_warn ("cannot parse regexp, skip expression %s = \"%s\"",
- symbol,
- line);
- return FALSE;
- }
- cur->type = EXPR_REGEXP_PARSED;
- }
- cur = cur->next;
- }
return TRUE;
}
@@ -256,41 +93,8 @@ regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
regexp_module_ctx->regexp_pool = rspamd_mempool_new (
rspamd_mempool_suggest_size ());
- regexp_module_ctx->workers = NULL;
*ctx = (struct module_ctx *)regexp_module_ctx;
- register_expression_function ("regexp_match_number",
- rspamd_regexp_match_number,
- NULL);
- register_expression_function ("regexp_occurs_number",
- rspamd_regexp_occurs_number,
- NULL);
- register_expression_function ("raw_header_exists",
- rspamd_raw_header_exists,
- NULL);
- register_expression_function ("check_smtp_data",
- rspamd_check_smtp_data,
- NULL);
- register_expression_function ("content_type_is_type",
- rspamd_content_type_is_type,
- NULL);
- register_expression_function ("content_type_is_subtype",
- rspamd_content_type_is_subtype,
- NULL);
- register_expression_function ("content_type_has_param",
- rspamd_content_type_has_param,
- NULL);
- register_expression_function ("content_type_compare_param",
- rspamd_content_type_compare_param,
- NULL);
- register_expression_function ("has_content_part",
- rspamd_has_content_part,
- NULL);
- register_expression_function ("has_content_part_len",
- rspamd_has_content_part_len,
- NULL);
-
- (void)luaopen_regexp (cfg->lua_state);
return 0;
}
@@ -310,8 +114,6 @@ regexp_module_config (struct rspamd_config *cfg)
}
regexp_module_ctx->max_size = 0;
- regexp_module_ctx->max_threads = 0;
- regexp_module_ctx->workers = NULL;
while ((value = ucl_iterate_object (sec, &it, true)) != NULL) {
if (g_ascii_strncasecmp (ucl_object_key (value), "max_size",
@@ -320,7 +122,7 @@ regexp_module_config (struct rspamd_config *cfg)
}
else if (g_ascii_strncasecmp (ucl_object_key (value), "max_threads",
sizeof ("max_threads") - 1) == 0) {
- regexp_module_ctx->max_threads = ucl_obj_toint (value);
+ msg_warn ("regexp module is now single threaded, max_threads is ignored");
}
else if (value->type == UCL_STRING) {
cur_item = rspamd_mempool_alloc0 (regexp_module_ctx->regexp_pool,
@@ -367,640 +169,48 @@ regexp_module_reconfig (struct rspamd_config *cfg)
return regexp_module_config (cfg);
}
-struct url_regexp_param {
- struct rspamd_task *task;
- rspamd_regexp_t *regexp;
- struct rspamd_regexp_element *re;
- gboolean found;
-};
-
-static gboolean
-tree_url_callback (gpointer key, gpointer value, void *data)
-{
- struct url_regexp_param *param = data;
- struct rspamd_url *url = value;
-
- if (rspamd_regexp_search (param->regexp, struri (url), 0, NULL, NULL, FALSE)
- == TRUE) {
- if (G_UNLIKELY (param->re->is_test)) {
- msg_info ("process test regexp %s for url %s returned TRUE",
- struri (url));
- }
- task_cache_add (param->task, param->re, 1);
- param->found = TRUE;
- return TRUE;
- }
- else if (G_UNLIKELY (param->re->is_test)) {
- msg_info ("process test regexp %s for url %s returned FALSE",
- struri (url));
- }
-
- return FALSE;
-}
-
-static gsize
-process_regexp (struct rspamd_regexp_element *re,
- struct rspamd_task *task,
- const gchar *additional,
- gint limit,
- int_compare_func f)
-{
- guint8 *ct;
- gsize clen;
- gint r, passed = 0;
- gboolean matched = FALSE, raw = FALSE;
- const gchar *in, *start, *end;
-
- GList *cur, *headerlist;
- rspamd_regexp_t *regexp;
- struct url_regexp_param callback_param = {
- .task = task,
- .re = re,
- .found = FALSE
- };
- struct mime_text_part *part;
- struct raw_header *rh;
-
- if (re == NULL) {
- msg_info ("invalid regexp passed");
- return 0;
- }
-
- callback_param.regexp = re->regexp;
- if ((r = task_cache_check (task, re)) != -1) {
- debug_task ("regexp /%s/ is found in cache, result: %d",
- re->regexp_text,
- r);
- return r == 1;
- }
-
- if (additional != NULL) {
- /* We have additional parameter defined, so ignore type of regexp expression and use it for parsing */
- if (G_UNLIKELY (re->is_test)) {
- msg_info ("process test regexp %s with test %s",
- re->regexp_text,
- additional);
- }
- if (rspamd_regexp_search (re->regexp, additional, 0, NULL, NULL,
- FALSE) == TRUE) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info ("result of regexp %s is true", re->regexp_text);
- }
- task_cache_add (task, re, 1);
- return 1;
- }
- else {
- task_cache_add (task, re, 0);
- return 0;
- }
- }
-
- switch (re->type) {
- case REGEXP_NONE:
- msg_warn ("bad error detected: %s has invalid regexp type",
- re->regexp_text);
- break;
- case REGEXP_HEADER:
- case REGEXP_RAW_HEADER:
- /* Check header's name */
- if (re->header == NULL) {
- msg_info ("header regexp without header name: '%s'",
- re->regexp_text);
- task_cache_add (task, re, 0);
- return 0;
- }
- debug_task ("checking %s header regexp: %s = %s",
- re->type == REGEXP_RAW_HEADER ? "raw" : "decoded",
- re->header,
- re->regexp_text);
-
- /* Get list of specified headers */
- headerlist = message_get_header (task,
- re->header,
- re->is_strong);
- if (headerlist == NULL) {
- /* Header is not found */
- if (G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for header %s returned FALSE: no header found",
- re->regexp_text,
- re->header);
- }
- task_cache_add (task, re, 0);
- return 0;
- }
- else {
- /* Check whether we have regexp for it */
- if (re->regexp == NULL) {
- debug_task ("regexp contains only header and it is found %s",
- re->header);
- task_cache_add (task, re, 1);
- return 1;
- }
- /* Iterate throught headers */
- cur = headerlist;
- while (cur) {
- rh = cur->data;
- debug_task ("found header \"%s\" with value \"%s\"",
- re->header, rh->decoded);
- regexp = re->regexp;
-
- if (re->type == REGEXP_RAW_HEADER) {
- in = rh->value;
- raw = TRUE;
- }
- else {
- in = rh->decoded;
- /* Validate input */
- if (!in || !g_utf8_validate (in, -1, NULL)) {
- cur = g_list_next (cur);
- continue;
- }
- }
-
- /* Match re */
- if (in &&
- rspamd_regexp_search (regexp, in, 0, NULL, NULL, raw)) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for header %s with value '%s' returned TRUE",
- re->regexp_text,
- re->header,
- in);
- }
- if (f != NULL && limit > 1) {
- /* If we have limit count, increase passed count and compare with limit */
- if (f (++passed, limit)) {
- task_cache_add (task, re, 1);
- return 1;
- }
- }
- else {
- task_cache_add (task, re, 1);
- return 1;
- }
- }
- else if (G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for header %s with value '%s' returned FALSE",
- re->regexp_text,
- re->header,
- in);
- }
- cur = g_list_next (cur);
- }
- task_cache_add (task, re, 0);
- return 0;
- }
- break;
- case REGEXP_MIME:
- debug_task ("checking mime regexp: %s", re->regexp_text);
- /* Iterate throught text parts */
- cur = g_list_first (task->text_parts);
- while (cur) {
- part = (struct mime_text_part *)cur->data;
- /* Skip empty parts */
- if (part->is_empty) {
- cur = g_list_next (cur);
- continue;
- }
- /* Skip too large parts */
- if (regexp_module_ctx->max_size != 0 && part->content->len >
- regexp_module_ctx->max_size) {
- msg_info ("<%s> skip part of size %Hud",
- task->message_id,
- part->content->len);
- cur = g_list_next (cur);
- continue;
- }
-
- regexp = re->regexp;
-
- /* Check raw flags */
- if (part->is_raw) {
- raw = TRUE;
- }
- /* Select data for regexp */
- if (raw) {
- ct = part->orig->data;
- clen = part->orig->len;
- }
- else {
- ct = part->content->data;
- clen = part->content->len;
- }
- /* If we have limit, apply regexp so much times as we can */
- if (f != NULL && limit > 1) {
- end = 0;
- start = NULL;
- end = NULL;
- while ((matched =
- rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for mime part of length %d returned TRUE",
- re->regexp_text,
- (gint)clen,
- end);
- }
- if (f (++passed, limit)) {
- task_cache_add (task, re, 1);
- return 1;
- }
- }
- }
- else {
- if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for mime part of length %d returned TRUE",
- re->regexp_text,
- (gint)clen);
- }
- task_cache_add (task, re, 1);
- return 1;
- }
-
- }
- if (!matched && G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for mime part of length %d returned FALSE",
- re->regexp_text,
- (gint)clen);
- }
- cur = g_list_next (cur);
- }
- task_cache_add (task, re, 0);
- break;
- case REGEXP_MESSAGE:
- debug_task ("checking message regexp: %s", re->regexp_text);
- raw = TRUE;
- regexp = re->regexp;
- ct = (guint8 *)task->msg.start;
- clen = task->msg.len;
-
- if (regexp_module_ctx->max_size != 0 && clen >
- regexp_module_ctx->max_size) {
- msg_info ("<%s> skip message of size %Hz", task->message_id, clen);
- return 0;
- }
- /* If we have limit, apply regexp so much times as we can */
- if (f != NULL && limit > 1) {
- start = end = NULL;
- while ((matched =
- rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for mime part of length %d returned TRUE",
- re->regexp_text,
- (gint)clen);
- }
- if (f (++passed, limit)) {
- task_cache_add (task, re, 1);
- return 1;
- }
- }
- }
- else {
- if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
- if (G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for message part of length %d returned TRUE",
- re->regexp_text,
- (gint)clen);
- }
- task_cache_add (task, re, 1);
- return 1;
- }
-
- }
- if (!matched && G_UNLIKELY (re->is_test)) {
- msg_info (
- "process test regexp %s for message part of length %d returned FALSE",
- re->regexp_text,
- (gint)clen);
- }
- task_cache_add (task, re, 0);
- break;
- case REGEXP_URL:
- debug_task ("checking url regexp: %s", re->regexp_text);
- if (f != NULL && limit > 1) {
- /*XXX: add support of it */
- msg_warn ("numbered matches are not supported for url regexp");
- }
- regexp = re->regexp;
- callback_param.task = task;
- callback_param.regexp = regexp;
- callback_param.re = re;
- callback_param.found = FALSE;
- if (task->urls) {
- g_tree_foreach (task->urls, tree_url_callback, &callback_param);
- }
- if (task->emails && callback_param.found == FALSE) {
- g_tree_foreach (task->emails, tree_url_callback, &callback_param);
- }
- if (callback_param.found == FALSE) {
- task_cache_add (task, re, 0);
- }
- break;
- default:
- msg_warn ("bad error detected: %p is not a valid regexp object", re);
- break;
- }
-
- /* Not reached */
- return 0;
-}
-
-static gboolean
-maybe_call_lua_function (const gchar *name,
- struct rspamd_task *task,
- lua_State *L)
-{
- struct rspamd_task **ptask;
- gboolean res;
-
- lua_getglobal (L, name);
- if (lua_isfunction (L, -1)) {
- ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
- rspamd_lua_setclass (L, "rspamd{task}", -1);
- *ptask = task;
- /* Call function */
- if (lua_pcall (L, 1, 1, 0) != 0) {
- msg_info ("call to %s failed: %s", (gchar *)name,
- lua_tostring (L, -1));
- return FALSE;
- }
- res = lua_toboolean (L, -1);
- lua_pop (L, 1);
- return res;
- }
- else {
- lua_pop (L, 1);
- }
- return FALSE;
-}
-
-static gboolean
-optimize_regexp_expression (struct expression **e, GQueue * stack, gboolean res)
-{
- struct expression *it = (*e)->next;
- gboolean ret = FALSE, is_nearest = TRUE;
- gint skip_level = 0;
-
- /* Skip nearest logical operators from optimization */
- if (!it || (it->type == EXPR_OPERATION && it->content.operation != '!')) {
- g_queue_push_head (stack, GSIZE_TO_POINTER (res));
- return ret;
- }
-
- while (it) {
- /* Find first operation for this iterator */
- if (it->type == EXPR_OPERATION) {
- /* If this operation is just ! just inverse res and check for further operators */
- if (it->content.operation == '!') {
- if (is_nearest) {
- msg_debug ("found '!' operator, inversing result");
- res = !res;
- *e = it;
- }
- it = it->next;
- continue;
- }
- else {
- skip_level--;
- }
- /* Check whether we found corresponding operator for this operand */
- if (skip_level <= 0) {
- if (it->content.operation == '|' && res == TRUE) {
- msg_debug ("found '|' and previous expression is true");
- *e = it;
- ret = TRUE;
- }
- else if (it->content.operation == '&' && res == FALSE) {
- msg_debug ("found '&' and previous expression is false");
- *e = it;
- ret = TRUE;
- }
- break;
- }
- }
- else {
- is_nearest = FALSE;
- skip_level++;
- }
- it = it->next;
- }
-
- g_queue_push_head (stack, GSIZE_TO_POINTER (res));
-
- return ret;
-}
-
-static gboolean
-process_regexp_expression (struct expression *expr,
- const gchar *symbol,
- struct rspamd_task *task,
- const gchar *additional,
- struct lua_locked_state *nL)
-{
- GQueue *stack;
- gsize cur, op1, op2;
- struct expression *it = expr;
- struct rspamd_regexp_element *re;
- gboolean try_optimize = TRUE;
-
- stack = g_queue_new ();
-
- while (it) {
- if (it->type == EXPR_REGEXP_PARSED) {
- /* Find corresponding symbol */
- cur = process_regexp ((struct rspamd_regexp_element *)it->content.operand,
- task,
- additional,
- 0,
- NULL);
- debug_task ("regexp %s found", cur ? "is" : "is not");
- if (try_optimize) {
- try_optimize = optimize_regexp_expression (&it, stack, cur);
- }
- else {
- g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
- }
- }
- else if (it->type == EXPR_FUNCTION) {
- if (nL) {
- rspamd_mutex_lock (nL->m);
- cur =
- (gsize) call_expression_function ((struct
- expression_function
- *)it->content.operand, task, nL->L);
- rspamd_mutex_unlock (nL->m);
- }
- else {
- cur =
- (gsize) call_expression_function ((struct
- expression_function
- *)it->content.operand, task, task->cfg->lua_state);
- }
- debug_task ("function %s returned %s",
- ((struct expression_function *)it->content.operand)->name,
- cur ? "true" : "false");
- if (try_optimize) {
- try_optimize = optimize_regexp_expression (&it, stack, cur);
- }
- else {
- g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
- }
- }
- else if (it->type == EXPR_STR) {
- /* This may be lua function, try to call it */
- if (nL) {
- rspamd_mutex_lock (nL->m);
- cur = maybe_call_lua_function (
- (const gchar *)it->content.operand,
- task,
- nL->L);
- rspamd_mutex_unlock (nL->m);
- }
- else {
- cur = maybe_call_lua_function (
- (const gchar *)it->content.operand,
- task,
- task->cfg->lua_state);
- }
- debug_task ("function %s returned %s",
- (const gchar *)it->content.operand,
- cur ? "true" : "false");
- if (try_optimize) {
- try_optimize = optimize_regexp_expression (&it, stack, cur);
- }
- else {
- g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
- }
- }
- else if (it->type == EXPR_REGEXP) {
- /* Compile regexp if it is not parsed */
- if (it->content.operand == NULL) {
- it = it->next;
- continue;
- }
- re = parse_regexp (task->cfg->cfg_pool,
- it->content.operand,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot parse regexp, skip expression");
- g_queue_free (stack);
- return FALSE;
- }
- it->content.operand = re;
- it->type = EXPR_REGEXP_PARSED;
- /* Continue with this regexp once again */
- continue;
- }
- else if (it->type == EXPR_OPERATION) {
- if (g_queue_is_empty (stack)) {
- /* Queue has no operands for operation, exiting */
- msg_warn (
- "regexp expression seems to be invalid: empty stack while reading operation");
- g_queue_free (stack);
- return FALSE;
- }
- debug_task ("got operation %c", it->content.operation);
- switch (it->content.operation) {
- case '!':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op1 = !op1;
- try_optimize = optimize_regexp_expression (&it, stack, op1);
- break;
- case '&':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- try_optimize = optimize_regexp_expression (&it,
- stack,
- op1 && op2);
- break;
- case '|':
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- try_optimize = optimize_regexp_expression (&it,
- stack,
- op1 || op2);
- break;
- default:
- it = it->next;
- continue;
- }
- }
- if (it) {
- it = it->next;
- }
- }
- if (!g_queue_is_empty (stack)) {
- op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
- if (op1) {
- g_queue_free (stack);
- return TRUE;
- }
- }
- else {
- msg_warn (
- "regexp expression seems to be invalid: empty stack at the end of expression, symbol %s",
- symbol);
- }
-
- g_queue_free (stack);
-
- return FALSE;
-}
-
-/* Call custom lua function in rspamd expression */
-static gboolean
-rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data,
- struct rspamd_task *task, GList *args, gboolean *res)
+static gboolean rspamd_lua_call_expression_func(
+ struct ucl_lua_funcdata *lua_data, struct rspamd_task *task,
+ GArray *args, gboolean *res)
{
lua_State *L = lua_data->L;
struct rspamd_task **ptask;
- GList *cur;
struct expression_argument *arg;
- int nargs = 1, pop = 0;
+ gint pop = 0, i;
lua_rawgeti (L, LUA_REGISTRYINDEX, lua_data->idx);
/* Now we got function in top of stack */
- ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
+ ptask = lua_newuserdata (L, sizeof(struct rspamd_task *));
rspamd_lua_setclass (L, "rspamd{task}", -1);
*ptask = task;
/* Now push all arguments */
- cur = args;
- while (cur) {
- arg = get_function_arg (cur->data, task, FALSE);
+ for (i = 0; i < args->len; i ++) {
+ arg = &g_array_index (args, struct expression_argument, i);
if (arg) {
switch (arg->type) {
case EXPRESSION_ARGUMENT_NORMAL:
- lua_pushstring (L, (const gchar *)arg->data);
+ lua_pushstring (L, (const gchar *) arg->data);
break;
case EXPRESSION_ARGUMENT_BOOL:
- lua_pushboolean (L, (gboolean) GPOINTER_TO_SIZE (arg->data));
+ lua_pushboolean (L, (gboolean) GPOINTER_TO_SIZE(arg->data));
break;
default:
- msg_err ("cannot pass custom params to lua function");
+ msg_err("cannot pass custom params to lua function");
return FALSE;
}
}
- nargs++;
- cur = g_list_next (cur);
}
- if (lua_pcall (L, nargs, 1, 0) != 0) {
- msg_info ("call to lua function failed: %s", lua_tostring (L, -1));
+ if (lua_pcall (L, args->len, 1, 0) != 0) {
+ msg_info("call to lua function failed: %s", lua_tostring (L, -1));
return FALSE;
}
pop++;
if (!lua_isboolean (L, -1)) {
lua_pop (L, pop);
- msg_info ("lua function must return a boolean");
+ msg_info("lua function must return a boolean");
return FALSE;
}
*res = lua_toboolean (L, -1);
@@ -1009,1042 +219,27 @@ rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data,
return TRUE;
}
-struct regexp_threaded_ud {
- struct regexp_module_item *item;
- struct rspamd_task *task;
-};
-
-static void
-process_regexp_item_threaded (gpointer data, gpointer user_data)
-{
- struct regexp_threaded_ud *ud = data;
- struct lua_locked_state *nL = user_data;
-
- /* Process expression */
- if (process_regexp_expression (ud->item->expr, ud->item->symbol, ud->task,
- NULL, nL)) {
- g_mutex_lock (workers_mtx);
- rspamd_task_insert_result (ud->task, ud->item->symbol, 1, NULL);
- g_mutex_unlock (workers_mtx);
- }
- remove_async_thread (ud->task->s);
-}
static void
process_regexp_item (struct rspamd_task *task, void *user_data)
{
struct regexp_module_item *item = user_data;
gboolean res = FALSE;
- struct regexp_threaded_ud *thr_ud;
- GError *err = NULL;
- struct lua_locked_state *nL;
-
- if (!item->lua_function && regexp_module_ctx->max_threads > 1) {
- if (regexp_module_ctx->workers == NULL) {
-#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
-# if GLIB_MINOR_VERSION > 20
- if (!g_thread_get_initialized ()) {
- g_thread_init (NULL);
- }
-# else
- g_thread_init (NULL);
-# endif
- workers_mtx = g_mutex_new ();
-#else
- workers_mtx = rspamd_mempool_alloc (regexp_module_ctx->regexp_pool,
- sizeof (GMutex));
- g_mutex_init (workers_mtx);
-#endif
- nL = rspamd_init_lua_locked (task->cfg);
- luaopen_regexp (nL->L);
- regexp_module_ctx->workers = g_thread_pool_new (
- process_regexp_item_threaded,
- nL,
- regexp_module_ctx->max_threads,
- TRUE,
- &err);
- if (err != NULL) {
- msg_err ("thread pool creation failed: %s", err->message);
- regexp_module_ctx->max_threads = 0;
- return;
- }
- }
- thr_ud =
- rspamd_mempool_alloc (task->task_pool,
- sizeof (struct regexp_threaded_ud));
- thr_ud->item = item;
- thr_ud->task = task;
-
-
- register_async_thread (task->s);
- g_thread_pool_push (regexp_module_ctx->workers, thr_ud, &err);
- if (err != NULL) {
- msg_err ("error pushing task to the regexp thread pool: %s",
- err->message);
- remove_async_thread (task->s);
- }
- }
- else {
- /* Non-threaded version */
- if (item->lua_function) {
- /* Just call function */
- res = FALSE;
- if (!rspamd_lua_call_expression_func (item->lua_function, task, NULL,
+ /* Non-threaded version */
+ if (item->lua_function) {
+ /* Just call function */
+ res = FALSE;
+ if (!rspamd_lua_call_expression_func (item->lua_function, task, NULL,
&res)) {
- msg_err ("error occurred when checking symbol %s", item->symbol);
- }
- if (res) {
- rspamd_task_insert_result (task, item->symbol, 1, NULL);
- }
- }
- else {
- /* Process expression */
- if (process_regexp_expression (item->expr, item->symbol, task, NULL,
- NULL)) {
- rspamd_task_insert_result (task, item->symbol, 1, NULL);
- }
- }
- }
-}
-
-static gboolean
-rspamd_regexp_match_number (struct rspamd_task *task, GList * args,
- void *unused)
-{
- gint param_count, res = 0;
- struct expression_argument *arg;
- GList *cur;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- param_count = strtoul (arg->data, NULL, 10);
-
- cur = args->next;
- while (cur) {
- arg = get_function_arg (cur->data, task, FALSE);
- if (arg && arg->type == EXPRESSION_ARGUMENT_BOOL) {
- if ((gboolean) GPOINTER_TO_SIZE (arg->data)) {
- res++;
- }
- }
- else {
- if (process_regexp_expression (cur->data, "regexp_match_number",
- task, NULL, NULL)) {
- res++;
- }
- if (res >= param_count) {
- return TRUE;
- }
- }
- cur = g_list_next (cur);
- }
-
- return res >= param_count;
-}
-
-static gboolean
-rspamd_regexp_occurs_number (struct rspamd_task *task,
- GList * args,
- void *unused)
-{
- gint limit;
- struct expression_argument *arg;
- struct rspamd_regexp_element *re;
- gchar *param, *err_str, op;
- int_compare_func f = NULL;
-
- if (args == NULL || args->next == NULL) {
- msg_warn ("wrong number of parameters to function, must be 2");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- if ((re = re_cache_check (arg->data, task->cfg->cfg_pool)) == NULL) {
- re = parse_regexp (task->cfg->cfg_pool, arg->data, task->cfg->raw_mode);
- if (!re) {
- msg_err ("cannot parse given regexp: %s", (gchar *)arg->data);
- return FALSE;
- }
- }
-
- arg = get_function_arg (args->next->data, task, TRUE);
- param = arg->data;
- op = *param;
- if (g_ascii_isdigit (op)) {
- op = '=';
- }
- else {
- param++;
- }
- switch (op) {
- case '>':
- if (*param == '=') {
- f = op_more_equal;
- param++;
- }
- else {
- f = op_more;
- }
- break;
- case '<':
- if (*param == '=') {
- f = op_less_equal;
- param++;
- }
- else {
- f = op_less;
- }
- break;
- case '=':
- f = op_equal;
- break;
- default:
- msg_err (
- "wrong operation character: %c, assumed '=', '>', '<', '>=', '<=' or empty op",
- op);
- return FALSE;
- }
-
- limit = strtoul (param, &err_str, 10);
- if (*err_str != 0) {
- msg_err ("wrong numeric: %s at position: %s", param, err_str);
- return FALSE;
- }
-
- return process_regexp (re, task, NULL, limit, f);
-}
-static gboolean
-rspamd_raw_header_exists (struct rspamd_task *task, GList * args, void *unused)
-{
- struct expression_argument *arg;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
- msg_warn ("invalid argument to function is passed");
- return FALSE;
- }
-
- return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
-}
-
-static gboolean
-match_smtp_data (struct rspamd_task *task,
- const gchar *re_text,
- const gchar *what)
-{
- struct rspamd_regexp_element *re;
- gint r;
-
- if (*re_text == '/') {
- /* This is a regexp */
- re = parse_regexp (task->cfg->cfg_pool,
- (gchar *)re_text,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot compile regexp for function");
- return FALSE;
+ msg_err ("error occurred when checking symbol %s", item->symbol);
}
-
- if ((r = task_cache_check (task, re)) == -1) {
- if (rspamd_regexp_search (re->regexp, what, 0, NULL, NULL, FALSE)) {
- task_cache_add (task, re, 1);
- return TRUE;
- }
- task_cache_add (task, re, 0);
- }
- else {
- return r == 1;
+ if (res) {
+ rspamd_task_insert_result (task, item->symbol, 1, NULL);
}
}
- else if (g_ascii_strcasecmp (re_text, what) == 0) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-static gboolean
-rspamd_check_smtp_data (struct rspamd_task *task, GList * args, void *unused)
-{
- struct expression_argument *arg;
- InternetAddressList *ia = NULL;
- const gchar *type, *what = NULL;
- GList *cur;
- gint i, ialen;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
-
- if (!arg || !arg->data) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
else {
- type = arg->data;
- switch (*type) {
- case 'f':
- case 'F':
- if (g_ascii_strcasecmp (type, "from") == 0) {
- what = rspamd_task_get_sender (task);
- }
- else {
- msg_warn ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 'h':
- case 'H':
- if (g_ascii_strcasecmp (type, "helo") == 0) {
- what = task->helo;
- }
- else {
- msg_warn ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 'u':
- case 'U':
- if (g_ascii_strcasecmp (type, "user") == 0) {
- what = task->user;
- }
- else {
- msg_warn ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 's':
- case 'S':
- if (g_ascii_strcasecmp (type, "subject") == 0) {
- what = task->subject;
- }
- else {
- msg_warn ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 'r':
- case 'R':
- if (g_ascii_strcasecmp (type, "rcpt") == 0) {
- ia = task->rcpt_mime;
- }
- else {
- msg_warn ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- default:
- msg_warn ("bad argument to function: %s", type);
- return FALSE;
- }
- }
-
- if (what == NULL && ia == NULL) {
- /* Not enough data so regexp would NOT be found anyway */
- return FALSE;
- }
-
- /* We would process only one more argument, others are ignored */
- cur = args->next;
- if (cur) {
- arg = get_function_arg (cur->data, task, FALSE);
- if (arg && arg->type == EXPRESSION_ARGUMENT_NORMAL) {
- if (what != NULL) {
- return match_smtp_data (task, arg->data, what);
- }
- else {
- if (ia != NULL) {
- ialen = internet_address_list_length(ia);
- for (i = 0; i < ialen; i ++) {
- InternetAddress *iaelt =
- internet_address_list_get_address(ia, i);
- InternetAddressMailbox *iamb =
- INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
- INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
- if (iamb &&
- match_smtp_data (task, arg->data,
- internet_address_mailbox_get_addr(iamb))) {
- return TRUE;
- }
- }
- }
- }
- }
- else if (arg != NULL) {
- if (what != NULL) {
- if (process_regexp_expression (arg->data,
- "regexp_check_smtp_data", task, what, NULL)) {
- return TRUE;
- }
- }
- else {
- if (ia != NULL) {
- ialen = internet_address_list_length(ia);
- for (i = 0; i < ialen; i ++) {
- InternetAddress *iaelt =
- internet_address_list_get_address(ia, i);
- InternetAddressMailbox *iamb =
- INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
- INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
- if (iamb &&
- process_regexp_expression (arg->data,
- "regexp_check_smtp_data", task,
- internet_address_mailbox_get_addr(iamb),
- NULL)) {
- return TRUE;
- }
- }
- }
- }
- }
- }
-
- return FALSE;
-}
-
-/* Lua part */
-static gint
-lua_regexp_match (lua_State *L)
-{
- void *ud = luaL_checkudata (L, 1, "rspamd{task}");
- struct rspamd_task *task;
- const gchar *re_text;
- struct rspamd_regexp_element *re;
- gint r = 0;
-
- luaL_argcheck (L, ud != NULL, 1, "'task' expected");
- task = ud ? *((struct rspamd_task **)ud) : NULL;
- re_text = luaL_checkstring (L, 2);
-
- /* This is a regexp */
- if (task != NULL) {
- re = parse_regexp (task->cfg->cfg_pool,
- (gchar *)re_text,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot compile regexp for function");
- return FALSE;
- }
- r = process_regexp (re, task, NULL, 0, NULL);
- }
- lua_pushboolean (L, r == 1);
-
- return 1;
-}
-
-static gboolean
-rspamd_content_type_compare_param (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- gchar *param_name, *param_pattern;
- const gchar *param_data;
- struct rspamd_regexp_element *re;
- struct expression_argument *arg, *arg1;
- GMimeObject *part;
- GMimeContentType *ct;
- gint r;
- gboolean recursive = FALSE, result = FALSE;
- GList *cur = NULL;
- struct mime_part *cur_part;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
- arg = get_function_arg (args->data, task, TRUE);
- param_name = arg->data;
- args = g_list_next (args);
- if (args == NULL) {
- msg_warn ("too few params to function");
- return FALSE;
- }
- arg = get_function_arg (args->data, task, TRUE);
- param_pattern = arg->data;
-
-
- part = g_mime_message_get_mime_part (task->message);
- if (part) {
- ct = (GMimeContentType *)g_mime_object_get_content_type (part);
- if (args->next) {
- args = g_list_next (args);
- arg1 = get_function_arg (args->data, task, TRUE);
- if (g_ascii_strncasecmp (arg1->data, "true",
- sizeof ("true") - 1) == 0) {
- recursive = TRUE;
- }
- }
- else {
- /*
- * If user did not specify argument, let's assume that he wants
- * recursive search if mime part is multipart/mixed
- */
- if (g_mime_content_type_is_type (ct, "multipart", "*")) {
- recursive = TRUE;
- }
- }
-
- if (recursive) {
- cur = task->parts;
- }
-
-#ifndef GMIME24
- g_object_unref (part);
-#endif
- for (;; ) {
- if ((param_data =
- g_mime_content_type_get_parameter ((GMimeContentType *)ct,
- param_name)) == NULL) {
- result = FALSE;
- }
- else {
- if (*param_pattern == '/') {
- re = parse_regexp (task->cfg->cfg_pool,
- param_pattern,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot compile regexp for function");
- return FALSE;
- }
- if ((r = task_cache_check (task, re)) == -1) {
- if (rspamd_regexp_search (re->regexp, param_data, 0,
- NULL, NULL, FALSE) == TRUE) {
- task_cache_add (task, re, 1);
- return TRUE;
- }
- task_cache_add (task, re, 0);
- }
- else {
-
- }
- }
- else {
- /* Just do strcasecmp */
- if (g_ascii_strcasecmp (param_data, param_pattern) == 0) {
- return TRUE;
- }
- }
- }
- /* Get next part */
- if (!recursive) {
- return result;
- }
- else if (cur != NULL) {
- cur_part = cur->data;
- if (cur_part->type != NULL) {
- ct = cur_part->type;
- }
- cur = g_list_next (cur);
- }
- else {
- /* All is done */
- return result;
- }
- }
-
- }
-
- return FALSE;
-}
-
-static gboolean
-rspamd_content_type_has_param (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- gchar *param_name;
- const gchar *param_data;
- struct expression_argument *arg, *arg1;
- GMimeObject *part;
- GMimeContentType *ct;
- gboolean recursive = FALSE, result = FALSE;
- GList *cur = NULL;
- struct mime_part *cur_part;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
+ /* Process expression */
+ /* XXX: add this function */
}
- arg = get_function_arg (args->data, task, TRUE);
- param_name = arg->data;
-
- part = g_mime_message_get_mime_part (task->message);
- if (part) {
- ct = (GMimeContentType *)g_mime_object_get_content_type (part);
- if (args->next) {
- args = g_list_next (args);
- arg1 = get_function_arg (args->data, task, TRUE);
- if (g_ascii_strncasecmp (arg1->data, "true",
- sizeof ("true") - 1) == 0) {
- recursive = TRUE;
- }
- }
- else {
- /*
- * If user did not specify argument, let's assume that he wants
- * recursive search if mime part is multipart/mixed
- */
- if (g_mime_content_type_is_type (ct, "multipart", "*")) {
- recursive = TRUE;
- }
- }
-
- if (recursive) {
- cur = task->parts;
- }
-
-#ifndef GMIME24
- g_object_unref (part);
-#endif
- for (;; ) {
- if ((param_data =
- g_mime_content_type_get_parameter ((GMimeContentType *)ct,
- param_name)) != NULL) {
- return TRUE;
- }
- /* Get next part */
- if (!recursive) {
- return result;
- }
- else if (cur != NULL) {
- cur_part = cur->data;
- if (cur_part->type != NULL) {
- ct = cur_part->type;
- }
- cur = g_list_next (cur);
- }
- else {
- /* All is done */
- return result;
- }
- }
-
- }
-
- return TRUE;
-}
-
-static gboolean
-rspamd_content_type_is_subtype (struct rspamd_task *task,
- GList * args,
- void *unused)
-{
- gchar *param_pattern;
- struct rspamd_regexp_element *re;
- struct expression_argument *arg, *arg1;
- GMimeObject *part;
- GMimeContentType *ct;
- gint r;
- gboolean recursive = FALSE, result = FALSE;
- GList *cur = NULL;
- struct mime_part *cur_part;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
- arg = get_function_arg (args->data, task, TRUE);
- param_pattern = arg->data;
-
- part = g_mime_message_get_mime_part (task->message);
- if (part) {
- ct = (GMimeContentType *)g_mime_object_get_content_type (part);
- if (args->next) {
- args = g_list_next (args);
- arg1 = get_function_arg (args->data, task, TRUE);
- if (g_ascii_strncasecmp (arg1->data, "true",
- sizeof ("true") - 1) == 0) {
- recursive = TRUE;
- }
- }
- else {
- /*
- * If user did not specify argument, let's assume that he wants
- * recursive search if mime part is multipart/mixed
- */
- if (g_mime_content_type_is_type (ct, "multipart", "*")) {
- recursive = TRUE;
- }
- }
-
- if (recursive) {
- cur = task->parts;
- }
-
-#ifndef GMIME24
- g_object_unref (part);
-#endif
- for (;; ) {
- if (*param_pattern == '/') {
- re = parse_regexp (task->cfg->cfg_pool,
- param_pattern,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot compile regexp for function");
- return FALSE;
- }
- if ((r = task_cache_check (task, re)) == -1) {
- if (rspamd_regexp_search (re->regexp, ct->subtype, 0,
- NULL, NULL, FALSE)) {
- task_cache_add (task, re, 1);
- return TRUE;
- }
- task_cache_add (task, re, 0);
- }
- else {
-
- }
- }
- else {
- /* Just do strcasecmp */
- if (g_ascii_strcasecmp (ct->subtype, param_pattern) == 0) {
- return TRUE;
- }
- }
- /* Get next part */
- if (!recursive) {
- return result;
- }
- else if (cur != NULL) {
- cur_part = cur->data;
- if (cur_part->type != NULL) {
- ct = cur_part->type;
- }
- cur = g_list_next (cur);
- }
- else {
- /* All is done */
- return result;
- }
- }
-
- }
-
- return FALSE;
-}
-
-static gboolean
-rspamd_content_type_is_type (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- gchar *param_pattern;
- struct rspamd_regexp_element *re;
- struct expression_argument *arg, *arg1;
- GMimeObject *part;
- GMimeContentType *ct;
- gint r;
- gboolean recursive = FALSE, result = FALSE;
- GList *cur = NULL;
- struct mime_part *cur_part;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
- arg = get_function_arg (args->data, task, TRUE);
- param_pattern = arg->data;
-
-
- part = g_mime_message_get_mime_part (task->message);
- if (part) {
- ct = (GMimeContentType *)g_mime_object_get_content_type (part);
- if (args->next) {
- args = g_list_next (args);
- arg1 = get_function_arg (args->data, task, TRUE);
- if (g_ascii_strncasecmp (arg1->data, "true",
- sizeof ("true") - 1) == 0) {
- recursive = TRUE;
- }
- }
- else {
- /*
- * If user did not specify argument, let's assume that he wants
- * recursive search if mime part is multipart/mixed
- */
- if (g_mime_content_type_is_type (ct, "multipart", "*")) {
- recursive = TRUE;
- }
- }
-
- if (recursive) {
- cur = task->parts;
- }
-
-#ifndef GMIME24
- g_object_unref (part);
-#endif
- for (;; ) {
- if (*param_pattern == '/') {
- re = parse_regexp (task->cfg->cfg_pool,
- param_pattern,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot compile regexp for function");
- return FALSE;
- }
- if ((r = task_cache_check (task, re)) == -1) {
- if (rspamd_regexp_search (re->regexp, ct->type, 0,
- NULL, NULL, FALSE) == TRUE) {
- task_cache_add (task, re, 1);
- return TRUE;
- }
- task_cache_add (task, re, 0);
- }
- else {
-
- }
- }
- else {
- /* Just do strcasecmp */
- if (g_ascii_strcasecmp (ct->type, param_pattern) == 0) {
- return TRUE;
- }
- }
- /* Get next part */
- if (!recursive) {
- return result;
- }
- else if (cur != NULL) {
- cur_part = cur->data;
- if (cur_part->type != NULL) {
- ct = cur_part->type;
- }
- cur = g_list_next (cur);
- }
- else {
- /* All is done */
- return result;
- }
- }
-
- }
-
- return FALSE;
-}
-
-static gboolean
-compare_subtype (struct rspamd_task *task, GMimeContentType * ct,
- gchar *subtype)
-{
- struct rspamd_regexp_element *re;
- gint r;
-
- if (subtype == NULL || ct == NULL) {
- msg_warn ("invalid parameters passed");
- return FALSE;
- }
- if (*subtype == '/') {
- re = parse_regexp (task->cfg->cfg_pool, subtype,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot compile regexp for function");
- return FALSE;
- }
- if ((r = task_cache_check (task, re)) == -1) {
- if (rspamd_regexp_search (re->regexp, subtype, 0,
- NULL, NULL, FALSE) == TRUE) {
- task_cache_add (task, re, 1);
- return TRUE;
- }
- task_cache_add (task, re, 0);
- }
- else {
- return r == 1;
- }
- }
- else {
- /* Just do strcasecmp */
- if (ct->subtype && g_ascii_strcasecmp (ct->subtype, subtype) == 0) {
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-static gboolean
-compare_len (struct mime_part *part, guint min, guint max)
-{
- if (min == 0 && max == 0) {
- return TRUE;
- }
-
- if (min == 0) {
- return part->content->len <= max;
- }
- else if (max == 0) {
- return part->content->len >= min;
- }
- else {
- return part->content->len >= min && part->content->len <= max;
- }
-}
-
-static gboolean
-common_has_content_part (struct rspamd_task * task,
- gchar *param_type,
- gchar *param_subtype,
- gint min_len,
- gint max_len)
-{
- struct rspamd_regexp_element *re;
- struct mime_part *part;
- GList *cur;
- GMimeContentType *ct;
- gint r;
-
- cur = g_list_first (task->parts);
- while (cur) {
- part = cur->data;
- ct = part->type;
- if (ct == NULL) {
- cur = g_list_next (cur);
- continue;
- }
-
- if (*param_type == '/') {
- re = parse_regexp (task->cfg->cfg_pool,
- param_type,
- task->cfg->raw_mode);
- if (re == NULL) {
- msg_warn ("cannot compile regexp for function");
- cur = g_list_next (cur);
- continue;
- }
- if ((r = task_cache_check (task, re)) == -1) {
- if (ct->type &&
- rspamd_regexp_search (re->regexp, ct->type, 0,
- NULL, NULL, TRUE)) {
- if (param_subtype) {
- if (compare_subtype (task, ct, param_subtype)) {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
- }
- }
- }
- else {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
- }
- }
- task_cache_add (task, re, 1);
- }
- else {
- task_cache_add (task, re, 0);
- }
- }
- else {
- if (r == 1) {
- if (compare_subtype (task, ct, param_subtype)) {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
- }
- }
- }
- }
- }
- else {
- /* Just do strcasecmp */
- if (ct->type && g_ascii_strcasecmp (ct->type, param_type) == 0) {
- if (param_subtype) {
- if (compare_subtype (task, ct, param_subtype)) {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
- }
- }
- }
- else {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
- }
- }
- }
- }
- cur = g_list_next (cur);
- }
-
- return FALSE;
-}
-
-static gboolean
-rspamd_has_content_part (struct rspamd_task * task, GList * args, void *unused)
-{
- gchar *param_type = NULL, *param_subtype = NULL;
- struct expression_argument *arg;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- param_type = arg->data;
- args = args->next;
- if (args) {
- arg = args->data;
- param_subtype = arg->data;
- }
-
- return common_has_content_part (task, param_type, param_subtype, 0, 0);
-}
-
-static gboolean
-rspamd_has_content_part_len (struct rspamd_task * task,
- GList * args,
- void *unused)
-{
- gchar *param_type = NULL, *param_subtype = NULL;
- gint min = 0, max = 0;
- struct expression_argument *arg;
-
- if (args == NULL) {
- msg_warn ("no parameters to function");
- return FALSE;
- }
-
- arg = get_function_arg (args->data, task, TRUE);
- param_type = arg->data;
- args = args->next;
- if (args) {
- arg = get_function_arg (args->data, task, TRUE);
- param_subtype = arg->data;
- args = args->next;
- if (args) {
- arg = get_function_arg (args->data, task, TRUE);
- errno = 0;
- min = strtoul (arg->data, NULL, 10);
- if (errno != 0) {
- msg_warn ("invalid numeric value '%s': %s",
- (gchar *)arg->data,
- strerror (errno));
- return FALSE;
- }
- args = args->next;
- if (args) {
- arg = get_function_arg (args->data, task, TRUE);
- max = strtoul (arg->data, NULL, 10);
- if (errno != 0) {
- msg_warn ("invalid numeric value '%s': %s",
- (gchar *)arg->data,
- strerror (errno));
- return FALSE;
- }
- }
- }
- }
-
- return common_has_content_part (task, param_type, param_subtype, min, max);
}
diff --git a/src/plugins/spf.c b/src/plugins/spf.c
index 908e097ab..14c9e0b42 100644
--- a/src/plugins/spf.c
+++ b/src/plugins/spf.c
@@ -34,7 +34,6 @@
#include "config.h"
#include "libmime/message.h"
-#include "libmime/expressions.h"
#include "libserver/spf.h"
#include "libutil/hash.h"
#include "libutil/map.h"
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index 09e99dec3..0401ff932 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -43,7 +43,6 @@
#include "config.h"
#include "libmime/message.h"
-#include "libmime/expressions.h"
#include "libutil/hash.h"
#include "libutil/map.h"
#include "main.h"