diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
commit | 61555065f3d1c8badcc9573691232f1b6e42988c (patch) | |
tree | 563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libmime | |
parent | ad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff) | |
download | rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip |
Rework project structure, remove trash files.
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/CMakeLists.txt | 29 | ||||
-rw-r--r-- | src/libmime/expressions.c | 1452 | ||||
-rw-r--r-- | src/libmime/expressions.h | 133 | ||||
-rw-r--r-- | src/libmime/filter.c | 1096 | ||||
-rw-r--r-- | src/libmime/filter.h | 167 | ||||
-rw-r--r-- | src/libmime/images.c | 255 | ||||
-rw-r--r-- | src/libmime/images.h | 33 | ||||
-rw-r--r-- | src/libmime/message.c | 1764 | ||||
-rw-r--r-- | src/libmime/message.h | 91 | ||||
-rw-r--r-- | src/libmime/protocol.c | 821 | ||||
-rw-r--r-- | src/libmime/protocol.h | 46 | ||||
-rw-r--r-- | src/libmime/smtp_proto.c | 701 | ||||
-rw-r--r-- | src/libmime/smtp_proto.h | 95 | ||||
-rw-r--r-- | src/libmime/smtp_utils.c | 362 | ||||
-rw-r--r-- | src/libmime/smtp_utils.h | 63 | ||||
-rw-r--r-- | src/libmime/worker_util.c | 255 |
16 files changed, 7363 insertions, 0 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt new file mode 100644 index 000000000..303b7a088 --- /dev/null +++ b/src/libmime/CMakeLists.txt @@ -0,0 +1,29 @@ +# Librspamd mime +SET(LIBRSPAMDMIMESRC + expressions.c + filter.c + images.c + message.c + protocol.c + smtp_utils.c + smtp_proto.c + worker_util.c) + +# Librspamdmime +ADD_LIBRARY(rspamd-mime ${LINK_TYPE} ${LIBRSPAMDMIMESRC}) +IF(NOT DEBIAN_BUILD) +SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES VERSION ${RSPAMD_VERSION}) +ENDIF(NOT DEBIAN_BUILD) +SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES LINKER_LANGUAGE C) +SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB") +TARGET_LINK_LIBRARIES(rspamd-mime rspamd-server) +TARGET_LINK_LIBRARIES(rspamd-mime rspamd-util) +IF(CMAKE_COMPILER_IS_GNUCC) +SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing") +ENDIF(CMAKE_COMPILER_IS_GNUCC) + +IF(NO_SHARED MATCHES "OFF") + INSTALL(TARGETS rspamd-mime + LIBRARY DESTINATION ${LIBDIR} + PUBLIC_HEADER DESTINATION ${INCLUDEDIR}) +ENDIF(NO_SHARED MATCHES "OFF")
\ No newline at end of file diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c new file mode 100644 index 000000000..5d19626bb --- /dev/null +++ b/src/libmime/expressions.c @@ -0,0 +1,1452 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "util.h" +#include "cfg_file.h" +#include "main.h" +#include "message.h" +#include "fuzzy.h" +#include "expressions.h" +#include "html.h" +#include "lua/lua_common.h" +#include "diff.h" + +gboolean rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_header_exists (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_parts_distance (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_only_html_part (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_is_html_balanced (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_html_tag (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_fake_html (struct rspamd_task *task, GList * args, void *unused); + +/* + * List of internal functions of rspamd + * Sorted by name to use bsearch + */ +static struct _fl { + const gchar *name; + rspamd_internal_func_t func; + void *user_data; +} rspamd_functions_list[] = { + {"compare_encoding", rspamd_compare_encoding, NULL}, + {"compare_parts_distance", rspamd_parts_distance, NULL}, + {"compare_recipients_distance", rspamd_recipients_distance, NULL}, + {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, + {"has_fake_html", rspamd_has_fake_html, NULL}, + {"has_html_tag", rspamd_has_html_tag, NULL}, + {"has_only_html_part", rspamd_has_only_html_part, NULL}, + {"header_exists", rspamd_header_exists, NULL}, + {"is_html_balanced", rspamd_is_html_balanced, NULL}, + {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL} +}; + +static struct _fl *list_ptr = &rspamd_functions_list[0]; +static guint32 functions_number = sizeof (rspamd_functions_list) / sizeof (struct _fl); +static gboolean list_allocated = FALSE; + +/* Bsearch routine */ +static gint +fl_cmp (const void *s1, const void *s2) +{ + struct _fl *fl1 = (struct _fl *)s1; + struct _fl *fl2 = (struct _fl *)s2; + return strcmp (fl1->name, fl2->name); +} + +/* Cache for regular expressions that are used in functions */ +void * +re_cache_check (const gchar *line, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache == NULL) { + re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); + return NULL; + } + return g_hash_table_lookup (re_cache, line); +} + +void +re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache == NULL) { + re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); + } + + g_hash_table_insert (re_cache, (gpointer)line, pointer); +} + +void +re_cache_del (const gchar *line, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache != NULL) { + g_hash_table_remove (re_cache, line); + } + +} + +/* + * Functions for parsing expressions + */ +struct expression_stack { + gchar op; + struct expression_stack *next; +}; + +/* + * Push operand or operator to stack + */ +static struct expression_stack * +push_expression_stack (rspamd_mempool_t * pool, struct expression_stack *head, gchar op) +{ + struct expression_stack *new; + new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack)); + new->op = op; + new->next = head; + return new; +} + +/* + * Delete symbol from stack, return pointer to operand or operator (casted to void* ) + */ +static gchar +delete_expression_stack (struct expression_stack **head) +{ + struct expression_stack *cur; + gchar res; + + if (*head == NULL) + return 0; + + cur = *head; + res = cur->op; + + *head = cur->next; + return res; +} + +/* + * Return operation priority + */ +static gint +logic_priority (gchar a) +{ + switch (a) { + case '!': + return 3; + case '|': + case '&': + return 2; + case '(': + return 1; + default: + return 0; + } +} + +/* + * Return FALSE if symbol is not operation symbol (operand) + * Return TRUE if symbol is operation symbol + */ +static gboolean +is_operation_symbol (gchar *a) +{ + switch (*a) { + case '!': + case '&': + case '|': + case '(': + case ')': + return TRUE; + case 'O': + case 'o': + if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0&& g_ascii_isspace (a[2])) { + return TRUE; + } + break; + case 'A': + case 'a': + if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0&& g_ascii_isspace (a[3])) { + return TRUE; + } + break; + case 'N': + case 'n': + if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) { + return TRUE; + } + break; + } + + return FALSE; +} + +/* Return character representation of operation */ +static gchar +op_to_char (gchar *a, gchar **next) +{ + switch (*a) { + case '!': + case '&': + case '|': + case '(': + case ')': + *next = a + 1; + return *a; + case 'O': + case 'o': + if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) { + *next = a + sizeof ("or") - 1; + return '|'; + } + break; + case 'A': + case 'a': + if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) { + *next = a + sizeof ("and") - 1; + return '&'; + } + break; + case 'N': + case 'n': + if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) { + *next = a + sizeof ("not") - 1; + return '!'; + } + break; + } + + return '\0'; +} + +/* + * Return TRUE if symbol can be regexp flag + */ +static gboolean +is_regexp_flag (gchar a) +{ + switch (a) { + case 'i': + case 'm': + case 'x': + case 's': + case 'u': + case 'o': + case 'r': + case 'H': + case 'M': + case 'P': + case 'U': + case 'X': + case 'T': + case 'S': + return TRUE; + default: + return FALSE; + } +} + +static void +insert_expression (rspamd_mempool_t * pool, struct expression **head, gint type, gchar op, void *operand, const gchar *orig) +{ + struct expression *new, *cur; + + new = rspamd_mempool_alloc (pool, sizeof (struct expression)); + new->type = type; + new->orig = orig; + if (new->type != EXPR_OPERATION) { + new->content.operand = operand; + } + else { + new->content.operation = op; + } + new->next = NULL; + + if (!*head) { + *head = new; + } + else { + cur = *head; + while (cur->next) { + cur = cur->next; + } + cur->next = new; + } +} + +static struct expression * +maybe_parse_expression (rspamd_mempool_t * pool, gchar *line) +{ + struct expression *expr; + gchar *p = line; + + while (*p) { + if (is_operation_symbol (p)) { + return parse_expression (pool, line); + } + p++; + } + + expr = rspamd_mempool_alloc (pool, sizeof (struct expression)); + expr->type = EXPR_STR; + expr->content.operand = rspamd_mempool_strdup (pool, line); + expr->next = NULL; + + return expr; +} + +/* + * Make inverse polish record for specified expression + * Memory is allocated from given pool + */ +struct expression * +parse_expression (rspamd_mempool_t * pool, gchar *line) +{ + struct expression *expr = NULL; + struct expression_stack *stack = NULL; + struct expression_function *func = NULL; + struct expression *arg; + GQueue *function_stack; + gchar *p, *c, *str, op, newop, *copy, *next; + gboolean in_regexp = FALSE; + gint brackets = 0; + + enum { + SKIP_SPACES, + READ_OPERATOR, + READ_REGEXP, + READ_REGEXP_FLAGS, + READ_FUNCTION, + READ_FUNCTION_ARGUMENT, + } state = SKIP_SPACES; + + if (line == NULL || pool == NULL) { + return NULL; + } + + msg_debug ("parsing expression {{ %s }}", line); + + function_stack = g_queue_new (); + copy = rspamd_mempool_strdup (pool, line); + p = line; + c = p; + while (*p) { + switch (state) { + case SKIP_SPACES: + if (!g_ascii_isspace (*p)) { + if (is_operation_symbol (p)) { + state = READ_OPERATOR; + } + else if (*p == '/') { + c = ++p; + state = READ_REGEXP; + } + else { + c = p; + state = READ_FUNCTION; + } + } + else { + p++; + } + break; + case READ_OPERATOR: + if (*p == ')') { + if (stack == NULL) { + return NULL; + } + /* Pop all operators from stack to nearest '(' or to head */ + while (stack && stack->op != '(') { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + if (stack) { + op = delete_expression_stack (&stack); + } + } + else if (*p == '(') { + /* Push it to stack */ + stack = push_expression_stack (pool, stack, *p); + } + else { + if (stack == NULL) { + newop = op_to_char (p, &next); + if (newop != '\0') { + stack = push_expression_stack (pool, stack, newop); + p = next; + state = SKIP_SPACES; + continue; + } + } + /* Check priority of logic operation */ + else { + newop = op_to_char (p, &next); + if (newop != '\0') { + if (logic_priority (stack->op) < logic_priority (newop)) { + stack = push_expression_stack (pool, stack, newop); + } + else { + /* Pop all operations that have higher priority than this one */ + while ((stack != NULL) && (logic_priority (stack->op) >= logic_priority (newop))) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + stack = push_expression_stack (pool, stack, newop); + } + } + p = next; + state = SKIP_SPACES; + continue; + } + } + p++; + state = SKIP_SPACES; + break; + + case READ_REGEXP: + if (*p == '/' && *(p - 1) != '\\') { + if (*(p + 1)) { + p++; + } + state = READ_REGEXP_FLAGS; + } + else { + p++; + } + break; + + case READ_REGEXP_FLAGS: + if (!is_regexp_flag (*p) || *(p + 1) == '\0') { + if (c != p) { + if ((is_regexp_flag (*p) || *p == '/') && *(p + 1) == '\0') { + p++; + } + str = rspamd_mempool_alloc (pool, p - c + 2); + rspamd_strlcpy (str, c - 1, (p - c + 2)); + g_strstrip (str); + msg_debug ("found regexp: %s", str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_REGEXP, 0, str, copy); + } + } + c = p; + state = SKIP_SPACES; + } + else { + p++; + } + break; + + case READ_FUNCTION: + if (*p == '/') { + /* In fact it is regexp */ + state = READ_REGEXP; + c++; + p++; + } + else if (*p == '(') { + func = rspamd_mempool_alloc (pool, sizeof (struct expression_function)); + func->name = rspamd_mempool_alloc (pool, p - c + 1); + func->args = NULL; + rspamd_strlcpy (func->name, c, (p - c + 1)); + g_strstrip (func->name); + state = READ_FUNCTION_ARGUMENT; + g_queue_push_tail (function_stack, func); + insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy); + c = ++p; + } + else if (is_operation_symbol (p)) { + /* In fact it is not function, but symbol */ + if (c != p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_STR, 0, str, copy); + } + } + state = READ_OPERATOR; + } + else if (*(p + 1) == '\0') { + /* In fact it is not function, but symbol */ + p++; + if (c != p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_STR, 0, str, copy); + } + } + state = SKIP_SPACES; + } + else { + p++; + } + break; + + case READ_FUNCTION_ARGUMENT: + if (*p == '/' && !in_regexp) { + in_regexp = TRUE; + p++; + } + if (!in_regexp) { + /* Append argument to list */ + if (*p == ',' || (*p == ')' && brackets == 0)) { + arg = NULL; + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + /* Recursive call */ + arg = maybe_parse_expression (pool, str); + func->args = g_list_append (func->args, arg); + /* Pop function */ + if (*p == ')') { + /* Last function in chain, goto skipping spaces state */ + func = g_queue_pop_tail (function_stack); + if (g_queue_get_length (function_stack) == 0) { + state = SKIP_SPACES; + } + } + c = p + 1; + } + else if (*p == '(') { + brackets++; + } + else if (*p == ')') { + brackets--; + } + } + else if (*p == '/' && *(p - 1) != '\\') { + in_regexp = FALSE; + } + p++; + break; + } + } + + g_queue_free (function_stack); + if (state != SKIP_SPACES) { + /* In fact we got bad expression */ + msg_warn ("expression \"%s\" is invalid", line); + return NULL; + } + /* Pop everything from stack */ + while (stack != NULL) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + + return expr; +} + +/* + * Rspamd regexp utility functions + */ +struct rspamd_regexp * +parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode) +{ + const gchar *begin, *end, *p, *src, *start; + gchar *dbegin, *dend; + struct rspamd_regexp *result, *check; + gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE; + GError *err = NULL; + + if (line == NULL) { + msg_err ("cannot parse NULL line"); + return NULL; + } + + src = line; + result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp)); + /* Skip whitespaces */ + while (g_ascii_isspace (*line)) { + line++; + } + if (*line == '\0') { + msg_warn ("got empty regexp"); + return NULL; + } + start = line; + /* First try to find header name */ + begin = strchr (line, '/'); + if (begin != NULL) { + p = begin; + end = NULL; + while (p != line) { + if (*p == '=') { + end = p; + break; + } + p --; + } + if (end) { + result->header = rspamd_mempool_alloc (pool, end - line + 1); + rspamd_strlcpy (result->header, line, end - line + 1); + result->type = REGEXP_HEADER; + line = end; + } + } + else { + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + line = start; + } + /* Find begin of regexp */ + while (*line && *line != '/') { + line++; + } + if (*line != '\0') { + begin = line + 1; + } + else if (result->header == NULL) { + /* Assume that line without // is just a header name */ + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + return result; + } + else { + /* We got header name earlier but have not found // expression, so it is invalid regexp */ + msg_warn ("got no header name (eg. header=) but without corresponding regexp, %s", src); + return NULL; + } + /* Find end */ + end = begin; + while (*end && (*end != '/' || *(end - 1) == '\\')) { + end++; + } + if (end == begin || *end != '/') { + msg_warn ("no trailing / in regexp %s", src); + return NULL; + } + /* Parse flags */ + p = end + 1; + while (p != NULL) { + switch (*p) { + case 'i': + regexp_flags |= G_REGEX_CASELESS; + p++; + break; + case 'm': + regexp_flags |= G_REGEX_MULTILINE; + p++; + break; + case 's': + regexp_flags |= G_REGEX_DOTALL; + p++; + break; + case 'x': + regexp_flags |= G_REGEX_EXTENDED; + p++; + break; + case 'u': + regexp_flags |= G_REGEX_UNGREEDY; + p++; + break; + case 'o': + regexp_flags |= G_REGEX_OPTIMIZE; + p++; + break; + case 'r': + regexp_flags |= G_REGEX_RAW; + result->is_raw = TRUE; + p++; + break; + /* Type flags */ + case 'H': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_HEADER; + } + p++; + break; + case 'M': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_MESSAGE; + } + p++; + break; + case 'P': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_MIME; + } + p++; + break; + case 'U': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_URL; + } + p++; + break; + case 'X': + if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) { + result->type = REGEXP_RAW_HEADER; + } + p++; + break; + case 'T': + result->is_test = TRUE; + p ++; + break; + case 'S': + result->is_strong = TRUE; + p ++; + break; + /* Stop flags parsing */ + default: + p = NULL; + break; + } + } + + result->regexp_text = rspamd_mempool_strdup (pool, start); + dbegin = result->regexp_text + (begin - start); + dend = result->regexp_text + (end - start); + *dend = '\0'; + + if (raw_mode) { + regexp_flags |= G_REGEX_RAW; + } + + /* Avoid multiply regexp structures for similar regexps */ + if ((check = (struct rspamd_regexp *)re_cache_check (result->regexp_text, pool)) != NULL) { + /* Additional check for headers */ + if (result->type == REGEXP_HEADER || result->type == REGEXP_RAW_HEADER) { + if (result->header && check->header) { + if (strcmp (result->header, check->header) == 0) { + return check; + } + } + } + else { + return check; + } + } + result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err); + if ((regexp_flags & G_REGEX_RAW) != 0) { + result->raw_regexp = result->regexp; + } + else { + result->raw_regexp = g_regex_new (dbegin, regexp_flags | G_REGEX_RAW, 0, &err); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->raw_regexp); + } + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->regexp); + + *dend = '/'; + + if (result->regexp == NULL || err != NULL) { + msg_warn ("could not read regexp: %s while reading regexp %s", err->message, src); + return NULL; + } + + if (result->raw_regexp == NULL || err != NULL) { + msg_warn ("could not read raw regexp: %s while reading regexp %s", err->message, src); + return NULL; + } + + /* Add to cache for further usage */ + re_cache_add (result->regexp_text, result, pool); + return result; +} + +gboolean +call_expression_function (struct expression_function * func, struct rspamd_task * task, lua_State *L) +{ + struct _fl *selected, key; + + key.name = func->name; + + selected = bsearch (&key, list_ptr, functions_number, sizeof (struct _fl), fl_cmp); + if (selected == NULL) { + /* Try to check lua function */ + return FALSE; + } + + return selected->func (task, func->args, selected->user_data); +} + +struct expression_argument * +get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string) +{ + GQueue *stack; + gsize cur, op1, op2; + struct expression_argument *res; + struct expression *it; + + if (expr == NULL) { + msg_warn ("NULL expression passed"); + return NULL; + } + if (expr->next == NULL) { + res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); + if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type == EXPR_REGEXP_PARSED) { + res->type = EXPRESSION_ARGUMENT_NORMAL; + res->data = expr->content.operand; + } + else if (expr->type == EXPR_FUNCTION && !want_string) { + res->type = EXPRESSION_ARGUMENT_BOOL; + cur = call_expression_function (expr->content.operand, task, NULL); + res->data = GSIZE_TO_POINTER (cur); + } + else { + msg_warn ("cannot parse argument: it contains operator or bool expression that is not wanted"); + return NULL; + } + return res; + } + else if (!want_string) { + res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); + res->type = EXPRESSION_ARGUMENT_BOOL; + stack = g_queue_new (); + it = expr; + + while (it) { + if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED || it->type == EXPR_STR) { + g_queue_free (stack); + res->type = EXPRESSION_ARGUMENT_EXPR; + res->data = expr; + return res; + } + else if (it->type == EXPR_FUNCTION) { + cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task, NULL); + debug_task ("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false"); + } + else if (it->type == EXPR_OPERATION) { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + debug_task ("invalid expression"); + g_queue_free (stack); + return NULL; + } + switch (it->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + break; + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + break; + default: + it = it->next; + continue; + } + } + if (it) { + it = it->next; + } + } + if (!g_queue_is_empty (stack)) { + res->data = g_queue_pop_head (stack); + } + else { + res->data = GSIZE_TO_POINTER (FALSE); + } + + return res; + } + + msg_warn ("invalid expression argument"); + + return NULL; +} + +void +register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data) +{ + static struct _fl *new; + + functions_number++; + + new = g_new (struct _fl, functions_number); + memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl)); + if (list_allocated) { + g_free (list_ptr); + } + + list_allocated = TRUE; + new[functions_number - 1].name = name; + new[functions_number - 1].func = func; + new[functions_number - 1].user_data = user_data; + qsort (new, functions_number, sizeof (struct _fl), fl_cmp); + list_ptr = new; +} + +gboolean +rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused) +{ + struct expression_argument *arg; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + if (arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + /* XXX: really write this function */ + return TRUE; +} + +gboolean +rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused) +{ + struct expression_argument *arg; + GList *headerlist; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + debug_task ("try to get header %s", (gchar *)arg->data); + headerlist = message_get_header (task->task_pool, task->message, (gchar *)arg->data, FALSE); + if (headerlist) { + g_list_free (headerlist); + return TRUE; + } + return FALSE; +} + +/* + * This function is designed to find difference between text/html and text/plain parts + * It takes one argument: difference threshold, if we have two text parts, compare + * its hashes and check for threshold, if value is greater than threshold, return TRUE + * and return FALSE otherwise. + */ +gboolean +rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused) +{ + gint threshold, threshold2 = -1, diff; + struct mime_text_part *p1, *p2; + GList *cur; + struct expression_argument *arg; + GMimeObject *parent; + const GMimeContentType *ct; + gint *pdiff; + + if (args == NULL) { + debug_task ("no threshold is specified, assume it 100"); + threshold = 100; + } + else { + errno = 0; + arg = get_function_arg (args->data, task, TRUE); + threshold = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", assume it 100", (gchar *)args->data); + threshold = 100; + } + if (args->next) { + arg = get_function_arg (args->next->data, task, TRUE); + errno = 0; + threshold2 = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", ignore it", (gchar *)arg->data); + threshold2 = -1; + } + } + } + + if ((pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance")) != NULL) { + diff = *pdiff; + if (diff != -1) { + if (threshold2 > 0) { + if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + return FALSE; + } + else { + return FALSE; + } + } + + if (g_list_length (task->text_parts) == 2) { + cur = g_list_first (task->text_parts); + p1 = cur->data; + cur = g_list_next (cur); + pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint)); + *pdiff = -1; + + if (cur == NULL) { + msg_info ("bad parts list"); + return FALSE; + } + p2 = cur->data; + /* First of all check parent object */ + if (p1->parent && p1->parent == p2->parent) { + parent = p1->parent; + ct = g_mime_object_get_content_type (parent); +#ifndef GMIME24 + if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) { +#else + if (ct == NULL || ! g_mime_content_type_is_type ((GMimeContentType *)ct, "multipart", "alternative")) { +#endif + debug_task ("two parts are not belong to multipart/alternative container, skip check"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + } + else { + debug_task ("message contains two parts but they are in different multi-parts"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + if (!p1->is_empty && !p2->is_empty) { + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance_normalized (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold); + *pdiff = diff; + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + if (threshold2 > 0) { + if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + } + else if ((p1->is_empty && !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { + /* Empty and non empty parts are different */ + *pdiff = 0; + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return TRUE; + } + } + else { + debug_task ("message has too many text parts, so do not try to compare them with each other"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; +} + +struct addr_list { + const gchar *name; + const gchar *addr; +}; + +#define COMPARE_RCPT_LEN 3 +#define MIN_RCPT_TO_COMPARE 7 + +gboolean +rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused) +{ + struct expression_argument *arg; + InternetAddressList *cur; + InternetAddress *addr; + double threshold; + struct addr_list *ar; + gchar *c; + gint num, i, j, hits = 0, total = 0; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + errno = 0; + threshold = strtod ((gchar *)arg->data, NULL); + if (errno != 0) { + msg_warn ("invalid numeric value '%s': %s", (gchar *)arg->data, strerror (errno)); + return FALSE; + } + + if (!task->rcpts) { + return FALSE; + } + num = internet_address_list_length (task->rcpts); + if (num < MIN_RCPT_TO_COMPARE) { + return FALSE; + } + ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list)); + + /* Fill array */ + cur = task->rcpts; +#ifdef GMIME24 + for (i = 0; i < num; i ++) { + addr = internet_address_list_get_address (cur, i); + ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_name (addr)); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + *c = '\0'; + ar[i].addr = c + 1; + } + } +#else + i = 0; + while (cur) { + addr = internet_address_list_get_address (cur); + if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_addr (addr)); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + *c = '\0'; + ar[i].addr = c + 1; + } + cur = internet_address_list_next (cur); + i++; + } + else { + cur = internet_address_list_next (cur); + } + } +#endif + + /* Cycle all elements in array */ + for (i = 0; i < num; i++) { + for (j = i + 1; j < num; j++) { + if (ar[i].name && ar[j].name && g_ascii_strncasecmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) { + /* Common name part */ + hits++; + } + else if (ar[i].addr && ar[j].addr && g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) { + /* Common address part, but different name */ + hits++; + } + total++; + } + } + + if ((double)(hits * num / 2.) / (double)total >= threshold) { + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_has_only_html_part (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (p->is_html) { + res = TRUE; + } + else { + res = FALSE; + break; + } + cur = g_list_next (cur); + } + + return res; +} + +static gboolean +is_recipient_list_sorted (const InternetAddressList * ia) +{ + const InternetAddressList *cur; + InternetAddress *addr; + gboolean res = TRUE; + struct addr_list current = { NULL, NULL }, previous = { + NULL, NULL}; +#ifdef GMIME24 + gint num, i; +#endif + + /* Do not check to short address lists */ + if (internet_address_list_length ((InternetAddressList *)ia) < MIN_RCPT_TO_COMPARE) { + return FALSE; + } +#ifdef GMIME24 + num = internet_address_list_length ((InternetAddressList *)ia); + cur = ia; + for (i = 0; i < num; i ++) { + addr = internet_address_list_get_address ((InternetAddressList *)cur, i); + current.addr = (gchar *)internet_address_get_name (addr); + if (previous.addr != NULL) { + if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } +#else + cur = ia; + while (cur) { + addr = internet_address_list_get_address (cur); + if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + current.addr = internet_address_get_addr (addr); + if (previous.addr != NULL) { + if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } + cur = internet_address_list_next (cur); + } +#endif + + return res; +} + +gboolean +rspamd_is_recipients_sorted (struct rspamd_task * task, GList * args, void *unused) +{ + /* Check all types of addresses */ + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_TO)) == TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_BCC)) == TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_CC)) == TRUE) { + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_compare_transfer_encoding (struct rspamd_task * task, GList * args, void *unused) +{ + GMimeObject *part; +#ifndef GMIME24 + GMimePartEncodingType enc_req, part_enc; +#else + GMimeContentEncoding enc_req, part_enc; +#endif + struct expression_argument *arg; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); +#ifndef GMIME24 + enc_req = g_mime_part_encoding_from_string (arg->data); + if (enc_req == GMIME_PART_ENCODING_DEFAULT) { +#else + enc_req = g_mime_content_encoding_from_string (arg->data); + if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) { +#endif + msg_warn ("bad encoding type: %s", (gchar *)arg->data); + return FALSE; + } + + part = g_mime_message_get_mime_part (task->message); + if (part) { + if (GMIME_IS_PART (part)) { +#ifndef GMIME24 + part_enc = g_mime_part_get_encoding (GMIME_PART (part)); + if (part_enc == GMIME_PART_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_PART_ENCODING_7BIT; + } +#else + part_enc = g_mime_part_get_content_encoding (GMIME_PART (part)); + if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_CONTENT_ENCODING_7BIT; + } +#endif + + + debug_task ("got encoding in part: %d and compare with %d", (gint)part_enc, (gint)enc_req); +#ifndef GMIME24 + g_object_unref (part); +#endif + + return part_enc == enc_req; + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + + return FALSE; +} + +gboolean +rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = TRUE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (!p->is_empty && p->is_html) { + if (p->is_balanced) { + res = TRUE; + } + else { + res = FALSE; + break; + } + } + cur = g_list_next (cur); + } + + return res; + +} + +struct html_callback_data { + struct html_tag *tag; + gboolean *res; +}; + +static gboolean +search_html_node_callback (GNode * node, gpointer data) +{ + struct html_callback_data *cd = data; + struct html_node *nd; + + nd = node->data; + if (nd) { + if (nd->tag == cd->tag) { + *cd->res = TRUE; + return TRUE; + } + } + + return FALSE; +} + +gboolean +rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + struct expression_argument *arg; + struct html_tag *tag; + gboolean res = FALSE; + struct html_callback_data cd; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + tag = get_tag_by_name (arg->data); + if (tag == NULL) { + msg_warn ("unknown tag type passed as argument: %s", (gchar *)arg->data); + return FALSE; + } + + cur = g_list_first (task->text_parts); + cd.res = &res; + cd.tag = tag; + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes) { + g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, -1, search_html_node_callback, &cd); + } + cur = g_list_next (cur); + } + + return res; + +} + +gboolean +rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes == NULL) { + res = TRUE; + } + cur = g_list_next (cur); + } + + return res; + +} + + +/* + * vi:ts=4 + */ diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h new file mode 100644 index 000000000..954cc74f7 --- /dev/null +++ b/src/libmime/expressions.h @@ -0,0 +1,133 @@ +/** + * @file expressions.h + * Rspamd expressions API + */ + +#ifndef RSPAMD_EXPRESSIONS_H +#define RSPAMD_EXPRESSIONS_H + +#include "config.h" +#include <lua.h> + +struct rspamd_task; +struct rspamd_regexp; + +/** + * Rspamd expression function + */ +struct expression_function { + gchar *name; /**< name of function */ + GList *args; /**< its args */ +}; + +/** + * Function's argument + */ +struct expression_argument { + enum { + EXPRESSION_ARGUMENT_NORMAL, + EXPRESSION_ARGUMENT_BOOL, + EXPRESSION_ARGUMENT_EXPR, + } type; /**< type of argument (text or other function) */ + void *data; /**< pointer to its data */ +}; + +/** + * Logic expression + */ +struct expression { + enum { + EXPR_REGEXP, + EXPR_OPERATION, + EXPR_FUNCTION, + EXPR_STR, + EXPR_REGEXP_PARSED, + } type; /**< expression type */ + union { + void *operand; + gchar operation; + } content; /**< union for storing operand or operation code */ + const gchar *orig; /**< original line */ + struct expression *next; /**< chain link */ +}; + +typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args, void *user_data); + +/** + * Parse regexp line to regexp structure + * @param pool memory pool to use + * @param line incoming line + * @return regexp structure or NULL in case of error + */ +struct rspamd_regexp* parse_regexp (rspamd_mempool_t *pool, const gchar *line, gboolean raw_mode); + +/** + * Parse composites line to composites structure (eg. "SYMBOL1&SYMBOL2|!SYMBOL3") + * @param pool memory pool to use + * @param line incoming line + * @return expression structure or NULL in case of error + */ +struct expression* parse_expression (rspamd_mempool_t *pool, gchar *line); + +/** + * Call specified fucntion and return boolean result + * @param func function to call + * @param task task object + * @param L lua specific state + * @return TRUE or FALSE depending on function result + */ +gboolean call_expression_function (struct expression_function *func, struct rspamd_task *task, lua_State *L); + +/** + * Register specified function to rspamd internal functions list + * @param name name of function + * @param func pointer to function + */ +void register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data); + +/** + * Add regexp to regexp cache + * @param line symbolic representation + * @param pointer regexp data + */ +void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool); + +/** + * Check regexp in cache + * @param line symbolic representation + * @return pointer to regexp data or NULL if regexp is not found + */ +void * re_cache_check (const gchar *line, rspamd_mempool_t *pool); + +/** + * Remove regexp from regexp cache + * @param line symbolic representation + */ +void re_cache_del (const gchar *line, rspamd_mempool_t *pool); + +/** + * Add regexp to regexp task cache + * @param task task object + * @param pointer regexp data + * @param result numeric result of this regexp + */ +void task_cache_add (struct rspamd_task *task, struct rspamd_regexp *re, gint32 result); + +/** + * Check regexp in cache + * @param task task object + * @param pointer regexp data + * @return numeric result if value exists or -1 if not + */ +gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp *re); + +/** + * Parse and return a single function argument for a function (may recurse) + * @param expr expression structure that represents function's argument + * @param task task object + * @param want_string return NULL if argument is not a string + * @return expression argument structure or NULL if failed + */ +struct expression_argument *get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string); + +#endif diff --git a/src/libmime/filter.c b/src/libmime/filter.c new file mode 100644 index 000000000..cb0630d9d --- /dev/null +++ b/src/libmime/filter.c @@ -0,0 +1,1096 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "mem_pool.h" +#include "filter.h" +#include "main.h" +#include "message.h" +#include "cfg_file.h" +#include "util.h" +#include "expressions.h" +#include "settings.h" +#include "binlog.h" +#include "diff.h" +#include "classifiers/classifiers.h" +#include "tokenizers/tokenizers.h" + +#ifdef WITH_LUA +# include "lua/lua_common.h" +#endif + +#define COMMON_PART_FACTOR 95 + +#ifndef PARAM_H_HAS_BITSET +/* Bit map related macros. */ +#define NBBY 8 /* number of bits in a byte */ +#define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) +#define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) +#define isset(a,i) \ + (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) +#define isclr(a,i) \ + ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) +#endif +#define BITSPERBYTE (8*sizeof (gchar)) +#define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) + +static inline GQuark +filter_error_quark (void) +{ + return g_quark_from_static_string ("g-filter-error-quark"); +} + +static void +insert_metric_result (struct rspamd_task *task, struct metric *metric, const gchar *symbol, + double flag, GList * opts, gboolean single) +{ + struct metric_result *metric_res; + struct symbol *s; + gdouble *weight, w; + + metric_res = g_hash_table_lookup (task->results, metric->name); + + if (metric_res == NULL) { + /* Create new metric chain */ + metric_res = rspamd_mempool_alloc (task->task_pool, sizeof (struct metric_result)); + metric_res->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + metric_res->checked = FALSE; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, metric_res->symbols); + metric_res->metric = metric; + metric_res->grow_factor = 0; + metric_res->score = 0; + metric_res->domain_settings = NULL; + metric_res->user_settings = NULL; + apply_metric_settings (task, metric, metric_res); + g_hash_table_insert (task->results, (gpointer) metric->name, metric_res); + } + + weight = g_hash_table_lookup (metric->symbols, symbol); + if (weight == NULL) { + w = 0.0; + } + else { + w = (*weight) * flag; + } + + + /* Add metric score */ + if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) { + if (s->options && opts && opts != s->options) { + /* Append new options */ + s->options = g_list_concat (s->options, g_list_copy(opts)); + /* + * Note that there is no need to add new destructor of GList as elements of appended + * GList are used directly, so just free initial GList + */ + } + else if (opts) { + s->options = g_list_copy (opts); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); + } + if (!single) { + /* Handle grow factor */ + if (metric_res->grow_factor && w > 0) { + w *= metric_res->grow_factor; + metric_res->grow_factor *= metric->grow_factor; + } + s->score += w; + metric_res->score += w; + } + else { + if (fabs (s->score) < fabs (w)) { + /* Replace less weight with a bigger one */ + metric_res->score = metric_res->score - s->score + w; + s->score = w; + } + } + } + else { + s = rspamd_mempool_alloc (task->task_pool, sizeof (struct symbol)); + + /* Handle grow factor */ + if (metric_res->grow_factor && w > 0) { + w *= metric_res->grow_factor; + metric_res->grow_factor *= metric->grow_factor; + } + else if (w > 0) { + metric_res->grow_factor = metric->grow_factor; + } + + s->score = w; + s->name = symbol; + metric_res->score += w; + + if (opts) { + s->options = g_list_copy (opts); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); + } + else { + s->options = NULL; + } + + g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s); + } + debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w); + +} + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) +static GStaticMutex result_mtx = G_STATIC_MUTEX_INIT; +#else +G_LOCK_DEFINE (result_mtx); +#endif + +static void +insert_result_common (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts, gboolean single) +{ + struct metric *metric; + struct cache_item *item; + GList *cur, *metric_list; + + /* Avoid concurrenting inserting of results */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_lock (&result_mtx); +#else + G_LOCK (result_mtx); +#endif + metric_list = g_hash_table_lookup (task->cfg->metrics_symbols, symbol); + if (metric_list) { + cur = metric_list; + + while (cur) { + metric = cur->data; + insert_metric_result (task, metric, symbol, flag, opts, single); + cur = g_list_next (cur); + } + } + else { + /* Insert symbol to default metric */ + insert_metric_result (task, task->cfg->default_metric, symbol, flag, opts, single); + } + + /* Process cache item */ + if (task->cfg->cache) { + item = g_hash_table_lookup (task->cfg->cache->items_by_symbol, symbol); + if (item != NULL) { + item->s->frequency++; + } + } + + if (opts != NULL) { + /* XXX: it is not wise to destroy them here */ + g_list_free (opts); + } +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif +} + +/* Insert result that may be increased on next insertions */ +void +insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts) +{ + insert_result_common (task, symbol, flag, opts, task->cfg->one_shot_mode); +} + +/* Insert result as a single option */ +void +insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts) +{ + insert_result_common (task, symbol, flag, opts, TRUE); +} + +/* Return true if metric has score that is more than spam score for it */ +static gboolean +check_metric_is_spam (struct rspamd_task *task, struct metric *metric) +{ + struct metric_result *res; + double ms, rs; + + /* Avoid concurrency while checking results */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_lock (&result_mtx); +#else + G_LOCK (result_mtx); +#endif + res = g_hash_table_lookup (task->results, metric->name); + if (res) { +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif + if (!check_metric_settings (res, &ms, &rs)) { + ms = metric->actions[METRIC_ACTION_REJECT].score; + } + return (ms > 0 && res->score >= ms); + } + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif + + return FALSE; +} + +gint +process_filters (struct rspamd_task *task) +{ + GList *cur; + struct metric *metric; + gpointer item = NULL; + + /* Process metrics symbols */ + while (call_symbol_callback (task, task->cfg->cache, &item)) { + /* Check reject actions */ + cur = task->cfg->metrics_list; + while (cur) { + metric = cur->data; + if (!task->pass_all_filters && + metric->actions[METRIC_ACTION_REJECT].score > 0 && + check_metric_is_spam (task, metric)) { + task->state = WRITE_REPLY; + return 1; + } + cur = g_list_next (cur); + } + } + + task->state = WAIT_FILTER; + + return 1; +} + + +struct composites_data { + struct rspamd_task *task; + struct metric_result *metric_res; + GTree *symbols_to_remove; + guint8 *checked; +}; + +struct symbol_remove_data { + struct symbol *ms; + gboolean remove_weight; + gboolean remove_symbol; +}; + +static gint +remove_compare_data (gconstpointer a, gconstpointer b) +{ + const gchar *ca = a, *cb = b; + + return strcmp (ca, cb); +} + +static void +composites_foreach_callback (gpointer key, gpointer value, void *data) +{ + struct composites_data *cd = (struct composites_data *)data; + struct rspamd_composite *composite = value, *ncomp; + struct expression *expr; + GQueue *stack; + GList *symbols = NULL, *s; + gsize cur, op1, op2; + gchar logbuf[256], *sym, *check_sym; + gint r; + struct symbol *ms; + struct symbol_remove_data *rd; + + + expr = composite->expr; + if (isset (cd->checked, composite->id)) { + /* Symbol was already checked */ + return; + } + + stack = g_queue_new (); + + while (expr) { + if (expr->type == EXPR_STR) { + /* Find corresponding symbol */ + sym = expr->content.operand; + if (*sym == '~' || *sym == '-') { + sym ++; + } + if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) { + cur = 0; + if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, sym)) != NULL) { + /* Set checked for this symbol to avoid cyclic references */ + if (isclr (cd->checked, ncomp->id)) { + setbit (cd->checked, composite->id); + composites_foreach_callback (sym, ncomp, cd); + if (g_hash_table_lookup (cd->metric_res->symbols, sym) != NULL) { + cur = 1; + } + } + } + } + else { + cur = 1; + symbols = g_list_prepend (symbols, expr->content.operand); + } + g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); + } + else { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + g_list_free (symbols); + g_queue_free (stack); + setbit (cd->checked, composite->id); + return; + } + switch (expr->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + break; + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + break; + default: + expr = expr->next; + continue; + } + } + expr = expr->next; + } + if (!g_queue_is_empty (stack)) { + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + if (op1) { + /* Remove all symbols that are in composite symbol */ + s = g_list_first (symbols); + r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key); + while (s) { + sym = s->data; + if (*sym == '~' || *sym == '-') { + check_sym = sym + 1; + } + else { + check_sym = sym; + } + ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym); + + if (ms == NULL) { + /* Try to process other composites */ + if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, check_sym)) != NULL) { + /* Set checked for this symbol to avoid cyclic references */ + if (isclr (cd->checked, ncomp->id)) { + setbit (cd->checked, composite->id); + composites_foreach_callback (check_sym, ncomp, cd); + ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym); + } + } + } + + if (ms != NULL) { + rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (struct symbol_remove_data)); + rd->ms = ms; + if (G_UNLIKELY (*sym == '~')) { + rd->remove_weight = FALSE; + rd->remove_symbol = TRUE; + } + else if (G_UNLIKELY (*sym == '-')) { + rd->remove_symbol = FALSE; + rd->remove_weight = FALSE; + } + else { + rd->remove_symbol = TRUE; + rd->remove_weight = TRUE; + } + if (!g_tree_lookup (cd->symbols_to_remove, rd)) { + g_tree_insert (cd->symbols_to_remove, (gpointer)ms->name, rd); + } + } + else { + + } + + if (s->next) { + r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data); + } + else { + r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data); + } + s = g_list_next (s); + } + /* Add new symbol */ + insert_result_single (cd->task, key, 1.0, NULL); + msg_info ("%s", logbuf); + } + } + + setbit (cd->checked, composite->id); + g_queue_free (stack); + g_list_free (symbols); + + return; +} + +static gboolean +check_autolearn (struct statfile_autolearn_params *params, struct rspamd_task *task) +{ + gchar *metric_name = DEFAULT_METRIC; + struct metric_result *metric_res; + GList *cur; + + if (params->metric != NULL) { + metric_name = (gchar *)params->metric; + } + + /* First check threshold */ + metric_res = g_hash_table_lookup (task->results, metric_name); + if (metric_res == NULL) { + if (params->symbols == NULL && params->threshold_max > 0) { + /* For ham messages */ + return TRUE; + } + debug_task ("metric %s has no results", metric_name); + return FALSE; + } + else { + /* Process score of metric */ + if ((params->threshold_min != 0 && metric_res->score > params->threshold_min) || (params->threshold_max != 0 && metric_res->score < params->threshold_max)) { + /* Now check for specific symbols */ + if (params->symbols) { + cur = params->symbols; + while (cur) { + if (g_hash_table_lookup (metric_res->symbols, cur->data) == NULL) { + return FALSE; + } + cur = g_list_next (cur); + } + } + /* Now allow processing of actual autolearn */ + return TRUE; + } + } + + return FALSE; +} + +void +process_autolearn (struct statfile *st, struct rspamd_task *task, GTree * tokens, struct classifier *classifier, gchar *filename, struct classifier_ctx *ctx) +{ + stat_file_t *statfile; + struct statfile *unused; + + if (check_autolearn (st->autolearn, task)) { + if (tokens) { + /* Take care of subject */ + tokenize_subject (task, &tokens); + msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename); + + /* Get or create statfile */ + statfile = get_statfile_by_symbol (task->worker->srv->statfile_pool, ctx->cfg, + st->symbol, &unused, TRUE); + + if (statfile == NULL) { + return; + } + + classifier->learn_func (ctx, task->worker->srv->statfile_pool, st->symbol, tokens, TRUE, NULL, 1., NULL); + maybe_write_binlog (ctx->cfg, st, statfile, tokens); + statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, DEFAULT_STATFILE_INVALIDATE_TIME, DEFAULT_STATFILE_INVALIDATE_JITTER); + } + } +} + +static gboolean +composites_remove_symbols (gpointer key, gpointer value, gpointer data) +{ + struct composites_data *cd = data; + struct symbol_remove_data *rd = value; + + if (rd->remove_symbol) { + g_hash_table_remove (cd->metric_res->symbols, key); + } + if (rd->remove_weight) { + cd->metric_res->score -= rd->ms->score; + } + + return FALSE; +} + +static void +composites_metric_callback (gpointer key, gpointer value, gpointer data) +{ + struct rspamd_task *task = (struct rspamd_task *)data; + struct composites_data *cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); + struct metric_result *metric_res = (struct metric_result *)value; + + cd->task = task; + cd->metric_res = (struct metric_result *)metric_res; + cd->symbols_to_remove = g_tree_new (remove_compare_data); + cd->checked = rspamd_mempool_alloc0 (task->task_pool, NBYTES (g_hash_table_size (task->cfg->composite_symbols))); + + /* Process hash table */ + g_hash_table_foreach (task->cfg->composite_symbols, composites_foreach_callback, cd); + + /* Remove symbols that are in composites */ + g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); + /* Free list */ + g_tree_destroy (cd->symbols_to_remove); +} + +void +make_composites (struct rspamd_task *task) +{ + g_hash_table_foreach (task->results, composites_metric_callback, task); +} + +struct classifiers_cbdata { + struct rspamd_task *task; + struct lua_locked_state *nL; +}; + +static void +classifiers_callback (gpointer value, void *arg) +{ + struct classifiers_cbdata *cbdata = arg; + struct rspamd_task *task; + struct classifier_config *cl = value; + struct classifier_ctx *ctx; + struct mime_text_part *text_part, *p1, *p2; + struct statfile *st; + GTree *tokens = NULL; + GList *cur; + f_str_t c; + gchar *header = NULL; + gint *dist = NULL, diff; + gboolean is_twopart = FALSE; + + task = cbdata->task; + + if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) { + cur = message_get_header (task->task_pool, task->message, header, FALSE); + if (cur) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); + } + } + else { + cur = g_list_first (task->text_parts); + dist = rspamd_mempool_get_variable (task->task_pool, "parts_distance"); + if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { + is_twopart = TRUE; + } + } + ctx = cl->classifier->init_func (task->task_pool, cl); + + if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) { + while (cur != NULL) { + if (header) { + c.len = strlen (cur->data); + if (c.len > 0) { + c.begin = cur->data; + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, FALSE, NULL)) { + msg_info ("cannot tokenize input"); + return; + } + } + } + else { + text_part = (struct mime_text_part *)cur->data; + if (text_part->is_empty) { + cur = g_list_next (cur); + continue; + } + if (dist != NULL && cur->next == NULL) { + /* Compare part's content */ + + if (*dist >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + else if (cur->next == NULL && is_twopart) { + p1 = cur->prev->data; + p2 = text_part; + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + if (diff >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + c.begin = (gchar *)text_part->content->data; + c.len = text_part->content->len; + /* Tree would be freed at task pool freeing */ + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, + FALSE, text_part->is_utf, text_part->urls_offset)) { + msg_info ("cannot tokenize input"); + return; + } + } + cur = g_list_next (cur); + } + g_hash_table_insert (task->tokens, cl->tokenizer, tokens); + } + + /* Take care of subject */ + tokenize_subject (task, &tokens); + + if (tokens == NULL) { + return; + } + + if (cbdata->nL != NULL) { + rspamd_mutex_lock (cbdata->nL->m); + cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, cbdata->nL->L); + rspamd_mutex_unlock (cbdata->nL->m); + } + else { + /* Non-threaded case */ + cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, task->cfg->lua_state); + } + + /* Autolearning */ + cur = g_list_first (cl->statfiles); + while (cur) { + st = cur->data; + if (st->autolearn) { + if (check_autolearn (st->autolearn, task)) { + /* Process autolearn */ + process_autolearn (st, task, tokens, cl->classifier, st->path, ctx); + } + } + cur = g_list_next (cur); + } +} + + +void +process_statfiles (struct rspamd_task *task) +{ + struct classifiers_cbdata cbdata; + + if (task->is_skipped) { + return; + } + + if (task->tokens == NULL) { + task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens); + } + cbdata.task = task; + cbdata.nL = NULL; + g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata); + + /* Process results */ + make_composites (task); +} + +void +process_statfiles_threaded (gpointer data, gpointer user_data) +{ + struct rspamd_task *task = (struct rspamd_task *)data; + struct lua_locked_state *nL = user_data; + struct classifiers_cbdata cbdata; + + if (task->is_skipped) { + remove_async_thread (task->s); + return; + } + + if (task->tokens == NULL) { + task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens); + } + + cbdata.task = task; + cbdata.nL = nL; + g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata); + remove_async_thread (task->s); +} + +static void +insert_metric_header (gpointer metric_name, gpointer metric_value, gpointer data) +{ +#ifndef GLIB_HASH_COMPAT + struct rspamd_task *task = (struct rspamd_task *)data; + gint r = 0; + /* Try to be rfc2822 compatible and avoid long headers with folding */ + gchar header_name[128], outbuf[1000]; + GList *symbols = NULL, *cur; + struct metric_result *metric_res = (struct metric_result *)metric_value; + double ms, rs; + + rspamd_snprintf (header_name, sizeof (header_name), "X-Spam-%s", metric_res->metric->name); + + if (!check_metric_settings (metric_res, &ms, &rs)) { + ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + } + if (ms > 0 && metric_res->score >= ms) { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "yes; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs); + } + else { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "no; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs); + } + + symbols = g_hash_table_get_keys (metric_res->symbols); + cur = symbols; + while (cur) { + if (g_list_next (cur) != NULL) { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s,", (gchar *)cur->data); + } + else { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s", (gchar *)cur->data); + } + cur = g_list_next (cur); + } + g_list_free (symbols); +#ifdef GMIME24 + g_mime_object_append_header (GMIME_OBJECT (task->message), header_name, outbuf); +#else + g_mime_message_add_header (task->message, header_name, outbuf); +#endif + +#endif /* GLIB_COMPAT */ +} + +void +insert_headers (struct rspamd_task *task) +{ + g_hash_table_foreach (task->results, insert_metric_header, task); +} + +gboolean +check_action_str (const gchar *data, gint *result) +{ + if (g_ascii_strncasecmp (data, "reject", sizeof ("reject") - 1) == 0) { + *result = METRIC_ACTION_REJECT; + } + else if (g_ascii_strncasecmp (data, "greylist", sizeof ("greylist") - 1) == 0) { + *result = METRIC_ACTION_GREYLIST; + } + else if (g_ascii_strncasecmp (data, "add_header", sizeof ("add_header") - 1) == 0) { + *result = METRIC_ACTION_ADD_HEADER; + } + else if (g_ascii_strncasecmp (data, "rewrite_subject", sizeof ("rewrite_subject") - 1) == 0) { + *result = METRIC_ACTION_REWRITE_SUBJECT; + } + else { + return FALSE; + } + return TRUE; +} + +const gchar * +str_action_metric (enum rspamd_metric_action action) +{ + switch (action) { + case METRIC_ACTION_REJECT: + return "reject"; + case METRIC_ACTION_SOFT_REJECT: + return "soft_reject"; + case METRIC_ACTION_REWRITE_SUBJECT: + return "rewrite_subject"; + case METRIC_ACTION_ADD_HEADER: + return "add_header"; + case METRIC_ACTION_GREYLIST: + return "greylist"; + case METRIC_ACTION_NOACTION: + return "no_action"; + case METRIC_ACTION_MAX: + return "invalid max action"; + } + + return "unknown action"; +} + +gint +check_metric_action (double score, double required_score, struct metric *metric) +{ + struct metric_action *action, *selected_action = NULL; + double max_score = 0; + int i; + + if (score >= required_score) { + return METRIC_ACTION_REJECT; + } + else if (metric->actions == NULL) { + return METRIC_ACTION_NOACTION; + } + else { + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { + action = &metric->actions[i]; + if (action->score < 0) { + continue; + } + if (score >= action->score && action->score > max_score) { + selected_action = action; + max_score = action->score; + } + } + if (selected_action) { + return selected_action->action; + } + else { + return METRIC_ACTION_NOACTION; + } + } +} + +gboolean +learn_task (const gchar *statfile, struct rspamd_task *task, GError **err) +{ + GList *cur, *ex; + struct classifier_config *cl; + struct classifier_ctx *cls_ctx; + gchar *s; + f_str_t c; + GTree *tokens = NULL; + struct statfile *st; + stat_file_t *stf; + gdouble sum; + struct mime_text_part *part, *p1, *p2; + gboolean is_utf = FALSE, is_twopart = FALSE; + gint diff; + + + /* Load classifier by symbol */ + cl = g_hash_table_lookup (task->cfg->classifiers_symbols, statfile); + if (cl == NULL) { + g_set_error (err, filter_error_quark(), 1, "Statfile %s is not configured in any classifier", statfile); + return FALSE; + } + + /* If classifier has 'header' option just classify header of this type */ + if ((s = g_hash_table_lookup (cl->opts, "header")) != NULL) { + cur = message_get_header (task->task_pool, task->message, s, FALSE); + if (cur) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); + } + } + else { + /* Classify message otherwise */ + cur = g_list_first (task->text_parts); + if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { + is_twopart = TRUE; + } + } + + /* Get tokens from each element */ + while (cur) { + if (s != NULL) { + c.len = strlen (cur->data); + c.begin = cur->data; + ex = NULL; + } + else { + part = cur->data; + /* Skip empty parts */ + if (part->is_empty) { + cur = g_list_next (cur); + continue; + } + c.begin = (gchar *)part->content->data; + c.len = part->content->len; + is_utf = part->is_utf; + ex = part->urls_offset; + if (is_twopart && cur->next == NULL) { + /* Compare part's content */ + p1 = cur->prev->data; + p2 = part; + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + if (diff >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + } + /* Get tokens */ + if (!cl->tokenizer->tokenize_func ( + cl->tokenizer, task->task_pool, + &c, &tokens, FALSE, is_utf, ex)) { + g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message"); + return FALSE; + } + cur = g_list_next (cur); + } + + /* Handle messages without text */ + if (tokens == NULL) { + g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data"); + msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id); + return FALSE; + } + + /* Take care of subject */ + tokenize_subject (task, &tokens); + + /* Init classifier */ + cls_ctx = cl->classifier->init_func ( + task->task_pool, cl); + /* Get or create statfile */ + stf = get_statfile_by_symbol (task->worker->srv->statfile_pool, + cl, statfile, &st, TRUE); + + /* Learn */ + if (stf== NULL || !cl->classifier->learn_func ( + cls_ctx, task->worker->srv->statfile_pool, + statfile, tokens, TRUE, &sum, + 1.0, err)) { + if (*err) { + msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message); + return FALSE; + } + else { + g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error"); + msg_info ("learn failed for message <%s>, unknown learn error", task->message_id); + return FALSE; + } + } + /* Increase statistics */ + task->worker->srv->stat->messages_learned++; + + maybe_write_binlog (cl, st, stf, tokens); + msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f", + task->message_id, statfile, sum); + statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, + DEFAULT_STATFILE_INVALIDATE_TIME, + DEFAULT_STATFILE_INVALIDATE_JITTER); + + return TRUE; +} + +gboolean +learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err) +{ + GList *cur, *ex; + struct classifier_ctx *cls_ctx; + f_str_t c; + GTree *tokens = NULL; + struct mime_text_part *part, *p1, *p2; + gboolean is_utf = FALSE, is_twopart = FALSE; + gint diff; + + cur = g_list_first (task->text_parts); + if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { + is_twopart = TRUE; + } + + /* Get tokens from each element */ + while (cur) { + part = cur->data; + /* Skip empty parts */ + if (part->is_empty) { + cur = g_list_next (cur); + continue; + } + c.begin = (gchar *)part->content->data; + c.len = part->content->len; + is_utf = part->is_utf; + ex = part->urls_offset; + if (is_twopart && cur->next == NULL) { + /* + * Compare part's content + * Note: here we don't have filters proceeded this message, so using pool variable is a bad idea + */ + p1 = cur->prev->data; + p2 = part; + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + if (diff >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + /* Get tokens */ + if (!cl->tokenizer->tokenize_func ( + cl->tokenizer, task->task_pool, + &c, &tokens, FALSE, is_utf, ex)) { + g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message"); + return FALSE; + } + cur = g_list_next (cur); + } + + /* Handle messages without text */ + if (tokens == NULL) { + g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data"); + msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id); + return FALSE; + } + + /* Take care of subject */ + tokenize_subject (task, &tokens); + + /* Init classifier */ + cls_ctx = cl->classifier->init_func ( + task->task_pool, cl); + /* Learn */ + if (!cl->classifier->learn_spam_func ( + cls_ctx, task->worker->srv->statfile_pool, + tokens, task, is_spam, task->cfg->lua_state, err)) { + if (*err) { + msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message); + return FALSE; + } + else { + g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error"); + msg_info ("learn failed for message <%s>, unknown learn error", task->message_id); + return FALSE; + } + } + /* Increase statistics */ + task->worker->srv->stat->messages_learned++; + + msg_info ("learn success for message <%s>", + task->message_id); + statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, + DEFAULT_STATFILE_INVALIDATE_TIME, + DEFAULT_STATFILE_INVALIDATE_JITTER); + + return TRUE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libmime/filter.h b/src/libmime/filter.h new file mode 100644 index 000000000..258bd9447 --- /dev/null +++ b/src/libmime/filter.h @@ -0,0 +1,167 @@ +/** + * @file filter.h + * Filters logic implemetation + */ + +#ifndef RSPAMD_FILTER_H +#define RSPAMD_FILTER_H + +#include "config.h" +#include "symbols_cache.h" +#include "task.h" + +struct rspamd_task; +struct rspamd_settings; +struct classifier_config; + +typedef double (*metric_cons_func)(struct rspamd_task *task, const gchar *metric_name, const gchar *func_name); +typedef void (*filter_func)(struct rspamd_task *task); + +enum filter_type { C_FILTER, PERL_FILTER }; + +/** + * Filter structure + */ +struct filter { + gchar *func_name; /**< function name */ + enum filter_type type; /**< filter type (c or perl) */ + module_t *module; +}; + +/** + * Rspamd symbol + */ +struct symbol { + double score; /**< symbol's score */ + GList *options; /**< list of symbol's options */ + const gchar *name; +}; + +struct metric_action { + enum rspamd_metric_action action; + gdouble score; +}; + +/** + * Common definition of metric + */ +struct metric { + const gchar *name; /**< name of metric */ + gchar *func_name; /**< name of consolidation function */ + metric_cons_func func; /**< c consolidation function */ + double grow_factor; /**< grow factor for metric */ + GHashTable *symbols; /**< weights of symbols in metric */ + GHashTable *descriptions; /**< descriptions of symbols in metric */ + struct metric_action actions[METRIC_ACTION_MAX]; /**< all actions of the metric */ + gchar *subject; /**< subject rewrite string */ +}; + +/** + * Result of metric processing + */ +struct metric_result { + struct metric *metric; /**< pointer to metric structure */ + double score; /**< total score */ + GHashTable *symbols; /**< symbols of metric */ + gboolean checked; /**< whether metric result is consolidated */ + double grow_factor; /**< current grow factor */ + struct rspamd_settings *user_settings; /**< settings for metric */ + struct rspamd_settings *domain_settings; /**< settings for metric */ +}; + +/** + * Composite structure + */ +struct rspamd_composite { + struct expression *expr; + gint id; +}; + +/** + * Process all filters + * @param task worker's task that present message from user + * @return 0 - if there is non-finished tasks and 1 if processing is completed + */ +gint process_filters (struct rspamd_task *task); + +/** + * Process message with statfiles + * @param task worker's task that present message from user + */ +void process_statfiles (struct rspamd_task *task); + +/** + * Process message with statfiles threaded + * @param data worker's task that present message from user + */ +void process_statfiles_threaded (gpointer data, gpointer user_data); + +/** + * Insert a result to task + * @param task worker's task that present message from user + * @param metric_name metric's name to which we need to insert result + * @param symbol symbol to insert + * @param flag numeric weight for symbol + * @param opts list of symbol's options + */ +void insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts); + +/** + * Insert a single result to task + * @param task worker's task that present message from user + * @param metric_name metric's name to which we need to insert result + * @param symbol symbol to insert + * @param flag numeric weight for symbol + * @param opts list of symbol's options + */ +void insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts); + +/** + * Process all results and form composite metrics from existent metrics as it is defined in config + * @param task worker's task that present message from user + */ +void make_composites (struct rspamd_task *task); + +/** + * Default consolidation function for metric, it get all symbols and multiply symbol + * weight by some factor that is specified in config. Default factor is 1. + * @param task worker's task that present message from user + * @param metric_name name of metric + * @return result metric weight + */ +double factor_consolidation_func (struct rspamd_task *task, const gchar *metric_name, const gchar *unused); + +/* + * Learn specified statfile with message in a task + * @param statfile symbol of statfile + * @param task worker's task object + * @param err pointer to GError + * @return true if learn succeed + */ +gboolean learn_task (const gchar *statfile, struct rspamd_task *task, GError **err); + +/* + * Learn specified statfile with message in a task + * @param statfile symbol of statfile + * @param task worker's task object + * @param err pointer to GError + * @return true if learn succeed + */ +gboolean learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err); + +/* + * Get action from a string + */ +gboolean check_action_str (const gchar *data, gint *result); + +/* + * Return textual representation of action enumeration + */ +const gchar *str_action_metric (enum rspamd_metric_action action); + +/* + * Get action for specific metric + */ +gint check_metric_action (double score, double required_score, struct metric *metric); + +#endif diff --git a/src/libmime/images.c b/src/libmime/images.c new file mode 100644 index 000000000..ff07bbd72 --- /dev/null +++ b/src/libmime/images.c @@ -0,0 +1,255 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "images.h" +#include "main.h" +#include "message.h" + +static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10}; +static const guint8 jpg_sig1[] = {0xff, 0xd8}; +static const guint8 jpg_sig2[] = {'J', 'F', 'I', 'F'}; +static const guint8 gif_signature[] = {'G', 'I', 'F', '8'}; +static const guint8 bmp_signature[] = {'B', 'M'}; + +static void process_image (struct rspamd_task *task, struct mime_part *part); + + +void +process_images (struct rspamd_task *task) +{ + GList *cur; + struct mime_part *part; + + cur = task->parts; + while (cur) { + part = cur->data; + if (g_mime_content_type_is_type (part->type, "image", "*") && part->content->len > 0) { + process_image (task, part); + } + cur = g_list_next (cur); + } + +} + +static enum known_image_types +detect_image_type (GByteArray *data) +{ + if (data->len > sizeof (png_signature) / sizeof (png_signature[0])) { + if (memcmp (data->data, png_signature, sizeof (png_signature)) == 0) { + return IMAGE_TYPE_PNG; + } + } + if (data->len > 10) { + if (memcmp (data->data, jpg_sig1, sizeof (jpg_sig1)) == 0) { + if (memcmp (data->data + 6, jpg_sig2, sizeof (jpg_sig2)) == 0) { + return IMAGE_TYPE_JPG; + } + } + } + if (data->len > sizeof (gif_signature) / sizeof (gif_signature[0])) { + if (memcmp (data->data, gif_signature, sizeof (gif_signature)) == 0) { + return IMAGE_TYPE_GIF; + } + } + if (data->len > sizeof (bmp_signature) / sizeof (bmp_signature[0])) { + if (memcmp (data->data, bmp_signature, sizeof (bmp_signature)) == 0) { + return IMAGE_TYPE_BMP; + } + } + + return IMAGE_TYPE_UNKNOWN; +} + + +static struct rspamd_image * +process_png_image (struct rspamd_task *task, GByteArray *data) +{ + struct rspamd_image *img; + guint32 t; + guint8 *p; + + if (data->len < 24) { + msg_info ("bad png detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + /* In png we should find iHDR section and get data from it */ + /* Skip signature and read header section */ + p = data->data + 12; + if (memcmp (p, "IHDR", 4) != 0) { + msg_info ("png doesn't begins with IHDR section", task->message_id); + return NULL; + } + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_PNG; + img->data = data; + + p += 4; + memcpy (&t, p, sizeof (guint32)); + img->width = ntohl (t); + p += 4; + memcpy (&t, p, sizeof (guint32)); + img->height = ntohl (t); + + return img; +} + +static struct rspamd_image * +process_jpg_image (struct rspamd_task *task, GByteArray *data) +{ + guint8 *p; + guint16 t; + gsize remain; + struct rspamd_image *img; + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_JPG; + img->data = data; + + p = data->data; + remain = data->len; + /* In jpeg we should find any data stream (ff c0 .. ff c3) and extract its height and width */ + while (remain --) { + if (*p == 0xFF && remain > 8 && (*(p + 1) >= 0xC0 && *(p + 1) <= 0xC3)) { + memcpy (&t, p + 5, sizeof (guint16)); + img->height = ntohs (t); + memcpy (&t, p + 7, sizeof (guint16)); + img->width = ntohs (t); + return img; + } + p ++; + } + + return NULL; +} + +static struct rspamd_image * +process_gif_image (struct rspamd_task *task, GByteArray *data) +{ + struct rspamd_image *img; + guint8 *p; + guint16 t; + + if (data->len < 10) { + msg_info ("bad gif detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_GIF; + img->data = data; + + p = data->data + 6; + memcpy (&t, p, sizeof (guint16)); + img->width = GUINT16_FROM_LE (t); + memcpy (&t, p + 2, sizeof (guint16)); + img->height = GUINT16_FROM_LE (t); + + return img; +} + +static struct rspamd_image * +process_bmp_image (struct rspamd_task *task, GByteArray *data) +{ + struct rspamd_image *img; + gint32 t; + guint8 *p; + + + + if (data->len < 28) { + msg_info ("bad bmp detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_BMP; + img->data = data; + p = data->data + 18; + memcpy (&t, p, sizeof (gint32)); + img->width = abs (GINT32_FROM_LE (t)); + memcpy (&t, p + 4, sizeof (gint32)); + img->height = abs (GINT32_FROM_LE (t)); + + return img; +} + +static void +process_image (struct rspamd_task *task, struct mime_part *part) +{ + enum known_image_types type; + struct rspamd_image *img = NULL; + if ((type = detect_image_type (part->content)) != IMAGE_TYPE_UNKNOWN) { + switch (type) { + case IMAGE_TYPE_PNG: + img = process_png_image (task, part->content); + break; + case IMAGE_TYPE_JPG: + img = process_jpg_image (task, part->content); + break; + case IMAGE_TYPE_GIF: + img = process_gif_image (task, part->content); + break; + case IMAGE_TYPE_BMP: + img = process_bmp_image (task, part->content); + break; + default: + img = NULL; + break; + } + } + + if (img != NULL) { + debug_task ("detected %s image of size %ud x %ud in message <%s>", + image_type_str (img->type), + img->width, img->height, + task->message_id); + img->filename = part->filename; + task->images = g_list_prepend (task->images, img); + } +} + +const gchar * +image_type_str (enum known_image_types type) +{ + switch (type) { + case IMAGE_TYPE_PNG: + return "PNG"; + break; + case IMAGE_TYPE_JPG: + return "JPEG"; + break; + case IMAGE_TYPE_GIF: + return "GIF"; + break; + case IMAGE_TYPE_BMP: + return "BMP"; + break; + default: + return "unknown"; + } + + return "unknown"; +} diff --git a/src/libmime/images.h b/src/libmime/images.h new file mode 100644 index 000000000..c43941ebc --- /dev/null +++ b/src/libmime/images.h @@ -0,0 +1,33 @@ +#ifndef IMAGES_H_ +#define IMAGES_H_ + +#include "config.h" +#include "main.h" + +enum known_image_types { + IMAGE_TYPE_PNG, + IMAGE_TYPE_JPG, + IMAGE_TYPE_GIF, + IMAGE_TYPE_BMP, + IMAGE_TYPE_UNKNOWN = 9000 +}; + +struct rspamd_image { + enum known_image_types type; + GByteArray *data; + guint32 width; + guint32 height; + const gchar *filename; +}; + +/* + * Process images from a worker task + */ +void process_images (struct rspamd_task *task); + +/* + * Get textual representation of an image's type + */ +const gchar *image_type_str (enum known_image_types type); + +#endif /* IMAGES_H_ */ diff --git a/src/libmime/message.c b/src/libmime/message.c new file mode 100644 index 000000000..4567869e9 --- /dev/null +++ b/src/libmime/message.c @@ -0,0 +1,1764 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "util.h" +#include "main.h" +#include "message.h" +#include "cfg_file.h" +#include "html.h" +#include "images.h" + +#define RECURSION_LIMIT 30 +#define UTF8_CHARSET "UTF-8" + +GByteArray * +strip_html_tags (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, GByteArray * src, gint *stateptr) +{ + uint8_t *p, *rp, *tbegin = NULL, *end, c, lc; + gint br, i = 0, depth = 0, in_q = 0; + gint state = 0; + GByteArray *buf; + GNode *level_ptr = NULL; + gboolean erase = FALSE; + + if (stateptr) + state = *stateptr; + + buf = g_byte_array_sized_new (src->len); + g_byte_array_append (buf, src->data, src->len); + + c = *src->data; + lc = '\0'; + p = src->data; + rp = buf->data; + end = src->data + src->len; + br = 0; + + while (i < (gint)src->len) { + switch (c) { + case '\0': + break; + case '<': + if (g_ascii_isspace (*(p + 1))) { + goto reg_char; + } + if (state == 0) { + lc = '<'; + tbegin = p + 1; + state = 1; + } + else if (state == 1) { + /* Opening bracket without closing one */ + p --; + while (g_ascii_isspace (*p) && p > src->data) { + p --; + } + p ++; + goto unbreak_tag; + } + break; + + case '(': + if (state == 2) { + if (lc != '"' && lc != '\'') { + lc = '('; + br++; + } + } + else if (state == 0 && !erase) { + *(rp++) = c; + } + break; + + case ')': + if (state == 2) { + if (lc != '"' && lc != '\'') { + lc = ')'; + br--; + } + } + else if (state == 0 && !erase) { + *(rp++) = c; + } + break; + + case '>': + if (depth) { + depth--; + break; + } + + if (in_q) { + break; + } +unbreak_tag: + switch (state) { + case 1: /* HTML/XML */ + lc = '>'; + in_q = state = 0; + erase = !add_html_node (task, pool, part, tbegin, p - tbegin, end - tbegin, &level_ptr); + break; + + case 2: /* PHP */ + if (!br && lc != '\"' && *(p - 1) == '?') { + in_q = state = 0; + } + break; + + case 3: + in_q = state = 0; + break; + + case 4: /* JavaScript/CSS/etc... */ + if (p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '-') { + in_q = state = 0; + } + break; + + default: + if (!erase) { + *(rp++) = c; + } + break; + } + break; + + case '"': + case '\'': + if (state == 2 && *(p - 1) != '\\') { + if (lc == c) { + lc = '\0'; + } + else if (lc != '\\') { + lc = c; + } + } + else if (state == 0 && !erase) { + *(rp++) = c; + } + if (state && p != src->data && *(p - 1) != '\\' && (!in_q || *p == in_q)) { + if (in_q) { + in_q = 0; + } + else { + in_q = *p; + } + } + break; + + case '!': + /* JavaScript & Other HTML scripting languages */ + if (state == 1 && *(p - 1) == '<') { + state = 3; + lc = c; + } + else { + if (state == 0 && !erase) { + *(rp++) = c; + } + } + break; + + case '-': + if (state == 3 && p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '!') { + state = 4; + } + else { + goto reg_char; + } + break; + + case '?': + + if (state == 1 && *(p - 1) == '<') { + br = 0; + state = 2; + break; + } + + case 'E': + case 'e': + /* !DOCTYPE exception */ + if (state == 3 && p > src->data + 6 + && g_ascii_tolower (*(p - 1)) == 'p' + && g_ascii_tolower (*(p - 2)) == 'y' + && g_ascii_tolower (*(p - 3)) == 't' && g_ascii_tolower (*(p - 4)) == 'c' && g_ascii_tolower (*(p - 5)) == 'o' && g_ascii_tolower (*(p - 6)) == 'd') { + state = 1; + break; + } + /* fall-through */ + + case 'l': + + /* swm: If we encounter '<?xml' then we shouldn't be in + * state == 2 (PHP). Switch back to HTML. + */ + + if (state == 2 && p > src->data + 2 && *(p - 1) == 'm' && *(p - 2) == 'x') { + state = 1; + break; + } + + /* fall-through */ + default: + reg_char: + if (state == 0 && !erase) { + *(rp++) = c; + } + break; + } + i++; + if (i < (gint)src->len) { + c = *(++p); + } + } + if (rp < buf->data + src->len) { + *rp = '\0'; + g_byte_array_set_size (buf, rp - buf->data); + } + + /* Check tag balancing */ + if (level_ptr && level_ptr->data != NULL) { + part->is_balanced = FALSE; + } + + if (stateptr) { + *stateptr = state; + } + + return buf; +} + +static void +parse_qmail_recv (rspamd_mempool_t * pool, gchar *line, struct received_header *r) +{ + gchar *s, *p, t; + + /* We are interested only with received from network headers */ + if ((p = strstr (line, "from network")) == NULL) { + r->is_error = 2; + return; + } + + p += sizeof ("from network") - 1; + while (g_ascii_isspace (*p) || *p == '[') { + p++; + } + /* format is ip/host */ + s = p; + if (*p) { + while (g_ascii_isdigit (*++p) || *p == '.'); + if (*p != '/') { + r->is_error = 1; + return; + } + else { + *p = '\0'; + r->real_ip = rspamd_mempool_strdup (pool, s); + *p = '/'; + /* Now try to parse hostname */ + s = ++p; + while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') { + p++; + } + t = *p; + *p = '\0'; + r->real_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + } + } +} + +static void +parse_recv_header (rspamd_mempool_t * pool, gchar *line, struct received_header *r) +{ + gchar *p, *s, t, **res = NULL; + enum { + RSPAMD_RECV_STATE_INIT = 0, + RSPAMD_RECV_STATE_FROM, + RSPAMD_RECV_STATE_IP_BLOCK, + RSPAMD_RECV_STATE_BRACES_BLOCK, + RSPAMD_RECV_STATE_BY_BLOCK, + RSPAMD_RECV_STATE_PARSE_IP, + RSPAMD_RECV_STATE_SKIP_SPACES, + RSPAMD_RECV_STATE_ERROR + } state = RSPAMD_RECV_STATE_INIT, + next_state = RSPAMD_RECV_STATE_INIT; + gboolean is_exim = FALSE; + + g_strstrip (line); + p = line; + s = line; + + while (*p) { + switch (state) { + /* Initial state, search for from */ + case RSPAMD_RECV_STATE_INIT: + if (*p == 'f' || *p == 'F') { + if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == 'o' && g_ascii_tolower (*++p) == 'm') { + p++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_FROM; + } + } + else if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') { + state = RSPAMD_RECV_STATE_IP_BLOCK; + } + else { + /* This can be qmail header, parse it separately */ + parse_qmail_recv (pool, line, r); + return; + } + break; + /* Read hostname */ + case RSPAMD_RECV_STATE_FROM: + if (*p == '[') { + /* This should be IP address */ + res = &r->from_ip; + state = RSPAMD_RECV_STATE_PARSE_IP; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + s = ++p; + } + else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') { + p++; + } + else { + t = *p; + *p = '\0'; + r->from_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + break; + /* Try to extract additional info */ + case RSPAMD_RECV_STATE_IP_BLOCK: + /* Try to extract ip or () info or by */ + if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') { + p += 2; + /* Skip spaces after by */ + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BY_BLOCK; + } + else if (*p == '(') { + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + p++; + } + else if (*p == '[') { + /* Got ip before '(' so extract it */ + s = ++p; + res = &r->from_ip; + state = RSPAMD_RECV_STATE_PARSE_IP; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + else { + p++; + } + break; + /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */ + case RSPAMD_RECV_STATE_BRACES_BLOCK: + /* End of block */ + if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || + *p == '_' || *p == ':') { + p++; + } + else if (*p == '[') { + s = ++p; + state = RSPAMD_RECV_STATE_PARSE_IP; + res = &r->real_ip; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else { + if (p > s) { + /* Got some real hostname */ + /* check whether it is helo or p is not space symbol */ + if (!g_ascii_isspace (*p) || *(p + 1) != '[') { + /* Exim style ([ip]:port helo=hostname) */ + if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) { + /* Ip ending */ + is_exim = TRUE; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) { + p ++; + is_exim = TRUE; + if (r->real_hostname == NULL && r->from_hostname != NULL) { + r->real_hostname = r->from_hostname; + } + s = p; + while (*p != ')' && !g_ascii_isspace (*p) && *p != '\0') { + p ++; + } + if (p > s) { + r->from_hostname = rspamd_mempool_alloc (pool, p - s + 1); + rspamd_strlcpy (r->from_hostname, s, p - s + 1); + } + } + else if (p - s == 4 && memcmp (s, "port=", 5) == 0) { + p ++; + is_exim = TRUE; + while (g_ascii_isdigit (*p)) { + p ++; + } + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else if (*p == '=' && is_exim) { + /* Just skip unknown pairs */ + p ++; + while (!g_ascii_isspace (*p) && *p != ')' && *p != '\0') { + p ++; + } + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else { + /* skip all */ + while (*p++ != ')' && *p != '\0'); + state = RSPAMD_RECV_STATE_IP_BLOCK; + } + } + else { + /* Postfix style (hostname [ip]) */ + t = *p; + *p = '\0'; + r->real_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + /* Now parse ip */ + p += 2; + s = p; + res = &r->real_ip; + state = RSPAMD_RECV_STATE_PARSE_IP; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + continue; + } + if (*p == ')') { + p ++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + } + else if (*p == ')') { + p ++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + else { + r->is_error = 1; + return; + } + } + break; + /* Got by word */ + case RSPAMD_RECV_STATE_BY_BLOCK: + /* Here can be only hostname */ + if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-' + || *p == '_') && p[1] != '\0') { + p++; + } + else { + /* We got something like hostname */ + if (p[1] != '\0') { + t = *p; + *p = '\0'; + r->by_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + } + else { + r->by_hostname = rspamd_mempool_strdup (pool, s); + } + /* Now end of parsing */ + if (is_exim) { + /* Adjust for exim received */ + if (r->real_ip == NULL && r->from_ip != NULL) { + r->real_ip = r->from_ip; + } + else if (r->from_ip == NULL && r->real_ip != NULL) { + r->from_ip = r->real_ip; + if (r->real_hostname == NULL && r->from_hostname != NULL) { + r->real_hostname = r->from_hostname; + } + } + } + return; + } + break; + + /* Extract ip */ + case RSPAMD_RECV_STATE_PARSE_IP: + while (g_ascii_isxdigit (*p) || *p == '.' || *p == ':') { + p ++; + } + if (*p != ']') { + /* Not an ip in fact */ + state = RSPAMD_RECV_STATE_SKIP_SPACES; + p++; + } + else { + *p = '\0'; + *res = rspamd_mempool_strdup (pool, s); + *p = ']'; + p++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + } + break; + + /* Skip spaces */ + case RSPAMD_RECV_STATE_SKIP_SPACES: + if (!g_ascii_isspace (*p)) { + state = next_state; + s = p; + } + else { + p++; + } + break; + default: + r->is_error = 1; + return; + break; + } + } + + r->is_error = 1; + return; +} + +/* Convert raw headers to a list of struct raw_header * */ +static void +process_raw_headers (struct rspamd_task *task) +{ + struct raw_header *new = NULL, *lp; + gchar *p, *c, *tmp, *tp; + gint state = 0, l, next_state = 100, err_state = 100, t_state; + gboolean valid_folding = FALSE; + + p = task->raw_headers_str; + c = p; + while (*p) { + /* FSM for processing headers */ + switch (state) { + case 0: + /* Begin processing headers */ + if (!g_ascii_isalpha (*p)) { + /* We have some garbage at the beginning of headers, skip this line */ + state = 100; + next_state = 0; + } + else { + state = 1; + c = p; + } + break; + case 1: + /* We got something like header's name */ + if (*p == ':') { + new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct raw_header)); + l = p - c; + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->name = tmp; + new->empty_separator = TRUE; + p ++; + state = 2; + c = p; + } + else if (g_ascii_isspace (*p)) { + /* Not header but some garbage */ + state = 100; + next_state = 0; + } + else { + p ++; + } + break; + case 2: + /* We got header's name, so skip any \t or spaces */ + if (*p == '\t') { + new->tab_separated = TRUE; + new->empty_separator = FALSE; + p ++; + } + else if (*p == ' ') { + new->empty_separator = FALSE; + p ++; + } + else if (*p == '\n' || *p == '\r') { + /* Process folding */ + state = 99; + l = p - c; + if (l > 0) { + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->separator = tmp; + } + next_state = 3; + err_state = 5; + c = p; + } + else { + /* Process value */ + l = p - c; + if (l >= 0) { + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->separator = tmp; + } + c = p; + state = 3; + } + break; + case 3: + if (*p == '\r' || *p == '\n') { + /* Hold folding */ + state = 99; + next_state = 3; + err_state = 4; + } + else if (*(p + 1) == '\0') { + state = 4; + } + else { + p ++; + } + break; + case 4: + /* Copy header's value */ + l = p - c; + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + tp = tmp; + t_state = 0; + while (l --) { + if (t_state == 0) { + /* Before folding */ + if (*c == '\n' || *c == '\r') { + t_state = 1; + c ++; + *tp ++ = ' '; + } + else { + *tp ++ = *c ++; + } + } + else if (t_state == 1) { + /* Inside folding */ + if (g_ascii_isspace (*c)) { + c++; + } + else { + t_state = 0; + *tp ++ = *c ++; + } + } + } + /* Strip last space that can be added by \r\n parsing */ + if (*(tp - 1) == ' ') { + tp --; + } + *tp = '\0'; + new->value = tmp; + new->next = NULL; + if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { + while (lp->next != NULL) { + lp = lp->next; + } + lp->next = new; + } + else { + g_hash_table_insert (task->raw_headers, new->name, new); + } + debug_task ("add raw header %s: %s", new->name, new->value); + state = 0; + break; + case 5: + /* Header has only name, no value */ + new->next = NULL; + new->value = ""; + if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { + while (lp->next != NULL) { + lp = lp->next; + } + lp->next = new; + } + else { + g_hash_table_insert (task->raw_headers, new->name, new); + } + state = 0; + debug_task ("add raw header %s: %s", new->name, new->value); + break; + case 99: + /* Folding state */ + if (*(p + 1) == '\0') { + state = err_state; + } + else { + if (*p == '\r' || *p == '\n') { + p ++; + valid_folding = FALSE; + } + else if (*p == '\t' || *p == ' ') { + /* Valid folding */ + p ++; + valid_folding = TRUE; + } + else { + if (valid_folding) { + debug_task ("go to state: %d->%d", state, next_state); + state = next_state; + } + else { + /* Fall back */ + debug_task ("go to state: %d->%d", state, err_state); + state = err_state; + } + } + } + break; + case 100: + /* Fail state, skip line */ + if (*p == '\r') { + if (*(p + 1) == '\n') { + p ++; + } + p ++; + state = next_state; + } + else if (*p == '\n') { + if (*(p + 1) == '\r') { + p ++; + } + p ++; + state = next_state; + } + else if (*(p + 1) == '\0') { + state = next_state; + p ++; + } + else { + p ++; + } + break; + } + } +} + +static void +free_byte_array_callback (void *pointer) +{ + GByteArray *arr = (GByteArray *) pointer; + g_byte_array_free (arr, TRUE); +} + +static GByteArray * +convert_text_to_utf (struct rspamd_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part) +{ + GError *err = NULL; + gsize read_bytes, write_bytes; + const gchar *charset; + gchar *res_str; + GByteArray *result_array; + + if (task->cfg->raw_mode) { + text_part->is_raw = TRUE; + return part_content; + } + + if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) { + text_part->is_raw = TRUE; + return part_content; + } + + if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) { + if (g_utf8_validate (part_content->data, part_content->len, NULL)) { + text_part->is_raw = FALSE; + text_part->is_utf = TRUE; + return part_content; + } + else { + msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id); + text_part->is_raw = TRUE; + return part_content; + } + } + + res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err); + if (res_str == NULL) { + msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem"); + text_part->is_raw = TRUE; + return part_content; + } + + result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); + result_array->data = res_str; + result_array->len = write_bytes; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, res_str); + text_part->is_raw = FALSE; + text_part->is_utf = TRUE; + + return result_array; +} + +static void +process_text_part (struct rspamd_task *task, GByteArray *part_content, GMimeContentType *type, + GMimeObject *part, GMimeObject *parent, gboolean is_empty) +{ + struct mime_text_part *text_part; + const gchar *cd; + + /* Skip attachements */ +#ifndef GMIME24 + cd = g_mime_part_get_content_disposition (GMIME_PART (part)); + if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#else + cd = g_mime_object_get_disposition (GMIME_OBJECT (part)); + if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#endif + + if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) { + debug_task ("got urls from text/html part"); + + text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); + text_part->is_html = TRUE; + if (is_empty) { + text_part->is_empty = TRUE; + text_part->orig = NULL; + text_part->content = NULL; + task->text_parts = g_list_prepend (task->text_parts, text_part); + return; + } + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); + text_part->is_balanced = TRUE; + text_part->html_nodes = NULL; + text_part->parent = parent; + + text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL); + + if (text_part->html_nodes == NULL) { + url_parse_text (task->task_pool, task, text_part, FALSE); + } + else { + decode_entitles (text_part->content->data, &text_part->content->len); + url_parse_text (task->task_pool, task, text_part, FALSE); +#if 0 + url_parse_text (task->task_pool, task, text_part, TRUE); +#endif + } + + fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content); + task->text_parts = g_list_prepend (task->text_parts, text_part); + } + else if (g_mime_content_type_is_type (type, "text", "*")) { + debug_task ("got urls from text/plain part"); + + text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); + text_part->is_html = FALSE; + text_part->parent = parent; + if (is_empty) { + text_part->is_empty = TRUE; + text_part->orig = NULL; + text_part->content = NULL; + task->text_parts = g_list_prepend (task->text_parts, text_part); + return; + } + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); + text_part->content = text_part->orig; + url_parse_text (task->task_pool, task, text_part, FALSE); + fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); + task->text_parts = g_list_prepend (task->text_parts, text_part); + } +} + +#ifdef GMIME24 +static void +mime_foreach_callback (GMimeObject * parent, GMimeObject * part, gpointer user_data) +#else +static void +mime_foreach_callback (GMimeObject * part, gpointer user_data) +#endif +{ + struct rspamd_task *task = (struct rspamd_task *)user_data; + struct mime_part *mime_part; + GMimeContentType *type; + GMimeDataWrapper *wrapper; + GMimeStream *part_stream; + GByteArray *part_content; + + task->parts_count++; + + /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ + + /* find out what class 'part' is... */ + if (GMIME_IS_MESSAGE_PART (part)) { + /* message/rfc822 or message/news */ + GMimeMessage *message; + + /* g_mime_message_foreach_part() won't descend into + child message parts, so if we want to count any + subparts of this child message, we'll have to call + g_mime_message_foreach_part() again here. */ + + message = g_mime_message_part_get_message ((GMimeMessagePart *) part); + if (task->parser_recursion++ < RECURSION_LIMIT) { +#ifdef GMIME24 + g_mime_message_foreach (message, mime_foreach_callback, task); +#else + g_mime_message_foreach_part (message, mime_foreach_callback, task); +#endif + } + else { + msg_err ("endless recursion detected: %d", task->parser_recursion); + return; + } +#ifndef GMIME24 + g_object_unref (message); +#endif + } + else if (GMIME_IS_MESSAGE_PARTIAL (part)) { + /* message/partial */ + + /* this is an incomplete message part, probably a + large message that the sender has broken into + smaller parts and is sending us bit by bit. we + could save some info about it so that we could + piece this back together again once we get all the + parts? */ + } + else if (GMIME_IS_MULTIPART (part)) { + /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ + task->parser_parent_part = part; +#ifndef GMIME24 + debug_task ("detected multipart part"); + /* we'll get to finding out if this is a signed/encrypted multipart later... */ + if (task->parser_recursion++ < RECURSION_LIMIT) { + g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, task); + } + else { + msg_err ("endless recursion detected: %d", task->parser_recursion); + return; + } +#endif + } + else if (GMIME_IS_PART (part)) { + /* a normal leaf part, could be text/plain or image/jpeg etc */ +#ifdef GMIME24 + type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (part)); +#else + type = (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part)); +#endif + if (type == NULL) { + msg_warn ("type of part is unknown, assume text/plain"); + type = g_mime_content_type_new ("text", "plain"); +#ifdef GMIME24 + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, type); +#else + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_mime_content_type_destroy, type); +#endif + } + wrapper = g_mime_part_get_content_object (GMIME_PART (part)); +#ifdef GMIME24 + if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) { +#else + if (wrapper != NULL) { +#endif + part_stream = g_mime_stream_mem_new (); + if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE); + part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream)); + g_object_unref (part_stream); + mime_part = rspamd_mempool_alloc (task->task_pool, sizeof (struct mime_part)); + mime_part->type = type; + mime_part->content = part_content; + mime_part->parent = task->parser_parent_part; + mime_part->filename = g_mime_part_get_filename (GMIME_PART (part)); + debug_task ("found part with content-type: %s/%s", type->type, type->subtype); + task->parts = g_list_prepend (task->parts, mime_part); + /* Skip empty parts */ + process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0)); + } + else { + msg_warn ("write to stream failed: %d, %s", errno, strerror (errno)); + } +#ifndef GMIME24 + g_object_unref (wrapper); +#endif + } + else { + msg_warn ("cannot get wrapper for mime part, type of part: %s/%s", type->type, type->subtype); + } + } + else { + g_assert_not_reached (); + } +} + +static void +destroy_message (void *pointer) +{ + GMimeMessage *msg = pointer; + + msg_debug ("freeing pointer %p", msg); + g_object_unref (msg); +} + +gint +process_message (struct rspamd_task *task) +{ + GMimeMessage *message; + GMimeParser *parser; + GMimeStream *stream; + GByteArray *tmp; + GList *first, *cur; + GMimePart *part; + GMimeDataWrapper *wrapper; + struct received_header *recv; + gchar *mid, *url_str, *p, *end, *url_end; + struct uri *subject_url; + gsize len; + gint rc; + + tmp = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); + tmp->data = task->msg->str; + tmp->len = task->msg->len; + + stream = g_mime_stream_mem_new_with_byte_array (tmp); + /* + * This causes g_mime_stream not to free memory by itself as it is memory allocated by + * pool allocator + */ + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE); + + if (task->is_mime) { + + debug_task ("construct mime parser from string length %d", (gint)task->msg->len); + /* create a new parser object to parse the stream */ + parser = g_mime_parser_new_with_stream (stream); + g_object_unref (stream); + + /* parse the message from the stream */ + message = g_mime_parser_construct_message (parser); + + if (message == NULL) { + msg_warn ("cannot construct mime from stream"); + return -1; + } + + task->message = message; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); + + /* Save message id for future use */ + task->message_id = g_mime_message_get_message_id (task->message); + if (task->message_id == NULL) { + task->message_id = "undef"; + } + + task->parser_recursion = 0; +#ifdef GMIME24 + g_mime_message_foreach (message, mime_foreach_callback, task); +#else + /* + * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback + * so we need to set up parent part by hands + */ + task->parser_parent_part = g_mime_message_get_mime_part (message); + g_object_unref (task->parser_parent_part); + g_mime_message_foreach_part (message, mime_foreach_callback, task); +#endif + + debug_task ("found %d parts in message", task->parts_count); + if (task->queue_id == NULL) { + task->queue_id = "undef"; + } + +#ifdef GMIME24 + task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message)); +#else + task->raw_headers_str = g_mime_message_get_headers (task->message); +#endif + + process_images (task); + + /* Parse received headers */ + first = message_get_header (task->task_pool, message, "Received", FALSE); + cur = first; + while (cur) { + recv = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct received_header)); + parse_recv_header (task->task_pool, cur->data, recv); + task->received = g_list_prepend (task->received, recv); + cur = g_list_next (cur); + } + if (first) { + g_list_free (first); + } + + if (task->raw_headers_str) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, task->raw_headers_str); + process_raw_headers (task); + } + + task->rcpts = g_mime_message_get_all_recipients (message); + if (task->rcpts) { +#ifdef GMIME24 + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, task->rcpts); +#else + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) internet_address_list_destroy, task->rcpts); +#endif + } + + + /* free the parser (and the stream) */ + g_object_unref (parser); + } + else { + /* We got only message, no mime headers or anything like this */ + /* Construct fake message for it */ + task->message = g_mime_message_new (TRUE); + if (task->from) { + g_mime_message_set_sender (task->message, task->from); + } + /* Construct part for it */ + part = g_mime_part_new_with_type ("text", "html"); +#ifdef GMIME24 + wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_CONTENT_ENCODING_8BIT); +#else + wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_PART_ENCODING_8BIT); +#endif + g_mime_part_set_content_object (part, wrapper); + g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part)); + /* Register destructors */ + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, wrapper); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, part); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); + /* Now parse in a normal way */ + task->parser_recursion = 0; +#ifdef GMIME24 + g_mime_message_foreach (task->message, mime_foreach_callback, task); +#else + g_mime_message_foreach_part (task->message, mime_foreach_callback, task); +#endif + /* Generate message ID */ + mid = g_mime_utils_generate_message_id ("localhost.localdomain"); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, mid); + g_mime_message_set_message_id (task->message, mid); + task->message_id = mid; + task->queue_id = mid; + /* Set headers for message */ + if (task->subject) { + g_mime_message_set_subject (task->message, task->subject); + } + + /* Add recipients */ +#ifndef GMIME24 + if (task->rcpt) { + cur = task->rcpt; + while (cur) { + g_mime_message_add_recipient (task->message, GMIME_RECIPIENT_TYPE_TO, NULL, (gchar *)cur->data); + cur = g_list_next (cur); + } + } +#endif + } + + /* Parse urls inside Subject header */ + cur = message_get_header (task->task_pool, task->message, "Subject", FALSE); + if (cur) { + p = cur->data; + len = strlen (p); + end = p + len; + + while (p < end) { + /* Search to the end of url */ + if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str, FALSE)) { + if (url_str != NULL) { + subject_url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri)); + if (subject_url != NULL) { + /* Try to parse url */ + rc = parse_uri (subject_url, url_str, task->task_pool); + if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && + subject_url->hostlen > 0) { + if (subject_url->protocol != PROTOCOL_MAILTO) { + if (!g_tree_lookup (task->urls, subject_url)) { + g_tree_insert (task->urls, subject_url, subject_url); + } + } + } + else if (rc != URI_ERRNO_OK) { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + } + else { + break; + } + p = url_end + 1; + } + /* Free header's list */ + g_list_free (cur); + } + + return 0; +} + +struct gmime_raw_header { + struct raw_header *next; + gchar *name; + gchar *value; +}; + +typedef struct _GMimeHeader { + GHashTable *hash; + GHashTable *writers; + struct raw_header *headers; +} local_GMimeHeader; + + +/* known header field types */ +enum { + HEADER_FROM = 0, + HEADER_REPLY_TO, + HEADER_TO, + HEADER_CC, + HEADER_BCC, + HEADER_SUBJECT, + HEADER_DATE, + HEADER_MESSAGE_ID, + HEADER_UNKNOWN +}; + +/* + * Iterate throught all headers and make a list + */ +#ifndef GMIME24 +static void +header_iterate (rspamd_mempool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong) +{ + while (h) { + if (G_LIKELY (!strong)) { + if (h->value && !g_ascii_strncasecmp (field, h->name, strlen (field))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); + } + else { + *ret = g_list_prepend (*ret, g_strdup (h->value)); + } + } + } + else { + if (h->value && !strncmp (field, h->name, strlen (field))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); + } + else { + *ret = g_list_prepend (*ret, g_strdup (h->value)); + } + } + } + h = (struct gmime_raw_header *)h->next; + } +} +#else +static void +header_iterate (rspamd_mempool_t * pool, GMimeHeaderList * ls, GList ** ret, const gchar *field, gboolean strong) +{ + /* Use iterator in case of gmime 2.4 */ + GMimeHeaderIter *iter; + const gchar *name; + + if (ls == NULL) { + *ret = NULL; + return; + } + + iter = g_mime_header_iter_new (); + if (g_mime_header_list_get_iter (ls, iter) && g_mime_header_iter_first (iter)) { + /* Iterate throught headers */ + while (g_mime_header_iter_is_valid (iter)) { + name = g_mime_header_iter_get_name (iter); + if (G_LIKELY (!strong)) { + if (!g_ascii_strncasecmp (field, name, strlen (name))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); + } + else { + *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); + } + } + } + else { + if (!strncmp (field, name, strlen (name))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); + } + else { + *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); + } + } + } + if (!g_mime_header_iter_next (iter)) { + break; + } + } + } + g_mime_header_iter_free (iter); +} +#endif + + +struct multipart_cb_data { + GList *ret; + rspamd_mempool_t *pool; + const gchar *field; + gboolean try_search; + gboolean strong; + gint rec; +}; + +#define MAX_REC 10 + +static void +#ifdef GMIME24 +multipart_iterate (GMimeObject * parent, GMimeObject * part, gpointer user_data) +#else +multipart_iterate (GMimeObject * part, gpointer user_data) +#endif +{ + struct multipart_cb_data *data = user_data; +#ifndef GMIME24 + struct gmime_raw_header *h; +#endif + GList *l = NULL; + + if (data->try_search && part != NULL && GMIME_IS_PART (part)) { +#ifdef GMIME24 + GMimeHeaderList *ls; + + ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); + header_iterate (data->pool, ls, &l, data->field, data->strong); +#else + h = (struct gmime_raw_header *)part->headers->headers; + header_iterate (data->pool, h, &l, data->field, data->strong); +#endif + if (l == NULL) { + /* Header not found, abandon search results */ + data->try_search = FALSE; + g_list_free (data->ret); + data->ret = NULL; + } + else { + data->ret = g_list_concat (l, data->ret); + } + } + else if (data->try_search && GMIME_IS_MULTIPART (part)) { + /* Maybe endless recursion here ? */ + if (data->rec++ < MAX_REC) { + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, data); + } + else { + msg_info ("maximum recurse limit is over, stop recursing, %d", data->rec); + data->try_search = FALSE; + } + } +} + +static GList * +local_message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) +{ + GList *gret = NULL; + GMimeObject *part; + struct multipart_cb_data cb = { + .try_search = TRUE, + .rec = 0, + .ret = NULL, + }; + cb.pool = pool; + cb.field = field; + cb.strong = strong; + +#ifndef GMIME24 + struct gmime_raw_header *h; + + if (field == NULL) { + return NULL; + } + + msg_debug ("iterate over headers to find header %s", field); + h = (struct gmime_raw_header *) (GMIME_OBJECT (message)->headers->headers); + header_iterate (pool, h, &gret, field, strong); + + if (gret == NULL) { + /* Try to iterate with mime part headers */ + msg_debug ("iterate over headers of mime part to find header %s", field); + part = g_mime_message_get_mime_part (message); + if (part) { + h = (struct gmime_raw_header *)part->headers->headers; + header_iterate (pool, h, &gret, field, strong); + if (gret == NULL && GMIME_IS_MULTIPART (part)) { + msg_debug ("iterate over headers of each multipart's subparts %s", field); + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); + if (cb.ret != NULL) { + gret = cb.ret; + } + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + } + + return gret; +#else + GMimeHeaderList *ls; + + ls = g_mime_object_get_header_list (GMIME_OBJECT (message)); + header_iterate (pool, ls, &gret, field, strong); + if (gret == NULL) { + /* Try to iterate with mime part headers */ + part = g_mime_message_get_mime_part (message); + if (part) { + ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); + header_iterate (pool, ls, &gret, field, strong); + if (gret == NULL && GMIME_IS_MULTIPART (part)) { + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); + if (cb.ret != NULL) { + gret = cb.ret; + } + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + } + + + return gret; +#endif +} + +/** +* g_mime_message_set_date_from_string: Set the message sent-date +* @message: MIME Message +* @string: A string of date +* +* Set the sent-date on a MIME Message. +**/ +void +local_mime_message_set_date_from_string (GMimeMessage * message, const gchar * string) +{ + time_t date; + gint offset = 0; + + date = g_mime_utils_header_decode_date (string, &offset); + g_mime_message_set_date (message, date, offset); +} + +/* + * Replacements for standart gmime functions but converting adresses to IA + */ +static const gchar * +local_message_get_sender (GMimeMessage * message) +{ + gchar *res; + const gchar *from = g_mime_message_get_sender (message); + InternetAddressList *ia; + +#ifndef GMIME24 + ia = internet_address_parse_string (from); +#else + ia = internet_address_list_parse_string (from); +#endif + if (!ia) { + return NULL; + } + res = internet_address_list_to_string (ia, FALSE); +#ifndef GMIME24 + internet_address_list_destroy (ia); +#else + g_object_unref (ia); +#endif + + return res; +} + +static const gchar * +local_message_get_reply_to (GMimeMessage * message) +{ + gchar *res; + const gchar *from = g_mime_message_get_reply_to (message); + InternetAddressList *ia; + +#ifndef GMIME24 + ia = internet_address_parse_string (from); +#else + ia = internet_address_list_parse_string (from); +#endif + if (!ia) { + return NULL; + } + res = internet_address_list_to_string (ia, FALSE); +#ifndef GMIME24 + internet_address_list_destroy (ia); +#else + g_object_unref (ia); +#endif + + return res; +} + +#ifdef GMIME24 + +# define ADD_RECIPIENT_TEMPLATE(type,def) \ +static void \ +local_message_add_recipients_from_string_##type (GMimeMessage *message, const gchar *string, const gchar *value) \ +{ \ + InternetAddressList *il, *new; \ + \ + il = g_mime_message_get_recipients (message, (def)); \ + new = internet_address_list_parse_string (string); \ + internet_address_list_append (il, new); \ +} \ + +ADD_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) + ADD_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) + ADD_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) +# define GET_RECIPIENT_TEMPLATE(type,def) \ +static InternetAddressList* \ +local_message_get_recipients_##type (GMimeMessage *message, const gchar *unused) \ +{ \ + return g_mime_message_get_recipients (message, (def)); \ +} + GET_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) + GET_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) + GET_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) +#endif +/* different declarations for different types of set and get functions */ + typedef const gchar *(*GetFunc) (GMimeMessage * message); + typedef InternetAddressList *(*GetRcptFunc) (GMimeMessage * message, const gchar *type); + typedef GList *(*GetListFunc) (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *type, gboolean strong); + typedef void (*SetFunc) (GMimeMessage * message, const gchar *value); + typedef void (*SetListFunc) (GMimeMessage * message, const gchar *field, const gchar *value); + +/** different types of functions +* +* FUNC_CHARPTR +* - function with no arguments +* - get returns gchar* +* +* FUNC_IA (from Internet Address) +* - function with additional "field" argument from the fieldfunc table, +* - get returns Glist* +* +* FUNC_LIST +* - function with additional "field" argument (given arbitrary header field name) +* - get returns Glist* +**/ + enum { + FUNC_CHARPTR = 0, + FUNC_CHARFREEPTR, + FUNC_IA, + FUNC_LIST + }; + +/** +* fieldfunc struct: structure of MIME fields and corresponding get and set +* functions. +**/ + static struct { + gchar *name; + GetFunc func; + GetRcptFunc rcptfunc; + GetListFunc getlistfunc; + SetFunc setfunc; + SetListFunc setlfunc; + gint functype; + } fieldfunc[] = +{ + { + "From", local_message_get_sender, NULL, NULL, g_mime_message_set_sender, NULL, FUNC_CHARFREEPTR}, { + "Reply-To", local_message_get_reply_to, NULL, NULL, g_mime_message_set_reply_to, NULL, FUNC_CHARFREEPTR}, +#ifndef GMIME24 + { + "To", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Cc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Bcc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Date", (GetFunc) g_mime_message_get_date_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, +#else + { + "To", NULL, local_message_get_recipients_to, NULL, NULL, local_message_add_recipients_from_string_to, FUNC_IA}, { + "Cc", NULL, local_message_get_recipients_cc, NULL, NULL, local_message_add_recipients_from_string_cc, FUNC_IA}, { + "Bcc", NULL, local_message_get_recipients_bcc, NULL, NULL, local_message_add_recipients_from_string_bcc, FUNC_IA}, { + "Date", (GetFunc)g_mime_message_get_date_as_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, +#endif + { + "Subject", g_mime_message_get_subject, NULL, NULL, g_mime_message_set_subject, NULL, FUNC_CHARPTR}, { + "Message-Id", g_mime_message_get_message_id, NULL, NULL, g_mime_message_set_message_id, NULL, FUNC_CHARPTR}, +#ifndef GMIME24 + { + NULL, NULL, NULL, local_message_get_header, NULL, g_mime_message_add_header, FUNC_LIST} +#else + { + NULL, NULL, NULL, local_message_get_header, NULL, (SetListFunc)g_mime_object_append_header, FUNC_LIST} +#endif +}; + +/** +* message_set_header: set header of any type excluding special (Content- and MIME-Version:) +**/ +void +message_set_header (GMimeMessage * message, const gchar *field, const gchar *value) +{ + gint i; + + if (!g_ascii_strcasecmp (field, "MIME-Version:") || !g_ascii_strncasecmp (field, "Content-", 8)) { + return; + } + for (i = 0; i <= HEADER_UNKNOWN; ++i) { + if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { + switch (fieldfunc[i].functype) { + case FUNC_CHARPTR: + (*(fieldfunc[i].setfunc)) (message, value); + break; + case FUNC_IA: + (*(fieldfunc[i].setlfunc)) (message, fieldfunc[i].name, value); + break; + case FUNC_LIST: + (*(fieldfunc[i].setlfunc)) (message, field, value); + break; + } + break; + } + } +} + + +/** +* message_get_header: returns the list of 'any header' values +* (except of unsupported yet Content- and MIME-Version special headers) +* +* You should free the GList list by yourself. +**/ +GList * +message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) +{ + gint i; + gchar *ret = NULL, *ia_string; + GList *gret = NULL; + InternetAddressList *ia_list = NULL, *ia; + + for (i = 0; i <= HEADER_UNKNOWN; ++i) { + if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { + switch (fieldfunc[i].functype) { + case FUNC_CHARFREEPTR: + ret = (gchar *)(*(fieldfunc[i].func)) (message); + break; + case FUNC_CHARPTR: + ret = (gchar *)(*(fieldfunc[i].func)) (message); + break; + case FUNC_IA: + ia_list = (*(fieldfunc[i].rcptfunc)) (message, field); + ia = ia_list; +#ifndef GMIME24 + while (ia && ia->address) { + + ia_string = internet_address_to_string ((InternetAddress *) ia->address, FALSE); + if (pool != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); + } + gret = g_list_prepend (gret, ia_string); + ia = ia->next; + } +#else + i = internet_address_list_length (ia); + while (--i >= 0) { + ia_string = internet_address_to_string (internet_address_list_get_address (ia, i), FALSE); + if (pool != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); + } + gret = g_list_prepend (gret, ia_string); + } +#endif + break; + case FUNC_LIST: + gret = (*(fieldfunc[i].getlistfunc)) (pool, message, field, strong); + break; + } + break; + } + } + if (gret == NULL && ret != NULL) { + if (pool != NULL) { + gret = g_list_prepend (gret, rspamd_mempool_strdup (pool, ret)); + } + else { + gret = g_list_prepend (gret, g_strdup (ret)); + } + } + if (fieldfunc[i].functype == FUNC_CHARFREEPTR && ret) { + g_free (ret); + } + + return gret; +} + +GList* +message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong) +{ + GList *gret = NULL; + struct raw_header *rh; + + rh = g_hash_table_lookup (task->raw_headers, field); + + if (rh == NULL) { + return NULL; + } + + while (rh) { + if (strong) { + if (strcmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + else { + if (g_ascii_strcasecmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + rh = rh->next; + } + + if (gret != NULL) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, gret); + } + + return gret; +} diff --git a/src/libmime/message.h b/src/libmime/message.h new file mode 100644 index 000000000..5e27579d1 --- /dev/null +++ b/src/libmime/message.h @@ -0,0 +1,91 @@ +/** + * @file message.h + * Message processing functions and structures + */ + +#ifndef RSPAMD_MESSAGE_H +#define RSPAMD_MESSAGE_H + +#include "config.h" +#include "fuzzy.h" + +struct rspamd_task; +struct controller_session; + +struct mime_part { + GMimeContentType *type; + GByteArray *content; + GMimeObject *parent; + gchar *checksum; + const gchar *filename; +}; + +struct mime_text_part { + gboolean is_html; + gboolean is_raw; + gboolean is_balanced; + gboolean is_empty; + gboolean is_utf; + const gchar *real_charset; + GByteArray *orig; + GByteArray *content; + GNode *html_nodes; + GList *urls_offset; /**< list of offsets of urls */ + fuzzy_hash_t *fuzzy; + fuzzy_hash_t *double_fuzzy; + GMimeObject *parent; + GUnicodeScript script; + f_str_t *diff_str; +}; + +struct received_header { + gchar *from_hostname; + gchar *from_ip; + gchar *real_hostname; + gchar *real_ip; + gchar *by_hostname; + gint is_error; +}; + +struct raw_header { + gchar *name; + gchar *value; + gboolean tab_separated; + gboolean empty_separator; + gchar *separator; + struct raw_header *next; +}; + +/** + * Process message with all filters/statfiles, extract mime parts, urls and + * call metrics consolidation functions + * @param task worker_task object + * @return 0 if we have delayed filters to process and 1 if we have finished with processing + */ +gint process_message (struct rspamd_task *task); + +/* + * Set header with specified name and value + */ +void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value); + +/* + * Get a list of header's values with specified header's name + * @param pool if not NULL this pool would be used for storing header's values + * @param message g_mime_message object + * @param field header's name + * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not + * @return A list of header's values or NULL. If list is not NULL it MUST be freed. If pool is NULL elements must be freed as well. + */ +GList* message_get_header (rspamd_mempool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong); + +/* + * Get a list of header's values with specified header's name using raw headers + * @param task worker task structure + * @param field header's name + * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not + * @return A list of header's values or NULL. Unlike previous function it is NOT required to free list or values. I should rework one of these functions some time. + */ +GList* message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong); + +#endif diff --git a/src/libmime/protocol.c b/src/libmime/protocol.c new file mode 100644 index 000000000..8a5c3f0df --- /dev/null +++ b/src/libmime/protocol.c @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "util.h" +#include "cfg_file.h" +#include "settings.h" +#include "message.h" + +/* Max line size */ +#define OUTBUFSIZ BUFSIZ +/* + * Just check if the passed message is spam or not and reply as + * described below + */ +#define MSG_CMD_CHECK "check" +/* + * Check if message is spam or not, and return score plus list + * of symbols hit + */ +#define MSG_CMD_SYMBOLS "symbols" +/* + * Check if message is spam or not, and return score plus report + */ +#define MSG_CMD_REPORT "report" +/* + * Check if message is spam or not, and return score plus report + * if the message is spam + */ +#define MSG_CMD_REPORT_IFSPAM "report_ifspam" +/* + * Ignore this message -- client opened connection then changed + */ +#define MSG_CMD_SKIP "skip" +/* + * Return a confirmation that spamd is alive + */ +#define MSG_CMD_PING "ping" +/* + * Process this message as described above and return modified message + */ +#define MSG_CMD_PROCESS "process" + +/* + * Learn specified statfile using message + */ +#define MSG_CMD_LEARN "learn" + +/* + * spamassassin greeting: + */ +#define SPAMC_GREETING "SPAMC" +/* + * rspamd greeting: + */ +#define RSPAMC_GREETING "RSPAMC" +/* + * Headers + */ +#define CONTENT_LENGTH_HEADER "Content-length" +#define HELO_HEADER "Helo" +#define FROM_HEADER "From" +#define IP_ADDR_HEADER "IP" +#define NRCPT_HEADER "Recipient-Number" +#define RCPT_HEADER "Rcpt" +#define SUBJECT_HEADER "Subject" +#define STATFILE_HEADER "Statfile" +#define QUEUE_ID_HEADER "Queue-ID" +#define ERROR_HEADER "Error" +#define USER_HEADER "User" +#define PASS_HEADER "Pass" +#define JSON_HEADER "Json" +#define HOSTNAME_HEADER "Hostname" +#define DELIVER_TO_HEADER "Deliver-To" +#define NO_LOG_HEADER "Log" + +static GList *custom_commands = NULL; + + +/* + * Remove <> from the fixed string and copy it to the pool + */ +static gchar * +rspamd_protocol_escape_braces (GString *in) +{ + gint len = 0; + gchar *orig, *p; + + orig = in->str; + while ((g_ascii_isspace (*orig) || *orig == '<') && orig - in->str < (gint)in->len) { + orig ++; + } + + g_string_erase (in, 0, orig - in->str); + + p = orig; + while ((!g_ascii_isspace (*p) && *p != '>') && p - in->str < (gint)in->len) { + p ++; + len ++; + } + + g_string_truncate (in, len); + + return in->str; +} + +static gboolean +rspamd_protocol_handle_url (struct rspamd_task *task, struct rspamd_http_message *msg) +{ + GList *cur; + struct custom_command *cmd; + const gchar *p; + + if (msg->url == NULL || msg->url->len == 0) { + task->last_error = "command is absent"; + task->error_code = 400; + return FALSE; + } + + if (msg->url->str[0] == '/') { + p = &msg->url->str[1]; + } + else { + p = msg->url->str; + } + + switch (*p) { + case 'c': + case 'C': + /* check */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_CHECK + 1) == 0) { + task->cmd = CMD_CHECK; + } + else { + goto err; + } + break; + case 's': + case 'S': + /* symbols, skip */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_SYMBOLS + 1) == 0) { + task->cmd = CMD_SYMBOLS; + } + else if (g_ascii_strcasecmp (p + 1, MSG_CMD_SKIP + 1) == 0) { + task->cmd = CMD_SKIP; + } + else { + goto err; + } + break; + case 'p': + case 'P': + /* ping, process */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_PING + 1) == 0) { + task->cmd = CMD_PING; + } + else if (g_ascii_strcasecmp (p + 1, MSG_CMD_PROCESS + 1) == 0) { + task->cmd = CMD_PROCESS; + } + else { + goto err; + } + break; + case 'r': + case 'R': + /* report, report_ifspam */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT + 1) == 0) { + task->cmd = CMD_REPORT; + } + else if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) { + task->cmd = CMD_REPORT_IFSPAM; + } + else { + goto err; + } + break; + default: + cur = custom_commands; + while (cur) { + cmd = cur->data; + if (g_ascii_strcasecmp (p, cmd->name) == 0) { + task->cmd = CMD_OTHER; + task->custom_cmd = cmd; + break; + } + cur = g_list_next (cur); + } + + if (cur == NULL) { + goto err; + } + break; + } + + return TRUE; + +err: + debug_task ("bad command: %s", p); + task->last_error = "invalid command"; + task->error_code = 400; + return FALSE; +} + +static gboolean +rspamd_protocol_handle_headers (struct rspamd_task *task, struct rspamd_http_message *msg) +{ + gchar *headern, *err, *tmp; + gboolean res = TRUE; + struct rspamd_http_header *h; + + LL_FOREACH (msg->headers, h) { + headern = h->name->str; + + switch (headern[0]) { + case 'd': + case 'D': + if (g_ascii_strcasecmp (headern, DELIVER_TO_HEADER) == 0) { + task->deliver_to = rspamd_protocol_escape_braces (h->value); + debug_task ("read deliver-to header, value: %s", task->deliver_to); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'h': + case 'H': + if (g_ascii_strcasecmp (headern, HELO_HEADER) == 0) { + task->helo = h->value->str; + debug_task ("read helo header, value: %s", task->helo); + } + else if (g_ascii_strcasecmp (headern, HOSTNAME_HEADER) == 0) { + task->hostname = h->value->str; + debug_task ("read hostname header, value: %s", task->hostname); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'f': + case 'F': + if (g_ascii_strcasecmp (headern, FROM_HEADER) == 0) { + task->from = rspamd_protocol_escape_braces (h->value); + debug_task ("read from header, value: %s", task->from); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'j': + case 'J': + if (g_ascii_strcasecmp (headern, JSON_HEADER) == 0) { + task->is_json = parse_flag (h->value->str); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'q': + case 'Q': + if (g_ascii_strcasecmp (headern, QUEUE_ID_HEADER) == 0) { + task->queue_id = h->value->str; + debug_task ("read queue_id header, value: %s", task->queue_id); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'r': + case 'R': + if (g_ascii_strcasecmp (headern, RCPT_HEADER) == 0) { + tmp = rspamd_protocol_escape_braces (h->value); + task->rcpt = g_list_prepend (task->rcpt, tmp); + debug_task ("read rcpt header, value: %s", tmp); + } + else if (g_ascii_strcasecmp (headern, NRCPT_HEADER) == 0) { + task->nrcpt = strtoul (h->value->str, &err, 10); + debug_task ("read rcpt header, value: %d", (gint)task->nrcpt); + } + else { + msg_info ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'i': + case 'I': + if (g_ascii_strcasecmp (headern, IP_ADDR_HEADER) == 0) { + tmp = h->value->str; + if (!rspamd_parse_inet_address (&task->from_addr, tmp)) { + msg_err ("bad ip header: '%s'", tmp); + return FALSE; + } + debug_task ("read IP header, value: %s", tmp); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'p': + case 'P': + if (g_ascii_strcasecmp (headern, PASS_HEADER) == 0) { + if (h->value->len == sizeof ("all") - 1 && + g_ascii_strcasecmp (h->value->str, "all") == 0) { + task->pass_all_filters = TRUE; + debug_task ("pass all filters"); + } + } + else { + res = FALSE; + } + break; + case 's': + case 'S': + if (g_ascii_strcasecmp (headern, SUBJECT_HEADER) == 0) { + task->subject = h->value->str; + } + else { + res = FALSE; + } + break; + case 'u': + case 'U': + if (g_ascii_strcasecmp (headern, USER_HEADER) == 0) { + task->user = h->value->str; + } + else { + res = FALSE; + } + break; + case 'l': + case 'L': + if (g_ascii_strcasecmp (headern, NO_LOG_HEADER) == 0) { + if (g_ascii_strcasecmp (h->value->str, "no") == 0) { + task->no_log = TRUE; + } + } + else { + res = FALSE; + } + break; + default: + debug_task ("wrong header: %s", headern); + res = FALSE; + break; + } + } + + if (!res && task->cfg->strict_protocol_headers) { + msg_err ("deny processing of a request with incorrect or unknown headers"); + task->last_error = "invalid header"; + task->error_code = 400; + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_protocol_handle_request (struct rspamd_task *task, + struct rspamd_http_message *msg) +{ + gboolean ret = TRUE; + + if (msg->method == HTTP_SYMBOLS) { + task->cmd = CMD_SYMBOLS; + task->is_json = FALSE; + } + else if (msg->method == HTTP_CHECK) { + task->cmd = CMD_CHECK; + task->is_json = FALSE; + } + else { + task->is_json = TRUE; + ret = rspamd_protocol_handle_url (task, msg); + } + + if (ret) { + ret = rspamd_protocol_handle_headers (task, msg); + } + + return ret; +} + +static void +write_hashes_to_log (struct rspamd_task *task, GString *logbuf) +{ + GList *cur; + struct mime_text_part *text_part; + + cur = task->text_parts; + + while (cur) { + text_part = cur->data; + if (text_part->fuzzy) { + if (cur->next != NULL) { + rspamd_printf_gstring (logbuf, " part: %Xd,", text_part->fuzzy->h); + } + else { + rspamd_printf_gstring (logbuf, " part: %Xd", text_part->fuzzy->h); + } + } + cur = g_list_next (cur); + } +} + + +/* Structure for writing tree data */ +struct tree_cb_data { + ucl_object_t *top; + struct rspamd_task *task; +}; + +/* + * Callback for writing urls + */ +static gboolean +urls_protocol_cb (gpointer key, gpointer value, gpointer ud) +{ + struct tree_cb_data *cb = ud; + struct uri *url = value; + ucl_object_t *obj; + + obj = ucl_object_fromlstring (url->host, url->hostlen); + DL_APPEND (cb->top->value.av, obj); + + if (cb->task->cfg->log_urls) { + msg_info ("<%s> URL: %s - %s: %s", cb->task->message_id, cb->task->user ? + cb->task->user : (cb->task->from ? cb->task->from : "unknown"), + rspamd_inet_address_to_string (&cb->task->from_addr), + struri (url)); + } + + return FALSE; +} + +static ucl_object_t * +rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task) +{ + struct tree_cb_data cb; + ucl_object_t *obj; + + obj = ucl_object_typed_new (UCL_ARRAY); + cb.top = obj; + cb.task = task; + + g_tree_foreach (input, urls_protocol_cb, &cb); + + return obj; +} + +static gboolean +emails_protocol_cb (gpointer key, gpointer value, gpointer ud) +{ + struct tree_cb_data *cb = ud; + struct uri *url = value; + ucl_object_t *obj; + + obj = ucl_object_fromlstring (url->user, url->userlen + url->hostlen + 1); + DL_APPEND (cb->top->value.av, obj); + + return FALSE; +} + +static ucl_object_t * +rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task) +{ + struct tree_cb_data cb; + ucl_object_t *obj; + + obj = ucl_object_typed_new (UCL_ARRAY); + cb.top = obj; + cb.task = task; + + g_tree_foreach (input, emails_protocol_cb, &cb); + + return obj; +} + + +/* Write new subject */ +static const gchar * +make_rewritten_subject (struct metric *metric, struct rspamd_task *task) +{ + static gchar subj_buf[1024]; + gchar *p = subj_buf, *end, *c, *res; + const gchar *s; + + end = p + sizeof(subj_buf); + c = metric->subject; + s = g_mime_message_get_subject (task->message); + + while (p < end) { + if (*c == '\0') { + *p = '\0'; + break; + } + else if (*c == '%' && *(c + 1) == 's') { + p += rspamd_strlcpy (p, (s != NULL) ? s : "", end - p); + c += 2; + } + else { + *p = *c ++; + } + p ++; + } + res = g_mime_utils_header_encode_text (subj_buf); + + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_free, res); + + return res; +} + +static ucl_object_t * +rspamd_str_list_ucl (GList *str_list) +{ + ucl_object_t *top = NULL, *obj; + GList *cur; + + top = ucl_object_typed_new (UCL_ARRAY); + cur = str_list; + while (cur) { + obj = ucl_object_fromstring (cur->data); + DL_APPEND (top->value.av, obj); + cur = g_list_next (cur); + } + + return top; +} + +static ucl_object_t * +rspamd_metric_symbol_ucl (struct rspamd_task *task, struct metric *m, + struct symbol *sym, GString *logbuf) +{ + ucl_object_t *obj = NULL; + const gchar *description = NULL; + + rspamd_printf_gstring (logbuf, "%s,", sym->name); + description = g_hash_table_lookup (m->descriptions, sym->name); + + obj = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (obj, ucl_object_fromstring (sym->name), "name", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (sym->score), "score", 0, false); + if (description) { + ucl_object_insert_key (obj, ucl_object_fromstring (description), "description", 0, false); + } + if (sym->options != NULL) { + ucl_object_insert_key (obj, rspamd_str_list_ucl (sym->options), "options", 0, false); + } + + return obj; +} + +static ucl_object_t * +rspamd_metric_result_ucl (struct rspamd_task *task, struct metric_result *mres, GString *logbuf) +{ + GHashTableIter hiter; + struct symbol *sym; + struct metric *m; + gboolean is_spam; + enum rspamd_metric_action action = METRIC_ACTION_NOACTION; + ucl_object_t *obj = NULL, *sobj; + gdouble required_score; + gpointer h, v; + const gchar *subject; + gchar action_char; + + m = mres->metric; + + /* XXX: handle settings */ + required_score = m->actions[METRIC_ACTION_REJECT].score; + is_spam = (mres->score >= required_score); + action = check_metric_action (mres->score, required_score, m); + if (task->is_skipped) { + action_char = 'S'; + } + else if (is_spam) { + action_char = 'T'; + } + else { + action_char = 'F'; + } + rspamd_printf_gstring (logbuf, "(%s: %c (%s): [%.2f/%.2f] [", + m->name, action_char, + str_action_metric (action), + mres->score, required_score); + + obj = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (obj, ucl_object_frombool (is_spam), + "is_spam", 0, false); + ucl_object_insert_key (obj, ucl_object_frombool (task->is_skipped), + "is_skipped", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (mres->score), + "score", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (required_score), + "required_score", 0, false); + ucl_object_insert_key (obj, ucl_object_fromstring (str_action_metric (action)), + "action", 0, false); + + if (action == METRIC_ACTION_REWRITE_SUBJECT) { + subject = make_rewritten_subject (m, task); + ucl_object_insert_key (obj, ucl_object_fromstring (subject), + "subject", 0, false); + } + /* Now handle symbols */ + g_hash_table_iter_init (&hiter, mres->symbols); + while (g_hash_table_iter_next (&hiter, &h, &v)) { + sym = (struct symbol *)v; + sobj = rspamd_metric_symbol_ucl (task, m, sym, logbuf); + ucl_object_insert_key (obj, sobj, h, 0, false); + } + + /* Cut the trailing comma if needed */ + if (logbuf->str[logbuf->len - 1] == ',') { + logbuf->len --; + } + +#ifdef HAVE_CLOCK_GETTIME + rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,", + task->msg->len, calculate_check_time (&task->tv, &task->ts, + task->cfg->clock_res, &task->scan_milliseconds), task->dns_requests); +#else + rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,", + task->msg->len, + calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds), + task->dns_requests); +#endif + + return obj; +} + +static void +rspamd_ucl_tolegacy_output (struct rspamd_task *task, ucl_object_t *top, GString *out) +{ + const ucl_object_t *metric, *score, + *required_score, *is_spam, *elt, *symbols; + ucl_object_iter_t iter = NULL; + + metric = ucl_object_find_key (top, DEFAULT_METRIC); + if (metric != NULL) { + score = ucl_object_find_key (metric, "score"); + required_score = ucl_object_find_key (metric, "required_score"); + is_spam = ucl_object_find_key (metric, "is_spam"); + g_string_append_printf (out, "Metric: default; %s; %.2f / %.2f / 0.0\r\n", + ucl_object_toboolean (is_spam) ? "True" : "False", + ucl_object_todouble (score), + ucl_object_todouble (required_score)); + elt = ucl_object_find_key (metric, "action"); + if (elt != NULL) { + g_string_append_printf (out, "Action: %s\r\n", + ucl_object_tostring (elt)); + } + + symbols = ucl_object_find_key (metric, "symbols"); + while ((elt = ucl_iterate_object (symbols, &iter, true)) != NULL) { + const ucl_object_t *sym_score; + sym_score = ucl_object_find_key (elt, "score"); + g_string_append_printf (out, "Symbol: %s; %.2f\r\n", + ucl_object_key (elt), + ucl_object_todouble (sym_score)); + } + + elt = ucl_object_find_key (metric, "subject"); + if (elt != NULL) { + g_string_append_printf (out, "Subject: %s\r\n", + ucl_object_tostring (elt)); + } + } + g_string_append_printf (out, "Message-ID: %s\r\n", task->message_id); +} + +static void +write_check_reply (struct rspamd_http_message *msg, struct rspamd_task *task) +{ + GString *logbuf; + struct metric_result *metric_res; + GHashTableIter hiter; + gpointer h, v; + ucl_object_t *top = NULL, *obj; + + /* Output the first line - check status */ + logbuf = g_string_sized_new (BUFSIZ); + rspamd_printf_gstring (logbuf, "id: <%s>, qid: <%s>, ", task->message_id, task->queue_id); + + if (task->user) { + rspamd_printf_gstring (logbuf, "user: %s, ", task->user); + } + + if (!task->no_log) { + rspamd_roll_history_update (task->worker->srv->history, task); + } + g_hash_table_iter_init (&hiter, task->results); + + top = ucl_object_typed_new (UCL_OBJECT); + /* Convert results to an ucl object */ + while (g_hash_table_iter_next (&hiter, &h, &v)) { + metric_res = (struct metric_result *)v; + obj = rspamd_metric_result_ucl (task, metric_res, logbuf); + ucl_object_insert_key (top, obj, h, 0, false); + } + + if (task->messages != NULL) { + ucl_object_insert_key (top, rspamd_str_list_ucl (task->messages), "messages", 0, false); + } + if (g_tree_nnodes (task->urls) > 0) { + ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls, task), "urls", 0, false); + } + if (g_tree_nnodes (task->emails) > 0) { + ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task), + "emails", 0, false); + } + + ucl_object_insert_key (top, ucl_object_fromstring (task->message_id), + "message-id", 0, false); + + write_hashes_to_log (task, logbuf); + if (!task->no_log) { + msg_info ("%v", logbuf); + } + g_string_free (logbuf, TRUE); + + msg->body = g_string_sized_new (BUFSIZ); + + if (msg->method < HTTP_SYMBOLS) { + rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body); + } + else { + rspamd_ucl_tolegacy_output (task, top, msg->body); + } + ucl_object_unref (top); + + /* Increase counters */ + task->worker->srv->stat->messages_scanned++; +} + +void +rspamd_protocol_write_reply (struct rspamd_task *task) +{ + struct rspamd_http_message *msg; + const gchar *ctype = "application/json"; + ucl_object_t *top = NULL; + + msg = rspamd_http_new_message (HTTP_RESPONSE); + if (!task->is_json) { + /* Turn compatibility on */ + msg->method = HTTP_SYMBOLS; + } + msg->date = time (NULL); + + task->state = CLOSING_CONNECTION; + + top = ucl_object_typed_new (UCL_OBJECT); + debug_task ("writing reply to client"); + if (task->error_code != 0) { + msg->code = task->error_code; + ucl_object_insert_key (top, ucl_object_fromstring (task->last_error), "error", 0, false); + msg->body = g_string_sized_new (256); + rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body); + ucl_object_unref (top); + } + else { + switch (task->cmd) { + case CMD_REPORT_IFSPAM: + case CMD_REPORT: + case CMD_CHECK: + case CMD_SYMBOLS: + case CMD_PROCESS: + case CMD_SKIP: + write_check_reply (msg, task); + break; + case CMD_PING: + msg->body = g_string_new ("pong"); + break; + case CMD_OTHER: + msg_err ("BROKEN"); + break; + } + } + + rspamd_http_connection_reset (task->http_conn); + rspamd_http_connection_write_message (task->http_conn, msg, NULL, + ctype, task, task->sock, &task->tv, task->ev_base); +} + +void +register_protocol_command (const gchar *name, protocol_reply_func func) +{ + struct custom_command *cmd; + + cmd = g_malloc (sizeof (struct custom_command)); + cmd->name = name; + cmd->func = func; + + custom_commands = g_list_prepend (custom_commands, cmd); +} diff --git a/src/libmime/protocol.h b/src/libmime/protocol.h new file mode 100644 index 000000000..8d2efe118 --- /dev/null +++ b/src/libmime/protocol.h @@ -0,0 +1,46 @@ +/** + * @file protocol.h + * Rspamd protocol definition + */ + +#ifndef RSPAMD_PROTOCOL_H +#define RSPAMD_PROTOCOL_H + +#include "config.h" +#include "filter.h" +#include "http.h" +#include "task.h" + +#define RSPAMD_BASE_ERROR 500 +#define RSPAMD_FILTER_ERROR RSPAMD_BASE_ERROR + 1 +#define RSPAMD_NETWORK_ERROR RSPAMD_BASE_ERROR + 2 +#define RSPAMD_PROTOCOL_ERROR RSPAMD_BASE_ERROR + 3 +#define RSPAMD_LENGTH_ERROR RSPAMD_BASE_ERROR + 4 +#define RSPAMD_STATFILE_ERROR RSPAMD_BASE_ERROR + 5 + +struct metric; + +/** + * Process HTTP request to the task structure + * @param task + * @param msg + * @return + */ +gboolean rspamd_protocol_handle_request (struct rspamd_task *task, struct rspamd_http_message *msg); + +/** + * Write reply for specified task command + * @param task task object + * @return 0 if we wrote reply and -1 if there was some error + */ +void rspamd_protocol_write_reply (struct rspamd_task *task); + + +/** + * Register custom fucntion to extend protocol + * @param name symbolic name of custom function + * @param func callback function for writing reply + */ +void register_protocol_command (const gchar *name, protocol_reply_func func); + +#endif diff --git a/src/libmime/smtp_proto.c b/src/libmime/smtp_proto.c new file mode 100644 index 000000000..3af1c3910 --- /dev/null +++ b/src/libmime/smtp_proto.c @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "cfg_file.h" +#include "util.h" +#include "smtp.h" +#include "smtp_proto.h" +#include "smtp_utils.h" + +gchar * +make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...) +{ + va_list vp; + gchar *result = NULL, *p; + size_t len; + + va_start (vp, format); + len = g_printf_string_upper_bound (format, vp); + va_end (vp); + va_start (vp, format); + len += sizeof ("65535 ") + sizeof (CRLF) - 1; + result = rspamd_mempool_alloc (pool, len); + p = result + rspamd_snprintf (result, len, "%d ", error_code); + p = rspamd_vsnprintf (p, len - (p - result), format, vp); + *p++ = CR; *p++ = LF; *p = '\0'; + va_end (vp); + + return result; +} + + +gboolean +parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd) +{ + enum { + SMTP_PARSE_START = 0, + SMTP_PARSE_SPACES, + SMTP_PARSE_ARGUMENT, + SMTP_PARSE_DONE + } state; + gchar *p, *c, ch, cmd_buf[4]; + guint i; + f_str_t *arg = NULL; + struct smtp_command *pcmd; + + if (line->len == 0) { + return FALSE; + } + + state = SMTP_PARSE_START; + c = line->begin; + p = c; + *cmd = rspamd_mempool_alloc0 (session->pool, sizeof (struct smtp_command)); + pcmd = *cmd; + + for (i = 0; i < line->len; i ++, p ++) { + ch = *p; + switch (state) { + case SMTP_PARSE_START: + if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) { + if (i == line->len - 1) { + p ++; + } + if (p - c == 4) { + cmd_buf[0] = g_ascii_toupper (c[0]); + cmd_buf[1] = g_ascii_toupper (c[1]); + cmd_buf[2] = g_ascii_toupper (c[2]); + cmd_buf[3] = g_ascii_toupper (c[3]); + + if (memcmp (cmd_buf, "HELO", 4) == 0) { + pcmd->command = SMTP_COMMAND_HELO; + } + else if (memcmp (cmd_buf, "EHLO", 4) == 0) { + pcmd->command = SMTP_COMMAND_EHLO; + } + else if (memcmp (cmd_buf, "MAIL", 4) == 0) { + pcmd->command = SMTP_COMMAND_MAIL; + } + else if (memcmp (cmd_buf, "RCPT", 4) == 0) { + pcmd->command = SMTP_COMMAND_RCPT; + } + else if (memcmp (cmd_buf, "DATA", 4) == 0) { + pcmd->command = SMTP_COMMAND_DATA; + } + else if (memcmp (cmd_buf, "QUIT", 4) == 0) { + pcmd->command = SMTP_COMMAND_QUIT; + } + else if (memcmp (cmd_buf, "NOOP", 4) == 0) { + pcmd->command = SMTP_COMMAND_NOOP; + } + else if (memcmp (cmd_buf, "EXPN", 4) == 0) { + pcmd->command = SMTP_COMMAND_EXPN; + } + else if (memcmp (cmd_buf, "RSET", 4) == 0) { + pcmd->command = SMTP_COMMAND_RSET; + } + else if (memcmp (cmd_buf, "HELP", 4) == 0) { + pcmd->command = SMTP_COMMAND_HELP; + } + else if (memcmp (cmd_buf, "VRFY", 4) == 0) { + pcmd->command = SMTP_COMMAND_VRFY; + } + else { + msg_info ("invalid command: %*s", 4, cmd_buf); + return FALSE; + } + } + else { + /* Invalid command */ + msg_info ("invalid command: %*s", 4, c); + return FALSE; + } + /* Now check what we have */ + if (ch == ' ' || ch == ':') { + state = SMTP_PARSE_SPACES; + } + else if (ch == CR) { + state = SMTP_PARSE_DONE; + } + else if (ch == LF) { + return TRUE; + } + } + else if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z')) { + msg_info ("invalid letter code in SMTP command: %d", (gint)ch); + return FALSE; + } + break; + case SMTP_PARSE_SPACES: + if (ch == CR) { + state = SMTP_PARSE_DONE; + } + else if (ch == LF) { + goto end; + } + else if (ch != ' ' && ch != ':') { + state = SMTP_PARSE_ARGUMENT; + arg = rspamd_mempool_alloc (session->pool, sizeof (f_str_t)); + c = p; + } + break; + case SMTP_PARSE_ARGUMENT: + if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) { + if (i == line->len - 1 && (ch != ' ' && ch != CR && ch != LF)) { + p ++; + } + arg->len = p - c; + arg->begin = rspamd_mempool_alloc (session->pool, arg->len); + memcpy (arg->begin, c, arg->len); + pcmd->args = g_list_prepend (pcmd->args, arg); + if (ch == ' ' || ch == ':') { + state = SMTP_PARSE_SPACES; + } + else if (ch == CR) { + state = SMTP_PARSE_DONE; + } + else { + goto end; + } + } + break; + case SMTP_PARSE_DONE: + if (ch == LF) { + goto end; + } + msg_info ("CR without LF in SMTP command"); + return FALSE; + } + } + +end: + if (pcmd->args) { + pcmd->args = g_list_reverse (pcmd->args); + rspamd_mempool_add_destructor (session->pool, (rspamd_mempool_destruct_t)g_list_free, pcmd->args); + } + return TRUE; +} + +static gboolean +check_smtp_path (f_str_t *path) +{ + guint i; + gchar *p; + + p = path->begin; + if (*p != '<' || path->len < 2) { + return FALSE; + } + for (i = 0; i < path->len; i++, p ++) { + if (*p == '>' && i != path->len - 1) { + return FALSE; + } + } + + return *(p - 1) == '>'; +} + +gboolean +parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd) +{ + f_str_t *arg; + + if (cmd->args == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cmd->args->data; + session->helo = rspamd_mempool_alloc (session->pool, arg->len + 1); + rspamd_strlcpy (session->helo, arg->begin, arg->len + 1); + /* Now try to write reply */ + if (cmd->command == SMTP_COMMAND_HELO) { + /* No ESMTP */ + session->error = SMTP_ERROR_OK; + session->esmtp = FALSE; + return TRUE; + } + else { + /* Try to write all capabilities */ + session->esmtp = TRUE; + if (session->ctx->smtp_capabilities == NULL) { + session->error = SMTP_ERROR_OK; + return TRUE; + } + else { + session->error = session->ctx->smtp_capabilities; + return TRUE; + } + } + + return FALSE; +} + +gboolean +parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd) +{ + f_str_t *arg; + GList *cur = cmd->args; + + if (cmd->args == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + /* First argument MUST be FROM */ + if (arg->len != 4 || ( + g_ascii_toupper (arg->begin[0]) != 'F' || + g_ascii_toupper (arg->begin[1]) != 'R' || + g_ascii_toupper (arg->begin[2]) != 'O' || + g_ascii_toupper (arg->begin[3]) != 'M')) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + /* Next one is from address */ + cur = g_list_next (cur); + if (cur == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + if (check_smtp_path (arg)) { + session->from = cur; + } + else { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + + return TRUE; +} + +gboolean +parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd) +{ + f_str_t *arg; + GList *cur = cmd->args; + + if (cmd->args == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + /* First argument MUST be FROM */ + if (arg->len != 2 || ( + g_ascii_toupper (arg->begin[0]) != 'T' || + g_ascii_toupper (arg->begin[1]) != 'O')) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + /* Next one is from address */ + cur = g_list_next (cur); + if (cur == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + if (check_smtp_path (arg)) { + session->rcpt = g_list_prepend (session->rcpt, cur); + } + else { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + + return TRUE; + +} + +/* Return -1 if there are some error, 1 if all is ok and 0 in case of incomplete reply */ +static gint +check_smtp_ustream_reply (f_str_t *in, gchar success_code) +{ + gchar *p; + + /* Check for 250 at the begin of line */ + if (in->len >= sizeof ("220 ") - 1) { + p = in->begin; + if (p[0] == success_code) { + /* Last reply line */ + if (p[3] == ' ') { + return 1; + } + else { + return 0; + } + } + else { + return -1; + } + } + + return -1; +} + +size_t +smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen) +{ + GList *cur = args; + size_t r = 0; + f_str_t *arg; + + while (cur && r < buflen - 3) { + arg = cur->data; + r += rspamd_snprintf (buf + r, buflen - r, " %V", arg); + cur = g_list_next (cur); + } + + buf[r++] = CR; + buf[r++] = LF; + buf[r] = '\0'; + + return r; +} + +gboolean +smtp_upstream_write_socket (void *arg) +{ + struct smtp_session *session = arg; + + if (session->upstream_state == SMTP_STATE_IN_SENDFILE) { + session->upstream_state = SMTP_STATE_AFTER_DATA; + return rspamd_dispatcher_write (session->upstream_dispatcher, CRLF DATA_END_TRAILER, sizeof (CRLF DATA_END_TRAILER) - 1, FALSE, TRUE); + } + + return TRUE; +} + +gboolean +smtp_upstream_read_socket (f_str_t * in, void *arg) +{ + struct smtp_session *session = arg; + gchar outbuf[BUFSIZ]; + gint r; + + msg_debug ("in: %V, state: %d", in, session->upstream_state); + switch (session->upstream_state) { + case SMTP_STATE_GREETING: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + if (session->ctx->use_xclient) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "XCLIENT NAME=%s ADDR=%s" CRLF, + session->resolved ? session->hostname : "[UNDEFINED]", + inet_ntoa (session->client_addr)); + session->upstream_state = SMTP_STATE_HELO; + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + else { + session->upstream_state = SMTP_STATE_FROM; + if (session->helo) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, + session->esmtp ? "EHLO" : "HELO", + session->helo); + } + else { + return smtp_upstream_read_socket (in, arg); + } + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + } + break; + case SMTP_STATE_HELO: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + session->upstream_state = SMTP_STATE_FROM; + if (session->helo) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, + session->esmtp ? "EHLO" : "HELO", + session->helo); + } + else { + return smtp_upstream_read_socket (in, arg); + } + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + break; + case SMTP_STATE_FROM: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "MAIL FROM: "); + r += smtp_upstream_write_list (session->from, outbuf + r, sizeof (outbuf) - r); + session->upstream_state = SMTP_STATE_RCPT; + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + break; + case SMTP_STATE_RCPT: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: "); + session->cur_rcpt = g_list_first (session->rcpt); + r += smtp_upstream_write_list (session->cur_rcpt->data, outbuf + r, sizeof (outbuf) - r); + session->cur_rcpt = g_list_next (session->cur_rcpt); + session->upstream_state = SMTP_STATE_BEFORE_DATA; + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + break; + case SMTP_STATE_BEFORE_DATA: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + if (session->cur_rcpt) { + session->rcpt = g_list_delete_link (session->rcpt, session->cur_rcpt); + } + else { + session->rcpt = g_list_delete_link (session->rcpt, session->rcpt); + } + session->errors ++; + session->state = SMTP_STATE_RCPT; + return TRUE; + } + else if (r == 1) { + if (session->cur_rcpt != NULL) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: "); + r += smtp_upstream_write_list (session->cur_rcpt, outbuf + r, sizeof (outbuf) - r); + session->cur_rcpt = g_list_next (session->cur_rcpt); + if (! rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE)) { + goto err; + } + } + else { + session->upstream_state = SMTP_STATE_DATA; + rspamd_dispatcher_pause (session->upstream_dispatcher); + } + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* Write to client */ + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + if (session->state == SMTP_STATE_WAIT_UPSTREAM) { + rspamd_dispatcher_restore (session->dispatcher); + session->state = SMTP_STATE_RCPT; + } + } + break; + case SMTP_STATE_DATA: + r = check_smtp_ustream_reply (in, '3'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + if (! make_smtp_tempfile (session)) { + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + session->state = SMTP_STATE_AFTER_DATA; + session->error = SMTP_ERROR_DATA_OK; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + rspamd_dispatcher_pause (session->upstream_dispatcher); + rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_LINE, 0); + session->dispatcher->strip_eol = FALSE; + return TRUE; + } + break; + case SMTP_STATE_AFTER_DATA: + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + session->state = SMTP_STATE_DATA; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, sizeof ("QUIT" CRLF) - 1, FALSE, TRUE)) { + goto err; + } + session->upstream_state = SMTP_STATE_END; + return TRUE; + break; + case SMTP_STATE_END: + r = check_smtp_ustream_reply (in, '5'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (!rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else { + remove_normal_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session); + } + return FALSE; + break; + default: + msg_err ("got upstream reply at unexpected state: %d, reply: %V", session->upstream_state, in); + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + + return TRUE; +err: + msg_warn ("write error occured"); + return FALSE; +} + +void +smtp_upstream_err_socket (GError *err, void *arg) +{ + struct smtp_session *session = arg; + + msg_info ("abnormally closing connection with upstream %s, error: %s", session->upstream->name, err->message); + session->error = SMTP_ERROR_UPSTREAM; + session->state = SMTP_STATE_CRITICAL_ERROR; + /* XXX: assume upstream errors as critical errors */ + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + return; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + return; + } + upstream_fail (&session->upstream->up, session->session_time); + destroy_session (session->s); +} + +void +smtp_upstream_finalize_connection (gpointer data) +{ + struct smtp_session *session = data; + + if (session->state != SMTP_STATE_CRITICAL_ERROR) { + if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, 0, FALSE, TRUE)) { + msg_warn ("cannot send correctly closing message to upstream"); + } + } + rspamd_remove_dispatcher (session->upstream_dispatcher); + session->upstream_dispatcher = NULL; + close (session->upstream_sock); + session->upstream_sock = -1; +} diff --git a/src/libmime/smtp_proto.h b/src/libmime/smtp_proto.h new file mode 100644 index 000000000..42fecd255 --- /dev/null +++ b/src/libmime/smtp_proto.h @@ -0,0 +1,95 @@ +#ifndef RSPAMD_SMTP_PROTO_H +#define RSPAMD_SMTP_PROTO_H + +#include "config.h" +#include "smtp.h" + +/* SMTP errors */ +#define SMTP_ERROR_BAD_COMMAND "500 Syntax error, command unrecognized" CRLF +#define SMTP_ERROR_BAD_ARGUMENTS "501 Syntax error in parameters or arguments" CRLF +#define SMTP_ERROR_SEQUENCE "503 Bad sequence of commands" CRLF +#define SMTP_ERROR_RECIPIENTS "554 No valid recipients" CRLF +#define SMTP_ERROR_UNIMPLIMENTED "502 Command not implemented" CRLF +#define SMTP_ERROR_LIMIT "505 Too many errors. Aborting." CRLF +#define SMTP_ERROR_UPSTREAM "421 Service not available, closing transmission channel" CRLF +#define SMTP_ERROR_FILE "420 Service not available, filesystem error" CRLF +#define SMTP_ERROR_OK "250 Requested mail action okay, completed" CRLF +#define SMTP_ERROR_DATA_OK "354 Start mail input; end with <CRLF>.<CRLF>" CRLF + +#define DATA_END_TRAILER "." CRLF + +#define XCLIENT_HOST_UNAVAILABLE "[UNAVAILABLE]" +#define XCLIENT_HOST_TEMPFAIL "[TEMPUNAVAIL]" + +#define MAX_SMTP_UPSTREAMS 128 + +struct smtp_command { + enum { + SMTP_COMMAND_HELO, + SMTP_COMMAND_EHLO, + SMTP_COMMAND_QUIT, + SMTP_COMMAND_NOOP, + SMTP_COMMAND_MAIL, + SMTP_COMMAND_RCPT, + SMTP_COMMAND_RSET, + SMTP_COMMAND_DATA, + SMTP_COMMAND_VRFY, + SMTP_COMMAND_EXPN, + SMTP_COMMAND_HELP + } command; + GList *args; +}; + +/* + * Generate SMTP error message + */ +gchar * make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...); + +/* + * Parse a single SMTP command + */ +gboolean parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd); + +/* + * Parse HELO command + */ +gboolean parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd); + +/* + * Parse MAIL command + */ +gboolean parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd); + +/* + * Parse RCPT command + */ +gboolean parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd); + +/* Upstream SMTP */ + +/* + * Read a line from SMTP upstream + */ +gboolean smtp_upstream_read_socket (f_str_t * in, void *arg); + +/* + * Write to SMTP upstream + */ +gboolean smtp_upstream_write_socket (void *arg); + +/* + * Error handler for SMTP upstream + */ +void smtp_upstream_err_socket (GError *err, void *arg); + +/* + * Terminate connection with upstream + */ +void smtp_upstream_finalize_connection (gpointer data); + +/* + * Write a list of strings to the upstream + */ +size_t smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen); + +#endif diff --git a/src/libmime/smtp_utils.c b/src/libmime/smtp_utils.c new file mode 100644 index 000000000..5178de9dd --- /dev/null +++ b/src/libmime/smtp_utils.c @@ -0,0 +1,362 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "filter.h" +#include "settings.h" +#include "smtp.h" +#include "smtp_proto.h" + +void +free_smtp_session (gpointer arg) +{ + struct smtp_session *session = arg; + + if (session) { + if (session->task) { + rspamd_task_free (session->task, FALSE); + if (session->task->msg->str) { + munmap (session->task->msg->str, session->task->msg->len); + } + } + if (session->rcpt) { + g_list_free (session->rcpt); + } + if (session->dispatcher) { + rspamd_remove_dispatcher (session->dispatcher); + } + close (session->sock); + if (session->temp_name != NULL) { + unlink (session->temp_name); + } + if (session->temp_fd != -1) { + close (session->temp_fd); + } + rspamd_mempool_delete (session->pool); + g_free (session); + } +} + +gboolean +create_smtp_upstream_connection (struct smtp_session *session) +{ + struct smtp_upstream *selected; + + /* Try to select upstream */ + selected = (struct smtp_upstream *)get_upstream_round_robin (session->ctx->upstreams, + session->ctx->upstream_num, sizeof (struct smtp_upstream), + session->session_time, DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS); + if (selected == NULL) { + msg_err ("no upstreams suitable found"); + return FALSE; + } + + session->upstream = selected; + + /* Now try to create socket */ + session->upstream_sock = make_universal_socket (selected->addr, selected->port, SOCK_STREAM, TRUE, FALSE, FALSE); + if (session->upstream_sock == -1) { + msg_err ("cannot make a connection to %s", selected->name); + upstream_fail (&selected->up, session->session_time); + return FALSE; + } + /* Create a dispatcher for upstream connection */ + session->upstream_dispatcher = rspamd_create_dispatcher (session->ev_base, session->upstream_sock, BUFFER_LINE, + smtp_upstream_read_socket, smtp_upstream_write_socket, smtp_upstream_err_socket, + &session->ctx->smtp_timeout, session); + session->state = SMTP_STATE_WAIT_UPSTREAM; + session->upstream_state = SMTP_STATE_GREETING; + register_async_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session, g_quark_from_static_string ("smtp proxy")); + return TRUE; +} + +gboolean +smtp_send_upstream_message (struct smtp_session *session) +{ + rspamd_dispatcher_pause (session->dispatcher); + rspamd_dispatcher_restore (session->upstream_dispatcher); + + session->upstream_state = SMTP_STATE_IN_SENDFILE; + session->state = SMTP_STATE_WAIT_UPSTREAM; + if (! rspamd_dispatcher_sendfile (session->upstream_dispatcher, session->temp_fd, session->temp_size)) { + msg_err ("sendfile failed: %s", strerror (errno)); + goto err; + } + return TRUE; + +err: + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + return FALSE; + } + destroy_session (session->s); + return FALSE; +} + +struct smtp_metric_callback_data { + struct smtp_session *session; + enum rspamd_metric_action action; + struct metric_result *res; + gchar *log_buf; + gint log_offset; + gint log_size; + gboolean alive; +}; + +static void +smtp_metric_symbols_callback (gpointer key, gpointer value, void *user_data) +{ + struct smtp_metric_callback_data *cd = user_data; + + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "%s,", (gchar *)key); +} + +static void +smtp_metric_callback (gpointer key, gpointer value, gpointer ud) +{ + struct smtp_metric_callback_data *cd = ud; + struct metric_result *metric_res = value; + enum rspamd_metric_action action = METRIC_ACTION_NOACTION; + double ms = 0, rs = 0; + gboolean is_spam = FALSE; + struct rspamd_task *task; + + task = cd->session->task; + + if (!check_metric_settings (metric_res, &ms, &rs)) { + ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + rs = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + } + if (! check_metric_action_settings (task, metric_res, metric_res->score, &action)) { + action = check_metric_action (metric_res->score, ms, metric_res->metric); + } + if (metric_res->score >= ms) { + is_spam = 1; + } + if (action < cd->action) { + cd->action = action; + cd->res = metric_res; + } + + if (!task->is_skipped) { + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (%s): [%.2f/%.2f/%.2f] [", + (gchar *)key, is_spam ? 'T' : 'F', str_action_metric (action), metric_res->score, ms, rs); + } + else { + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (default): [%.2f/%.2f/%.2f] [", + (gchar *)key, 'S', metric_res->score, ms, rs); + + } + g_hash_table_foreach (metric_res->symbols, smtp_metric_symbols_callback, cd); + /* Remove last , from log buf */ + if (cd->log_buf[cd->log_offset - 1] == ',') { + cd->log_buf[--cd->log_offset] = '\0'; + } + +#ifdef HAVE_CLOCK_GETTIME + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,", + task->msg->len, calculate_check_time (&task->tv, &task->ts, task->cfg->clock_res, &task->scan_milliseconds)); +#else + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,", + task->msg->len, calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds)); +#endif +} + +gboolean +make_smtp_tempfile (struct smtp_session *session) +{ + gsize r; + + r = strlen (session->cfg->temp_dir) + sizeof ("/rspamd-XXXXXX"); + session->temp_name = rspamd_mempool_alloc (session->pool, r); + rspamd_snprintf (session->temp_name, r, "%s%crspamd-XXXXXX", session->cfg->temp_dir, G_DIR_SEPARATOR); +#ifdef HAVE_MKSTEMP + /* Umask is set before */ + session->temp_fd = mkstemp (session->temp_name); +#else + session->temp_fd = g_mkstemp_full (session->temp_name, O_RDWR, S_IWUSR | S_IRUSR); +#endif + if (session->temp_fd == -1) { + msg_err ("mkstemp error: %s", strerror (errno)); + + return FALSE; + } + + return TRUE; +} + +gboolean +write_smtp_reply (struct smtp_session *session) +{ + gchar logbuf[1024], *new_subject; + const gchar *old_subject; + struct smtp_metric_callback_data cd; + GMimeStream *stream; + gint old_fd, sublen; + + /* Check metrics */ + cd.session = session; + cd.action = METRIC_ACTION_NOACTION; + cd.res = NULL; + cd.log_buf = logbuf; + cd.log_offset = rspamd_snprintf (logbuf, sizeof (logbuf), "id: <%s>, qid: <%s>, ", + session->task->message_id, session->task->queue_id); + cd.log_size = sizeof (logbuf); + if (session->task->user) { + cd.log_offset += rspamd_snprintf (logbuf + cd.log_offset, sizeof (logbuf) - cd.log_offset, + "user: %s, ", session->task->user); + } + + g_hash_table_foreach (session->task->results, smtp_metric_callback, &cd); + + msg_info ("%s", logbuf); + + if (cd.action <= METRIC_ACTION_REJECT) { + if (! rspamd_dispatcher_write (session->dispatcher, session->ctx->reject_message, 0, FALSE, TRUE)) { + return FALSE; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + return FALSE; + } + destroy_session (session->s); + return FALSE; + } + else if (cd.action <= METRIC_ACTION_ADD_HEADER || cd.action <= METRIC_ACTION_REWRITE_SUBJECT) { + old_fd = session->temp_fd; + if (! make_smtp_tempfile (session)) { + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + + if (cd.action <= METRIC_ACTION_REWRITE_SUBJECT) { + /* XXX: add this action */ + old_subject = g_mime_message_get_subject (session->task->message); + if (old_subject != NULL) { + sublen = strlen (old_subject) + sizeof (SPAM_SUBJECT); + new_subject = rspamd_mempool_alloc (session->pool, sublen); + rspamd_snprintf (new_subject, sublen, "%s%s", SPAM_SUBJECT, old_subject); + } + else { + new_subject = SPAM_SUBJECT; + } + g_mime_message_set_subject (session->task->message, new_subject); + } + else if (cd.action <= METRIC_ACTION_ADD_HEADER) { +#ifndef GMIME24 + g_mime_message_add_header (session->task->message, "X-Spam", "true"); +#else + g_mime_object_append_header (GMIME_OBJECT (session->task->message), "X-Spam", "true"); +#endif + } + stream = g_mime_stream_fs_new (session->temp_fd); + g_mime_stream_fs_set_owner (GMIME_STREAM_FS (stream), FALSE); + close (old_fd); + + if (g_mime_object_write_to_stream (GMIME_OBJECT (session->task->message), stream) == -1) { + msg_err ("cannot write MIME object to stream: %s", strerror (errno)); + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + g_object_unref (stream); + } + /* XXX: Add other actions */ + return smtp_send_upstream_message (session); +err: + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + return FALSE; + } + destroy_session (session->s); + return FALSE; +} + +gboolean +parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count) +{ + gchar **strv, *p, *t, *tt, *err_str; + guint32 num, i; + struct smtp_upstream *cur; + gchar resolved_path[PATH_MAX]; + + strv = g_strsplit_set (line, ",; ", -1); + num = g_strv_length (strv); + + if (num >= MAX_SMTP_UPSTREAMS) { + msg_err ("cannot define %d upstreams %d is max", num, MAX_SMTP_UPSTREAMS); + return FALSE; + } + *count = 0; + + for (i = 0; i < num; i ++) { + p = strv[i]; + cur = &upstreams[*count]; + if ((t = strrchr (p, ':')) != NULL && (tt = strchr (p, ':')) != t) { + /* Assume that after last `:' we have weigth */ + *t = '\0'; + t ++; + errno = 0; + cur->up.priority = strtoul (t, &err_str, 10); + if (errno != 0 || (err_str && *err_str != '\0')) { + msg_err ("cannot convert weight: %s, %s", t, strerror (errno)); + g_strfreev (strv); + return FALSE; + } + } + if (*p == '/') { + cur->is_unix = TRUE; + if (realpath (p, resolved_path) == NULL) { + msg_err ("cannot resolve path: %s", resolved_path); + g_strfreev (strv); + return FALSE; + } + cur->name = rspamd_mempool_strdup (pool, resolved_path); + (*count) ++; + } + else { + if (! parse_host_port (pool, p, &cur->addr, &cur->port)) { + g_strfreev (strv); + return FALSE; + } + cur->name = rspamd_mempool_strdup (pool, p); + (*count) ++; + } + } + + g_strfreev (strv); + return TRUE; +} diff --git a/src/libmime/smtp_utils.h b/src/libmime/smtp_utils.h new file mode 100644 index 000000000..652b6759f --- /dev/null +++ b/src/libmime/smtp_utils.h @@ -0,0 +1,63 @@ +#ifndef SMTP_UTILS_H_ +#define SMTP_UTILS_H_ + +#include "config.h" +#include "main.h" +#include "smtp.h" + +/** + * @file smtp_utils.h + * Contains utilities for smtp protocol handling + */ + +struct smtp_upstream { + struct upstream up; + + const gchar *name; + gchar *addr; + guint16 port; + gboolean is_unix; +}; + +#define MAX_SMTP_UPSTREAMS 128 + +struct smtp_session; + +/** + * Send message to upstream + * @param session session object + */ +gboolean smtp_send_upstream_message (struct smtp_session *session); + +/** + * Create connection to upstream + * @param session session object + */ +gboolean create_smtp_upstream_connection (struct smtp_session *session); + +/** + * Create temporary file for smtp session + */ +gboolean make_smtp_tempfile (struct smtp_session *session); + +/** + * Write reply to upstream + * @param session session object + */ +gboolean write_smtp_reply (struct smtp_session *session); + +/** + * Frees smtp session object + */ +void free_smtp_session (gpointer arg); + +/** + * Parse upstreams line + * @param upstreams pointer to the array of upstreams (must be at least MAX_SMTP_UPSTREAMS size) + * @param line description line + * @param count targeted count + * @return + */ +gboolean parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count); + +#endif /* SMTP_UTILS_H_ */ diff --git a/src/libmime/worker_util.c b/src/libmime/worker_util.c new file mode 100644 index 000000000..d029f5dc4 --- /dev/null +++ b/src/libmime/worker_util.c @@ -0,0 +1,255 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "message.h" +#include "lua/lua_common.h" + +extern struct rspamd_main *rspamd_main; + +/** + * Return worker's control structure by its type + * @param type + * @return worker's control structure or NULL + */ +worker_t* +get_worker_by_type (GQuark type) +{ + worker_t **cur; + + cur = &workers[0]; + while (*cur) { + if (g_quark_from_string ((*cur)->name) == type) { + return *cur; + } + cur ++; + } + + return NULL; +} + +double +set_counter (const gchar *name, guint32 value) +{ + struct counter_data *cd; + double alpha; + gchar *key; + + cd = rspamd_hash_lookup (rspamd_main->counters, (gpointer) name); + + if (cd == NULL) { + cd = rspamd_mempool_alloc_shared (rspamd_main->counters->pool, sizeof (struct counter_data)); + cd->value = value; + cd->number = 0; + key = rspamd_mempool_strdup_shared (rspamd_main->counters->pool, name); + rspamd_hash_insert (rspamd_main->counters, (gpointer) key, (gpointer) cd); + } + else { + /* Calculate new value */ + rspamd_mempool_wlock_rwlock (rspamd_main->counters->lock); + + alpha = 2. / (++cd->number + 1); + cd->value = cd->value * (1. - alpha) + value * alpha; + + rspamd_mempool_wunlock_rwlock (rspamd_main->counters->lock); + } + + return cd->value; +} + +struct event_base * +prepare_worker (struct rspamd_worker *worker, const char *name, + rspamd_sig_handler_t sig_handler, + void (*accept_handler)(int, short, void *)) +{ + struct event_base *ev_base; + struct event *accept_event; + struct sigaction signals; + GList *cur; + gint listen_socket; + +#ifdef WITH_PROFILER + extern void _start (void), etext (void); + monstartup ((u_long) & _start, (u_long) & etext); +#endif + + gperf_profiler_init (worker->srv->cfg, name); + + worker->srv->pid = getpid (); + + ev_base = event_init (); + + init_signals (&signals, sig_handler); + sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL); + + /* Accept all sockets */ + cur = worker->cf->listen_socks; + while (cur) { + listen_socket = GPOINTER_TO_INT (cur->data); + if (listen_socket != -1) { + accept_event = g_slice_alloc0 (sizeof (struct event)); + event_set (accept_event, listen_socket, EV_READ | EV_PERSIST, + accept_handler, worker); + event_base_set (ev_base, accept_event); + event_add (accept_event, NULL); + worker->accept_events = g_list_prepend (worker->accept_events, accept_event); + } + cur = g_list_next (cur); + } + + return ev_base; +} + +void +worker_stop_accept (struct rspamd_worker *worker) +{ + GList *cur; + struct event *event; + + /* Remove all events */ + cur = worker->accept_events; + while (cur) { + event = cur->data; + event_del (event); + cur = g_list_next (cur); + g_slice_free1 (sizeof (struct event), event); + } + + if (worker->accept_events != NULL) { + g_list_free (worker->accept_events); + } +} + +/* + * Called if all filters are processed + * @return TRUE if session should be terminated + */ +gboolean +rspamd_task_fin (void *arg) +{ + struct rspamd_task *task = (struct rspamd_task *) arg; + gint r; + GError *err = NULL; + + /* Task is already finished or skipped */ + if (task->state == WRITE_REPLY) { + if (task->fin_callback) { + task->fin_callback (task->fin_arg); + } + else { + rspamd_protocol_write_reply (task); + } + return TRUE; + } + + /* We processed all filters and want to process statfiles */ + if (task->state != WAIT_POST_FILTER && task->state != WAIT_PRE_FILTER) { + /* Process all statfiles */ + if (task->classify_pool == NULL) { + /* Non-threaded version */ + process_statfiles (task); + } + else { + /* Just process composites */ + make_composites (task); + } + if (task->cfg->post_filters) { + /* More to process */ + /* Special state */ + task->state = WAIT_POST_FILTER; + return FALSE; + } + + } + + /* We are on post-filter waiting state */ + if (task->state != WAIT_PRE_FILTER) { + /* Check if we have all events finished */ + task->state = WRITE_REPLY; + if (task->fin_callback) { + task->fin_callback (task->fin_arg); + } + else { + rspamd_protocol_write_reply (task); + } + } + else { + /* We were waiting for pre-filter */ + if (task->pre_result.action != METRIC_ACTION_NOACTION) { + /* Write result based on pre filters */ + task->state = WRITE_REPLY; + if (task->fin_callback) { + task->fin_callback (task->fin_arg); + } + else { + rspamd_protocol_write_reply (task); + } + return TRUE; + } + else { + task->state = WAIT_FILTER; + r = process_filters (task); + if (r == -1) { + task->last_error = "Filter processing error"; + task->error_code = RSPAMD_FILTER_ERROR; + task->state = WRITE_REPLY; + rspamd_protocol_write_reply (task); + return TRUE; + } + /* Add task to classify to classify pool */ + if (!task->is_skipped && task->classify_pool) { + register_async_thread (task->s); + g_thread_pool_push (task->classify_pool, task, &err); + if (err != NULL) { + msg_err ("cannot pull task to the pool: %s", err->message); + remove_async_thread (task->s); + g_error_free (err); + } + } + if (task->is_skipped) { + rspamd_protocol_write_reply (task); + } + else { + return FALSE; + } + } + } + + return TRUE; +} + +/* + * Called if session was restored inside fin callback + */ +void +rspamd_task_restore (void *arg) +{ + struct rspamd_task *task = (struct rspamd_task *) arg; + + /* Call post filters */ + if (task->state == WAIT_POST_FILTER) { + lua_call_post_filters (task); + } + task->s->wanna_die = TRUE; +} |