aboutsummaryrefslogtreecommitdiffstats
path: root/src/libmime
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
commit61555065f3d1c8badcc9573691232f1b6e42988c (patch)
tree563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libmime
parentad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff)
downloadrspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz
rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip
Rework project structure, remove trash files.
Diffstat (limited to 'src/libmime')
-rw-r--r--src/libmime/CMakeLists.txt29
-rw-r--r--src/libmime/expressions.c1452
-rw-r--r--src/libmime/expressions.h133
-rw-r--r--src/libmime/filter.c1096
-rw-r--r--src/libmime/filter.h167
-rw-r--r--src/libmime/images.c255
-rw-r--r--src/libmime/images.h33
-rw-r--r--src/libmime/message.c1764
-rw-r--r--src/libmime/message.h91
-rw-r--r--src/libmime/protocol.c821
-rw-r--r--src/libmime/protocol.h46
-rw-r--r--src/libmime/smtp_proto.c701
-rw-r--r--src/libmime/smtp_proto.h95
-rw-r--r--src/libmime/smtp_utils.c362
-rw-r--r--src/libmime/smtp_utils.h63
-rw-r--r--src/libmime/worker_util.c255
16 files changed, 7363 insertions, 0 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
new file mode 100644
index 000000000..303b7a088
--- /dev/null
+++ b/src/libmime/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Librspamd mime
+SET(LIBRSPAMDMIMESRC
+ expressions.c
+ filter.c
+ images.c
+ message.c
+ protocol.c
+ smtp_utils.c
+ smtp_proto.c
+ worker_util.c)
+
+# Librspamdmime
+ADD_LIBRARY(rspamd-mime ${LINK_TYPE} ${LIBRSPAMDMIMESRC})
+IF(NOT DEBIAN_BUILD)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES VERSION ${RSPAMD_VERSION})
+ENDIF(NOT DEBIAN_BUILD)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES LINKER_LANGUAGE C)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB")
+TARGET_LINK_LIBRARIES(rspamd-mime rspamd-server)
+TARGET_LINK_LIBRARIES(rspamd-mime rspamd-util)
+IF(CMAKE_COMPILER_IS_GNUCC)
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing")
+ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+IF(NO_SHARED MATCHES "OFF")
+ INSTALL(TARGETS rspamd-mime
+ LIBRARY DESTINATION ${LIBDIR}
+ PUBLIC_HEADER DESTINATION ${INCLUDEDIR})
+ENDIF(NO_SHARED MATCHES "OFF") \ No newline at end of file
diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c
new file mode 100644
index 000000000..5d19626bb
--- /dev/null
+++ b/src/libmime/expressions.c
@@ -0,0 +1,1452 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "main.h"
+#include "message.h"
+#include "fuzzy.h"
+#include "expressions.h"
+#include "html.h"
+#include "lua/lua_common.h"
+#include "diff.h"
+
+gboolean rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_header_exists (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_parts_distance (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_has_only_html_part (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_is_html_balanced (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_has_html_tag (struct rspamd_task *task, GList * args, void *unused);
+gboolean rspamd_has_fake_html (struct rspamd_task *task, GList * args, void *unused);
+
+/*
+ * List of internal functions of rspamd
+ * Sorted by name to use bsearch
+ */
+static struct _fl {
+ const gchar *name;
+ rspamd_internal_func_t func;
+ void *user_data;
+} rspamd_functions_list[] = {
+ {"compare_encoding", rspamd_compare_encoding, NULL},
+ {"compare_parts_distance", rspamd_parts_distance, NULL},
+ {"compare_recipients_distance", rspamd_recipients_distance, NULL},
+ {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
+ {"has_fake_html", rspamd_has_fake_html, NULL},
+ {"has_html_tag", rspamd_has_html_tag, NULL},
+ {"has_only_html_part", rspamd_has_only_html_part, NULL},
+ {"header_exists", rspamd_header_exists, NULL},
+ {"is_html_balanced", rspamd_is_html_balanced, NULL},
+ {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}
+};
+
+static struct _fl *list_ptr = &rspamd_functions_list[0];
+static guint32 functions_number = sizeof (rspamd_functions_list) / sizeof (struct _fl);
+static gboolean list_allocated = FALSE;
+
+/* Bsearch routine */
+static gint
+fl_cmp (const void *s1, const void *s2)
+{
+ struct _fl *fl1 = (struct _fl *)s1;
+ struct _fl *fl2 = (struct _fl *)s2;
+ return strcmp (fl1->name, fl2->name);
+}
+
+/* Cache for regular expressions that are used in functions */
+void *
+re_cache_check (const gchar *line, rspamd_mempool_t *pool)
+{
+ GHashTable *re_cache;
+
+ re_cache = rspamd_mempool_get_variable (pool, "re_cache");
+
+ if (re_cache == NULL) {
+ re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+ rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy);
+ return NULL;
+ }
+ return g_hash_table_lookup (re_cache, line);
+}
+
+void
+re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool)
+{
+ GHashTable *re_cache;
+
+ re_cache = rspamd_mempool_get_variable (pool, "re_cache");
+
+ if (re_cache == NULL) {
+ re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+ rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy);
+ }
+
+ g_hash_table_insert (re_cache, (gpointer)line, pointer);
+}
+
+void
+re_cache_del (const gchar *line, rspamd_mempool_t *pool)
+{
+ GHashTable *re_cache;
+
+ re_cache = rspamd_mempool_get_variable (pool, "re_cache");
+
+ if (re_cache != NULL) {
+ g_hash_table_remove (re_cache, line);
+ }
+
+}
+
+/*
+ * Functions for parsing expressions
+ */
+struct expression_stack {
+ gchar op;
+ struct expression_stack *next;
+};
+
+/*
+ * Push operand or operator to stack
+ */
+static struct expression_stack *
+push_expression_stack (rspamd_mempool_t * pool, struct expression_stack *head, gchar op)
+{
+ struct expression_stack *new;
+ new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack));
+ new->op = op;
+ new->next = head;
+ return new;
+}
+
+/*
+ * Delete symbol from stack, return pointer to operand or operator (casted to void* )
+ */
+static gchar
+delete_expression_stack (struct expression_stack **head)
+{
+ struct expression_stack *cur;
+ gchar res;
+
+ if (*head == NULL)
+ return 0;
+
+ cur = *head;
+ res = cur->op;
+
+ *head = cur->next;
+ return res;
+}
+
+/*
+ * Return operation priority
+ */
+static gint
+logic_priority (gchar a)
+{
+ switch (a) {
+ case '!':
+ return 3;
+ case '|':
+ case '&':
+ return 2;
+ case '(':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Return FALSE if symbol is not operation symbol (operand)
+ * Return TRUE if symbol is operation symbol
+ */
+static gboolean
+is_operation_symbol (gchar *a)
+{
+ switch (*a) {
+ case '!':
+ case '&':
+ case '|':
+ case '(':
+ case ')':
+ return TRUE;
+ case 'O':
+ case 'o':
+ if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0&& g_ascii_isspace (a[2])) {
+ return TRUE;
+ }
+ break;
+ case 'A':
+ case 'a':
+ if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0&& g_ascii_isspace (a[3])) {
+ return TRUE;
+ }
+ break;
+ case 'N':
+ case 'n':
+ if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) {
+ return TRUE;
+ }
+ break;
+ }
+
+ return FALSE;
+}
+
+/* Return character representation of operation */
+static gchar
+op_to_char (gchar *a, gchar **next)
+{
+ switch (*a) {
+ case '!':
+ case '&':
+ case '|':
+ case '(':
+ case ')':
+ *next = a + 1;
+ return *a;
+ case 'O':
+ case 'o':
+ if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
+ *next = a + sizeof ("or") - 1;
+ return '|';
+ }
+ break;
+ case 'A':
+ case 'a':
+ if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
+ *next = a + sizeof ("and") - 1;
+ return '&';
+ }
+ break;
+ case 'N':
+ case 'n':
+ if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
+ *next = a + sizeof ("not") - 1;
+ return '!';
+ }
+ break;
+ }
+
+ return '\0';
+}
+
+/*
+ * Return TRUE if symbol can be regexp flag
+ */
+static gboolean
+is_regexp_flag (gchar a)
+{
+ switch (a) {
+ case 'i':
+ case 'm':
+ case 'x':
+ case 's':
+ case 'u':
+ case 'o':
+ case 'r':
+ case 'H':
+ case 'M':
+ case 'P':
+ case 'U':
+ case 'X':
+ case 'T':
+ case 'S':
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static void
+insert_expression (rspamd_mempool_t * pool, struct expression **head, gint type, gchar op, void *operand, const gchar *orig)
+{
+ struct expression *new, *cur;
+
+ new = rspamd_mempool_alloc (pool, sizeof (struct expression));
+ new->type = type;
+ new->orig = orig;
+ if (new->type != EXPR_OPERATION) {
+ new->content.operand = operand;
+ }
+ else {
+ new->content.operation = op;
+ }
+ new->next = NULL;
+
+ if (!*head) {
+ *head = new;
+ }
+ else {
+ cur = *head;
+ while (cur->next) {
+ cur = cur->next;
+ }
+ cur->next = new;
+ }
+}
+
+static struct expression *
+maybe_parse_expression (rspamd_mempool_t * pool, gchar *line)
+{
+ struct expression *expr;
+ gchar *p = line;
+
+ while (*p) {
+ if (is_operation_symbol (p)) {
+ return parse_expression (pool, line);
+ }
+ p++;
+ }
+
+ expr = rspamd_mempool_alloc (pool, sizeof (struct expression));
+ expr->type = EXPR_STR;
+ expr->content.operand = rspamd_mempool_strdup (pool, line);
+ expr->next = NULL;
+
+ return expr;
+}
+
+/*
+ * Make inverse polish record for specified expression
+ * Memory is allocated from given pool
+ */
+struct expression *
+parse_expression (rspamd_mempool_t * pool, gchar *line)
+{
+ struct expression *expr = NULL;
+ struct expression_stack *stack = NULL;
+ struct expression_function *func = NULL;
+ struct expression *arg;
+ GQueue *function_stack;
+ gchar *p, *c, *str, op, newop, *copy, *next;
+ gboolean in_regexp = FALSE;
+ gint brackets = 0;
+
+ enum {
+ SKIP_SPACES,
+ READ_OPERATOR,
+ READ_REGEXP,
+ READ_REGEXP_FLAGS,
+ READ_FUNCTION,
+ READ_FUNCTION_ARGUMENT,
+ } state = SKIP_SPACES;
+
+ if (line == NULL || pool == NULL) {
+ return NULL;
+ }
+
+ msg_debug ("parsing expression {{ %s }}", line);
+
+ function_stack = g_queue_new ();
+ copy = rspamd_mempool_strdup (pool, line);
+ p = line;
+ c = p;
+ while (*p) {
+ switch (state) {
+ case SKIP_SPACES:
+ if (!g_ascii_isspace (*p)) {
+ if (is_operation_symbol (p)) {
+ state = READ_OPERATOR;
+ }
+ else if (*p == '/') {
+ c = ++p;
+ state = READ_REGEXP;
+ }
+ else {
+ c = p;
+ state = READ_FUNCTION;
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case READ_OPERATOR:
+ if (*p == ')') {
+ if (stack == NULL) {
+ return NULL;
+ }
+ /* Pop all operators from stack to nearest '(' or to head */
+ while (stack && stack->op != '(') {
+ op = delete_expression_stack (&stack);
+ if (op != '(') {
+ insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
+ }
+ }
+ if (stack) {
+ op = delete_expression_stack (&stack);
+ }
+ }
+ else if (*p == '(') {
+ /* Push it to stack */
+ stack = push_expression_stack (pool, stack, *p);
+ }
+ else {
+ if (stack == NULL) {
+ newop = op_to_char (p, &next);
+ if (newop != '\0') {
+ stack = push_expression_stack (pool, stack, newop);
+ p = next;
+ state = SKIP_SPACES;
+ continue;
+ }
+ }
+ /* Check priority of logic operation */
+ else {
+ newop = op_to_char (p, &next);
+ if (newop != '\0') {
+ if (logic_priority (stack->op) < logic_priority (newop)) {
+ stack = push_expression_stack (pool, stack, newop);
+ }
+ else {
+ /* Pop all operations that have higher priority than this one */
+ while ((stack != NULL) && (logic_priority (stack->op) >= logic_priority (newop))) {
+ op = delete_expression_stack (&stack);
+ if (op != '(') {
+ insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
+ }
+ }
+ stack = push_expression_stack (pool, stack, newop);
+ }
+ }
+ p = next;
+ state = SKIP_SPACES;
+ continue;
+ }
+ }
+ p++;
+ state = SKIP_SPACES;
+ break;
+
+ case READ_REGEXP:
+ if (*p == '/' && *(p - 1) != '\\') {
+ if (*(p + 1)) {
+ p++;
+ }
+ state = READ_REGEXP_FLAGS;
+ }
+ else {
+ p++;
+ }
+ break;
+
+ case READ_REGEXP_FLAGS:
+ if (!is_regexp_flag (*p) || *(p + 1) == '\0') {
+ if (c != p) {
+ if ((is_regexp_flag (*p) || *p == '/') && *(p + 1) == '\0') {
+ p++;
+ }
+ str = rspamd_mempool_alloc (pool, p - c + 2);
+ rspamd_strlcpy (str, c - 1, (p - c + 2));
+ g_strstrip (str);
+ msg_debug ("found regexp: %s", str);
+ if (strlen (str) > 0) {
+ insert_expression (pool, &expr, EXPR_REGEXP, 0, str, copy);
+ }
+ }
+ c = p;
+ state = SKIP_SPACES;
+ }
+ else {
+ p++;
+ }
+ break;
+
+ case READ_FUNCTION:
+ if (*p == '/') {
+ /* In fact it is regexp */
+ state = READ_REGEXP;
+ c++;
+ p++;
+ }
+ else if (*p == '(') {
+ func = rspamd_mempool_alloc (pool, sizeof (struct expression_function));
+ func->name = rspamd_mempool_alloc (pool, p - c + 1);
+ func->args = NULL;
+ rspamd_strlcpy (func->name, c, (p - c + 1));
+ g_strstrip (func->name);
+ state = READ_FUNCTION_ARGUMENT;
+ g_queue_push_tail (function_stack, func);
+ insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy);
+ c = ++p;
+ }
+ else if (is_operation_symbol (p)) {
+ /* In fact it is not function, but symbol */
+ if (c != p) {
+ str = rspamd_mempool_alloc (pool, p - c + 1);
+ rspamd_strlcpy (str, c, (p - c + 1));
+ g_strstrip (str);
+ if (strlen (str) > 0) {
+ insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
+ }
+ }
+ state = READ_OPERATOR;
+ }
+ else if (*(p + 1) == '\0') {
+ /* In fact it is not function, but symbol */
+ p++;
+ if (c != p) {
+ str = rspamd_mempool_alloc (pool, p - c + 1);
+ rspamd_strlcpy (str, c, (p - c + 1));
+ g_strstrip (str);
+ if (strlen (str) > 0) {
+ insert_expression (pool, &expr, EXPR_STR, 0, str, copy);
+ }
+ }
+ state = SKIP_SPACES;
+ }
+ else {
+ p++;
+ }
+ break;
+
+ case READ_FUNCTION_ARGUMENT:
+ if (*p == '/' && !in_regexp) {
+ in_regexp = TRUE;
+ p++;
+ }
+ if (!in_regexp) {
+ /* Append argument to list */
+ if (*p == ',' || (*p == ')' && brackets == 0)) {
+ arg = NULL;
+ str = rspamd_mempool_alloc (pool, p - c + 1);
+ rspamd_strlcpy (str, c, (p - c + 1));
+ g_strstrip (str);
+ /* Recursive call */
+ arg = maybe_parse_expression (pool, str);
+ func->args = g_list_append (func->args, arg);
+ /* Pop function */
+ if (*p == ')') {
+ /* Last function in chain, goto skipping spaces state */
+ func = g_queue_pop_tail (function_stack);
+ if (g_queue_get_length (function_stack) == 0) {
+ state = SKIP_SPACES;
+ }
+ }
+ c = p + 1;
+ }
+ else if (*p == '(') {
+ brackets++;
+ }
+ else if (*p == ')') {
+ brackets--;
+ }
+ }
+ else if (*p == '/' && *(p - 1) != '\\') {
+ in_regexp = FALSE;
+ }
+ p++;
+ break;
+ }
+ }
+
+ g_queue_free (function_stack);
+ if (state != SKIP_SPACES) {
+ /* In fact we got bad expression */
+ msg_warn ("expression \"%s\" is invalid", line);
+ return NULL;
+ }
+ /* Pop everything from stack */
+ while (stack != NULL) {
+ op = delete_expression_stack (&stack);
+ if (op != '(') {
+ insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy);
+ }
+ }
+
+ return expr;
+}
+
+/*
+ * Rspamd regexp utility functions
+ */
+struct rspamd_regexp *
+parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
+{
+ const gchar *begin, *end, *p, *src, *start;
+ gchar *dbegin, *dend;
+ struct rspamd_regexp *result, *check;
+ gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE;
+ GError *err = NULL;
+
+ if (line == NULL) {
+ msg_err ("cannot parse NULL line");
+ return NULL;
+ }
+
+ src = line;
+ result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp));
+ /* Skip whitespaces */
+ while (g_ascii_isspace (*line)) {
+ line++;
+ }
+ if (*line == '\0') {
+ msg_warn ("got empty regexp");
+ return NULL;
+ }
+ start = line;
+ /* First try to find header name */
+ begin = strchr (line, '/');
+ if (begin != NULL) {
+ p = begin;
+ end = NULL;
+ while (p != line) {
+ if (*p == '=') {
+ end = p;
+ break;
+ }
+ p --;
+ }
+ if (end) {
+ result->header = rspamd_mempool_alloc (pool, end - line + 1);
+ rspamd_strlcpy (result->header, line, end - line + 1);
+ result->type = REGEXP_HEADER;
+ line = end;
+ }
+ }
+ else {
+ result->header = rspamd_mempool_strdup (pool, line);
+ result->type = REGEXP_HEADER;
+ line = start;
+ }
+ /* Find begin of regexp */
+ while (*line && *line != '/') {
+ line++;
+ }
+ if (*line != '\0') {
+ begin = line + 1;
+ }
+ else if (result->header == NULL) {
+ /* Assume that line without // is just a header name */
+ result->header = rspamd_mempool_strdup (pool, line);
+ result->type = REGEXP_HEADER;
+ return result;
+ }
+ else {
+ /* We got header name earlier but have not found // expression, so it is invalid regexp */
+ msg_warn ("got no header name (eg. header=) but without corresponding regexp, %s", src);
+ return NULL;
+ }
+ /* Find end */
+ end = begin;
+ while (*end && (*end != '/' || *(end - 1) == '\\')) {
+ end++;
+ }
+ if (end == begin || *end != '/') {
+ msg_warn ("no trailing / in regexp %s", src);
+ return NULL;
+ }
+ /* Parse flags */
+ p = end + 1;
+ while (p != NULL) {
+ switch (*p) {
+ case 'i':
+ regexp_flags |= G_REGEX_CASELESS;
+ p++;
+ break;
+ case 'm':
+ regexp_flags |= G_REGEX_MULTILINE;
+ p++;
+ break;
+ case 's':
+ regexp_flags |= G_REGEX_DOTALL;
+ p++;
+ break;
+ case 'x':
+ regexp_flags |= G_REGEX_EXTENDED;
+ p++;
+ break;
+ case 'u':
+ regexp_flags |= G_REGEX_UNGREEDY;
+ p++;
+ break;
+ case 'o':
+ regexp_flags |= G_REGEX_OPTIMIZE;
+ p++;
+ break;
+ case 'r':
+ regexp_flags |= G_REGEX_RAW;
+ result->is_raw = TRUE;
+ p++;
+ break;
+ /* Type flags */
+ case 'H':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_HEADER;
+ }
+ p++;
+ break;
+ case 'M':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_MESSAGE;
+ }
+ p++;
+ break;
+ case 'P':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_MIME;
+ }
+ p++;
+ break;
+ case 'U':
+ if (result->type == REGEXP_NONE) {
+ result->type = REGEXP_URL;
+ }
+ p++;
+ break;
+ case 'X':
+ if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+ result->type = REGEXP_RAW_HEADER;
+ }
+ p++;
+ break;
+ case 'T':
+ result->is_test = TRUE;
+ p ++;
+ break;
+ case 'S':
+ result->is_strong = TRUE;
+ p ++;
+ break;
+ /* Stop flags parsing */
+ default:
+ p = NULL;
+ break;
+ }
+ }
+
+ result->regexp_text = rspamd_mempool_strdup (pool, start);
+ dbegin = result->regexp_text + (begin - start);
+ dend = result->regexp_text + (end - start);
+ *dend = '\0';
+
+ if (raw_mode) {
+ regexp_flags |= G_REGEX_RAW;
+ }
+
+ /* Avoid multiply regexp structures for similar regexps */
+ if ((check = (struct rspamd_regexp *)re_cache_check (result->regexp_text, pool)) != NULL) {
+ /* Additional check for headers */
+ if (result->type == REGEXP_HEADER || result->type == REGEXP_RAW_HEADER) {
+ if (result->header && check->header) {
+ if (strcmp (result->header, check->header) == 0) {
+ return check;
+ }
+ }
+ }
+ else {
+ return check;
+ }
+ }
+ result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err);
+ if ((regexp_flags & G_REGEX_RAW) != 0) {
+ result->raw_regexp = result->regexp;
+ }
+ else {
+ result->raw_regexp = g_regex_new (dbegin, regexp_flags | G_REGEX_RAW, 0, &err);
+ rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->raw_regexp);
+ }
+ rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->regexp);
+
+ *dend = '/';
+
+ if (result->regexp == NULL || err != NULL) {
+ msg_warn ("could not read regexp: %s while reading regexp %s", err->message, src);
+ return NULL;
+ }
+
+ if (result->raw_regexp == NULL || err != NULL) {
+ msg_warn ("could not read raw regexp: %s while reading regexp %s", err->message, src);
+ return NULL;
+ }
+
+ /* Add to cache for further usage */
+ re_cache_add (result->regexp_text, result, pool);
+ return result;
+}
+
+gboolean
+call_expression_function (struct expression_function * func, struct rspamd_task * task, lua_State *L)
+{
+ struct _fl *selected, key;
+
+ key.name = func->name;
+
+ selected = bsearch (&key, list_ptr, functions_number, sizeof (struct _fl), fl_cmp);
+ if (selected == NULL) {
+ /* Try to check lua function */
+ return FALSE;
+ }
+
+ return selected->func (task, func->args, selected->user_data);
+}
+
+struct expression_argument *
+get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string)
+{
+ GQueue *stack;
+ gsize cur, op1, op2;
+ struct expression_argument *res;
+ struct expression *it;
+
+ if (expr == NULL) {
+ msg_warn ("NULL expression passed");
+ return NULL;
+ }
+ if (expr->next == NULL) {
+ res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument));
+ if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type == EXPR_REGEXP_PARSED) {
+ res->type = EXPRESSION_ARGUMENT_NORMAL;
+ res->data = expr->content.operand;
+ }
+ else if (expr->type == EXPR_FUNCTION && !want_string) {
+ res->type = EXPRESSION_ARGUMENT_BOOL;
+ cur = call_expression_function (expr->content.operand, task, NULL);
+ res->data = GSIZE_TO_POINTER (cur);
+ }
+ else {
+ msg_warn ("cannot parse argument: it contains operator or bool expression that is not wanted");
+ return NULL;
+ }
+ return res;
+ }
+ else if (!want_string) {
+ res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument));
+ res->type = EXPRESSION_ARGUMENT_BOOL;
+ stack = g_queue_new ();
+ it = expr;
+
+ while (it) {
+ if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED || it->type == EXPR_STR) {
+ g_queue_free (stack);
+ res->type = EXPRESSION_ARGUMENT_EXPR;
+ res->data = expr;
+ return res;
+ }
+ else if (it->type == EXPR_FUNCTION) {
+ cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task, NULL);
+ debug_task ("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false");
+ }
+ else if (it->type == EXPR_OPERATION) {
+ if (g_queue_is_empty (stack)) {
+ /* Queue has no operands for operation, exiting */
+ debug_task ("invalid expression");
+ g_queue_free (stack);
+ return NULL;
+ }
+ switch (it->content.operation) {
+ case '!':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op1 = !op1;
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
+ break;
+ case '&':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
+ break;
+ case '|':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
+ break;
+ default:
+ it = it->next;
+ continue;
+ }
+ }
+ if (it) {
+ it = it->next;
+ }
+ }
+ if (!g_queue_is_empty (stack)) {
+ res->data = g_queue_pop_head (stack);
+ }
+ else {
+ res->data = GSIZE_TO_POINTER (FALSE);
+ }
+
+ return res;
+ }
+
+ msg_warn ("invalid expression argument");
+
+ return NULL;
+}
+
+void
+register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data)
+{
+ static struct _fl *new;
+
+ functions_number++;
+
+ new = g_new (struct _fl, functions_number);
+ memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
+ if (list_allocated) {
+ g_free (list_ptr);
+ }
+
+ list_allocated = TRUE;
+ new[functions_number - 1].name = name;
+ new[functions_number - 1].func = func;
+ new[functions_number - 1].user_data = user_data;
+ qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
+ list_ptr = new;
+}
+
+gboolean
+rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused)
+{
+ struct expression_argument *arg;
+
+ if (args == NULL || task == NULL) {
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ if (arg->type == EXPRESSION_ARGUMENT_BOOL) {
+ msg_warn ("invalid argument to function is passed");
+ return FALSE;
+ }
+
+ /* XXX: really write this function */
+ return TRUE;
+}
+
+gboolean
+rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct expression_argument *arg;
+ GList *headerlist;
+
+ if (args == NULL || task == NULL) {
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
+ msg_warn ("invalid argument to function is passed");
+ return FALSE;
+ }
+
+ debug_task ("try to get header %s", (gchar *)arg->data);
+ headerlist = message_get_header (task->task_pool, task->message, (gchar *)arg->data, FALSE);
+ if (headerlist) {
+ g_list_free (headerlist);
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
+ * This function is designed to find difference between text/html and text/plain parts
+ * It takes one argument: difference threshold, if we have two text parts, compare
+ * its hashes and check for threshold, if value is greater than threshold, return TRUE
+ * and return FALSE otherwise.
+ */
+gboolean
+rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused)
+{
+ gint threshold, threshold2 = -1, diff;
+ struct mime_text_part *p1, *p2;
+ GList *cur;
+ struct expression_argument *arg;
+ GMimeObject *parent;
+ const GMimeContentType *ct;
+ gint *pdiff;
+
+ if (args == NULL) {
+ debug_task ("no threshold is specified, assume it 100");
+ threshold = 100;
+ }
+ else {
+ errno = 0;
+ arg = get_function_arg (args->data, task, TRUE);
+ threshold = strtoul ((gchar *)arg->data, NULL, 10);
+ if (errno != 0) {
+ msg_info ("bad numeric value for threshold \"%s\", assume it 100", (gchar *)args->data);
+ threshold = 100;
+ }
+ if (args->next) {
+ arg = get_function_arg (args->next->data, task, TRUE);
+ errno = 0;
+ threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
+ if (errno != 0) {
+ msg_info ("bad numeric value for threshold \"%s\", ignore it", (gchar *)arg->data);
+ threshold2 = -1;
+ }
+ }
+ }
+
+ if ((pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance")) != NULL) {
+ diff = *pdiff;
+ if (diff != -1) {
+ if (threshold2 > 0) {
+ if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) {
+ return TRUE;
+ }
+ }
+ else {
+ if (diff <= threshold) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+ }
+ else {
+ return FALSE;
+ }
+ }
+
+ if (g_list_length (task->text_parts) == 2) {
+ cur = g_list_first (task->text_parts);
+ p1 = cur->data;
+ cur = g_list_next (cur);
+ pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint));
+ *pdiff = -1;
+
+ if (cur == NULL) {
+ msg_info ("bad parts list");
+ return FALSE;
+ }
+ p2 = cur->data;
+ /* First of all check parent object */
+ if (p1->parent && p1->parent == p2->parent) {
+ parent = p1->parent;
+ ct = g_mime_object_get_content_type (parent);
+#ifndef GMIME24
+ if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+#else
+ if (ct == NULL || ! g_mime_content_type_is_type ((GMimeContentType *)ct, "multipart", "alternative")) {
+#endif
+ debug_task ("two parts are not belong to multipart/alternative container, skip check");
+ rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+ return FALSE;
+ }
+ }
+ else {
+ debug_task ("message contains two parts but they are in different multi-parts");
+ rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+ return FALSE;
+ }
+ if (!p1->is_empty && !p2->is_empty) {
+ if (p1->diff_str != NULL && p2->diff_str != NULL) {
+ diff = compare_diff_distance_normalized (p1->diff_str, p2->diff_str);
+ }
+ else {
+ diff = fuzzy_compare_parts (p1, p2);
+ }
+ debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold);
+ *pdiff = diff;
+ rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+ if (threshold2 > 0) {
+ if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) {
+ return TRUE;
+ }
+ }
+ else {
+ if (diff <= threshold) {
+ return TRUE;
+ }
+ }
+ }
+ else if ((p1->is_empty && !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+ /* Empty and non empty parts are different */
+ *pdiff = 0;
+ rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+ return TRUE;
+ }
+ }
+ else {
+ debug_task ("message has too many text parts, so do not try to compare them with each other");
+ rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+ return FALSE;
+ }
+
+ rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL);
+ return FALSE;
+}
+
+struct addr_list {
+ const gchar *name;
+ const gchar *addr;
+};
+
+#define COMPARE_RCPT_LEN 3
+#define MIN_RCPT_TO_COMPARE 7
+
+gboolean
+rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused)
+{
+ struct expression_argument *arg;
+ InternetAddressList *cur;
+ InternetAddress *addr;
+ double threshold;
+ struct addr_list *ar;
+ gchar *c;
+ gint num, i, j, hits = 0, total = 0;
+
+ if (args == NULL) {
+ msg_warn ("no parameters to function");
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ errno = 0;
+ threshold = strtod ((gchar *)arg->data, NULL);
+ if (errno != 0) {
+ msg_warn ("invalid numeric value '%s': %s", (gchar *)arg->data, strerror (errno));
+ return FALSE;
+ }
+
+ if (!task->rcpts) {
+ return FALSE;
+ }
+ num = internet_address_list_length (task->rcpts);
+ if (num < MIN_RCPT_TO_COMPARE) {
+ return FALSE;
+ }
+ ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
+
+ /* Fill array */
+ cur = task->rcpts;
+#ifdef GMIME24
+ for (i = 0; i < num; i ++) {
+ addr = internet_address_list_get_address (cur, i);
+ ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_name (addr));
+ if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+ *c = '\0';
+ ar[i].addr = c + 1;
+ }
+ }
+#else
+ i = 0;
+ while (cur) {
+ addr = internet_address_list_get_address (cur);
+ if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+ ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_addr (addr));
+ if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
+ *c = '\0';
+ ar[i].addr = c + 1;
+ }
+ cur = internet_address_list_next (cur);
+ i++;
+ }
+ else {
+ cur = internet_address_list_next (cur);
+ }
+ }
+#endif
+
+ /* Cycle all elements in array */
+ for (i = 0; i < num; i++) {
+ for (j = i + 1; j < num; j++) {
+ if (ar[i].name && ar[j].name && g_ascii_strncasecmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) {
+ /* Common name part */
+ hits++;
+ }
+ else if (ar[i].addr && ar[j].addr && g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
+ /* Common address part, but different name */
+ hits++;
+ }
+ total++;
+ }
+ }
+
+ if ((double)(hits * num / 2.) / (double)total >= threshold) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_has_only_html_part (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ gboolean res = FALSE;
+
+ cur = g_list_first (task->text_parts);
+ while (cur) {
+ p = cur->data;
+ if (p->is_html) {
+ res = TRUE;
+ }
+ else {
+ res = FALSE;
+ break;
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+}
+
+static gboolean
+is_recipient_list_sorted (const InternetAddressList * ia)
+{
+ const InternetAddressList *cur;
+ InternetAddress *addr;
+ gboolean res = TRUE;
+ struct addr_list current = { NULL, NULL }, previous = {
+ NULL, NULL};
+#ifdef GMIME24
+ gint num, i;
+#endif
+
+ /* Do not check to short address lists */
+ if (internet_address_list_length ((InternetAddressList *)ia) < MIN_RCPT_TO_COMPARE) {
+ return FALSE;
+ }
+#ifdef GMIME24
+ num = internet_address_list_length ((InternetAddressList *)ia);
+ cur = ia;
+ for (i = 0; i < num; i ++) {
+ addr = internet_address_list_get_address ((InternetAddressList *)cur, i);
+ current.addr = (gchar *)internet_address_get_name (addr);
+ if (previous.addr != NULL) {
+ if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+ res = FALSE;
+ break;
+ }
+ }
+ previous.addr = current.addr;
+ }
+#else
+ cur = ia;
+ while (cur) {
+ addr = internet_address_list_get_address (cur);
+ if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
+ current.addr = internet_address_get_addr (addr);
+ if (previous.addr != NULL) {
+ if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
+ res = FALSE;
+ break;
+ }
+ }
+ previous.addr = current.addr;
+ }
+ cur = internet_address_list_next (cur);
+ }
+#endif
+
+ return res;
+}
+
+gboolean
+rspamd_is_recipients_sorted (struct rspamd_task * task, GList * args, void *unused)
+{
+ /* Check all types of addresses */
+ if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
+ return TRUE;
+ }
+ if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
+ return TRUE;
+ }
+ if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_compare_transfer_encoding (struct rspamd_task * task, GList * args, void *unused)
+{
+ GMimeObject *part;
+#ifndef GMIME24
+ GMimePartEncodingType enc_req, part_enc;
+#else
+ GMimeContentEncoding enc_req, part_enc;
+#endif
+ struct expression_argument *arg;
+
+ if (args == NULL) {
+ msg_warn ("no parameters to function");
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+#ifndef GMIME24
+ enc_req = g_mime_part_encoding_from_string (arg->data);
+ if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
+#else
+ enc_req = g_mime_content_encoding_from_string (arg->data);
+ if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
+#endif
+ msg_warn ("bad encoding type: %s", (gchar *)arg->data);
+ return FALSE;
+ }
+
+ part = g_mime_message_get_mime_part (task->message);
+ if (part) {
+ if (GMIME_IS_PART (part)) {
+#ifndef GMIME24
+ part_enc = g_mime_part_get_encoding (GMIME_PART (part));
+ if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
+ /* Assume 7bit as default transfer encoding */
+ part_enc = GMIME_PART_ENCODING_7BIT;
+ }
+#else
+ part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
+ if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
+ /* Assume 7bit as default transfer encoding */
+ part_enc = GMIME_CONTENT_ENCODING_7BIT;
+ }
+#endif
+
+
+ debug_task ("got encoding in part: %d and compare with %d", (gint)part_enc, (gint)enc_req);
+#ifndef GMIME24
+ g_object_unref (part);
+#endif
+
+ return part_enc == enc_req;
+ }
+#ifndef GMIME24
+ g_object_unref (part);
+#endif
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ gboolean res = TRUE;
+
+ cur = g_list_first (task->text_parts);
+ while (cur) {
+ p = cur->data;
+ if (!p->is_empty && p->is_html) {
+ if (p->is_balanced) {
+ res = TRUE;
+ }
+ else {
+ res = FALSE;
+ break;
+ }
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+
+}
+
+struct html_callback_data {
+ struct html_tag *tag;
+ gboolean *res;
+};
+
+static gboolean
+search_html_node_callback (GNode * node, gpointer data)
+{
+ struct html_callback_data *cd = data;
+ struct html_node *nd;
+
+ nd = node->data;
+ if (nd) {
+ if (nd->tag == cd->tag) {
+ *cd->res = TRUE;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+gboolean
+rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ struct expression_argument *arg;
+ struct html_tag *tag;
+ gboolean res = FALSE;
+ struct html_callback_data cd;
+
+ if (args == NULL) {
+ msg_warn ("no parameters to function");
+ return FALSE;
+ }
+
+ arg = get_function_arg (args->data, task, TRUE);
+ tag = get_tag_by_name (arg->data);
+ if (tag == NULL) {
+ msg_warn ("unknown tag type passed as argument: %s", (gchar *)arg->data);
+ return FALSE;
+ }
+
+ cur = g_list_first (task->text_parts);
+ cd.res = &res;
+ cd.tag = tag;
+
+ while (cur && res == FALSE) {
+ p = cur->data;
+ if (!p->is_empty && p->is_html && p->html_nodes) {
+ g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, -1, search_html_node_callback, &cd);
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+
+}
+
+gboolean
+rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused)
+{
+ struct mime_text_part *p;
+ GList *cur;
+ gboolean res = FALSE;
+
+ cur = g_list_first (task->text_parts);
+
+ while (cur && res == FALSE) {
+ p = cur->data;
+ if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
+ res = TRUE;
+ }
+ cur = g_list_next (cur);
+ }
+
+ return res;
+
+}
+
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h
new file mode 100644
index 000000000..954cc74f7
--- /dev/null
+++ b/src/libmime/expressions.h
@@ -0,0 +1,133 @@
+/**
+ * @file expressions.h
+ * Rspamd expressions API
+ */
+
+#ifndef RSPAMD_EXPRESSIONS_H
+#define RSPAMD_EXPRESSIONS_H
+
+#include "config.h"
+#include <lua.h>
+
+struct rspamd_task;
+struct rspamd_regexp;
+
+/**
+ * Rspamd expression function
+ */
+struct expression_function {
+ gchar *name; /**< name of function */
+ GList *args; /**< its args */
+};
+
+/**
+ * Function's argument
+ */
+struct expression_argument {
+ enum {
+ EXPRESSION_ARGUMENT_NORMAL,
+ EXPRESSION_ARGUMENT_BOOL,
+ EXPRESSION_ARGUMENT_EXPR,
+ } type; /**< type of argument (text or other function) */
+ void *data; /**< pointer to its data */
+};
+
+/**
+ * Logic expression
+ */
+struct expression {
+ enum {
+ EXPR_REGEXP,
+ EXPR_OPERATION,
+ EXPR_FUNCTION,
+ EXPR_STR,
+ EXPR_REGEXP_PARSED,
+ } type; /**< expression type */
+ union {
+ void *operand;
+ gchar operation;
+ } content; /**< union for storing operand or operation code */
+ const gchar *orig; /**< original line */
+ struct expression *next; /**< chain link */
+};
+
+typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args, void *user_data);
+
+/**
+ * Parse regexp line to regexp structure
+ * @param pool memory pool to use
+ * @param line incoming line
+ * @return regexp structure or NULL in case of error
+ */
+struct rspamd_regexp* parse_regexp (rspamd_mempool_t *pool, const gchar *line, gboolean raw_mode);
+
+/**
+ * Parse composites line to composites structure (eg. "SYMBOL1&SYMBOL2|!SYMBOL3")
+ * @param pool memory pool to use
+ * @param line incoming line
+ * @return expression structure or NULL in case of error
+ */
+struct expression* parse_expression (rspamd_mempool_t *pool, gchar *line);
+
+/**
+ * Call specified fucntion and return boolean result
+ * @param func function to call
+ * @param task task object
+ * @param L lua specific state
+ * @return TRUE or FALSE depending on function result
+ */
+gboolean call_expression_function (struct expression_function *func, struct rspamd_task *task, lua_State *L);
+
+/**
+ * Register specified function to rspamd internal functions list
+ * @param name name of function
+ * @param func pointer to function
+ */
+void register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data);
+
+/**
+ * Add regexp to regexp cache
+ * @param line symbolic representation
+ * @param pointer regexp data
+ */
+void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool);
+
+/**
+ * Check regexp in cache
+ * @param line symbolic representation
+ * @return pointer to regexp data or NULL if regexp is not found
+ */
+void * re_cache_check (const gchar *line, rspamd_mempool_t *pool);
+
+/**
+ * Remove regexp from regexp cache
+ * @param line symbolic representation
+ */
+void re_cache_del (const gchar *line, rspamd_mempool_t *pool);
+
+/**
+ * Add regexp to regexp task cache
+ * @param task task object
+ * @param pointer regexp data
+ * @param result numeric result of this regexp
+ */
+void task_cache_add (struct rspamd_task *task, struct rspamd_regexp *re, gint32 result);
+
+/**
+ * Check regexp in cache
+ * @param task task object
+ * @param pointer regexp data
+ * @return numeric result if value exists or -1 if not
+ */
+gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp *re);
+
+/**
+ * Parse and return a single function argument for a function (may recurse)
+ * @param expr expression structure that represents function's argument
+ * @param task task object
+ * @param want_string return NULL if argument is not a string
+ * @return expression argument structure or NULL if failed
+ */
+struct expression_argument *get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string);
+
+#endif
diff --git a/src/libmime/filter.c b/src/libmime/filter.c
new file mode 100644
index 000000000..cb0630d9d
--- /dev/null
+++ b/src/libmime/filter.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "mem_pool.h"
+#include "filter.h"
+#include "main.h"
+#include "message.h"
+#include "cfg_file.h"
+#include "util.h"
+#include "expressions.h"
+#include "settings.h"
+#include "binlog.h"
+#include "diff.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
+
+#ifdef WITH_LUA
+# include "lua/lua_common.h"
+#endif
+
+#define COMMON_PART_FACTOR 95
+
+#ifndef PARAM_H_HAS_BITSET
+/* Bit map related macros. */
+#define NBBY 8 /* number of bits in a byte */
+#define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY))
+#define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY)))
+#define isset(a,i) \
+ (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY)))
+#define isclr(a,i) \
+ ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0)
+#endif
+#define BITSPERBYTE (8*sizeof (gchar))
+#define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE)
+
+static inline GQuark
+filter_error_quark (void)
+{
+ return g_quark_from_static_string ("g-filter-error-quark");
+}
+
+static void
+insert_metric_result (struct rspamd_task *task, struct metric *metric, const gchar *symbol,
+ double flag, GList * opts, gboolean single)
+{
+ struct metric_result *metric_res;
+ struct symbol *s;
+ gdouble *weight, w;
+
+ metric_res = g_hash_table_lookup (task->results, metric->name);
+
+ if (metric_res == NULL) {
+ /* Create new metric chain */
+ metric_res = rspamd_mempool_alloc (task->task_pool, sizeof (struct metric_result));
+ metric_res->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+ metric_res->checked = FALSE;
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, metric_res->symbols);
+ metric_res->metric = metric;
+ metric_res->grow_factor = 0;
+ metric_res->score = 0;
+ metric_res->domain_settings = NULL;
+ metric_res->user_settings = NULL;
+ apply_metric_settings (task, metric, metric_res);
+ g_hash_table_insert (task->results, (gpointer) metric->name, metric_res);
+ }
+
+ weight = g_hash_table_lookup (metric->symbols, symbol);
+ if (weight == NULL) {
+ w = 0.0;
+ }
+ else {
+ w = (*weight) * flag;
+ }
+
+
+ /* Add metric score */
+ if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) {
+ if (s->options && opts && opts != s->options) {
+ /* Append new options */
+ s->options = g_list_concat (s->options, g_list_copy(opts));
+ /*
+ * Note that there is no need to add new destructor of GList as elements of appended
+ * GList are used directly, so just free initial GList
+ */
+ }
+ else if (opts) {
+ s->options = g_list_copy (opts);
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options);
+ }
+ if (!single) {
+ /* Handle grow factor */
+ if (metric_res->grow_factor && w > 0) {
+ w *= metric_res->grow_factor;
+ metric_res->grow_factor *= metric->grow_factor;
+ }
+ s->score += w;
+ metric_res->score += w;
+ }
+ else {
+ if (fabs (s->score) < fabs (w)) {
+ /* Replace less weight with a bigger one */
+ metric_res->score = metric_res->score - s->score + w;
+ s->score = w;
+ }
+ }
+ }
+ else {
+ s = rspamd_mempool_alloc (task->task_pool, sizeof (struct symbol));
+
+ /* Handle grow factor */
+ if (metric_res->grow_factor && w > 0) {
+ w *= metric_res->grow_factor;
+ metric_res->grow_factor *= metric->grow_factor;
+ }
+ else if (w > 0) {
+ metric_res->grow_factor = metric->grow_factor;
+ }
+
+ s->score = w;
+ s->name = symbol;
+ metric_res->score += w;
+
+ if (opts) {
+ s->options = g_list_copy (opts);
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options);
+ }
+ else {
+ s->options = NULL;
+ }
+
+ g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s);
+ }
+ debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w);
+
+}
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+static GStaticMutex result_mtx = G_STATIC_MUTEX_INIT;
+#else
+G_LOCK_DEFINE (result_mtx);
+#endif
+
+static void
+insert_result_common (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts, gboolean single)
+{
+ struct metric *metric;
+ struct cache_item *item;
+ GList *cur, *metric_list;
+
+ /* Avoid concurrenting inserting of results */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+ g_static_mutex_lock (&result_mtx);
+#else
+ G_LOCK (result_mtx);
+#endif
+ metric_list = g_hash_table_lookup (task->cfg->metrics_symbols, symbol);
+ if (metric_list) {
+ cur = metric_list;
+
+ while (cur) {
+ metric = cur->data;
+ insert_metric_result (task, metric, symbol, flag, opts, single);
+ cur = g_list_next (cur);
+ }
+ }
+ else {
+ /* Insert symbol to default metric */
+ insert_metric_result (task, task->cfg->default_metric, symbol, flag, opts, single);
+ }
+
+ /* Process cache item */
+ if (task->cfg->cache) {
+ item = g_hash_table_lookup (task->cfg->cache->items_by_symbol, symbol);
+ if (item != NULL) {
+ item->s->frequency++;
+ }
+ }
+
+ if (opts != NULL) {
+ /* XXX: it is not wise to destroy them here */
+ g_list_free (opts);
+ }
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+ g_static_mutex_unlock (&result_mtx);
+#else
+ G_UNLOCK (result_mtx);
+#endif
+}
+
+/* Insert result that may be increased on next insertions */
+void
+insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts)
+{
+ insert_result_common (task, symbol, flag, opts, task->cfg->one_shot_mode);
+}
+
+/* Insert result as a single option */
+void
+insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts)
+{
+ insert_result_common (task, symbol, flag, opts, TRUE);
+}
+
+/* Return true if metric has score that is more than spam score for it */
+static gboolean
+check_metric_is_spam (struct rspamd_task *task, struct metric *metric)
+{
+ struct metric_result *res;
+ double ms, rs;
+
+ /* Avoid concurrency while checking results */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+ g_static_mutex_lock (&result_mtx);
+#else
+ G_LOCK (result_mtx);
+#endif
+ res = g_hash_table_lookup (task->results, metric->name);
+ if (res) {
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+ g_static_mutex_unlock (&result_mtx);
+#else
+ G_UNLOCK (result_mtx);
+#endif
+ if (!check_metric_settings (res, &ms, &rs)) {
+ ms = metric->actions[METRIC_ACTION_REJECT].score;
+ }
+ return (ms > 0 && res->score >= ms);
+ }
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+ g_static_mutex_unlock (&result_mtx);
+#else
+ G_UNLOCK (result_mtx);
+#endif
+
+ return FALSE;
+}
+
+gint
+process_filters (struct rspamd_task *task)
+{
+ GList *cur;
+ struct metric *metric;
+ gpointer item = NULL;
+
+ /* Process metrics symbols */
+ while (call_symbol_callback (task, task->cfg->cache, &item)) {
+ /* Check reject actions */
+ cur = task->cfg->metrics_list;
+ while (cur) {
+ metric = cur->data;
+ if (!task->pass_all_filters &&
+ metric->actions[METRIC_ACTION_REJECT].score > 0 &&
+ check_metric_is_spam (task, metric)) {
+ task->state = WRITE_REPLY;
+ return 1;
+ }
+ cur = g_list_next (cur);
+ }
+ }
+
+ task->state = WAIT_FILTER;
+
+ return 1;
+}
+
+
+struct composites_data {
+ struct rspamd_task *task;
+ struct metric_result *metric_res;
+ GTree *symbols_to_remove;
+ guint8 *checked;
+};
+
+struct symbol_remove_data {
+ struct symbol *ms;
+ gboolean remove_weight;
+ gboolean remove_symbol;
+};
+
+static gint
+remove_compare_data (gconstpointer a, gconstpointer b)
+{
+ const gchar *ca = a, *cb = b;
+
+ return strcmp (ca, cb);
+}
+
+static void
+composites_foreach_callback (gpointer key, gpointer value, void *data)
+{
+ struct composites_data *cd = (struct composites_data *)data;
+ struct rspamd_composite *composite = value, *ncomp;
+ struct expression *expr;
+ GQueue *stack;
+ GList *symbols = NULL, *s;
+ gsize cur, op1, op2;
+ gchar logbuf[256], *sym, *check_sym;
+ gint r;
+ struct symbol *ms;
+ struct symbol_remove_data *rd;
+
+
+ expr = composite->expr;
+ if (isset (cd->checked, composite->id)) {
+ /* Symbol was already checked */
+ return;
+ }
+
+ stack = g_queue_new ();
+
+ while (expr) {
+ if (expr->type == EXPR_STR) {
+ /* Find corresponding symbol */
+ sym = expr->content.operand;
+ if (*sym == '~' || *sym == '-') {
+ sym ++;
+ }
+ if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) {
+ cur = 0;
+ if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, sym)) != NULL) {
+ /* Set checked for this symbol to avoid cyclic references */
+ if (isclr (cd->checked, ncomp->id)) {
+ setbit (cd->checked, composite->id);
+ composites_foreach_callback (sym, ncomp, cd);
+ if (g_hash_table_lookup (cd->metric_res->symbols, sym) != NULL) {
+ cur = 1;
+ }
+ }
+ }
+ }
+ else {
+ cur = 1;
+ symbols = g_list_prepend (symbols, expr->content.operand);
+ }
+ g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
+ }
+ else {
+ if (g_queue_is_empty (stack)) {
+ /* Queue has no operands for operation, exiting */
+ g_list_free (symbols);
+ g_queue_free (stack);
+ setbit (cd->checked, composite->id);
+ return;
+ }
+ switch (expr->content.operation) {
+ case '!':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op1 = !op1;
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
+ break;
+ case '&':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
+ break;
+ case '|':
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
+ break;
+ default:
+ expr = expr->next;
+ continue;
+ }
+ }
+ expr = expr->next;
+ }
+ if (!g_queue_is_empty (stack)) {
+ op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+ if (op1) {
+ /* Remove all symbols that are in composite symbol */
+ s = g_list_first (symbols);
+ r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key);
+ while (s) {
+ sym = s->data;
+ if (*sym == '~' || *sym == '-') {
+ check_sym = sym + 1;
+ }
+ else {
+ check_sym = sym;
+ }
+ ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym);
+
+ if (ms == NULL) {
+ /* Try to process other composites */
+ if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, check_sym)) != NULL) {
+ /* Set checked for this symbol to avoid cyclic references */
+ if (isclr (cd->checked, ncomp->id)) {
+ setbit (cd->checked, composite->id);
+ composites_foreach_callback (check_sym, ncomp, cd);
+ ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym);
+ }
+ }
+ }
+
+ if (ms != NULL) {
+ rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (struct symbol_remove_data));
+ rd->ms = ms;
+ if (G_UNLIKELY (*sym == '~')) {
+ rd->remove_weight = FALSE;
+ rd->remove_symbol = TRUE;
+ }
+ else if (G_UNLIKELY (*sym == '-')) {
+ rd->remove_symbol = FALSE;
+ rd->remove_weight = FALSE;
+ }
+ else {
+ rd->remove_symbol = TRUE;
+ rd->remove_weight = TRUE;
+ }
+ if (!g_tree_lookup (cd->symbols_to_remove, rd)) {
+ g_tree_insert (cd->symbols_to_remove, (gpointer)ms->name, rd);
+ }
+ }
+ else {
+
+ }
+
+ if (s->next) {
+ r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data);
+ }
+ else {
+ r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data);
+ }
+ s = g_list_next (s);
+ }
+ /* Add new symbol */
+ insert_result_single (cd->task, key, 1.0, NULL);
+ msg_info ("%s", logbuf);
+ }
+ }
+
+ setbit (cd->checked, composite->id);
+ g_queue_free (stack);
+ g_list_free (symbols);
+
+ return;
+}
+
+static gboolean
+check_autolearn (struct statfile_autolearn_params *params, struct rspamd_task *task)
+{
+ gchar *metric_name = DEFAULT_METRIC;
+ struct metric_result *metric_res;
+ GList *cur;
+
+ if (params->metric != NULL) {
+ metric_name = (gchar *)params->metric;
+ }
+
+ /* First check threshold */
+ metric_res = g_hash_table_lookup (task->results, metric_name);
+ if (metric_res == NULL) {
+ if (params->symbols == NULL && params->threshold_max > 0) {
+ /* For ham messages */
+ return TRUE;
+ }
+ debug_task ("metric %s has no results", metric_name);
+ return FALSE;
+ }
+ else {
+ /* Process score of metric */
+ if ((params->threshold_min != 0 && metric_res->score > params->threshold_min) || (params->threshold_max != 0 && metric_res->score < params->threshold_max)) {
+ /* Now check for specific symbols */
+ if (params->symbols) {
+ cur = params->symbols;
+ while (cur) {
+ if (g_hash_table_lookup (metric_res->symbols, cur->data) == NULL) {
+ return FALSE;
+ }
+ cur = g_list_next (cur);
+ }
+ }
+ /* Now allow processing of actual autolearn */
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+void
+process_autolearn (struct statfile *st, struct rspamd_task *task, GTree * tokens, struct classifier *classifier, gchar *filename, struct classifier_ctx *ctx)
+{
+ stat_file_t *statfile;
+ struct statfile *unused;
+
+ if (check_autolearn (st->autolearn, task)) {
+ if (tokens) {
+ /* Take care of subject */
+ tokenize_subject (task, &tokens);
+ msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename);
+
+ /* Get or create statfile */
+ statfile = get_statfile_by_symbol (task->worker->srv->statfile_pool, ctx->cfg,
+ st->symbol, &unused, TRUE);
+
+ if (statfile == NULL) {
+ return;
+ }
+
+ classifier->learn_func (ctx, task->worker->srv->statfile_pool, st->symbol, tokens, TRUE, NULL, 1., NULL);
+ maybe_write_binlog (ctx->cfg, st, statfile, tokens);
+ statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, DEFAULT_STATFILE_INVALIDATE_TIME, DEFAULT_STATFILE_INVALIDATE_JITTER);
+ }
+ }
+}
+
+static gboolean
+composites_remove_symbols (gpointer key, gpointer value, gpointer data)
+{
+ struct composites_data *cd = data;
+ struct symbol_remove_data *rd = value;
+
+ if (rd->remove_symbol) {
+ g_hash_table_remove (cd->metric_res->symbols, key);
+ }
+ if (rd->remove_weight) {
+ cd->metric_res->score -= rd->ms->score;
+ }
+
+ return FALSE;
+}
+
+static void
+composites_metric_callback (gpointer key, gpointer value, gpointer data)
+{
+ struct rspamd_task *task = (struct rspamd_task *)data;
+ struct composites_data *cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data));
+ struct metric_result *metric_res = (struct metric_result *)value;
+
+ cd->task = task;
+ cd->metric_res = (struct metric_result *)metric_res;
+ cd->symbols_to_remove = g_tree_new (remove_compare_data);
+ cd->checked = rspamd_mempool_alloc0 (task->task_pool, NBYTES (g_hash_table_size (task->cfg->composite_symbols)));
+
+ /* Process hash table */
+ g_hash_table_foreach (task->cfg->composite_symbols, composites_foreach_callback, cd);
+
+ /* Remove symbols that are in composites */
+ g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
+ /* Free list */
+ g_tree_destroy (cd->symbols_to_remove);
+}
+
+void
+make_composites (struct rspamd_task *task)
+{
+ g_hash_table_foreach (task->results, composites_metric_callback, task);
+}
+
+struct classifiers_cbdata {
+ struct rspamd_task *task;
+ struct lua_locked_state *nL;
+};
+
+static void
+classifiers_callback (gpointer value, void *arg)
+{
+ struct classifiers_cbdata *cbdata = arg;
+ struct rspamd_task *task;
+ struct classifier_config *cl = value;
+ struct classifier_ctx *ctx;
+ struct mime_text_part *text_part, *p1, *p2;
+ struct statfile *st;
+ GTree *tokens = NULL;
+ GList *cur;
+ f_str_t c;
+ gchar *header = NULL;
+ gint *dist = NULL, diff;
+ gboolean is_twopart = FALSE;
+
+ task = cbdata->task;
+
+ if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+ cur = message_get_header (task->task_pool, task->message, header, FALSE);
+ if (cur) {
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur);
+ }
+ }
+ else {
+ cur = g_list_first (task->text_parts);
+ dist = rspamd_mempool_get_variable (task->task_pool, "parts_distance");
+ if (cur != NULL && cur->next != NULL && cur->next->next == NULL) {
+ is_twopart = TRUE;
+ }
+ }
+ ctx = cl->classifier->init_func (task->task_pool, cl);
+
+ if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) {
+ while (cur != NULL) {
+ if (header) {
+ c.len = strlen (cur->data);
+ if (c.len > 0) {
+ c.begin = cur->data;
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, FALSE, NULL)) {
+ msg_info ("cannot tokenize input");
+ return;
+ }
+ }
+ }
+ else {
+ text_part = (struct mime_text_part *)cur->data;
+ if (text_part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ if (dist != NULL && cur->next == NULL) {
+ /* Compare part's content */
+
+ if (*dist >= COMMON_PART_FACTOR) {
+ msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+ break;
+ }
+ }
+ else if (cur->next == NULL && is_twopart) {
+ p1 = cur->prev->data;
+ p2 = text_part;
+ if (p1->diff_str != NULL && p2->diff_str != NULL) {
+ diff = compare_diff_distance (p1->diff_str, p2->diff_str);
+ }
+ else {
+ diff = fuzzy_compare_parts (p1, p2);
+ }
+ if (diff >= COMMON_PART_FACTOR) {
+ msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+ break;
+ }
+ }
+ c.begin = (gchar *)text_part->content->data;
+ c.len = text_part->content->len;
+ /* Tree would be freed at task pool freeing */
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens,
+ FALSE, text_part->is_utf, text_part->urls_offset)) {
+ msg_info ("cannot tokenize input");
+ return;
+ }
+ }
+ cur = g_list_next (cur);
+ }
+ g_hash_table_insert (task->tokens, cl->tokenizer, tokens);
+ }
+
+ /* Take care of subject */
+ tokenize_subject (task, &tokens);
+
+ if (tokens == NULL) {
+ return;
+ }
+
+ if (cbdata->nL != NULL) {
+ rspamd_mutex_lock (cbdata->nL->m);
+ cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, cbdata->nL->L);
+ rspamd_mutex_unlock (cbdata->nL->m);
+ }
+ else {
+ /* Non-threaded case */
+ cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, task->cfg->lua_state);
+ }
+
+ /* Autolearning */
+ cur = g_list_first (cl->statfiles);
+ while (cur) {
+ st = cur->data;
+ if (st->autolearn) {
+ if (check_autolearn (st->autolearn, task)) {
+ /* Process autolearn */
+ process_autolearn (st, task, tokens, cl->classifier, st->path, ctx);
+ }
+ }
+ cur = g_list_next (cur);
+ }
+}
+
+
+void
+process_statfiles (struct rspamd_task *task)
+{
+ struct classifiers_cbdata cbdata;
+
+ if (task->is_skipped) {
+ return;
+ }
+
+ if (task->tokens == NULL) {
+ task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal);
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens);
+ }
+ cbdata.task = task;
+ cbdata.nL = NULL;
+ g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata);
+
+ /* Process results */
+ make_composites (task);
+}
+
+void
+process_statfiles_threaded (gpointer data, gpointer user_data)
+{
+ struct rspamd_task *task = (struct rspamd_task *)data;
+ struct lua_locked_state *nL = user_data;
+ struct classifiers_cbdata cbdata;
+
+ if (task->is_skipped) {
+ remove_async_thread (task->s);
+ return;
+ }
+
+ if (task->tokens == NULL) {
+ task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal);
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens);
+ }
+
+ cbdata.task = task;
+ cbdata.nL = nL;
+ g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata);
+ remove_async_thread (task->s);
+}
+
+static void
+insert_metric_header (gpointer metric_name, gpointer metric_value, gpointer data)
+{
+#ifndef GLIB_HASH_COMPAT
+ struct rspamd_task *task = (struct rspamd_task *)data;
+ gint r = 0;
+ /* Try to be rfc2822 compatible and avoid long headers with folding */
+ gchar header_name[128], outbuf[1000];
+ GList *symbols = NULL, *cur;
+ struct metric_result *metric_res = (struct metric_result *)metric_value;
+ double ms, rs;
+
+ rspamd_snprintf (header_name, sizeof (header_name), "X-Spam-%s", metric_res->metric->name);
+
+ if (!check_metric_settings (metric_res, &ms, &rs)) {
+ ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score;
+ }
+ if (ms > 0 && metric_res->score >= ms) {
+ r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "yes; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs);
+ }
+ else {
+ r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "no; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs);
+ }
+
+ symbols = g_hash_table_get_keys (metric_res->symbols);
+ cur = symbols;
+ while (cur) {
+ if (g_list_next (cur) != NULL) {
+ r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s,", (gchar *)cur->data);
+ }
+ else {
+ r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s", (gchar *)cur->data);
+ }
+ cur = g_list_next (cur);
+ }
+ g_list_free (symbols);
+#ifdef GMIME24
+ g_mime_object_append_header (GMIME_OBJECT (task->message), header_name, outbuf);
+#else
+ g_mime_message_add_header (task->message, header_name, outbuf);
+#endif
+
+#endif /* GLIB_COMPAT */
+}
+
+void
+insert_headers (struct rspamd_task *task)
+{
+ g_hash_table_foreach (task->results, insert_metric_header, task);
+}
+
+gboolean
+check_action_str (const gchar *data, gint *result)
+{
+ if (g_ascii_strncasecmp (data, "reject", sizeof ("reject") - 1) == 0) {
+ *result = METRIC_ACTION_REJECT;
+ }
+ else if (g_ascii_strncasecmp (data, "greylist", sizeof ("greylist") - 1) == 0) {
+ *result = METRIC_ACTION_GREYLIST;
+ }
+ else if (g_ascii_strncasecmp (data, "add_header", sizeof ("add_header") - 1) == 0) {
+ *result = METRIC_ACTION_ADD_HEADER;
+ }
+ else if (g_ascii_strncasecmp (data, "rewrite_subject", sizeof ("rewrite_subject") - 1) == 0) {
+ *result = METRIC_ACTION_REWRITE_SUBJECT;
+ }
+ else {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+const gchar *
+str_action_metric (enum rspamd_metric_action action)
+{
+ switch (action) {
+ case METRIC_ACTION_REJECT:
+ return "reject";
+ case METRIC_ACTION_SOFT_REJECT:
+ return "soft_reject";
+ case METRIC_ACTION_REWRITE_SUBJECT:
+ return "rewrite_subject";
+ case METRIC_ACTION_ADD_HEADER:
+ return "add_header";
+ case METRIC_ACTION_GREYLIST:
+ return "greylist";
+ case METRIC_ACTION_NOACTION:
+ return "no_action";
+ case METRIC_ACTION_MAX:
+ return "invalid max action";
+ }
+
+ return "unknown action";
+}
+
+gint
+check_metric_action (double score, double required_score, struct metric *metric)
+{
+ struct metric_action *action, *selected_action = NULL;
+ double max_score = 0;
+ int i;
+
+ if (score >= required_score) {
+ return METRIC_ACTION_REJECT;
+ }
+ else if (metric->actions == NULL) {
+ return METRIC_ACTION_NOACTION;
+ }
+ else {
+ for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) {
+ action = &metric->actions[i];
+ if (action->score < 0) {
+ continue;
+ }
+ if (score >= action->score && action->score > max_score) {
+ selected_action = action;
+ max_score = action->score;
+ }
+ }
+ if (selected_action) {
+ return selected_action->action;
+ }
+ else {
+ return METRIC_ACTION_NOACTION;
+ }
+ }
+}
+
+gboolean
+learn_task (const gchar *statfile, struct rspamd_task *task, GError **err)
+{
+ GList *cur, *ex;
+ struct classifier_config *cl;
+ struct classifier_ctx *cls_ctx;
+ gchar *s;
+ f_str_t c;
+ GTree *tokens = NULL;
+ struct statfile *st;
+ stat_file_t *stf;
+ gdouble sum;
+ struct mime_text_part *part, *p1, *p2;
+ gboolean is_utf = FALSE, is_twopart = FALSE;
+ gint diff;
+
+
+ /* Load classifier by symbol */
+ cl = g_hash_table_lookup (task->cfg->classifiers_symbols, statfile);
+ if (cl == NULL) {
+ g_set_error (err, filter_error_quark(), 1, "Statfile %s is not configured in any classifier", statfile);
+ return FALSE;
+ }
+
+ /* If classifier has 'header' option just classify header of this type */
+ if ((s = g_hash_table_lookup (cl->opts, "header")) != NULL) {
+ cur = message_get_header (task->task_pool, task->message, s, FALSE);
+ if (cur) {
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur);
+ }
+ }
+ else {
+ /* Classify message otherwise */
+ cur = g_list_first (task->text_parts);
+ if (cur != NULL && cur->next != NULL && cur->next->next == NULL) {
+ is_twopart = TRUE;
+ }
+ }
+
+ /* Get tokens from each element */
+ while (cur) {
+ if (s != NULL) {
+ c.len = strlen (cur->data);
+ c.begin = cur->data;
+ ex = NULL;
+ }
+ else {
+ part = cur->data;
+ /* Skip empty parts */
+ if (part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ c.begin = (gchar *)part->content->data;
+ c.len = part->content->len;
+ is_utf = part->is_utf;
+ ex = part->urls_offset;
+ if (is_twopart && cur->next == NULL) {
+ /* Compare part's content */
+ p1 = cur->prev->data;
+ p2 = part;
+ if (p1->diff_str != NULL && p2->diff_str != NULL) {
+ diff = compare_diff_distance (p1->diff_str, p2->diff_str);
+ }
+ else {
+ diff = fuzzy_compare_parts (p1, p2);
+ }
+ if (diff >= COMMON_PART_FACTOR) {
+ msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+ break;
+ }
+ }
+ }
+ /* Get tokens */
+ if (!cl->tokenizer->tokenize_func (
+ cl->tokenizer, task->task_pool,
+ &c, &tokens, FALSE, is_utf, ex)) {
+ g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
+ return FALSE;
+ }
+ cur = g_list_next (cur);
+ }
+
+ /* Handle messages without text */
+ if (tokens == NULL) {
+ g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data");
+ msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id);
+ return FALSE;
+ }
+
+ /* Take care of subject */
+ tokenize_subject (task, &tokens);
+
+ /* Init classifier */
+ cls_ctx = cl->classifier->init_func (
+ task->task_pool, cl);
+ /* Get or create statfile */
+ stf = get_statfile_by_symbol (task->worker->srv->statfile_pool,
+ cl, statfile, &st, TRUE);
+
+ /* Learn */
+ if (stf== NULL || !cl->classifier->learn_func (
+ cls_ctx, task->worker->srv->statfile_pool,
+ statfile, tokens, TRUE, &sum,
+ 1.0, err)) {
+ if (*err) {
+ msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message);
+ return FALSE;
+ }
+ else {
+ g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error");
+ msg_info ("learn failed for message <%s>, unknown learn error", task->message_id);
+ return FALSE;
+ }
+ }
+ /* Increase statistics */
+ task->worker->srv->stat->messages_learned++;
+
+ maybe_write_binlog (cl, st, stf, tokens);
+ msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f",
+ task->message_id, statfile, sum);
+ statfile_pool_plan_invalidate (task->worker->srv->statfile_pool,
+ DEFAULT_STATFILE_INVALIDATE_TIME,
+ DEFAULT_STATFILE_INVALIDATE_JITTER);
+
+ return TRUE;
+}
+
+gboolean
+learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err)
+{
+ GList *cur, *ex;
+ struct classifier_ctx *cls_ctx;
+ f_str_t c;
+ GTree *tokens = NULL;
+ struct mime_text_part *part, *p1, *p2;
+ gboolean is_utf = FALSE, is_twopart = FALSE;
+ gint diff;
+
+ cur = g_list_first (task->text_parts);
+ if (cur != NULL && cur->next != NULL && cur->next->next == NULL) {
+ is_twopart = TRUE;
+ }
+
+ /* Get tokens from each element */
+ while (cur) {
+ part = cur->data;
+ /* Skip empty parts */
+ if (part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ c.begin = (gchar *)part->content->data;
+ c.len = part->content->len;
+ is_utf = part->is_utf;
+ ex = part->urls_offset;
+ if (is_twopart && cur->next == NULL) {
+ /*
+ * Compare part's content
+ * Note: here we don't have filters proceeded this message, so using pool variable is a bad idea
+ */
+ p1 = cur->prev->data;
+ p2 = part;
+ if (p1->diff_str != NULL && p2->diff_str != NULL) {
+ diff = compare_diff_distance (p1->diff_str, p2->diff_str);
+ }
+ else {
+ diff = fuzzy_compare_parts (p1, p2);
+ }
+ if (diff >= COMMON_PART_FACTOR) {
+ msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id);
+ break;
+ }
+ }
+ /* Get tokens */
+ if (!cl->tokenizer->tokenize_func (
+ cl->tokenizer, task->task_pool,
+ &c, &tokens, FALSE, is_utf, ex)) {
+ g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
+ return FALSE;
+ }
+ cur = g_list_next (cur);
+ }
+
+ /* Handle messages without text */
+ if (tokens == NULL) {
+ g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data");
+ msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id);
+ return FALSE;
+ }
+
+ /* Take care of subject */
+ tokenize_subject (task, &tokens);
+
+ /* Init classifier */
+ cls_ctx = cl->classifier->init_func (
+ task->task_pool, cl);
+ /* Learn */
+ if (!cl->classifier->learn_spam_func (
+ cls_ctx, task->worker->srv->statfile_pool,
+ tokens, task, is_spam, task->cfg->lua_state, err)) {
+ if (*err) {
+ msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message);
+ return FALSE;
+ }
+ else {
+ g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error");
+ msg_info ("learn failed for message <%s>, unknown learn error", task->message_id);
+ return FALSE;
+ }
+ }
+ /* Increase statistics */
+ task->worker->srv->stat->messages_learned++;
+
+ msg_info ("learn success for message <%s>",
+ task->message_id);
+ statfile_pool_plan_invalidate (task->worker->srv->statfile_pool,
+ DEFAULT_STATFILE_INVALIDATE_TIME,
+ DEFAULT_STATFILE_INVALIDATE_JITTER);
+
+ return TRUE;
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libmime/filter.h b/src/libmime/filter.h
new file mode 100644
index 000000000..258bd9447
--- /dev/null
+++ b/src/libmime/filter.h
@@ -0,0 +1,167 @@
+/**
+ * @file filter.h
+ * Filters logic implemetation
+ */
+
+#ifndef RSPAMD_FILTER_H
+#define RSPAMD_FILTER_H
+
+#include "config.h"
+#include "symbols_cache.h"
+#include "task.h"
+
+struct rspamd_task;
+struct rspamd_settings;
+struct classifier_config;
+
+typedef double (*metric_cons_func)(struct rspamd_task *task, const gchar *metric_name, const gchar *func_name);
+typedef void (*filter_func)(struct rspamd_task *task);
+
+enum filter_type { C_FILTER, PERL_FILTER };
+
+/**
+ * Filter structure
+ */
+struct filter {
+ gchar *func_name; /**< function name */
+ enum filter_type type; /**< filter type (c or perl) */
+ module_t *module;
+};
+
+/**
+ * Rspamd symbol
+ */
+struct symbol {
+ double score; /**< symbol's score */
+ GList *options; /**< list of symbol's options */
+ const gchar *name;
+};
+
+struct metric_action {
+ enum rspamd_metric_action action;
+ gdouble score;
+};
+
+/**
+ * Common definition of metric
+ */
+struct metric {
+ const gchar *name; /**< name of metric */
+ gchar *func_name; /**< name of consolidation function */
+ metric_cons_func func; /**< c consolidation function */
+ double grow_factor; /**< grow factor for metric */
+ GHashTable *symbols; /**< weights of symbols in metric */
+ GHashTable *descriptions; /**< descriptions of symbols in metric */
+ struct metric_action actions[METRIC_ACTION_MAX]; /**< all actions of the metric */
+ gchar *subject; /**< subject rewrite string */
+};
+
+/**
+ * Result of metric processing
+ */
+struct metric_result {
+ struct metric *metric; /**< pointer to metric structure */
+ double score; /**< total score */
+ GHashTable *symbols; /**< symbols of metric */
+ gboolean checked; /**< whether metric result is consolidated */
+ double grow_factor; /**< current grow factor */
+ struct rspamd_settings *user_settings; /**< settings for metric */
+ struct rspamd_settings *domain_settings; /**< settings for metric */
+};
+
+/**
+ * Composite structure
+ */
+struct rspamd_composite {
+ struct expression *expr;
+ gint id;
+};
+
+/**
+ * Process all filters
+ * @param task worker's task that present message from user
+ * @return 0 - if there is non-finished tasks and 1 if processing is completed
+ */
+gint process_filters (struct rspamd_task *task);
+
+/**
+ * Process message with statfiles
+ * @param task worker's task that present message from user
+ */
+void process_statfiles (struct rspamd_task *task);
+
+/**
+ * Process message with statfiles threaded
+ * @param data worker's task that present message from user
+ */
+void process_statfiles_threaded (gpointer data, gpointer user_data);
+
+/**
+ * Insert a result to task
+ * @param task worker's task that present message from user
+ * @param metric_name metric's name to which we need to insert result
+ * @param symbol symbol to insert
+ * @param flag numeric weight for symbol
+ * @param opts list of symbol's options
+ */
+void insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts);
+
+/**
+ * Insert a single result to task
+ * @param task worker's task that present message from user
+ * @param metric_name metric's name to which we need to insert result
+ * @param symbol symbol to insert
+ * @param flag numeric weight for symbol
+ * @param opts list of symbol's options
+ */
+void insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts);
+
+/**
+ * Process all results and form composite metrics from existent metrics as it is defined in config
+ * @param task worker's task that present message from user
+ */
+void make_composites (struct rspamd_task *task);
+
+/**
+ * Default consolidation function for metric, it get all symbols and multiply symbol
+ * weight by some factor that is specified in config. Default factor is 1.
+ * @param task worker's task that present message from user
+ * @param metric_name name of metric
+ * @return result metric weight
+ */
+double factor_consolidation_func (struct rspamd_task *task, const gchar *metric_name, const gchar *unused);
+
+/*
+ * Learn specified statfile with message in a task
+ * @param statfile symbol of statfile
+ * @param task worker's task object
+ * @param err pointer to GError
+ * @return true if learn succeed
+ */
+gboolean learn_task (const gchar *statfile, struct rspamd_task *task, GError **err);
+
+/*
+ * Learn specified statfile with message in a task
+ * @param statfile symbol of statfile
+ * @param task worker's task object
+ * @param err pointer to GError
+ * @return true if learn succeed
+ */
+gboolean learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err);
+
+/*
+ * Get action from a string
+ */
+gboolean check_action_str (const gchar *data, gint *result);
+
+/*
+ * Return textual representation of action enumeration
+ */
+const gchar *str_action_metric (enum rspamd_metric_action action);
+
+/*
+ * Get action for specific metric
+ */
+gint check_metric_action (double score, double required_score, struct metric *metric);
+
+#endif
diff --git a/src/libmime/images.c b/src/libmime/images.c
new file mode 100644
index 000000000..ff07bbd72
--- /dev/null
+++ b/src/libmime/images.c
@@ -0,0 +1,255 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "images.h"
+#include "main.h"
+#include "message.h"
+
+static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10};
+static const guint8 jpg_sig1[] = {0xff, 0xd8};
+static const guint8 jpg_sig2[] = {'J', 'F', 'I', 'F'};
+static const guint8 gif_signature[] = {'G', 'I', 'F', '8'};
+static const guint8 bmp_signature[] = {'B', 'M'};
+
+static void process_image (struct rspamd_task *task, struct mime_part *part);
+
+
+void
+process_images (struct rspamd_task *task)
+{
+ GList *cur;
+ struct mime_part *part;
+
+ cur = task->parts;
+ while (cur) {
+ part = cur->data;
+ if (g_mime_content_type_is_type (part->type, "image", "*") && part->content->len > 0) {
+ process_image (task, part);
+ }
+ cur = g_list_next (cur);
+ }
+
+}
+
+static enum known_image_types
+detect_image_type (GByteArray *data)
+{
+ if (data->len > sizeof (png_signature) / sizeof (png_signature[0])) {
+ if (memcmp (data->data, png_signature, sizeof (png_signature)) == 0) {
+ return IMAGE_TYPE_PNG;
+ }
+ }
+ if (data->len > 10) {
+ if (memcmp (data->data, jpg_sig1, sizeof (jpg_sig1)) == 0) {
+ if (memcmp (data->data + 6, jpg_sig2, sizeof (jpg_sig2)) == 0) {
+ return IMAGE_TYPE_JPG;
+ }
+ }
+ }
+ if (data->len > sizeof (gif_signature) / sizeof (gif_signature[0])) {
+ if (memcmp (data->data, gif_signature, sizeof (gif_signature)) == 0) {
+ return IMAGE_TYPE_GIF;
+ }
+ }
+ if (data->len > sizeof (bmp_signature) / sizeof (bmp_signature[0])) {
+ if (memcmp (data->data, bmp_signature, sizeof (bmp_signature)) == 0) {
+ return IMAGE_TYPE_BMP;
+ }
+ }
+
+ return IMAGE_TYPE_UNKNOWN;
+}
+
+
+static struct rspamd_image *
+process_png_image (struct rspamd_task *task, GByteArray *data)
+{
+ struct rspamd_image *img;
+ guint32 t;
+ guint8 *p;
+
+ if (data->len < 24) {
+ msg_info ("bad png detected (maybe striped): <%s>", task->message_id);
+ return NULL;
+ }
+
+ /* In png we should find iHDR section and get data from it */
+ /* Skip signature and read header section */
+ p = data->data + 12;
+ if (memcmp (p, "IHDR", 4) != 0) {
+ msg_info ("png doesn't begins with IHDR section", task->message_id);
+ return NULL;
+ }
+
+ img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+ img->type = IMAGE_TYPE_PNG;
+ img->data = data;
+
+ p += 4;
+ memcpy (&t, p, sizeof (guint32));
+ img->width = ntohl (t);
+ p += 4;
+ memcpy (&t, p, sizeof (guint32));
+ img->height = ntohl (t);
+
+ return img;
+}
+
+static struct rspamd_image *
+process_jpg_image (struct rspamd_task *task, GByteArray *data)
+{
+ guint8 *p;
+ guint16 t;
+ gsize remain;
+ struct rspamd_image *img;
+
+ img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+ img->type = IMAGE_TYPE_JPG;
+ img->data = data;
+
+ p = data->data;
+ remain = data->len;
+ /* In jpeg we should find any data stream (ff c0 .. ff c3) and extract its height and width */
+ while (remain --) {
+ if (*p == 0xFF && remain > 8 && (*(p + 1) >= 0xC0 && *(p + 1) <= 0xC3)) {
+ memcpy (&t, p + 5, sizeof (guint16));
+ img->height = ntohs (t);
+ memcpy (&t, p + 7, sizeof (guint16));
+ img->width = ntohs (t);
+ return img;
+ }
+ p ++;
+ }
+
+ return NULL;
+}
+
+static struct rspamd_image *
+process_gif_image (struct rspamd_task *task, GByteArray *data)
+{
+ struct rspamd_image *img;
+ guint8 *p;
+ guint16 t;
+
+ if (data->len < 10) {
+ msg_info ("bad gif detected (maybe striped): <%s>", task->message_id);
+ return NULL;
+ }
+
+ img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+ img->type = IMAGE_TYPE_GIF;
+ img->data = data;
+
+ p = data->data + 6;
+ memcpy (&t, p, sizeof (guint16));
+ img->width = GUINT16_FROM_LE (t);
+ memcpy (&t, p + 2, sizeof (guint16));
+ img->height = GUINT16_FROM_LE (t);
+
+ return img;
+}
+
+static struct rspamd_image *
+process_bmp_image (struct rspamd_task *task, GByteArray *data)
+{
+ struct rspamd_image *img;
+ gint32 t;
+ guint8 *p;
+
+
+
+ if (data->len < 28) {
+ msg_info ("bad bmp detected (maybe striped): <%s>", task->message_id);
+ return NULL;
+ }
+
+ img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image));
+ img->type = IMAGE_TYPE_BMP;
+ img->data = data;
+ p = data->data + 18;
+ memcpy (&t, p, sizeof (gint32));
+ img->width = abs (GINT32_FROM_LE (t));
+ memcpy (&t, p + 4, sizeof (gint32));
+ img->height = abs (GINT32_FROM_LE (t));
+
+ return img;
+}
+
+static void
+process_image (struct rspamd_task *task, struct mime_part *part)
+{
+ enum known_image_types type;
+ struct rspamd_image *img = NULL;
+ if ((type = detect_image_type (part->content)) != IMAGE_TYPE_UNKNOWN) {
+ switch (type) {
+ case IMAGE_TYPE_PNG:
+ img = process_png_image (task, part->content);
+ break;
+ case IMAGE_TYPE_JPG:
+ img = process_jpg_image (task, part->content);
+ break;
+ case IMAGE_TYPE_GIF:
+ img = process_gif_image (task, part->content);
+ break;
+ case IMAGE_TYPE_BMP:
+ img = process_bmp_image (task, part->content);
+ break;
+ default:
+ img = NULL;
+ break;
+ }
+ }
+
+ if (img != NULL) {
+ debug_task ("detected %s image of size %ud x %ud in message <%s>",
+ image_type_str (img->type),
+ img->width, img->height,
+ task->message_id);
+ img->filename = part->filename;
+ task->images = g_list_prepend (task->images, img);
+ }
+}
+
+const gchar *
+image_type_str (enum known_image_types type)
+{
+ switch (type) {
+ case IMAGE_TYPE_PNG:
+ return "PNG";
+ break;
+ case IMAGE_TYPE_JPG:
+ return "JPEG";
+ break;
+ case IMAGE_TYPE_GIF:
+ return "GIF";
+ break;
+ case IMAGE_TYPE_BMP:
+ return "BMP";
+ break;
+ default:
+ return "unknown";
+ }
+
+ return "unknown";
+}
diff --git a/src/libmime/images.h b/src/libmime/images.h
new file mode 100644
index 000000000..c43941ebc
--- /dev/null
+++ b/src/libmime/images.h
@@ -0,0 +1,33 @@
+#ifndef IMAGES_H_
+#define IMAGES_H_
+
+#include "config.h"
+#include "main.h"
+
+enum known_image_types {
+ IMAGE_TYPE_PNG,
+ IMAGE_TYPE_JPG,
+ IMAGE_TYPE_GIF,
+ IMAGE_TYPE_BMP,
+ IMAGE_TYPE_UNKNOWN = 9000
+};
+
+struct rspamd_image {
+ enum known_image_types type;
+ GByteArray *data;
+ guint32 width;
+ guint32 height;
+ const gchar *filename;
+};
+
+/*
+ * Process images from a worker task
+ */
+void process_images (struct rspamd_task *task);
+
+/*
+ * Get textual representation of an image's type
+ */
+const gchar *image_type_str (enum known_image_types type);
+
+#endif /* IMAGES_H_ */
diff --git a/src/libmime/message.c b/src/libmime/message.c
new file mode 100644
index 000000000..4567869e9
--- /dev/null
+++ b/src/libmime/message.c
@@ -0,0 +1,1764 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "main.h"
+#include "message.h"
+#include "cfg_file.h"
+#include "html.h"
+#include "images.h"
+
+#define RECURSION_LIMIT 30
+#define UTF8_CHARSET "UTF-8"
+
+GByteArray *
+strip_html_tags (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, GByteArray * src, gint *stateptr)
+{
+ uint8_t *p, *rp, *tbegin = NULL, *end, c, lc;
+ gint br, i = 0, depth = 0, in_q = 0;
+ gint state = 0;
+ GByteArray *buf;
+ GNode *level_ptr = NULL;
+ gboolean erase = FALSE;
+
+ if (stateptr)
+ state = *stateptr;
+
+ buf = g_byte_array_sized_new (src->len);
+ g_byte_array_append (buf, src->data, src->len);
+
+ c = *src->data;
+ lc = '\0';
+ p = src->data;
+ rp = buf->data;
+ end = src->data + src->len;
+ br = 0;
+
+ while (i < (gint)src->len) {
+ switch (c) {
+ case '\0':
+ break;
+ case '<':
+ if (g_ascii_isspace (*(p + 1))) {
+ goto reg_char;
+ }
+ if (state == 0) {
+ lc = '<';
+ tbegin = p + 1;
+ state = 1;
+ }
+ else if (state == 1) {
+ /* Opening bracket without closing one */
+ p --;
+ while (g_ascii_isspace (*p) && p > src->data) {
+ p --;
+ }
+ p ++;
+ goto unbreak_tag;
+ }
+ break;
+
+ case '(':
+ if (state == 2) {
+ if (lc != '"' && lc != '\'') {
+ lc = '(';
+ br++;
+ }
+ }
+ else if (state == 0 && !erase) {
+ *(rp++) = c;
+ }
+ break;
+
+ case ')':
+ if (state == 2) {
+ if (lc != '"' && lc != '\'') {
+ lc = ')';
+ br--;
+ }
+ }
+ else if (state == 0 && !erase) {
+ *(rp++) = c;
+ }
+ break;
+
+ case '>':
+ if (depth) {
+ depth--;
+ break;
+ }
+
+ if (in_q) {
+ break;
+ }
+unbreak_tag:
+ switch (state) {
+ case 1: /* HTML/XML */
+ lc = '>';
+ in_q = state = 0;
+ erase = !add_html_node (task, pool, part, tbegin, p - tbegin, end - tbegin, &level_ptr);
+ break;
+
+ case 2: /* PHP */
+ if (!br && lc != '\"' && *(p - 1) == '?') {
+ in_q = state = 0;
+ }
+ break;
+
+ case 3:
+ in_q = state = 0;
+ break;
+
+ case 4: /* JavaScript/CSS/etc... */
+ if (p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '-') {
+ in_q = state = 0;
+ }
+ break;
+
+ default:
+ if (!erase) {
+ *(rp++) = c;
+ }
+ break;
+ }
+ break;
+
+ case '"':
+ case '\'':
+ if (state == 2 && *(p - 1) != '\\') {
+ if (lc == c) {
+ lc = '\0';
+ }
+ else if (lc != '\\') {
+ lc = c;
+ }
+ }
+ else if (state == 0 && !erase) {
+ *(rp++) = c;
+ }
+ if (state && p != src->data && *(p - 1) != '\\' && (!in_q || *p == in_q)) {
+ if (in_q) {
+ in_q = 0;
+ }
+ else {
+ in_q = *p;
+ }
+ }
+ break;
+
+ case '!':
+ /* JavaScript & Other HTML scripting languages */
+ if (state == 1 && *(p - 1) == '<') {
+ state = 3;
+ lc = c;
+ }
+ else {
+ if (state == 0 && !erase) {
+ *(rp++) = c;
+ }
+ }
+ break;
+
+ case '-':
+ if (state == 3 && p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '!') {
+ state = 4;
+ }
+ else {
+ goto reg_char;
+ }
+ break;
+
+ case '?':
+
+ if (state == 1 && *(p - 1) == '<') {
+ br = 0;
+ state = 2;
+ break;
+ }
+
+ case 'E':
+ case 'e':
+ /* !DOCTYPE exception */
+ if (state == 3 && p > src->data + 6
+ && g_ascii_tolower (*(p - 1)) == 'p'
+ && g_ascii_tolower (*(p - 2)) == 'y'
+ && g_ascii_tolower (*(p - 3)) == 't' && g_ascii_tolower (*(p - 4)) == 'c' && g_ascii_tolower (*(p - 5)) == 'o' && g_ascii_tolower (*(p - 6)) == 'd') {
+ state = 1;
+ break;
+ }
+ /* fall-through */
+
+ case 'l':
+
+ /* swm: If we encounter '<?xml' then we shouldn't be in
+ * state == 2 (PHP). Switch back to HTML.
+ */
+
+ if (state == 2 && p > src->data + 2 && *(p - 1) == 'm' && *(p - 2) == 'x') {
+ state = 1;
+ break;
+ }
+
+ /* fall-through */
+ default:
+ reg_char:
+ if (state == 0 && !erase) {
+ *(rp++) = c;
+ }
+ break;
+ }
+ i++;
+ if (i < (gint)src->len) {
+ c = *(++p);
+ }
+ }
+ if (rp < buf->data + src->len) {
+ *rp = '\0';
+ g_byte_array_set_size (buf, rp - buf->data);
+ }
+
+ /* Check tag balancing */
+ if (level_ptr && level_ptr->data != NULL) {
+ part->is_balanced = FALSE;
+ }
+
+ if (stateptr) {
+ *stateptr = state;
+ }
+
+ return buf;
+}
+
+static void
+parse_qmail_recv (rspamd_mempool_t * pool, gchar *line, struct received_header *r)
+{
+ gchar *s, *p, t;
+
+ /* We are interested only with received from network headers */
+ if ((p = strstr (line, "from network")) == NULL) {
+ r->is_error = 2;
+ return;
+ }
+
+ p += sizeof ("from network") - 1;
+ while (g_ascii_isspace (*p) || *p == '[') {
+ p++;
+ }
+ /* format is ip/host */
+ s = p;
+ if (*p) {
+ while (g_ascii_isdigit (*++p) || *p == '.');
+ if (*p != '/') {
+ r->is_error = 1;
+ return;
+ }
+ else {
+ *p = '\0';
+ r->real_ip = rspamd_mempool_strdup (pool, s);
+ *p = '/';
+ /* Now try to parse hostname */
+ s = ++p;
+ while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+ p++;
+ }
+ t = *p;
+ *p = '\0';
+ r->real_hostname = rspamd_mempool_strdup (pool, s);
+ *p = t;
+ }
+ }
+}
+
+static void
+parse_recv_header (rspamd_mempool_t * pool, gchar *line, struct received_header *r)
+{
+ gchar *p, *s, t, **res = NULL;
+ enum {
+ RSPAMD_RECV_STATE_INIT = 0,
+ RSPAMD_RECV_STATE_FROM,
+ RSPAMD_RECV_STATE_IP_BLOCK,
+ RSPAMD_RECV_STATE_BRACES_BLOCK,
+ RSPAMD_RECV_STATE_BY_BLOCK,
+ RSPAMD_RECV_STATE_PARSE_IP,
+ RSPAMD_RECV_STATE_SKIP_SPACES,
+ RSPAMD_RECV_STATE_ERROR
+ } state = RSPAMD_RECV_STATE_INIT,
+ next_state = RSPAMD_RECV_STATE_INIT;
+ gboolean is_exim = FALSE;
+
+ g_strstrip (line);
+ p = line;
+ s = line;
+
+ while (*p) {
+ switch (state) {
+ /* Initial state, search for from */
+ case RSPAMD_RECV_STATE_INIT:
+ if (*p == 'f' || *p == 'F') {
+ if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == 'o' && g_ascii_tolower (*++p) == 'm') {
+ p++;
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_FROM;
+ }
+ }
+ else if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
+ state = RSPAMD_RECV_STATE_IP_BLOCK;
+ }
+ else {
+ /* This can be qmail header, parse it separately */
+ parse_qmail_recv (pool, line, r);
+ return;
+ }
+ break;
+ /* Read hostname */
+ case RSPAMD_RECV_STATE_FROM:
+ if (*p == '[') {
+ /* This should be IP address */
+ res = &r->from_ip;
+ state = RSPAMD_RECV_STATE_PARSE_IP;
+ next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+ s = ++p;
+ }
+ else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+ p++;
+ }
+ else {
+ t = *p;
+ *p = '\0';
+ r->from_hostname = rspamd_mempool_strdup (pool, s);
+ *p = t;
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+ }
+ break;
+ /* Try to extract additional info */
+ case RSPAMD_RECV_STATE_IP_BLOCK:
+ /* Try to extract ip or () info or by */
+ if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
+ p += 2;
+ /* Skip spaces after by */
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_BY_BLOCK;
+ }
+ else if (*p == '(') {
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+ p++;
+ }
+ else if (*p == '[') {
+ /* Got ip before '(' so extract it */
+ s = ++p;
+ res = &r->from_ip;
+ state = RSPAMD_RECV_STATE_PARSE_IP;
+ next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+ }
+ else {
+ p++;
+ }
+ break;
+ /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */
+ case RSPAMD_RECV_STATE_BRACES_BLOCK:
+ /* End of block */
+ if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' ||
+ *p == '_' || *p == ':') {
+ p++;
+ }
+ else if (*p == '[') {
+ s = ++p;
+ state = RSPAMD_RECV_STATE_PARSE_IP;
+ res = &r->real_ip;
+ next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+ }
+ else {
+ if (p > s) {
+ /* Got some real hostname */
+ /* check whether it is helo or p is not space symbol */
+ if (!g_ascii_isspace (*p) || *(p + 1) != '[') {
+ /* Exim style ([ip]:port helo=hostname) */
+ if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) {
+ /* Ip ending */
+ is_exim = TRUE;
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+ }
+ else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) {
+ p ++;
+ is_exim = TRUE;
+ if (r->real_hostname == NULL && r->from_hostname != NULL) {
+ r->real_hostname = r->from_hostname;
+ }
+ s = p;
+ while (*p != ')' && !g_ascii_isspace (*p) && *p != '\0') {
+ p ++;
+ }
+ if (p > s) {
+ r->from_hostname = rspamd_mempool_alloc (pool, p - s + 1);
+ rspamd_strlcpy (r->from_hostname, s, p - s + 1);
+ }
+ }
+ else if (p - s == 4 && memcmp (s, "port=", 5) == 0) {
+ p ++;
+ is_exim = TRUE;
+ while (g_ascii_isdigit (*p)) {
+ p ++;
+ }
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+ }
+ else if (*p == '=' && is_exim) {
+ /* Just skip unknown pairs */
+ p ++;
+ while (!g_ascii_isspace (*p) && *p != ')' && *p != '\0') {
+ p ++;
+ }
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+ }
+ else {
+ /* skip all */
+ while (*p++ != ')' && *p != '\0');
+ state = RSPAMD_RECV_STATE_IP_BLOCK;
+ }
+ }
+ else {
+ /* Postfix style (hostname [ip]) */
+ t = *p;
+ *p = '\0';
+ r->real_hostname = rspamd_mempool_strdup (pool, s);
+ *p = t;
+ /* Now parse ip */
+ p += 2;
+ s = p;
+ res = &r->real_ip;
+ state = RSPAMD_RECV_STATE_PARSE_IP;
+ next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+ continue;
+ }
+ if (*p == ')') {
+ p ++;
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+ }
+ }
+ else if (*p == ')') {
+ p ++;
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+ }
+ else {
+ r->is_error = 1;
+ return;
+ }
+ }
+ break;
+ /* Got by word */
+ case RSPAMD_RECV_STATE_BY_BLOCK:
+ /* Here can be only hostname */
+ if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-'
+ || *p == '_') && p[1] != '\0') {
+ p++;
+ }
+ else {
+ /* We got something like hostname */
+ if (p[1] != '\0') {
+ t = *p;
+ *p = '\0';
+ r->by_hostname = rspamd_mempool_strdup (pool, s);
+ *p = t;
+ }
+ else {
+ r->by_hostname = rspamd_mempool_strdup (pool, s);
+ }
+ /* Now end of parsing */
+ if (is_exim) {
+ /* Adjust for exim received */
+ if (r->real_ip == NULL && r->from_ip != NULL) {
+ r->real_ip = r->from_ip;
+ }
+ else if (r->from_ip == NULL && r->real_ip != NULL) {
+ r->from_ip = r->real_ip;
+ if (r->real_hostname == NULL && r->from_hostname != NULL) {
+ r->real_hostname = r->from_hostname;
+ }
+ }
+ }
+ return;
+ }
+ break;
+
+ /* Extract ip */
+ case RSPAMD_RECV_STATE_PARSE_IP:
+ while (g_ascii_isxdigit (*p) || *p == '.' || *p == ':') {
+ p ++;
+ }
+ if (*p != ']') {
+ /* Not an ip in fact */
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ p++;
+ }
+ else {
+ *p = '\0';
+ *res = rspamd_mempool_strdup (pool, s);
+ *p = ']';
+ p++;
+ state = RSPAMD_RECV_STATE_SKIP_SPACES;
+ }
+ break;
+
+ /* Skip spaces */
+ case RSPAMD_RECV_STATE_SKIP_SPACES:
+ if (!g_ascii_isspace (*p)) {
+ state = next_state;
+ s = p;
+ }
+ else {
+ p++;
+ }
+ break;
+ default:
+ r->is_error = 1;
+ return;
+ break;
+ }
+ }
+
+ r->is_error = 1;
+ return;
+}
+
+/* Convert raw headers to a list of struct raw_header * */
+static void
+process_raw_headers (struct rspamd_task *task)
+{
+ struct raw_header *new = NULL, *lp;
+ gchar *p, *c, *tmp, *tp;
+ gint state = 0, l, next_state = 100, err_state = 100, t_state;
+ gboolean valid_folding = FALSE;
+
+ p = task->raw_headers_str;
+ c = p;
+ while (*p) {
+ /* FSM for processing headers */
+ switch (state) {
+ case 0:
+ /* Begin processing headers */
+ if (!g_ascii_isalpha (*p)) {
+ /* We have some garbage at the beginning of headers, skip this line */
+ state = 100;
+ next_state = 0;
+ }
+ else {
+ state = 1;
+ c = p;
+ }
+ break;
+ case 1:
+ /* We got something like header's name */
+ if (*p == ':') {
+ new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct raw_header));
+ l = p - c;
+ tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+ rspamd_strlcpy (tmp, c, l + 1);
+ new->name = tmp;
+ new->empty_separator = TRUE;
+ p ++;
+ state = 2;
+ c = p;
+ }
+ else if (g_ascii_isspace (*p)) {
+ /* Not header but some garbage */
+ state = 100;
+ next_state = 0;
+ }
+ else {
+ p ++;
+ }
+ break;
+ case 2:
+ /* We got header's name, so skip any \t or spaces */
+ if (*p == '\t') {
+ new->tab_separated = TRUE;
+ new->empty_separator = FALSE;
+ p ++;
+ }
+ else if (*p == ' ') {
+ new->empty_separator = FALSE;
+ p ++;
+ }
+ else if (*p == '\n' || *p == '\r') {
+ /* Process folding */
+ state = 99;
+ l = p - c;
+ if (l > 0) {
+ tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+ rspamd_strlcpy (tmp, c, l + 1);
+ new->separator = tmp;
+ }
+ next_state = 3;
+ err_state = 5;
+ c = p;
+ }
+ else {
+ /* Process value */
+ l = p - c;
+ if (l >= 0) {
+ tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+ rspamd_strlcpy (tmp, c, l + 1);
+ new->separator = tmp;
+ }
+ c = p;
+ state = 3;
+ }
+ break;
+ case 3:
+ if (*p == '\r' || *p == '\n') {
+ /* Hold folding */
+ state = 99;
+ next_state = 3;
+ err_state = 4;
+ }
+ else if (*(p + 1) == '\0') {
+ state = 4;
+ }
+ else {
+ p ++;
+ }
+ break;
+ case 4:
+ /* Copy header's value */
+ l = p - c;
+ tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+ tp = tmp;
+ t_state = 0;
+ while (l --) {
+ if (t_state == 0) {
+ /* Before folding */
+ if (*c == '\n' || *c == '\r') {
+ t_state = 1;
+ c ++;
+ *tp ++ = ' ';
+ }
+ else {
+ *tp ++ = *c ++;
+ }
+ }
+ else if (t_state == 1) {
+ /* Inside folding */
+ if (g_ascii_isspace (*c)) {
+ c++;
+ }
+ else {
+ t_state = 0;
+ *tp ++ = *c ++;
+ }
+ }
+ }
+ /* Strip last space that can be added by \r\n parsing */
+ if (*(tp - 1) == ' ') {
+ tp --;
+ }
+ *tp = '\0';
+ new->value = tmp;
+ new->next = NULL;
+ if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+ while (lp->next != NULL) {
+ lp = lp->next;
+ }
+ lp->next = new;
+ }
+ else {
+ g_hash_table_insert (task->raw_headers, new->name, new);
+ }
+ debug_task ("add raw header %s: %s", new->name, new->value);
+ state = 0;
+ break;
+ case 5:
+ /* Header has only name, no value */
+ new->next = NULL;
+ new->value = "";
+ if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+ while (lp->next != NULL) {
+ lp = lp->next;
+ }
+ lp->next = new;
+ }
+ else {
+ g_hash_table_insert (task->raw_headers, new->name, new);
+ }
+ state = 0;
+ debug_task ("add raw header %s: %s", new->name, new->value);
+ break;
+ case 99:
+ /* Folding state */
+ if (*(p + 1) == '\0') {
+ state = err_state;
+ }
+ else {
+ if (*p == '\r' || *p == '\n') {
+ p ++;
+ valid_folding = FALSE;
+ }
+ else if (*p == '\t' || *p == ' ') {
+ /* Valid folding */
+ p ++;
+ valid_folding = TRUE;
+ }
+ else {
+ if (valid_folding) {
+ debug_task ("go to state: %d->%d", state, next_state);
+ state = next_state;
+ }
+ else {
+ /* Fall back */
+ debug_task ("go to state: %d->%d", state, err_state);
+ state = err_state;
+ }
+ }
+ }
+ break;
+ case 100:
+ /* Fail state, skip line */
+ if (*p == '\r') {
+ if (*(p + 1) == '\n') {
+ p ++;
+ }
+ p ++;
+ state = next_state;
+ }
+ else if (*p == '\n') {
+ if (*(p + 1) == '\r') {
+ p ++;
+ }
+ p ++;
+ state = next_state;
+ }
+ else if (*(p + 1) == '\0') {
+ state = next_state;
+ p ++;
+ }
+ else {
+ p ++;
+ }
+ break;
+ }
+ }
+}
+
+static void
+free_byte_array_callback (void *pointer)
+{
+ GByteArray *arr = (GByteArray *) pointer;
+ g_byte_array_free (arr, TRUE);
+}
+
+static GByteArray *
+convert_text_to_utf (struct rspamd_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
+{
+ GError *err = NULL;
+ gsize read_bytes, write_bytes;
+ const gchar *charset;
+ gchar *res_str;
+ GByteArray *result_array;
+
+ if (task->cfg->raw_mode) {
+ text_part->is_raw = TRUE;
+ return part_content;
+ }
+
+ if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) {
+ text_part->is_raw = TRUE;
+ return part_content;
+ }
+
+ if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
+ if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+ text_part->is_raw = FALSE;
+ text_part->is_utf = TRUE;
+ return part_content;
+ }
+ else {
+ msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id);
+ text_part->is_raw = TRUE;
+ return part_content;
+ }
+ }
+
+ res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
+ if (res_str == NULL) {
+ msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem");
+ text_part->is_raw = TRUE;
+ return part_content;
+ }
+
+ result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
+ result_array->data = res_str;
+ result_array->len = write_bytes;
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, res_str);
+ text_part->is_raw = FALSE;
+ text_part->is_utf = TRUE;
+
+ return result_array;
+}
+
+static void
+process_text_part (struct rspamd_task *task, GByteArray *part_content, GMimeContentType *type,
+ GMimeObject *part, GMimeObject *parent, gboolean is_empty)
+{
+ struct mime_text_part *text_part;
+ const gchar *cd;
+
+ /* Skip attachements */
+#ifndef GMIME24
+ cd = g_mime_part_get_content_disposition (GMIME_PART (part));
+ if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) {
+ debug_task ("skip attachments for checking as text parts");
+ return;
+ }
+#else
+ cd = g_mime_object_get_disposition (GMIME_OBJECT (part));
+ if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) {
+ debug_task ("skip attachments for checking as text parts");
+ return;
+ }
+#endif
+
+ if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
+ debug_task ("got urls from text/html part");
+
+ text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
+ text_part->is_html = TRUE;
+ if (is_empty) {
+ text_part->is_empty = TRUE;
+ text_part->orig = NULL;
+ text_part->content = NULL;
+ task->text_parts = g_list_prepend (task->text_parts, text_part);
+ return;
+ }
+ text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
+ text_part->is_balanced = TRUE;
+ text_part->html_nodes = NULL;
+ text_part->parent = parent;
+
+ text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL);
+
+ if (text_part->html_nodes == NULL) {
+ url_parse_text (task->task_pool, task, text_part, FALSE);
+ }
+ else {
+ decode_entitles (text_part->content->data, &text_part->content->len);
+ url_parse_text (task->task_pool, task, text_part, FALSE);
+#if 0
+ url_parse_text (task->task_pool, task, text_part, TRUE);
+#endif
+ }
+
+ fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff);
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content);
+ task->text_parts = g_list_prepend (task->text_parts, text_part);
+ }
+ else if (g_mime_content_type_is_type (type, "text", "*")) {
+ debug_task ("got urls from text/plain part");
+
+ text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
+ text_part->is_html = FALSE;
+ text_part->parent = parent;
+ if (is_empty) {
+ text_part->is_empty = TRUE;
+ text_part->orig = NULL;
+ text_part->content = NULL;
+ task->text_parts = g_list_prepend (task->text_parts, text_part);
+ return;
+ }
+ text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
+ text_part->content = text_part->orig;
+ url_parse_text (task->task_pool, task, text_part, FALSE);
+ fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff);
+ task->text_parts = g_list_prepend (task->text_parts, text_part);
+ }
+}
+
+#ifdef GMIME24
+static void
+mime_foreach_callback (GMimeObject * parent, GMimeObject * part, gpointer user_data)
+#else
+static void
+mime_foreach_callback (GMimeObject * part, gpointer user_data)
+#endif
+{
+ struct rspamd_task *task = (struct rspamd_task *)user_data;
+ struct mime_part *mime_part;
+ GMimeContentType *type;
+ GMimeDataWrapper *wrapper;
+ GMimeStream *part_stream;
+ GByteArray *part_content;
+
+ task->parts_count++;
+
+ /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
+
+ /* find out what class 'part' is... */
+ if (GMIME_IS_MESSAGE_PART (part)) {
+ /* message/rfc822 or message/news */
+ GMimeMessage *message;
+
+ /* g_mime_message_foreach_part() won't descend into
+ child message parts, so if we want to count any
+ subparts of this child message, we'll have to call
+ g_mime_message_foreach_part() again here. */
+
+ message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
+ if (task->parser_recursion++ < RECURSION_LIMIT) {
+#ifdef GMIME24
+ g_mime_message_foreach (message, mime_foreach_callback, task);
+#else
+ g_mime_message_foreach_part (message, mime_foreach_callback, task);
+#endif
+ }
+ else {
+ msg_err ("endless recursion detected: %d", task->parser_recursion);
+ return;
+ }
+#ifndef GMIME24
+ g_object_unref (message);
+#endif
+ }
+ else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
+ /* message/partial */
+
+ /* this is an incomplete message part, probably a
+ large message that the sender has broken into
+ smaller parts and is sending us bit by bit. we
+ could save some info about it so that we could
+ piece this back together again once we get all the
+ parts? */
+ }
+ else if (GMIME_IS_MULTIPART (part)) {
+ /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
+ task->parser_parent_part = part;
+#ifndef GMIME24
+ debug_task ("detected multipart part");
+ /* we'll get to finding out if this is a signed/encrypted multipart later... */
+ if (task->parser_recursion++ < RECURSION_LIMIT) {
+ g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, task);
+ }
+ else {
+ msg_err ("endless recursion detected: %d", task->parser_recursion);
+ return;
+ }
+#endif
+ }
+ else if (GMIME_IS_PART (part)) {
+ /* a normal leaf part, could be text/plain or image/jpeg etc */
+#ifdef GMIME24
+ type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (part));
+#else
+ type = (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part));
+#endif
+ if (type == NULL) {
+ msg_warn ("type of part is unknown, assume text/plain");
+ type = g_mime_content_type_new ("text", "plain");
+#ifdef GMIME24
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, type);
+#else
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_mime_content_type_destroy, type);
+#endif
+ }
+ wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+#ifdef GMIME24
+ if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) {
+#else
+ if (wrapper != NULL) {
+#endif
+ part_stream = g_mime_stream_mem_new ();
+ if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
+ g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE);
+ part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
+ g_object_unref (part_stream);
+ mime_part = rspamd_mempool_alloc (task->task_pool, sizeof (struct mime_part));
+ mime_part->type = type;
+ mime_part->content = part_content;
+ mime_part->parent = task->parser_parent_part;
+ mime_part->filename = g_mime_part_get_filename (GMIME_PART (part));
+ debug_task ("found part with content-type: %s/%s", type->type, type->subtype);
+ task->parts = g_list_prepend (task->parts, mime_part);
+ /* Skip empty parts */
+ process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0));
+ }
+ else {
+ msg_warn ("write to stream failed: %d, %s", errno, strerror (errno));
+ }
+#ifndef GMIME24
+ g_object_unref (wrapper);
+#endif
+ }
+ else {
+ msg_warn ("cannot get wrapper for mime part, type of part: %s/%s", type->type, type->subtype);
+ }
+ }
+ else {
+ g_assert_not_reached ();
+ }
+}
+
+static void
+destroy_message (void *pointer)
+{
+ GMimeMessage *msg = pointer;
+
+ msg_debug ("freeing pointer %p", msg);
+ g_object_unref (msg);
+}
+
+gint
+process_message (struct rspamd_task *task)
+{
+ GMimeMessage *message;
+ GMimeParser *parser;
+ GMimeStream *stream;
+ GByteArray *tmp;
+ GList *first, *cur;
+ GMimePart *part;
+ GMimeDataWrapper *wrapper;
+ struct received_header *recv;
+ gchar *mid, *url_str, *p, *end, *url_end;
+ struct uri *subject_url;
+ gsize len;
+ gint rc;
+
+ tmp = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
+ tmp->data = task->msg->str;
+ tmp->len = task->msg->len;
+
+ stream = g_mime_stream_mem_new_with_byte_array (tmp);
+ /*
+ * This causes g_mime_stream not to free memory by itself as it is memory allocated by
+ * pool allocator
+ */
+ g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
+
+ if (task->is_mime) {
+
+ debug_task ("construct mime parser from string length %d", (gint)task->msg->len);
+ /* create a new parser object to parse the stream */
+ parser = g_mime_parser_new_with_stream (stream);
+ g_object_unref (stream);
+
+ /* parse the message from the stream */
+ message = g_mime_parser_construct_message (parser);
+
+ if (message == NULL) {
+ msg_warn ("cannot construct mime from stream");
+ return -1;
+ }
+
+ task->message = message;
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message);
+
+ /* Save message id for future use */
+ task->message_id = g_mime_message_get_message_id (task->message);
+ if (task->message_id == NULL) {
+ task->message_id = "undef";
+ }
+
+ task->parser_recursion = 0;
+#ifdef GMIME24
+ g_mime_message_foreach (message, mime_foreach_callback, task);
+#else
+ /*
+ * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback
+ * so we need to set up parent part by hands
+ */
+ task->parser_parent_part = g_mime_message_get_mime_part (message);
+ g_object_unref (task->parser_parent_part);
+ g_mime_message_foreach_part (message, mime_foreach_callback, task);
+#endif
+
+ debug_task ("found %d parts in message", task->parts_count);
+ if (task->queue_id == NULL) {
+ task->queue_id = "undef";
+ }
+
+#ifdef GMIME24
+ task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message));
+#else
+ task->raw_headers_str = g_mime_message_get_headers (task->message);
+#endif
+
+ process_images (task);
+
+ /* Parse received headers */
+ first = message_get_header (task->task_pool, message, "Received", FALSE);
+ cur = first;
+ while (cur) {
+ recv = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct received_header));
+ parse_recv_header (task->task_pool, cur->data, recv);
+ task->received = g_list_prepend (task->received, recv);
+ cur = g_list_next (cur);
+ }
+ if (first) {
+ g_list_free (first);
+ }
+
+ if (task->raw_headers_str) {
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, task->raw_headers_str);
+ process_raw_headers (task);
+ }
+
+ task->rcpts = g_mime_message_get_all_recipients (message);
+ if (task->rcpts) {
+#ifdef GMIME24
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, task->rcpts);
+#else
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) internet_address_list_destroy, task->rcpts);
+#endif
+ }
+
+
+ /* free the parser (and the stream) */
+ g_object_unref (parser);
+ }
+ else {
+ /* We got only message, no mime headers or anything like this */
+ /* Construct fake message for it */
+ task->message = g_mime_message_new (TRUE);
+ if (task->from) {
+ g_mime_message_set_sender (task->message, task->from);
+ }
+ /* Construct part for it */
+ part = g_mime_part_new_with_type ("text", "html");
+#ifdef GMIME24
+ wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_CONTENT_ENCODING_8BIT);
+#else
+ wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_PART_ENCODING_8BIT);
+#endif
+ g_mime_part_set_content_object (part, wrapper);
+ g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part));
+ /* Register destructors */
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, wrapper);
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, part);
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message);
+ /* Now parse in a normal way */
+ task->parser_recursion = 0;
+#ifdef GMIME24
+ g_mime_message_foreach (task->message, mime_foreach_callback, task);
+#else
+ g_mime_message_foreach_part (task->message, mime_foreach_callback, task);
+#endif
+ /* Generate message ID */
+ mid = g_mime_utils_generate_message_id ("localhost.localdomain");
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, mid);
+ g_mime_message_set_message_id (task->message, mid);
+ task->message_id = mid;
+ task->queue_id = mid;
+ /* Set headers for message */
+ if (task->subject) {
+ g_mime_message_set_subject (task->message, task->subject);
+ }
+
+ /* Add recipients */
+#ifndef GMIME24
+ if (task->rcpt) {
+ cur = task->rcpt;
+ while (cur) {
+ g_mime_message_add_recipient (task->message, GMIME_RECIPIENT_TYPE_TO, NULL, (gchar *)cur->data);
+ cur = g_list_next (cur);
+ }
+ }
+#endif
+ }
+
+ /* Parse urls inside Subject header */
+ cur = message_get_header (task->task_pool, task->message, "Subject", FALSE);
+ if (cur) {
+ p = cur->data;
+ len = strlen (p);
+ end = p + len;
+
+ while (p < end) {
+ /* Search to the end of url */
+ if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str, FALSE)) {
+ if (url_str != NULL) {
+ subject_url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri));
+ if (subject_url != NULL) {
+ /* Try to parse url */
+ rc = parse_uri (subject_url, url_str, task->task_pool);
+ if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) &&
+ subject_url->hostlen > 0) {
+ if (subject_url->protocol != PROTOCOL_MAILTO) {
+ if (!g_tree_lookup (task->urls, subject_url)) {
+ g_tree_insert (task->urls, subject_url, subject_url);
+ }
+ }
+ }
+ else if (rc != URI_ERRNO_OK) {
+ msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
+ }
+ }
+ }
+ }
+ else {
+ break;
+ }
+ p = url_end + 1;
+ }
+ /* Free header's list */
+ g_list_free (cur);
+ }
+
+ return 0;
+}
+
+struct gmime_raw_header {
+ struct raw_header *next;
+ gchar *name;
+ gchar *value;
+};
+
+typedef struct _GMimeHeader {
+ GHashTable *hash;
+ GHashTable *writers;
+ struct raw_header *headers;
+} local_GMimeHeader;
+
+
+/* known header field types */
+enum {
+ HEADER_FROM = 0,
+ HEADER_REPLY_TO,
+ HEADER_TO,
+ HEADER_CC,
+ HEADER_BCC,
+ HEADER_SUBJECT,
+ HEADER_DATE,
+ HEADER_MESSAGE_ID,
+ HEADER_UNKNOWN
+};
+
+/*
+ * Iterate throught all headers and make a list
+ */
+#ifndef GMIME24
+static void
+header_iterate (rspamd_mempool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong)
+{
+ while (h) {
+ if (G_LIKELY (!strong)) {
+ if (h->value && !g_ascii_strncasecmp (field, h->name, strlen (field))) {
+ if (pool != NULL) {
+ *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value));
+ }
+ else {
+ *ret = g_list_prepend (*ret, g_strdup (h->value));
+ }
+ }
+ }
+ else {
+ if (h->value && !strncmp (field, h->name, strlen (field))) {
+ if (pool != NULL) {
+ *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value));
+ }
+ else {
+ *ret = g_list_prepend (*ret, g_strdup (h->value));
+ }
+ }
+ }
+ h = (struct gmime_raw_header *)h->next;
+ }
+}
+#else
+static void
+header_iterate (rspamd_mempool_t * pool, GMimeHeaderList * ls, GList ** ret, const gchar *field, gboolean strong)
+{
+ /* Use iterator in case of gmime 2.4 */
+ GMimeHeaderIter *iter;
+ const gchar *name;
+
+ if (ls == NULL) {
+ *ret = NULL;
+ return;
+ }
+
+ iter = g_mime_header_iter_new ();
+ if (g_mime_header_list_get_iter (ls, iter) && g_mime_header_iter_first (iter)) {
+ /* Iterate throught headers */
+ while (g_mime_header_iter_is_valid (iter)) {
+ name = g_mime_header_iter_get_name (iter);
+ if (G_LIKELY (!strong)) {
+ if (!g_ascii_strncasecmp (field, name, strlen (name))) {
+ if (pool != NULL) {
+ *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter)));
+ }
+ else {
+ *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter)));
+ }
+ }
+ }
+ else {
+ if (!strncmp (field, name, strlen (name))) {
+ if (pool != NULL) {
+ *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter)));
+ }
+ else {
+ *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter)));
+ }
+ }
+ }
+ if (!g_mime_header_iter_next (iter)) {
+ break;
+ }
+ }
+ }
+ g_mime_header_iter_free (iter);
+}
+#endif
+
+
+struct multipart_cb_data {
+ GList *ret;
+ rspamd_mempool_t *pool;
+ const gchar *field;
+ gboolean try_search;
+ gboolean strong;
+ gint rec;
+};
+
+#define MAX_REC 10
+
+static void
+#ifdef GMIME24
+multipart_iterate (GMimeObject * parent, GMimeObject * part, gpointer user_data)
+#else
+multipart_iterate (GMimeObject * part, gpointer user_data)
+#endif
+{
+ struct multipart_cb_data *data = user_data;
+#ifndef GMIME24
+ struct gmime_raw_header *h;
+#endif
+ GList *l = NULL;
+
+ if (data->try_search && part != NULL && GMIME_IS_PART (part)) {
+#ifdef GMIME24
+ GMimeHeaderList *ls;
+
+ ls = g_mime_object_get_header_list (GMIME_OBJECT (part));
+ header_iterate (data->pool, ls, &l, data->field, data->strong);
+#else
+ h = (struct gmime_raw_header *)part->headers->headers;
+ header_iterate (data->pool, h, &l, data->field, data->strong);
+#endif
+ if (l == NULL) {
+ /* Header not found, abandon search results */
+ data->try_search = FALSE;
+ g_list_free (data->ret);
+ data->ret = NULL;
+ }
+ else {
+ data->ret = g_list_concat (l, data->ret);
+ }
+ }
+ else if (data->try_search && GMIME_IS_MULTIPART (part)) {
+ /* Maybe endless recursion here ? */
+ if (data->rec++ < MAX_REC) {
+ g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, data);
+ }
+ else {
+ msg_info ("maximum recurse limit is over, stop recursing, %d", data->rec);
+ data->try_search = FALSE;
+ }
+ }
+}
+
+static GList *
+local_message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong)
+{
+ GList *gret = NULL;
+ GMimeObject *part;
+ struct multipart_cb_data cb = {
+ .try_search = TRUE,
+ .rec = 0,
+ .ret = NULL,
+ };
+ cb.pool = pool;
+ cb.field = field;
+ cb.strong = strong;
+
+#ifndef GMIME24
+ struct gmime_raw_header *h;
+
+ if (field == NULL) {
+ return NULL;
+ }
+
+ msg_debug ("iterate over headers to find header %s", field);
+ h = (struct gmime_raw_header *) (GMIME_OBJECT (message)->headers->headers);
+ header_iterate (pool, h, &gret, field, strong);
+
+ if (gret == NULL) {
+ /* Try to iterate with mime part headers */
+ msg_debug ("iterate over headers of mime part to find header %s", field);
+ part = g_mime_message_get_mime_part (message);
+ if (part) {
+ h = (struct gmime_raw_header *)part->headers->headers;
+ header_iterate (pool, h, &gret, field, strong);
+ if (gret == NULL && GMIME_IS_MULTIPART (part)) {
+ msg_debug ("iterate over headers of each multipart's subparts %s", field);
+ g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb);
+ if (cb.ret != NULL) {
+ gret = cb.ret;
+ }
+ }
+#ifndef GMIME24
+ g_object_unref (part);
+#endif
+ }
+ }
+
+ return gret;
+#else
+ GMimeHeaderList *ls;
+
+ ls = g_mime_object_get_header_list (GMIME_OBJECT (message));
+ header_iterate (pool, ls, &gret, field, strong);
+ if (gret == NULL) {
+ /* Try to iterate with mime part headers */
+ part = g_mime_message_get_mime_part (message);
+ if (part) {
+ ls = g_mime_object_get_header_list (GMIME_OBJECT (part));
+ header_iterate (pool, ls, &gret, field, strong);
+ if (gret == NULL && GMIME_IS_MULTIPART (part)) {
+ g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb);
+ if (cb.ret != NULL) {
+ gret = cb.ret;
+ }
+ }
+#ifndef GMIME24
+ g_object_unref (part);
+#endif
+ }
+ }
+
+
+ return gret;
+#endif
+}
+
+/**
+* g_mime_message_set_date_from_string: Set the message sent-date
+* @message: MIME Message
+* @string: A string of date
+*
+* Set the sent-date on a MIME Message.
+**/
+void
+local_mime_message_set_date_from_string (GMimeMessage * message, const gchar * string)
+{
+ time_t date;
+ gint offset = 0;
+
+ date = g_mime_utils_header_decode_date (string, &offset);
+ g_mime_message_set_date (message, date, offset);
+}
+
+/*
+ * Replacements for standart gmime functions but converting adresses to IA
+ */
+static const gchar *
+local_message_get_sender (GMimeMessage * message)
+{
+ gchar *res;
+ const gchar *from = g_mime_message_get_sender (message);
+ InternetAddressList *ia;
+
+#ifndef GMIME24
+ ia = internet_address_parse_string (from);
+#else
+ ia = internet_address_list_parse_string (from);
+#endif
+ if (!ia) {
+ return NULL;
+ }
+ res = internet_address_list_to_string (ia, FALSE);
+#ifndef GMIME24
+ internet_address_list_destroy (ia);
+#else
+ g_object_unref (ia);
+#endif
+
+ return res;
+}
+
+static const gchar *
+local_message_get_reply_to (GMimeMessage * message)
+{
+ gchar *res;
+ const gchar *from = g_mime_message_get_reply_to (message);
+ InternetAddressList *ia;
+
+#ifndef GMIME24
+ ia = internet_address_parse_string (from);
+#else
+ ia = internet_address_list_parse_string (from);
+#endif
+ if (!ia) {
+ return NULL;
+ }
+ res = internet_address_list_to_string (ia, FALSE);
+#ifndef GMIME24
+ internet_address_list_destroy (ia);
+#else
+ g_object_unref (ia);
+#endif
+
+ return res;
+}
+
+#ifdef GMIME24
+
+# define ADD_RECIPIENT_TEMPLATE(type,def) \
+static void \
+local_message_add_recipients_from_string_##type (GMimeMessage *message, const gchar *string, const gchar *value) \
+{ \
+ InternetAddressList *il, *new; \
+ \
+ il = g_mime_message_get_recipients (message, (def)); \
+ new = internet_address_list_parse_string (string); \
+ internet_address_list_append (il, new); \
+} \
+
+ADD_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO)
+ ADD_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC)
+ ADD_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC)
+# define GET_RECIPIENT_TEMPLATE(type,def) \
+static InternetAddressList* \
+local_message_get_recipients_##type (GMimeMessage *message, const gchar *unused) \
+{ \
+ return g_mime_message_get_recipients (message, (def)); \
+}
+ GET_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO)
+ GET_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC)
+ GET_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC)
+#endif
+/* different declarations for different types of set and get functions */
+ typedef const gchar *(*GetFunc) (GMimeMessage * message);
+ typedef InternetAddressList *(*GetRcptFunc) (GMimeMessage * message, const gchar *type);
+ typedef GList *(*GetListFunc) (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *type, gboolean strong);
+ typedef void (*SetFunc) (GMimeMessage * message, const gchar *value);
+ typedef void (*SetListFunc) (GMimeMessage * message, const gchar *field, const gchar *value);
+
+/** different types of functions
+*
+* FUNC_CHARPTR
+* - function with no arguments
+* - get returns gchar*
+*
+* FUNC_IA (from Internet Address)
+* - function with additional "field" argument from the fieldfunc table,
+* - get returns Glist*
+*
+* FUNC_LIST
+* - function with additional "field" argument (given arbitrary header field name)
+* - get returns Glist*
+**/
+ enum {
+ FUNC_CHARPTR = 0,
+ FUNC_CHARFREEPTR,
+ FUNC_IA,
+ FUNC_LIST
+ };
+
+/**
+* fieldfunc struct: structure of MIME fields and corresponding get and set
+* functions.
+**/
+ static struct {
+ gchar *name;
+ GetFunc func;
+ GetRcptFunc rcptfunc;
+ GetListFunc getlistfunc;
+ SetFunc setfunc;
+ SetListFunc setlfunc;
+ gint functype;
+ } fieldfunc[] =
+{
+ {
+ "From", local_message_get_sender, NULL, NULL, g_mime_message_set_sender, NULL, FUNC_CHARFREEPTR}, {
+ "Reply-To", local_message_get_reply_to, NULL, NULL, g_mime_message_set_reply_to, NULL, FUNC_CHARFREEPTR},
+#ifndef GMIME24
+ {
+ "To", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
+ "Cc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
+ "Bcc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
+ "Date", (GetFunc) g_mime_message_get_date_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR},
+#else
+ {
+ "To", NULL, local_message_get_recipients_to, NULL, NULL, local_message_add_recipients_from_string_to, FUNC_IA}, {
+ "Cc", NULL, local_message_get_recipients_cc, NULL, NULL, local_message_add_recipients_from_string_cc, FUNC_IA}, {
+ "Bcc", NULL, local_message_get_recipients_bcc, NULL, NULL, local_message_add_recipients_from_string_bcc, FUNC_IA}, {
+ "Date", (GetFunc)g_mime_message_get_date_as_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR},
+#endif
+ {
+ "Subject", g_mime_message_get_subject, NULL, NULL, g_mime_message_set_subject, NULL, FUNC_CHARPTR}, {
+ "Message-Id", g_mime_message_get_message_id, NULL, NULL, g_mime_message_set_message_id, NULL, FUNC_CHARPTR},
+#ifndef GMIME24
+ {
+ NULL, NULL, NULL, local_message_get_header, NULL, g_mime_message_add_header, FUNC_LIST}
+#else
+ {
+ NULL, NULL, NULL, local_message_get_header, NULL, (SetListFunc)g_mime_object_append_header, FUNC_LIST}
+#endif
+};
+
+/**
+* message_set_header: set header of any type excluding special (Content- and MIME-Version:)
+**/
+void
+message_set_header (GMimeMessage * message, const gchar *field, const gchar *value)
+{
+ gint i;
+
+ if (!g_ascii_strcasecmp (field, "MIME-Version:") || !g_ascii_strncasecmp (field, "Content-", 8)) {
+ return;
+ }
+ for (i = 0; i <= HEADER_UNKNOWN; ++i) {
+ if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) {
+ switch (fieldfunc[i].functype) {
+ case FUNC_CHARPTR:
+ (*(fieldfunc[i].setfunc)) (message, value);
+ break;
+ case FUNC_IA:
+ (*(fieldfunc[i].setlfunc)) (message, fieldfunc[i].name, value);
+ break;
+ case FUNC_LIST:
+ (*(fieldfunc[i].setlfunc)) (message, field, value);
+ break;
+ }
+ break;
+ }
+ }
+}
+
+
+/**
+* message_get_header: returns the list of 'any header' values
+* (except of unsupported yet Content- and MIME-Version special headers)
+*
+* You should free the GList list by yourself.
+**/
+GList *
+message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong)
+{
+ gint i;
+ gchar *ret = NULL, *ia_string;
+ GList *gret = NULL;
+ InternetAddressList *ia_list = NULL, *ia;
+
+ for (i = 0; i <= HEADER_UNKNOWN; ++i) {
+ if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) {
+ switch (fieldfunc[i].functype) {
+ case FUNC_CHARFREEPTR:
+ ret = (gchar *)(*(fieldfunc[i].func)) (message);
+ break;
+ case FUNC_CHARPTR:
+ ret = (gchar *)(*(fieldfunc[i].func)) (message);
+ break;
+ case FUNC_IA:
+ ia_list = (*(fieldfunc[i].rcptfunc)) (message, field);
+ ia = ia_list;
+#ifndef GMIME24
+ while (ia && ia->address) {
+
+ ia_string = internet_address_to_string ((InternetAddress *) ia->address, FALSE);
+ if (pool != NULL) {
+ rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string);
+ }
+ gret = g_list_prepend (gret, ia_string);
+ ia = ia->next;
+ }
+#else
+ i = internet_address_list_length (ia);
+ while (--i >= 0) {
+ ia_string = internet_address_to_string (internet_address_list_get_address (ia, i), FALSE);
+ if (pool != NULL) {
+ rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string);
+ }
+ gret = g_list_prepend (gret, ia_string);
+ }
+#endif
+ break;
+ case FUNC_LIST:
+ gret = (*(fieldfunc[i].getlistfunc)) (pool, message, field, strong);
+ break;
+ }
+ break;
+ }
+ }
+ if (gret == NULL && ret != NULL) {
+ if (pool != NULL) {
+ gret = g_list_prepend (gret, rspamd_mempool_strdup (pool, ret));
+ }
+ else {
+ gret = g_list_prepend (gret, g_strdup (ret));
+ }
+ }
+ if (fieldfunc[i].functype == FUNC_CHARFREEPTR && ret) {
+ g_free (ret);
+ }
+
+ return gret;
+}
+
+GList*
+message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong)
+{
+ GList *gret = NULL;
+ struct raw_header *rh;
+
+ rh = g_hash_table_lookup (task->raw_headers, field);
+
+ if (rh == NULL) {
+ return NULL;
+ }
+
+ while (rh) {
+ if (strong) {
+ if (strcmp (rh->name, field) == 0) {
+ gret = g_list_prepend (gret, rh);
+ }
+ }
+ else {
+ if (g_ascii_strcasecmp (rh->name, field) == 0) {
+ gret = g_list_prepend (gret, rh);
+ }
+ }
+ rh = rh->next;
+ }
+
+ if (gret != NULL) {
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, gret);
+ }
+
+ return gret;
+}
diff --git a/src/libmime/message.h b/src/libmime/message.h
new file mode 100644
index 000000000..5e27579d1
--- /dev/null
+++ b/src/libmime/message.h
@@ -0,0 +1,91 @@
+/**
+ * @file message.h
+ * Message processing functions and structures
+ */
+
+#ifndef RSPAMD_MESSAGE_H
+#define RSPAMD_MESSAGE_H
+
+#include "config.h"
+#include "fuzzy.h"
+
+struct rspamd_task;
+struct controller_session;
+
+struct mime_part {
+ GMimeContentType *type;
+ GByteArray *content;
+ GMimeObject *parent;
+ gchar *checksum;
+ const gchar *filename;
+};
+
+struct mime_text_part {
+ gboolean is_html;
+ gboolean is_raw;
+ gboolean is_balanced;
+ gboolean is_empty;
+ gboolean is_utf;
+ const gchar *real_charset;
+ GByteArray *orig;
+ GByteArray *content;
+ GNode *html_nodes;
+ GList *urls_offset; /**< list of offsets of urls */
+ fuzzy_hash_t *fuzzy;
+ fuzzy_hash_t *double_fuzzy;
+ GMimeObject *parent;
+ GUnicodeScript script;
+ f_str_t *diff_str;
+};
+
+struct received_header {
+ gchar *from_hostname;
+ gchar *from_ip;
+ gchar *real_hostname;
+ gchar *real_ip;
+ gchar *by_hostname;
+ gint is_error;
+};
+
+struct raw_header {
+ gchar *name;
+ gchar *value;
+ gboolean tab_separated;
+ gboolean empty_separator;
+ gchar *separator;
+ struct raw_header *next;
+};
+
+/**
+ * Process message with all filters/statfiles, extract mime parts, urls and
+ * call metrics consolidation functions
+ * @param task worker_task object
+ * @return 0 if we have delayed filters to process and 1 if we have finished with processing
+ */
+gint process_message (struct rspamd_task *task);
+
+/*
+ * Set header with specified name and value
+ */
+void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value);
+
+/*
+ * Get a list of header's values with specified header's name
+ * @param pool if not NULL this pool would be used for storing header's values
+ * @param message g_mime_message object
+ * @param field header's name
+ * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
+ * @return A list of header's values or NULL. If list is not NULL it MUST be freed. If pool is NULL elements must be freed as well.
+ */
+GList* message_get_header (rspamd_mempool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong);
+
+/*
+ * Get a list of header's values with specified header's name using raw headers
+ * @param task worker task structure
+ * @param field header's name
+ * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
+ * @return A list of header's values or NULL. Unlike previous function it is NOT required to free list or values. I should rework one of these functions some time.
+ */
+GList* message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong);
+
+#endif
diff --git a/src/libmime/protocol.c b/src/libmime/protocol.c
new file mode 100644
index 000000000..8a5c3f0df
--- /dev/null
+++ b/src/libmime/protocol.c
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "settings.h"
+#include "message.h"
+
+/* Max line size */
+#define OUTBUFSIZ BUFSIZ
+/*
+ * Just check if the passed message is spam or not and reply as
+ * described below
+ */
+#define MSG_CMD_CHECK "check"
+/*
+ * Check if message is spam or not, and return score plus list
+ * of symbols hit
+ */
+#define MSG_CMD_SYMBOLS "symbols"
+/*
+ * Check if message is spam or not, and return score plus report
+ */
+#define MSG_CMD_REPORT "report"
+/*
+ * Check if message is spam or not, and return score plus report
+ * if the message is spam
+ */
+#define MSG_CMD_REPORT_IFSPAM "report_ifspam"
+/*
+ * Ignore this message -- client opened connection then changed
+ */
+#define MSG_CMD_SKIP "skip"
+/*
+ * Return a confirmation that spamd is alive
+ */
+#define MSG_CMD_PING "ping"
+/*
+ * Process this message as described above and return modified message
+ */
+#define MSG_CMD_PROCESS "process"
+
+/*
+ * Learn specified statfile using message
+ */
+#define MSG_CMD_LEARN "learn"
+
+/*
+ * spamassassin greeting:
+ */
+#define SPAMC_GREETING "SPAMC"
+/*
+ * rspamd greeting:
+ */
+#define RSPAMC_GREETING "RSPAMC"
+/*
+ * Headers
+ */
+#define CONTENT_LENGTH_HEADER "Content-length"
+#define HELO_HEADER "Helo"
+#define FROM_HEADER "From"
+#define IP_ADDR_HEADER "IP"
+#define NRCPT_HEADER "Recipient-Number"
+#define RCPT_HEADER "Rcpt"
+#define SUBJECT_HEADER "Subject"
+#define STATFILE_HEADER "Statfile"
+#define QUEUE_ID_HEADER "Queue-ID"
+#define ERROR_HEADER "Error"
+#define USER_HEADER "User"
+#define PASS_HEADER "Pass"
+#define JSON_HEADER "Json"
+#define HOSTNAME_HEADER "Hostname"
+#define DELIVER_TO_HEADER "Deliver-To"
+#define NO_LOG_HEADER "Log"
+
+static GList *custom_commands = NULL;
+
+
+/*
+ * Remove <> from the fixed string and copy it to the pool
+ */
+static gchar *
+rspamd_protocol_escape_braces (GString *in)
+{
+ gint len = 0;
+ gchar *orig, *p;
+
+ orig = in->str;
+ while ((g_ascii_isspace (*orig) || *orig == '<') && orig - in->str < (gint)in->len) {
+ orig ++;
+ }
+
+ g_string_erase (in, 0, orig - in->str);
+
+ p = orig;
+ while ((!g_ascii_isspace (*p) && *p != '>') && p - in->str < (gint)in->len) {
+ p ++;
+ len ++;
+ }
+
+ g_string_truncate (in, len);
+
+ return in->str;
+}
+
+static gboolean
+rspamd_protocol_handle_url (struct rspamd_task *task, struct rspamd_http_message *msg)
+{
+ GList *cur;
+ struct custom_command *cmd;
+ const gchar *p;
+
+ if (msg->url == NULL || msg->url->len == 0) {
+ task->last_error = "command is absent";
+ task->error_code = 400;
+ return FALSE;
+ }
+
+ if (msg->url->str[0] == '/') {
+ p = &msg->url->str[1];
+ }
+ else {
+ p = msg->url->str;
+ }
+
+ switch (*p) {
+ case 'c':
+ case 'C':
+ /* check */
+ if (g_ascii_strcasecmp (p + 1, MSG_CMD_CHECK + 1) == 0) {
+ task->cmd = CMD_CHECK;
+ }
+ else {
+ goto err;
+ }
+ break;
+ case 's':
+ case 'S':
+ /* symbols, skip */
+ if (g_ascii_strcasecmp (p + 1, MSG_CMD_SYMBOLS + 1) == 0) {
+ task->cmd = CMD_SYMBOLS;
+ }
+ else if (g_ascii_strcasecmp (p + 1, MSG_CMD_SKIP + 1) == 0) {
+ task->cmd = CMD_SKIP;
+ }
+ else {
+ goto err;
+ }
+ break;
+ case 'p':
+ case 'P':
+ /* ping, process */
+ if (g_ascii_strcasecmp (p + 1, MSG_CMD_PING + 1) == 0) {
+ task->cmd = CMD_PING;
+ }
+ else if (g_ascii_strcasecmp (p + 1, MSG_CMD_PROCESS + 1) == 0) {
+ task->cmd = CMD_PROCESS;
+ }
+ else {
+ goto err;
+ }
+ break;
+ case 'r':
+ case 'R':
+ /* report, report_ifspam */
+ if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT + 1) == 0) {
+ task->cmd = CMD_REPORT;
+ }
+ else if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
+ task->cmd = CMD_REPORT_IFSPAM;
+ }
+ else {
+ goto err;
+ }
+ break;
+ default:
+ cur = custom_commands;
+ while (cur) {
+ cmd = cur->data;
+ if (g_ascii_strcasecmp (p, cmd->name) == 0) {
+ task->cmd = CMD_OTHER;
+ task->custom_cmd = cmd;
+ break;
+ }
+ cur = g_list_next (cur);
+ }
+
+ if (cur == NULL) {
+ goto err;
+ }
+ break;
+ }
+
+ return TRUE;
+
+err:
+ debug_task ("bad command: %s", p);
+ task->last_error = "invalid command";
+ task->error_code = 400;
+ return FALSE;
+}
+
+static gboolean
+rspamd_protocol_handle_headers (struct rspamd_task *task, struct rspamd_http_message *msg)
+{
+ gchar *headern, *err, *tmp;
+ gboolean res = TRUE;
+ struct rspamd_http_header *h;
+
+ LL_FOREACH (msg->headers, h) {
+ headern = h->name->str;
+
+ switch (headern[0]) {
+ case 'd':
+ case 'D':
+ if (g_ascii_strcasecmp (headern, DELIVER_TO_HEADER) == 0) {
+ task->deliver_to = rspamd_protocol_escape_braces (h->value);
+ debug_task ("read deliver-to header, value: %s", task->deliver_to);
+ }
+ else {
+ debug_task ("wrong header: %s", headern);
+ res = FALSE;
+ }
+ break;
+ case 'h':
+ case 'H':
+ if (g_ascii_strcasecmp (headern, HELO_HEADER) == 0) {
+ task->helo = h->value->str;
+ debug_task ("read helo header, value: %s", task->helo);
+ }
+ else if (g_ascii_strcasecmp (headern, HOSTNAME_HEADER) == 0) {
+ task->hostname = h->value->str;
+ debug_task ("read hostname header, value: %s", task->hostname);
+ }
+ else {
+ debug_task ("wrong header: %s", headern);
+ res = FALSE;
+ }
+ break;
+ case 'f':
+ case 'F':
+ if (g_ascii_strcasecmp (headern, FROM_HEADER) == 0) {
+ task->from = rspamd_protocol_escape_braces (h->value);
+ debug_task ("read from header, value: %s", task->from);
+ }
+ else {
+ debug_task ("wrong header: %s", headern);
+ res = FALSE;
+ }
+ break;
+ case 'j':
+ case 'J':
+ if (g_ascii_strcasecmp (headern, JSON_HEADER) == 0) {
+ task->is_json = parse_flag (h->value->str);
+ }
+ else {
+ debug_task ("wrong header: %s", headern);
+ res = FALSE;
+ }
+ break;
+ case 'q':
+ case 'Q':
+ if (g_ascii_strcasecmp (headern, QUEUE_ID_HEADER) == 0) {
+ task->queue_id = h->value->str;
+ debug_task ("read queue_id header, value: %s", task->queue_id);
+ }
+ else {
+ debug_task ("wrong header: %s", headern);
+ res = FALSE;
+ }
+ break;
+ case 'r':
+ case 'R':
+ if (g_ascii_strcasecmp (headern, RCPT_HEADER) == 0) {
+ tmp = rspamd_protocol_escape_braces (h->value);
+ task->rcpt = g_list_prepend (task->rcpt, tmp);
+ debug_task ("read rcpt header, value: %s", tmp);
+ }
+ else if (g_ascii_strcasecmp (headern, NRCPT_HEADER) == 0) {
+ task->nrcpt = strtoul (h->value->str, &err, 10);
+ debug_task ("read rcpt header, value: %d", (gint)task->nrcpt);
+ }
+ else {
+ msg_info ("wrong header: %s", headern);
+ res = FALSE;
+ }
+ break;
+ case 'i':
+ case 'I':
+ if (g_ascii_strcasecmp (headern, IP_ADDR_HEADER) == 0) {
+ tmp = h->value->str;
+ if (!rspamd_parse_inet_address (&task->from_addr, tmp)) {
+ msg_err ("bad ip header: '%s'", tmp);
+ return FALSE;
+ }
+ debug_task ("read IP header, value: %s", tmp);
+ }
+ else {
+ debug_task ("wrong header: %s", headern);
+ res = FALSE;
+ }
+ break;
+ case 'p':
+ case 'P':
+ if (g_ascii_strcasecmp (headern, PASS_HEADER) == 0) {
+ if (h->value->len == sizeof ("all") - 1 &&
+ g_ascii_strcasecmp (h->value->str, "all") == 0) {
+ task->pass_all_filters = TRUE;
+ debug_task ("pass all filters");
+ }
+ }
+ else {
+ res = FALSE;
+ }
+ break;
+ case 's':
+ case 'S':
+ if (g_ascii_strcasecmp (headern, SUBJECT_HEADER) == 0) {
+ task->subject = h->value->str;
+ }
+ else {
+ res = FALSE;
+ }
+ break;
+ case 'u':
+ case 'U':
+ if (g_ascii_strcasecmp (headern, USER_HEADER) == 0) {
+ task->user = h->value->str;
+ }
+ else {
+ res = FALSE;
+ }
+ break;
+ case 'l':
+ case 'L':
+ if (g_ascii_strcasecmp (headern, NO_LOG_HEADER) == 0) {
+ if (g_ascii_strcasecmp (h->value->str, "no") == 0) {
+ task->no_log = TRUE;
+ }
+ }
+ else {
+ res = FALSE;
+ }
+ break;
+ default:
+ debug_task ("wrong header: %s", headern);
+ res = FALSE;
+ break;
+ }
+ }
+
+ if (!res && task->cfg->strict_protocol_headers) {
+ msg_err ("deny processing of a request with incorrect or unknown headers");
+ task->last_error = "invalid header";
+ task->error_code = 400;
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+rspamd_protocol_handle_request (struct rspamd_task *task,
+ struct rspamd_http_message *msg)
+{
+ gboolean ret = TRUE;
+
+ if (msg->method == HTTP_SYMBOLS) {
+ task->cmd = CMD_SYMBOLS;
+ task->is_json = FALSE;
+ }
+ else if (msg->method == HTTP_CHECK) {
+ task->cmd = CMD_CHECK;
+ task->is_json = FALSE;
+ }
+ else {
+ task->is_json = TRUE;
+ ret = rspamd_protocol_handle_url (task, msg);
+ }
+
+ if (ret) {
+ ret = rspamd_protocol_handle_headers (task, msg);
+ }
+
+ return ret;
+}
+
+static void
+write_hashes_to_log (struct rspamd_task *task, GString *logbuf)
+{
+ GList *cur;
+ struct mime_text_part *text_part;
+
+ cur = task->text_parts;
+
+ while (cur) {
+ text_part = cur->data;
+ if (text_part->fuzzy) {
+ if (cur->next != NULL) {
+ rspamd_printf_gstring (logbuf, " part: %Xd,", text_part->fuzzy->h);
+ }
+ else {
+ rspamd_printf_gstring (logbuf, " part: %Xd", text_part->fuzzy->h);
+ }
+ }
+ cur = g_list_next (cur);
+ }
+}
+
+
+/* Structure for writing tree data */
+struct tree_cb_data {
+ ucl_object_t *top;
+ struct rspamd_task *task;
+};
+
+/*
+ * Callback for writing urls
+ */
+static gboolean
+urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
+{
+ struct tree_cb_data *cb = ud;
+ struct uri *url = value;
+ ucl_object_t *obj;
+
+ obj = ucl_object_fromlstring (url->host, url->hostlen);
+ DL_APPEND (cb->top->value.av, obj);
+
+ if (cb->task->cfg->log_urls) {
+ msg_info ("<%s> URL: %s - %s: %s", cb->task->message_id, cb->task->user ?
+ cb->task->user : (cb->task->from ? cb->task->from : "unknown"),
+ rspamd_inet_address_to_string (&cb->task->from_addr),
+ struri (url));
+ }
+
+ return FALSE;
+}
+
+static ucl_object_t *
+rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task)
+{
+ struct tree_cb_data cb;
+ ucl_object_t *obj;
+
+ obj = ucl_object_typed_new (UCL_ARRAY);
+ cb.top = obj;
+ cb.task = task;
+
+ g_tree_foreach (input, urls_protocol_cb, &cb);
+
+ return obj;
+}
+
+static gboolean
+emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
+{
+ struct tree_cb_data *cb = ud;
+ struct uri *url = value;
+ ucl_object_t *obj;
+
+ obj = ucl_object_fromlstring (url->user, url->userlen + url->hostlen + 1);
+ DL_APPEND (cb->top->value.av, obj);
+
+ return FALSE;
+}
+
+static ucl_object_t *
+rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task)
+{
+ struct tree_cb_data cb;
+ ucl_object_t *obj;
+
+ obj = ucl_object_typed_new (UCL_ARRAY);
+ cb.top = obj;
+ cb.task = task;
+
+ g_tree_foreach (input, emails_protocol_cb, &cb);
+
+ return obj;
+}
+
+
+/* Write new subject */
+static const gchar *
+make_rewritten_subject (struct metric *metric, struct rspamd_task *task)
+{
+ static gchar subj_buf[1024];
+ gchar *p = subj_buf, *end, *c, *res;
+ const gchar *s;
+
+ end = p + sizeof(subj_buf);
+ c = metric->subject;
+ s = g_mime_message_get_subject (task->message);
+
+ while (p < end) {
+ if (*c == '\0') {
+ *p = '\0';
+ break;
+ }
+ else if (*c == '%' && *(c + 1) == 's') {
+ p += rspamd_strlcpy (p, (s != NULL) ? s : "", end - p);
+ c += 2;
+ }
+ else {
+ *p = *c ++;
+ }
+ p ++;
+ }
+ res = g_mime_utils_header_encode_text (subj_buf);
+
+ rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_free, res);
+
+ return res;
+}
+
+static ucl_object_t *
+rspamd_str_list_ucl (GList *str_list)
+{
+ ucl_object_t *top = NULL, *obj;
+ GList *cur;
+
+ top = ucl_object_typed_new (UCL_ARRAY);
+ cur = str_list;
+ while (cur) {
+ obj = ucl_object_fromstring (cur->data);
+ DL_APPEND (top->value.av, obj);
+ cur = g_list_next (cur);
+ }
+
+ return top;
+}
+
+static ucl_object_t *
+rspamd_metric_symbol_ucl (struct rspamd_task *task, struct metric *m,
+ struct symbol *sym, GString *logbuf)
+{
+ ucl_object_t *obj = NULL;
+ const gchar *description = NULL;
+
+ rspamd_printf_gstring (logbuf, "%s,", sym->name);
+ description = g_hash_table_lookup (m->descriptions, sym->name);
+
+ obj = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (obj, ucl_object_fromstring (sym->name), "name", 0, false);
+ ucl_object_insert_key (obj, ucl_object_fromdouble (sym->score), "score", 0, false);
+ if (description) {
+ ucl_object_insert_key (obj, ucl_object_fromstring (description), "description", 0, false);
+ }
+ if (sym->options != NULL) {
+ ucl_object_insert_key (obj, rspamd_str_list_ucl (sym->options), "options", 0, false);
+ }
+
+ return obj;
+}
+
+static ucl_object_t *
+rspamd_metric_result_ucl (struct rspamd_task *task, struct metric_result *mres, GString *logbuf)
+{
+ GHashTableIter hiter;
+ struct symbol *sym;
+ struct metric *m;
+ gboolean is_spam;
+ enum rspamd_metric_action action = METRIC_ACTION_NOACTION;
+ ucl_object_t *obj = NULL, *sobj;
+ gdouble required_score;
+ gpointer h, v;
+ const gchar *subject;
+ gchar action_char;
+
+ m = mres->metric;
+
+ /* XXX: handle settings */
+ required_score = m->actions[METRIC_ACTION_REJECT].score;
+ is_spam = (mres->score >= required_score);
+ action = check_metric_action (mres->score, required_score, m);
+ if (task->is_skipped) {
+ action_char = 'S';
+ }
+ else if (is_spam) {
+ action_char = 'T';
+ }
+ else {
+ action_char = 'F';
+ }
+ rspamd_printf_gstring (logbuf, "(%s: %c (%s): [%.2f/%.2f] [",
+ m->name, action_char,
+ str_action_metric (action),
+ mres->score, required_score);
+
+ obj = ucl_object_typed_new (UCL_OBJECT);
+ ucl_object_insert_key (obj, ucl_object_frombool (is_spam),
+ "is_spam", 0, false);
+ ucl_object_insert_key (obj, ucl_object_frombool (task->is_skipped),
+ "is_skipped", 0, false);
+ ucl_object_insert_key (obj, ucl_object_fromdouble (mres->score),
+ "score", 0, false);
+ ucl_object_insert_key (obj, ucl_object_fromdouble (required_score),
+ "required_score", 0, false);
+ ucl_object_insert_key (obj, ucl_object_fromstring (str_action_metric (action)),
+ "action", 0, false);
+
+ if (action == METRIC_ACTION_REWRITE_SUBJECT) {
+ subject = make_rewritten_subject (m, task);
+ ucl_object_insert_key (obj, ucl_object_fromstring (subject),
+ "subject", 0, false);
+ }
+ /* Now handle symbols */
+ g_hash_table_iter_init (&hiter, mres->symbols);
+ while (g_hash_table_iter_next (&hiter, &h, &v)) {
+ sym = (struct symbol *)v;
+ sobj = rspamd_metric_symbol_ucl (task, m, sym, logbuf);
+ ucl_object_insert_key (obj, sobj, h, 0, false);
+ }
+
+ /* Cut the trailing comma if needed */
+ if (logbuf->str[logbuf->len - 1] == ',') {
+ logbuf->len --;
+ }
+
+#ifdef HAVE_CLOCK_GETTIME
+ rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,",
+ task->msg->len, calculate_check_time (&task->tv, &task->ts,
+ task->cfg->clock_res, &task->scan_milliseconds), task->dns_requests);
+#else
+ rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,",
+ task->msg->len,
+ calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds),
+ task->dns_requests);
+#endif
+
+ return obj;
+}
+
+static void
+rspamd_ucl_tolegacy_output (struct rspamd_task *task, ucl_object_t *top, GString *out)
+{
+ const ucl_object_t *metric, *score,
+ *required_score, *is_spam, *elt, *symbols;
+ ucl_object_iter_t iter = NULL;
+
+ metric = ucl_object_find_key (top, DEFAULT_METRIC);
+ if (metric != NULL) {
+ score = ucl_object_find_key (metric, "score");
+ required_score = ucl_object_find_key (metric, "required_score");
+ is_spam = ucl_object_find_key (metric, "is_spam");
+ g_string_append_printf (out, "Metric: default; %s; %.2f / %.2f / 0.0\r\n",
+ ucl_object_toboolean (is_spam) ? "True" : "False",
+ ucl_object_todouble (score),
+ ucl_object_todouble (required_score));
+ elt = ucl_object_find_key (metric, "action");
+ if (elt != NULL) {
+ g_string_append_printf (out, "Action: %s\r\n",
+ ucl_object_tostring (elt));
+ }
+
+ symbols = ucl_object_find_key (metric, "symbols");
+ while ((elt = ucl_iterate_object (symbols, &iter, true)) != NULL) {
+ const ucl_object_t *sym_score;
+ sym_score = ucl_object_find_key (elt, "score");
+ g_string_append_printf (out, "Symbol: %s; %.2f\r\n",
+ ucl_object_key (elt),
+ ucl_object_todouble (sym_score));
+ }
+
+ elt = ucl_object_find_key (metric, "subject");
+ if (elt != NULL) {
+ g_string_append_printf (out, "Subject: %s\r\n",
+ ucl_object_tostring (elt));
+ }
+ }
+ g_string_append_printf (out, "Message-ID: %s\r\n", task->message_id);
+}
+
+static void
+write_check_reply (struct rspamd_http_message *msg, struct rspamd_task *task)
+{
+ GString *logbuf;
+ struct metric_result *metric_res;
+ GHashTableIter hiter;
+ gpointer h, v;
+ ucl_object_t *top = NULL, *obj;
+
+ /* Output the first line - check status */
+ logbuf = g_string_sized_new (BUFSIZ);
+ rspamd_printf_gstring (logbuf, "id: <%s>, qid: <%s>, ", task->message_id, task->queue_id);
+
+ if (task->user) {
+ rspamd_printf_gstring (logbuf, "user: %s, ", task->user);
+ }
+
+ if (!task->no_log) {
+ rspamd_roll_history_update (task->worker->srv->history, task);
+ }
+ g_hash_table_iter_init (&hiter, task->results);
+
+ top = ucl_object_typed_new (UCL_OBJECT);
+ /* Convert results to an ucl object */
+ while (g_hash_table_iter_next (&hiter, &h, &v)) {
+ metric_res = (struct metric_result *)v;
+ obj = rspamd_metric_result_ucl (task, metric_res, logbuf);
+ ucl_object_insert_key (top, obj, h, 0, false);
+ }
+
+ if (task->messages != NULL) {
+ ucl_object_insert_key (top, rspamd_str_list_ucl (task->messages), "messages", 0, false);
+ }
+ if (g_tree_nnodes (task->urls) > 0) {
+ ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls, task), "urls", 0, false);
+ }
+ if (g_tree_nnodes (task->emails) > 0) {
+ ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task),
+ "emails", 0, false);
+ }
+
+ ucl_object_insert_key (top, ucl_object_fromstring (task->message_id),
+ "message-id", 0, false);
+
+ write_hashes_to_log (task, logbuf);
+ if (!task->no_log) {
+ msg_info ("%v", logbuf);
+ }
+ g_string_free (logbuf, TRUE);
+
+ msg->body = g_string_sized_new (BUFSIZ);
+
+ if (msg->method < HTTP_SYMBOLS) {
+ rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body);
+ }
+ else {
+ rspamd_ucl_tolegacy_output (task, top, msg->body);
+ }
+ ucl_object_unref (top);
+
+ /* Increase counters */
+ task->worker->srv->stat->messages_scanned++;
+}
+
+void
+rspamd_protocol_write_reply (struct rspamd_task *task)
+{
+ struct rspamd_http_message *msg;
+ const gchar *ctype = "application/json";
+ ucl_object_t *top = NULL;
+
+ msg = rspamd_http_new_message (HTTP_RESPONSE);
+ if (!task->is_json) {
+ /* Turn compatibility on */
+ msg->method = HTTP_SYMBOLS;
+ }
+ msg->date = time (NULL);
+
+ task->state = CLOSING_CONNECTION;
+
+ top = ucl_object_typed_new (UCL_OBJECT);
+ debug_task ("writing reply to client");
+ if (task->error_code != 0) {
+ msg->code = task->error_code;
+ ucl_object_insert_key (top, ucl_object_fromstring (task->last_error), "error", 0, false);
+ msg->body = g_string_sized_new (256);
+ rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body);
+ ucl_object_unref (top);
+ }
+ else {
+ switch (task->cmd) {
+ case CMD_REPORT_IFSPAM:
+ case CMD_REPORT:
+ case CMD_CHECK:
+ case CMD_SYMBOLS:
+ case CMD_PROCESS:
+ case CMD_SKIP:
+ write_check_reply (msg, task);
+ break;
+ case CMD_PING:
+ msg->body = g_string_new ("pong");
+ break;
+ case CMD_OTHER:
+ msg_err ("BROKEN");
+ break;
+ }
+ }
+
+ rspamd_http_connection_reset (task->http_conn);
+ rspamd_http_connection_write_message (task->http_conn, msg, NULL,
+ ctype, task, task->sock, &task->tv, task->ev_base);
+}
+
+void
+register_protocol_command (const gchar *name, protocol_reply_func func)
+{
+ struct custom_command *cmd;
+
+ cmd = g_malloc (sizeof (struct custom_command));
+ cmd->name = name;
+ cmd->func = func;
+
+ custom_commands = g_list_prepend (custom_commands, cmd);
+}
diff --git a/src/libmime/protocol.h b/src/libmime/protocol.h
new file mode 100644
index 000000000..8d2efe118
--- /dev/null
+++ b/src/libmime/protocol.h
@@ -0,0 +1,46 @@
+/**
+ * @file protocol.h
+ * Rspamd protocol definition
+ */
+
+#ifndef RSPAMD_PROTOCOL_H
+#define RSPAMD_PROTOCOL_H
+
+#include "config.h"
+#include "filter.h"
+#include "http.h"
+#include "task.h"
+
+#define RSPAMD_BASE_ERROR 500
+#define RSPAMD_FILTER_ERROR RSPAMD_BASE_ERROR + 1
+#define RSPAMD_NETWORK_ERROR RSPAMD_BASE_ERROR + 2
+#define RSPAMD_PROTOCOL_ERROR RSPAMD_BASE_ERROR + 3
+#define RSPAMD_LENGTH_ERROR RSPAMD_BASE_ERROR + 4
+#define RSPAMD_STATFILE_ERROR RSPAMD_BASE_ERROR + 5
+
+struct metric;
+
+/**
+ * Process HTTP request to the task structure
+ * @param task
+ * @param msg
+ * @return
+ */
+gboolean rspamd_protocol_handle_request (struct rspamd_task *task, struct rspamd_http_message *msg);
+
+/**
+ * Write reply for specified task command
+ * @param task task object
+ * @return 0 if we wrote reply and -1 if there was some error
+ */
+void rspamd_protocol_write_reply (struct rspamd_task *task);
+
+
+/**
+ * Register custom fucntion to extend protocol
+ * @param name symbolic name of custom function
+ * @param func callback function for writing reply
+ */
+void register_protocol_command (const gchar *name, protocol_reply_func func);
+
+#endif
diff --git a/src/libmime/smtp_proto.c b/src/libmime/smtp_proto.c
new file mode 100644
index 000000000..3af1c3910
--- /dev/null
+++ b/src/libmime/smtp_proto.c
@@ -0,0 +1,701 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "cfg_file.h"
+#include "util.h"
+#include "smtp.h"
+#include "smtp_proto.h"
+#include "smtp_utils.h"
+
+gchar *
+make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...)
+{
+ va_list vp;
+ gchar *result = NULL, *p;
+ size_t len;
+
+ va_start (vp, format);
+ len = g_printf_string_upper_bound (format, vp);
+ va_end (vp);
+ va_start (vp, format);
+ len += sizeof ("65535 ") + sizeof (CRLF) - 1;
+ result = rspamd_mempool_alloc (pool, len);
+ p = result + rspamd_snprintf (result, len, "%d ", error_code);
+ p = rspamd_vsnprintf (p, len - (p - result), format, vp);
+ *p++ = CR; *p++ = LF; *p = '\0';
+ va_end (vp);
+
+ return result;
+}
+
+
+gboolean
+parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd)
+{
+ enum {
+ SMTP_PARSE_START = 0,
+ SMTP_PARSE_SPACES,
+ SMTP_PARSE_ARGUMENT,
+ SMTP_PARSE_DONE
+ } state;
+ gchar *p, *c, ch, cmd_buf[4];
+ guint i;
+ f_str_t *arg = NULL;
+ struct smtp_command *pcmd;
+
+ if (line->len == 0) {
+ return FALSE;
+ }
+
+ state = SMTP_PARSE_START;
+ c = line->begin;
+ p = c;
+ *cmd = rspamd_mempool_alloc0 (session->pool, sizeof (struct smtp_command));
+ pcmd = *cmd;
+
+ for (i = 0; i < line->len; i ++, p ++) {
+ ch = *p;
+ switch (state) {
+ case SMTP_PARSE_START:
+ if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) {
+ if (i == line->len - 1) {
+ p ++;
+ }
+ if (p - c == 4) {
+ cmd_buf[0] = g_ascii_toupper (c[0]);
+ cmd_buf[1] = g_ascii_toupper (c[1]);
+ cmd_buf[2] = g_ascii_toupper (c[2]);
+ cmd_buf[3] = g_ascii_toupper (c[3]);
+
+ if (memcmp (cmd_buf, "HELO", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_HELO;
+ }
+ else if (memcmp (cmd_buf, "EHLO", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_EHLO;
+ }
+ else if (memcmp (cmd_buf, "MAIL", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_MAIL;
+ }
+ else if (memcmp (cmd_buf, "RCPT", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_RCPT;
+ }
+ else if (memcmp (cmd_buf, "DATA", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_DATA;
+ }
+ else if (memcmp (cmd_buf, "QUIT", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_QUIT;
+ }
+ else if (memcmp (cmd_buf, "NOOP", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_NOOP;
+ }
+ else if (memcmp (cmd_buf, "EXPN", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_EXPN;
+ }
+ else if (memcmp (cmd_buf, "RSET", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_RSET;
+ }
+ else if (memcmp (cmd_buf, "HELP", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_HELP;
+ }
+ else if (memcmp (cmd_buf, "VRFY", 4) == 0) {
+ pcmd->command = SMTP_COMMAND_VRFY;
+ }
+ else {
+ msg_info ("invalid command: %*s", 4, cmd_buf);
+ return FALSE;
+ }
+ }
+ else {
+ /* Invalid command */
+ msg_info ("invalid command: %*s", 4, c);
+ return FALSE;
+ }
+ /* Now check what we have */
+ if (ch == ' ' || ch == ':') {
+ state = SMTP_PARSE_SPACES;
+ }
+ else if (ch == CR) {
+ state = SMTP_PARSE_DONE;
+ }
+ else if (ch == LF) {
+ return TRUE;
+ }
+ }
+ else if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z')) {
+ msg_info ("invalid letter code in SMTP command: %d", (gint)ch);
+ return FALSE;
+ }
+ break;
+ case SMTP_PARSE_SPACES:
+ if (ch == CR) {
+ state = SMTP_PARSE_DONE;
+ }
+ else if (ch == LF) {
+ goto end;
+ }
+ else if (ch != ' ' && ch != ':') {
+ state = SMTP_PARSE_ARGUMENT;
+ arg = rspamd_mempool_alloc (session->pool, sizeof (f_str_t));
+ c = p;
+ }
+ break;
+ case SMTP_PARSE_ARGUMENT:
+ if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) {
+ if (i == line->len - 1 && (ch != ' ' && ch != CR && ch != LF)) {
+ p ++;
+ }
+ arg->len = p - c;
+ arg->begin = rspamd_mempool_alloc (session->pool, arg->len);
+ memcpy (arg->begin, c, arg->len);
+ pcmd->args = g_list_prepend (pcmd->args, arg);
+ if (ch == ' ' || ch == ':') {
+ state = SMTP_PARSE_SPACES;
+ }
+ else if (ch == CR) {
+ state = SMTP_PARSE_DONE;
+ }
+ else {
+ goto end;
+ }
+ }
+ break;
+ case SMTP_PARSE_DONE:
+ if (ch == LF) {
+ goto end;
+ }
+ msg_info ("CR without LF in SMTP command");
+ return FALSE;
+ }
+ }
+
+end:
+ if (pcmd->args) {
+ pcmd->args = g_list_reverse (pcmd->args);
+ rspamd_mempool_add_destructor (session->pool, (rspamd_mempool_destruct_t)g_list_free, pcmd->args);
+ }
+ return TRUE;
+}
+
+static gboolean
+check_smtp_path (f_str_t *path)
+{
+ guint i;
+ gchar *p;
+
+ p = path->begin;
+ if (*p != '<' || path->len < 2) {
+ return FALSE;
+ }
+ for (i = 0; i < path->len; i++, p ++) {
+ if (*p == '>' && i != path->len - 1) {
+ return FALSE;
+ }
+ }
+
+ return *(p - 1) == '>';
+}
+
+gboolean
+parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd)
+{
+ f_str_t *arg;
+
+ if (cmd->args == NULL) {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+ arg = cmd->args->data;
+ session->helo = rspamd_mempool_alloc (session->pool, arg->len + 1);
+ rspamd_strlcpy (session->helo, arg->begin, arg->len + 1);
+ /* Now try to write reply */
+ if (cmd->command == SMTP_COMMAND_HELO) {
+ /* No ESMTP */
+ session->error = SMTP_ERROR_OK;
+ session->esmtp = FALSE;
+ return TRUE;
+ }
+ else {
+ /* Try to write all capabilities */
+ session->esmtp = TRUE;
+ if (session->ctx->smtp_capabilities == NULL) {
+ session->error = SMTP_ERROR_OK;
+ return TRUE;
+ }
+ else {
+ session->error = session->ctx->smtp_capabilities;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+gboolean
+parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd)
+{
+ f_str_t *arg;
+ GList *cur = cmd->args;
+
+ if (cmd->args == NULL) {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+ arg = cur->data;
+ /* First argument MUST be FROM */
+ if (arg->len != 4 || (
+ g_ascii_toupper (arg->begin[0]) != 'F' ||
+ g_ascii_toupper (arg->begin[1]) != 'R' ||
+ g_ascii_toupper (arg->begin[2]) != 'O' ||
+ g_ascii_toupper (arg->begin[3]) != 'M')) {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+ /* Next one is from address */
+ cur = g_list_next (cur);
+ if (cur == NULL) {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+ arg = cur->data;
+ if (check_smtp_path (arg)) {
+ session->from = cur;
+ }
+ else {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd)
+{
+ f_str_t *arg;
+ GList *cur = cmd->args;
+
+ if (cmd->args == NULL) {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+ arg = cur->data;
+ /* First argument MUST be FROM */
+ if (arg->len != 2 || (
+ g_ascii_toupper (arg->begin[0]) != 'T' ||
+ g_ascii_toupper (arg->begin[1]) != 'O')) {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+ /* Next one is from address */
+ cur = g_list_next (cur);
+ if (cur == NULL) {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+ arg = cur->data;
+ if (check_smtp_path (arg)) {
+ session->rcpt = g_list_prepend (session->rcpt, cur);
+ }
+ else {
+ session->error = SMTP_ERROR_BAD_ARGUMENTS;
+ return FALSE;
+ }
+
+ return TRUE;
+
+}
+
+/* Return -1 if there are some error, 1 if all is ok and 0 in case of incomplete reply */
+static gint
+check_smtp_ustream_reply (f_str_t *in, gchar success_code)
+{
+ gchar *p;
+
+ /* Check for 250 at the begin of line */
+ if (in->len >= sizeof ("220 ") - 1) {
+ p = in->begin;
+ if (p[0] == success_code) {
+ /* Last reply line */
+ if (p[3] == ' ') {
+ return 1;
+ }
+ else {
+ return 0;
+ }
+ }
+ else {
+ return -1;
+ }
+ }
+
+ return -1;
+}
+
+size_t
+smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen)
+{
+ GList *cur = args;
+ size_t r = 0;
+ f_str_t *arg;
+
+ while (cur && r < buflen - 3) {
+ arg = cur->data;
+ r += rspamd_snprintf (buf + r, buflen - r, " %V", arg);
+ cur = g_list_next (cur);
+ }
+
+ buf[r++] = CR;
+ buf[r++] = LF;
+ buf[r] = '\0';
+
+ return r;
+}
+
+gboolean
+smtp_upstream_write_socket (void *arg)
+{
+ struct smtp_session *session = arg;
+
+ if (session->upstream_state == SMTP_STATE_IN_SENDFILE) {
+ session->upstream_state = SMTP_STATE_AFTER_DATA;
+ return rspamd_dispatcher_write (session->upstream_dispatcher, CRLF DATA_END_TRAILER, sizeof (CRLF DATA_END_TRAILER) - 1, FALSE, TRUE);
+ }
+
+ return TRUE;
+}
+
+gboolean
+smtp_upstream_read_socket (f_str_t * in, void *arg)
+{
+ struct smtp_session *session = arg;
+ gchar outbuf[BUFSIZ];
+ gint r;
+
+ msg_debug ("in: %V, state: %d", in, session->upstream_state);
+ switch (session->upstream_state) {
+ case SMTP_STATE_GREETING:
+ r = check_smtp_ustream_reply (in, '2');
+ if (r == -1) {
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ /* XXX: assume upstream errors as critical errors */
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ else if (r == 1) {
+ if (session->ctx->use_xclient) {
+ r = rspamd_snprintf (outbuf, sizeof (outbuf), "XCLIENT NAME=%s ADDR=%s" CRLF,
+ session->resolved ? session->hostname : "[UNDEFINED]",
+ inet_ntoa (session->client_addr));
+ session->upstream_state = SMTP_STATE_HELO;
+ return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+ }
+ else {
+ session->upstream_state = SMTP_STATE_FROM;
+ if (session->helo) {
+ r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF,
+ session->esmtp ? "EHLO" : "HELO",
+ session->helo);
+ }
+ else {
+ return smtp_upstream_read_socket (in, arg);
+ }
+ return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+ }
+ }
+ break;
+ case SMTP_STATE_HELO:
+ r = check_smtp_ustream_reply (in, '2');
+ if (r == -1) {
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ /* XXX: assume upstream errors as critical errors */
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ else if (r == 1) {
+ session->upstream_state = SMTP_STATE_FROM;
+ if (session->helo) {
+ r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF,
+ session->esmtp ? "EHLO" : "HELO",
+ session->helo);
+ }
+ else {
+ return smtp_upstream_read_socket (in, arg);
+ }
+ return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+ }
+ break;
+ case SMTP_STATE_FROM:
+ r = check_smtp_ustream_reply (in, '2');
+ if (r == -1) {
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ /* XXX: assume upstream errors as critical errors */
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ else if (r == 1) {
+ r = rspamd_snprintf (outbuf, sizeof (outbuf), "MAIL FROM: ");
+ r += smtp_upstream_write_list (session->from, outbuf + r, sizeof (outbuf) - r);
+ session->upstream_state = SMTP_STATE_RCPT;
+ return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+ }
+ break;
+ case SMTP_STATE_RCPT:
+ r = check_smtp_ustream_reply (in, '2');
+ if (r == -1) {
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ /* XXX: assume upstream errors as critical errors */
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ else if (r == 1) {
+ r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: ");
+ session->cur_rcpt = g_list_first (session->rcpt);
+ r += smtp_upstream_write_list (session->cur_rcpt->data, outbuf + r, sizeof (outbuf) - r);
+ session->cur_rcpt = g_list_next (session->cur_rcpt);
+ session->upstream_state = SMTP_STATE_BEFORE_DATA;
+ return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE);
+ }
+ break;
+ case SMTP_STATE_BEFORE_DATA:
+ r = check_smtp_ustream_reply (in, '2');
+ if (r == -1) {
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ if (session->cur_rcpt) {
+ session->rcpt = g_list_delete_link (session->rcpt, session->cur_rcpt);
+ }
+ else {
+ session->rcpt = g_list_delete_link (session->rcpt, session->rcpt);
+ }
+ session->errors ++;
+ session->state = SMTP_STATE_RCPT;
+ return TRUE;
+ }
+ else if (r == 1) {
+ if (session->cur_rcpt != NULL) {
+ r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: ");
+ r += smtp_upstream_write_list (session->cur_rcpt, outbuf + r, sizeof (outbuf) - r);
+ session->cur_rcpt = g_list_next (session->cur_rcpt);
+ if (! rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE)) {
+ goto err;
+ }
+ }
+ else {
+ session->upstream_state = SMTP_STATE_DATA;
+ rspamd_dispatcher_pause (session->upstream_dispatcher);
+ }
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ /* Write to client */
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ if (session->state == SMTP_STATE_WAIT_UPSTREAM) {
+ rspamd_dispatcher_restore (session->dispatcher);
+ session->state = SMTP_STATE_RCPT;
+ }
+ }
+ break;
+ case SMTP_STATE_DATA:
+ r = check_smtp_ustream_reply (in, '3');
+ if (r == -1) {
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ /* XXX: assume upstream errors as critical errors */
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ else if (r == 1) {
+ if (! make_smtp_tempfile (session)) {
+ session->error = SMTP_ERROR_FILE;
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ session->state = SMTP_STATE_AFTER_DATA;
+ session->error = SMTP_ERROR_DATA_OK;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ rspamd_dispatcher_pause (session->upstream_dispatcher);
+ rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_LINE, 0);
+ session->dispatcher->strip_eol = FALSE;
+ return TRUE;
+ }
+ break;
+ case SMTP_STATE_AFTER_DATA:
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ session->state = SMTP_STATE_DATA;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, sizeof ("QUIT" CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ session->upstream_state = SMTP_STATE_END;
+ return TRUE;
+ break;
+ case SMTP_STATE_END:
+ r = check_smtp_ustream_reply (in, '5');
+ if (r == -1) {
+ session->error = rspamd_mempool_alloc (session->pool, in->len + 1);
+ rspamd_strlcpy (session->error, in->begin, in->len + 1);
+ /* XXX: assume upstream errors as critical errors */
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (!rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ else {
+ remove_normal_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session);
+ }
+ return FALSE;
+ break;
+ default:
+ msg_err ("got upstream reply at unexpected state: %d, reply: %V", session->upstream_state, in);
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+
+ return TRUE;
+err:
+ msg_warn ("write error occured");
+ return FALSE;
+}
+
+void
+smtp_upstream_err_socket (GError *err, void *arg)
+{
+ struct smtp_session *session = arg;
+
+ msg_info ("abnormally closing connection with upstream %s, error: %s", session->upstream->name, err->message);
+ session->error = SMTP_ERROR_UPSTREAM;
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ /* XXX: assume upstream errors as critical errors */
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ return;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ return;
+ }
+ upstream_fail (&session->upstream->up, session->session_time);
+ destroy_session (session->s);
+}
+
+void
+smtp_upstream_finalize_connection (gpointer data)
+{
+ struct smtp_session *session = data;
+
+ if (session->state != SMTP_STATE_CRITICAL_ERROR) {
+ if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, 0, FALSE, TRUE)) {
+ msg_warn ("cannot send correctly closing message to upstream");
+ }
+ }
+ rspamd_remove_dispatcher (session->upstream_dispatcher);
+ session->upstream_dispatcher = NULL;
+ close (session->upstream_sock);
+ session->upstream_sock = -1;
+}
diff --git a/src/libmime/smtp_proto.h b/src/libmime/smtp_proto.h
new file mode 100644
index 000000000..42fecd255
--- /dev/null
+++ b/src/libmime/smtp_proto.h
@@ -0,0 +1,95 @@
+#ifndef RSPAMD_SMTP_PROTO_H
+#define RSPAMD_SMTP_PROTO_H
+
+#include "config.h"
+#include "smtp.h"
+
+/* SMTP errors */
+#define SMTP_ERROR_BAD_COMMAND "500 Syntax error, command unrecognized" CRLF
+#define SMTP_ERROR_BAD_ARGUMENTS "501 Syntax error in parameters or arguments" CRLF
+#define SMTP_ERROR_SEQUENCE "503 Bad sequence of commands" CRLF
+#define SMTP_ERROR_RECIPIENTS "554 No valid recipients" CRLF
+#define SMTP_ERROR_UNIMPLIMENTED "502 Command not implemented" CRLF
+#define SMTP_ERROR_LIMIT "505 Too many errors. Aborting." CRLF
+#define SMTP_ERROR_UPSTREAM "421 Service not available, closing transmission channel" CRLF
+#define SMTP_ERROR_FILE "420 Service not available, filesystem error" CRLF
+#define SMTP_ERROR_OK "250 Requested mail action okay, completed" CRLF
+#define SMTP_ERROR_DATA_OK "354 Start mail input; end with <CRLF>.<CRLF>" CRLF
+
+#define DATA_END_TRAILER "." CRLF
+
+#define XCLIENT_HOST_UNAVAILABLE "[UNAVAILABLE]"
+#define XCLIENT_HOST_TEMPFAIL "[TEMPUNAVAIL]"
+
+#define MAX_SMTP_UPSTREAMS 128
+
+struct smtp_command {
+ enum {
+ SMTP_COMMAND_HELO,
+ SMTP_COMMAND_EHLO,
+ SMTP_COMMAND_QUIT,
+ SMTP_COMMAND_NOOP,
+ SMTP_COMMAND_MAIL,
+ SMTP_COMMAND_RCPT,
+ SMTP_COMMAND_RSET,
+ SMTP_COMMAND_DATA,
+ SMTP_COMMAND_VRFY,
+ SMTP_COMMAND_EXPN,
+ SMTP_COMMAND_HELP
+ } command;
+ GList *args;
+};
+
+/*
+ * Generate SMTP error message
+ */
+gchar * make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...);
+
+/*
+ * Parse a single SMTP command
+ */
+gboolean parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd);
+
+/*
+ * Parse HELO command
+ */
+gboolean parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd);
+
+/*
+ * Parse MAIL command
+ */
+gboolean parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd);
+
+/*
+ * Parse RCPT command
+ */
+gboolean parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd);
+
+/* Upstream SMTP */
+
+/*
+ * Read a line from SMTP upstream
+ */
+gboolean smtp_upstream_read_socket (f_str_t * in, void *arg);
+
+/*
+ * Write to SMTP upstream
+ */
+gboolean smtp_upstream_write_socket (void *arg);
+
+/*
+ * Error handler for SMTP upstream
+ */
+void smtp_upstream_err_socket (GError *err, void *arg);
+
+/*
+ * Terminate connection with upstream
+ */
+void smtp_upstream_finalize_connection (gpointer data);
+
+/*
+ * Write a list of strings to the upstream
+ */
+size_t smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen);
+
+#endif
diff --git a/src/libmime/smtp_utils.c b/src/libmime/smtp_utils.c
new file mode 100644
index 000000000..5178de9dd
--- /dev/null
+++ b/src/libmime/smtp_utils.c
@@ -0,0 +1,362 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "filter.h"
+#include "settings.h"
+#include "smtp.h"
+#include "smtp_proto.h"
+
+void
+free_smtp_session (gpointer arg)
+{
+ struct smtp_session *session = arg;
+
+ if (session) {
+ if (session->task) {
+ rspamd_task_free (session->task, FALSE);
+ if (session->task->msg->str) {
+ munmap (session->task->msg->str, session->task->msg->len);
+ }
+ }
+ if (session->rcpt) {
+ g_list_free (session->rcpt);
+ }
+ if (session->dispatcher) {
+ rspamd_remove_dispatcher (session->dispatcher);
+ }
+ close (session->sock);
+ if (session->temp_name != NULL) {
+ unlink (session->temp_name);
+ }
+ if (session->temp_fd != -1) {
+ close (session->temp_fd);
+ }
+ rspamd_mempool_delete (session->pool);
+ g_free (session);
+ }
+}
+
+gboolean
+create_smtp_upstream_connection (struct smtp_session *session)
+{
+ struct smtp_upstream *selected;
+
+ /* Try to select upstream */
+ selected = (struct smtp_upstream *)get_upstream_round_robin (session->ctx->upstreams,
+ session->ctx->upstream_num, sizeof (struct smtp_upstream),
+ session->session_time, DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS);
+ if (selected == NULL) {
+ msg_err ("no upstreams suitable found");
+ return FALSE;
+ }
+
+ session->upstream = selected;
+
+ /* Now try to create socket */
+ session->upstream_sock = make_universal_socket (selected->addr, selected->port, SOCK_STREAM, TRUE, FALSE, FALSE);
+ if (session->upstream_sock == -1) {
+ msg_err ("cannot make a connection to %s", selected->name);
+ upstream_fail (&selected->up, session->session_time);
+ return FALSE;
+ }
+ /* Create a dispatcher for upstream connection */
+ session->upstream_dispatcher = rspamd_create_dispatcher (session->ev_base, session->upstream_sock, BUFFER_LINE,
+ smtp_upstream_read_socket, smtp_upstream_write_socket, smtp_upstream_err_socket,
+ &session->ctx->smtp_timeout, session);
+ session->state = SMTP_STATE_WAIT_UPSTREAM;
+ session->upstream_state = SMTP_STATE_GREETING;
+ register_async_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session, g_quark_from_static_string ("smtp proxy"));
+ return TRUE;
+}
+
+gboolean
+smtp_send_upstream_message (struct smtp_session *session)
+{
+ rspamd_dispatcher_pause (session->dispatcher);
+ rspamd_dispatcher_restore (session->upstream_dispatcher);
+
+ session->upstream_state = SMTP_STATE_IN_SENDFILE;
+ session->state = SMTP_STATE_WAIT_UPSTREAM;
+ if (! rspamd_dispatcher_sendfile (session->upstream_dispatcher, session->temp_fd, session->temp_size)) {
+ msg_err ("sendfile failed: %s", strerror (errno));
+ goto err;
+ }
+ return TRUE;
+
+err:
+ session->error = SMTP_ERROR_FILE;
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ return FALSE;
+ }
+ destroy_session (session->s);
+ return FALSE;
+}
+
+struct smtp_metric_callback_data {
+ struct smtp_session *session;
+ enum rspamd_metric_action action;
+ struct metric_result *res;
+ gchar *log_buf;
+ gint log_offset;
+ gint log_size;
+ gboolean alive;
+};
+
+static void
+smtp_metric_symbols_callback (gpointer key, gpointer value, void *user_data)
+{
+ struct smtp_metric_callback_data *cd = user_data;
+
+ cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "%s,", (gchar *)key);
+}
+
+static void
+smtp_metric_callback (gpointer key, gpointer value, gpointer ud)
+{
+ struct smtp_metric_callback_data *cd = ud;
+ struct metric_result *metric_res = value;
+ enum rspamd_metric_action action = METRIC_ACTION_NOACTION;
+ double ms = 0, rs = 0;
+ gboolean is_spam = FALSE;
+ struct rspamd_task *task;
+
+ task = cd->session->task;
+
+ if (!check_metric_settings (metric_res, &ms, &rs)) {
+ ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score;
+ rs = metric_res->metric->actions[METRIC_ACTION_REJECT].score;
+ }
+ if (! check_metric_action_settings (task, metric_res, metric_res->score, &action)) {
+ action = check_metric_action (metric_res->score, ms, metric_res->metric);
+ }
+ if (metric_res->score >= ms) {
+ is_spam = 1;
+ }
+ if (action < cd->action) {
+ cd->action = action;
+ cd->res = metric_res;
+ }
+
+ if (!task->is_skipped) {
+ cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (%s): [%.2f/%.2f/%.2f] [",
+ (gchar *)key, is_spam ? 'T' : 'F', str_action_metric (action), metric_res->score, ms, rs);
+ }
+ else {
+ cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (default): [%.2f/%.2f/%.2f] [",
+ (gchar *)key, 'S', metric_res->score, ms, rs);
+
+ }
+ g_hash_table_foreach (metric_res->symbols, smtp_metric_symbols_callback, cd);
+ /* Remove last , from log buf */
+ if (cd->log_buf[cd->log_offset - 1] == ',') {
+ cd->log_buf[--cd->log_offset] = '\0';
+ }
+
+#ifdef HAVE_CLOCK_GETTIME
+ cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,",
+ task->msg->len, calculate_check_time (&task->tv, &task->ts, task->cfg->clock_res, &task->scan_milliseconds));
+#else
+ cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,",
+ task->msg->len, calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds));
+#endif
+}
+
+gboolean
+make_smtp_tempfile (struct smtp_session *session)
+{
+ gsize r;
+
+ r = strlen (session->cfg->temp_dir) + sizeof ("/rspamd-XXXXXX");
+ session->temp_name = rspamd_mempool_alloc (session->pool, r);
+ rspamd_snprintf (session->temp_name, r, "%s%crspamd-XXXXXX", session->cfg->temp_dir, G_DIR_SEPARATOR);
+#ifdef HAVE_MKSTEMP
+ /* Umask is set before */
+ session->temp_fd = mkstemp (session->temp_name);
+#else
+ session->temp_fd = g_mkstemp_full (session->temp_name, O_RDWR, S_IWUSR | S_IRUSR);
+#endif
+ if (session->temp_fd == -1) {
+ msg_err ("mkstemp error: %s", strerror (errno));
+
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gboolean
+write_smtp_reply (struct smtp_session *session)
+{
+ gchar logbuf[1024], *new_subject;
+ const gchar *old_subject;
+ struct smtp_metric_callback_data cd;
+ GMimeStream *stream;
+ gint old_fd, sublen;
+
+ /* Check metrics */
+ cd.session = session;
+ cd.action = METRIC_ACTION_NOACTION;
+ cd.res = NULL;
+ cd.log_buf = logbuf;
+ cd.log_offset = rspamd_snprintf (logbuf, sizeof (logbuf), "id: <%s>, qid: <%s>, ",
+ session->task->message_id, session->task->queue_id);
+ cd.log_size = sizeof (logbuf);
+ if (session->task->user) {
+ cd.log_offset += rspamd_snprintf (logbuf + cd.log_offset, sizeof (logbuf) - cd.log_offset,
+ "user: %s, ", session->task->user);
+ }
+
+ g_hash_table_foreach (session->task->results, smtp_metric_callback, &cd);
+
+ msg_info ("%s", logbuf);
+
+ if (cd.action <= METRIC_ACTION_REJECT) {
+ if (! rspamd_dispatcher_write (session->dispatcher, session->ctx->reject_message, 0, FALSE, TRUE)) {
+ return FALSE;
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
+ return FALSE;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ else if (cd.action <= METRIC_ACTION_ADD_HEADER || cd.action <= METRIC_ACTION_REWRITE_SUBJECT) {
+ old_fd = session->temp_fd;
+ if (! make_smtp_tempfile (session)) {
+ session->error = SMTP_ERROR_FILE;
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+
+ if (cd.action <= METRIC_ACTION_REWRITE_SUBJECT) {
+ /* XXX: add this action */
+ old_subject = g_mime_message_get_subject (session->task->message);
+ if (old_subject != NULL) {
+ sublen = strlen (old_subject) + sizeof (SPAM_SUBJECT);
+ new_subject = rspamd_mempool_alloc (session->pool, sublen);
+ rspamd_snprintf (new_subject, sublen, "%s%s", SPAM_SUBJECT, old_subject);
+ }
+ else {
+ new_subject = SPAM_SUBJECT;
+ }
+ g_mime_message_set_subject (session->task->message, new_subject);
+ }
+ else if (cd.action <= METRIC_ACTION_ADD_HEADER) {
+#ifndef GMIME24
+ g_mime_message_add_header (session->task->message, "X-Spam", "true");
+#else
+ g_mime_object_append_header (GMIME_OBJECT (session->task->message), "X-Spam", "true");
+#endif
+ }
+ stream = g_mime_stream_fs_new (session->temp_fd);
+ g_mime_stream_fs_set_owner (GMIME_STREAM_FS (stream), FALSE);
+ close (old_fd);
+
+ if (g_mime_object_write_to_stream (GMIME_OBJECT (session->task->message), stream) == -1) {
+ msg_err ("cannot write MIME object to stream: %s", strerror (errno));
+ session->error = SMTP_ERROR_FILE;
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ rspamd_dispatcher_restore (session->dispatcher);
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ goto err;
+ }
+ destroy_session (session->s);
+ return FALSE;
+ }
+ g_object_unref (stream);
+ }
+ /* XXX: Add other actions */
+ return smtp_send_upstream_message (session);
+err:
+ session->error = SMTP_ERROR_FILE;
+ session->state = SMTP_STATE_CRITICAL_ERROR;
+ if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) {
+ return FALSE;
+ }
+ destroy_session (session->s);
+ return FALSE;
+}
+
+gboolean
+parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count)
+{
+ gchar **strv, *p, *t, *tt, *err_str;
+ guint32 num, i;
+ struct smtp_upstream *cur;
+ gchar resolved_path[PATH_MAX];
+
+ strv = g_strsplit_set (line, ",; ", -1);
+ num = g_strv_length (strv);
+
+ if (num >= MAX_SMTP_UPSTREAMS) {
+ msg_err ("cannot define %d upstreams %d is max", num, MAX_SMTP_UPSTREAMS);
+ return FALSE;
+ }
+ *count = 0;
+
+ for (i = 0; i < num; i ++) {
+ p = strv[i];
+ cur = &upstreams[*count];
+ if ((t = strrchr (p, ':')) != NULL && (tt = strchr (p, ':')) != t) {
+ /* Assume that after last `:' we have weigth */
+ *t = '\0';
+ t ++;
+ errno = 0;
+ cur->up.priority = strtoul (t, &err_str, 10);
+ if (errno != 0 || (err_str && *err_str != '\0')) {
+ msg_err ("cannot convert weight: %s, %s", t, strerror (errno));
+ g_strfreev (strv);
+ return FALSE;
+ }
+ }
+ if (*p == '/') {
+ cur->is_unix = TRUE;
+ if (realpath (p, resolved_path) == NULL) {
+ msg_err ("cannot resolve path: %s", resolved_path);
+ g_strfreev (strv);
+ return FALSE;
+ }
+ cur->name = rspamd_mempool_strdup (pool, resolved_path);
+ (*count) ++;
+ }
+ else {
+ if (! parse_host_port (pool, p, &cur->addr, &cur->port)) {
+ g_strfreev (strv);
+ return FALSE;
+ }
+ cur->name = rspamd_mempool_strdup (pool, p);
+ (*count) ++;
+ }
+ }
+
+ g_strfreev (strv);
+ return TRUE;
+}
diff --git a/src/libmime/smtp_utils.h b/src/libmime/smtp_utils.h
new file mode 100644
index 000000000..652b6759f
--- /dev/null
+++ b/src/libmime/smtp_utils.h
@@ -0,0 +1,63 @@
+#ifndef SMTP_UTILS_H_
+#define SMTP_UTILS_H_
+
+#include "config.h"
+#include "main.h"
+#include "smtp.h"
+
+/**
+ * @file smtp_utils.h
+ * Contains utilities for smtp protocol handling
+ */
+
+struct smtp_upstream {
+ struct upstream up;
+
+ const gchar *name;
+ gchar *addr;
+ guint16 port;
+ gboolean is_unix;
+};
+
+#define MAX_SMTP_UPSTREAMS 128
+
+struct smtp_session;
+
+/**
+ * Send message to upstream
+ * @param session session object
+ */
+gboolean smtp_send_upstream_message (struct smtp_session *session);
+
+/**
+ * Create connection to upstream
+ * @param session session object
+ */
+gboolean create_smtp_upstream_connection (struct smtp_session *session);
+
+/**
+ * Create temporary file for smtp session
+ */
+gboolean make_smtp_tempfile (struct smtp_session *session);
+
+/**
+ * Write reply to upstream
+ * @param session session object
+ */
+gboolean write_smtp_reply (struct smtp_session *session);
+
+/**
+ * Frees smtp session object
+ */
+void free_smtp_session (gpointer arg);
+
+/**
+ * Parse upstreams line
+ * @param upstreams pointer to the array of upstreams (must be at least MAX_SMTP_UPSTREAMS size)
+ * @param line description line
+ * @param count targeted count
+ * @return
+ */
+gboolean parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count);
+
+#endif /* SMTP_UTILS_H_ */
diff --git a/src/libmime/worker_util.c b/src/libmime/worker_util.c
new file mode 100644
index 000000000..d029f5dc4
--- /dev/null
+++ b/src/libmime/worker_util.c
@@ -0,0 +1,255 @@
+/* Copyright (c) 2010-2011, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "message.h"
+#include "lua/lua_common.h"
+
+extern struct rspamd_main *rspamd_main;
+
+/**
+ * Return worker's control structure by its type
+ * @param type
+ * @return worker's control structure or NULL
+ */
+worker_t*
+get_worker_by_type (GQuark type)
+{
+ worker_t **cur;
+
+ cur = &workers[0];
+ while (*cur) {
+ if (g_quark_from_string ((*cur)->name) == type) {
+ return *cur;
+ }
+ cur ++;
+ }
+
+ return NULL;
+}
+
+double
+set_counter (const gchar *name, guint32 value)
+{
+ struct counter_data *cd;
+ double alpha;
+ gchar *key;
+
+ cd = rspamd_hash_lookup (rspamd_main->counters, (gpointer) name);
+
+ if (cd == NULL) {
+ cd = rspamd_mempool_alloc_shared (rspamd_main->counters->pool, sizeof (struct counter_data));
+ cd->value = value;
+ cd->number = 0;
+ key = rspamd_mempool_strdup_shared (rspamd_main->counters->pool, name);
+ rspamd_hash_insert (rspamd_main->counters, (gpointer) key, (gpointer) cd);
+ }
+ else {
+ /* Calculate new value */
+ rspamd_mempool_wlock_rwlock (rspamd_main->counters->lock);
+
+ alpha = 2. / (++cd->number + 1);
+ cd->value = cd->value * (1. - alpha) + value * alpha;
+
+ rspamd_mempool_wunlock_rwlock (rspamd_main->counters->lock);
+ }
+
+ return cd->value;
+}
+
+struct event_base *
+prepare_worker (struct rspamd_worker *worker, const char *name,
+ rspamd_sig_handler_t sig_handler,
+ void (*accept_handler)(int, short, void *))
+{
+ struct event_base *ev_base;
+ struct event *accept_event;
+ struct sigaction signals;
+ GList *cur;
+ gint listen_socket;
+
+#ifdef WITH_PROFILER
+ extern void _start (void), etext (void);
+ monstartup ((u_long) & _start, (u_long) & etext);
+#endif
+
+ gperf_profiler_init (worker->srv->cfg, name);
+
+ worker->srv->pid = getpid ();
+
+ ev_base = event_init ();
+
+ init_signals (&signals, sig_handler);
+ sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL);
+
+ /* Accept all sockets */
+ cur = worker->cf->listen_socks;
+ while (cur) {
+ listen_socket = GPOINTER_TO_INT (cur->data);
+ if (listen_socket != -1) {
+ accept_event = g_slice_alloc0 (sizeof (struct event));
+ event_set (accept_event, listen_socket, EV_READ | EV_PERSIST,
+ accept_handler, worker);
+ event_base_set (ev_base, accept_event);
+ event_add (accept_event, NULL);
+ worker->accept_events = g_list_prepend (worker->accept_events, accept_event);
+ }
+ cur = g_list_next (cur);
+ }
+
+ return ev_base;
+}
+
+void
+worker_stop_accept (struct rspamd_worker *worker)
+{
+ GList *cur;
+ struct event *event;
+
+ /* Remove all events */
+ cur = worker->accept_events;
+ while (cur) {
+ event = cur->data;
+ event_del (event);
+ cur = g_list_next (cur);
+ g_slice_free1 (sizeof (struct event), event);
+ }
+
+ if (worker->accept_events != NULL) {
+ g_list_free (worker->accept_events);
+ }
+}
+
+/*
+ * Called if all filters are processed
+ * @return TRUE if session should be terminated
+ */
+gboolean
+rspamd_task_fin (void *arg)
+{
+ struct rspamd_task *task = (struct rspamd_task *) arg;
+ gint r;
+ GError *err = NULL;
+
+ /* Task is already finished or skipped */
+ if (task->state == WRITE_REPLY) {
+ if (task->fin_callback) {
+ task->fin_callback (task->fin_arg);
+ }
+ else {
+ rspamd_protocol_write_reply (task);
+ }
+ return TRUE;
+ }
+
+ /* We processed all filters and want to process statfiles */
+ if (task->state != WAIT_POST_FILTER && task->state != WAIT_PRE_FILTER) {
+ /* Process all statfiles */
+ if (task->classify_pool == NULL) {
+ /* Non-threaded version */
+ process_statfiles (task);
+ }
+ else {
+ /* Just process composites */
+ make_composites (task);
+ }
+ if (task->cfg->post_filters) {
+ /* More to process */
+ /* Special state */
+ task->state = WAIT_POST_FILTER;
+ return FALSE;
+ }
+
+ }
+
+ /* We are on post-filter waiting state */
+ if (task->state != WAIT_PRE_FILTER) {
+ /* Check if we have all events finished */
+ task->state = WRITE_REPLY;
+ if (task->fin_callback) {
+ task->fin_callback (task->fin_arg);
+ }
+ else {
+ rspamd_protocol_write_reply (task);
+ }
+ }
+ else {
+ /* We were waiting for pre-filter */
+ if (task->pre_result.action != METRIC_ACTION_NOACTION) {
+ /* Write result based on pre filters */
+ task->state = WRITE_REPLY;
+ if (task->fin_callback) {
+ task->fin_callback (task->fin_arg);
+ }
+ else {
+ rspamd_protocol_write_reply (task);
+ }
+ return TRUE;
+ }
+ else {
+ task->state = WAIT_FILTER;
+ r = process_filters (task);
+ if (r == -1) {
+ task->last_error = "Filter processing error";
+ task->error_code = RSPAMD_FILTER_ERROR;
+ task->state = WRITE_REPLY;
+ rspamd_protocol_write_reply (task);
+ return TRUE;
+ }
+ /* Add task to classify to classify pool */
+ if (!task->is_skipped && task->classify_pool) {
+ register_async_thread (task->s);
+ g_thread_pool_push (task->classify_pool, task, &err);
+ if (err != NULL) {
+ msg_err ("cannot pull task to the pool: %s", err->message);
+ remove_async_thread (task->s);
+ g_error_free (err);
+ }
+ }
+ if (task->is_skipped) {
+ rspamd_protocol_write_reply (task);
+ }
+ else {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+/*
+ * Called if session was restored inside fin callback
+ */
+void
+rspamd_task_restore (void *arg)
+{
+ struct rspamd_task *task = (struct rspamd_task *) arg;
+
+ /* Call post filters */
+ if (task->state == WAIT_POST_FILTER) {
+ lua_call_post_filters (task);
+ }
+ task->s->wanna_die = TRUE;
+}