diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-03-23 14:10:07 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-03-23 14:10:07 +0300 |
commit | c79b5ccd22cbc1c273479f4f88189a18effda533 (patch) | |
tree | 1741743779a70146a61cd1767936aa43d671e36b /src/expressions.c | |
parent | afdaddc4d0745a5bcefad73dd74fd4c03ae3de15 (diff) | |
download | rspamd-c79b5ccd22cbc1c273479f4f88189a18effda533.tar.gz rspamd-c79b5ccd22cbc1c273479f4f88189a18effda533.zip |
* Fix a bug in the expression parser that caused errors for expressions ending with a regexp
* Improve test for fuzzy hashes
* Add new object - TextPart to perl XS library that allows access to stripped parts and fuzzy hashes
* Add documentation for the expressions parser and for Mail::Rspamd::TextPart
* Always calculate fuzzy hash for text parts
* Store text parts separately from other parts
* Add compare_parts_distance function for expressions, which calculates the difference between the two text parts of a two-part message
* Do not try to substitute variables in empty strings
Diffstat (limited to 'src/expressions.c')
-rw-r--r-- | src/expressions.c | 64 |
1 files changed, 60 insertions, 4 deletions
diff --git a/src/expressions.c b/src/expressions.c index 5cb30e4c3..eefd11f78 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -26,12 +26,15 @@ #include "util.h" #include "cfg_file.h" #include "main.h" +#include "message.h" +#include "fuzzy.h" #include "expressions.h" typedef gboolean (*rspamd_internal_func_t)(struct worker_task *, GList *args); gboolean rspamd_compare_encoding (struct worker_task *task, GList *args); gboolean rspamd_header_exists (struct worker_task *task, GList *args); +gboolean rspamd_parts_distance (struct worker_task *task, GList *args); /* * List of internal functions of rspamd * Sorted by name to use bsearch @@ -41,6 +44,7 @@ static struct _fl { rspamd_internal_func_t func; } rspamd_functions_list[] = { { "compare_encoding", rspamd_compare_encoding }, + { "compare_parts_distance", rspamd_parts_distance }, { "header_exists", rspamd_header_exists }, }; @@ -273,7 +277,9 @@ parse_expression (memory_pool_t *pool, char *line) case READ_REGEXP: if (*p == '/' && *(p - 1) != '\\') { - p ++; + if (*(p + 1)) { + p ++; + } state = READ_REGEXP_FLAGS; } else { @@ -285,14 +291,17 @@ parse_expression (memory_pool_t *pool, char *line) if (!is_regexp_flag (*p) || *(p + 1) == '\0') { if (c != p) { /* Copy operand */ - str = memory_pool_alloc (pool, p - c + 3); - g_strlcpy (str, c - 1, (p - c + 3)); + if (*(p + 1) == '\0') { + p++; + } + str = memory_pool_alloc (pool, p - c + 2); + g_strlcpy (str, c - 1, (p - c + 2)); g_strstrip (str); if (strlen (str) > 0) { insert_expression (pool, &expr, EXPR_REGEXP, 0, str); } } - c = ++p; + c = p; state = SKIP_SPACES; } else { @@ -594,5 +603,52 @@ rspamd_header_exists (struct worker_task *task, GList *args) } /* + * This function is designed to find difference between text/html and text/plain parts + * It takes one argument: difference threshold, if we have two text parts, compare + * its hashes and check for threshold, if value is greater than threshold, return TRUE + * and return FALSE otherwise. 
+ */ +gboolean +rspamd_parts_distance (struct worker_task *task, GList *args) +{ + int threshold; + struct mime_text_part *p1, *p2; + GList *cur; + + if (args == NULL) { + msg_debug ("rspamd_parts_distance: no threshold is specified, assume it 100"); + threshold = 100; + } + else { + errno = 0; + threshold = strtoul ((char *)args->data, NULL, 10); + if (errno != 0) { + msg_info ("rspamd_parts_distance: bad numeric value for threshold \"%s\", assume it 100", (char *)args->data); + threshold = 100; + } + } + + if (g_list_length (task->text_parts) == 2) { + cur = g_list_first (task->text_parts); + p1 = cur->data; + cur = g_list_next (cur); + if (cur == NULL) { + msg_info ("rspamd_parts_distance: bad parts list"); + return FALSE; + } + p2 = cur->data; + if (fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy) >= threshold) { + return TRUE; + } + } + else { + msg_debug ("rspamd_parts_distance: message has too many text parts, so do not try to compare them with each other"); + return FALSE; + } + + return FALSE; +} + +/* * vi:ts=4 */ |