From: Vsevolod Stakhov Date: Mon, 29 Jun 2009 15:32:31 +0000 (+0400) Subject: * Add views support (not completely tested yet) X-Git-Tag: 0.2.7~109 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=025f2000d515244e085cd82ac089d7f0271fc531;p=rspamd.git * Add views support (not completely tested yet) --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 94fcfdfd0..5dfe772c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ PROJECT(rspamd C) SET(RSPAMD_VERSION_MAJOR 0) SET(RSPAMD_VERSION_MINOR 2) -SET(RSPAMD_VERSION_PATCH 1) +SET(RSPAMD_VERSION_PATCH 2) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd") @@ -312,7 +312,9 @@ SET(RSPAMDSRC src/modules.c src/buffer.c src/html.c src/lmtp.c - src/lmtp_proto.c) + src/lmtp_proto.c + src/radix.c + src/view.c) IF(ENABLE_PERL MATCHES "ON") LIST(APPEND RSPAMDSRC src/perl.c) @@ -344,6 +346,7 @@ SET(TESTDEPENDS src/mem_pool.c src/hash.c src/url.c src/util.c + src/radix.c src/fuzzy.c src/memcached.c src/message.c @@ -361,7 +364,8 @@ SET(UTILSDEPENDS src/mem_pool.c src/expressions.c src/message.c src/html.c - src/util.c) + src/util.c + src/radix.c) LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM) diff --git a/rspamd.conf.sample b/rspamd.conf.sample index 23f31353a..290fa2513 100644 --- a/rspamd.conf.sample +++ b/rspamd.conf.sample @@ -294,3 +294,17 @@ raw_mode = yes; url_filters = "surbl"; header_filters = "regexp"; mime_filters = "chartable,emails"; + +# Definition of view, views may allow to customize rules for different messages +view { + # All directives here may be duplicated to add specific elements or regexp/files + # List of ip/mask for this view + ip = "file://@CMAKE_INSTALL_PREFIX@/etc/rspamd/ip_internal.inc"; + # From addresses for this view: + # list is placed in file: + #from = "file://@CMAKE_INSTALL_PREFIX@/etc/rspamd/from_internal.inc"; + # list is regexp: + #from = "/^.+@example.com$/i"; + # Symbols to check, can also be list of files or regexp: + symbols = "/^[A-Z]{2}_SURBL_MULTI$/i"; +}; diff --git a/src/cfg_file.h b/src/cfg_file.h index 1a02644f7..de3003306 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -219,6 +219,7 @@ struct config_file { GHashTable* statfiles; /**< hash of defined statfiles indexed by alias */ GHashTable* cfg_params; /**< all cfg params indexed by its name in this structure */ int clock_res; /**< resolution of clock used */ + GList *views; /**< views */ }; /** diff --git a/src/cfg_file.l b/src/cfg_file.l index b758c7eb6..a2589441e 100644 --- a/src/cfg_file.l +++ b/src/cfg_file.l @@ -38,6 +38,10 @@ composites return COMPOSITES; tempdir return TEMPDIR; pidfile return PIDFILE; +view return VIEW; +ip return IP; +from return FROM; +symbols return SYMBOLS; error_time return ERROR_TIME; dead_time return DEAD_TIME; diff --git a/src/cfg_file.y b/src/cfg_file.y index 0062cdeea..17854f22b 100644 --- a/src/cfg_file.y +++ b/src/cfg_file.y @@ -8,6 +8,7 @@ #include "expressions.h" #include "classifiers/classifiers.h" #include "tokenizers/tokenizers.h" +#include "view.h" #ifdef WITH_LUA #include "lua-rspamd.h" #else @@ -25,6 +26,8 @@ struct statfile *cur_statfile = NULL; struct statfile_section *cur_section = NULL; struct worker_conf *cur_worker = NULL; +struct rspamd_view *cur_view = NULL; + %} %union @@ -51,6 +54,7 @@ struct worker_conf *cur_worker = NULL; %token LOG_LEVEL LOG_LEVEL_DEBUG LOG_LEVEL_INFO LOG_LEVEL_WARNING LOG_LEVEL_ERROR LOG_FACILITY LOG_FILENAME %token STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE TOKENIZER CLASSIFIER %token DELIVERY LMTP ENABLED AGENT SECTION LUACODE RAW_MODE PROFILE_FILE COUNT +%token VIEW IP FROM SYMBOLS %type STRING %type VARIABLE @@ -93,6 +97,7 @@ command : | luacode | raw_mode | profile_file + | view ; tempdir : @@ -832,6 +837,62 @@ profile_file: } ; +view: + VIEW OBRACE viewbody EBRACE { + if (cur_view == NULL) { + yyerror ("yyparse: not enough arguments in view definition"); + YYERROR; + } + cfg->views = g_list_prepend (cfg->views, cur_view); + cur_view = NULL; + } + ; + +viewbody: + | viewcmd SEMICOLON + | viewbody viewcmd SEMICOLON + ; + +viewcmd: + | viewip + | viewfrom + | viewsymbols + ; + +viewip: + IP EQSIGN QUOTEDSTRING { + if (cur_view == NULL) { + cur_view = init_view (cfg->cfg_pool); + } + if (!add_view_ip (cur_view, $3)) { + yyerror ("yyparse: invalid ip line in view definition: ip = '%s'", $3); + YYERROR; + } + } + ; + +viewfrom: + FROM EQSIGN QUOTEDSTRING { + if (cur_view == NULL) { + cur_view = init_view (cfg->cfg_pool); + } + if (!add_view_from (cur_view, $3)) { + yyerror ("yyparse: invalid from line in view definition: from = '%s'", $3); + YYERROR; + } + } + ; +viewsymbols: + SYMBOLS EQSIGN QUOTEDSTRING { + if (cur_view == NULL) { + cur_view = init_view (cfg->cfg_pool); + } + if (!add_view_symbols (cur_view, $3)) { + yyerror ("yyparse: invalid symbols line in view definition: symbols = '%s'", $3); + YYERROR; + } + } + ; %% /* * vi:ts=4 diff --git a/src/main.h b/src/main.h index c653964b7..c929e6cb7 100644 --- a/src/main.h +++ b/src/main.h @@ -81,6 +81,7 @@ struct config_file; struct tokenizer; struct classifier; struct mime_part; +struct rspamd_view; /** * Server statistics @@ -198,6 +199,8 @@ struct worker_task { int error_code; /**< code of last error */ memory_pool_t *task_pool; /**< memory pool for task */ struct timespec ts; /**< time of connection */ + struct rspamd_view *view; /**< matching view */ + gboolean view_checked; }; /** diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index d05912f57..bb0f79da7 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -32,6 +32,7 @@ #include "../modules.h" #include "../cfg_file.h" #include "../expressions.h" +#include "../view.h" #define DEFAULT_SYMBOL "R_CHARSET_MIXED" #define DEFAULT_THRESHOLD 0.1 @@ -182,12 +183,14 @@ chartable_mime_filter (struct worker_task *task) { GList *cur; - cur = g_list_first (task->text_parts); - while (cur) { - if (check_part ((struct mime_text_part *)cur->data, task->cfg->raw_mode)) { - insert_result (task, chartable_module_ctx->metric, chartable_module_ctx->symbol, 1, NULL); + if (check_view (task->cfg->views, chartable_module_ctx->symbol, task)) { + cur = g_list_first (task->text_parts); + while (cur) { + if (check_part ((struct mime_text_part *)cur->data, task->cfg->raw_mode)) { + insert_result (task, chartable_module_ctx->metric, chartable_module_ctx->symbol, 1, NULL); + } + cur = g_list_next (cur); } - cur = g_list_next (cur); } return 0; diff --git a/src/plugins/emails.c b/src/plugins/emails.c index 2e2dcac63..67a641ab5 100644 --- a/src/plugins/emails.c +++ b/src/plugins/emails.c @@ -33,6 +33,7 @@ #include "../cfg_file.h" #include "../expressions.h" #include "../util.h" +#include "../view.h" #define DEFAULT_SYMBOL "R_BAD_EMAIL" @@ -204,16 +205,18 @@ emails_mime_filter (struct worker_task *task) emails = extract_emails (task); - if (email_module_ctx->blacklist && emails) { - cur = g_list_first (emails); + if (check_view (task->cfg->views, email_module_ctx->symbol, task)) { + if (email_module_ctx->blacklist && emails) { + cur = g_list_first (emails); - while (cur) { - if (g_hash_table_lookup (email_module_ctx->blacklist, cur->data) != NULL) { - insert_result (task, email_module_ctx->metric, email_module_ctx->symbol, 1, - g_list_prepend (NULL, memory_pool_strdup (task->task_pool, (char *)cur->data))); - + while (cur) { + if (g_hash_table_lookup (email_module_ctx->blacklist, cur->data) != NULL) { + insert_result (task, email_module_ctx->metric, email_module_ctx->symbol, 1, + g_list_prepend (NULL, memory_pool_strdup (task->task_pool, (char *)cur->data))); + + } + cur = g_list_next (cur); } - cur = g_list_next (cur); } } diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 5a1a9cd27..b856aac93 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -36,6 +36,7 @@ #include "../cfg_file.h" #include "../util.h" #include "../expressions.h" +#include "../view.h" #define DEFAULT_STATFILE_PREFIX "./" @@ -591,28 +592,29 @@ process_regexp_item (struct regexp_module_item *item, struct worker_task *task) struct timespec ts1, ts2; uint64_t diff; + if (check_view (task->cfg->views, item->symbol, task)) { #ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1); + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1); #elif defined(HAVE_CLOCK_VIRTUAL) - clock_gettime (CLOCK_VIRTUAL, &ts1); + clock_gettime (CLOCK_VIRTUAL, &ts1); #else - clock_gettime (CLOCK_REALTIME, &ts1); + clock_gettime (CLOCK_REALTIME, &ts1); #endif - - if (process_regexp_expression (item->expr, task)) { - insert_result (task, regexp_module_ctx->metric, item->symbol, 1, NULL); - } + if (process_regexp_expression (item->expr, task)) { + insert_result (task, regexp_module_ctx->metric, item->symbol, 1, NULL); + } #ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts2); + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts2); #elif defined(HAVE_CLOCK_VIRTUAL) - clock_gettime (CLOCK_VIRTUAL, &ts2); + clock_gettime (CLOCK_VIRTUAL, &ts2); #else - clock_gettime (CLOCK_REALTIME, &ts2); + clock_gettime (CLOCK_REALTIME, &ts2); #endif - diff = (ts2.tv_sec - ts1.tv_sec) * 1000000 + (ts2.tv_nsec - ts1.tv_nsec) / 1000; - set_counter (item->symbol, diff); + diff = (ts2.tv_sec - ts1.tv_sec) * 1000000 + (ts2.tv_nsec - ts1.tv_nsec) / 1000; + set_counter (item->symbol, diff); + } } static int diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 486934fea..092bd383d 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -29,6 +29,7 @@ #include "../config.h" #include "../util.h" #include "../message.h" +#include "../view.h" #include #include "surbl.h" @@ -371,27 +372,32 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree) while (cur) { suffix = (struct suffix_item *)cur->data; - if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end, TRUE, &err)) != NULL) { - if (g_tree_lookup (tree, surbl_req) == NULL) { - g_tree_insert (tree, surbl_req, surbl_req); - param = memory_pool_alloc (task->task_pool, sizeof (struct dns_param)); - param->url = url; - param->task = task; - param->suffix = suffix; - *host_end = '\0'; - param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req); - *host_end = '.'; - msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req); - evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); - param->task->save.saved ++; + if (check_view (task->cfg->views, suffix->symbol, task)) { + if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end, TRUE, &err)) != NULL) { + if (g_tree_lookup (tree, surbl_req) == NULL) { + g_tree_insert (tree, surbl_req, surbl_req); + param = memory_pool_alloc (task->task_pool, sizeof (struct dns_param)); + param->url = url; + param->task = task; + param->suffix = suffix; + *host_end = '\0'; + param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req); + *host_end = '.'; + msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req); + evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); + param->task->save.saved ++; + } + else { + msg_debug ("make_surbl_requests: request %s is already sent", surbl_req); + } } - else { - msg_debug ("make_surbl_requests: request %s is already sent", surbl_req); + else if (err != NULL && err->code != WHITELIST_ERROR) { + msg_info ("surbl_test_url: cannot format url string for surbl %s, %s", struri (url), err->message); + return; } } - else if (err != NULL && err->code != WHITELIST_ERROR) { - msg_info ("surbl_test_url: cannot format url string for surbl %s, %s", struri (url), err->message); - return; + else { + msg_debug ("make_surbl_requests: skipping symbol that is not in view: %s", suffix->symbol); } cur = g_list_next (cur); } diff --git a/src/radix.c b/src/radix.c new file mode 100644 index 000000000..8b9df48a7 --- /dev/null +++ b/src/radix.c @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2009, Rambler media + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "config.h" +#include "radix.h" +#include "mem_pool.h" + +static void *radix_alloc (radix_tree_t *tree); + +radix_tree_t * +radix_tree_create (memory_pool_t *pool) +{ + radix_tree_t *tree; + + tree = memory_pool_alloc (pool, sizeof(radix_tree_t)); + if (tree == NULL) { + return NULL; + } + + tree->size = 0; + + tree->root = radix_alloc (tree); + if (tree->root == NULL) { + return NULL; + } + + tree->root->right = NULL; + tree->root->left = NULL; + tree->root->parent = NULL; + tree->root->value = RADIX_NO_VALUE; + tree->pool = pool; + + return tree; +} + + +int +radix32tree_insert (radix_tree_t *tree, uint32_t key, uint32_t mask, + unsigned char value) +{ + uint32_t bit; + radix_node_t *node, *next; + + bit = 0x80000000; + + node = tree->root; + next = tree->root; + /* Find a place in trie to insert */ + while (bit & mask) { + if (key & bit) { + next = node->right; + + } else { + next = node->left; + } + + if (next == NULL) { + break; + } + + bit >>= 1; + node = next; + } + + if (next) { + if (node->value != RADIX_NO_VALUE) { + return 1; + } + + node->value = value; + return 0; + } + /* Inserting value in trie creating all path components */ + while (bit & mask) { + next = radix_alloc(tree); + if (next == NULL) { + return -1; + } + + next->right = NULL; + next->left = NULL; + next->parent = node; + next->value = RADIX_NO_VALUE; + + if (key & bit) { + node->right = next; + + } else { + node->left = next; + } + + bit >>= 1; + node = next; + } + + node->value = value; + + return 0; +} + + +int +radix32tree_delete (radix_tree_t *tree, uint32_t key, uint32_t mask) +{ + uint32_t bit; + radix_node_t *node; + radix_node_t *tmp; + + bit = 0x80000000; + node = tree->root; + + while (node && (bit & mask)) { + if (key & bit) { + node = node->right; + + } else { + node = node->left; + } + + bit >>= 1; + } + + if (node == NULL || node->parent == NULL) { + return -1; + } + + if (node->right || node->left) { + if (node->value != RADIX_NO_VALUE) { + node->value = RADIX_NO_VALUE; + return 0; + } + + return -1; + } + + for ( ;; ) { + if (node->parent->right == node) { + node->parent->right = NULL; + + } else { + node->parent->left = NULL; + } + + tmp = node; + node = node->parent; + + if (node->right || node->left) { + break; + } + + if (node->value != RADIX_NO_VALUE) { + break; + } + + if (node->parent == NULL) { + break; + } + } + + return 0; +} + + +unsigned char +radix32tree_find (radix_tree_t *tree, uint32_t key) +{ + uint32_t bit; + uintptr_t value; + radix_node_t *node; + + bit = 0x80000000; + value = RADIX_NO_VALUE; + node = tree->root; + + while (node) { + if (node->value != RADIX_NO_VALUE) { + value = node->value; + } + + if (key & bit) { + node = node->right; + + } else { + node = node->left; + } + + bit >>= 1; + } + + return value; +} + + +static void * +radix_alloc (radix_tree_t *tree) +{ + char *p; + + p = memory_pool_alloc (tree->pool, sizeof(radix_node_t)); + + tree->size += sizeof (radix_node_t); + + return p; +} + +void +radix_tree_free (radix_tree_t *tree) +{ + radix_node_t *node, *tmp; + + node = tree->root; + + for (;;) { + /* We are at the trie root and we have no more leaves, end of algorithm */ + if (!node->left && !node->right && !node->parent) { + break; + } + + /* Traverse to the end of trie */ + while (node->left || node->right) { + if (node->left) { + node = node->left; + } + else { + node = node->right; + } + } + /* Found leaf node, free it */ + if (node->parent->right == node) { + node->parent->right = NULL; + + } else { + node->parent->left = NULL; + } + + tmp = node; + /* Go up */ + node = node->parent; + } +} + +/* + * vi:ts=4 + */ diff --git a/src/radix.h b/src/radix.h new file mode 100644 index 000000000..46c6adb05 --- /dev/null +++ b/src/radix.h @@ -0,0 +1,32 @@ +#ifndef RADIX_H +#define RADIX_H + +#include "config.h" +#include "mem_pool.h" + +#define RADIX_NO_VALUE (unsigned char)-1 + +typedef struct radix_node_s radix_node_t; + +struct radix_node_s { + radix_node_t *right; + radix_node_t *left; + radix_node_t *parent; + unsigned char value; +}; + + +typedef struct { + radix_node_t *root; + size_t size; + memory_pool_t *pool; +} radix_tree_t; + + +radix_tree_t *radix_tree_create (); +int radix32tree_insert (radix_tree_t *tree, uint32_t key, uint32_t mask, unsigned char value); +int radix32tree_delete (radix_tree_t *tree, uint32_t key, uint32_t mask); +unsigned char radix32tree_find (radix_tree_t *tree, uint32_t key); +void radix_tree_free (radix_tree_t *tree); + +#endif diff --git a/src/util.c b/src/util.c index c90427935..76c9c31a8 100644 --- a/src/util.c +++ b/src/util.c @@ -878,8 +878,10 @@ set_counter (const char *name, long int value) } } -gboolean -parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename) +typedef void (*insert_func)(gpointer st, gconstpointer key, gpointer value); + +static gboolean +abstract_parse_list (memory_pool_t *pool, void *arg, insert_func func, const char *filename) { int fd; char buf[BUFSIZ], str[BUFSIZ], *s, *p; @@ -917,7 +919,7 @@ parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename) if (s != str) { *s = '\0'; s = memory_pool_strdup (pool, str); - g_hash_table_insert (tbl, s, hash_fill); + func (arg, s, hash_fill); s = str; } state = SKIP_COMMENT; @@ -926,7 +928,7 @@ parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename) if (s != str) { *s = '\0'; s = memory_pool_strdup (pool, str); - g_hash_table_insert (tbl, s, hash_fill); + func (arg, s, hash_fill); s = str; } while (*p == '\r' || *p == '\n') { @@ -963,6 +965,59 @@ parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename) return TRUE; } +static void +radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value) +{ + radix_tree_t *tree = st; + + uint32_t mask = 0xFFFFFFFF; + uint32_t ip; + char *token, *ipnet; + struct in_addr ina; + int k; + + k = strlen ((char *)key) + 1; + ipnet = alloca (k); + g_strlcpy (ipnet, key, k); + token = strsep (&ipnet, "/"); + + if (ipnet != NULL) { + k = atoi (ipnet); + if (k > 32 || k < 0) { + msg_warn ("radix_tree_insert_helper: invalid netmask value: %d", k); + k = 32; + } + k = 32 - k; + mask = mask << k; + } + + if (inet_aton (token, &ina) == 0) { + msg_err ("radix_tree_insert_helper: invalid ip address: %s", token); + return; + } + + ip = ntohl ((uint32_t)ina.s_addr); + k = radix32tree_insert (tree, ip, mask, 1); + if (k == -1) { + msg_warn ("radix_tree_insert_helper: cannot insert ip to tree: %s, mask %X", inet_ntoa (ina), mask); + } + else if (k == 1) { + msg_warn ("add_ip_radix: ip %s, mask %X, value already exists", inet_ntoa (ina), mask); + } +} + +gboolean +parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename) +{ + return abstract_parse_list (pool, (void *)tbl, (insert_func)g_hash_table_insert, filename); +} + +gboolean +parse_radix_list (memory_pool_t *pool, radix_tree_t *tree, const char *filename) +{ + return abstract_parse_list (pool, (void *)tree, (insert_func)radix_tree_insert_helper, filename); +} + gboolean maybe_parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename) { @@ -990,6 +1045,31 @@ maybe_parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filenam return TRUE; } +#ifndef g_tolower +#define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x)) +#endif + +gint +rspamd_strcase_equal (gconstpointer v, gconstpointer v2) +{ + return g_ascii_strcasecmp ((const char *) v, (const char *) v2) == 0; +} + + +guint +rspamd_strcase_hash (gconstpointer key) +{ + const char *p = key; + guint h = 0; + + while (*p != '\0') { + h = (h << 5) - h + g_tolower (*p); + p++; + } + + return h; +} + /* * vi:ts=4 */ diff --git a/src/util.h b/src/util.h index 4bea3d08e..b657316ad 100644 --- a/src/util.h +++ b/src/util.h @@ -3,6 +3,7 @@ #include "config.h" #include "mem_pool.h" +#include "radix.h" struct config_file; struct rspamd_main; @@ -65,6 +66,10 @@ void set_counter (const char *name, long int value); gboolean parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename); gboolean maybe_parse_host_list (memory_pool_t *pool, GHashTable *tbl, const char *filename); +gboolean parse_radix_list (memory_pool_t *pool, radix_tree_t *tree, const char *filename); + +guint rspamd_strcase_hash (gconstpointer key); +gint rspamd_strcase_equal (gconstpointer v, gconstpointer v2); #endif diff --git a/src/view.c b/src/view.c new file mode 100644 index 000000000..aeec8cd51 --- /dev/null +++ b/src/view.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2009, Rambler media + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "util.h" +#include "view.h" +#include "expressions.h" +#include "cfg_file.h" + +struct rspamd_view* +init_view (memory_pool_t *pool) +{ + struct rspamd_view *new; + + new = memory_pool_alloc0 (pool, sizeof (struct rspamd_view)); + + new->pool = pool; + new->from_hash = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + new->symbols_hash = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + + memory_pool_add_destructor (new->pool, (pool_destruct_func)g_hash_table_destroy, new->from_hash); + memory_pool_add_destructor (new->pool, (pool_destruct_func)g_hash_table_destroy, new->symbols_hash); + + return new; +} + +gboolean +add_view_from (struct rspamd_view *view, char *line) +{ + struct rspamd_regexp *re = NULL; + + if (g_ascii_strncasecmp (line, "file://", sizeof ("file://") - 1) == 0) { + if (parse_host_list (view->pool, view->from_hash, line + sizeof ("file://") - 1)) { + return TRUE; + } + } + else if ((re = parse_regexp (view->pool, line, TRUE)) != NULL) { + view->from_re_list = g_list_prepend (view->from_re_list, re); + return TRUE; + } + + return FALSE; +} + +gboolean +add_view_symbols (struct rspamd_view *view, char *line) +{ + struct rspamd_regexp *re = NULL; + + if (g_ascii_strncasecmp (line, "file://", sizeof ("file://") - 1) == 0) { + if (parse_host_list (view->pool, view->symbols_hash, line + sizeof ("file://") - 1)) { + return TRUE; + } + } + else if ((re = parse_regexp (view->pool, line, TRUE)) != NULL) { + view->symbols_re_list = g_list_prepend (view->symbols_re_list, re); + return TRUE; + } + else { + /* Try to parse symbols line as comma separated list */ + + } + + return FALSE; + +} + +gboolean +add_view_ip (struct rspamd_view *view, char *line) +{ + if (g_ascii_strncasecmp (line, "file://", sizeof ("file://") - 1) == 0) { + if (parse_radix_list (view->pool, view->ip_tree, line + sizeof ("file://") - 1)) { + return TRUE; + } + } + + return FALSE; + +} + + +struct rspamd_view * +find_view_by_ip (GList *views, struct worker_task *task) +{ + GList *cur; + struct rspamd_view *v; + + if (task->from_addr.s_addr == INADDR_NONE) { + return NULL; + } + + cur = views; + while (cur) { + v = cur->data; + if (radix32tree_find (v->ip_tree, task->from_addr.s_addr) != RADIX_NO_VALUE) { + return v; + } + cur = g_list_next (cur); + } + + return NULL; +} + +struct rspamd_view * +find_view_by_from (GList *views, struct worker_task *task) +{ + GList *cur, *cur_re; + struct rspamd_view *v; + struct rspamd_regexp *re; + + if (task->from == NULL) { + return NULL; + } + + cur = views; + while (cur) { + v = cur->data; + /* First try to lookup in hashtable */ + if (g_hash_table_lookup (v->from_hash, task->from) != NULL) { + return v; + } + /* Then try to match re */ + cur_re = v->from_re_list; + + while (cur_re) { + re = cur_re->data; + if (g_regex_match (re->regexp, task->from, 0, NULL) == TRUE) { + return v; + } + cur_re = g_list_next (cur_re); + } + cur = g_list_next (cur); + } + + return NULL; +} + +static gboolean +match_view_symbol (struct rspamd_view *v, const char *symbol) +{ + GList *cur; + struct rspamd_regexp *re; + + /* First try to lookup in hashtable */ + if (g_hash_table_lookup (v->symbols_hash, symbol) != NULL) { + return TRUE; + } + /* Then try to match re */ + cur = v->symbols_re_list; + + while (cur) { + re = cur->data; + if (g_regex_match (re->regexp, symbol, 0, NULL) == TRUE) { + return TRUE; + } + cur = g_list_next (cur); + } + + return FALSE; +} + +gboolean +check_view (GList *views, const char *symbol, struct worker_task *task) +{ + struct rspamd_view *selected = NULL; + + + if (views == NULL || (task->view == NULL && task->view_checked == TRUE)) { + /* If now views defined just return TRUE to check each symbol */ + return TRUE; + } + + if (task->view != NULL) { + goto check_symbol; + } + + if ((selected = find_view_by_ip (views, task)) == NULL) { + if ((selected = find_view_by_from (views, task)) == NULL) { + /* No matching view for this task */ + task->view_checked = TRUE; + return TRUE; + } + } + + task->view_checked = TRUE; + task->view = selected; + +check_symbol: + /* selected is now not NULL */ + if (match_view_symbol (task->view, symbol)) { + return TRUE; + } + + return FALSE; +} diff --git a/src/view.h b/src/view.h new file mode 100644 index 000000000..d27336254 --- /dev/null +++ b/src/view.h @@ -0,0 +1,28 @@ +#ifndef RSPAMD_VIEW_H +#define RSPAMD_VIEW_H + +#include "config.h" +#include "main.h" +#include "radix.h" + +struct rspamd_view { + GList *from_re_list; + GHashTable *from_hash; + + radix_tree_t *ip_tree; + + GHashTable *symbols_hash; + GList *symbols_re_list; + + memory_pool_t *pool; +}; + +struct rspamd_view* init_view (memory_pool_t *pool); + +gboolean add_view_from (struct rspamd_view *view, char *line); +gboolean add_view_ip (struct rspamd_view *view, char *line); +gboolean add_view_symbols (struct rspamd_view *view, char *line); + +gboolean check_view (GList *views, const char *symbol, struct worker_task *task); + +#endif diff --git a/src/worker.c b/src/worker.c index f6846f6f7..01732135c 100644 --- a/src/worker.c +++ b/src/worker.c @@ -287,6 +287,8 @@ accept_socket (int fd, short what, void *arg) new_task->state = READ_COMMAND; new_task->sock = nfd; new_task->cfg = worker->srv->cfg; + new_task->from_addr.s_addr = INADDR_NONE; + new_task->view_checked = FALSE; #ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &new_task->ts); #elif defined(HAVE_CLOCK_VIRTUAL)