From b8211fbcc8f7de342d4a0176bedf182e1f37397d Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 20 Dec 2010 22:09:16 +0300 Subject: [PATCH] * Introduce new system of configuration checks: - now symbols inside metrics definition must be inside rules as well - symbols may be virtual (e.g. when module can insert several symbols inside callback) - symbols may be pure callbacks (when symbol's name is unknown and depends on conditions) * Module 'emails' is removed as it is not used in the current rspamd MANY fixes to sample config files --- CMakeLists.txt | 1 - conf/lua/regexp/headers.lua | 1 - rspamd.xml.sample | 140 ++++++++-------- src/cfg_file.h | 1 + src/cfg_utils.c | 14 ++ src/cfg_xml.c | 26 +-- src/lua/lua_config.c | 45 ++++- src/main.c | 25 ++- src/plugins/emails.c | 231 -------------------------- src/plugins/fuzzy_check.c | 11 +- src/plugins/lua/forged_recipients.lua | 5 +- src/plugins/lua/multimap.lua | 22 ++- src/plugins/lua/once_received.lua | 1 + src/plugins/spf.c | 2 + src/plugins/surbl.c | 40 ++++- src/symbols_cache.c | 138 ++++++++++++++- src/symbols_cache.h | 26 +++ 17 files changed, 395 insertions(+), 334 deletions(-) delete mode 100644 src/plugins/emails.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b685e3e4..7ad2d777a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -573,7 +573,6 @@ SET(CLASSIFIERSSRC src/classifiers/classifiers.c SET(PLUGINSSRC src/plugins/surbl.c src/plugins/regexp.c src/plugins/chartable.c - src/plugins/emails.c src/plugins/fuzzy_check.c src/plugins/spf.c) diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua index 4c415a2f3..9a40995f1 100644 --- a/conf/lua/regexp/headers.lua +++ b/conf/lua/regexp/headers.lua @@ -147,7 +147,6 @@ reconf['MIME_HEADER_CTYPE_ONLY'] = string.format('!(%s) & !(%s) & (%s) & !(%s) & local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\\$[0-9a-f]{4,}\\$[0-9a-f]{4,}\\@\\S+/Hr' local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H' local rcvd_with_exchange = 
'Received=/with Microsoft Exchange Server/H' -reconf['R_MUA_EXCHANGE'] = 'X-MimeOLE=/Microsoft Exchange/H' reconf['RATWARE_MS_HASH'] = string.format('(%s) & !(%s) & !(%s)', msgid_dollars_ok, mimeole_ms, rcvd_with_exchange) -- Reply-type in content-type diff --git a/rspamd.xml.sample b/rspamd.xml.sample index 494b7baf1..b56820b51 100644 --- a/rspamd.xml.sample +++ b/rspamd.xml.sample @@ -33,12 +33,8 @@ reject greylist:5 add_header:5 - R_SPAM_FROM_MTU - R_WWW_EKONF_COM - R_TINYURL MISSING_SUBJECT FORGED_OUTLOOK_TAGS - R_FAKE_THEBAT FORGED_SENDER DRUGS_MANYKINDS ADVANCE_FEE_2 @@ -54,27 +50,27 @@ RCVD_DOUBLE_IP_SPAM OB_SURBL_MULTI FORGED_OUTLOOK_HTML - HTML_MIME_NO_HTML_TAG - R_BAD_EMAIL - R_SPAM_FROM_LIBERO WHITELIST_IP R_UNDISC_RCPT DRUGS_ANXIETY + DRUGS_MUSCLE DRUGS_ANXIETY_EREC PH_SURBL_MULTI R_WHITE_ON_WHITE - FAKE_HTML - R_SPAM_FROM_VERSATEL HTML_SHORT_LINK_IMG_2 FORGED_MUA_OUTLOOK - R_FREE_HOSTING DRUGS_ERECTILE - R_FREE_HOSTING_NAROD - R_SPAM_FROM_ONO FM_FAKE_HELO_VERIZON REPTO_QUOTE_YAHOO MISSING_MIMEOLE - RAMBLER_URIBL + RAMBLER_URIBL + MISSING_TO + FROM_EXCESS_BASE64 + FROM_WORLDBANK + FROM_CBR + FROM_CSHOP + FROM_MIRHOSTING + FROM_PASSIFLORA R_SPAM_FROM_VALUEHOST R_MIXED_CHARSET SORTED_RECIPS @@ -83,25 +79,41 @@ R_TO_SEEMS_AUTO SUBJECT_NEEDS_ENCODING TRACKER_ID - KAM_LOTTO1 + R_LOTTO R_NO_SPACE_IN_FROM R_SAJDING R_BAD_CTE_7BIT WS_SURBL_MULTI - R_POCHTA_RU R_FLASH_REDIR_IMGSHACK INVALID_MSGID - R_FORGED_MPOP_WEBMAIL MISSING_MID DRUGS_DIET FORGED_RECIPIENTS RATWARE_MS_HASH - HTML_TAG_BALANCE_HEAD STOX_REPLY_TYPE BAYES_SPAM BAYES_HAM + R_FUZZY + R_FUZZY1 + R_FUZZY2 + R_FUZZY3 + + R_SPF_FAIL + R_SPF_SOFTFAIL + R_SPF_ALLOW + + MAILLIST + + R_IP_PBL + + + ONCE_RECEIVED + ONCE_RECEIVED_STRICT + + RECEIVED_RBL + + R_PARTS_DIFFER MIME_HEADER_CTYPE_ONLY - R_FAKE_OUTLOOK @@ -116,8 +128,8 @@ 2048 0 - /tmp/fuzzy.db - yes + /tmp/fuzzy.db + yes controller @@ -126,7 +138,7 @@ 2048 0 - q1 + q1 normal @@ -141,82 +153,79 @@ - - - - - - - + localhost:11335 + R_FUZZY + 300 + 10 
+ application/pdf + 1:R_FUZZY1:10,2:R_FUZZY2:5,3:R_FUZZY3:-2.1 - - + FORGED_SENDER + FORGED_RECIPIENTS - + MAILLIST - - - - - - - - - - + file://@ETC_PREFIX@/rspamd/surbl-whitelist.inc + file://@ETC_PREFIX@/rspamd/2tld.inc + JP + AB + OB + PH + WS + SC + uribl.rambler.ru - - - + 10s + 1s + localhost:8080 - - - - + RECEIVED_RBL + pbl.spamhaus.org + xbl.spamhaus.org + insecure-bl.rambler.ru - - + http://cebka.pp.ru/stuff/grey_whitelist.conf + WHITELIST_IP - - - + 0.1 + R_MIXED_CHARSET - - - - - + mail + static + dynamic + ONCE_RECEIVED_STRICT + ONCE_RECEIVED - + type = dnsbl, map = pbl.spamhaus.org, symbol = R_IP_PBL @@ -227,7 +236,7 @@ osb-text default - + 20 WINNOW_HAM 100M @@ -244,8 +253,8 @@ osb-text default - - + 10 + 0.2 BAYES_HAM 10M @@ -266,8 +275,7 @@ osb-text default - - + 10 BAYES_HAM 10M diff --git a/src/cfg_file.h b/src/cfg_file.h index 29dd35200..81cfe65cc 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -417,6 +417,7 @@ struct metric* check_metric_conf (struct config_file *cfg, struct metric *c); gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const gchar *line); gboolean read_xml_config (struct config_file *cfg, const gchar *filename); gboolean check_modules_config (struct config_file *cfg); +void insert_classifier_symbols (struct config_file *cfg); #endif /* ifdef CFG_FILE_H */ /* diff --git a/src/cfg_utils.c b/src/cfg_utils.c index 334283d22..2685d1281 100644 --- a/src/cfg_utils.c +++ b/src/cfg_utils.c @@ -983,6 +983,20 @@ check_modules_config (struct config_file *cfg) return res; } +static void +symbols_classifiers_callback (gpointer key, gpointer value, gpointer ud) +{ + struct config_file *cfg = ud; + + register_virtual_symbol (&cfg->cache, key, 1.0); +} + +void +insert_classifier_symbols (struct config_file *cfg) +{ + g_hash_table_foreach (cfg->classifiers_symbols, symbols_classifiers_callback, cfg); +} + /* * vi:ts=4 */ diff --git a/src/cfg_xml.c b/src/cfg_xml.c index 7fd096334..389298e4a 100644 --- a/src/cfg_xml.c 
+++ b/src/cfg_xml.c @@ -586,7 +586,7 @@ handle_log_type (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHash gchar *val; if (g_ascii_strcasecmp (data, "file") == 0) { /* Find filename attribute */ - if ((val = g_hash_table_lookup (attrs, "filename")) == NULL) { + if (attrs == NULL || (val = g_hash_table_lookup (attrs, "filename")) == NULL) { msg_err ("cannot log to file that is not specified"); return FALSE; } @@ -597,7 +597,7 @@ handle_log_type (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHash cfg->log_type = RSPAMD_LOG_CONSOLE; } else if (g_ascii_strcasecmp (data, "syslog") == 0) { - if ((val = g_hash_table_lookup (attrs, "facility")) == NULL) { + if (attrs == NULL || (val = g_hash_table_lookup (attrs, "facility")) == NULL) { msg_err ("cannot log to syslog when facility is not specified"); return FALSE; } @@ -688,13 +688,13 @@ worker_handle_param (struct config_file *cfg, struct rspamd_xml_userdata *ctx, c GHashTable *worker_config; if (g_ascii_strcasecmp (tag, "option") == 0 || g_ascii_strcasecmp (tag, "param") == 0) { - if ((name = g_hash_table_lookup (attrs, "name")) == NULL) { + if (attrs == NULL || (name = g_hash_table_lookup (attrs, "name")) == NULL) { msg_err ("worker param tag must have \"name\" attribute"); return FALSE; } } else { - name = tag; + name = memory_pool_strdup (cfg->cfg_pool, tag); } if (!worker_options || @@ -805,7 +805,7 @@ handle_metric_symbol (struct config_file *cfg, struct rspamd_xml_userdata *ctx, struct metric *metric = ctx->section_pointer; value = memory_pool_alloc (cfg->cfg_pool, sizeof (double)); - if ((strval = g_hash_table_lookup (attrs, "weight")) == NULL) { + if (attrs == NULL || (strval = g_hash_table_lookup (attrs, "weight")) == NULL) { msg_info ("symbol tag should have \"weight\" attribute, assume weight 1.0"); *value = 1.0; } @@ -849,11 +849,11 @@ handle_module_opt (struct config_file *cfg, struct rspamd_xml_userdata *ctx, con } } else { - name = tag; + name = memory_pool_strdup (cfg->cfg_pool, 
tag); } /* Check for lua */ - if ((val = g_hash_table_lookup (attrs, "lua")) != NULL) { + if (attrs != NULL && (val = g_hash_table_lookup (attrs, "lua")) != NULL) { if (g_ascii_strcasecmp (val, "yes") == 0) { is_lua = TRUE; } @@ -890,7 +890,7 @@ handle_lua (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable /* Now config table can be used for configuring rspamd */ } /* First check "src" attribute */ - if ((val = g_hash_table_lookup (attrs, "src")) != NULL) { + if (attrs != NULL && (val = g_hash_table_lookup (attrs, "src")) != NULL) { /* Chdir */ tmp1 = g_strdup (val); tmp2 = g_strdup (val); @@ -998,7 +998,7 @@ handle_variable (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHash { gchar *val; - if ((val = g_hash_table_lookup (attrs, "name")) == NULL) { + if (attrs == NULL || (val = g_hash_table_lookup (attrs, "name")) == NULL) { msg_err ("'name' attribute is required for tag 'variable'"); return FALSE; } @@ -1013,7 +1013,7 @@ handle_composite (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHas gchar *val; struct expression *expr; - if ((val = g_hash_table_lookup (attrs, "name")) == NULL) { + if (attrs == NULL || (val = g_hash_table_lookup (attrs, "name")) == NULL) { msg_err ("'name' attribute is required for tag 'composite'"); return FALSE; } @@ -1136,13 +1136,13 @@ handle_classifier_opt (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *classifier_config; if (g_ascii_strcasecmp (tag, "option") == 0 || g_ascii_strcasecmp (tag, "param") == 0) { - if ((name = g_hash_table_lookup (attrs, "name")) == NULL) { + if (attrs == NULL || (name = g_hash_table_lookup (attrs, "name")) == NULL) { msg_err ("worker param tag must have \"name\" attribute"); return FALSE; } } else { - name = tag; + name = memory_pool_strdup (cfg->cfg_pool, tag); } if (!classifier_options || @@ -1376,7 +1376,7 @@ rspamd_xml_start_element (GMarkupParseContext *context, const gchar *element_nam g_queue_push_head (ud->if_stack, GSIZE_TO_POINTER 
((gsize)ud->state)); /* Now get attributes */ ud->cur_attrs = process_attrs (ud->cfg, attribute_names, attribute_values); - if ((condition = g_hash_table_lookup (ud->cur_attrs, "condition")) == NULL) { + if (ud->cur_attrs == NULL || (condition = g_hash_table_lookup (ud->cur_attrs, "condition")) == NULL) { msg_err ("unknown condition attribute for if tag"); *error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'condition' is required for tag 'if'"); ud->state = XML_ERROR; diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 9bd12173c..e05545355 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -40,6 +40,8 @@ LUA_FUNCTION_DEF (config, add_radix_map); LUA_FUNCTION_DEF (config, add_hash_map); LUA_FUNCTION_DEF (config, get_classifier); LUA_FUNCTION_DEF (config, register_symbol); +LUA_FUNCTION_DEF (config, register_virtual_symbol); +LUA_FUNCTION_DEF (config, register_callback_symbol); LUA_FUNCTION_DEF (config, register_post_filter); LUA_FUNCTION_DEF (config, register_module_option); @@ -51,6 +53,8 @@ static const struct luaL_reg configlib_m[] = { LUA_INTERFACE_DEF (config, add_hash_map), LUA_INTERFACE_DEF (config, get_classifier), LUA_INTERFACE_DEF (config, register_symbol), + LUA_INTERFACE_DEF (config, register_virtual_symbol), + LUA_INTERFACE_DEF (config, register_callback_symbol), LUA_INTERFACE_DEF (config, register_module_option), LUA_INTERFACE_DEF (config, register_post_filter), {"__tostring", lua_class_tostring}, @@ -504,7 +508,7 @@ lua_config_register_symbol (lua_State * L) struct lua_callback_data *cd; if (cfg) { - name = g_strdup (luaL_checkstring (L, 2)); + name = memory_pool_strdup (cfg->cfg_pool, luaL_checkstring (L, 2)); weight = luaL_checknumber (L, 3); callback = luaL_checkstring (L, 4); if (name) { @@ -517,6 +521,45 @@ lua_config_register_symbol (lua_State * L) return 1; } +static gint +lua_config_register_virtual_symbol (lua_State * L) +{ + struct config_file *cfg = lua_check_config (L); + const gchar *name; + double 
weight; + + if (cfg) { + name = memory_pool_strdup (cfg->cfg_pool, luaL_checkstring (L, 2)); + weight = luaL_checknumber (L, 3); + if (name) { + register_virtual_symbol (&cfg->cache, name, weight); + } + } + return 1; +} + +static gint +lua_config_register_callback_symbol (lua_State * L) +{ + struct config_file *cfg = lua_check_config (L); + const gchar *name, *callback; + double weight; + struct lua_callback_data *cd; + + if (cfg) { + name = memory_pool_strdup (cfg->cfg_pool, luaL_checkstring (L, 2)); + weight = luaL_checknumber (L, 3); + callback = luaL_checkstring (L, 4); + if (name) { + cd = g_malloc (sizeof (struct lua_callback_data)); + cd->name = g_strdup (callback); + cd->L = L; + register_callback_symbol (&cfg->cache, name, weight, lua_metric_symbol_callback, cd); + } + } + return 1; +} + /* Radix and hash table functions */ static gint lua_radix_get_key (lua_State * L) diff --git a/src/main.c b/src/main.c index ab62c8037..72a7b6eac 100644 --- a/src/main.c +++ b/src/main.c @@ -31,6 +31,7 @@ #include "map.h" #include "fuzzy_storage.h" #include "cfg_xml.h" +#include "symbols_cache.h" #ifndef WITHOUT_PERL @@ -715,16 +716,20 @@ print_symbols_cache (struct config_file *cfg) cur = cfg->cache->negative_items; while (cur) { item = cur->data; - printf ("-----------------------------------------------------------------\n"); - printf ("| %3d | %22s | %6.1f | %9d | %9.3f |\n", i, item->s->symbol, item->s->weight, item->s->frequency, item->s->avg_time); + if (!item->is_callback) { + printf ("-----------------------------------------------------------------\n"); + printf ("| %3d | %22s | %6.1f | %9d | %9.3f |\n", i, item->s->symbol, item->s->weight, item->s->frequency, item->s->avg_time); + } cur = g_list_next (cur); i ++; } cur = cfg->cache->static_items; while (cur) { item = cur->data; - printf ("-----------------------------------------------------------------\n"); - printf ("| %3d | %22s | %6.1f | %9d | %9.3f |\n", i, item->s->symbol, item->s->weight, 
item->s->frequency, item->s->avg_time); + if (!item->is_callback) { + printf ("-----------------------------------------------------------------\n"); + printf ("| %3d | %22s | %6.1f | %9d | %9.3f |\n", i, item->s->symbol, item->s->weight, item->s->frequency, item->s->avg_time); + } cur = g_list_next (cur); i ++; } @@ -859,6 +864,12 @@ main (gint argc, gchar **argv, gchar **env) } l = g_list_next (l); } + /* Insert classifiers symbols */ + (void)insert_classifier_symbols (rspamd->cfg); + + if (! validate_cache (rspamd->cfg->cache, rspamd->cfg, TRUE)) { + res = FALSE; + } if (dump_vars) { dump_cfg_vars (rspamd->cfg); } @@ -917,6 +928,9 @@ main (gint argc, gchar **argv, gchar **env) /* Check configuration for modules */ (void)check_modules_config (rspamd->cfg); + /* Insert classifiers symbols */ + (void)insert_classifier_symbols (rspamd->cfg); + /* Perform modules configuring */ l = g_list_first (rspamd->cfg->filters); @@ -933,6 +947,9 @@ main (gint argc, gchar **argv, gchar **env) /* Init config cache */ init_cfg_cache (rspamd->cfg); + /* Validate cache */ + (void)validate_cache (rspamd->cfg->cache, rspamd->cfg, FALSE); + /* Flush log */ flush_log_buf (); diff --git a/src/plugins/emails.c b/src/plugins/emails.c deleted file mode 100644 index 29dc040a3..000000000 --- a/src/plugins/emails.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2009, Rambler media - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/***MODULE:email - * rspamd module that extracts emails from messages and check them via blacklist - * - * Allowed options: - * - symbol (string): symbol to insert (default: 'R_BAD_EMAIL') - * - blacklist (map string): map that contains list of bad emails - */ - -#include "../config.h" -#include "../main.h" -#include "../message.h" -#include "../modules.h" -#include "../cfg_file.h" -#include "../expressions.h" -#include "../util.h" -#include "../view.h" -#include "../map.h" - -#define DEFAULT_SYMBOL "R_BAD_EMAIL" - -static const gchar *email_re_text = - "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+(?:[A-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\\b"; - -struct email_ctx { - gint (*filter) (struct worker_task * task); - gchar *symbol; - GRegex *email_re; - - GHashTable *blacklist; - gchar *blacklist_file; - - memory_pool_t *email_pool; -}; - -static struct email_ctx *email_module_ctx = NULL; - -static gint emails_mime_filter (struct worker_task *task); -static void emails_symbol_callback (struct worker_task *task, void *unused); -static gint emails_command_handler (struct worker_task *task); - -gint -emails_module_init (struct config_file *cfg, 
struct module_ctx **ctx) -{ - GError *err = NULL; - - email_module_ctx = g_malloc (sizeof (struct email_ctx)); - - email_module_ctx->filter = emails_mime_filter; - email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ()); - email_module_ctx->email_re = g_regex_new (email_re_text, G_REGEX_RAW | G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &err); - email_module_ctx->blacklist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - - *ctx = (struct module_ctx *)email_module_ctx; - - register_protocol_command ("emails", emails_command_handler); - - return 0; -} - - -gint -emails_module_config (struct config_file *cfg) -{ - gchar *value; - gint res = TRUE; - - if ((value = get_module_opt (cfg, "emails", "symbol")) != NULL) { - email_module_ctx->symbol = memory_pool_strdup (email_module_ctx->email_pool, value); - } - else { - email_module_ctx->symbol = DEFAULT_SYMBOL; - } - if ((value = get_module_opt (cfg, "emails", "blacklist")) != NULL) { - if (add_map (value, read_host_list, fin_host_list, (void **)&email_module_ctx->blacklist)) { - email_module_ctx->blacklist_file = memory_pool_strdup (email_module_ctx->email_pool, value + sizeof ("file://") - 1); - } - } - - - register_symbol (&cfg->cache, email_module_ctx->symbol, 1, emails_symbol_callback, NULL); - - return res; -} - -gint -emails_module_reconfig (struct config_file *cfg) -{ - memory_pool_delete (email_module_ctx->email_pool); - email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ()); - - return emails_module_config (cfg); -} - -static GList * -extract_emails (struct worker_task *task) -{ - GList *res = NULL, *cur; - GMatchInfo *info; - GError *err = NULL; - struct mime_text_part *part; - gchar *email_str; - gint rc; - - cur = g_list_first (task->text_parts); - while (cur) { - part = cur->data; - - if (part->is_empty) { - cur = g_list_next (cur); - continue; - } - - rc = g_regex_match_full (email_module_ctx->email_re, (const gchar *)part->orig->data, part->orig->len, 0, 0, 
&info, &err); - if (rc) { - while (g_match_info_matches (info)) { - email_str = g_match_info_fetch (info, 0); - if (email_str != NULL) { - res = g_list_prepend (res, email_str); - memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, email_str); - } - /* Get next match */ - g_match_info_next (info, &err); - } - } - else if (err != NULL) { - debug_task ("error matching regexp: %s", err->message); - } - else { - debug_task ("cannot find url pattern in given string"); - } - g_match_info_free (info); - - cur = g_list_next (cur); - } - if (res != NULL) { - memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, res); - } - - return res; -} - -static gint -emails_command_handler (struct worker_task *task) -{ - GList *emails, *cur; - gchar outbuf[BUFSIZ]; - gint r, num = 0; - - emails = extract_emails (task); - - r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK"); - - r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "Emails: "); - - cur = g_list_first (emails); - - while (cur) { - num++; - if (g_list_next (cur) != NULL) { - r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s, ", (gchar *)cur->data); - } - else { - r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s", (gchar *)cur->data); - } - cur = g_list_next (cur); - } - - outbuf[r++] = '\r'; - outbuf[r++] = '\n'; - - if (! 
rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE, FALSE)) { - return -1; - } - msg_info ("msg ok, id: <%s>, %d emails extracted", task->message_id, num); - - return 0; -} - -static void -emails_symbol_callback (struct worker_task *task, void *unused) -{ - GList *emails, *cur; - - - if (check_view (task->cfg->views, email_module_ctx->symbol, task)) { - emails = extract_emails (task); - if (email_module_ctx->blacklist && emails) { - cur = g_list_first (emails); - - while (cur) { - if (g_hash_table_lookup (email_module_ctx->blacklist, cur->data) != NULL) { - insert_result (task, email_module_ctx->symbol, 1, g_list_prepend (NULL, memory_pool_strdup (task->task_pool, (gchar *)cur->data))); - - } - cur = g_list_next (cur); - } - } - } - -} - -static gint -emails_mime_filter (struct worker_task *task) -{ - /* XXX: remove this */ - return 0; -} diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index e253955d0..a9f598736 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -128,7 +128,7 @@ static void fuzzy_delete_handler (gchar **args, struct contr /* Flags string is in format ::weight[, ::weight...] 
*/ static void -parse_flags_string (gchar *str) +parse_flags_string (struct config_file *cfg, gchar *str) { gchar **strvec, *item, *err_str, **map_str; gint num, i, t; @@ -152,6 +152,7 @@ parse_flags_string (gchar *str) map->fuzzy_flag = strtol (map_str[0], &err_str, 10); if (errno != 0 || (err_str && *err_str != '\0')) { msg_info ("cannot parse flag %s: %s", map_str[0], strerror (errno)); + continue; } else if (t == 2) { /* Weight is skipped in definition */ @@ -159,9 +160,11 @@ parse_flags_string (gchar *str) } else { map->weight = strtol (map_str[2], &err_str, 10); - /* Add flag to hash table */ - g_hash_table_insert (fuzzy_module_ctx->mappings, GINT_TO_POINTER(map->fuzzy_flag), map); + } + /* Add flag to hash table */ + g_hash_table_insert (fuzzy_module_ctx->mappings, GINT_TO_POINTER(map->fuzzy_flag), map); + register_virtual_symbol (&cfg->cache, map->symbol, map->weight); } g_strfreev (map_str); } @@ -407,7 +410,7 @@ fuzzy_check_module_config (struct config_file *cfg) parse_servers_string (value); } if ((value = get_module_opt (cfg, "fuzzy_check", "fuzzy_map")) != NULL) { - parse_flags_string (value); + parse_flags_string (cfg, value); } register_symbol (&cfg->cache, fuzzy_module_ctx->symbol, fuzzy_module_ctx->max_score, fuzzy_symbol_callback, NULL); diff --git a/src/plugins/lua/forged_recipients.lua b/src/plugins/lua/forged_recipients.lua index 1283fa77e..d759960d3 100644 --- a/src/plugins/lua/forged_recipients.lua +++ b/src/plugins/lua/forged_recipients.lua @@ -73,10 +73,13 @@ if opts then if opts['symbol_rcpt'] or opts['symbol_sender'] then if opts['symbol_rcpt'] then symbol_rcpt = opts['symbol_rcpt'] + rspamd_config:register_virtual_symbol(symbol_rcpt, 1.0, 'check_forged_headers') end if opts['symbol_sender'] then symbol_sender = opts['symbol_sender'] + rspamd_config:register_virtual_symbol(symbol_sender, 1.0) end - rspamd_config:register_symbol(symbol_rcpt, 1.0, 'check_forged_headers') + rspamd_config:register_callback_symbol('FORGED_RECIPIENTS', 1.0, 
'check_forged_headers') + end end diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua index e4ba9a4fc..97f122c48 100644 --- a/src/plugins/lua/multimap.lua +++ b/src/plugins/lua/multimap.lua @@ -89,7 +89,7 @@ function add_rule(params) local _,_,name,value = string.find(param, '(%w+)%s*=%s*(.+)') if not name or not value then rspamd_logger:err('invalid rule: '..param) - return 0 + return nil end if name == 'type' then if value == 'ip' then @@ -100,7 +100,7 @@ function add_rule(params) newrule['type'] = 'header' else rspamd_logger:err('invalid rule type: '.. value) - return 0 + return nil end elseif name == 'header' then newrule['header'] = value @@ -112,13 +112,13 @@ function add_rule(params) newrule['symbol'] = value else rspamd_logger:err('invalid rule option: '.. name) - return 0 + return nil end end if not newrule['symbol'] or not newrule['map'] or not newrule['symbol'] then rspamd_logger:err('incomplete rule') - return 0 + return nil end if newrule['type'] == 'ip' then newrule['ips'] = rspamd_config:add_radix_map (newrule['map']) @@ -126,7 +126,7 @@ function add_rule(params) newrule['hash'] = rspamd_config:add_hash_map (newrule['map']) end table.insert(rules, newrule) - return 1 + return newrule end -- Registration @@ -139,14 +139,20 @@ if opts then if type(strrules) == 'table' then for _,value in ipairs(strrules) do local params = split(value, ',') - if not add_rule (params) then + local rule = add_rule (params) + if not rule then rspamd_logger:err('cannot add rule: "'..value..'"') + else + rspamd_config:register_virtual_symbol(rule['symbol'], 1.0) end end elseif type(strrules) == 'string' then local params = split(strrules, ',') - if not add_rule (params) then + local rule = add_rule (params) + if not rule then rspamd_logger:err('cannot add rule: "'..strrules..'"') + else + rspamd_config:register_virtual_symbol(rule['symbol'], 1.0) end end end @@ -154,5 +160,5 @@ end if table.maxn(rules) > 0 then -- add fake symbol to check all maps inside 
a single callback - rspamd_config:register_symbol('MULTIMAP', 1.0, 'check_multimap') + rspamd_config:register_callback_symbol('MULTIMAP', 1.0, 'check_multimap') end diff --git a/src/plugins/lua/once_received.lua b/src/plugins/lua/once_received.lua index d20d432be..97df71da3 100644 --- a/src/plugins/lua/once_received.lua +++ b/src/plugins/lua/once_received.lua @@ -59,6 +59,7 @@ if opts then for n,v in pairs(opts) do if n == 'symbol_strict' then symbol_strict = v + rspamd_config:register_virtual_symbol(symbol_strict, 1.0) elseif n == 'bad_host' then bad_hosts = v elseif n == 'good_host' then diff --git a/src/plugins/spf.c b/src/plugins/spf.c index 043f0b821..f6077d2c2 100644 --- a/src/plugins/spf.c +++ b/src/plugins/spf.c @@ -112,6 +112,8 @@ spf_module_config (struct config_file *cfg) } register_symbol (&cfg->cache, spf_module_ctx->symbol_fail, 1, spf_symbol_callback, NULL); + register_virtual_symbol (&cfg->cache, spf_module_ctx->symbol_softfail, 1); + register_virtual_symbol (&cfg->cache, spf_module_ctx->symbol_allow, 1); return res; } diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 2f761c67b..9a759bdad 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -240,6 +240,42 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) return 0; } +/* + * Register virtual symbols for suffixes with bit wildcard + */ +static void +register_bit_symbols (struct config_file *cfg) +{ + gchar *c, *symbol; + GList *symit, *cur; + struct surbl_bit_item *bit; + struct suffix_item *suffix; + gint len; + + symit = surbl_module_ctx->suffixes; + + while (symit) { + suffix = symit->data; + if ((c = strchr (suffix->symbol, '%')) != NULL && *(c + 1) == 'b') { + cur = g_list_first (surbl_module_ctx->bits); + while (cur) { + bit = (struct surbl_bit_item *)cur->data; + len = strlen (suffix->symbol) - 2 + strlen (bit->symbol) + 1; + *c = '\0'; + symbol = memory_pool_alloc (cfg->cfg_pool, len); + rspamd_snprintf (symbol, len, "%s%s%s", suffix->symbol, 
bit->symbol, c + 2); + *c = '%'; + register_virtual_symbol (&cfg->cache, symbol, 1); + cur = g_list_next (cur); + } + } + else { + register_virtual_symbol (&cfg->cache, suffix->symbol, 1); + } + symit = g_list_next (symit); + } +} + gint surbl_module_config (struct config_file *cfg) { @@ -339,7 +375,7 @@ surbl_module_config (struct config_file *cfg) msg_debug ("add new surbl suffix: %s with symbol: %s", new_suffix->suffix, new_suffix->symbol); *str = '_'; surbl_module_ctx->suffixes = g_list_prepend (surbl_module_ctx->suffixes, new_suffix); - register_symbol (&cfg->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix); + register_callback_symbol (&cfg->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix); } } if (!g_strncasecmp (cur->param, "bit", sizeof ("bit") - 1)) { @@ -366,6 +402,8 @@ surbl_module_config (struct config_file *cfg) register_symbol (&cfg->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix); } + register_bit_symbols (cfg); + return TRUE; } diff --git a/src/symbols_cache.c b/src/symbols_cache.c index d549218c2..492387bac 100644 --- a/src/symbols_cache.c +++ b/src/symbols_cache.c @@ -243,8 +243,15 @@ create_cache_file (struct symbols_cache *cache, const gchar *filename, gint fd, return mmap_cache_file (cache, fd, pool); } -void -register_symbol (struct symbols_cache **cache, const gchar *name, double weight, symbol_func_t func, gpointer user_data) +enum rspamd_symbol_type { + SYMBOL_TYPE_NORMAL, + SYMBOL_TYPE_VIRTUAL, + SYMBOL_TYPE_CALLBACK +}; + +static void +register_symbol_common (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data, enum rspamd_symbol_type type) { struct cache_item *item = NULL; struct symbols_cache *pcache = *cache; @@ -271,6 +278,17 @@ register_symbol (struct symbols_cache **cache, const gchar *name, double weight, item->func = func; item->user_data = user_data; + switch (type) { + case SYMBOL_TYPE_NORMAL: + break; + case SYMBOL_TYPE_VIRTUAL: + item->is_virtual 
= TRUE; + break; + case SYMBOL_TYPE_CALLBACK: + item->is_callback = TRUE; + break; + } + /* Handle weight using default metric */ if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) { item->s->weight = weight * (*w); @@ -285,6 +303,26 @@ register_symbol (struct symbols_cache **cache, const gchar *name, double weight, *target = g_list_prepend (*target, item); } +void +register_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data) +{ + register_symbol_common (cache, name, weight, func, user_data, SYMBOL_TYPE_NORMAL); +} + +void +register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight) +{ + register_symbol_common (cache, name, weight, NULL, NULL, SYMBOL_TYPE_VIRTUAL); +} + +void +register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data) +{ + register_symbol_common (cache, name, weight, func, user_data, SYMBOL_TYPE_CALLBACK); +} + void register_dynamic_symbol (memory_pool_t *dynamic_pool, struct symbols_cache **cache, const gchar *name, double weight, symbol_func_t func, @@ -501,6 +539,17 @@ init_symbols_cache (memory_pool_t * pool, struct symbols_cache *cache, struct co g_checksum_get_digest (cksum, mem_sum, &cklen); /* Now try to read file sum */ if (lseek (fd, -(cklen), SEEK_END) == -1) { + if (errno == EINVAL) { + /* Try to create file */ + msg_info ("recreate cache file"); + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + else { + return create_cache_file (cache, filename, fd, pool); + } + } close (fd); g_free (mem_sum); g_checksum_free (cksum); @@ -598,6 +647,89 @@ check_debug_symbol (struct config_file *cfg, const gchar *symbol) return FALSE; } + +gboolean +validate_cache (struct 
symbols_cache *cache, struct config_file *cfg, gboolean strict) +{ + struct cache_item *item; + GList *cur, *p, *metric_symbols; + gboolean res; + + if (cache == NULL) { + msg_err ("empty cache is invalid"); + return FALSE; + } + + /* Check each symbol in a cache and find its weight definition */ + cur = cache->negative_items; + while (cur) { + item = cur->data; + if (!item->is_callback) { + if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { + if (strict) { + msg_warn ("no weight registered for symbol %s", item->s->symbol); + return FALSE; + } + else { + msg_info ("no weight registered for symbol %s", item->s->symbol); + } + } + } + cur = g_list_next (cur); + } + cur = cache->static_items; + while (cur) { + item = cur->data; + if (!item->is_callback) { + if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { + if (strict) { + msg_warn ("no weight registered for symbol %s", item->s->symbol); + return FALSE; + } + else { + msg_info ("no weight registered for symbol %s", item->s->symbol); + } + } + } + cur = g_list_next (cur); + } + /* Now check each metric item and find corresponding symbol in a cache */ + metric_symbols = g_hash_table_get_keys (cfg->metrics_symbols); + cur = metric_symbols; + while (cur) { + res = FALSE; + p = cache->negative_items; + while (p) { + item = p->data; + if (strcmp (item->s->symbol, cur->data) == 0) { + res = TRUE; + break; + } + p = g_list_next (p); + } + if (!res) { + p = cache->static_items; + while (p) { + item = p->data; + if (strcmp (item->s->symbol, cur->data) == 0) { + res = TRUE; + break; + } + p = g_list_next (p); + } + } + if (!res) { + msg_warn ("symbol '%s' is registered in metric but not found in cache", cur->data); + if (strict) { + return FALSE; + } + } + cur = g_list_next (cur); + } + + return TRUE; +} + struct symbol_callback_data { enum { CACHE_STATE_NEGATIVE, @@ -772,7 +904,7 @@ call_symbol_callback (struct worker_task * task, struct symbols_cache * cache, g if (!item) { 
return FALSE; } - if (check_view (task->cfg->views, item->s->symbol, task)) { + if (!item->is_virtual && check_view (task->cfg->views, item->s->symbol, task)) { #ifdef HAVE_CLOCK_GETTIME # ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1); diff --git a/src/symbols_cache.h b/src/symbols_cache.h index 8e7ba636f..2fe03d32a 100644 --- a/src/symbols_cache.h +++ b/src/symbols_cache.h @@ -36,6 +36,10 @@ struct cache_item { /* Callback data */ symbol_func_t func; gpointer user_data; + + /* Flags of virtual symbols */ + gboolean is_virtual; + gboolean is_callback; }; @@ -76,6 +80,20 @@ gboolean init_symbols_cache (memory_pool_t *pool, struct symbols_cache *cache, s */ void register_symbol (struct symbols_cache **cache, const gchar *name, double weight, symbol_func_t func, gpointer user_data); +/** + * Register virtual symbol + * @param name name of symbol + */ +void register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight); + +/** + * Register callback function for symbols parsing + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight, symbol_func_t func, gpointer user_data); + /** * Register function for dynamic symbols parsing * @param name name of symbol @@ -100,5 +118,13 @@ gboolean call_symbol_callback (struct worker_task *task, struct symbols_cache *c */ void remove_dynamic_rules (struct symbols_cache *cache); +/** + * Validate cache items against their weights defined in metrics + * @param cache symbols cache + * @param cfg configuration + * @param strict do strict checks - symbols MUST be described in metrics + */ +gboolean validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict); + #endif -- 2.39.5