diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-07-25 16:58:11 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-07-25 16:58:11 +0400 |
commit | 34ae83f0151a3fd31f4c045968defa39a2c40985 (patch) | |
tree | 3bb121336c76d16e3e5257c12a7905af0e04b70f | |
parent | 4a8c30c78940a9153de23dc4d031273649e93cce (diff) | |
download | rspamd-34ae83f0151a3fd31f4c045968defa39a2c40985.tar.gz rspamd-34ae83f0151a3fd31f4c045968defa39a2c40985.zip |
* Add rspamd_log variable to lua plugins to access logging functions
* Each part in rspamd task now can have parent part
* Check for parts distance only for multipart/alternative subparts
* Do not check attachements even if they are text (but attached as file)
* Do not die if write (2) returned ENOSPACE while doing logging, turn on throttling mode instead (1 write try in a second)
* Add ability to turn on debug for specific symbols
* Add ability to configure dns timeouts and dns retransmits in config file
-rw-r--r-- | conf/lua/rspamd.lua | 6 | ||||
-rw-r--r-- | src/cfg_file.h | 9 | ||||
-rw-r--r-- | src/cfg_xml.c | 45 | ||||
-rw-r--r-- | src/cfg_xml.h | 1 | ||||
-rw-r--r-- | src/expressions.c | 16 | ||||
-rw-r--r-- | src/filter.c | 2 | ||||
-rw-r--r-- | src/logger.c | 44 | ||||
-rw-r--r-- | src/logger.h | 9 | ||||
-rw-r--r-- | src/lua/lua_common.c | 9 | ||||
-rw-r--r-- | src/main.h | 1 | ||||
-rw-r--r-- | src/message.c | 32 | ||||
-rw-r--r-- | src/message.h | 2 | ||||
-rw-r--r-- | src/symbols_cache.c | 26 |
13 files changed, 182 insertions, 20 deletions
diff --git a/conf/lua/rspamd.lua b/conf/lua/rspamd.lua index f546c6189..bd04beab4 100644 --- a/conf/lua/rspamd.lua +++ b/conf/lua/rspamd.lua @@ -16,11 +16,13 @@ local html_link_image = '/<img /iPr' reconf['HTML_SHORT_LINK_IMG_2'] = string.format('(%s) & (%s)', html_length_1024_1536, html_link_image) -- Local rules -local r_bgcolor = '/BGCOLOR=/iM' -local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iM' +local r_bgcolor = '/BGCOLOR=/iP' +local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP' reconf['R_WHITE_ON_WHITE'] = string.format('(!(%s) & (%s))', r_bgcolor, r_font_color) reconf['R_FLASH_REDIR_IMGSHACK'] = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\.us\\/\\S+\\.swf/U' local r_rcvd_from_valuehost = 'Received=/\\sb0\\.valuehost\\.ru/H' local r_cyr_phone = '/8 \\(\\xD799\\)/P' reconf['R_SPAM_FROM_VALUEHOST'] = string.format('(%s) & (%s)', r_rcvd_from_valuehost, r_cyr_phone) +-- Different text parts +reconf['R_PARTS_DIFFER'] = 'compare_parts_distance(70)'; diff --git a/src/cfg_file.h b/src/cfg_file.h index fa73643f0..b2ca61150 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -252,6 +252,7 @@ struct config_file { uint32_t log_buf_size; /**< length of log buffer */ gchar *debug_ip_map; /**< turn on debugging for specified ip addresses */ gboolean log_urls; /**< whether we should log URLs */ + GList *debug_symbols; /**< symbols to debug */ gsize max_statfile_size; /**< maximum size for statfile */ @@ -266,8 +267,8 @@ struct config_file { gboolean delivery_enable; /**< is delivery agent is enabled */ gchar *deliver_host; /**< host for mail deliviring */ struct in_addr deliver_addr; /**< its address */ - uint16_t deliver_port; /**< port for deliviring */ - uint16_t deliver_family; /**< socket family for delivirnig */ + guint16 deliver_port; /**< port for deliviring */ + guint16 deliver_family; /**< socket family for delivirnig */ gchar *deliver_agent_path; /**< deliver to pipe instead of socket */ gboolean deliver_lmtp; /**< use LMTP instead of SMTP */ @@ -302,8 +303,8 @@ struct config_file { gchar* dump_checksum; /**< dump checksum of config file */ gpointer lua_state; /**< pointer to lua state */ - guint dns_timeout; /**< timeout in milliseconds for waiting for dns reply */ - guint dns_retransmits; /**< maximum retransmits count */ + guint32 dns_timeout; /**< timeout in milliseconds for waiting for dns reply */ + guint32 dns_retransmits; /**< maximum retransmits count */ GList *nameservers; /**< list of nameservers or NULL to parse resolv.conf */ }; diff --git a/src/cfg_xml.c b/src/cfg_xml.c index 534bfa961..0f9bff97e 100644 --- a/src/cfg_xml.c +++ b/src/cfg_xml.c @@ -151,6 +151,18 @@ static struct xml_parser_rule grammar[] = { G_STRUCT_OFFSET (struct config_file, cache_filename), NULL }, + { + "dns_timeout", + xml_handle_seconds, + G_STRUCT_OFFSET (struct config_file, dns_timeout), + NULL + }, + { + "dns_retransmits", + xml_handle_uint32, + G_STRUCT_OFFSET (struct config_file, dns_retransmits), + NULL + }, NULL_ATTR }, NULL_ATTR @@ -186,6 +198,12 @@ static struct xml_parser_rule grammar[] = { G_STRUCT_OFFSET (struct config_file, debug_ip_map), NULL }, + { + "debug_symbols", + xml_handle_string_list, + G_STRUCT_OFFSET (struct config_file, debug_symbols), + NULL + }, NULL_ATTR }, NULL_ATTR @@ -1144,6 +1162,29 @@ xml_handle_string (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHa return TRUE; } +gboolean +xml_handle_string_list (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset) +{ + GList **dest; + gchar **tokens, **cur; + + dest = (GList **)G_STRUCT_MEMBER_P (dest_struct, offset); + *dest = NULL; + + tokens = g_strsplit (data, ";,", 0); + if (!tokens || !tokens[0]) { + return FALSE; + } + memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)g_strfreev, tokens); + cur = tokens; + while (*cur) { + *dest = g_list_prepend (*dest, *cur); + cur ++; + } + + return TRUE; +} + gboolean xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset) @@ -1159,9 +1200,9 @@ xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHash gboolean xml_handle_seconds (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset) { - time_t *dest; + guint32 *dest; - dest = (time_t *)G_STRUCT_MEMBER_P (dest_struct, offset); + dest = (guint32 *)G_STRUCT_MEMBER_P (dest_struct, offset); *dest = parse_seconds (data); return TRUE; diff --git a/src/cfg_xml.h b/src/cfg_xml.h index c91bb1779..a72eb46f4 100644 --- a/src/cfg_xml.h +++ b/src/cfg_xml.h @@ -74,6 +74,7 @@ void rspamd_xml_error (GMarkupParseContext *context, /* Handlers */ /* Basic xml parsing functions */ gboolean xml_handle_string (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset); +gboolean xml_handle_string_list (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset); /* Numeric params */ gboolean xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset); diff --git a/src/expressions.c b/src/expressions.c index 447bdcbc2..c3ece6808 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -929,6 +929,9 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused) struct mime_text_part *p1, *p2; GList *cur; struct expression_argument *arg; + GMimeObject *parent; + const GMimeContentType *ct; + if (args == NULL) { debug_task ("no threshold is specified, assume it 100"); @@ -953,6 +956,19 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused) return FALSE; } p2 = cur->data; + /* First of all check parent object */ + if (p1->parent && p1->parent == p2->parent) { + parent = p1->parent; + ct = g_mime_object_get_content_type (parent); + if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) { + debug_task ("two parts are not belong to multipart/alternative container, skip check"); + return FALSE; + } + } + else { + debug_task ("message contains two parts but they are in different multi-parts"); + return FALSE; + } if (!p1->is_empty && !p2->is_empty) { diff = fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy); debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold); diff --git a/src/filter.c b/src/filter.c index d91ef9a25..83868e9b9 100644 --- a/src/filter.c +++ b/src/filter.c @@ -110,7 +110,7 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const cha g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s); } - debug_task ("got %.2f score for metric %s, factor: %f", s->score, metric->name, w); + debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w); } diff --git a/src/logger.c b/src/logger.c index f17835764..9bd3ce3f0 100644 --- a/src/logger.c +++ b/src/logger.c @@ -48,6 +48,9 @@ typedef struct rspamd_logger_s { int fd; gboolean is_buffered; gboolean enabled; + gboolean is_debug; + gboolean throttling; + time_t throttling_time; enum rspamd_log_type type; pid_t pid; enum process_type process_type; @@ -118,16 +121,17 @@ direct_write_log_line (void *data, int count, gboolean is_iov) } else if (errno == EFAULT || errno == EINVAL || errno == EFBIG || errno == ENOSPC) { /* Rare case */ - if (write (rspamd_log->fd, errmsg, r) == -1) { - /* Don't know what to do */ - exit (EXIT_FAILURE); - } + rspamd_log->throttling = TRUE; + rspamd_log->throttling_time = time (NULL); } else if (errno == EPIPE) { /* We write to some pipe and it disappears, disable logging */ rspamd_log->enabled = FALSE; } } + else if (rspamd_log->throttling) { + rspamd_log->throttling = FALSE; + } } } @@ -421,6 +425,7 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla uint32_t cksum; size_t mlen; const char *cptype = NULL; + gboolean got_time = FALSE; if (! rspamd_log->enabled) { return; @@ -428,6 +433,18 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla if (forced || log_level <= rspamd_log->cfg->log_level) { + /* Check throttling due to write errors */ + if (rspamd_log->throttling) { + now = time (NULL); + if (rspamd_log->throttling_time != now) { + rspamd_log->throttling_time = now; + got_time = TRUE; + } + else { + /* Do not try to write to file too often while throtling */ + return; + } + } /* Check repeats */ mlen = strlen (message); cksum = rspamd_log_calculate_cksum (message, mlen); @@ -476,7 +493,10 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla } } - now = time (NULL); + if (! got_time) { + now = time (NULL); + } + tms = localtime (&now); strftime (timebuf, sizeof (timebuf), "%F %H:%M:%S", tms); @@ -527,7 +547,7 @@ rspamd_conditional_debug (uint32_t addr, const char *function, const char *fmt, va_list vp; u_char *end; - if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || + if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || rspamd_log->is_debug || (rspamd_log->debug_ip != NULL && radix32tree_find (rspamd_log->debug_ip, ntohl (addr)) != RADIX_NO_VALUE)) { va_start (vp, fmt); @@ -545,3 +565,15 @@ rspamd_glib_log_function (const gchar *log_domain, GLogLevelFlags log_level, con rspamd_log->log_func (log_domain, NULL, log_level, message, FALSE, rspamd_log->cfg); } } + +void +rspamd_log_debug () +{ + rspamd_log->is_debug = TRUE; +} + +void +rspamd_log_nodebug () +{ + rspamd_log->is_debug = FALSE; +} diff --git a/src/logger.h b/src/logger.h index fe6e0bda5..9b419c8a6 100644 --- a/src/logger.h +++ b/src/logger.h @@ -62,6 +62,15 @@ void rspamd_common_log_function (GLogLevelFlags log_level, const char *function, */ void rspamd_conditional_debug (uint32_t addr, const char *function, const char *fmt, ...) ; +/** + * Temporary turn on debug + */ +void rspamd_log_debug (); + +/** + * Turn off debug + */ +void rspamd_log_nodebug (); /* Typical functions */ diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index aec72006c..2691a2abe 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -209,6 +209,7 @@ void init_lua_filters (struct config_file *cfg) { struct config_file **pcfg; + gpointer *plogger; GList *cur, *tmp; struct script_module *module; struct statfile *st; @@ -224,11 +225,17 @@ init_lua_filters (struct config_file *cfg) continue; } - /* Call module init function */ + /* Initialize config structure */ pcfg = lua_newuserdata (L, sizeof (struct config_file *)); lua_setclass (L, "rspamd{config}", -1); *pcfg = cfg; lua_setglobal (L, "rspamd_config"); + /* Initialize logger */ + plogger = lua_newuserdata (L, sizeof (gpointer)); + lua_setclass (L, "rspamd{logger}", -1); + *plogger = NULL; + lua_setglobal (L, "rspamd_log"); + /* do the call (1 arguments, 1 result) */ if (lua_pcall (L, 0, LUA_MULTRET, 0) != 0) { msg_info ("init of %s failed: %s", module->path, lua_tostring (L, -1)); diff --git a/src/main.h b/src/main.h index b729e87de..e35a7eea3 100644 --- a/src/main.h +++ b/src/main.h @@ -195,6 +195,7 @@ struct worker_task { struct rspamd_async_session* s; /**< async session object */ int parts_count; /**< mime parts count */ GMimeMessage *message; /**< message, parsed with GMime */ + GMimeObject *parser_parent_part; /**< current parent part */ InternetAddressList *rcpts; /**< list of all recipients */ GList *parts; /**< list of parsed parts */ GList *text_parts; /**< list of text parts */ diff --git a/src/message.c b/src/message.c index f3be3032a..2c53deca1 100644 --- a/src/message.c +++ b/src/message.c @@ -525,9 +525,26 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC } static void -process_text_part (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, gboolean is_empty) +process_text_part (struct worker_task *task, GByteArray *part_content, GMimeContentType *type, + GMimeObject *part, GMimeObject *parent, gboolean is_empty) { struct mime_text_part *text_part; + const char *cd; + + /* Skip attachements */ +#ifndef GMIME24 + cd = g_mime_part_get_content_disposition (GMIME_PART (part)); + if (cd && g_ascii_strcasecmp (cd, "attachment") == 0) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#else + cd = g_mime_object_get_disposition (GMIME_OBJECT (part)); + if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#endif if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) { debug_task ("got urls from text/html part"); @@ -544,6 +561,7 @@ process_text_part (struct worker_task *task, GByteArray * part_content, GMimeCon text_part->orig = convert_text_to_utf (task, part_content, type, text_part); text_part->is_balanced = TRUE; text_part->html_nodes = NULL; + text_part->parent = parent; text_part->html_urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp); text_part->urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp); @@ -572,6 +590,7 @@ process_text_part (struct worker_task *task, GByteArray * part_content, GMimeCon text_part = memory_pool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); text_part->is_html = FALSE; + text_part->parent = parent; if (is_empty) { text_part->is_empty = TRUE; text_part->orig = NULL; @@ -645,6 +664,7 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) } else if (GMIME_IS_MULTIPART (part)) { /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ + task->parser_parent_part = part; #ifndef GMIME24 debug_task ("detected multipart part"); /* we'll get to finding out if this is a signed/encrypted multipart later... */ @@ -656,7 +676,6 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) return; } #endif - /* XXX: do nothing with multiparts in gmime 2.4 */ } else if (GMIME_IS_PART (part)) { /* a normal leaf part, could be text/plain or image/jpeg etc */ @@ -688,10 +707,11 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part)); mime_part->type = type; mime_part->content = part_content; + mime_part->parent = task->parser_parent_part; debug_task ("found part with content-type: %s/%s", type->type, type->subtype); task->parts = g_list_prepend (task->parts, mime_part); /* Skip empty parts */ - process_text_part (task, part_content, type, (part_content->len <= 0)); + process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0)); } else { msg_warn ("write to stream failed: %d, %s", errno, strerror (errno)); @@ -762,6 +782,12 @@ process_message (struct worker_task *task) #ifdef GMIME24 g_mime_message_foreach (message, mime_foreach_callback, task); #else + /* + * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback + * so we need to set up parent part by hands + */ + task->parser_parent_part = g_mime_message_get_mime_part (message); + g_object_unref (task->parser_parent_part); g_mime_message_foreach_part (message, mime_foreach_callback, task); #endif diff --git a/src/message.h b/src/message.h index abebe4862..db42f2094 100644 --- a/src/message.h +++ b/src/message.h @@ -15,6 +15,7 @@ struct controller_session; struct mime_part { GMimeContentType *type; GByteArray *content; + GMimeObject *parent; }; struct mime_text_part { @@ -30,6 +31,7 @@ struct mime_text_part { GTree *urls; GTree *html_urls; fuzzy_hash_t *fuzzy; + GMimeObject *parent; }; struct received_header { diff --git a/src/symbols_cache.c b/src/symbols_cache.c index f8257a716..ee931458e 100644 --- a/src/symbols_cache.c +++ b/src/symbols_cache.c @@ -582,6 +582,22 @@ check_negative_dynamic_item (struct worker_task *task, struct symbols_cache *cac return FALSE; } +static gboolean +check_debug_symbol (struct config_file *cfg, const char *symbol) +{ + GList *cur; + + cur = cfg->debug_symbols; + while (cur) { + if (strcmp (symbol, (const char *)cur->data) == 0) { + return TRUE; + } + cur = g_list_next (cur); + } + + return FALSE; +} + struct symbol_callback_data { enum { CACHE_STATE_NEGATIVE, @@ -770,7 +786,15 @@ call_symbol_callback (struct worker_task * task, struct symbols_cache * cache, g msg_warn ("gettimeofday failed: %s", strerror (errno)); } #endif - item->func (task, item->user_data); + if (G_UNLIKELY (check_debug_symbol (task->cfg, item->s->symbol))) { + rspamd_log_debug (); + item->func (task, item->user_data); + rspamd_log_nodebug (); + } + else { + item->func (task, item->user_data); + } + #ifdef HAVE_CLOCK_GETTIME # ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID |