summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2010-07-25 16:58:11 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2010-07-25 16:58:11 +0400
commit34ae83f0151a3fd31f4c045968defa39a2c40985 (patch)
tree3bb121336c76d16e3e5257c12a7905af0e04b70f
parent4a8c30c78940a9153de23dc4d031273649e93cce (diff)
downloadrspamd-34ae83f0151a3fd31f4c045968defa39a2c40985.tar.gz
rspamd-34ae83f0151a3fd31f4c045968defa39a2c40985.zip
* Add rspamd_log variable to lua plugins to access logging functions
* Each part in rspamd task now can have parent part * Check for parts distance only for multipart/alternative subparts * Do not check attachments even if they are text (but attached as file) * Do not die if write (2) returned ENOSPC while doing logging, turn on throttling mode instead (1 write try in a second) * Add ability to turn on debug for specific symbols * Add ability to configure dns timeouts and dns retransmits in config file
-rw-r--r--conf/lua/rspamd.lua6
-rw-r--r--src/cfg_file.h9
-rw-r--r--src/cfg_xml.c45
-rw-r--r--src/cfg_xml.h1
-rw-r--r--src/expressions.c16
-rw-r--r--src/filter.c2
-rw-r--r--src/logger.c44
-rw-r--r--src/logger.h9
-rw-r--r--src/lua/lua_common.c9
-rw-r--r--src/main.h1
-rw-r--r--src/message.c32
-rw-r--r--src/message.h2
-rw-r--r--src/symbols_cache.c26
13 files changed, 182 insertions, 20 deletions
diff --git a/conf/lua/rspamd.lua b/conf/lua/rspamd.lua
index f546c6189..bd04beab4 100644
--- a/conf/lua/rspamd.lua
+++ b/conf/lua/rspamd.lua
@@ -16,11 +16,13 @@ local html_link_image = '/<img /iPr'
reconf['HTML_SHORT_LINK_IMG_2'] = string.format('(%s) & (%s)', html_length_1024_1536, html_link_image)
-- Local rules
-local r_bgcolor = '/BGCOLOR=/iM'
-local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iM'
+local r_bgcolor = '/BGCOLOR=/iP'
+local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'
reconf['R_WHITE_ON_WHITE'] = string.format('(!(%s) & (%s))', r_bgcolor, r_font_color)
reconf['R_FLASH_REDIR_IMGSHACK'] = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\.us\\/\\S+\\.swf/U'
local r_rcvd_from_valuehost = 'Received=/\\sb0\\.valuehost\\.ru/H'
local r_cyr_phone = '/8 \\(\\xD799\\)/P'
reconf['R_SPAM_FROM_VALUEHOST'] = string.format('(%s) & (%s)', r_rcvd_from_valuehost, r_cyr_phone)
+-- Different text parts
+reconf['R_PARTS_DIFFER'] = 'compare_parts_distance(70)';
diff --git a/src/cfg_file.h b/src/cfg_file.h
index fa73643f0..b2ca61150 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -252,6 +252,7 @@ struct config_file {
uint32_t log_buf_size; /**< length of log buffer */
gchar *debug_ip_map; /**< turn on debugging for specified ip addresses */
gboolean log_urls; /**< whether we should log URLs */
+ GList *debug_symbols; /**< symbols to debug */
gsize max_statfile_size; /**< maximum size for statfile */
@@ -266,8 +267,8 @@ struct config_file {
gboolean delivery_enable; /**< is delivery agent is enabled */
gchar *deliver_host; /**< host for mail deliviring */
struct in_addr deliver_addr; /**< its address */
- uint16_t deliver_port; /**< port for deliviring */
- uint16_t deliver_family; /**< socket family for delivirnig */
+ guint16 deliver_port; /**< port for deliviring */
+ guint16 deliver_family; /**< socket family for delivirnig */
gchar *deliver_agent_path; /**< deliver to pipe instead of socket */
gboolean deliver_lmtp; /**< use LMTP instead of SMTP */
@@ -302,8 +303,8 @@ struct config_file {
gchar* dump_checksum; /**< dump checksum of config file */
gpointer lua_state; /**< pointer to lua state */
- guint dns_timeout; /**< timeout in milliseconds for waiting for dns reply */
- guint dns_retransmits; /**< maximum retransmits count */
+ guint32 dns_timeout; /**< timeout in milliseconds for waiting for dns reply */
+ guint32 dns_retransmits; /**< maximum retransmits count */
GList *nameservers; /**< list of nameservers or NULL to parse resolv.conf */
};
diff --git a/src/cfg_xml.c b/src/cfg_xml.c
index 534bfa961..0f9bff97e 100644
--- a/src/cfg_xml.c
+++ b/src/cfg_xml.c
@@ -151,6 +151,18 @@ static struct xml_parser_rule grammar[] = {
G_STRUCT_OFFSET (struct config_file, cache_filename),
NULL
},
+ {
+ "dns_timeout",
+ xml_handle_seconds,
+ G_STRUCT_OFFSET (struct config_file, dns_timeout),
+ NULL
+ },
+ {
+ "dns_retransmits",
+ xml_handle_uint32,
+ G_STRUCT_OFFSET (struct config_file, dns_retransmits),
+ NULL
+ },
NULL_ATTR
},
NULL_ATTR
@@ -186,6 +198,12 @@ static struct xml_parser_rule grammar[] = {
G_STRUCT_OFFSET (struct config_file, debug_ip_map),
NULL
},
+ {
+ "debug_symbols",
+ xml_handle_string_list,
+ G_STRUCT_OFFSET (struct config_file, debug_symbols),
+ NULL
+ },
NULL_ATTR
},
NULL_ATTR
@@ -1144,6 +1162,29 @@ xml_handle_string (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHa
return TRUE;
}
+/**
+ * Split `data` into tokens and store them as a GList in the GList * member
+ * found at `offset` inside `dest_struct`.
+ *
+ * Tokens are separated by either ';' or ','. The list elements point into the
+ * vector returned by g_strsplit_set (); g_strfreev () for that vector is
+ * registered as a destructor on cfg->cfg_pool. Tokens are prepended, so the
+ * resulting list is in reverse order of appearance.
+ *
+ * @param cfg config whose memory pool gets the destructor for the tokens
+ * @param ctx not used by this handler
+ * @param attrs not used by this handler
+ * @param data text to split
+ * @param user_data not used by this handler
+ * @param dest_struct structure holding the destination member
+ * @param offset byte offset of the GList * member inside dest_struct
+ * @return TRUE if at least one token was stored, FALSE otherwise
+ */
+gboolean
+xml_handle_string_list (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
+{
+	GList **dest;
+	gchar **tokens, **cur;
+
+	dest = (GList **)G_STRUCT_MEMBER_P (dest_struct, offset);
+	*dest = NULL;
+
+	/* g_strsplit () would treat ";," as a single two-character delimiter */
+	tokens = g_strsplit_set (data, ";,", 0);
+	if (tokens == NULL) {
+		return FALSE;
+	}
+	if (tokens[0] == NULL) {
+		/* Empty input still yields an allocated empty vector, release it */
+		g_strfreev (tokens);
+		return FALSE;
+	}
+	memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)g_strfreev, tokens);
+	for (cur = tokens; *cur != NULL; cur ++) {
+		*dest = g_list_prepend (*dest, *cur);
+	}
+
+	return TRUE;
+}
+
gboolean
xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
@@ -1159,9 +1200,9 @@ xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHash
gboolean
xml_handle_seconds (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
{
- time_t *dest;
+ guint32 *dest;
- dest = (time_t *)G_STRUCT_MEMBER_P (dest_struct, offset);
+ dest = (guint32 *)G_STRUCT_MEMBER_P (dest_struct, offset);
*dest = parse_seconds (data);
return TRUE;
diff --git a/src/cfg_xml.h b/src/cfg_xml.h
index c91bb1779..a72eb46f4 100644
--- a/src/cfg_xml.h
+++ b/src/cfg_xml.h
@@ -74,6 +74,7 @@ void rspamd_xml_error (GMarkupParseContext *context,
/* Handlers */
/* Basic xml parsing functions */
gboolean xml_handle_string (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset);
+gboolean xml_handle_string_list (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset);
/* Numeric params */
gboolean xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset);
diff --git a/src/expressions.c b/src/expressions.c
index 447bdcbc2..c3ece6808 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -929,6 +929,9 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused)
struct mime_text_part *p1, *p2;
GList *cur;
struct expression_argument *arg;
+ GMimeObject *parent;
+ const GMimeContentType *ct;
+
if (args == NULL) {
debug_task ("no threshold is specified, assume it 100");
@@ -953,6 +956,19 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused)
return FALSE;
}
p2 = cur->data;
+ /* First of all check parent object */
+ if (p1->parent && p1->parent == p2->parent) {
+ parent = p1->parent;
+ ct = g_mime_object_get_content_type (parent);
+ if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+ debug_task ("two parts are not belong to multipart/alternative container, skip check");
+ return FALSE;
+ }
+ }
+ else {
+ debug_task ("message contains two parts but they are in different multi-parts");
+ return FALSE;
+ }
if (!p1->is_empty && !p2->is_empty) {
diff = fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy);
debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold);
diff --git a/src/filter.c b/src/filter.c
index d91ef9a25..83868e9b9 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -110,7 +110,7 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const cha
g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s);
}
- debug_task ("got %.2f score for metric %s, factor: %f", s->score, metric->name, w);
+ debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w);
}
diff --git a/src/logger.c b/src/logger.c
index f17835764..9bd3ce3f0 100644
--- a/src/logger.c
+++ b/src/logger.c
@@ -48,6 +48,9 @@ typedef struct rspamd_logger_s {
int fd;
gboolean is_buffered;
gboolean enabled;
+ gboolean is_debug;
+ gboolean throttling;
+ time_t throttling_time;
enum rspamd_log_type type;
pid_t pid;
enum process_type process_type;
@@ -118,16 +121,17 @@ direct_write_log_line (void *data, int count, gboolean is_iov)
}
else if (errno == EFAULT || errno == EINVAL || errno == EFBIG || errno == ENOSPC) {
/* Rare case */
- if (write (rspamd_log->fd, errmsg, r) == -1) {
- /* Don't know what to do */
- exit (EXIT_FAILURE);
- }
+ rspamd_log->throttling = TRUE;
+ rspamd_log->throttling_time = time (NULL);
}
else if (errno == EPIPE) {
/* We write to some pipe and it disappears, disable logging */
rspamd_log->enabled = FALSE;
}
}
+ else if (rspamd_log->throttling) {
+ rspamd_log->throttling = FALSE;
+ }
}
}
@@ -421,6 +425,7 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla
uint32_t cksum;
size_t mlen;
const char *cptype = NULL;
+ gboolean got_time = FALSE;
if (! rspamd_log->enabled) {
return;
@@ -428,6 +433,18 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla
if (forced || log_level <= rspamd_log->cfg->log_level) {
+ /* Check throttling due to write errors */
+ if (rspamd_log->throttling) {
+ now = time (NULL);
+ if (rspamd_log->throttling_time != now) {
+ rspamd_log->throttling_time = now;
+ got_time = TRUE;
+ }
+ else {
+ /* Do not try to write to file too often while throttling */
+ return;
+ }
+ }
/* Check repeats */
mlen = strlen (message);
cksum = rspamd_log_calculate_cksum (message, mlen);
@@ -476,7 +493,10 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla
}
}
- now = time (NULL);
+ if (! got_time) {
+ now = time (NULL);
+ }
+
tms = localtime (&now);
strftime (timebuf, sizeof (timebuf), "%F %H:%M:%S", tms);
@@ -527,7 +547,7 @@ rspamd_conditional_debug (uint32_t addr, const char *function, const char *fmt,
va_list vp;
u_char *end;
- if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG ||
+ if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || rspamd_log->is_debug ||
(rspamd_log->debug_ip != NULL && radix32tree_find (rspamd_log->debug_ip, ntohl (addr)) != RADIX_NO_VALUE)) {
va_start (vp, fmt);
@@ -545,3 +565,15 @@ rspamd_glib_log_function (const gchar *log_domain, GLogLevelFlags log_level, con
rspamd_log->log_func (log_domain, NULL, log_level, message, FALSE, rspamd_log->cfg);
}
}
+
+/**
+ * Temporarily force debug output: sets the is_debug flag of the global
+ * logger, which rspamd_conditional_debug () checks in addition to the
+ * configured log level.
+ */
+void
+rspamd_log_debug (void)
+{
+	rspamd_log->is_debug = TRUE;
+}
+
+/**
+ * Clear the is_debug flag of the global logger so that debug output again
+ * depends only on the other checks in rspamd_conditional_debug ().
+ */
+void
+rspamd_log_nodebug (void)
+{
+	rspamd_log->is_debug = FALSE;
+}
diff --git a/src/logger.h b/src/logger.h
index fe6e0bda5..9b419c8a6 100644
--- a/src/logger.h
+++ b/src/logger.h
@@ -62,6 +62,15 @@ void rspamd_common_log_function (GLogLevelFlags log_level, const char *function,
*/
void rspamd_conditional_debug (uint32_t addr, const char *function, const char *fmt, ...) ;
+/**
+ * Temporarily turn on debug output (sets the logger's is_debug flag)
+ */
+void rspamd_log_debug (void);
+
+/**
+ * Turn off debug output (clears the logger's is_debug flag)
+ */
+void rspamd_log_nodebug (void);
/* Typical functions */
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index aec72006c..2691a2abe 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -209,6 +209,7 @@ void
init_lua_filters (struct config_file *cfg)
{
struct config_file **pcfg;
+ gpointer *plogger;
GList *cur, *tmp;
struct script_module *module;
struct statfile *st;
@@ -224,11 +225,17 @@ init_lua_filters (struct config_file *cfg)
continue;
}
- /* Call module init function */
+ /* Initialize config structure */
pcfg = lua_newuserdata (L, sizeof (struct config_file *));
lua_setclass (L, "rspamd{config}", -1);
*pcfg = cfg;
lua_setglobal (L, "rspamd_config");
+ /* Initialize logger */
+ plogger = lua_newuserdata (L, sizeof (gpointer));
+ lua_setclass (L, "rspamd{logger}", -1);
+ *plogger = NULL;
+ lua_setglobal (L, "rspamd_log");
+
/* do the call (1 arguments, 1 result) */
if (lua_pcall (L, 0, LUA_MULTRET, 0) != 0) {
msg_info ("init of %s failed: %s", module->path, lua_tostring (L, -1));
diff --git a/src/main.h b/src/main.h
index b729e87de..e35a7eea3 100644
--- a/src/main.h
+++ b/src/main.h
@@ -195,6 +195,7 @@ struct worker_task {
struct rspamd_async_session* s; /**< async session object */
int parts_count; /**< mime parts count */
GMimeMessage *message; /**< message, parsed with GMime */
+ GMimeObject *parser_parent_part; /**< current parent part */
InternetAddressList *rcpts; /**< list of all recipients */
GList *parts; /**< list of parsed parts */
GList *text_parts; /**< list of text parts */
diff --git a/src/message.c b/src/message.c
index f3be3032a..2c53deca1 100644
--- a/src/message.c
+++ b/src/message.c
@@ -525,9 +525,26 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
}
static void
-process_text_part (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, gboolean is_empty)
+process_text_part (struct worker_task *task, GByteArray *part_content, GMimeContentType *type,
+ GMimeObject *part, GMimeObject *parent, gboolean is_empty)
{
struct mime_text_part *text_part;
+ const char *cd;
+
+ /* Skip attachments */
+#ifndef GMIME24
+ cd = g_mime_part_get_content_disposition (GMIME_PART (part));
+ if (cd && g_ascii_strcasecmp (cd, "attachment") == 0) {
+ debug_task ("skip attachments for checking as text parts");
+ return;
+ }
+#else
+ cd = g_mime_object_get_disposition (GMIME_OBJECT (part));
+ if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0) {
+ debug_task ("skip attachments for checking as text parts");
+ return;
+ }
+#endif
if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
debug_task ("got urls from text/html part");
@@ -544,6 +561,7 @@ process_text_part (struct worker_task *task, GByteArray * part_content, GMimeCon
text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
text_part->is_balanced = TRUE;
text_part->html_nodes = NULL;
+ text_part->parent = parent;
text_part->html_urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
text_part->urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
@@ -572,6 +590,7 @@ process_text_part (struct worker_task *task, GByteArray * part_content, GMimeCon
text_part = memory_pool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
text_part->is_html = FALSE;
+ text_part->parent = parent;
if (is_empty) {
text_part->is_empty = TRUE;
text_part->orig = NULL;
@@ -645,6 +664,7 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
}
else if (GMIME_IS_MULTIPART (part)) {
/* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
+ task->parser_parent_part = part;
#ifndef GMIME24
debug_task ("detected multipart part");
/* we'll get to finding out if this is a signed/encrypted multipart later... */
@@ -656,7 +676,6 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
return;
}
#endif
- /* XXX: do nothing with multiparts in gmime 2.4 */
}
else if (GMIME_IS_PART (part)) {
/* a normal leaf part, could be text/plain or image/jpeg etc */
@@ -688,10 +707,11 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part));
mime_part->type = type;
mime_part->content = part_content;
+ mime_part->parent = task->parser_parent_part;
debug_task ("found part with content-type: %s/%s", type->type, type->subtype);
task->parts = g_list_prepend (task->parts, mime_part);
/* Skip empty parts */
- process_text_part (task, part_content, type, (part_content->len <= 0));
+ process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0));
}
else {
msg_warn ("write to stream failed: %d, %s", errno, strerror (errno));
@@ -762,6 +782,12 @@ process_message (struct worker_task *task)
#ifdef GMIME24
g_mime_message_foreach (message, mime_foreach_callback, task);
#else
+ /*
+ * This is rather strange, but gmime 2.2 does NOT pass the top-level part to the foreach callback,
+ * so we need to set up the parent part by hand
+ */
+ task->parser_parent_part = g_mime_message_get_mime_part (message);
+ g_object_unref (task->parser_parent_part);
g_mime_message_foreach_part (message, mime_foreach_callback, task);
#endif
diff --git a/src/message.h b/src/message.h
index abebe4862..db42f2094 100644
--- a/src/message.h
+++ b/src/message.h
@@ -15,6 +15,7 @@ struct controller_session;
struct mime_part {
GMimeContentType *type;
GByteArray *content;
+ GMimeObject *parent; /**< value of task->parser_parent_part at the time this part was parsed */
};
struct mime_text_part {
@@ -30,6 +31,7 @@ struct mime_text_part {
GTree *urls;
GTree *html_urls;
fuzzy_hash_t *fuzzy;
+ GMimeObject *parent;
};
struct received_header {
diff --git a/src/symbols_cache.c b/src/symbols_cache.c
index f8257a716..ee931458e 100644
--- a/src/symbols_cache.c
+++ b/src/symbols_cache.c
@@ -582,6 +582,22 @@ check_negative_dynamic_item (struct worker_task *task, struct symbols_cache *cac
return FALSE;
}
+/*
+ * Tell whether `symbol` is listed in cfg->debug_symbols.
+ * Entries are compared with strcmp (), so the match is exact and
+ * case-sensitive; an empty list yields FALSE.
+ */
+static gboolean
+check_debug_symbol (struct config_file *cfg, const char *symbol)
+{
+	GList *node;
+
+	for (node = cfg->debug_symbols; node != NULL; node = g_list_next (node)) {
+		if (strcmp (symbol, (const char *)node->data) == 0) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
struct symbol_callback_data {
enum {
CACHE_STATE_NEGATIVE,
@@ -770,7 +786,15 @@ call_symbol_callback (struct worker_task * task, struct symbols_cache * cache, g
msg_warn ("gettimeofday failed: %s", strerror (errno));
}
#endif
- item->func (task, item->user_data);
+ if (G_UNLIKELY (check_debug_symbol (task->cfg, item->s->symbol))) {
+ rspamd_log_debug ();
+ item->func (task, item->user_data);
+ rspamd_log_nodebug ();
+ }
+ else {
+ item->func (task, item->user_data);
+ }
+
#ifdef HAVE_CLOCK_GETTIME
# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID