]> source.dussan.org Git - rspamd.git/commitdiff
* Add rspamd_log variable to lua plugins to access logging functions
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Sun, 25 Jul 2010 12:58:11 +0000 (16:58 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Sun, 25 Jul 2010 12:58:11 +0000 (16:58 +0400)
* Each part in rspamd task now can have parent part
* Check for parts distance only for multipart/alternative subparts
* Do not check attachements even if they are text (but attached as file)
* Do not die if write (2) returned ENOSPACE while doing logging, turn on throttling mode instead (1 write try in a second)
* Add ability to turn on debug for specific symbols
* Add ability to configure dns timeouts and dns retransmits in config file

13 files changed:
conf/lua/rspamd.lua
src/cfg_file.h
src/cfg_xml.c
src/cfg_xml.h
src/expressions.c
src/filter.c
src/logger.c
src/logger.h
src/lua/lua_common.c
src/main.h
src/message.c
src/message.h
src/symbols_cache.c

index f546c6189f18ee75d4d88d4f5808b1885a6e73b6..bd04beab44eb5df5e10a05fdaa695be6b797ec04 100644 (file)
@@ -16,11 +16,13 @@ local html_link_image = '/<img /iPr'
 reconf['HTML_SHORT_LINK_IMG_2'] = string.format('(%s) & (%s)', html_length_1024_1536, html_link_image)
 
 -- Local rules
-local r_bgcolor = '/BGCOLOR=/iM'
-local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iM'
+local r_bgcolor = '/BGCOLOR=/iP'
+local r_font_color = '/font color=[\\"\']?\\#FFFFFF[\\"\']?/iP'
 reconf['R_WHITE_ON_WHITE'] = string.format('(!(%s) & (%s))', r_bgcolor, r_font_color)
 reconf['R_FLASH_REDIR_IMGSHACK'] = '/^(?:http:\\/\\/)?img\\d{1,5}\\.imageshack\\.us\\/\\S+\\.swf/U'
 local r_rcvd_from_valuehost = 'Received=/\\sb0\\.valuehost\\.ru/H'
 local r_cyr_phone = '/8 \\(\\xD799\\)/P'
 reconf['R_SPAM_FROM_VALUEHOST'] = string.format('(%s) & (%s)', r_rcvd_from_valuehost, r_cyr_phone)
 
+-- Different text parts
+reconf['R_PARTS_DIFFER'] = 'compare_parts_distance(70)';
index fa73643f049ebaeb0f98c195dc5210b30547ce32..b2ca611500d1efca41e4e9bd11506fb8f5fd9a0d 100644 (file)
@@ -252,6 +252,7 @@ struct config_file {
        uint32_t log_buf_size;                                                  /**< length of log buffer                                                               */
        gchar *debug_ip_map;                                                /**< turn on debugging for specified ip addresses       */
        gboolean log_urls;                                                              /**< whether we should log URLs                         */
+       GList *debug_symbols;                                                   /**< symbols to debug                                                                   */
 
        gsize max_statfile_size;                                                /**< maximum size for statfile                                                  */
 
@@ -266,8 +267,8 @@ struct config_file {
        gboolean delivery_enable;                                               /**< is delivery agent is enabled                                               */
        gchar *deliver_host;                                                    /**< host for mail deliviring                                                   */
        struct in_addr deliver_addr;                                    /**< its address                                                                                */
-       uint16_t deliver_port;                                                  /**< port for deliviring                                                                */
-       uint16_t deliver_family;                                                /**< socket family for delivirnig                                               */
+       guint16 deliver_port;                                                   /**< port for deliviring                                                                */
+       guint16 deliver_family;                                                 /**< socket family for delivirnig                                               */
        gchar *deliver_agent_path;                                              /**< deliver to pipe instead of socket                                  */
        gboolean deliver_lmtp;                                                  /**< use LMTP instead of SMTP                                                   */
 
@@ -302,8 +303,8 @@ struct config_file {
        gchar* dump_checksum;                                                   /**< dump checksum of config file                                               */ 
        gpointer lua_state;                                                             /**< pointer to lua state                                                               */
 
-       guint dns_timeout;                                                              /**< timeout in milliseconds for waiting for dns reply  */
-       guint dns_retransmits;                                                  /**< maximum retransmits count                                                  */
+       guint32 dns_timeout;                                                    /**< timeout in milliseconds for waiting for dns reply  */
+       guint32 dns_retransmits;                                                /**< maximum retransmits count                                                  */
        GList *nameservers;                                                             /**< list of nameservers or NULL to parse resolv.conf   */
 };
 
index 534bfa961c2be1d56602b83736efda2922bd48c8..0f9bff97ed3d9e8bace3e5a7ca367be7ea0a28ef 100644 (file)
@@ -151,6 +151,18 @@ static struct xml_parser_rule grammar[] = {
                                G_STRUCT_OFFSET (struct config_file, cache_filename),
                                NULL
                        },
+                       {
+                               "dns_timeout",
+                               xml_handle_seconds,
+                               G_STRUCT_OFFSET (struct config_file, dns_timeout),
+                               NULL
+                       },
+                       {
+                               "dns_retransmits",
+                               xml_handle_uint32,
+                               G_STRUCT_OFFSET (struct config_file, dns_retransmits),
+                               NULL
+                       },
                        NULL_ATTR
                },
                NULL_ATTR
@@ -186,6 +198,12 @@ static struct xml_parser_rule grammar[] = {
                                G_STRUCT_OFFSET (struct config_file, debug_ip_map),
                                NULL
                        },
+                       {
+                               "debug_symbols",
+                               xml_handle_string_list,
+                               G_STRUCT_OFFSET (struct config_file, debug_symbols),
+                               NULL
+                       },
                        NULL_ATTR
                },
                NULL_ATTR
@@ -1144,6 +1162,29 @@ xml_handle_string (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHa
        return TRUE;
 }
 
+gboolean
+xml_handle_string_list (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
+{
+       GList                      **dest;
+       gchar                      **tokens, **cur;
+
+       dest = (GList **)G_STRUCT_MEMBER_P (dest_struct, offset);
+       *dest = NULL;
+
+       tokens = g_strsplit (data, ";,", 0);
+       if (!tokens || !tokens[0]) {
+               return FALSE;
+       }
+       memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)g_strfreev, tokens);
+       cur = tokens;
+       while (*cur) {
+               *dest = g_list_prepend (*dest, *cur);
+               cur ++;
+       }
+
+       return TRUE;
+}
+
 
 gboolean 
 xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
@@ -1159,9 +1200,9 @@ xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHash
 gboolean 
 xml_handle_seconds (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset)
 {
-       time_t                      *dest;
+       guint32                      *dest;
 
-       dest = (time_t *)G_STRUCT_MEMBER_P (dest_struct, offset);
+       dest = (guint32 *)G_STRUCT_MEMBER_P (dest_struct, offset);
        *dest = parse_seconds (data);
        
        return TRUE;
index c91bb17792a9f192bca21496417a2cd420467019..a72eb46f4f168a49ad8b5eb679a0312611022855 100644 (file)
@@ -74,6 +74,7 @@ void rspamd_xml_error (GMarkupParseContext            *context,
 /* Handlers */
 /* Basic xml parsing functions */
 gboolean xml_handle_string (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset);
+gboolean xml_handle_string_list (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset);
 
 /* Numeric params */
 gboolean xml_handle_size (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable *attrs, gchar *data, gpointer user_data, gpointer dest_struct, int offset);
index 447bdcbc22e48752c9fdc03ea2157ebe2da442d0..c3ece6808009c9a6c1068b38415b668eb883b784 100644 (file)
@@ -929,6 +929,9 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused)
        struct mime_text_part          *p1, *p2;
        GList                          *cur;
        struct expression_argument     *arg;
+       GMimeObject                    *parent;
+       const GMimeContentType         *ct;
+
 
        if (args == NULL) {
                debug_task ("no threshold is specified, assume it 100");
@@ -953,6 +956,19 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused)
                        return FALSE;
                }
                p2 = cur->data;
+               /* First of all check parent object */
+               if (p1->parent && p1->parent == p2->parent) {
+                       parent = p1->parent;
+                       ct = g_mime_object_get_content_type (parent);
+                       if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) {
+                               debug_task ("two parts are not belong to multipart/alternative container, skip check");
+                               return FALSE;
+                       }
+               }
+               else {
+                       debug_task ("message contains two parts but they are in different multi-parts");
+                       return FALSE;
+               }
                if (!p1->is_empty && !p2->is_empty) {
                        diff = fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy);
                        debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold);
index d91ef9a254fd09cdad9e8963ebed6881c8fc44e9..83868e9b9a57a45dc739d81970903ea05ef8c880 100644 (file)
@@ -110,7 +110,7 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const cha
 
                g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s);
        }
-       debug_task ("got %.2f score for metric %s, factor: %f", s->score, metric->name, w);
+       debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w);
        
 }
 
index f17835764e198d3ba2aeff1e6e6c15129c9dd617..9bd3ce3f0a127a07bad05eb764b242583930176d 100644 (file)
@@ -48,6 +48,9 @@ typedef struct rspamd_logger_s {
        int                      fd;
        gboolean                 is_buffered;
        gboolean                 enabled;
+       gboolean                 is_debug;
+       gboolean                 throttling;
+       time_t                   throttling_time;
        enum rspamd_log_type     type;
        pid_t                    pid;
        enum process_type                process_type;
@@ -118,16 +121,17 @@ direct_write_log_line (void *data, int count, gboolean is_iov)
                        }
                        else if (errno == EFAULT || errno == EINVAL || errno == EFBIG || errno == ENOSPC) {
                                /* Rare case */
-                               if (write (rspamd_log->fd, errmsg, r) == -1) {
-                                       /* Don't know what to do */
-                                       exit (EXIT_FAILURE);
-                               }
+                               rspamd_log->throttling = TRUE;
+                               rspamd_log->throttling_time = time (NULL);
                        }
                        else if (errno == EPIPE) {
                                /* We write to some pipe and it disappears, disable logging */
                                rspamd_log->enabled = FALSE;
                        }
                }
+               else if (rspamd_log->throttling) {
+                       rspamd_log->throttling = FALSE;
+               }
        }
 }
 
@@ -421,6 +425,7 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla
        uint32_t                        cksum;
        size_t                          mlen;
        const char                     *cptype = NULL;
+       gboolean                        got_time = FALSE;
 
        if (! rspamd_log->enabled) {
                return;
@@ -428,6 +433,18 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla
 
 
        if (forced || log_level <= rspamd_log->cfg->log_level) {
+               /* Check throttling due to write errors */
+               if (rspamd_log->throttling) {
+                       now = time (NULL);
+                       if (rspamd_log->throttling_time != now) {
+                               rspamd_log->throttling_time = now;
+                               got_time = TRUE;
+                       }
+                       else {
+                               /* Do not try to write to file too often while throtling */
+                               return;
+                       }
+               }
                /* Check repeats */
                mlen = strlen (message);
                cksum = rspamd_log_calculate_cksum (message, mlen);
@@ -476,7 +493,10 @@ file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFla
                        }
                }
 
-               now = time (NULL);
+               if (! got_time) {
+                       now = time (NULL);
+               }
+
                tms = localtime (&now);
 
                strftime (timebuf, sizeof (timebuf), "%F %H:%M:%S", tms);
@@ -527,7 +547,7 @@ rspamd_conditional_debug (uint32_t addr, const char *function, const char *fmt,
        va_list                         vp;
     u_char                         *end;
 
-       if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || 
+       if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || rspamd_log->is_debug ||
                        (rspamd_log->debug_ip != NULL && radix32tree_find (rspamd_log->debug_ip, ntohl (addr)) != RADIX_NO_VALUE)) {
 
                va_start (vp, fmt);
@@ -545,3 +565,15 @@ rspamd_glib_log_function (const gchar *log_domain, GLogLevelFlags log_level, con
                rspamd_log->log_func (log_domain, NULL, log_level, message, FALSE, rspamd_log->cfg);
        }
 }
+
+void
+rspamd_log_debug ()
+{
+       rspamd_log->is_debug = TRUE;
+}
+
+void
+rspamd_log_nodebug ()
+{
+       rspamd_log->is_debug = FALSE;
+}
index fe6e0bda53e07e4c75b5605f5512e0883f720ea2..9b419c8a698d538f85686b8ec98a9311557828ba 100644 (file)
@@ -62,6 +62,15 @@ void rspamd_common_log_function (GLogLevelFlags log_level, const char *function,
  */
 void rspamd_conditional_debug (uint32_t addr, const char *function, const char *fmt, ...) ;
 
+/**
+ * Temporary turn on debug
+ */
+void rspamd_log_debug ();
+
+/**
+ * Turn off debug
+ */
+void rspamd_log_nodebug ();
 
 /* Typical functions */
 
index aec72006cdae51603f2cfd18c2d8ad91d559bb5d..2691a2abe8fb9b9bbfc7d098c3f37091ed125813 100644 (file)
@@ -209,6 +209,7 @@ void
 init_lua_filters (struct config_file *cfg)
 {
        struct config_file            **pcfg;
+       gpointer                       *plogger;
        GList                          *cur, *tmp;
        struct script_module           *module;
     struct statfile                *st;
@@ -224,11 +225,17 @@ init_lua_filters (struct config_file *cfg)
                                continue;
                        }
 
-                       /* Call module init function */
+                       /* Initialize config structure */
                        pcfg = lua_newuserdata (L, sizeof (struct config_file *));
                        lua_setclass (L, "rspamd{config}", -1);
                        *pcfg = cfg;
                        lua_setglobal (L, "rspamd_config");
+                       /* Initialize logger */
+                       plogger = lua_newuserdata (L, sizeof (gpointer));
+                       lua_setclass (L, "rspamd{logger}", -1);
+                       *plogger = NULL;
+                       lua_setglobal (L, "rspamd_log");
+
                        /* do the call (1 arguments, 1 result) */
                        if (lua_pcall (L, 0, LUA_MULTRET, 0) != 0) {
                                msg_info ("init of %s failed: %s", module->path, lua_tostring (L, -1));
index b729e87de2dca67c67ddd4db5735fbe43f3c293b..e35a7eea30e584db7db12551b7615536d900410a 100644 (file)
@@ -195,6 +195,7 @@ struct worker_task {
     struct rspamd_async_session* s;                                                            /**< async session object                                                       */
        int parts_count;                                                                                        /**< mime parts count                                                           */
        GMimeMessage *message;                                                                          /**< message, parsed with GMime                                         */
+       GMimeObject *parser_parent_part;                                                        /**< current parent part                                                        */
        InternetAddressList *rcpts;                                                                     /**< list of all recipients                                             */
        GList *parts;                                                                                           /**< list of parsed parts                                                       */
        GList *text_parts;                                                                                      /**< list of text parts                                                         */
index f3be3032af88afc17f329f0ae66b0451b34bb41b..2c53deca126f5d535c70377a539d53f96c06659c 100644 (file)
@@ -525,9 +525,26 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
 }
 
 static void
-process_text_part (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, gboolean is_empty)
+process_text_part (struct worker_task *task, GByteArray *part_content, GMimeContentType *type,
+               GMimeObject *part, GMimeObject *parent, gboolean is_empty)
 {
        struct mime_text_part          *text_part;
+       const char                     *cd;
+
+       /* Skip attachements */
+#ifndef GMIME24
+       cd = g_mime_part_get_content_disposition (GMIME_PART (part));
+       if (cd && g_ascii_strcasecmp (cd, "attachment") == 0) {
+               debug_task ("skip attachments for checking as text parts");
+               return;
+       }
+#else
+       cd = g_mime_object_get_disposition (GMIME_OBJECT (part));
+       if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0) {
+               debug_task ("skip attachments for checking as text parts");
+               return;
+       }
+#endif
 
        if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
                debug_task ("got urls from text/html part");
@@ -544,6 +561,7 @@ process_text_part (struct worker_task *task, GByteArray * part_content, GMimeCon
                text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
                text_part->is_balanced = TRUE;
                text_part->html_nodes = NULL;
+               text_part->parent = parent;
 
                text_part->html_urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
                text_part->urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
@@ -572,6 +590,7 @@ process_text_part (struct worker_task *task, GByteArray * part_content, GMimeCon
 
                text_part = memory_pool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
                text_part->is_html = FALSE;
+               text_part->parent = parent;
                if (is_empty) {
                        text_part->is_empty = TRUE;
                        text_part->orig = NULL;
@@ -645,6 +664,7 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
        }
        else if (GMIME_IS_MULTIPART (part)) {
                /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
+               task->parser_parent_part = part;
 #ifndef GMIME24        
                debug_task ("detected multipart part");
                /* we'll get to finding out if this is a signed/encrypted multipart later... */
@@ -656,7 +676,6 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
                        return;
                }
 #endif
-               /* XXX: do nothing with multiparts in gmime 2.4 */
        }
        else if (GMIME_IS_PART (part)) {
                /* a normal leaf part, could be text/plain or image/jpeg etc */
@@ -688,10 +707,11 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
                                mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part));
                                mime_part->type = type;
                                mime_part->content = part_content;
+                               mime_part->parent = task->parser_parent_part;
                                debug_task ("found part with content-type: %s/%s", type->type, type->subtype);
                                task->parts = g_list_prepend (task->parts, mime_part);
                                /* Skip empty parts */
-                               process_text_part (task, part_content, type, (part_content->len <= 0));
+                               process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0));
                        }
                        else {
                                msg_warn ("write to stream failed: %d, %s", errno, strerror (errno));
@@ -762,6 +782,12 @@ process_message (struct worker_task *task)
 #ifdef GMIME24
                g_mime_message_foreach (message, mime_foreach_callback, task);
 #else
+               /*
+                * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback
+                * so we need to set up parent part by hands
+                */
+               task->parser_parent_part = g_mime_message_get_mime_part (message);
+               g_object_unref (task->parser_parent_part);
                g_mime_message_foreach_part (message, mime_foreach_callback, task);
 #endif
 
index abebe4862cbd8b772086629ff42f8587141ac695..db42f2094a4fc0a290bde9b70d48341ff2024d71 100644 (file)
@@ -15,6 +15,7 @@ struct controller_session;
 struct mime_part {
        GMimeContentType *type;
        GByteArray *content;
+       GMimeObject *parent;
 };
 
 struct mime_text_part {
@@ -30,6 +31,7 @@ struct mime_text_part {
        GTree *urls;
        GTree *html_urls;
        fuzzy_hash_t *fuzzy;
+       GMimeObject *parent;
 };
 
 struct received_header {
index f8257a7168aeca6d3ff511d853ac398334b86ef0..ee931458e8e6aa7db413934bfb928eed3986af1c 100644 (file)
@@ -582,6 +582,22 @@ check_negative_dynamic_item (struct worker_task *task, struct symbols_cache *cac
        return FALSE;
 }
 
+static gboolean
+check_debug_symbol (struct config_file *cfg, const char *symbol)
+{
+       GList                         *cur;
+
+       cur = cfg->debug_symbols;
+       while (cur) {
+               if (strcmp (symbol, (const char *)cur->data) == 0) {
+                       return TRUE;
+               }
+               cur = g_list_next (cur);
+       }
+
+       return FALSE;
+}
+
 struct symbol_callback_data {
        enum {
                CACHE_STATE_NEGATIVE,
@@ -770,7 +786,15 @@ call_symbol_callback (struct worker_task * task, struct symbols_cache * cache, g
                        msg_warn ("gettimeofday failed: %s", strerror (errno));
                }
 #endif
-               item->func (task, item->user_data);
+               if (G_UNLIKELY (check_debug_symbol (task->cfg, item->s->symbol))) {
+                       rspamd_log_debug ();
+                       item->func (task, item->user_data);
+                       rspamd_log_nodebug ();
+               }
+               else {
+                       item->func (task, item->user_data);
+               }
+
 
 #ifdef HAVE_CLOCK_GETTIME
 # ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID