]> source.dussan.org Git - rspamd.git/commitdiff
* New module for checking email addresses found inside messages (rule-based, like multimap)
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 3 Feb 2011 17:29:27 +0000 (20:29 +0300)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 3 Feb 2011 17:29:27 +0000 (20:29 +0300)
* Emails are now separated from URLs and URL checks
* Add the ability to check text attachments when the option (check_attachements) is present in the configuration

Version is 0.3.6 now

14 files changed:
CMakeLists.txt
lib/librspamdclient.c
src/cfg_file.h
src/cfg_xml.c
src/lua/lua_common.h
src/lua/lua_task.c
src/main.h
src/message.c
src/plugins/lua/emails.lua [new file with mode: 0644]
src/plugins/lua/multimap.lua
src/plugins/lua/trie.lua
src/url.c
src/url.h
src/worker.c

index d6c3ac5761810430208b628a71b3e4844276a56f..79cf3b7825cdb1a3d9de4b515d69383a5758a942 100644 (file)
@@ -7,7 +7,7 @@ PROJECT(rspamd C)
 
 SET(RSPAMD_VERSION_MAJOR 0)
 SET(RSPAMD_VERSION_MINOR 3)
-SET(RSPAMD_VERSION_PATCH 5)
+SET(RSPAMD_VERSION_PATCH 6)
 
 
 SET(RSPAMD_VERSION         "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
index 53c3dcc0f3cb85d21de702b8561d876e0b54ea6d..2bc45c8887a72334fc58903280393eeda546fb38 100644 (file)
@@ -606,9 +606,9 @@ parse_rspamd_header_line (struct rspamd_connection *conn, guint len, GError **er
                                }
                                else {
                                        /* Create header value */
-                                       hvalue = g_malloc (p - c + 1);
-                                       hvalue[p - c] = '\0';
-                                       memcpy (hvalue, c, p - c);
+                                       hvalue = g_malloc (p - c + 2);
+                                       hvalue[p - c + 1] = '\0';
+                                       memcpy (hvalue, c, p - c + 1);
                                        g_hash_table_replace (conn->result->headers, hname, hvalue);
                                        state = 99;
                                }
index 81cfe65cc3f0997d12e4ba9b5e311fb623d0bd00..e33b1585a767b6d5492063f2bc861d141222bad9 100644 (file)
@@ -252,6 +252,7 @@ struct config_file {
        gboolean no_fork;                                                               /**< if 1 do not call daemon()                                                  */
        gboolean config_test;                                                   /**< if TRUE do only config file test                                   */
        gboolean raw_mode;                                                              /**< work in raw mode instead of utf one                                */
+       gboolean check_text_attachements;                               /**< check text attachements as text                                    */
        gboolean convert_config;                                                /**< convert config to XML format                                               */
 
        enum rspamd_log_type log_type;                                  /**< log type                                                                                   */
index 70f3ebda890682ca824a42dfb8b6dcefebe824c0..b4eaee045723c4a55ca7ea202ae020d6bee9d0fd 100644 (file)
@@ -111,6 +111,12 @@ static struct xml_parser_rule grammar[] = {
                                G_STRUCT_OFFSET (struct config_file, raw_mode),
                                NULL
                        },
+                       {
+                               "check_attachements",
+                               xml_handle_boolean,
+                               G_STRUCT_OFFSET (struct config_file, check_text_attachements),
+                               NULL
+                       },
                        {
                                "tempdir",
                                xml_handle_string,
index 3e0fbed88595f350105ee4323e3b767196a8383c..d70501034b3b32104e003ab3fb37c778311bdfb0 100644 (file)
@@ -16,7 +16,7 @@
 
 extern const luaL_reg null_reg[];
 
-#define RSPAMD_LUA_API_VERSION 1
+#define RSPAMD_LUA_API_VERSION 2
 
 /* Common utility functions */
 void lua_newclass (lua_State *L, const gchar *classname, const struct luaL_reg *func);
index 67fd3e7b99d3ba2e7f6133cef96a98d212c3c7e9..892bbdf0736193b0964fc23660d7d73f70981449 100644 (file)
@@ -45,6 +45,7 @@ extern stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classi
 LUA_FUNCTION_DEF (task, get_message);
 LUA_FUNCTION_DEF (task, insert_result);
 LUA_FUNCTION_DEF (task, get_urls);
+LUA_FUNCTION_DEF (task, get_emails);
 LUA_FUNCTION_DEF (task, get_text_parts);
 LUA_FUNCTION_DEF (task, get_raw_headers);
 LUA_FUNCTION_DEF (task, get_received_headers);
@@ -68,6 +69,7 @@ static const struct luaL_reg    tasklib_m[] = {
        LUA_INTERFACE_DEF (task, get_message),
        LUA_INTERFACE_DEF (task, insert_result),
        LUA_INTERFACE_DEF (task, get_urls),
+       LUA_INTERFACE_DEF (task, get_emails),
        LUA_INTERFACE_DEF (task, get_text_parts),
        LUA_INTERFACE_DEF (task, get_raw_headers),
        LUA_INTERFACE_DEF (task, get_received_headers),
@@ -241,6 +243,33 @@ lua_task_get_urls (lua_State * L)
        return 1;
 }
 
+/* Lua binding task:get_emails(): array of rspamd{url} userdata built from task->emails, or nil */
+static gint
+lua_task_get_emails (lua_State * L)
+{
+       struct worker_task             *task = lua_check_task (L);
+       struct uri                    **slot;
+       GList                          *node;
+       gint                            idx = 0;
+
+       /* No task or an empty email list: push nil instead of an empty table */
+       if (task == NULL || task->emails == NULL) {
+               lua_pushnil (L);
+               return 1;
+       }
+
+       lua_newtable (L);
+       for (node = task->emails; node != NULL; node = g_list_next (node)) {
+               /* Each array element is a userdata holding a pointer to the list's uri */
+               slot = lua_newuserdata (L, sizeof (struct uri *));
+               lua_setclass (L, "rspamd{url}", -1);
+               *slot = node->data;
+               lua_rawseti (L, -2, ++idx);
+       }
+
+       return 1;
+}
+
 static gint
 lua_task_get_text_parts (lua_State * L)
 {
@@ -315,8 +344,14 @@ lua_task_get_received_headers (lua_State * L)
 struct lua_dns_callback_data {
        lua_State                      *L;
        struct worker_task             *task;
-       const gchar                     *callback;
-       const gchar                     *to_resolve;
+       const gchar                    *callback;    /**< Lua callback name (argument 3 of resolve_dns_*), pool copy */
+       const gchar                    *to_resolve;  /**< name to resolve (argument 2 of resolve_dns_*), pool copy */
+       gint                            cbtype;      /**< lua_type() of optional argument 4; LUA_TNONE if unsupported */
+       union {
+               gpointer                    string;  /**< pool copy of a string argument */
+               gboolean                    boolean; /**< value of a boolean argument */
+               gdouble                     number;  /**< value of a number argument */
+       }                               cbdata;      /**< argument 4, pushed as the callback's fifth argument */
 };
 
 static void
@@ -385,7 +420,22 @@ lua_dns_callback (struct rspamd_dns_reply *reply, gpointer arg)
                lua_pushstring (cd->L, dns_strerror (reply->code));
        }
 
-       if (lua_pcall (cd->L, 4, 0, 0) != 0) {
+       switch (cd->cbtype) {
+       case LUA_TBOOLEAN:
+               lua_pushboolean (cd->L, cd->cbdata.boolean);
+               break;
+       case LUA_TNUMBER:
+               lua_pushnumber (cd->L, cd->cbdata.number);
+               break;
+       case LUA_TSTRING:
+               lua_pushstring (cd->L, cd->cbdata.string);
+               break;
+       default:
+               lua_pushnil (cd->L);
+               break;
+       }
+
+       if (lua_pcall (cd->L, 5, 0, 0) != 0) {
                msg_info ("call to %s failed: %s", cd->callback, lua_tostring (cd->L, -1));
        }
 
@@ -409,6 +459,25 @@ lua_task_resolve_dns_a (lua_State * L)
                cd->L = L;
                cd->to_resolve = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 2));
                cd->callback = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 3));
+               cd->cbtype = lua_type (L, 4);
+               if (cd->cbtype != LUA_TNONE && cd->cbtype != LUA_TNIL) {
+                       switch (cd->cbtype) {
+                       case LUA_TBOOLEAN:
+                               cd->cbdata.boolean = lua_toboolean (L, 4);
+                               break;
+                       case LUA_TNUMBER:
+                               cd->cbdata.number = lua_tonumber (L, 4);
+                               break;
+                       case LUA_TSTRING:
+                               cd->cbdata.string = memory_pool_strdup (task->task_pool, lua_tostring (L, 4));
+                               break;
+                       default:
+                               msg_warn ("cannot handle type %s as callback data", lua_typename (L, cd->cbtype));
+                               cd->cbtype = LUA_TNONE;
+                               break;
+                       }
+               }
+
                if (!cd->to_resolve || !cd->callback) {
                        msg_info ("invalid parameters passed to function");
                        return 0;
@@ -432,6 +501,24 @@ lua_task_resolve_dns_txt (lua_State * L)
                cd->L = L;
                cd->to_resolve = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 2));
                cd->callback = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 3));
+               cd->cbtype = lua_type (L, 4);
+               if (cd->cbtype != LUA_TNONE && cd->cbtype != LUA_TNIL) {
+                       switch (cd->cbtype) {
+                       case LUA_TBOOLEAN:
+                               cd->cbdata.boolean = lua_toboolean (L, 4);
+                               break;
+                       case LUA_TNUMBER:
+                               cd->cbdata.number = lua_tonumber (L, 4);
+                               break;
+                       case LUA_TSTRING:
+                               cd->cbdata.string = memory_pool_strdup (task->task_pool, lua_tostring (L, 4));
+                               break;
+                       default:
+                               msg_warn ("cannot handle type %s as callback data", lua_typename (L, cd->cbtype));
+                               cd->cbtype = LUA_TNONE;
+                               break;
+                       }
+               }
                if (!cd->to_resolve || !cd->callback) {
                        msg_info ("invalid parameters passed to function");
                        return 0;
@@ -456,6 +543,24 @@ lua_task_resolve_dns_ptr (lua_State * L)
                cd->L = L;
                cd->to_resolve = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 2));
                cd->callback = memory_pool_strdup (task->task_pool, luaL_checkstring (L, 3));
+               cd->cbtype = lua_type (L, 4);
+               if (cd->cbtype != LUA_TNONE && cd->cbtype != LUA_TNIL) {
+                       switch (cd->cbtype) {
+                       case LUA_TBOOLEAN:
+                               cd->cbdata.boolean = lua_toboolean (L, 4);
+                               break;
+                       case LUA_TNUMBER:
+                               cd->cbdata.number = lua_tonumber (L, 4);
+                               break;
+                       case LUA_TSTRING:
+                               cd->cbdata.string = memory_pool_strdup (task->task_pool, lua_tostring (L, 4));
+                               break;
+                       default:
+                               msg_warn ("cannot handle type %s as callback data", lua_typename (L, cd->cbtype));
+                               cd->cbtype = LUA_TNONE;
+                               break;
+                       }
+               }
                ina = memory_pool_alloc (task->task_pool, sizeof (struct in_addr));
                if (!cd->to_resolve || !cd->callback || !inet_aton (cd->to_resolve, ina)) {
                        msg_info ("invalid parameters passed to function");
index b934ae31e401ffdd411b9d1e97f92708c7119d7f..9269d4ca37d3793a0fafb6cfcde12d993499cf11 100644 (file)
@@ -204,6 +204,7 @@ struct worker_task {
        gchar *raw_headers;                                                                                     /**< list of raw headers                                                        */
        GList *received;                                                                                        /**< list of received headers                                           */
        GList *urls;                                                                                            /**< list of parsed urls                                                        */
+       GList *emails;                                                                                          /**< list of parsed emails                                                      */
        GList *images;                                                                                          /**< list of images                                                                     */
        GHashTable *results;                                                                            /**< hash table of metric_result indexed by 
                                                                                                                                 *    metric's name                                                                     */
index 8e8b8feb079972330dee11a362f60fe827c7090f..010edf22bda793c3f2e2ff65515df4899669294f 100644 (file)
@@ -536,13 +536,13 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
        /* Skip attachements */
 #ifndef GMIME24
        cd = g_mime_part_get_content_disposition (GMIME_PART (part));
-       if (cd && g_ascii_strcasecmp (cd, "attachment") == 0) {
+       if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) {
                debug_task ("skip attachments for checking as text parts");
                return;
        }
 #else
        cd = g_mime_object_get_disposition (GMIME_OBJECT (part));
-       if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0) {
+       if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) {
                debug_task ("skip attachments for checking as text parts");
                return;
        }
diff --git a/src/plugins/lua/emails.lua b/src/plugins/lua/emails.lua
new file mode 100644 (file)
index 0000000..969c762
--- /dev/null
@@ -0,0 +1,167 @@
+-- Emails module: rule-based checks of email addresses found inside messages.
+-- Each rule names a symbol plus one source: a hash map (map) or a DNS blacklist zone (dnsbl).
+-- Rules format:
+-- symbol = sym, map = file:///path/to/file, domain_only = yes
+-- symbol = sym2, dnsbl = bl.somehost.com, domain_only = no
+local rules = {}
+
+-- Split str on the Lua pattern delim; once maxNb (if >= 1) fields are collected the rest is dropped
+function split(str, delim, maxNb)
+       -- No delimiter present: the whole string is the single field
+       if string.find(str, delim) == nil then
+               return { str }
+       end
+       if maxNb == nil or maxNb < 1 then
+               maxNb = 0    -- No limit
+       end
+       local result = {}
+       local pat = "(.-)" .. delim .. "()"
+       local nb, lastPos = 0
+       for part, pos in string.gmatch(str, pat) do -- gmatch: the Lua 5.1 name of deprecated string.gfind
+               nb = nb + 1
+               result[nb] = part
+               lastPos = pos
+               if nb == maxNb then break end
+       end
+       -- Handle the last field
+       if nb ~= maxNb then
+               result[nb + 1] = string.sub(str, lastPos)
+       end
+       return result
+end
+
+-- DNS reply handler for dnsbl rules; symbol is the extra argument given to task:resolve_dns_a
+function emails_dns_cb(task, to_resolve, results, err, symbol)
+       if not results then return end
+       task:insert_result(symbol, 1)
+end
+
+-- Apply one rule to one address: dnsbl rules start an A lookup, map rules are tested immediately
+function check_email_rule(task, rule, addr)
+       if rule['dnsbl'] then
+               -- Query name is host.zone, or user.host.zone unless the rule is domain_only
+               local query
+               if rule['domain_only'] then
+                       query = string.format('%s.%s', addr:get_host(), rule['dnsbl'])
+               else
+                       query = string.format('%s.%s.%s', addr:get_user(), addr:get_host(), rule['dnsbl'])
+               end
+               task:resolve_dns_a(query, 'emails_dns_cb', rule['symbol'])
+       elseif rule['map'] then
+               -- Map key is the bare host for domain_only rules, user@host otherwise
+               local key
+               if rule['domain_only'] then
+                       key = addr:get_host()
+               else
+                       key = string.format('%s@%s', addr:get_user(), addr:get_host())
+               end
+               if rule['map']:get_key(key) then task:insert_result(rule['symbol'], 1) end
+       end
+end
+
+-- Callback of the EMAILS symbol: run every rule once per distinct user@host found in the message
+function check_emails(task)
+       local emails = task:get_emails()
+       local checked = {}
+       -- get_emails() yields nil when no addresses were extracted; iterate an empty list then
+       for _,addr in ipairs(emails or {}) do
+               local to_check = string.format('%s@%s', addr:get_user(), addr:get_host())
+               -- Key on the address value so a repeated address is checked only once
+               if not checked[to_check] then
+                       for _,rule in ipairs(rules) do
+                               check_email_rule(task, rule, addr)
+                       end
+                       checked[to_check] = true
+               end
+       end
+end
+
+-- Build a rule from a list of "name = value" strings and append it to the ruleset.
+-- Recognised names: symbol, dnsbl, map, domain_only.
+-- Returns the new rule, or nil after logging when an option is malformed/unknown or the rule is incomplete.
+local function add_emails_rule(params)
+       -- Unset fields (name, dnsbl, map, symbol) stay nil; name records the dnsbl or map value
+       local newrule = { domain_only = false }
+       for _,param in ipairs(params) do
+               local key, value = string.match(param, '([a-zA-Z_0-9]+)%s*=%s*(.+)')
+               if not (key and value) then
+                       rspamd_logger:err('invalid rule: '..param)
+                       return nil
+               end
+               if key == 'dnsbl' then
+                       newrule.dnsbl = value
+                       newrule.name = value
+               elseif key == 'map' then
+                       newrule.name = value
+                       newrule.map = rspamd_config:add_hash_map (value)
+               elseif key == 'symbol' then
+                       newrule.symbol = value
+               elseif key == 'domain_only' then
+                       -- Only these spellings switch the flag on; other values leave it unchanged
+                       if value == 'yes' or value == 'true' or value == '1' then
+                               newrule.domain_only = true
+                       end
+               else
+                       rspamd_logger:err('invalid rule option: '.. key)
+                       return nil
+               end
+       end
+
+       -- A usable rule needs a symbol and at least one source: map or dnsbl
+       local has_source = newrule.map or newrule.dnsbl
+       if not (newrule.symbol and has_source) then
+               rspamd_logger:err('incomplete rule')
+               return nil
+       end
+       table.insert(rules, newrule)
+       return newrule
+end
+
+
+-- Registration: declare the 'rule' option (string) when the config object reports API version >= 2
+if type(rspamd_config.get_api_version) ~= 'nil' then
+       if rspamd_config:get_api_version() >= 2 then
+               rspamd_config:register_module_option('emails', 'rule', 'string')
+       else
+               rspamd_logger:err('Invalid rspamd version for this plugin')
+       end
+end
+
+-- Module configuration. The 'rule' option holds one rule line or a list of rule lines;
+-- each line is a comma-separated list of name = value pairs, for example:
+--   symbol = sym, map = file:///path/to/file, domain_only = yes
+
+-- Parse one rule line: log a failure, otherwise register the rule's symbol as a virtual symbol
+-- (only when the config object provides get_api_version)
+local function register_rule_line(line)
+       local rule = add_emails_rule (split(line, ','))
+       if not rule then
+               rspamd_logger:err('cannot add rule: "'..line..'"')
+               return
+       end
+       if type(rspamd_config.get_api_version) ~= 'nil' then
+               rspamd_config:register_virtual_symbol(rule['symbol'], 1.0)
+       end
+end
+
+local opts =  rspamd_config:get_all_opt('emails')
+if opts then
+       local strrules = opts['rule']
+       -- Any other type (including nil) yields no rules
+       if type(strrules) == 'table' then
+               for _,value in ipairs(strrules) do
+                       register_rule_line(value)
+               end
+       elseif type(strrules) == 'string' then
+               register_rule_line(strrules)
+       end
+end
+
+if table.maxn(rules) > 0 then
+       -- Register one symbol, EMAILS, whose callback check_emails evaluates every rule; use register_symbol when get_api_version is absent
+       if type(rspamd_config.get_api_version) ~= 'nil' then
+               rspamd_config:register_callback_symbol('EMAILS', 1.0, 'check_emails')
+       else
+               rspamd_config:register_symbol('EMAILS', 1.0, 'check_emails')
+       end
+end
index 6986c8c7214d3506591a5eb6e7cbcaa82c8f02c4..9512ff89041fc44a7a7b73d31375a1f35c3c9a57 100644 (file)
@@ -27,7 +27,7 @@ function split(str, delim, maxNb)
        return result
 end
 
-function rbl_cb(task, to_resolve, results, err)
+function multimap_rbl_cb(task, to_resolve, results, err)
        if results then
                local _,_,o4,o3,o2,o1,in_rbl = string.find(to_resolve, '(%d+)%.(%d+)%.(%d+)%.(%d+)%.(.+)')
                -- Get corresponding rule by rbl name
@@ -71,13 +71,13 @@ function check_multimap(task)
                        if ip then
                                local _,_,o1,o2,o3,o4 = string.find(ip, '(%d+)%.(%d+)%.(%d+)%.(%d+)')
                                local rbl_str = o4 .. '.' .. o3 .. '.' .. o2 .. '.' .. o1 .. '.' .. rule['map']
-                               task:resolve_dns_a(rbl_str, 'rbl_cb')
+                               task:resolve_dns_a(rbl_str, 'multimap_rbl_cb')
                        end
                end
        end
 end
 
-function add_rule(params)
+local function add_multimap_rule(params)
        local newrule = {
                type = 'ip',
                header = nil,
@@ -143,7 +143,7 @@ if opts then
                if type(strrules) == 'table' then 
                        for _,value in ipairs(strrules) do
                                local params = split(value, ',')
-                               local rule = add_rule (params)
+                               local rule = add_multimap_rule (params)
                                if not rule then
                                        rspamd_logger:err('cannot add rule: "'..value..'"')
                                else
@@ -154,7 +154,7 @@ if opts then
                        end
                elseif type(strrules) == 'string' then
                        local params = split(strrules, ',')
-                       local rule = add_rule (params)
+                       local rule = add_multimap_rule (params)
                        if not rule then
                                rspamd_logger:err('cannot add rule: "'..strrules..'"')
                        else
index d4bafe943e4ce1ad5491615ad743e7da632bcc51..98248f29ff8cea0be042c042ee1eb8b04569db98 100644 (file)
@@ -50,7 +50,6 @@ local function add_trie(params)
                local patterns = split(params[2], ',')
                local trie = {}
                trie['trie'] = rspamd_trie:create(true)
-               print (type(trie['trie']))
                for num,pattern in ipairs(patterns) do
                        trie['trie']:add_pattern(pattern, num)
                end
@@ -64,7 +63,6 @@ end
 
 function check_trie(task)
        for _,trie in ipairs(tries) do
-               print (type(trie['trie']))
                if trie['trie']:search_task(task) then
                        task:insert_result(trie['symbol'], 1)
                end
index f1b4242a17260cc47b52c1a59550898ff0b37d73..596d17d3ad21465b8e12c18bb69d5044bff1b44c 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -41,7 +41,7 @@
     (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT)
 
 struct _proto {
-       guchar                          *name;
+       guchar                         *name;
        gint                            port;
        uintptr_t                      *unused;
        guint                           need_slashes:1;
@@ -55,6 +55,7 @@ typedef struct url_match_s {
        gsize m_len;
        const gchar *pattern;
        const gchar *prefix;
+       gboolean add_prefix;
 } url_match_t;
 
 struct url_matcher {
@@ -1111,20 +1112,24 @@ domain:
 static gboolean
 url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
 {
+       const gchar                    *p;
        /* Check what we have found */
        if (pos > begin && *pos == '@') {
-               if (is_atom (*(pos - 1)) && is_domain (*(pos + 1))) {
-                       match->m_begin = pos + 1;
+               /* Try to extract it with username */
+               p = pos - 1;
+               while (p > begin && is_atom (*p)) {
+                       p --;
+               }
+               if (!is_atom (*p)) {
+                       match->m_begin = p + 1;
                        return TRUE;
                }
        }
        else {
-               while (pos < end && is_atom (*pos)) {
-                       if (*pos == '@') {
-                               match->m_begin = pos + 1;
-                               return TRUE;
-                       }
-                       pos ++;
+               p = pos + strlen (match->pattern);
+               if (is_atom (*p)) {
+                       match->m_begin = p;
+                       return TRUE;
                }
        }
        return FALSE;
@@ -1141,6 +1146,7 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match
                p ++;
        }
        match->m_len = p - match->m_begin;
+       match->add_prefix = TRUE;
        return TRUE;
 }
 
@@ -1148,7 +1154,7 @@ void
 url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html)
 {
        gint                            rc, off = 0;
-       gchar                           *url_str = NULL;
+       gchar                          *url_str = NULL;
        struct uri                     *new;
        const guint8                   *p, *end;
 
@@ -1176,8 +1182,13 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
                                                g_strstrip (url_str);
                                                rc = parse_uri (new, url_str, pool);
                                                if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) {
-                                                       g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new);
-                                                       task->urls = g_list_prepend (task->urls, new);
+                                                       if (new->protocol == PROTOCOL_MAILTO) {
+                                                               task->emails = g_list_prepend (task->emails, new);
+                                                       }
+                                                       else {
+                                                               g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new);
+                                                               task->urls = g_list_prepend (task->urls, new);
+                                                       }
                                                }
                                                else {
                                                        msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
@@ -1197,7 +1208,7 @@ gboolean
 url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gchar **url_str)
 {
        const gchar                    *end, *pos;
-       gint                            idx;
+       gint                            idx, l;
        struct url_matcher             *matcher;
        url_match_t                     m;
 
@@ -1210,10 +1221,18 @@ url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gch
                        matcher = &matchers[idx];
                        m.pattern = matcher->pattern;
                        m.prefix = matcher->prefix;
+                       m.add_prefix = FALSE;
                        if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) {
-                               *url_str = memory_pool_alloc (pool, m.m_len + 1);
-                               memcpy (*url_str, m.m_begin, m.m_len);
-                               (*url_str)[m.m_len] = '\0';
+                               if (m.add_prefix) {
+                                       l = m.m_len + 1 + strlen (m.prefix);
+                                       *url_str = memory_pool_alloc (pool, l);
+                                       rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin);
+                               }
+                               else {
+                                       *url_str = memory_pool_alloc (pool, m.m_len + 1);
+                                       memcpy (*url_str, m.m_begin, m.m_len);
+                                       (*url_str)[m.m_len] = '\0';
+                               }
 
                        }
                        else {
index 6b08682ba92bfaa64745975c4ff7edf812611efa..eb11ceba3c2a5532d91f16ff2eb56d0bed147ee0 100644 (file)
--- a/src/url.h
+++ b/src/url.h
@@ -60,7 +60,7 @@ enum uri_errno {
        URI_ERRNO_NO_HOST_SLASH,        /* Slash after host missing */
        URI_ERRNO_IPV6_SECURITY,        /* IPv6 security bug detected */
        URI_ERRNO_INVALID_PORT,         /* Port number is bad */
-       URI_ERRNO_INVALID_PORT_RANGE,   /* Port number is not within 0-65535 */
+       URI_ERRNO_INVALID_PORT_RANGE    /* Port number is not within 0-65535 */
 };
 
 enum protocol {
@@ -68,8 +68,8 @@ enum protocol {
        PROTOCOL_FTP,
        PROTOCOL_HTTP,
        PROTOCOL_HTTPS,
-
-       PROTOCOL_UNKNOWN,
+       PROTOCOL_MAILTO,
+       PROTOCOL_UNKNOWN
 };
 
 #define struri(uri) ((uri)->string)
index 160aa6969b4074e32ea1bf163d467388478514b1..1d6ec05fba62d2601fa6f7535b305b0b879dd53c 100644 (file)
@@ -264,6 +264,9 @@ free_task (struct worker_task *task, gboolean is_soft)
                if (task->urls) {
                        g_list_free (task->urls);
                }
+               if (task->emails) {
+                       g_list_free (task->emails);
+               }
                if (task->images) {
                        g_list_free (task->images);
                }