From: Vsevolod Stakhov Date: Wed, 23 Mar 2011 17:14:08 +0000 (+0300) Subject: * Add ability to extract urls from subject field X-Git-Tag: 0.3.10 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=7c3c86ce3fad9d222561fddc3bb30e7f1aa66b32;p=rspamd.git * Add ability to extract urls from subject field Fix phishing plugin. * Important fix for multimap/cdb handling * Important fix for phishing detector --- diff --git a/src/html.c b/src/html.c index 31863ba9c..b18914e41 100644 --- a/src/html.c +++ b/src/html.c @@ -728,7 +728,13 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url } } /* Compare parts and check for phished hostname */ - if (c != NULL && g_ascii_strncasecmp (p, c, len) != 0) { + if (c != NULL) { + if (g_ascii_strncasecmp (p, c, len) != 0) { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } + } + else { href_url->is_phished = TRUE; href_url->phished_url = new; } diff --git a/src/main.c b/src/main.c index 64357c264..cef64d8a1 100644 --- a/src/main.c +++ b/src/main.c @@ -605,8 +605,8 @@ wait_for_workers (gpointer key, gpointer value, gpointer unused) if (got_alarm) { got_alarm = 0; - /* Set alarm for hard termination */ - set_alarm (HARD_TERMINATION_TIME); + /* Set alarm for hard termination but with less time */ + set_alarm (HARD_TERMINATION_TIME / 10); } if (waitpid (w->pid, &res, 0) == -1) { diff --git a/src/message.c b/src/message.c index 4f4627351..48eb4d89c 100644 --- a/src/message.c +++ b/src/message.c @@ -945,7 +945,10 @@ process_message (struct worker_task *task) GMimePart *part; GMimeDataWrapper *wrapper; struct received_header *recv; - gchar *mid; + gchar *mid, *url_str, *p, *end; + struct uri *subject_url; + gsize len; + gint pos, rc; tmp = memory_pool_alloc (task->task_pool, sizeof (GByteArray)); tmp->data = task->msg->begin; @@ -1090,6 +1093,44 @@ process_message (struct worker_task *task) #endif } + /* Parse urls inside Subject header */ + cur = message_get_header (task->task_pool, task->message, "Subject", FALSE); + if (cur) { + p = cur->data; + len = strlen (p); + end = p + len; + + while (p < end) { + /* Search to the end of url */ + if (url_try_text (task->task_pool, p, end - p, &pos, &url_str)) { + if (url_str != NULL) { + subject_url = memory_pool_alloc0 (task->task_pool, sizeof (struct uri)); + if (subject_url != NULL) { + /* Try to parse url */ + rc = parse_uri (subject_url, url_str, task->task_pool); + if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && + subject_url->hostlen > 0) { + if (subject_url->protocol != PROTOCOL_MAILTO) { + if (!g_tree_lookup (task->urls, subject_url)) { + g_tree_insert (task->urls, subject_url, subject_url); + } + } + } + else if (rc != URI_ERRNO_OK) { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + } + else { + break; + } + p += pos; + } + /* Free header's list */ + g_list_free (cur); + } + return 0; } diff --git a/src/plugins/lua/multimap.lua b/src/plugins/lua/multimap.lua index 98ebeb353..5d1312dc2 100644 --- a/src/plugins/lua/multimap.lua +++ b/src/plugins/lua/multimap.lua @@ -283,13 +283,13 @@ local function add_multimap_rule(params) if string.find(newrule['map'], '^cdb://.*$') then local test = cdb.create(newrule['map']) newrule['hash'] = cdb.create(newrule['map']) + newrule['cdb'] = true if newrule['hash'] then table.insert(rules, newrule) return newrule else rspamd_logger.warn('Cannot add rule: map doesn\'t exists: ' .. newrule['map']) end - newrule['cdb'] = true else if newrule['type'] == 'ip' then newrule['ips'] = rspamd_config:add_radix_map (newrule['map']) diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua index 1e648768a..e3def6fd9 100644 --- a/src/plugins/lua/phishing.lua +++ b/src/plugins/lua/phishing.lua @@ -11,10 +11,10 @@ function phishing_cb (task) if urls then for _,url in ipairs(urls) do if url:is_phished() then + local found = false local purl = url:get_phished() if table.maxn(strict_domains) > 0 then local _,_,tld = string.find(purl:get_host(), '([a-zA-Z0-9%-]+\.[a-zA-Z0-9%-]+)$') - local found = false if tld then for _,rule in ipairs(strict_domains) do if rule['map']:get_key(tld) then @@ -22,20 +22,19 @@ function phishing_cb (task) found = true end end - if found then - return - end end end - if domains then - local _,_,tld = string.find(purl:get_host(), '([a-zA-Z0-9%-]+\.[a-zA-Z0-9%-]+)$') - if tld then - if domains:get_key(tld) then - task:insert_result(symbol, 1, purl:get_host()) + if not found then + if domains then + local _,_,tld = string.find(purl:get_host(), '([a-zA-Z0-9%-]+\.[a-zA-Z0-9%-]+)$') + if tld then + if domains:get_key(tld) then + task:insert_result(symbol, 1, purl:get_host()) + end end + else + task:insert_result(symbol, 1, purl:get_host()) end - else - task:insert_result(symbol, 1, purl:get_host()) end end end @@ -70,7 +69,7 @@ if opts then sd[1] = opts['strict_domains'] end for _,d in ipairs(sd) do - local s, _ = string.find(d, ':') + local s, _ = string.find(d, ':[^:]+$') if s then local sym = string.sub(d, s + 1, -1) local map = string.sub(d, 1, s - 1)