diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-02-03 20:29:27 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-02-03 20:29:27 +0300 |
commit | 65f15e69284e38d5bbf2177f4466975eca5779b8 (patch) | |
tree | 2966f19baebf839fe02b5823b054a9539d921e6c /src/url.c | |
parent | 99cb83cd06ca693a032616361bd0b1ae1efabdba (diff) | |
download | rspamd-65f15e69284e38d5bbf2177f4466975eca5779b8.tar.gz rspamd-65f15e69284e38d5bbf2177f4466975eca5779b8.zip |
* New module for checking emails inside messages (rules based, like multimap)
* Emails now are separated from urls and urls checks
* Add ability to check text attachements if option is presented in a configuration
Version is 0.3.6 now
Diffstat (limited to 'src/url.c')
-rw-r--r-- | src/url.c | 51 |
1 files changed, 35 insertions, 16 deletions
@@ -41,7 +41,7 @@ (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT) struct _proto { - guchar *name; + guchar *name; gint port; uintptr_t *unused; guint need_slashes:1; @@ -55,6 +55,7 @@ typedef struct url_match_s { gsize m_len; const gchar *pattern; const gchar *prefix; + gboolean add_prefix; } url_match_t; struct url_matcher { @@ -1111,20 +1112,24 @@ domain: static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) { + const gchar *p; /* Check what we have found */ if (pos > begin && *pos == '@') { - if (is_atom (*(pos - 1)) && is_domain (*(pos + 1))) { - match->m_begin = pos + 1; + /* Try to extract it with username */ + p = pos - 1; + while (p > begin && is_atom (*p)) { + p --; + } + if (!is_atom (*p)) { + match->m_begin = p + 1; return TRUE; } } else { - while (pos < end && is_atom (*pos)) { - if (*pos == '@') { - match->m_begin = pos + 1; - return TRUE; - } - pos ++; + p = pos + strlen (match->pattern); + if (is_atom (*p)) { + match->m_begin = p; + return TRUE; } } return FALSE; @@ -1141,6 +1146,7 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match p ++; } match->m_len = p - match->m_begin; + match->add_prefix = TRUE; return TRUE; } @@ -1148,7 +1154,7 @@ void url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html) { gint rc, off = 0; - gchar *url_str = NULL; + gchar *url_str = NULL; struct uri *new; const guint8 *p, *end; @@ -1176,8 +1182,13 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text g_strstrip (url_str); rc = parse_uri (new, url_str, pool); if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) { - g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new); - task->urls = g_list_prepend (task->urls, new); + if (new->protocol == PROTOCOL_MAILTO) { + task->emails = g_list_prepend (task->emails, new); + } + else { + g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new); + task->urls = g_list_prepend (task->urls, new); + } } else { msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); @@ -1197,7 +1208,7 @@ gboolean url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gchar **url_str) { const gchar *end, *pos; - gint idx; + gint idx, l; struct url_matcher *matcher; url_match_t m; @@ -1210,10 +1221,18 @@ url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gch matcher = &matchers[idx]; m.pattern = matcher->pattern; m.prefix = matcher->prefix; + m.add_prefix = FALSE; if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) { - *url_str = memory_pool_alloc (pool, m.m_len + 1); - memcpy (*url_str, m.m_begin, m.m_len); - (*url_str)[m.m_len] = '\0'; + if (m.add_prefix) { + l = m.m_len + 1 + strlen (m.prefix); + *url_str = memory_pool_alloc (pool, l); + rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin); + } + else { + *url_str = memory_pool_alloc (pool, m.m_len + 1); + memcpy (*url_str, m.m_begin, m.m_len); + (*url_str)[m.m_len] = '\0'; + } } else { |