diff options
-rw-r--r-- | src/libmime/message.c | 6 | ||||
-rw-r--r-- | src/libserver/html.c | 34 | ||||
-rw-r--r-- | src/libserver/html.h | 4 |
3 files changed, 39 insertions, 5 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 6c1bad06c..cdd532f5a 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1137,10 +1137,12 @@ process_text_part (struct rspamd_task *task, text_part->mime_part = mime_part; text_part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED; - text_part->content = rspamd_html_process_part ( + text_part->content = rspamd_html_process_part_full ( task->task_pool, text_part->html, - part_content); + part_content, + &text_part->urls_offset, + task->urls); rspamd_url_text_extract (task->task_pool, task, text_part, TRUE); diff --git a/src/libserver/html.c b/src/libserver/html.c index c23b228b6..f8220eabd 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1382,8 +1382,8 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool, } GByteArray* -rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, - GByteArray *in) +rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, + GByteArray *in, GList **exceptions, GHashTable *urls) { const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL; guchar t; @@ -1391,8 +1391,9 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *dest; guint obrace = 0, ebrace = 0; GNode *cur_level = NULL; - gint substate, len; + gint substate, len, href_offset = -1; struct html_tag *cur_tag = NULL; + struct process_exception *ex; enum { parse_start = 0, tag_begin, @@ -1696,6 +1697,25 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, g_byte_array_append (dest, "\r\n", 2); save_space = FALSE; } + + if (cur_tag->id == Tag_A) { + if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) { + href_offset = dest->len; + } + else if (cur_tag->flags & FL_CLOSING) { + /* Insert exception */ + if (exceptions && href_offset != -1 + && (gint)dest->len > href_offset) { + ex = rspamd_mempool_alloc (pool, sizeof (*ex)); + ex->pos = href_offset; + ex->len = dest->len - href_offset; + + *exceptions = g_list_prepend (*exceptions, ex); + } + + href_offset = -1; + } + } } else { state = content_write; @@ -1711,3 +1731,11 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, return dest; } + +GByteArray* +rspamd_html_process_part (rspamd_mempool_t *pool, + struct html_content *hc, + GByteArray *in) +{ + return rspamd_html_process_part_full (pool, hc, in, NULL, NULL); +} diff --git a/src/libserver/html.h b/src/libserver/html.h index 83c58c9f1..1a98a3e9a 100644 --- a/src/libserver/html.h +++ b/src/libserver/html.h @@ -57,4 +57,8 @@ GByteArray* rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc, GByteArray *in); +GByteArray* rspamd_html_process_part_full (rspamd_mempool_t *pool, + struct html_content *hc, + GByteArray *in, GList **exceptions, GHashTable *urls); + #endif |