diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-16 17:19:03 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-16 17:19:03 +0100 |
commit | 420c7091bb675b13b684b430e2bb86c053b876a2 (patch) | |
tree | 5915404ca85d33b56b20efa64ea4041839891d1d /src/libserver | |
parent | 7665629b68f09550dbda76654f7ef2d8eb39721a (diff) | |
download | rspamd-420c7091bb675b13b684b430e2bb86c053b876a2.tar.gz rspamd-420c7091bb675b13b684b430e2bb86c053b876a2.zip |
Exclude HTML urls content from texts
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/html.c | 34 | ||||
-rw-r--r-- | src/libserver/html.h | 4 |
2 files changed, 35 insertions, 3 deletions
```diff
diff --git a/src/libserver/html.c b/src/libserver/html.c
index c23b228b6..f8220eabd 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1382,8 +1382,8 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 }
 
 GByteArray*
-rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
-	GByteArray *in)
+rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
+	GByteArray *in, GList **exceptions, GHashTable *urls)
 {
 	const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL;
 	guchar t;
@@ -1391,8 +1391,9 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
 	GByteArray *dest;
 	guint obrace = 0, ebrace = 0;
 	GNode *cur_level = NULL;
-	gint substate, len;
+	gint substate, len, href_offset = -1;
 	struct html_tag *cur_tag = NULL;
+	struct process_exception *ex;
 	enum {
 		parse_start = 0,
 		tag_begin,
@@ -1696,6 +1697,25 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
 					g_byte_array_append (dest, "\r\n", 2);
 					save_space = FALSE;
 				}
+
+				if (cur_tag->id == Tag_A) {
+					if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+						href_offset = dest->len;
+					}
+					else if (cur_tag->flags & FL_CLOSING) {
+						/* Insert exception */
+						if (exceptions && href_offset != -1
+								&& (gint)dest->len > href_offset) {
+							ex = rspamd_mempool_alloc (pool, sizeof (*ex));
+							ex->pos = href_offset;
+							ex->len = dest->len - href_offset;
+
+							*exceptions = g_list_prepend (*exceptions, ex);
+						}
+
+						href_offset = -1;
+					}
+				}
 			}
 			else {
 				state = content_write;
@@ -1711,3 +1731,11 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
 
 	return dest;
 }
+
+GByteArray*
+rspamd_html_process_part (rspamd_mempool_t *pool,
+	struct html_content *hc,
+	GByteArray *in)
+{
+	return rspamd_html_process_part_full (pool, hc, in, NULL, NULL);
+}
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 83c58c9f1..1a98a3e9a 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -57,4 +57,8 @@ GByteArray*
 rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
 	GByteArray *in);
 
+GByteArray* rspamd_html_process_part_full (rspamd_mempool_t *pool,
+	struct html_content *hc,
+	GByteArray *in, GList **exceptions, GHashTable *urls);
+
 #endif
```