From: Vsevolod Stakhov Date: Tue, 24 Nov 2015 13:19:20 +0000 (+0000) Subject: Fix parsing of tags with spaces used for obfuscation X-Git-Tag: 1.1.0~494 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=90c81188fcddb424e3ff876cbcbfe166e17a16ba;p=rspamd.git Fix parsing of tags with spaces used for obfuscation --- diff --git a/src/libserver/html.c b/src/libserver/html.c index daea7f7ff..45eeb8e4a 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1284,7 +1284,9 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag) struct html_tag_component *comp; struct rspamd_url *url; GList *cur; + const guchar *p; gint rc; + gboolean has_spaces = FALSE; cur = tag->params->head; @@ -1292,10 +1294,35 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag) comp = cur->data; if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) { + /* Strip spaces from the url component */ + p = comp->start; + + while (g_ascii_isspace (*p) && p < comp->start + comp->len) { + p ++; + has_spaces = TRUE; + } + + comp->start = p; + comp->len -= p - comp->start; + + p = comp->start + comp->len - 1; + + while (g_ascii_isspace (*p) && p >= comp->start) { + p --; + comp->len --; + has_spaces = TRUE; + } + url = rspamd_mempool_alloc (pool, sizeof (*url)); rc = rspamd_url_parse (url, (gchar *)comp->start, comp->len, pool); if (rc == URI_ERRNO_OK) { + + /* Spaces in href usually mean an attempt to obfuscate the URL */ + if (has_spaces) { + url->flags |= RSPAMD_URL_FLAG_OBSCURED; + } + return url; } }