From beb4003fd6f6459e52d10e475663095f70fbcac1 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 24 Feb 2017 11:20:27 +0000 Subject: [PATCH] [Feature] Fix phishing detection for IDNA urls Issue: #842 Reported by: @moisseev --- src/libserver/html.c | 63 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/src/libserver/html.c b/src/libserver/html.c index 449766a0c..f644a7d02 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -21,6 +21,7 @@ #include "html_tags.h" #include "html_colors.h" #include "url.h" +#include static sig_atomic_t tags_sorted = 0; static sig_atomic_t entities_sorted = 0; @@ -769,13 +770,24 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, struct rspamd_url **ptext_url) { struct rspamd_url *text_url; - rspamd_ftok_t phished_tld; + rspamd_ftok_t phished_tld, disp_host_tok, href_host_tok; gint rc; - gchar *url_str = NULL; + gchar *url_str = NULL, *idn_hbuf; const guchar *end = url_text + len; + static UIDNA *udn; + UErrorCode uc_err = U_ZERO_ERROR; + UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER; *url_found = FALSE; + if (udn == NULL) { + udn = uidna_openUTS46 (UIDNA_DEFAULT, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot init idna convertor: %s", u_errorName (uc_err)); + } + } + while (url_text < end && g_ascii_isspace (*url_text)) { url_text ++; } @@ -786,8 +798,49 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, rc = rspamd_url_parse (text_url, url_str, strlen (url_str), pool); if (rc == URI_ERRNO_OK) { - if (href_url->hostlen != text_url->hostlen || memcmp (href_url->host, - text_url->host, href_url->hostlen) != 0) { + disp_host_tok.len = text_url->hostlen; + disp_host_tok.begin = text_url->host; + + if (rspamd_substring_search_caseless (text_url->host, + text_url->hostlen, "xn--", 4) != -1) { + idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1); + /* We need to convert it to the normal value first */ + disp_host_tok.len = uidna_nameToUnicodeUTF8 (udn, + text_url->host, text_url->hostlen, + idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot convert to IDN: %s", + u_errorName (uc_err)); + disp_host_tok.len = text_url->hostlen; + } + else { + disp_host_tok.begin = idn_hbuf; + } + } + + href_host_tok.len = href_url->hostlen; + href_host_tok.begin = href_url->host; + + if (rspamd_substring_search_caseless (href_url->host, + href_url->hostlen, "xn--", 4) != -1) { + idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1); + /* We need to convert it to the normal value first */ + href_host_tok.len = uidna_nameToUnicodeUTF8 (udn, + href_url->host, href_url->hostlen, + idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err); + + if (uc_err != U_ZERO_ERROR) { + msg_err_pool ("cannot convert to IDN: %s", + u_errorName (uc_err)); + href_host_tok.len = href_url->hostlen; + } + else { + href_host_tok.begin = idn_hbuf; + } + } + + if (rspamd_ftok_casecmp (&disp_host_tok, &href_host_tok) != 0) { if (href_url->tldlen != text_url->tldlen || memcmp (href_url->tld, text_url->tld, href_url->tldlen) != 0) { @@ -2094,7 +2147,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, } if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) { - if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) { + if (!(cur_tag->flags & (FL_CLOSING))) { url = rspamd_html_process_url_tag (pool, cur_tag); if (url != NULL) { -- 2.39.5