]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Fix phishing detection for IDNA urls
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 24 Feb 2017 11:20:27 +0000 (11:20 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 24 Feb 2017 11:20:27 +0000 (11:20 +0000)
Issue: #842
Reported by: @moisseev

src/libserver/html.c

index 449766a0cd935aa391e22fb28a8d1a3028b5a80c..f644a7d02b4fb3e47f72c98e620c7ce03b5bf17a 100644 (file)
@@ -21,6 +21,7 @@
 #include "html_tags.h"
 #include "html_colors.h"
 #include "url.h"
+#include <unicode/uidna.h>
 
 static sig_atomic_t tags_sorted = 0;
 static sig_atomic_t entities_sorted = 0;
@@ -769,13 +770,24 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool,
        struct rspamd_url **ptext_url)
 {
        struct rspamd_url *text_url;
-       rspamd_ftok_t phished_tld;
+       rspamd_ftok_t phished_tld, disp_host_tok, href_host_tok;
        gint rc;
-       gchar *url_str = NULL;
+       gchar *url_str = NULL, *idn_hbuf;
        const guchar *end = url_text + len;
+       static UIDNA *udn;
+       UErrorCode uc_err = U_ZERO_ERROR;
+       UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
 
        *url_found = FALSE;
 
+       if (udn == NULL) {
+               udn = uidna_openUTS46 (UIDNA_DEFAULT, &uc_err);
+
+               if (uc_err != U_ZERO_ERROR) {
+                       msg_err_pool ("cannot init idna convertor: %s", u_errorName (uc_err));
+               }
+       }
+
        while (url_text < end && g_ascii_isspace (*url_text)) {
                url_text ++;
        }
@@ -786,8 +798,49 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool,
                rc = rspamd_url_parse (text_url, url_str, strlen (url_str), pool);
 
                if (rc == URI_ERRNO_OK) {
-                       if (href_url->hostlen != text_url->hostlen || memcmp (href_url->host,
-                                       text_url->host, href_url->hostlen) != 0) {
+                       disp_host_tok.len = text_url->hostlen;
+                       disp_host_tok.begin = text_url->host;
+
+                       if (rspamd_substring_search_caseless (text_url->host,
+                                       text_url->hostlen, "xn--", 4) != -1) {
+                               idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1);
+                               /* We need to convert it to the normal value first */
+                               disp_host_tok.len = uidna_nameToUnicodeUTF8 (udn,
+                                               text_url->host, text_url->hostlen,
+                                               idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+                               if (uc_err != U_ZERO_ERROR) {
+                                       msg_err_pool ("cannot convert to IDN: %s",
+                                                       u_errorName (uc_err));
+                                       disp_host_tok.len = text_url->hostlen;
+                               }
+                               else {
+                                       disp_host_tok.begin = idn_hbuf;
+                               }
+                       }
+
+                       href_host_tok.len = href_url->hostlen;
+                       href_host_tok.begin = href_url->host;
+
+                       if (rspamd_substring_search_caseless (href_url->host,
+                                       href_url->hostlen, "xn--", 4) != -1) {
+                               idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1);
+                               /* We need to convert it to the normal value first */
+                               href_host_tok.len = uidna_nameToUnicodeUTF8 (udn,
+                                               href_url->host, href_url->hostlen,
+                                               idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+                               if (uc_err != U_ZERO_ERROR) {
+                                       msg_err_pool ("cannot convert to IDN: %s",
+                                                       u_errorName (uc_err));
+                                       href_host_tok.len = href_url->hostlen;
+                               }
+                               else {
+                                       href_host_tok.begin = idn_hbuf;
+                               }
+                       }
+
+                       if (rspamd_ftok_casecmp (&disp_host_tok, &href_host_tok) != 0) {
 
                                if (href_url->tldlen != text_url->tldlen || memcmp (href_url->tld,
                                                text_url->tld, href_url->tldlen) != 0) {
@@ -2094,7 +2147,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                }
 
                                if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) {
-                                       if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+                                       if (!(cur_tag->flags & (FL_CLOSING))) {
                                                url = rspamd_html_process_url_tag (pool, cur_tag);
 
                                                if (url != NULL) {