summary | refs | log | tree | commit | diff | stats
path: root/src/libserver
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-24 11:20:27 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-24 11:20:27 +0000
commit beb4003fd6f6459e52d10e475663095f70fbcac1 (patch)
tree 9b028e11c9e71ad8723f2cf22802a62143c34c25 /src/libserver
parent e8e2a4d65385a038299f5b2f3ed24988d4b9c6e8 (diff)
download rspamd-beb4003fd6f6459e52d10e475663095f70fbcac1.tar.gz
rspamd-beb4003fd6f6459e52d10e475663095f70fbcac1.zip
[Feature] Fix phishing detection for IDNA urls
Issue: #842 Reported by: @moisseev
Diffstat (limited to 'src/libserver')
-rw-r--r-- src/libserver/html.c 63
1 files changed, 58 insertions, 5 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 449766a0c..f644a7d02 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -21,6 +21,7 @@
#include "html_tags.h"
#include "html_colors.h"
#include "url.h"
+#include <unicode/uidna.h>
static sig_atomic_t tags_sorted = 0;
static sig_atomic_t entities_sorted = 0;
@@ -769,13 +770,24 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool,
struct rspamd_url **ptext_url)
{
struct rspamd_url *text_url;
- rspamd_ftok_t phished_tld;
+ rspamd_ftok_t phished_tld, disp_host_tok, href_host_tok;
gint rc;
- gchar *url_str = NULL;
+ gchar *url_str = NULL, *idn_hbuf;
const guchar *end = url_text + len;
+ static UIDNA *udn;
+ UErrorCode uc_err = U_ZERO_ERROR;
+ UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
*url_found = FALSE;
+ if (udn == NULL) {
+ udn = uidna_openUTS46 (UIDNA_DEFAULT, &uc_err);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err_pool ("cannot init idna convertor: %s", u_errorName (uc_err));
+ }
+ }
+
while (url_text < end && g_ascii_isspace (*url_text)) {
url_text ++;
}
@@ -786,8 +798,49 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool,
rc = rspamd_url_parse (text_url, url_str, strlen (url_str), pool);
if (rc == URI_ERRNO_OK) {
- if (href_url->hostlen != text_url->hostlen || memcmp (href_url->host,
- text_url->host, href_url->hostlen) != 0) {
+ disp_host_tok.len = text_url->hostlen;
+ disp_host_tok.begin = text_url->host;
+
+ if (rspamd_substring_search_caseless (text_url->host,
+ text_url->hostlen, "xn--", 4) != -1) {
+ idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1);
+ /* We need to convert it to the normal value first */
+ disp_host_tok.len = uidna_nameToUnicodeUTF8 (udn,
+ text_url->host, text_url->hostlen,
+ idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err_pool ("cannot convert to IDN: %s",
+ u_errorName (uc_err));
+ disp_host_tok.len = text_url->hostlen;
+ }
+ else {
+ disp_host_tok.begin = idn_hbuf;
+ }
+ }
+
+ href_host_tok.len = href_url->hostlen;
+ href_host_tok.begin = href_url->host;
+
+ if (rspamd_substring_search_caseless (href_url->host,
+ href_url->hostlen, "xn--", 4) != -1) {
+ idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1);
+ /* We need to convert it to the normal value first */
+ href_host_tok.len = uidna_nameToUnicodeUTF8 (udn,
+ href_url->host, href_url->hostlen,
+ idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err_pool ("cannot convert to IDN: %s",
+ u_errorName (uc_err));
+ href_host_tok.len = href_url->hostlen;
+ }
+ else {
+ href_host_tok.begin = idn_hbuf;
+ }
+ }
+
+ if (rspamd_ftok_casecmp (&disp_host_tok, &href_host_tok) != 0) {
if (href_url->tldlen != text_url->tldlen || memcmp (href_url->tld,
text_url->tld, href_url->tldlen) != 0) {
@@ -2094,7 +2147,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
}
if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) {
- if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+ if (!(cur_tag->flags & (FL_CLOSING))) {
url = rspamd_html_process_url_tag (pool, cur_tag);
if (url != NULL) {