#include "html_tags.h"
#include "html_colors.h"
#include "url.h"
+#include <unicode/uidna.h>
static sig_atomic_t tags_sorted = 0;
static sig_atomic_t entities_sorted = 0;
struct rspamd_url **ptext_url)
{
struct rspamd_url *text_url;
- rspamd_ftok_t phished_tld;
+ rspamd_ftok_t phished_tld, disp_host_tok, href_host_tok;
gint rc;
- gchar *url_str = NULL;
+ gchar *url_str = NULL, *idn_hbuf;
const guchar *end = url_text + len;
+ static UIDNA *udn;
+ UErrorCode uc_err = U_ZERO_ERROR;
+ UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
*url_found = FALSE;
+ if (udn == NULL) {
+ udn = uidna_openUTS46 (UIDNA_DEFAULT, &uc_err);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err_pool ("cannot init idna convertor: %s", u_errorName (uc_err));
+ }
+ }
+
while (url_text < end && g_ascii_isspace (*url_text)) {
url_text ++;
}
rc = rspamd_url_parse (text_url, url_str, strlen (url_str), pool);
if (rc == URI_ERRNO_OK) {
- if (href_url->hostlen != text_url->hostlen || memcmp (href_url->host,
- text_url->host, href_url->hostlen) != 0) {
+ disp_host_tok.len = text_url->hostlen;
+ disp_host_tok.begin = text_url->host;
+
+ if (rspamd_substring_search_caseless (text_url->host,
+ text_url->hostlen, "xn--", 4) != -1) {
+ idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1);
+ /* We need to convert it to the normal value first */
+ disp_host_tok.len = uidna_nameToUnicodeUTF8 (udn,
+ text_url->host, text_url->hostlen,
+ idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err_pool ("cannot convert to IDN: %s",
+ u_errorName (uc_err));
+ disp_host_tok.len = text_url->hostlen;
+ }
+ else {
+ disp_host_tok.begin = idn_hbuf;
+ }
+ }
+
+ href_host_tok.len = href_url->hostlen;
+ href_host_tok.begin = href_url->host;
+
+ if (rspamd_substring_search_caseless (href_url->host,
+ href_url->hostlen, "xn--", 4) != -1) {
+ idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1);
+ /* We need to convert it to the normal value first */
+ href_host_tok.len = uidna_nameToUnicodeUTF8 (udn,
+ href_url->host, href_url->hostlen,
+ idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err_pool ("cannot convert to IDN: %s",
+ u_errorName (uc_err));
+ href_host_tok.len = href_url->hostlen;
+ }
+ else {
+ href_host_tok.begin = idn_hbuf;
+ }
+ }
+
+ if (rspamd_ftok_casecmp (&disp_host_tok, &href_host_tok) != 0) {
if (href_url->tldlen != text_url->tldlen || memcmp (href_url->tld,
text_url->tld, href_url->tldlen) != 0) {
}
if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) {
- if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+ if (!(cur_tag->flags & (FL_CLOSING))) {
url = rspamd_html_process_url_tag (pool, cur_tag);
if (url != NULL) {