From: Vsevolod Stakhov Date: Sun, 13 Feb 2022 12:56:43 +0000 (+0000) Subject: [Fix] When checking for phishing, we need to convert punycode -> UTF8, not vice versa X-Git-Tag: 3.2~54 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b1bef4702ca704b65bb8d8c19d89fa4f3fd28c49;p=rspamd.git [Fix] When checking for phishing, we need to convert punycode -> UTF8, not vice versa Previously, Rspamd checked for the `xn--` signature only to convert it back to `xn--` form. According to the expected results, it should do the opposite conversion. Issue: #4065 --- diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx index 34775ba13..0b48c7a75 100644 --- a/src/libserver/html/html_url.cxx +++ b/src/libserver/html/html_url.cxx @@ -92,17 +92,20 @@ convert_idna_hostname_maybe(rspamd_mempool_t *pool, struct rspamd_url *url, bool /* Handle IDN url's */ if (ret.size() > 4 && rspamd_substring_search_caseless(ret.data(), ret.size(), "xn--", 4) != -1) { + const auto buf_capacity = ret.size() * 2 + 1; auto *idn_hbuf = (char *)rspamd_mempool_alloc (pool, buf_capacity); icu::CheckedArrayByteSink byte_sink{idn_hbuf, (int)buf_capacity}; + /* We need to convert it to the normal value first */ icu::IDNAInfo info; auto uc_err = U_ZERO_ERROR; auto *udn = get_icu_idna_instance(); - udn->nameToASCII_UTF8(icu::StringPiece(ret.data(), ret.size()), + udn->nameToUnicodeUTF8(icu::StringPiece(ret.data(), ret.size()), byte_sink, info, uc_err); if (uc_err == U_ZERO_ERROR && !info.hasErrors()) { + /* idn_hbuf is allocated in mempool, so it is safe to use */ ret = std::string_view{idn_hbuf, (std::size_t)byte_sink.NumberOfBytesWritten()}; } else {