aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2022-02-13 12:56:43 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2022-02-13 12:56:43 +0000
commitb1bef4702ca704b65bb8d8c19d89fa4f3fd28c49 (patch)
tree36373c8661b4ca794a1cb073badffb5f9b5f9453 /src/libserver
parent192a5fd8bbf150ce515519477b466b3a811e8a30 (diff)
downloadrspamd-b1bef4702ca704b65bb8d8c19d89fa4f3fd28c49.tar.gz
rspamd-b1bef4702ca704b65bb8d8c19d89fa4f3fd28c49.zip
[Fix] When checking for phishing, we need to convert punycode -> UTF8, not vice versa
Previously, Rspamd checked for the `xn--` signature only to convert the hostname back to the `xn--` form. According to the expected results, it should do the opposite conversion. Issue: #4065
Diffstat (limited to 'src/libserver')
-rw-r--r--src/libserver/html/html_url.cxx5
1 files changed, 4 insertions, 1 deletions
diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx
index 34775ba13..0b48c7a75 100644
--- a/src/libserver/html/html_url.cxx
+++ b/src/libserver/html/html_url.cxx
@@ -92,17 +92,20 @@ convert_idna_hostname_maybe(rspamd_mempool_t *pool, struct rspamd_url *url, bool
/* Handle IDN url's */
if (ret.size() > 4 &&
rspamd_substring_search_caseless(ret.data(), ret.size(), "xn--", 4) != -1) {
+
const auto buf_capacity = ret.size() * 2 + 1;
auto *idn_hbuf = (char *)rspamd_mempool_alloc (pool, buf_capacity);
icu::CheckedArrayByteSink byte_sink{idn_hbuf, (int)buf_capacity};
+
/* We need to convert it to the normal value first */
icu::IDNAInfo info;
auto uc_err = U_ZERO_ERROR;
auto *udn = get_icu_idna_instance();
- udn->nameToASCII_UTF8(icu::StringPiece(ret.data(), ret.size()),
+ udn->nameToUnicodeUTF8(icu::StringPiece(ret.data(), ret.size()),
byte_sink, info, uc_err);
if (uc_err == U_ZERO_ERROR && !info.hasErrors()) {
+ /* idn_hbuf is allocated in mempool, so it is safe to use */
ret = std::string_view{idn_hbuf, (std::size_t)byte_sink.NumberOfBytesWritten()};
}
else {