aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2020-09-18 15:27:46 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2020-09-18 15:27:46 +0100
commitdb1bb1e0e405f1d319c5360bf9896c03f0257907 (patch)
tree8f90eae5fb6b955521c2158d711bad073ce44b2e
parentfd396a6293be7997767b1e97662693abf39c5a1a (diff)
downloadrspamd-db1bb1e0e405f1d319c5360bf9896c03f0257907.tar.gz
rspamd-db1bb1e0e405f1d319c5360bf9896c03f0257907.zip
[CritFix] Fix IDNA dots parsing
Found by: Dr. Hajime Shimada and Mr. Shirakura from Nagoya University
-rw-r--r--src/libserver/url.c78
1 files changed, 77 insertions, 1 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 8186cc804..379be6293 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -2084,6 +2084,77 @@ rspamd_telephone_normalise_inplace (struct rspamd_url *uri)
uri->urllen -= (orig_len - uri->hostlen);
}
+static inline bool
+is_idna_label_dot (UChar ch)
+{
+ switch(ch){
+ case 0x3002:
+ case 0xFF0E:
+ case 0xFF61:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * All credits for this investigation should go to
+ * Dr. Hajime Shimada and Mr. Shirakura as they have revealed this case in their
+ * research.
+ */
+
+/*
+ * This function replaces unsafe IDNA dots in host labels. Unfortunately,
+ * IDNA extends dot definition from '.' to multiple other characters that
+ * should be treated equally.
+ * This function replaces such dots and returns `true` if these dots are found.
+ * In this case, it should be treated as obfuscation attempt.
+ */
+static bool
+rspamd_url_remove_dots (struct rspamd_url *uri)
+{
+ const gchar *hstart = rspamd_url_host_unsafe (uri);
+ gchar *t;
+ UChar32 uc;
+ gint i = 0, hlen;
+ bool ret = false;
+
+ if (uri->hostlen == 0) {
+ return false;
+ }
+
+ hlen = uri->hostlen;
+ t = rspamd_url_host_unsafe (uri);
+
+ while (i < hlen) {
+ gint prev_i = i;
+ U8_NEXT (hstart, i, hlen, uc);
+
+ if (is_idna_label_dot (uc)) {
+ *t ++ = '.';
+ ret = true;
+ }
+ else {
+ if (ret) {
+ /* We have to shift the remaining stuff */
+ while (prev_i < i) {
+ *t ++ = *(hstart + prev_i);
+ prev_i ++;
+ }
+ }
+ else {
+ t += (i - prev_i);
+ }
+ }
+ }
+
+ if (ret) {
+ rspamd_url_shift (uri, t - hstart, UF_HOST);
+ }
+
+ return ret;
+}
+
enum uri_errno
rspamd_url_parse (struct rspamd_url *uri,
gchar *uristring, gsize len,
@@ -2221,6 +2292,10 @@ rspamd_url_parse (struct rspamd_url *uri,
uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED;
}
+ if (rspamd_url_remove_dots (uri)) {
+ uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+
if (uri->protocol & (PROTOCOL_HTTP|PROTOCOL_HTTPS|PROTOCOL_MAILTO|PROTOCOL_FTP|PROTOCOL_FILE)) {
/* Ensure that hostname starts with something sane (exclude numeric urls) */
@@ -2246,6 +2321,7 @@ rspamd_url_parse (struct rspamd_url *uri,
UChar *utf16_hostname, *norm_utf16;
gint32 utf16_len, norm_utf16_len, norm_utf8_len;
+ UParseError parse_error;
utf16_hostname = rspamd_mempool_alloc (pool, uri->hostlen * sizeof (UChar));
struct UConverter *utf8_conv = rspamd_get_utf8_converter ();
@@ -2260,7 +2336,7 @@ rspamd_url_parse (struct rspamd_url *uri,
norm_utf16 = rspamd_mempool_alloc (pool, utf16_len * sizeof (UChar));
norm_utf16_len = usprep_prepare (nameprep, utf16_hostname, utf16_len,
- norm_utf16, utf16_len, USPREP_DEFAULT, NULL, &uc_err);
+ norm_utf16, utf16_len, USPREP_DEFAULT, &parse_error, &uc_err);
if (!U_SUCCESS (uc_err)) {