From 10bb08dd2d8484d0d1d2ae507b94aaa24f48b61b Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 5 Mar 2021 16:50:48 +0000 Subject: [PATCH] [Minor] Various fixes for display link detection --- src/controller.c | 4 ++-- src/libserver/html.c | 24 ++++++++++++++++++------ src/libserver/http/http_router.c | 2 +- src/libserver/http/http_util.c | 2 +- src/libserver/http/http_util.h | 2 +- src/libserver/url.c | 2 +- src/libutil/str_util.c | 2 +- src/libutil/str_util.h | 2 +- 8 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/controller.c b/src/controller.c index 174382879..0ecaf860d 100644 --- a/src/controller.c +++ b/src/controller.c @@ -2764,7 +2764,7 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent, http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u); if (u.field_set & (1 << UF_PATH)) { - guint unnorm_len; + gsize unnorm_len; lookup.begin = msg->url->str + u.field_data[UF_PATH].off; lookup.len = u.field_data[UF_PATH].len; @@ -2971,7 +2971,7 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u); if (u.field_set & (1 << UF_PATH)) { - guint unnorm_len; + gsize unnorm_len; lookup.begin = msg->url->str + u.field_data[UF_PATH].off; lookup.len = u.field_data[UF_PATH].len; diff --git a/src/libserver/html.c b/src/libserver/html.c index 5b3aafca0..401c55f31 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1452,7 +1452,8 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, gsize decoded_len; const gchar *p, *s, *prefix = "http://"; gchar *d; - guint i, dlen; + guint i; + gsize dlen; gboolean has_bad_chars = FALSE, no_prefix = FALSE; static const gchar hexdigests[16] = "0123456789abcdef"; @@ -2588,8 +2589,11 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool, struct rspamd_url *turl; gboolean url_found = FALSE; struct rspamd_process_exception *ex; + enum rspamd_normalise_result norm_res; + guint saved_flags = 0; + gsize dlen; - if (href_offset <= 0) { + if (href_offset < 0) { /* No dispalyed url, just some text within tag */ return; } @@ -2597,15 +2601,23 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool, url->visible_part = rspamd_mempool_alloc (pool, dest->len - href_offset + 1); rspamd_strlcpy (url->visible_part, dest->data + href_offset, dest->len - href_offset + 1); - g_strstrip (url->visible_part); + dlen = dest->len - href_offset; + url->visible_part = + (gchar *)rspamd_string_len_strip (url->visible_part, &dlen, " \t\v\r\n"); + + norm_res = rspamd_normalise_unicode_inplace (pool, url->visible_part, &dlen); + + if (norm_res & RSPAMD_UNICODE_NORM_UNNORMAL) { + saved_flags |= RSPAMD_URL_FLAG_UNNORMALISED; + } rspamd_html_url_is_phished (pool, url, - dest->data + href_offset, - dest->len - href_offset, + url->visible_part, + dlen, &url_found, &displayed_url); if (url_found) { - url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL; + url->flags |= saved_flags|RSPAMD_URL_FLAG_DISPLAY_URL; } if (exceptions && url_found) { diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c index a5b960e72..960df0ce3 100644 --- a/src/libserver/http/http_router.c +++ b/src/libserver/http/http_router.c @@ -291,7 +291,7 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u); if (u.field_set & (1 << UF_PATH)) { - guint unnorm_len; + gsize unnorm_len; pathbuf = g_malloc (u.field_data[UF_PATH].len); memcpy (pathbuf, msg->url->str + u.field_data[UF_PATH].off, diff --git a/src/libserver/http/http_util.c b/src/libserver/http/http_util.c index ec9d9fa58..fd5adb3c1 100644 --- a/src/libserver/http/http_util.c +++ b/src/libserver/http/http_util.c @@ -302,7 +302,7 @@ rspamd_http_date_format (gchar *buf, gsize len, time_t time) } void -rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen) +rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen) { const gchar *p, *end, *slash = NULL, *dot = NULL; gchar *o; diff --git a/src/libserver/http/http_util.h b/src/libserver/http/http_util.h index 7a22ffb16..19b497f30 100644 --- a/src/libserver/http/http_util.h +++ b/src/libserver/http/http_util.h @@ -47,7 +47,7 @@ glong rspamd_http_date_format (gchar *buf, gsize len, time_t time); * @param len * @param nlen */ -void rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen); +void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen); #ifdef __cplusplus } diff --git a/src/libserver/url.c b/src/libserver/url.c index d83c1988f..a5de7ebdf 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -2166,7 +2166,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *p; const gchar *end; guint i, complen, ret, flags = 0; - guint unquoted_len = 0; + gsize unquoted_len = 0; memset (uri, 0, sizeof (*uri)); memset (&u, 0, sizeof (u)); diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 5a44ed311..00774d588 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -3023,7 +3023,7 @@ rspamd_get_unicode_normalizer (void) enum rspamd_normalise_result rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, gchar *start, - guint *len) + gsize *len) { #if U_ICU_VERSION_MAJOR_NUM >= 44 UErrorCode uc_err = U_ZERO_ERROR; diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 0e66d0ed1..427d6b94e 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -491,7 +491,7 @@ enum rspamd_normalise_result { * @return TRUE if a string has been normalised */ enum rspamd_normalise_result rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, - gchar *start, guint *len); + gchar *start, gsize *len); enum rspamd_regexp_escape_flags { RSPAMD_REGEXP_ESCAPE_ASCII = 0, -- 2.39.5