source.dussan.org Git - rspamd.git/commitdiff
[Minor] Various fixes for display link detection
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 5 Mar 2021 16:50:48 +0000 (16:50 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 5 Mar 2021 16:50:48 +0000 (16:50 +0000)
src/controller.c
src/libserver/html.c
src/libserver/http/http_router.c
src/libserver/http/http_util.c
src/libserver/http/http_util.h
src/libserver/url.c
src/libutil/str_util.c
src/libutil/str_util.h

index 174382879dcac19dddc3ae5f148acec7a491d7f3..0ecaf860d6e45b68fefa228eccaf892a059b85e3 100644 (file)
@@ -2764,7 +2764,7 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent,
        http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
 
        if (u.field_set & (1 << UF_PATH)) {
-               guint unnorm_len;
+               gsize unnorm_len;
                lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
                lookup.len = u.field_data[UF_PATH].len;
 
@@ -2971,7 +2971,7 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e
        http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
 
        if (u.field_set & (1 << UF_PATH)) {
-               guint unnorm_len;
+               gsize unnorm_len;
                lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
                lookup.len = u.field_data[UF_PATH].len;
 
index 5b3aafca076d373718e40cbcdbc96f3967cbdb32..401c55f314c30f9e659cb567b18c46838abbd075 100644 (file)
@@ -1452,7 +1452,8 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
        gsize decoded_len;
        const gchar *p, *s, *prefix = "http://";
        gchar *d;
-       guint i, dlen;
+       guint i;
+       gsize dlen;
        gboolean has_bad_chars = FALSE, no_prefix = FALSE;
        static const gchar hexdigests[16] = "0123456789abcdef";
 
@@ -2588,8 +2589,11 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
        struct rspamd_url *turl;
        gboolean url_found = FALSE;
        struct rspamd_process_exception *ex;
+       enum rspamd_normalise_result norm_res;
+       guint saved_flags = 0;
+       gsize dlen;
 
-       if (href_offset <= 0) {
+       if (href_offset < 0) {
                /* No displayed url, just some text within <a> tag */
                return;
        }
@@ -2597,15 +2601,23 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
        url->visible_part = rspamd_mempool_alloc (pool, dest->len - href_offset + 1);
        rspamd_strlcpy (url->visible_part, dest->data + href_offset,
                        dest->len - href_offset + 1);
-       g_strstrip (url->visible_part);
+       dlen = dest->len - href_offset;
+       url->visible_part =
+                       (gchar *)rspamd_string_len_strip (url->visible_part, &dlen, " \t\v\r\n");
+
+       norm_res = rspamd_normalise_unicode_inplace (pool, url->visible_part, &dlen);
+
+       if (norm_res & RSPAMD_UNICODE_NORM_UNNORMAL) {
+               saved_flags |= RSPAMD_URL_FLAG_UNNORMALISED;
+       }
 
        rspamd_html_url_is_phished (pool, url,
-                       dest->data + href_offset,
-                       dest->len - href_offset,
+                       url->visible_part,
+                       dlen,
                        &url_found, &displayed_url);
 
        if (url_found) {
-               url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+               url->flags |= saved_flags|RSPAMD_URL_FLAG_DISPLAY_URL;
        }
 
        if (exceptions && url_found) {
index a5b960e723f9bf0305752bb05ef5170eff36b4c2..960df0ce311af6701d2986c8843ff4b0ba80c5aa 100644 (file)
@@ -291,7 +291,7 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn,
                        http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
 
                        if (u.field_set & (1 << UF_PATH)) {
-                               guint unnorm_len;
+                               gsize unnorm_len;
 
                                pathbuf = g_malloc (u.field_data[UF_PATH].len);
                                memcpy (pathbuf, msg->url->str + u.field_data[UF_PATH].off,
index ec9d9fa584fd665f849524a0899b6f3daa90e21f..fd5adb3c1d2fce3822352e0cfd6a7741d0b24c83 100644 (file)
@@ -302,7 +302,7 @@ rspamd_http_date_format (gchar *buf, gsize len, time_t time)
 }
 
 void
-rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen)
+rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
 {
        const gchar *p, *end, *slash = NULL, *dot = NULL;
        gchar *o;
index 7a22ffb16cd06d661aa02f86aa1ece945919e4fe..19b497f301746186c6bb8241d9a180d25b7b90de 100644 (file)
@@ -47,7 +47,7 @@ glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
  * @param len
  * @param nlen
  */
-void rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen);
+void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
 
 #ifdef  __cplusplus
 }
index d83c1988f2e23f2ef6977f0b5ade2f3674bc14f4..a5de7ebdfb03cdfcfc9fe8c1fe9064d138c72105 100644 (file)
@@ -2166,7 +2166,7 @@ rspamd_url_parse (struct rspamd_url *uri,
        gchar *p;
        const gchar *end;
        guint i, complen, ret, flags = 0;
-       guint unquoted_len = 0;
+       gsize unquoted_len = 0;
 
        memset (uri, 0, sizeof (*uri));
        memset (&u, 0, sizeof (u));
index 5a44ed3112e7b736c3cc3a0ac7e03010b8a5c363..00774d5886d46cf4f4aa2b4f732b52bec2a42f02 100644 (file)
@@ -3023,7 +3023,7 @@ rspamd_get_unicode_normalizer (void)
 
 enum rspamd_normalise_result
 rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, gchar *start,
-               guint *len)
+               gsize *len)
 {
 #if U_ICU_VERSION_MAJOR_NUM >= 44
        UErrorCode uc_err = U_ZERO_ERROR;
index 0e66d0ed18f012e232ceca2e54965d5e646fba94..427d6b94ee11fc206f3d4a50c7f15ed59a86bcf1 100644 (file)
@@ -491,7 +491,7 @@ enum rspamd_normalise_result {
  * @return TRUE if a string has been normalised
  */
 enum rspamd_normalise_result rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool,
-                                                                                                                          gchar *start, guint *len);
+                                                                                                                          gchar *start, gsize *len);
 
 enum rspamd_regexp_escape_flags {
        RSPAMD_REGEXP_ESCAPE_ASCII = 0,