source.dussan.org Git - rspamd.git/commitdiff
[Fix] Urls: Fix processing of html urls when it comes to the flags
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 6 Mar 2021 23:49:16 +0000 (23:49 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 6 Mar 2021 23:49:16 +0000 (23:49 +0000)
Issue: #3664

src/libserver/html.c
src/libserver/url.c
src/libserver/url.h
src/lua/lua_task.c

index 974b59129f4f89f885c31dbe6d0fc8b387009f42..aa1cdf6cc335dafca82fd25abd9bbc4ffaf5779a 100644 (file)
@@ -1764,7 +1764,7 @@ rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset,
 
        url->flags |= RSPAMD_URL_FLAG_QUERY;
 
-       if (rspamd_url_set_add_or_increase (cbd->url_set, url) && cbd->part_urls) {
+       if (rspamd_url_set_add_or_increase(cbd->url_set, url, false) && cbd->part_urls) {
                g_ptr_array_add (cbd->part_urls, url);
        }
 
@@ -1903,7 +1903,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                                                if (img->url) {
                                                        img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
 
-                                                       if (rspamd_url_set_add_or_increase (url_set, img->url) &&
+                                                       if (rspamd_url_set_add_or_increase(url_set, img->url, false) &&
                                                                part_urls) {
                                                                g_ptr_array_add (part_urls, img->url);
                                                        }
@@ -3245,10 +3245,15 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                                if (url != NULL) {
 
                                                        if (url_set != NULL) {
-                                                               if (rspamd_url_set_add_or_increase (url_set, url)) {
+                                                               struct rspamd_url *maybe_existing =
+                                                                               rspamd_url_set_add_or_return (url_set, url);
+                                                               if (maybe_existing == url) {
                                                                        rspamd_process_html_url (pool, url, url_set,
                                                                                        part_urls);
                                                                }
+                                                               else {
+                                                                       url = maybe_existing;
+                                                               }
                                                        }
 
                                                        href_offset = dest->len;
index a5de7ebdfb03cdfcfc9fe8c1fe9064d138c72105..8183213b66493b9001620221ef05f7b7c8a17bec 100644 (file)
@@ -3377,7 +3377,7 @@ rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
        url->flags |= RSPAMD_URL_FLAG_QUERY;
 
 
-       if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url)) {
+       if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false)) {
                if (cbd->part && cbd->part->mime_part->urls) {
                        g_ptr_array_add (cbd->part->mime_part->urls, url);
                }
@@ -3433,8 +3433,8 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
        url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
 
-       if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url) &&
-                       cbd->part->mime_part->urls) {
+       if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false) &&
+               cbd->part->mime_part->urls) {
                g_ptr_array_add (cbd->part->mime_part->urls, url);
        }
 
@@ -3592,7 +3592,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
                }
        }
 
-       rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+       rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false);
 
        /* We also search the query for additional url inside */
        if (url->querylen > 0) {
@@ -3622,8 +3622,8 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
                                        }
                                }
 
-                               rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls),
-                                               query_url);
+                               rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls),
+                                               query_url, false);
                        }
                }
        }
@@ -4044,21 +4044,44 @@ rspamd_url_protocol_from_string (const gchar *str)
 
 
 bool
-rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
-                                                                        struct rspamd_url *u)
+rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set,
+                                                          struct rspamd_url *u,
+                                                          bool enforce_replace)
 {
        khiter_t k;
        gint r;
 
-       k = kh_put (rspamd_url_hash, set, u, &r);
+       k = kh_get (rspamd_url_hash, set, u);
 
-       if (r == 0) {
+       if (k != kh_end (set)) {
+               /* Existing url */
                struct rspamd_url *ex = kh_key (set, k);
-
-               ex->count ++;
+#define SUSPICIOUS_URL_FLAGS (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_OBSCURED|RSPAMD_URL_FLAG_ZW_SPACES)
+               if (enforce_replace) {
+                       kh_key (set, k) = u;
+                       u->count++;
+               }
+               else {
+                       if (u->flags & SUSPICIOUS_URL_FLAGS) {
+                               if (!(ex->flags & SUSPICIOUS_URL_FLAGS)) {
+                                       /* Propagate new url to an old one */
+                                       kh_key (set, k) = u;
+                                       u->count++;
+                               }
+                               else {
+                                       ex->count++;
+                               }
+                       }
+                       else {
+                               ex->count++;
+                       }
+               }
 
                return false;
        }
+       else {
+               k = kh_put (rspamd_url_hash, set, u, &r);
+       }
 
        return true;
 }
@@ -4071,12 +4094,15 @@ rspamd_url_set_add_or_return (khash_t (rspamd_url_hash) *set,
        gint r;
 
        if (set) {
-               k = kh_put (rspamd_url_hash, set, u, &r);
+               k = kh_get (rspamd_url_hash, set, u);
 
-               if (r == 0) {
-                       struct rspamd_url *ex = kh_key (set, k);
+               if (k != kh_end (set)) {
+                       return kh_key (set, k);
+               }
+               else {
+                       k = kh_put (rspamd_url_hash, set, u, &r);
 
-                       return ex;
+                       return kh_key (set, k);
                }
        }
 
index 567cdd137a8a540924b0b4687e817c5cc461530d..59485ab9a368d1dba0f4c21f17bf3ac0bdbe8c33 100644 (file)
@@ -296,8 +296,9 @@ KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
  * @param u
  * @return true if a new url has been added
  */
-bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
-               struct rspamd_url *u);
+bool rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set,
+                                                                       struct rspamd_url *u,
+                                                                       bool enforce_replace);
 
 /**
  * Same as rspamd_url_set_add_or_increase but returns the existing url if found
index d2bd17aba50c8671416cb7e9134d2a869df59bec..579f04fb9b3b08041fd24e6fff84e31834fc314b 100644 (file)
@@ -2507,7 +2507,7 @@ lua_task_inject_url (lua_State * L)
        }
 
        if (task && task->message && url && url->url) {
-               if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url->url)) {
+               if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url->url, false)) {
                        if (mpart && mpart->urls) {
                                /* Also add url to the mime part */
                                g_ptr_array_add (mpart->urls, url->url);