source.dussan.org Git - rspamd.git/commitdiff
[Fix] One more fix to skip images that are not urls
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 1 May 2020 12:12:11 +0000 (13:12 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 1 May 2020 12:12:11 +0000 (13:12 +0100)
src/libserver/html.c
src/libserver/url.c
src/libserver/url.h

index 80fa3479b2734535f1f1722f74eb81da228fc1c3..f8c43bdd5d721bbb371613c92d4a0d286ba7d6ed 100644 (file)
@@ -191,8 +191,7 @@ khash_t(color_by_name) *html_color_by_name;
 
 static struct rspamd_url *rspamd_html_process_url (rspamd_mempool_t *pool,
                                                                                                   const gchar *start, guint len,
-                                                                                                  struct html_tag_component *comp,
-                                                                                                  bool is_image);
+                                                                                                  struct html_tag_component *comp);
 
 static void
 rspamd_html_library_init (void)
@@ -1362,7 +1361,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 struct rspamd_url *
 rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
-               struct html_tag_component *comp, bool is_image)
+               struct html_tag_component *comp)
 {
        struct rspamd_url *url;
        guint saved_flags = 0;
@@ -1506,8 +1505,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
                }
        }
 
-       rc = rspamd_url_parse (url, decoded, dlen, pool,
-                       is_image ? RSPAMD_URL_PARSE_TEXT :RSPAMD_URL_PARSE_HREF);
+       rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
 
        /* Filter some completely damaged urls */
        if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
@@ -1520,6 +1518,11 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
 
                if (no_prefix) {
                        url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+
+                       if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) {
+                               /* Ignore urls with both no schema and no tld */
+                               return NULL;
+                       }
                }
 
                decoded = url->string;
@@ -1606,7 +1609,7 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                                }
                        }
 
-                       url = rspamd_html_process_url (pool, start, len, comp, false);
+                       url = rspamd_html_process_url (pool, start, len, comp);
 
                        if (url && tag->extra == NULL) {
                                tag->extra = url;
@@ -1771,7 +1774,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                                        if (img->src) {
 
                                                img->url = rspamd_html_process_url (pool,
-                                                               img->src, fstr.len, NULL, true);
+                                                               img->src, fstr.len, NULL);
 
                                                if (img->url) {
                                                        img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
index 195727c1392cf9373a1a30344555551fec1ba940..a47d732f7cd28863827ce3c324923384ec1b7311 100644 (file)
@@ -240,7 +240,8 @@ struct rspamd_url_flag_name {
                {"url_displayed", RSPAMD_URL_FLAG_DISPLAY_URL, -1},
                {"image", RSPAMD_URL_FLAG_IMAGE, -1},
                {"query", RSPAMD_URL_FLAG_QUERY, -1},
-               {"content", RSPAMD_URL_FLAG_CONTENT, -1}
+               {"content", RSPAMD_URL_FLAG_CONTENT, -1},
+               {"no_tld", RSPAMD_URL_FLAG_NO_TLD, -1},
 };
 
 
@@ -2348,6 +2349,12 @@ rspamd_url_parse (struct rspamd_url *uri,
                                        uri->tldshift = uri->hostshift;
                                        uri->tldlen = uri->hostlen;
                                }
+                               else if (uri->flags & RSPAMD_URL_FLAG_SCHEMALESS) {
+                                       /* Ignore urls with both no schema and no tld */
+                                       return URI_ERRNO_TLD_MISSING;
+                               }
+
+                               uri->flags |= RSPAMD_URL_FLAG_NO_TLD;
                        }
                }
 
index 2a5892fc5e45fd1c052e91ba6fe1a6f57be04d1e..7fddd07ef0d05345fc25136c02a7a1ceb7e50a23 100644 (file)
@@ -37,6 +37,7 @@ enum rspamd_url_flags {
        RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
        RSPAMD_URL_FLAG_QUERY = 1u << 20u,
        RSPAMD_URL_FLAG_CONTENT = 1u << 21u,
+       RSPAMD_URL_FLAG_NO_TLD = 1u << 22u,
 };
 
 struct rspamd_url_tag {