source.dussan.org Git - rspamd.git/commitdiff
[Rework] Rework image urls processing
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 29 Aug 2019 10:42:46 +0000 (11:42 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 29 Aug 2019 10:42:46 +0000 (11:42 +0100)
src/libserver/html.c
src/libserver/url.h
src/lua/lua_url.c

index fa33ffdfb349d5588a9c5e4f79f4686afbeafd33..014beff18ffbf387539985ec5b10c8bb78ace43e 100644 (file)
@@ -1677,7 +1677,7 @@ rspamd_html_process_data_image (rspamd_mempool_t *pool,
 
 static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
-               struct html_content *hc)
+               struct html_content *hc, GHashTable *urls)
 {
        struct html_tag_component *comp;
        struct html_image *img;
@@ -1717,8 +1717,23 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                                else {
                                        img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
                                        if (img->src) {
+
                                                img->url = rspamd_html_process_url (pool,
                                                                img->src, fstr.len, NULL);
+
+                                               if (img->url) {
+                                                       struct rspamd_url *turl = g_hash_table_lookup (urls,
+                                                                       img->url);
+
+                                                       img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+
+                                                       if (turl == NULL) {
+                                                               g_hash_table_insert (urls, img->url, img->url);
+                                                       }
+                                                       else {
+                                                               turl->count++;
+                                                       }
+                                               }
                                        }
                                }
                        }
@@ -3041,7 +3056,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                }
 
                                if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
-                                       rspamd_html_process_img_tag (pool, cur_tag, hc);
+                                       rspamd_html_process_img_tag (pool, cur_tag, hc, urls);
                                }
                                else if (cur_tag->flags & FL_BLOCK) {
                                        struct html_block *bl;
index ae21b6ab320846259785d869d441199ec5d9b420..83a2a7f1789eb82b37b3497ffbf86c71f0cf0cb8 100644 (file)
@@ -33,6 +33,7 @@ enum rspamd_url_flags {
        RSPAMD_URL_FLAG_UNNORMALISED = 1u << 16u,
        RSPAMD_URL_FLAG_ZW_SPACES = 1u << 17u,
        RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
+       RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
 };
 
 struct rspamd_url_tag {
index b30e560c93a4d9a8086bbbe975d140efb6aea453..8742a6027738d9c9013243f131b2f4d605e4fc55 100644 (file)
@@ -881,6 +881,7 @@ lua_url_get_flags (lua_State *L)
                PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
                PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
                PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
+               PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
        }
        else {
                return luaL_error (L, "invalid arguments");