diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-01 13:12:11 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-05-01 13:12:11 +0100 |
commit | d01de01be22e81accd84a7f346b27269c3c91990 (patch) | |
tree | 431279b314df28995b7d7a0fc7506502abbd80be /src/libserver | |
parent | 91d051fea0bbed8f005493bdf0bd99b680ee7393 (diff) | |
download | rspamd-d01de01be22e81accd84a7f346b27269c3c91990.tar.gz rspamd-d01de01be22e81accd84a7f346b27269c3c91990.zip |
[Fix] One more fix to skip images that are not urls
Diffstat (limited to 'src/libserver')
-rw-r--r-- | src/libserver/html.c | 17 | ||||
-rw-r--r-- | src/libserver/url.c | 9 | ||||
-rw-r--r-- | src/libserver/url.h | 1 |
3 files changed, 19 insertions, 8 deletions
diff --git a/src/libserver/html.c b/src/libserver/html.c index 80fa3479b..f8c43bdd5 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -191,8 +191,7 @@ khash_t(color_by_name) *html_color_by_name; static struct rspamd_url *rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, - struct html_tag_component *comp, - bool is_image); + struct html_tag_component *comp); static void rspamd_html_library_init (void) @@ -1362,7 +1361,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool, struct rspamd_url * rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, - struct html_tag_component *comp, bool is_image) + struct html_tag_component *comp) { struct rspamd_url *url; guint saved_flags = 0; @@ -1506,8 +1505,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, } } - rc = rspamd_url_parse (url, decoded, dlen, pool, - is_image ? RSPAMD_URL_PARSE_TEXT :RSPAMD_URL_PARSE_HREF); + rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); /* Filter some completely damaged urls */ if (rc == URI_ERRNO_OK && url->hostlen > 0 && @@ -1520,6 +1518,11 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, if (no_prefix) { url->flags |= RSPAMD_URL_FLAG_SCHEMALESS; + + if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) { + /* Ignore urls with both no schema and no tld */ + return NULL; + } } decoded = url->string; @@ -1606,7 +1609,7 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag, } } - url = rspamd_html_process_url (pool, start, len, comp, false); + url = rspamd_html_process_url (pool, start, len, comp); if (url && tag->extra == NULL) { tag->extra = url; @@ -1771,7 +1774,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag, if (img->src) { img->url = rspamd_html_process_url (pool, - img->src, fstr.len, NULL, true); + img->src, fstr.len, NULL); if (img->url) { img->url->flags |= RSPAMD_URL_FLAG_IMAGE; diff --git a/src/libserver/url.c b/src/libserver/url.c index 195727c13..a47d732f7 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -240,7 +240,8 @@ struct rspamd_url_flag_name { {"url_displayed", RSPAMD_URL_FLAG_DISPLAY_URL, -1}, {"image", RSPAMD_URL_FLAG_IMAGE, -1}, {"query", RSPAMD_URL_FLAG_QUERY, -1}, - {"content", RSPAMD_URL_FLAG_CONTENT, -1} + {"content", RSPAMD_URL_FLAG_CONTENT, -1}, + {"no_tld", RSPAMD_URL_FLAG_NO_TLD, -1}, }; @@ -2348,6 +2349,12 @@ rspamd_url_parse (struct rspamd_url *uri, uri->tldshift = uri->hostshift; uri->tldlen = uri->hostlen; } + else if (uri->flags & RSPAMD_URL_FLAG_SCHEMALESS) { + /* Ignore urls with both no schema and no tld */ + return URI_ERRNO_TLD_MISSING; + } + + uri->flags |= RSPAMD_URL_FLAG_NO_TLD; } } diff --git a/src/libserver/url.h b/src/libserver/url.h index 2a5892fc5..7fddd07ef 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -37,6 +37,7 @@ enum rspamd_url_flags { RSPAMD_URL_FLAG_IMAGE = 1u << 19u, RSPAMD_URL_FLAG_QUERY = 1u << 20u, RSPAMD_URL_FLAG_CONTENT = 1u << 21u, + RSPAMD_URL_FLAG_NO_TLD = 1u << 22u, }; struct rspamd_url_tag { |