|
|
@@ -191,8 +191,7 @@ khash_t(color_by_name) *html_color_by_name; |
|
|
|
|
|
|
|
static struct rspamd_url *rspamd_html_process_url (rspamd_mempool_t *pool, |
|
|
|
const gchar *start, guint len, |
|
|
|
struct html_tag_component *comp, |
|
|
|
bool is_image); |
|
|
|
struct html_tag_component *comp); |
|
|
|
|
|
|
|
static void |
|
|
|
rspamd_html_library_init (void) |
|
|
@@ -1362,7 +1361,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool, |
|
|
|
|
|
|
|
struct rspamd_url * |
|
|
|
rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, |
|
|
|
struct html_tag_component *comp, bool is_image) |
|
|
|
struct html_tag_component *comp) |
|
|
|
{ |
|
|
|
struct rspamd_url *url; |
|
|
|
guint saved_flags = 0; |
|
|
@@ -1506,8 +1505,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
rc = rspamd_url_parse (url, decoded, dlen, pool, |
|
|
|
is_image ? RSPAMD_URL_PARSE_TEXT :RSPAMD_URL_PARSE_HREF); |
|
|
|
rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF); |
|
|
|
|
|
|
|
/* Filter some completely damaged urls */ |
|
|
|
if (rc == URI_ERRNO_OK && url->hostlen > 0 && |
|
|
@@ -1520,6 +1518,11 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len, |
|
|
|
|
|
|
|
if (no_prefix) { |
|
|
|
url->flags |= RSPAMD_URL_FLAG_SCHEMALESS; |
|
|
|
|
|
|
|
if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) { |
|
|
|
/* Ignore urls with both no schema and no tld */ |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
decoded = url->string; |
|
|
@@ -1606,7 +1609,7 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
url = rspamd_html_process_url (pool, start, len, comp, false); |
|
|
|
url = rspamd_html_process_url (pool, start, len, comp); |
|
|
|
|
|
|
|
if (url && tag->extra == NULL) { |
|
|
|
tag->extra = url; |
|
|
@@ -1771,7 +1774,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag, |
|
|
|
if (img->src) { |
|
|
|
|
|
|
|
img->url = rspamd_html_process_url (pool, |
|
|
|
img->src, fstr.len, NULL, true); |
|
|
|
img->src, fstr.len, NULL); |
|
|
|
|
|
|
|
if (img->url) { |
|
|
|
img->url->flags |= RSPAMD_URL_FLAG_IMAGE; |