Browse Source

[Fix] One more fix to skip images that are not urls

tags/2.6
Vsevolod Stakhov 4 years ago
parent
commit
d01de01be2
3 changed files with 19 additions and 8 deletions
  1. 10
    7
      src/libserver/html.c
  2. 8
    1
      src/libserver/url.c
  3. 1
    0
      src/libserver/url.h

+ 10
- 7
src/libserver/html.c View File

@@ -191,8 +191,7 @@ khash_t(color_by_name) *html_color_by_name;

static struct rspamd_url *rspamd_html_process_url (rspamd_mempool_t *pool,
const gchar *start, guint len,
struct html_tag_component *comp,
bool is_image);
struct html_tag_component *comp);

static void
rspamd_html_library_init (void)
@@ -1362,7 +1361,7 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,

struct rspamd_url *
rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
struct html_tag_component *comp, bool is_image)
struct html_tag_component *comp)
{
struct rspamd_url *url;
guint saved_flags = 0;
@@ -1506,8 +1505,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
}
}

rc = rspamd_url_parse (url, decoded, dlen, pool,
is_image ? RSPAMD_URL_PARSE_TEXT :RSPAMD_URL_PARSE_HREF);
rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);

/* Filter some completely damaged urls */
if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
@@ -1520,6 +1518,11 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,

if (no_prefix) {
url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;

if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) {
/* Ignore URLs that have neither a schema nor a TLD */
return NULL;
}
}

decoded = url->string;
@@ -1606,7 +1609,7 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag,
}
}

url = rspamd_html_process_url (pool, start, len, comp, false);
url = rspamd_html_process_url (pool, start, len, comp);

if (url && tag->extra == NULL) {
tag->extra = url;
@@ -1771,7 +1774,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
if (img->src) {

img->url = rspamd_html_process_url (pool,
img->src, fstr.len, NULL, true);
img->src, fstr.len, NULL);

if (img->url) {
img->url->flags |= RSPAMD_URL_FLAG_IMAGE;

+ 8
- 1
src/libserver/url.c View File

@@ -240,7 +240,8 @@ struct rspamd_url_flag_name {
{"url_displayed", RSPAMD_URL_FLAG_DISPLAY_URL, -1},
{"image", RSPAMD_URL_FLAG_IMAGE, -1},
{"query", RSPAMD_URL_FLAG_QUERY, -1},
{"content", RSPAMD_URL_FLAG_CONTENT, -1}
{"content", RSPAMD_URL_FLAG_CONTENT, -1},
{"no_tld", RSPAMD_URL_FLAG_NO_TLD, -1},
};


@@ -2348,6 +2349,12 @@ rspamd_url_parse (struct rspamd_url *uri,
uri->tldshift = uri->hostshift;
uri->tldlen = uri->hostlen;
}
else if (uri->flags & RSPAMD_URL_FLAG_SCHEMALESS) {
/* Ignore URLs that have neither a schema nor a TLD */
return URI_ERRNO_TLD_MISSING;
}

uri->flags |= RSPAMD_URL_FLAG_NO_TLD;
}
}


+ 1
- 0
src/libserver/url.h View File

@@ -37,6 +37,7 @@ enum rspamd_url_flags {
RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
RSPAMD_URL_FLAG_QUERY = 1u << 20u,
RSPAMD_URL_FLAG_CONTENT = 1u << 21u,
RSPAMD_URL_FLAG_NO_TLD = 1u << 22u,
};

struct rspamd_url_tag {

Loading…
Cancel
Save