Browse Source

[Fix] Urls: Fix processing of html urls when it comes to the flags

Issue: #3664
tags/3.0
Vsevolod Stakhov 3 years ago
parent
commit
b7467f9d29
4 changed files with 54 additions and 22 deletions
  1. src/libserver/html.c (+8, -3)
  2. src/libserver/url.c (+42, -16)
  3. src/libserver/url.h (+3, -2)
  4. src/lua/lua_task.c (+1, -1)

+ 8
- 3
src/libserver/html.c View File

@@ -1764,7 +1764,7 @@ rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset,

url->flags |= RSPAMD_URL_FLAG_QUERY;

if (rspamd_url_set_add_or_increase (cbd->url_set, url) && cbd->part_urls) {
if (rspamd_url_set_add_or_increase(cbd->url_set, url, false) && cbd->part_urls) {
g_ptr_array_add (cbd->part_urls, url);
}

@@ -1903,7 +1903,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
if (img->url) {
img->url->flags |= RSPAMD_URL_FLAG_IMAGE;

if (rspamd_url_set_add_or_increase (url_set, img->url) &&
if (rspamd_url_set_add_or_increase(url_set, img->url, false) &&
part_urls) {
g_ptr_array_add (part_urls, img->url);
}
@@ -3245,10 +3245,15 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
if (url != NULL) {

if (url_set != NULL) {
if (rspamd_url_set_add_or_increase (url_set, url)) {
struct rspamd_url *maybe_existing =
rspamd_url_set_add_or_return (url_set, url);
if (maybe_existing == url) {
rspamd_process_html_url (pool, url, url_set,
part_urls);
}
else {
url = maybe_existing;
}
}

href_offset = dest->len;

+ 42
- 16
src/libserver/url.c View File

@@ -3377,7 +3377,7 @@ rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
url->flags |= RSPAMD_URL_FLAG_QUERY;


if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url)) {
if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false)) {
if (cbd->part && cbd->part->mime_part->urls) {
g_ptr_array_add (cbd->part->mime_part->urls, url);
}
@@ -3433,8 +3433,8 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,

url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;

if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url) &&
cbd->part->mime_part->urls) {
if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false) &&
cbd->part->mime_part->urls) {
g_ptr_array_add (cbd->part->mime_part->urls, url);
}

@@ -3592,7 +3592,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
}
}

rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false);

/* We also search the query for additional url inside */
if (url->querylen > 0) {
@@ -3622,8 +3622,8 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
}
}

rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls),
query_url);
rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls),
query_url, false);
}
}
}
@@ -4044,21 +4044,44 @@ rspamd_url_protocol_from_string (const gchar *str)


bool
rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
struct rspamd_url *u)
rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set,
struct rspamd_url *u,
bool enforce_replace)
{
khiter_t k;
gint r;

k = kh_put (rspamd_url_hash, set, u, &r);
k = kh_get (rspamd_url_hash, set, u);

if (r == 0) {
if (k != kh_end (set)) {
/* Existing url */
struct rspamd_url *ex = kh_key (set, k);

ex->count ++;
#define SUSPICIOUS_URL_FLAGS (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_OBSCURED|RSPAMD_URL_FLAG_ZW_SPACES)
if (enforce_replace) {
kh_key (set, k) = u;
u->count++;
}
else {
if (u->flags & SUSPICIOUS_URL_FLAGS) {
if (!(ex->flags & SUSPICIOUS_URL_FLAGS)) {
/* Propagate new url to an old one */
kh_key (set, k) = u;
u->count++;
}
else {
ex->count++;
}
}
else {
ex->count++;
}
}

return false;
}
else {
k = kh_put (rspamd_url_hash, set, u, &r);
}

return true;
}
@@ -4071,12 +4094,15 @@ rspamd_url_set_add_or_return (khash_t (rspamd_url_hash) *set,
gint r;

if (set) {
k = kh_put (rspamd_url_hash, set, u, &r);
k = kh_get (rspamd_url_hash, set, u);

if (r == 0) {
struct rspamd_url *ex = kh_key (set, k);
if (k != kh_end (set)) {
return kh_key (set, k);
}
else {
k = kh_put (rspamd_url_hash, set, u, &r);

return ex;
return kh_key (set, k);
}
}


+ 3
- 2
src/libserver/url.h View File

@@ -296,8 +296,9 @@ KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
* @param u
* @return true if a new url has been added
*/
bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
struct rspamd_url *u);
bool rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set,
struct rspamd_url *u,
bool enforce_replace);

/**
* Same as rspamd_url_set_add_or_increase but returns the existing url if found

+ 1
- 1
src/lua/lua_task.c View File

@@ -2507,7 +2507,7 @@ lua_task_inject_url (lua_State * L)
}

if (task && task->message && url && url->url) {
if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url->url)) {
if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url->url, false)) {
if (mpart && mpart->urls) {
/* Also add url to the mime part */
g_ptr_array_add (mpart->urls, url->url);

Loading…
Cancel
Save