@@ -859,7 +859,7 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, | |||
if (href_url->tldlen != text_url->tldlen || memcmp (href_url->tld, | |||
text_url->tld, href_url->tldlen) != 0) { | |||
href_url->is_phished = TRUE; | |||
href_url->flags |= RSPAMD_URL_FLAG_PHISHED; | |||
href_url->phished_url = text_url; | |||
} | |||
} |
@@ -645,7 +645,7 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud) | |||
ucl_object_insert_key (obj, elt, "surbl", 0, false); | |||
} | |||
elt = ucl_object_frombool (url->is_phished); | |||
elt = ucl_object_frombool (url->flags & RSPAMD_URL_FLAG_PHISHED); | |||
ucl_object_insert_key (obj, elt, "phished", 0, false); | |||
} | |||
ucl_array_append (cb->top, obj); |
@@ -1093,7 +1093,7 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool) | |||
uri->hostlen = strlen (uri->host); | |||
uri->tld = uri->host; | |||
uri->tldlen = uri->hostlen; | |||
uri->is_numeric = TRUE; | |||
uri->flags |= RSPAMD_URL_FLAG_NUMERIC; | |||
ret = TRUE; | |||
} | |||
else if (inet_pton (AF_INET6, buf, &in6) == 1) { | |||
@@ -1103,7 +1103,7 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool) | |||
uri->hostlen = strlen (uri->host); | |||
uri->tld = uri->host; | |||
uri->tldlen = uri->hostlen; | |||
uri->is_numeric = TRUE; | |||
uri->flags |= RSPAMD_URL_FLAG_NUMERIC; | |||
ret = TRUE; | |||
} | |||
else { | |||
@@ -1175,7 +1175,7 @@ rspamd_url_is_ip (struct rspamd_url *uri, rspamd_mempool_t *pool) | |||
uri->hostlen = strlen (uri->host); | |||
uri->tld = uri->host; | |||
uri->tldlen = uri->hostlen; | |||
uri->is_numeric = TRUE; | |||
uri->flags |= RSPAMD_URL_FLAG_NUMERIC|RSPAMD_URL_FLAG_OBSCURED; | |||
ret = TRUE; | |||
} | |||
} |
@@ -8,6 +8,12 @@ | |||
struct rspamd_task; | |||
struct mime_text_part; | |||
/*
 * Bit flags describing a parsed URL. Each constant occupies its own bit,
 * so several flags can be OR-ed together into a single `flags` field and
 * tested individually with `&`.
 */
enum rspamd_url_flags { | |||
/* URL may be phishing: set on an href URL whose TLD differs from its text URL's TLD */
RSPAMD_URL_FLAG_PHISHED = 1 << 0, | |||
/* URL host is an IP address: set by rspamd_url_is_ip when the host parses as numeric */
RSPAMD_URL_FLAG_NUMERIC = 1 << 1, | |||
/*
 * Set together with NUMERIC on the fallback path of rspamd_url_is_ip.
 * NOTE(review): presumably marks an IP written in a non-canonical form;
 * the exact condition is not visible here, confirm against that function.
 */
RSPAMD_URL_FLAG_OBSCURED = 1 << 2, | |||
}; | |||
struct rspamd_url { | |||
gchar *string; | |||
gint protocol; | |||
@@ -35,8 +41,7 @@ struct rspamd_url { | |||
guint tldlen; | |||
guint urllen; | |||
gboolean is_phished; /* URI maybe phishing */ | |||
gboolean is_numeric; /* URI contains IP address */ | |||
enum rspamd_url_flags flags; | |||
}; | |||
enum uri_errno { |
@@ -1383,7 +1383,7 @@ rspamd_url_hash (gconstpointer u) | |||
if (url->userlen > 0) { | |||
XXH64_update (&st, url->user, url->userlen); | |||
} | |||
XXH64_update (&st, &url->is_phished, sizeof (url->is_phished)); | |||
XXH64_update (&st, &url->flags, sizeof (url->flags)); | |||
return XXH64_digest (&st); | |||
} | |||
@@ -1426,7 +1426,7 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b) | |||
} | |||
else { | |||
r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen); | |||
if (r == 0 && u1->is_phished != u2->is_phished) { | |||
if (r == 0 && u1->flags != u2->flags) { | |||
/* Always insert phished urls to the tree */ | |||
return FALSE; | |||
} |
@@ -261,7 +261,7 @@ lua_url_is_phished (lua_State *L) | |||
struct rspamd_lua_url *url = lua_check_url (L, 1); | |||
if (url != NULL) { | |||
lua_pushboolean (L, url->url->is_phished); | |||
lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_PHISHED); | |||
} | |||
else { | |||
lua_pushnil (L); | |||
@@ -281,7 +281,8 @@ lua_url_get_phished (lua_State *L) | |||
struct rspamd_lua_url *purl, *url = lua_check_url (L, 1); | |||
if (url) { | |||
if (url->url->is_phished && url->url->phished_url != NULL) { | |||
if ((url->url->flags & RSPAMD_URL_FLAG_PHISHED) | |||
&& url->url->phished_url != NULL) { | |||
purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url)); | |||
rspamd_lua_setclass (L, "rspamd{url}", -1); | |||
purl->url = url->url->phished_url; | |||
@@ -476,7 +477,7 @@ lua_url_all (lua_State *L) | |||
pos = text; | |||
end = text + length; | |||
lua_newtable (L); | |||
while (pos <= end) { | |||
url = rspamd_url_get_next (pool, text, &pos, NULL); | |||
@@ -678,7 +678,7 @@ format_surbl_request (rspamd_mempool_t * pool, | |||
len = hostname->len + slen + 2; | |||
p = hostname->begin; | |||
is_numeric = url->is_numeric; | |||
is_numeric = url->flags & RSPAMD_URL_FLAG_NUMERIC; | |||
while (p - hostname->begin < (gint)hostname->len && dots_num < MAX_LEVELS) { | |||
if (*p == '.') { |