Browse Source

[Rework] Drop url tags

tags/2.0
Vsevolod Stakhov 4 years ago
parent
commit
6b676918bb
6 changed files with 1 additions and 289 deletions
  1. 0
    3
      src/libserver/html.c
  2. 0
    28
      src/libserver/url.c
  3. 0
    11
      src/libserver/url.h
  4. 1
    129
      src/lua/lua_url.c
  5. 0
    62
      src/plugins/surbl.c
  6. 0
    56
      test/functional/lua/url_tags.lua

+ 0
- 3
src/libserver/html.c View File

href_url->phished_url = text_url; href_url->phished_url = text_url;
phished_tld.begin = href_tok.begin; phished_tld.begin = href_tok.begin;
phished_tld.len = href_tok.len; phished_tld.len = href_tok.len;
rspamd_url_add_tag (text_url, "phishing",
rspamd_mempool_ftokdup (pool, &phished_tld),
pool);
text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
} }
} }

+ 0
- 28
src/libserver/url.c View File

} }
} }


/*
 * Attach one more `value` to the list stored under `tag` for this url.
 * The per-url tag table is created on first use and released together
 * with `pool`; the tag name and the value are both copied into `pool`.
 */
void
rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
		const gchar *value,
		rspamd_mempool_t *pool)
{
	struct rspamd_url_tag *head, *elt;

	g_assert (url != NULL && tag != NULL && value != NULL);

	if (!url->tags) {
		/* Lazily create the table; the pool destructor drops our reference */
		url->tags = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
		rspamd_mempool_add_destructor (pool,
				(rspamd_mempool_destruct_t)g_hash_table_unref, url->tags);
	}

	head = g_hash_table_lookup (url->tags, tag);

	elt = rspamd_mempool_alloc0 (pool, sizeof (*elt));
	elt->data = rspamd_mempool_strdup (pool, value);

	if (!head) {
		/* First value for this tag: the new element becomes the list head */
		g_hash_table_insert (url->tags, rspamd_mempool_strdup (pool, tag),
				elt);
	}

	/* With an empty head this just initialises `elt` as a one-element list */
	DL_APPEND (head, elt);
}

guint guint
rspamd_url_hash (gconstpointer u) rspamd_url_hash (gconstpointer u)
{ {

+ 0
- 11
src/libserver/url.h View File



enum rspamd_url_flags flags; enum rspamd_url_flags flags;
guint count; guint count;
GHashTable *tags;
}; };


enum uri_errno { enum uri_errno {
gsize start_offset, gsize start_offset,
gsize end_offset, gpointer ud); gsize end_offset, gpointer ud);


/**
* Adds a tag value for url; several values may be stored under the same tag
* @param url url to tag (must not be NULL)
* @param tag name of the tag (must not be NULL)
* @param value value stored under the tag (must not be NULL)
* @param pool memory pool used for the allocations made while adding the tag
*/
void rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
const gchar *value,
rspamd_mempool_t *pool);

guint rspamd_url_hash (gconstpointer u); guint rspamd_url_hash (gconstpointer u);


guint rspamd_email_hash (gconstpointer u); guint rspamd_email_hash (gconstpointer u);

+ 1
- 129
src/lua/lua_url.c View File

LUA_FUNCTION_DEF (url, is_html_displayed); LUA_FUNCTION_DEF (url, is_html_displayed);
LUA_FUNCTION_DEF (url, is_subject); LUA_FUNCTION_DEF (url, is_subject);
LUA_FUNCTION_DEF (url, get_phished); LUA_FUNCTION_DEF (url, get_phished);
LUA_FUNCTION_DEF (url, get_tag);
LUA_FUNCTION_DEF (url, get_count); LUA_FUNCTION_DEF (url, get_count);
LUA_FUNCTION_DEF (url, get_tags);
LUA_FUNCTION_DEF (url, add_tag);
LUA_FUNCTION_DEF (url, get_visible); LUA_FUNCTION_DEF (url, get_visible);
LUA_FUNCTION_DEF (url, create); LUA_FUNCTION_DEF (url, create);
LUA_FUNCTION_DEF (url, init); LUA_FUNCTION_DEF (url, init);
LUA_INTERFACE_DEF (url, is_html_displayed), LUA_INTERFACE_DEF (url, is_html_displayed),
LUA_INTERFACE_DEF (url, is_subject), LUA_INTERFACE_DEF (url, is_subject),
LUA_INTERFACE_DEF (url, get_phished), LUA_INTERFACE_DEF (url, get_phished),
LUA_INTERFACE_DEF (url, get_tag),
LUA_INTERFACE_DEF (url, get_tags),
LUA_INTERFACE_DEF (url, add_tag),

LUA_INTERFACE_DEF (url, get_visible), LUA_INTERFACE_DEF (url, get_visible),
LUA_INTERFACE_DEF (url, get_count), LUA_INTERFACE_DEF (url, get_count),
LUA_INTERFACE_DEF (url, get_flags), LUA_INTERFACE_DEF (url, get_flags),
return 1; return 1;
} }


/***
 * @method url:get_tag(tag)
 * Returns list of string values stored under a specific tag name for an url
 * @param {string} tag name of the tag to look up
 * @return {table/strings} list of values for this tag (empty table if the url has no such tag)
 */
static gint
lua_url_get_tag (lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_url *url = lua_check_url (L, 1);
	guint i;
	const gchar *tag = luaL_checkstring (L, 2);
	struct rspamd_url_tag *tval = NULL, *cur;

	if (url != NULL && tag != NULL) {
		if (url->url->tags != NULL) {
			tval = g_hash_table_lookup (url->url->tags, tag);
		}

		/*
		 * The result table is left on top of the stack and returned as is.
		 * There is no enclosing table here (unlike in get_tags), so nothing
		 * must be stored with lua_settable: doing so would pop the result
		 * and try to index the url userdata instead.
		 */
		lua_newtable (L);
		i = 1;

		/* An absent tag leaves tval NULL, so the loop yields an empty table */
		DL_FOREACH (tval, cur) {
			lua_pushstring (L, cur->data);
			lua_rawseti (L, -2, i ++);
		}
	}
	else {
		lua_pushnil (L);
	}

	return 1;
}


/***
 * @method url:get_tags()
 * Returns all tags of an url as a table indexed by tag name, where each
 * entry is the list of string values stored under that tag
 * @return {table} map of tag name to list of values (empty table if the url has no tags)
 */
static gint
lua_url_get_tags (lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_url *lurl = lua_check_url (L, 1);
	GHashTableIter iter;
	gpointer key, val;
	struct rspamd_url_tag *head, *elt;
	guint idx;

	if (lurl == NULL) {
		lua_pushnil (L);

		return 1;
	}

	if (lurl->url->tags == NULL) {
		lua_createtable (L, 0, 0);

		return 1;
	}

	/* Outer table: one hash slot per tag name */
	lua_createtable (L, 0, g_hash_table_size (lurl->url->tags));
	g_hash_table_iter_init (&iter, lurl->url->tags);

	while (g_hash_table_iter_next (&iter, &key, &val)) {
		head = val;
		idx = 1;

		lua_pushstring (L, (const gchar *)key);
		lua_newtable (L);

		/* Inner table: array of values in list order */
		DL_FOREACH (head, elt) {
			lua_pushstring (L, elt->data);
			lua_rawseti (L, -2, idx ++);
		}

		/* outer[key] = inner; pops both key and inner table */
		lua_settable (L, -3);
	}

	return 1;
}

/***
 * @method url:add_tag(tag, value, mempool)
 * Adds a new value under the given tag for url
 * @param {string} tag name of the tag to add
 * @param {string} value value to store under the tag; any non-string argument is replaced by the placeholder `"1"`
 * @param {mempool} mempool memory pool (e.g. `task:get_mempool()`)
 */
static gint
lua_url_add_tag (lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_url *url = lua_check_url (L, 1);
	/* The pool is always the third method argument (stack index 4) */
	rspamd_mempool_t *mempool = rspamd_lua_check_mempool (L, 4);
	const gchar *tag = luaL_checkstring (L, 2);
	const gchar *value = "1"; /* Placeholder used when no string value is given */

	if (lua_type (L, 3) == LUA_TSTRING) {
		value = lua_tostring (L, 3);
	}

	if (url == NULL || mempool == NULL || tag == NULL) {
		return luaL_error (L, "invalid arguments");
	}

	rspamd_url_add_tag (url->url, tag, value, mempool);

	return 0;
}

/*** /***
* @method url:get_phished() * @method url:get_phished()
* Get another URL that pretends to be this URL (e.g. used in phishing) * Get another URL that pretends to be this URL (e.g. used in phishing)

+ 0
- 62
src/plugins/surbl.c View File

bit->bit); bit->bit);
rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);


if (surbl_module_ctx->use_tags) {
rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool);
}
got_result = TRUE; got_result = TRUE;
} }
} }
resolved_name, suffix->suffix, resolved_name, suffix->suffix,
bit->bit); bit->bit);
rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);

if (surbl_module_ctx->use_tags) {
rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool);
}
} }
} }
} }
msg_info_surbl ("domain [%s] is in surbl %s", msg_info_surbl ("domain [%s] is in surbl %s",
resolved_name, suffix->suffix); resolved_name, suffix->suffix);
rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name); rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name);

if (surbl_module_ctx->use_tags) {
rspamd_url_add_tag (uri, "surbl", suffix->symbol, task->task_pool);
}
} }
else { else {
ina.s_addr = addr; ina.s_addr = addr;
else { else {
existing->count ++; existing->count ++;
} }

if (surbl_module_ctx->use_tags) {
rspamd_url_add_tag (param->url, "redirector", urlstr,
task->task_pool);
}
} }
else { else {
msg_info_surbl ("cannot parse redirector reply: %s", urlstr); msg_info_surbl ("cannot parse redirector reply: %s", urlstr);
} }
} }


/*
 * If the url already carries "surbl" tags, insert a result for every
 * stored tag value (used as the symbol name, with the url tld as option)
 * and report TRUE. Returns FALSE when the url has no "surbl" tag.
 */
static gboolean
surbl_test_tags (struct rspamd_task *task, struct redirector_param *param,
		struct rspamd_url *url)
{
	struct rspamd_url_tag *head, *elt;
	rspamd_ftok_t tld_tok;
	gchar *tld_str;

	if (url->tags == NULL) {
		return FALSE;
	}

	head = g_hash_table_lookup (url->tags, "surbl");

	if (head == NULL) {
		return FALSE;
	}

	/* We know results for this URL */
	tld_tok.begin = url->tld;
	tld_tok.len = url->tldlen;
	tld_str = rspamd_mempool_ftokdup (task->task_pool, &tld_tok);

	DL_FOREACH (head, elt) {
		msg_info_surbl ("domain [%s] is in surbl %s (tags)",
				tld_str, elt->data);
		rspamd_task_insert_result (task, elt->data, 1, tld_str);
	}

	return TRUE;
}

static void static void
surbl_tree_redirector_callback (gpointer key, gpointer value, void *data) surbl_tree_redirector_callback (gpointer key, gpointer value, void *data)
{ {
msg_debug_surbl ("check url %*s in %s", url->urllen, url->string, msg_debug_surbl ("check url %*s in %s", url->urllen, url->string,
param->suffix->suffix); param->suffix->suffix);


if (surbl_module_ctx->use_tags && surbl_test_tags (param->task, param, url)) {
return;
}

if (url->tags && g_hash_table_lookup (url->tags, "redirector")) {
/* URL is redirected, skip from checks */
return;
}

make_surbl_requests (url, param->task, param->item, param->suffix, FALSE, make_surbl_requests (url, param->task, param->item, param->suffix, FALSE,
param->tree, surbl_module_ctx); param->tree, surbl_module_ctx);
} }
redirected_url->phished_url = param->url; redirected_url->phished_url = param->url;
redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
} }

if (surbl_module_ctx->use_tags) {
rspamd_url_add_tag (param->url, "redirector", urlstr,
task->task_pool);
}
} }
else { else {
msg_info_surbl ("could not resolve '%*s' on redirector", msg_info_surbl ("could not resolve '%*s' on redirector",

+ 0
- 56
test/functional/lua/url_tags.lua View File


local rspamd_logger = require 'rspamd_logger'

-- ADDED_TAGS: when the request carries an `addtags` header, attach a fixed
-- set of test tags to every url of the task. The callback always returns
-- true; the option string says which path was taken.
rspamd_config:register_symbol({
  name = 'ADDED_TAGS',
  score = 1.0,
  callback = function(task)
    local requested = task:get_request_header('addtags')
    if not requested then
      return true, 'nope! not requested'
    end

    local url_list = task:get_urls()
    if not url_list or not url_list[1] then
      return true, 'nope! found no urls'
    end

    local pool = task:get_mempool()
    -- { tag name, value } pairs, applied in this order to each url
    local wanted = {
      { 'test1', 'meta1' },
      { 'test1', 'meta2' },
      { 'test2', 'http://www.example.com' },
    }

    for _, url in ipairs(url_list) do
      for _, pair in ipairs(wanted) do
        url:add_tag(pair[1], pair[2], pool)
      end
    end

    return true, 'no worry'
  end
})

-- FOUND_TAGS: verify that every url of the task carries the tags the
-- ADDED_TAGS symbol is expected to have set. The callback always returns
-- true; the option string is 'no worry' on success or names the first
-- mismatch found.
rspamd_config:register_symbol({
  name = 'FOUND_TAGS',
  score = 1.0,
  callback = function(task)
    local urls = task:get_urls()
    if not (urls and urls[1]) then
      return true, 'nope! found no urls'
    end
    for _, u in ipairs(urls) do
      local tags = u:get_tags()
      rspamd_logger.debugx(task, 'tags: %1', tags)
      if not tags['test1'] then
        return true, 'no key - test1'
      end
      local found1, found2 = false, false
      for _, e in ipairs(tags['test1']) do
        if e == 'meta1' then found1 = true end
        if e == 'meta2' then found2 = true end
      end
      if not (found1 and found2) then
        return true, 'missing metatags in test1'
      end
      if not tags['test2'] then
        return true, 'no key - test2'
      end
      -- `not a == b` parses as `(not a) == b`, which is never true here,
      -- so the comparison has to be spelled with `~=`.
      local test2_value = tags['test2'][1]
      if test2_value ~= 'http://www.example.com' then
        -- tostring() keeps the message buildable when the value is nil
        return true, 'wrong value in test2 metatag: ' .. tostring(test2_value)
      end
    end
    return true, 'no worry'
  end
})

Loading…
Cancel
Save