From 6b676918bbb037c111fa2616f1709ead9ac3c788 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 21 Aug 2019 15:52:43 +0100 Subject: [PATCH] [Rework] Drop url tags --- src/libserver/html.c | 3 - src/libserver/url.c | 28 ------- src/libserver/url.h | 11 --- src/lua/lua_url.c | 130 +------------------------------ src/plugins/surbl.c | 62 --------------- test/functional/lua/url_tags.lua | 56 ------------- 6 files changed, 1 insertion(+), 289 deletions(-) delete mode 100644 test/functional/lua/url_tags.lua diff --git a/src/libserver/html.c b/src/libserver/html.c index 4ff310f1c..fa33ffdfb 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -721,9 +721,6 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool, href_url->phished_url = text_url; phished_tld.begin = href_tok.begin; phished_tld.len = href_tok.len; - rspamd_url_add_tag (text_url, "phishing", - rspamd_mempool_ftokdup (pool, &phished_tld), - pool); text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED; } } diff --git a/src/libserver/url.c b/src/libserver/url.c index 9314ce2bb..d770b2191 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -3176,34 +3176,6 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, } } -void -rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag, - const gchar *value, - rspamd_mempool_t *pool) -{ - struct rspamd_url_tag *found, *ntag; - - g_assert (url != NULL && tag != NULL && value != NULL); - - if (url->tags == NULL) { - url->tags = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t)g_hash_table_unref, url->tags); - } - - found = g_hash_table_lookup (url->tags, tag); - - ntag = rspamd_mempool_alloc0 (pool, sizeof (*ntag)); - ntag->data = rspamd_mempool_strdup (pool, value); - - if (found == NULL) { - g_hash_table_insert (url->tags, rspamd_mempool_strdup (pool, tag), - ntag); - } - - DL_APPEND (found, ntag); -} - guint rspamd_url_hash (gconstpointer u) { diff --git a/src/libserver/url.h b/src/libserver/url.h index 6f1ccf59f..d9e15e212 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -70,7 +70,6 @@ struct rspamd_url { enum rspamd_url_flags flags; guint count; - GHashTable *tags; }; enum uri_errno { @@ -214,16 +213,6 @@ void rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, gsize end_offset, gpointer ud); -/** - * Adds a tag for url - * @param url - * @param tag - * @param pool - */ -void rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag, - const gchar *value, - rspamd_mempool_t *pool); - guint rspamd_url_hash (gconstpointer u); guint rspamd_email_hash (gconstpointer u); diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index a0f8c4648..648de08d8 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -59,10 +59,7 @@ LUA_FUNCTION_DEF (url, is_obscured); LUA_FUNCTION_DEF (url, is_html_displayed); LUA_FUNCTION_DEF (url, is_subject); LUA_FUNCTION_DEF (url, get_phished); -LUA_FUNCTION_DEF (url, get_tag); LUA_FUNCTION_DEF (url, get_count); -LUA_FUNCTION_DEF (url, get_tags); -LUA_FUNCTION_DEF (url, add_tag); LUA_FUNCTION_DEF (url, get_visible); LUA_FUNCTION_DEF (url, create); LUA_FUNCTION_DEF (url, init); @@ -87,9 +84,7 @@ static const struct luaL_reg urllib_m[] = { LUA_INTERFACE_DEF (url, is_html_displayed), LUA_INTERFACE_DEF (url, is_subject), LUA_INTERFACE_DEF (url, get_phished), - LUA_INTERFACE_DEF (url, get_tag), - LUA_INTERFACE_DEF (url, get_tags), - LUA_INTERFACE_DEF (url, add_tag), + LUA_INTERFACE_DEF (url, get_visible), LUA_INTERFACE_DEF (url, get_count), LUA_INTERFACE_DEF (url, get_flags), @@ -438,129 +433,6 @@ lua_url_is_subject (lua_State *L) return 1; } -/*** - * @method url:get_tag(tag) - * Returns list of string for a specific tagname for an url - * @return {table/strings} list of tags for an url - */ -static gint -lua_url_get_tag (lua_State *L) -{ - LUA_TRACE_POINT; - struct rspamd_lua_url *url = lua_check_url (L, 1); - guint i; - const gchar *tag = luaL_checkstring (L, 2); - struct rspamd_url_tag *tval, *cur; - - if (url != NULL && tag != NULL) { - - if (url->url->tags == NULL) { - lua_createtable (L, 0, 0); - } - else { - tval = g_hash_table_lookup (url->url->tags, tag); - - if (tval) { - lua_newtable (L); - i = 1; - - DL_FOREACH (tval, cur) { - lua_pushstring (L, cur->data); - lua_rawseti (L, -2, i ++); - } - - lua_settable (L, -3); - } - else { - lua_createtable (L, 0, 0); - } - } - } - else { - lua_pushnil (L); - } - - return 1; -} - - -/*** - * @method url:get_tags() - * Returns list of string tags for an url - * @return {table/strings} list of tags for an url - */ -static gint -lua_url_get_tags (lua_State *L) -{ - LUA_TRACE_POINT; - struct rspamd_lua_url *url = lua_check_url (L, 1); - guint i; - GHashTableIter it; - struct rspamd_url_tag *tval, *cur; - gpointer k, v; - - if (url != NULL) { - if (url->url->tags == NULL) { - lua_createtable (L, 0, 0); - } - else { - lua_createtable (L, 0, g_hash_table_size (url->url->tags)); - g_hash_table_iter_init (&it, url->url->tags); - - while (g_hash_table_iter_next (&it, &k, &v)) { - tval = v; - lua_pushstring (L, (const gchar *)k); - lua_newtable (L); - i = 1; - - DL_FOREACH (tval, cur) { - lua_pushstring (L, cur->data); - lua_rawseti (L, -2, i ++); - } - - lua_settable (L, -3); - } - } - } - else { - lua_pushnil (L); - } - - return 1; -} - -/*** - * @method url:add_tag(tag, mempool) - * Adds a new tag for url - * @param {string} tag new tag to add - * @param {mempool} mempool memory pool (e.g. `task:get_pool()`) - */ -static gint -lua_url_add_tag (lua_State *L) -{ - LUA_TRACE_POINT; - struct rspamd_lua_url *url = lua_check_url (L, 1); - rspamd_mempool_t *mempool = rspamd_lua_check_mempool (L, 4); - const gchar *tag = luaL_checkstring (L, 2); - const gchar *value; - - if (lua_type (L, 3) == LUA_TSTRING) { - value = lua_tostring (L, 3); - } - else { - value = "1"; /* Some stupid placeholder */ - } - - if (url != NULL && mempool != NULL && tag != NULL) { - rspamd_url_add_tag (url->url, tag, value, mempool); - } - else { - return luaL_error (L, "invalid arguments"); - } - - return 0; -} - /*** * @method url:get_phished() * Get another URL that pretends to be this URL (e.g. used in phishing) diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 0d03f9516..4113130db 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -1584,9 +1584,6 @@ process_dns_results (struct rspamd_task *task, bit->bit); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); - if (surbl_module_ctx->use_tags) { - rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool); - } got_result = TRUE; } } @@ -1605,10 +1602,6 @@ process_dns_results (struct rspamd_task *task, resolved_name, suffix->suffix, bit->bit); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); - - if (surbl_module_ctx->use_tags) { - rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool); - } } } } @@ -1618,10 +1611,6 @@ process_dns_results (struct rspamd_task *task, msg_info_surbl ("domain [%s] is in surbl %s", resolved_name, suffix->suffix); rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name); - - if (surbl_module_ctx->use_tags) { - rspamd_url_add_tag (uri, "surbl", suffix->symbol, task->task_pool); - } } else { ina.s_addr = addr; @@ -1795,11 +1784,6 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, else { existing->count ++; } - - if (surbl_module_ctx->use_tags) { - rspamd_url_add_tag (param->url, "redirector", urlstr, - task->task_pool); - } } else { msg_info_surbl ("cannot parse redirector reply: %s", urlstr); @@ -1879,38 +1863,6 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, } } -static gboolean -surbl_test_tags (struct rspamd_task *task, struct redirector_param *param, - struct rspamd_url *url) -{ - struct rspamd_url_tag *tag = NULL, *cur; - gchar *ftld = NULL; - rspamd_ftok_t tld; - gboolean processed = FALSE; - - if (url->tags) { - tag = g_hash_table_lookup (url->tags, "surbl"); - } - - if (tag) { - tld.begin = url->tld; - tld.len = url->tldlen; - - ftld = rspamd_mempool_ftokdup (task->task_pool, &tld); - /* We know results for this URL */ - - DL_FOREACH (tag, cur) { - msg_info_surbl ("domain [%s] is in surbl %s (tags)", - ftld, cur->data); - rspamd_task_insert_result (task, cur->data, 1, ftld); - } - - processed = TRUE; - } - - return processed; -} - static void surbl_tree_redirector_callback (gpointer key, gpointer value, void *data) { @@ -2026,15 +1978,6 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data) msg_debug_surbl ("check url %*s in %s", url->urllen, url->string, param->suffix->suffix); - if (surbl_module_ctx->use_tags && surbl_test_tags (param->task, param, url)) { - return; - } - - if (url->tags && g_hash_table_lookup (url->tags, "redirector")) { - /* URL is redirected, skip from checks */ - return; - } - make_surbl_requests (url, param->task, param->item, param->suffix, FALSE, param->tree, surbl_module_ctx); } @@ -2324,11 +2267,6 @@ surbl_continue_process_handler (lua_State *L) redirected_url->phished_url = param->url; redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; } - - if (surbl_module_ctx->use_tags) { - rspamd_url_add_tag (param->url, "redirector", urlstr, - task->task_pool); - } } else { msg_info_surbl ("could not resolve '%*s' on redirector", diff --git a/test/functional/lua/url_tags.lua b/test/functional/lua/url_tags.lua deleted file mode 100644 index 9dc38ac72..000000000 --- a/test/functional/lua/url_tags.lua +++ /dev/null @@ -1,56 +0,0 @@ - -local rspamd_logger = require 'rspamd_logger' - -rspamd_config:register_symbol({ - name = 'ADDED_TAGS', - score = 1.0, - callback = function(task) - if not task:get_request_header('addtags') then - return true, 'nope! not requested' - end - local urls = task:get_urls() - if not (urls and urls[1]) then - return true, 'nope! found no urls' - end - local mpool = task:get_mempool() - for _, u in ipairs(urls) do - u:add_tag('test1', 'meta1', mpool) - u:add_tag('test1', 'meta2', mpool) - u:add_tag('test2', 'http://www.example.com', mpool) - end - return true, 'no worry' - end -}) - -rspamd_config:register_symbol({ - name = 'FOUND_TAGS', - score = 1.0, - callback = function(task) - local urls = task:get_urls() - if not (urls and urls[1]) then - return true, 'nope! found no urls' - end - for _, u in ipairs(urls) do - local tags = u:get_tags() - rspamd_logger.debugx(task, 'tags: %1', tags) - if not tags['test1'] then - return true, 'no key - test1' - end - local found1, found2 = false, false - for _, e in ipairs(tags['test1']) do - if e == 'meta1' then found1 = true end - if e == 'meta2' then found2 = true end - end - if not (found1 and found2) then - return true, 'missing metatags in test1' - end - if not tags['test2'] then - return true, 'no key - test2' - end - if not tags['test2'][1] == 'http://www.example.com' then - return true, 'wrong value in test2 metatag: ' .. tags['test2'][1] - end - end - return true, 'no worry' - end -}) -- 2.39.5