From 4d1b94d4c3b4be24df8661b4fc15103a346dfa1e Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 7 Feb 2017 12:12:37 +0000 Subject: [PATCH] [Feature] Rework surbl module to avoid extra redirector calls --- src/lua/lua_url.c | 3 - src/plugins/surbl.c | 192 +++++++++++++++++++++++++++----------------- 2 files changed, 117 insertions(+), 78 deletions(-) diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index 2f1270099..60a01b6ce 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -327,9 +327,7 @@ lua_url_get_tag (lua_State *L) struct rspamd_lua_url *url = lua_check_url (L, 1); guint i; const gchar *tag = luaL_checkstring (L, 2); - GHashTableIter it; struct rspamd_url_tag *tval, *cur; - gpointer k, v; if (url != NULL && tag != NULL) { @@ -373,7 +371,6 @@ lua_url_get_tags (lua_State *L) { struct rspamd_lua_url *url = lua_check_url (L, 1); guint i; - const gchar *tag; GHashTableIter it; struct rspamd_url_tag *tval, *cur; gpointer k, v; diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 403c7241e..a93845d41 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -61,10 +61,13 @@ G_STRFUNC, \ __VA_ARGS__) +#define SURBL_REDIRECTOR_CALLBACK "SURBL_REDIRECTOR_CALLBACK" + static struct surbl_ctx *surbl_module_ctx = NULL; static const guint64 rspamd_surbl_cb_magic = 0xe09b8536f80de0d1ULL; static void surbl_test_url (struct rspamd_task *task, void *user_data); +static void surbl_test_redirector (struct rspamd_task *task, void *user_data); static void surbl_dns_callback (struct rdns_reply *reply, gpointer arg); static void surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg); static void process_dns_results (struct rspamd_task *task, @@ -606,6 +609,8 @@ surbl_module_parse_rule (const ucl_object_t* value, struct rspamd_config* cfg, cb_id = rspamd_symbols_cache_add_symbol (cfg->cache, "SURBL_CALLBACK", 0, surbl_test_url, new_suffix, SYMBOL_TYPE_CALLBACK, -1); + rspamd_symbols_cache_add_dependency (cfg->cache, cb_id, + SURBL_REDIRECTOR_CALLBACK); nrules++; new_suffix->callback_id = cb_id; cur = ucl_object_lookup (cur_rule, "bits"); @@ -719,6 +724,10 @@ surbl_module_config (struct rspamd_config *cfg) ucl_object_insert_key (monitored_opts, ucl_object_fromstring ("nxdomain"), "rcode", 0, false); + (void)rspamd_symbols_cache_add_symbol (cfg->cache, SURBL_REDIRECTOR_CALLBACK, + 0, surbl_test_redirector, NULL, + SYMBOL_TYPE_CALLBACK, -1); + if ((value = rspamd_config_get_module_opt (cfg, "surbl", "redirector")) != NULL) { surbl_module_ctx->redirectors = rspamd_upstreams_create (cfg->ups_ctx); @@ -1368,11 +1377,6 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; } - make_surbl_requests (redirected_url, - param->task, - param->suffix, - FALSE, - param->tree); rspamd_url_add_tag (param->url, "redirector", urlstr, task->task_pool); } @@ -1397,7 +1401,7 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, static void register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, - struct suffix_item *suffix, const gchar *rule, GHashTable *tree) + const gchar *rule) { gint s = -1; struct redirector_param *param; @@ -1417,7 +1421,6 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, msg_info_surbl ("<%s> cannot create tcp socket failed: %s", task->message_id, strerror (errno)); - make_surbl_requests (url, task, suffix, FALSE, tree); return; } @@ -1436,9 +1439,7 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, msg = rspamd_http_new_message (HTTP_REQUEST); msg->url = rspamd_fstring_assign (msg->url, url->string, url->urllen); param->sock = s; - param->suffix = suffix; param->redirector = selected; - param->tree = tree; timeout = rspamd_mempool_alloc (task->task_pool, sizeof (struct timeval)); double_to_tv (surbl_module_ctx->read_timeout, timeout); @@ -1518,7 +1519,7 @@ surbl_test_tags (struct rspamd_task *task, struct redirector_param *param, processed = TRUE; } - else if (redirected_url) { + else if (redirected_url && param->suffix) { /* Just register surbl call for the redirected url */ make_surbl_requests (redirected_url, param->task, @@ -1540,7 +1541,7 @@ surbl_test_tags (struct rspamd_task *task, struct redirector_param *param, } static void -surbl_tree_url_callback (gpointer key, gpointer value, void *data) +surbl_tree_redirector_callback (gpointer key, gpointer value, void *data) { struct redirector_param *param = data, *nparam; struct rspamd_task *task, **ptask; @@ -1562,77 +1563,82 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data) return; } - if (surbl_module_ctx->use_redirector && surbl_module_ctx->redirector_tlds) { - /* Search in trie */ - srch.begin = url->tld; - srch.len = url->tldlen; - re = g_hash_table_lookup (surbl_module_ctx->redirector_tlds, &srch); + /* Search in trie */ + srch.begin = url->tld; + srch.len = url->tldlen; + re = g_hash_table_lookup (surbl_module_ctx->redirector_tlds, &srch); - if (re) { - if (re == NO_REGEXP) { - found = TRUE; - } - else if (rspamd_regexp_search (re, url->string, 0, - NULL, NULL, TRUE, NULL)) { - found = TRUE; - } + if (re) { + if (re == NO_REGEXP) { + found = TRUE; + } + else if (rspamd_regexp_search (re, url->string, 0, + NULL, NULL, TRUE, NULL)) { + found = TRUE; + } - if (found) { - found_tld = rspamd_mempool_ftokdup (task->task_pool, &srch); + if (found) { + found_tld = rspamd_mempool_ftokdup (task->task_pool, &srch); - if (surbl_module_ctx->redirector_symbol != NULL) { - rspamd_task_insert_result (param->task, - surbl_module_ctx->redirector_symbol, - 1, - found_tld); - } + if (surbl_module_ctx->redirector_symbol != NULL) { + rspamd_task_insert_result (param->task, + surbl_module_ctx->redirector_symbol, + 1, + found_tld); + } - if (surbl_module_ctx->redirector_cbid != -1) { - nparam = rspamd_mempool_alloc (task->task_pool, - sizeof (*nparam)); - /* Copy to detach from the shared param */ - memcpy (nparam, param, sizeof (*param)); - nparam->url = url; - L = task->cfg->lua_state; - lua_rawgeti (L, LUA_REGISTRYINDEX, - surbl_module_ctx->redirector_cbid); - ptask = lua_newuserdata (L, sizeof (*ptask)); - *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); - purl = lua_newuserdata (L, sizeof (*purl)); - *purl = url; - rspamd_lua_setclass (L, "rspamd{url}", -1); - lua_pushlightuserdata (L, nparam); - - if (lua_pcall (L, 3, 0, 0) != 0) { - msg_err_task ("cannot call for redirector script: %s", - lua_tostring (L, -1)); - lua_pop (L, 1); - } - else { - nparam->w = rspamd_session_get_watcher (task->s); - rspamd_session_watcher_push (task->s); - } + if (surbl_module_ctx->redirector_cbid != -1) { + nparam = rspamd_mempool_alloc (task->task_pool, + sizeof (*nparam)); + /* Copy to detach from the shared param */ + memcpy (nparam, param, sizeof (*param)); + nparam->url = url; + L = task->cfg->lua_state; + lua_rawgeti (L, LUA_REGISTRYINDEX, + surbl_module_ctx->redirector_cbid); + ptask = lua_newuserdata (L, sizeof (*ptask)); + *ptask = task; + rspamd_lua_setclass (L, "rspamd{task}", -1); + purl = lua_newuserdata (L, sizeof (*purl)); + *purl = url; + rspamd_lua_setclass (L, "rspamd{url}", -1); + lua_pushlightuserdata (L, nparam); + + if (lua_pcall (L, 3, 0, 0) != 0) { + msg_err_task ("cannot call for redirector script: %s", + lua_tostring (L, -1)); + lua_pop (L, 1); } else { - register_redirector_call (url, - param->task, - param->suffix, - found_tld, - param->tree); + nparam->w = rspamd_session_get_watcher (task->s); + rspamd_session_watcher_push (task->s); } - - return; + } + else { + register_redirector_call (url, + param->task, + found_tld); } } + } +} - make_surbl_requests (url, param->task, param->suffix, FALSE, - param->tree); +static void +surbl_tree_url_callback (gpointer key, gpointer value, void *data) +{ + struct redirector_param *param = data; + struct rspamd_url *url = value; + + if (url->hostlen <= 0) { + return; } - else { - make_surbl_requests (url, param->task, param->suffix, FALSE, - param->tree); + + if (surbl_module_ctx->use_tags && surbl_test_tags (param->task, param, url)) { + return; } + + make_surbl_requests (url, param->task, param->suffix, FALSE, + param->tree); } static void @@ -1686,6 +1692,47 @@ surbl_test_url (struct rspamd_task *task, void *user_data) } } +static void +surbl_test_redirector (struct rspamd_task *task, void *user_data) +{ + struct redirector_param *param; + guint i, j; + struct rspamd_mime_text_part *part; + struct html_image *img; + struct rspamd_url *url; + + if (!surbl_module_ctx->use_redirector || !surbl_module_ctx->redirector_tlds) { + return; + } + + param = rspamd_mempool_alloc0 (task->task_pool, sizeof (*param)); + param->task = task; + param->suffix = NULL; + g_hash_table_foreach (task->urls, surbl_tree_redirector_callback, param); + + /* We also need to check and process img URLs */ + for (i = 0; i < task->text_parts->len; i ++) { + part = g_ptr_array_index (task->text_parts, i); + if (part->html && part->html->images) { + for (j = 0; j < part->html->images->len; j ++) { + img = g_ptr_array_index (part->html->images, j); + + if ((img->flags & RSPAMD_HTML_FLAG_IMAGE_EXTERNAL) + && img->src) { + url = rspamd_html_process_url (task->task_pool, + img->src, strlen (img->src), NULL); + + if (url) { + surbl_tree_redirector_callback (url, url, param); + msg_debug_surbl ("checked image url %s for redirectors", + img->src); + } + } + } + } + } +} + static gint surbl_register_redirect_handler (lua_State *L) @@ -1755,11 +1802,6 @@ surbl_continue_process_handler (lua_State *L) redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; } - make_surbl_requests (redirected_url, - param->task, - param->suffix, - FALSE, - param->tree); rspamd_url_add_tag (param->url, "redirector", urlstr, task->task_pool); } -- 2.39.5