From: Vsevolod Stakhov Date: Fri, 1 Oct 2010 17:13:32 +0000 (+0400) Subject: * Improve definitions of redirector domains X-Git-Tag: 0.3.3~24 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=dc42a41c5e1d738373305468d61f70e0f2ee6d1a;p=rspamd.git * Improve definitions of redirector domains * Add support of regexp to check only specific urls for each domains * Check redirector in trie and not extract specific domains parts * Add POE errors handling for redirector --- diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 0619d6cbf..25132a0f9 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -58,6 +58,7 @@ static void dns_callback (struct rspamd_dns_reply *reply, gp static void process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr); static int urls_command_handler (struct worker_task *task); +#define NO_REGEXP (gpointer)-1 #define SURBL_ERROR surbl_error_quark () #define WHITELIST_ERROR 0 @@ -122,6 +123,73 @@ fin_exceptions_list (memory_pool_t * pool, struct map_cb_data *data) } } +static void +redirector_insert (gpointer st, gconstpointer key, gpointer value) +{ + GHashTable *t = st; + const char *p = key, *begin = key; + gchar *new; + gsize len; + GRegex *re = NO_REGEXP; + GError *err = NULL; + guint idx; + + while (*p && !g_ascii_isspace (*p)) { + p ++; + } + + len = p - begin; + new = g_malloc (len + 1); + memcpy (new, begin, len); + new[len] = '\0'; + idx = surbl_module_ctx->redirector_ptrs->len; + rspamd_trie_insert (surbl_module_ctx->redirector_trie, new, idx); + g_ptr_array_add (surbl_module_ctx->redirector_ptrs, new); + + if (g_ascii_isspace (*p)) { + while (g_ascii_isspace (*p) && *p) { + p ++; + } + if (*p) { + re = g_regex_new (p, G_REGEX_RAW | G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE | G_REGEX_CASELESS, + 0, &err); + if (re == NULL) { + msg_warn ("could not read regexp: %s while reading regexp %s", err->message, p); + re = NO_REGEXP; + } + } + } + g_hash_table_insert (t, new, re); 
+} + +static void +redirector_item_free (gpointer p) +{ + GRegex *re; + if (p != NULL && p != NO_REGEXP) { + re = (GRegex *)p; + g_regex_unref (re); + } +} + +static u_char * +read_redirectors_list (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data) +{ + if (data->cur_data == NULL) { + data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, redirector_item_free); + } + + return abstract_parse_list (pool, chunk, len, data, (insert_func) redirector_insert); +} + +void +fin_redirectors_list (memory_pool_t * pool, struct map_cb_data *data) +{ + if (data->prev_data) { + g_hash_table_destroy (data->prev_data); + } +} + int surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) { @@ -136,6 +204,8 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) surbl_module_ctx->tld2_file = NULL; surbl_module_ctx->whitelist_file = NULL; surbl_module_ctx->redirectors_number = 0; + surbl_module_ctx->redirector_trie = rspamd_trie_create (TRUE); + surbl_module_ctx->redirector_ptrs = g_ptr_array_new (); surbl_module_ctx->redirector_hosts = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); surbl_module_ctx->whitelist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); @@ -148,6 +218,9 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->suffixes); memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->bits); + memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) rspamd_trie_free, surbl_module_ctx->redirector_trie); + memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_ptr_array_unref, surbl_module_ctx->redirector_ptrs); + *ctx = (struct module_ctx *)surbl_module_ctx; register_protocol_command ("urls", urls_command_handler); @@ -219,7 +292,7 
@@ surbl_module_config (struct config_file *cfg) surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT; } if ((value = get_module_opt (cfg, "surbl", "redirector_hosts_map")) != NULL) { - add_map (value, read_host_list, fin_host_list, (void **)&surbl_module_ctx->redirector_hosts); + add_map (value, read_redirectors_list, fin_redirectors_list, (void **)&surbl_module_ctx->redirector_hosts); } else { surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT; @@ -299,6 +372,7 @@ surbl_module_reconfig (struct config_file *cfg) surbl_module_ctx->tld2_file = NULL; surbl_module_ctx->whitelist_file = NULL; surbl_module_ctx->redirectors_number = 0; + surbl_module_ctx->redirector_trie = rspamd_trie_create (TRUE); surbl_module_ctx->redirector_hosts = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); surbl_module_ctx->whitelist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); @@ -311,6 +385,9 @@ surbl_module_reconfig (struct config_file *cfg) memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->suffixes); memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->bits); + memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) rspamd_trie_free, surbl_module_ctx->redirector_trie); + memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_ptr_array_unref, surbl_module_ctx->redirector_ptrs); + /* Perform configure */ return surbl_module_config (cfg); } @@ -793,14 +870,15 @@ register_redirector_call (struct uri *url, struct worker_task *task, GTree * url } static gboolean -tree_url_callback (gpointer key, gpointer value, void *data) +surbl_tree_url_callback (gpointer key, gpointer value, void *data) { struct redirector_param *param = data; struct worker_task *task = param->task; struct uri *url = value; f_str_t f; - char *urlstr; - GError *err = NULL; + char *red_domain; + GRegex *re; 
+ guint idx; debug_task ("check url %s", struri (url)); @@ -808,8 +886,15 @@ tree_url_callback (gpointer key, gpointer value, void *data) if (surbl_module_ctx->use_redirector) { f.begin = url->host; f.len = url->hostlen; - if ((urlstr = format_surbl_request (param->task->task_pool, &f, NULL, FALSE, &err, TRUE)) != NULL) { - if (g_hash_table_lookup (surbl_module_ctx->redirector_hosts, urlstr) != NULL) { + /* Search in trie */ + if (surbl_module_ctx->redirector_trie && + rspamd_trie_lookup (surbl_module_ctx->redirector_trie, url->host, url->hostlen, &idx)) { + /* Get corresponding prefix */ + red_domain = g_ptr_array_index (surbl_module_ctx->redirector_ptrs, idx); + /* Try to find corresponding regexp */ + re = g_hash_table_lookup (surbl_module_ctx->redirector_hosts, red_domain); + if (re == NO_REGEXP || g_regex_match (re, url->string, 0, NULL)) { + /* If no regexp found or founded regexp matches url string register redirector's call */ register_redirector_call (url, param->task, param->tree, param->suffix); param->task->save.saved++; return FALSE; @@ -848,10 +933,10 @@ surbl_test_url (struct worker_task *task, void *user_data) while (cur) { part = cur->data; if (part->urls) { - g_tree_foreach (part->urls, tree_url_callback, &param); + g_tree_foreach (part->urls, surbl_tree_url_callback, &param); } if (part->html_urls) { - g_tree_foreach (part->html_urls, tree_url_callback, &param); + g_tree_foreach (part->html_urls, surbl_tree_url_callback, &param); } cur = g_list_next (cur); diff --git a/src/plugins/surbl.h b/src/plugins/surbl.h index 4aa059265..1df6fae31 100644 --- a/src/plugins/surbl.h +++ b/src/plugins/surbl.h @@ -6,6 +6,7 @@ #include "../modules.h" #include "../cfg_file.h" #include "../memcached.h" +#include "../trie.h" #define DEFAULT_REDIRECTOR_PORT 8080 #define DEFAULT_SURBL_WEIGHT 10 @@ -39,6 +40,8 @@ struct surbl_ctx { GHashTable **exceptions; GHashTable *whitelist; GHashTable *redirector_hosts; + rspamd_trie_t *redirector_trie; + GPtrArray *redirector_ptrs; unsigned 
use_redirector; struct redirector_upstream *redirectors; guint32 redirectors_number; diff --git a/utils/redirector.pl.in b/utils/redirector.pl.in index 3ed8838e2..ab68e2151 100755 --- a/utils/redirector.pl.in +++ b/utils/redirector.pl.in @@ -421,6 +421,18 @@ sub process_input { $kernel->post( "cl", "request", "got_response", $new_request, [0, ""]); } +sub sig_DIE { + my( $sig, $ex ) = @_[ ARG0, ARG1 ]; + write_log ("", "$$: error in $ex->{event}: $ex->{error_str}"); + $poe_kernel->sig_handled(); + + # Send the signal to session that sent the original event. + if( $ex->{source_session} ne $_[SESSION] ) { + $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex ); + } +} + + ############################### Main code fragment ################################## @@ -458,7 +470,8 @@ if (!$cfg{debug}) { } # Reopen log on SIGUSR1 -$SIG{USR1} = sub { $do_reopen_log = 1; }; +$poe_kernel->sig(DIE => 'sig_DIE'); +$SIG{USR1} = sub { $do_reopen_log = 1; $poe_kernel->sig_handled(); }; $SIG{INT} = sub { $poe_kernel->stop(); }; $SIG{QUIT} = sub { $poe_kernel->stop(); }; $SIG{PIPE} = 'IGNORE';