]> source.dussan.org Git - rspamd.git/commitdiff
* Improve definitions of redirector domains
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 1 Oct 2010 17:13:32 +0000 (21:13 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 1 Oct 2010 17:13:32 +0000 (21:13 +0400)
* Add support of regexp to check only specific urls for each domains
* Check redirector in trie and not extract specific domains parts
* Add POE errors handling for redirector

src/plugins/surbl.c
src/plugins/surbl.h
utils/redirector.pl.in

index 0619d6cbfba5b3b23ef18e1a78fab664f149ab79..25132a0f91f8f9c6ea0e3cfea6638ddd203d2508 100644 (file)
@@ -58,6 +58,7 @@ static void                     dns_callback (struct rspamd_dns_reply *reply, gp
 static void                     process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr);
 static int                      urls_command_handler (struct worker_task *task);
 
+#define NO_REGEXP (gpointer)-1
 
 #define SURBL_ERROR surbl_error_quark ()
 #define WHITELIST_ERROR 0
@@ -122,6 +123,73 @@ fin_exceptions_list (memory_pool_t * pool, struct map_cb_data *data)
        }
 }
 
+static void
+redirector_insert (gpointer st, gconstpointer key, gpointer value)
+{
+       GHashTable                     *t = st;
+       const char                     *p = key, *begin = key;
+       gchar                          *new;
+       gsize                           len;
+       GRegex                                     *re = NO_REGEXP;
+       GError                         *err = NULL;
+       guint                           idx;
+
+       while (*p && !g_ascii_isspace (*p)) {
+               p ++;
+       }
+
+       len = p - begin;
+       new = g_malloc (len + 1);
+       memcpy (new, begin, len);
+       new[len] = '\0';
+       idx = surbl_module_ctx->redirector_ptrs->len;
+       rspamd_trie_insert (surbl_module_ctx->redirector_trie, new, idx);
+       g_ptr_array_add (surbl_module_ctx->redirector_ptrs, new);
+
+       if (g_ascii_isspace (*p)) {
+               while (g_ascii_isspace (*p) && *p) {
+                       p ++;
+               }
+               if (*p) {
+                       re = g_regex_new (p, G_REGEX_RAW | G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE | G_REGEX_CASELESS,
+                                       0, &err);
+                       if (re == NULL) {
+                               msg_warn ("could not read regexp: %s while reading regexp %s", err->message, p);
+                               re = NO_REGEXP;
+                       }
+               }
+       }
+       g_hash_table_insert (t, new, re);
+}
+
+static void
+redirector_item_free (gpointer p)
+{
+       GRegex                       *re;
+       if (p != NULL && p != NO_REGEXP) {
+               re = (GRegex *)p;
+               g_regex_unref (re);
+       }
+}
+
+static u_char                         *
+read_redirectors_list (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data)
+{
+       if (data->cur_data == NULL) {
+               data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, redirector_item_free);
+       }
+
+       return abstract_parse_list (pool, chunk, len, data, (insert_func) redirector_insert);
+}
+
+void
+fin_redirectors_list (memory_pool_t * pool, struct map_cb_data *data)
+{
+       if (data->prev_data) {
+               g_hash_table_destroy (data->prev_data);
+       }
+}
+
 int
 surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
 {
@@ -136,6 +204,8 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
        surbl_module_ctx->tld2_file = NULL;
        surbl_module_ctx->whitelist_file = NULL;
        surbl_module_ctx->redirectors_number = 0;
+       surbl_module_ctx->redirector_trie = rspamd_trie_create (TRUE);
+       surbl_module_ctx->redirector_ptrs = g_ptr_array_new ();
 
        surbl_module_ctx->redirector_hosts = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
        surbl_module_ctx->whitelist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
@@ -148,6 +218,9 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
        memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->suffixes);
        memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->bits);
 
+       memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) rspamd_trie_free, surbl_module_ctx->redirector_trie);
+       memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_ptr_array_unref, surbl_module_ctx->redirector_ptrs);
+
        *ctx = (struct module_ctx *)surbl_module_ctx;
 
        register_protocol_command ("urls", urls_command_handler);
@@ -219,7 +292,7 @@ surbl_module_config (struct config_file *cfg)
                surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT;
        }
        if ((value = get_module_opt (cfg, "surbl", "redirector_hosts_map")) != NULL) {
-               add_map (value, read_host_list, fin_host_list, (void **)&surbl_module_ctx->redirector_hosts);
+               add_map (value, read_redirectors_list, fin_redirectors_list, (void **)&surbl_module_ctx->redirector_hosts);
        }
        else {
                surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT;
@@ -299,6 +372,7 @@ surbl_module_reconfig (struct config_file *cfg)
        surbl_module_ctx->tld2_file = NULL;
        surbl_module_ctx->whitelist_file = NULL;
        surbl_module_ctx->redirectors_number = 0;
+       surbl_module_ctx->redirector_trie = rspamd_trie_create (TRUE);
 
        surbl_module_ctx->redirector_hosts = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
        surbl_module_ctx->whitelist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
@@ -311,6 +385,9 @@ surbl_module_reconfig (struct config_file *cfg)
        memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->suffixes);
        memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->bits);
        
+       memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) rspamd_trie_free, surbl_module_ctx->redirector_trie);
+       memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_ptr_array_unref, surbl_module_ctx->redirector_ptrs);
+
        /* Perform configure */
        return surbl_module_config (cfg);
 }
@@ -793,14 +870,15 @@ register_redirector_call (struct uri *url, struct worker_task *task, GTree * url
 }
 
 static                          gboolean
-tree_url_callback (gpointer key, gpointer value, void *data)
+surbl_tree_url_callback (gpointer key, gpointer value, void *data)
 {
        struct redirector_param        *param = data;
        struct worker_task             *task = param->task;
        struct uri                     *url = value;
        f_str_t                         f;
-       char                           *urlstr;
-       GError                         *err = NULL;
+       char                           *red_domain;
+       GRegex                         *re;
+       guint                           idx;
 
        debug_task ("check url %s", struri (url));
 
@@ -808,8 +886,15 @@ tree_url_callback (gpointer key, gpointer value, void *data)
        if (surbl_module_ctx->use_redirector) {
                f.begin = url->host;
                f.len = url->hostlen;
-               if ((urlstr = format_surbl_request (param->task->task_pool, &f, NULL, FALSE, &err, TRUE)) != NULL) {
-                       if (g_hash_table_lookup (surbl_module_ctx->redirector_hosts, urlstr) != NULL) {
+               /* Search in trie */
+               if (surbl_module_ctx->redirector_trie &&
+                               rspamd_trie_lookup (surbl_module_ctx->redirector_trie, url->host, url->hostlen, &idx)) {
+                       /* Get corresponding prefix */
+                       red_domain = g_ptr_array_index (surbl_module_ctx->redirector_ptrs, idx);
+                       /* Try to find corresponding regexp */
+                       re = g_hash_table_lookup (surbl_module_ctx->redirector_hosts, red_domain);
+                       if (re == NO_REGEXP || g_regex_match (re, url->string, 0, NULL)) {
+                               /* If no regexp found or founded regexp matches url string register redirector's call */
                                register_redirector_call (url, param->task, param->tree, param->suffix);
                                param->task->save.saved++;
                                return FALSE;
@@ -848,10 +933,10 @@ surbl_test_url (struct worker_task *task, void *user_data)
        while (cur) {
                part = cur->data;
                if (part->urls) {
-                       g_tree_foreach (part->urls, tree_url_callback, &param);
+                       g_tree_foreach (part->urls, surbl_tree_url_callback, &param);
                }
                if (part->html_urls) {
-                       g_tree_foreach (part->html_urls, tree_url_callback, &param);
+                       g_tree_foreach (part->html_urls, surbl_tree_url_callback, &param);
                }
 
                cur = g_list_next (cur);
index 4aa059265802973df9076940a3712c59a52d0629..1df6fae3175735f00324b108e16e837a4e125f15 100644 (file)
@@ -6,6 +6,7 @@
 #include "../modules.h"
 #include "../cfg_file.h"
 #include "../memcached.h"
+#include "../trie.h"
 
 #define DEFAULT_REDIRECTOR_PORT 8080
 #define DEFAULT_SURBL_WEIGHT 10
@@ -39,6 +40,8 @@ struct surbl_ctx {
        GHashTable **exceptions;
        GHashTable *whitelist;
        GHashTable *redirector_hosts;
+       rspamd_trie_t *redirector_trie;
+       GPtrArray *redirector_ptrs;
        unsigned use_redirector;
        struct redirector_upstream *redirectors;
        guint32 redirectors_number;
index 3ed8838e21306c8951dde91b81bf79d7b707e610..ab68e21512ed751c7b834da8a8de116a6eef2644 100755 (executable)
@@ -421,6 +421,18 @@ sub process_input {
     $kernel->post( "cl", "request", "got_response", $new_request, [0, ""]);
 }
 
+sub sig_DIE {
+       my( $sig, $ex ) = @_[ ARG0, ARG1 ];
+       write_log ("", "$$: error in $ex->{event}: $ex->{error_str}");
+       $poe_kernel->sig_handled();
+
+       # Send the signal to session that sent the original event.
+       if( $ex->{source_session} ne $_[SESSION] ) {
+               $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex );
+       }
+}
+
+
 ############################### Main code fragment ##################################
 
 
@@ -458,7 +470,8 @@ if (!$cfg{debug}) {
 }
 
 # Reopen log on SIGUSR1
-$SIG{USR1} = sub { $do_reopen_log = 1; };
+$poe_kernel->sig(DIE => 'sig_DIE');
+$SIG{USR1} = sub { $do_reopen_log = 1; $poe_kernel->sig_handled(); };
 $SIG{INT} = sub { $poe_kernel->stop(); };
 $SIG{QUIT} = sub { $poe_kernel->stop(); };
 $SIG{PIPE} = 'IGNORE';