From dbc8bb8dbc278b80dd13e732da9647c9df856fa4 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 13 Dec 2010 20:54:22 +0300 Subject: [PATCH] Implement checking options for modules Implement checking for classifier options Fix redirector to handle timeouts and invalid replies properly Fix surbl module not to check each url --- src/cfg_file.h | 1 + src/cfg_utils.c | 48 +++++++++++++++++++--- src/cfg_xml.c | 84 +++++++++++++++++++++++++++++++-------- src/cfg_xml.h | 6 ++- src/classifiers/bayes.c | 1 + src/main.c | 9 +++++ src/plugins/chartable.c | 3 ++ src/plugins/fuzzy_check.c | 12 ++++++ src/plugins/regexp.c | 3 ++ src/plugins/spf.c | 5 +++ src/plugins/surbl.c | 15 ++++++- utils/redirector.pl.in | 11 +++++ 12 files changed, 174 insertions(+), 24 deletions(-) diff --git a/src/cfg_file.h b/src/cfg_file.h index 2595cc779..ab4a98361 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -415,6 +415,7 @@ struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_co struct metric* check_metric_conf (struct config_file *cfg, struct metric *c); gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const gchar *line); gboolean read_xml_config (struct config_file *cfg, const gchar *filename); +gboolean check_modules_config (struct config_file *cfg); #endif /* ifdef CFG_FILE_H */ /* diff --git a/src/cfg_utils.c b/src/cfg_utils.c index 2a0f548b8..b98746fe9 100644 --- a/src/cfg_utils.c +++ b/src/cfg_utils.c @@ -901,12 +901,12 @@ static GMarkupParser xml_parser = { gboolean read_xml_config (struct config_file *cfg, const gchar *filename) { - struct stat st; + struct stat st; gint fd; - gchar *data; - gboolean res; - GMarkupParseContext *ctx; - GError *err = NULL; + gchar *data; + gboolean res; + GMarkupParseContext *ctx; + GError *err = NULL; struct rspamd_xml_userdata ud; @@ -939,6 +939,44 @@ read_xml_config (struct config_file *cfg, const gchar *filename) return res; } +static void +modules_config_callback (gpointer key, gpointer value, gpointer ud) +{ + extern GHashTable *module_options; + GHashTable *cur_module; + GList *cur; + struct module_opt *opt; + const gchar *mname = key; + gboolean *res = ud; + + if ((cur_module = g_hash_table_lookup (module_options, mname)) == NULL) { + msg_warn ("module %s has not registered any options but is presented in configuration", mname); + *res = FALSE; + return; + } + + cur = value; + while (cur) { + opt = cur->data; + + if (!opt->is_lua && !check_module_option (mname, opt->param, opt->value)) { + *res = FALSE; + return; + } + + cur = g_list_next (cur); + } +} + +gboolean +check_modules_config (struct config_file *cfg) +{ + gboolean res = TRUE; + + g_hash_table_foreach (cfg->modules_opts, modules_config_callback, &res); + return res; +} + /* * vi:ts=4 */ diff --git a/src/cfg_xml.c b/src/cfg_xml.c index 5ed8229bb..374941bb0 100644 --- a/src/cfg_xml.c +++ b/src/cfg_xml.c @@ -433,9 +433,9 @@ static struct xml_parser_rule grammar[] = { }, }; -static GHashTable *module_options = NULL, - *worker_options = NULL, - *classifier_options = NULL; +GHashTable *module_options = NULL, + *worker_options = NULL, + *classifier_options = NULL; GQuark xml_error_quark (void) @@ -1107,10 +1107,10 @@ handle_classifier_opt (struct config_file *cfg, struct rspamd_xml_userdata *ctx, (classifier_config = g_hash_table_lookup (classifier_options, ccf->classifier->name)) == NULL || (cparam = g_hash_table_lookup (classifier_config, name)) == NULL) { msg_warn ("unregistered classifier attribute '%s' for classifier %s", name, ccf->classifier->name); - g_hash_table_insert (ccf->opts, (char *)name, memory_pool_strdup (cfg->cfg_pool, data)); + return FALSE; } else { - return cparam->handler (cfg, ctx, attrs, data, NULL, cparam->user_data, cparam->offset); + g_hash_table_insert (ccf->opts, (char *)name, memory_pool_strdup (cfg->cfg_pool, data)); } return TRUE; @@ -1690,13 +1690,51 @@ rspamd_xml_error (GMarkupParseContext *context, GError *error, gpointer user_dat /* Register handlers for specific parts of config */ /* Checker for module options */ -static gboolean -check_module_option (struct config_file *cfg, const gchar *mname, const gchar *optname, const gchar *data) +struct option_callback_data { + const gchar *optname; + gboolean res; + struct xml_config_param *param; +}; + +static void +module_option_callback (gpointer key, gpointer value, gpointer ud) +{ + const gchar *optname = key; + static gchar rebuf[512]; + struct option_callback_data *cd = ud; + GRegex *re; + GError *err = NULL; + gsize relen; + + if (*optname == '/') { + relen = strcspn (optname + 1, "/"); + if (relen > sizeof (rebuf)) { + relen = sizeof (rebuf); + } + rspamd_strlcpy (rebuf, optname + 1, relen); + /* This is a regexp so compile and check it */ + re = g_regex_new (rebuf, G_REGEX_CASELESS, 0, &err); + if (err != NULL) { + msg_err ("failed to compile regexp for option '%s', error was: %s, regexp was: %s", cd->optname, err->message, rebuf); + return; + } + if (g_regex_match (re, cd->optname, 0, NULL)) { + cd->res = TRUE; + cd->param = value; + } + } + + return; +} + +gboolean +check_module_option (const gchar *mname, const gchar *optname, const gchar *data) { struct xml_config_param *param; enum module_opt_type type; GHashTable *module; gchar *err_str; + struct option_callback_data cd; if (module_options == NULL) { msg_warn ("no module options registered while checking option %s for module %s", mname, optname); @@ -1708,8 +1746,15 @@ check_module_option (struct config_file *cfg, const gchar *mname, const gchar *o } if ((param = g_hash_table_lookup (module, optname)) == NULL) { - msg_warn ("module %s has not registered option %s", mname, optname); - return FALSE; + /* Try to handle regexp options */ + cd.optname = optname; + cd.res = FALSE; + g_hash_table_foreach (module, module_option_callback, &cd); + if (!cd.res) { + msg_warn ("module %s has not registered option %s", mname, optname); + return FALSE; + } + param = cd.param; } type = param->offset; @@ -1734,6 +1779,13 @@ check_module_option (struct config_file *cfg, const gchar *mname, const gchar *o return FALSE; } break; + case MODULE_OPT_TYPE_DOUBLE: + (void)strtod (data, &err_str); + if (*err_str != '\0') { + msg_warn ("non-numeric data for option: '%s' for module: '%s' at position: '%s'", optname, mname, err_str); + return FALSE; + } + break; case MODULE_OPT_TYPE_TIME: (void)parse_time (data, TIME_SECONDS); if (errno != 0) { @@ -1833,7 +1885,7 @@ register_worker_opt (gint wtype, const gchar *optname, element_handler_func func /* Register new classifier option */ void -register_classifier_opt (const gchar *ctype, const gchar *optname, element_handler_func func, gpointer dest_struct, gint offset) +register_classifier_opt (const gchar *ctype, const gchar *optname) { struct xml_config_param *param; GHashTable *classifier; @@ -1848,9 +1900,9 @@ register_classifier_opt (const gchar *ctype, const gchar *optname, element_handl if ((param = g_hash_table_lookup (classifier, optname)) == NULL) { /* Register new param */ param = g_malloc (sizeof (struct xml_config_param)); - param->handler = func; - param->user_data = dest_struct; - param->offset = offset; + param->handler = NULL; + param->user_data = NULL; + param->offset = 0; param->name = optname; g_hash_table_insert (classifier, (char *)optname, param); } @@ -1859,9 +1911,9 @@ register_classifier_opt (const gchar *ctype, const gchar *optname, element_handl msg_warn ("replace old handler for param '%s'", optname); g_free (param); param = g_malloc (sizeof (struct xml_config_param)); - param->handler = func; - param->user_data = dest_struct; - param->offset = offset; + param->handler = NULL; + param->user_data = NULL; + param->offset = 0; param->name = optname; g_hash_table_insert (classifier, (char *)optname, param); } diff --git a/src/cfg_xml.h b/src/cfg_xml.h index 7c976e78e..db3dd9e8f 100644 --- a/src/cfg_xml.h +++ b/src/cfg_xml.h @@ -32,6 +32,7 @@ enum module_opt_type { MODULE_OPT_TYPE_STRING = 0, MODULE_OPT_TYPE_INT, MODULE_OPT_TYPE_UINT, + MODULE_OPT_TYPE_DOUBLE, MODULE_OPT_TYPE_TIME, MODULE_OPT_TYPE_MAP, MODULE_OPT_TYPE_SIZE, @@ -153,7 +154,10 @@ void register_module_opt (const gchar *mname, const gchar *optname, enum module_ void register_worker_opt (gint wtype, const gchar *optname, element_handler_func func, gpointer dest_struct, gint offset); /* Register new classifier option */ -void register_classifier_opt (const gchar *ctype, const gchar *optname, element_handler_func func, gpointer dest_struct, gint offset); +void register_classifier_opt (const gchar *ctype, const gchar *optname); + +/* Check validity of module option */ +gboolean check_module_option (const gchar *mname, const gchar *optname, const gchar *data); /* Dumper functions */ gboolean xml_dump_config (struct config_file *cfg, const gchar *filename); diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c index 274f82ad0..64783e0b4 100644 --- a/src/classifiers/bayes.c +++ b/src/classifiers/bayes.c @@ -157,6 +157,7 @@ bayes_init (memory_pool_t *pool, struct classifier_config *cfg) ctx->pool = pool; ctx->cfg = cfg; + return ctx; } diff --git a/src/main.c b/src/main.c index 0fcd4e89a..db8f6f276 100644 --- a/src/main.c +++ b/src/main.c @@ -819,6 +819,11 @@ main (gint argc, gchar **argv, gchar **env) /* Init contextes */ init_workers_ctx (rspamd); + /* Init classifiers options */ + register_classifier_opt ("bayes", "min_tokens"); + register_classifier_opt ("winnow", "min_tokens"); + register_classifier_opt ("winnow", "learn_threshold"); + if (! load_rspamd_config (rspamd->cfg, TRUE)) { exit (EXIT_FAILURE); } @@ -836,6 +841,9 @@ main (gint argc, gchar **argv, gchar **env) /* Init events to test modules */ event_init (); res = TRUE; + if (!check_modules_config (rspamd->cfg)) { + res = FALSE; + } /* Perform modules configuring */ l = g_list_first (rspamd->cfg->filters); @@ -871,6 +879,7 @@ main (gint argc, gchar **argv, gchar **env) msg_info ("rspamd " RVERSION " is starting, build id: " RID); rspamd->cfg->cfg_name = memory_pool_strdup (rspamd->cfg->cfg_pool, rspamd->cfg->cfg_name); + (void)check_modules_config (rspamd->cfg); if (!rspamd->cfg->no_fork && daemon (0, 0) == -1) { fprintf (stderr, "Cannot daemonize\n"); diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index 2f62464a6..7432e9f61 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -38,6 +38,7 @@ #include "../cfg_file.h" #include "../expressions.h" #include "../view.h" +#include "../cfg_xml.h" #define DEFAULT_SYMBOL "R_CHARSET_MIXED" #define DEFAULT_THRESHOLD 0.1 @@ -64,6 +65,8 @@ chartable_module_init (struct config_file *cfg, struct module_ctx **ctx) chartable_module_ctx->chartable_pool = memory_pool_new (memory_pool_get_size ()); *ctx = (struct module_ctx *)chartable_module_ctx; + register_module_opt ("chartable", "symbol", MODULE_OPT_TYPE_STRING); + register_module_opt ("chartable", "threshold", MODULE_OPT_TYPE_STRING); return 0; } diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 70abea2cd..e253955d0 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -50,6 +50,7 @@ #include "../map.h" #include "../images.h" #include "../fuzzy_storage.h" +#include "../cfg_xml.h" #define DEFAULT_SYMBOL "R_FUZZY_HASH" #define DEFAULT_UPSTREAM_ERROR_TIME 10 @@ -330,6 +331,17 @@ fuzzy_check_module_init (struct config_file *cfg, struct module_ctx **ctx) fuzzy_module_ctx->mappings = g_hash_table_new (g_direct_hash, g_direct_equal); *ctx = (struct module_ctx *)fuzzy_module_ctx; + /* Register module options */ + register_module_opt ("fuzzy_check", "symbol", MODULE_OPT_TYPE_STRING); + register_module_opt ("fuzzy_check", "max_score", MODULE_OPT_TYPE_DOUBLE); + register_module_opt ("fuzzy_check", "servers", MODULE_OPT_TYPE_STRING); + register_module_opt ("fuzzy_check", "fuzzy_map", MODULE_OPT_TYPE_STRING); + register_module_opt ("fuzzy_check", "whitelist", MODULE_OPT_TYPE_STRING); + register_module_opt ("fuzzy_check", "mime_types", MODULE_OPT_TYPE_STRING); + register_module_opt ("fuzzy_check", "min_bytes", MODULE_OPT_TYPE_UINT); + register_module_opt ("fuzzy_check", "min_height", MODULE_OPT_TYPE_UINT); + register_module_opt ("fuzzy_check", "min_width", MODULE_OPT_TYPE_UINT); + register_module_opt ("fuzzy_check", "min_symbols", MODULE_OPT_TYPE_UINT); return 0; } diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 8a33402f2..4e1f8f61c 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -39,6 +39,7 @@ #include "../view.h" #include "../lua/lua_common.h" #include "../json/jansson.h" +#include "../cfg_xml.h" #define DEFAULT_STATFILE_PREFIX "./" @@ -407,6 +408,8 @@ regexp_module_init (struct config_file *cfg, struct module_ctx **ctx) register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL); (void)luaopen_regexp (cfg->lua_state); + register_module_opt ("regexp", "dynamic_rules", MODULE_OPT_TYPE_STRING); + register_module_opt ("regexp", "/^\\S+$/", MODULE_OPT_TYPE_STRING); return 0; } diff --git a/src/plugins/spf.c b/src/plugins/spf.c index 868ad7a32..043f0b821 100644 --- a/src/plugins/spf.c +++ b/src/plugins/spf.c @@ -42,6 +42,7 @@ #include "../view.h" #include "../map.h" #include "../spf.h" +#include "../cfg_xml.h" #define DEFAULT_SYMBOL_FAIL "R_SPF_FAIL" #define DEFAULT_SYMBOL_SOFTFAIL "R_SPF_SOFTFAIL" @@ -69,6 +70,10 @@ spf_module_init (struct config_file *cfg, struct module_ctx **ctx) spf_module_ctx->spf_pool = memory_pool_new (memory_pool_get_size ()); *ctx = (struct module_ctx *)spf_module_ctx; + register_module_opt ("spf", "symbol_fail", MODULE_OPT_TYPE_STRING); + register_module_opt ("spf", "symbol_softfail", MODULE_OPT_TYPE_STRING); + register_module_opt ("spf", "symbol_allow", MODULE_OPT_TYPE_STRING); + register_module_opt ("spf", "whitelist", MODULE_OPT_TYPE_MAP); return 0; } diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 85f457a7e..2f761c67b 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -47,6 +47,7 @@ #include "../view.h" #include "../map.h" #include "../dns.h" +#include "../cfg_xml.h" #include "surbl.h" @@ -225,6 +226,16 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) register_protocol_command ("urls", urls_command_handler); /* Register module options */ + register_module_opt ("surbl", "redirector", MODULE_OPT_TYPE_STRING); + register_module_opt ("surbl", "url_expire", MODULE_OPT_TYPE_TIME); + register_module_opt ("surbl", "redirector_connect_timeout", MODULE_OPT_TYPE_TIME); + register_module_opt ("surbl", "redirector_read_timeout", MODULE_OPT_TYPE_TIME); + register_module_opt ("surbl", "max_urls", MODULE_OPT_TYPE_UINT); + register_module_opt ("surbl", "redirector_hosts_map", MODULE_OPT_TYPE_STRING); + register_module_opt ("surbl", "exceptions", MODULE_OPT_TYPE_STRING); + register_module_opt ("surbl", "whitelist", MODULE_OPT_TYPE_STRING); + register_module_opt ("surbl", "/^suffix_.*$/", MODULE_OPT_TYPE_STRING); + register_module_opt ("surbl", "/^bit_.*$/", MODULE_OPT_TYPE_STRING); return 0; } @@ -275,7 +286,7 @@ surbl_module_config (struct config_file *cfg) surbl_module_ctx->weight = DEFAULT_SURBL_WEIGHT; } if ((value = get_module_opt (cfg, "surbl", "url_expire")) != NULL) { - surbl_module_ctx->url_expire = atoi (value); + surbl_module_ctx->url_expire = parse_time (value, TIME_SECONDS) / 1000; } else { surbl_module_ctx->url_expire = DEFAULT_SURBL_URL_EXPIRE; @@ -907,7 +918,7 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data) red_domain = g_ptr_array_index (surbl_module_ctx->redirector_ptrs, idx); /* Try to find corresponding regexp */ re = g_hash_table_lookup (surbl_module_ctx->redirector_hosts, red_domain); - if (re == NO_REGEXP || g_regex_match (re, url->string, 0, NULL)) { + if (re != NULL && (re == NO_REGEXP || g_regex_match (re, url->string, 0, NULL))) { /* If no regexp found or founded regexp matches url string register redirector's call */ register_redirector_call (url, param->task, param->tree, param->suffix); param->task->save.saved++; diff --git a/utils/redirector.pl.in b/utils/redirector.pl.in index be2a0e13d..8f535b406 100755 --- a/utils/redirector.pl.in +++ b/utils/redirector.pl.in @@ -275,6 +275,17 @@ sub process_client { } } } + elsif ($http_response->code != 200) { + _log (LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri); + my $new_response = HTTP::Response->new($http_response->code); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); + return; + } my $response_type = $http_response->content_type(); if ( $response_type =~ /^text/i ) { my $content = $http_response->decoded_content(); -- 2.39.5