summaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2010-06-18 22:07:28 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2010-06-18 22:07:28 +0400
commit9759175c6dcb897101db3528dc4f36e8c20639c1 (patch)
tree0452c2167c33c71990fc2713925d395e14651635 /src/plugins
parente4eb49311b06fc0f820fb64d800d15a295d190a6 (diff)
downloadrspamd-9759175c6dcb897101db3528dc4f36e8c20639c1.tar.gz
rspamd-9759175c6dcb897101db3528dc4f36e8c20639c1.zip
* New logic of SURBL module:
- remove 2tld completely - add option "exceptions" - for domains from the exceptions list, check (level + 1) parts of the url: if we have url mail.some.com.ru and have com.ru in the exceptions list, then we would check some.com.ru. If we have some.com.ru in the exceptions list, then mail.some.com.ru would be checked, and so on. - optimized parsing of surbl requests * Use system mkstemp(3) on systems where it is available, as the glib implementation has poor security and generates rather predictable temporary file names.
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/surbl.c325
-rw-r--r--src/plugins/surbl.h7
2 files changed, 169 insertions, 163 deletions
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index 6a5a83f3c..4219bdd3d 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -33,7 +33,7 @@
* - redirector_read_timeout (seconds): timeout for reading data (default: 5s)
* - redirector_hosts_map (map string): map that contains domains to check with redirector
* Surbl options:
- * - 2tld (map string): map of domains that should be checked via surbl using 3 (e.g. somehost.domain.com)
+ * - exceptions (map string): map of domains that should be checked via surbl using 3 (e.g. somehost.domain.com)
* components of domain name instead of normal 2 (e.g. domain.com)
* - whitelist (map string): map of domains that should be whitelisted for surbl checks
* - max_urls (integer): maximum allowed number of urls in message to be checked
@@ -67,11 +67,63 @@ surbl_error_quark (void)
return g_quark_from_static_string ("surbl-error-quark");
}
+static void
+exception_insert (gpointer st, gconstpointer key, gpointer value)
+{
+ GHashTable **t = st;
+ int level = 0;
+ const char *p = key;
+ f_str_t *val;
+
+
+ while (*p) {
+ if (*p == '.') {
+ level ++;
+ }
+ p ++;
+ }
+ if (level >= MAX_LEVELS) {
+ msg_err ("invalid domain in exceptions list: %s, levels: %d", (char *)key, level);
+ return;
+ }
+
+ val = g_malloc (sizeof (f_str_t));
+ val->begin = (char *)key;
+ val->len = strlen (key);
+ if (t[level] == NULL) {
+ t[level] = g_hash_table_new_full (fstr_strcase_hash, fstr_strcase_equal, g_free, NULL);
+ }
+ g_hash_table_insert (t[level], val, value);
+}
+
+static u_char *
+read_exceptions_list (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data)
+{
+ if (data->cur_data == NULL) {
+ data->cur_data = memory_pool_alloc (pool, sizeof (GHashTable *) * MAX_LEVELS);
+ }
+ return abstract_parse_list (pool, chunk, len, data, (insert_func) exception_insert);
+}
+
+static void
+fin_exceptions_list (memory_pool_t * pool, struct map_cb_data *data)
+{
+ GHashTable **t;
+ int i;
+
+ if (data->prev_data) {
+ t = data->prev_data;
+ for (i = 0; i < MAX_LEVELS; i ++) {
+ if (t[i] != NULL) {
+ g_hash_table_destroy (t[i]);
+ }
+ }
+ }
+}
+
int
surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
{
- GError *err = NULL;
-
surbl_module_ctx = g_malloc (sizeof (struct surbl_ctx));
surbl_module_ctx->filter = surbl_filter;
@@ -83,23 +135,17 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
surbl_module_ctx->tld2_file = NULL;
surbl_module_ctx->whitelist_file = NULL;
- surbl_module_ctx->tld2 = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
surbl_module_ctx->redirector_hosts = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
surbl_module_ctx->whitelist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
+ /* Zero exceptions hashes */
+ surbl_module_ctx->exceptions = memory_pool_alloc0 (surbl_module_ctx->surbl_pool, MAX_LEVELS * sizeof (GHashTable *));
/* Register destructors */
- memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_hash_table_destroy, surbl_module_ctx->tld2);
memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_hash_table_destroy, surbl_module_ctx->whitelist);
memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_hash_table_destroy, surbl_module_ctx->redirector_hosts);
memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->suffixes);
memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->bits);
- /* Init matching regexps */
- surbl_module_ctx->extract_hoster_regexp = g_regex_new ("([^.]+)\\.([^.]+)\\.([^.]+)$", G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &err);
- surbl_module_ctx->extract_normal_regexp = g_regex_new ("([^.]+)\\.([^.]+)$", G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &err);
- surbl_module_ctx->extract_ip_regexp = g_regex_new ("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$", G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &err);
- surbl_module_ctx->extract_numeric_regexp = g_regex_new ("(\\d{5,20})$", G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &err);
-
*ctx = (struct module_ctx *)surbl_module_ctx;
register_protocol_command ("urls", urls_command_handler);
@@ -174,8 +220,8 @@ surbl_module_config (struct config_file *cfg)
else {
surbl_module_ctx->max_urls = DEFAULT_SURBL_MAX_URLS;
}
- if ((value = get_module_opt (cfg, "surbl", "2tld")) != NULL) {
- if (add_map (value, read_host_list, fin_host_list, (void **)&surbl_module_ctx->tld2)) {
+ if ((value = get_module_opt (cfg, "surbl", "exceptions")) != NULL) {
+ if (add_map (value, read_exceptions_list, fin_exceptions_list, (void **)&surbl_module_ctx->exceptions)) {
surbl_module_ctx->tld2_file = memory_pool_strdup (surbl_module_ctx->surbl_pool, value + sizeof ("file://") - 1);
}
}
@@ -240,13 +286,16 @@ surbl_module_reconfig (struct config_file *cfg)
static char *
-format_surbl_request (memory_pool_t * pool, f_str_t * hostname, struct suffix_item *suffix, char **host_end, gboolean append_suffix, GError ** err)
+format_surbl_request (memory_pool_t * pool, f_str_t * hostname, struct suffix_item *suffix, gboolean append_suffix, GError ** err)
{
- GMatchInfo *info;
- char *result = NULL;
- int len, slen, r;
+ GHashTable *t;
+ char *result = NULL, *dots[MAX_LEVELS], num_buf[sizeof("18446744073709551616")], *p;
+ int len, slen, r, i, dots_num = 0, level = MAX_LEVELS;
+ gboolean is_numeric = TRUE;
+ guint64 ip_num;
+ f_str_t f;
- if (suffix != NULL) {
+ if (G_LIKELY (suffix != NULL)) {
slen = strlen (suffix->suffix);
}
else if (!append_suffix) {
@@ -256,148 +305,112 @@ format_surbl_request (memory_pool_t * pool, f_str_t * hostname, struct suffix_it
g_assert_not_reached ();
}
len = hostname->len + slen + 2;
+
+ p = hostname->begin;
+ while (p - hostname->begin < hostname->len && dots_num < MAX_LEVELS) {
+ if (*p == '.') {
+ dots[dots_num] = p;
+ dots_num ++;
+ }
+ else if (! g_ascii_isdigit (*p)) {
+ is_numeric = FALSE;
+ }
+ p ++;
+ }
+
+ /* Check for numeric expressions */
+ if (is_numeric && dots_num == 3) {
+ /* This is ip address */
+ result = memory_pool_alloc (pool, len);
+ r = snprintf (result, len, "%*s.%*s.%*s.%*s",
+ (int)(hostname->len - (dots[2] - hostname->begin + 1)),
+ dots[2] + 1,
+ (int)(dots[2] - (dots[1] - hostname->begin + 1)),
+ dots[1],
+ (int)(dots[1] - (dots[0] - hostname->begin + 1)),
+ dots[0],
+ (int)(dots[0] - (hostname->begin + 1)),
+ hostname->begin);
+ }
+ else if (is_numeric && dots_num == 0) {
+ /* This is number */
+ g_strlcpy (num_buf, hostname->begin, MIN (hostname->len + 1, sizeof (num_buf)));
+ errno = 0;
+ ip_num = strtoull (num_buf, NULL, 10);
+ if (errno != 0) {
+ msg_info ("cannot convert ip to number '%s': %s", num_buf, strerror (errno));
+ g_set_error (err, SURBL_ERROR, /* error domain */
+ CONVERSION_ERROR, /* error code */
+ "URL cannot be decoded");
+ return NULL;
+ }
- /* First try to match numeric expression */
- if (g_ascii_isdigit (*hostname->begin)) {
- if (g_regex_match_full (surbl_module_ctx->extract_ip_regexp, hostname->begin, hostname->len, 0, 0, &info, NULL) == TRUE) {
- gchar *octet1, *octet2, *octet3, *octet4;
- octet1 = g_match_info_fetch (info, 1);
- octet2 = g_match_info_fetch (info, 2);
- octet3 = g_match_info_fetch (info, 3);
- octet4 = g_match_info_fetch (info, 4);
- result = memory_pool_alloc (pool, len);
- msg_debug ("got numeric host for check: %s.%s.%s.%s", octet1, octet2, octet3, octet4);
- r = snprintf (result, len, "%s.%s.%s.%s", octet4, octet3, octet2, octet1);
- if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
- g_free (octet1);
- g_free (octet2);
- g_free (octet3);
- g_free (octet4);
- g_match_info_free (info);
- msg_debug ("url %s is whitelisted", result);
- g_set_error (err, SURBL_ERROR, /* error domain */
- WHITELIST_ERROR, /* error code */
- "URL is whitelisted: %s", /* error message format string */
- result);
-
- return NULL;
- }
- if (append_suffix) {
- r += snprintf (result + r, len - r, ".%s", suffix->suffix);
- }
- *host_end = result + r - slen - 1;
- g_free (octet1);
- g_free (octet2);
- g_free (octet3);
- g_free (octet4);
- g_match_info_free (info);
- return result;
- }
- g_match_info_free (info);
- if (g_regex_match_full (surbl_module_ctx->extract_numeric_regexp, hostname->begin, hostname->len, 0, 0, &info, NULL) == TRUE) {
- gchar *ip = g_match_info_fetch (info, 1);
- uint64_t ip_num;
-
- errno = 0;
- ip_num = strtoull (ip, NULL, 10);
- if (errno != 0) {
- g_match_info_free (info);
- msg_info ("cannot convert ip to number '%s': %s", ip, strerror (errno));
- g_set_error (err, SURBL_ERROR, /* error domain */
- CONVERSION_ERROR, /* error code */
- "URL cannot be decoded");
- g_free (ip);
-
- return NULL;
- }
-
- len = sizeof ("255.255.255.255") + slen;
- result = memory_pool_alloc (pool, len);
- /* Hack for bugged windows resolver */
- ip_num &= 0xFFFFFFFF;
- /* Get octets */
- r = snprintf (result, len, "%u.%u.%u.%u",
- (uint32_t) ip_num & 0x000000FF, (uint32_t) (ip_num & 0x0000FF00) >> 8, (uint32_t) (ip_num & 0x00FF0000) >> 16, (uint32_t) (ip_num & 0xFF000000) >> 24);
- if (append_suffix) {
- r += snprintf (result + r, len - r, ".%s", suffix->suffix);
- }
- *host_end = result + r - slen - 1;
- g_free (ip);
- g_match_info_free (info);
- return result;
- }
- g_match_info_free (info);
- }
- /* Try to match normal domain */
- if (g_regex_match_full (surbl_module_ctx->extract_normal_regexp, hostname->begin, hostname->len, 0, 0, &info, NULL) == TRUE) {
- gchar *part1, *part2;
- part1 = g_match_info_fetch (info, 1);
- part2 = g_match_info_fetch (info, 2);
- g_match_info_free (info);
+ len = sizeof ("255.255.255.255") + slen;
result = memory_pool_alloc (pool, len);
- r = snprintf (result, len, "%s.%s", part1, part2);
- if (g_hash_table_lookup (surbl_module_ctx->tld2, result) != NULL) {
- /* Match additional part for hosters */
- g_free (part1);
- g_free (part2);
- if (g_regex_match_full (surbl_module_ctx->extract_hoster_regexp, hostname->begin, hostname->len, 0, 0, &info, NULL) == TRUE) {
- gchar *hpart1, *hpart2, *hpart3;
- hpart1 = g_match_info_fetch (info, 1);
- hpart2 = g_match_info_fetch (info, 2);
- hpart3 = g_match_info_fetch (info, 3);
- msg_debug ("got hoster 3-d level domain %s.%s.%s", hpart1, hpart2, hpart3);
- r = snprintf (result, len, "%s.%s.%s", hpart1, hpart2, hpart3);
- if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
- g_free (hpart1);
- g_free (hpart2);
- g_free (hpart3);
- g_match_info_free (info);
- msg_debug ("url %s is whitelisted", result);
- g_set_error (err, SURBL_ERROR, /* error domain */
- WHITELIST_ERROR, /* error code */
- "URL is whitelisted: %s", /* error message format string */
- result);
- return NULL;
- }
- if (append_suffix) {
- r += snprintf (result + r, len - r, ".%s", suffix->suffix);
+ /* Hack for bugged windows resolver */
+ ip_num &= 0xFFFFFFFF;
+ /* Get octets */
+ r = snprintf (result, len, "%u.%u.%u.%u",
+ (uint32_t) ip_num & 0x000000FF, (uint32_t) (ip_num & 0x0000FF00) >> 8, (uint32_t) (ip_num & 0x00FF0000) >> 16, (uint32_t) (ip_num & 0xFF000000) >> 24);
+ }
+ else {
+ /* Not a numeric url */
+ result = memory_pool_alloc (pool, len);
+ /* Now we should try to check for exceptions */
+ for (i = MAX_LEVELS - 1; i >= 0; i --) {
+ t = surbl_module_ctx->exceptions[i];
+ if (t != NULL && dots_num >= i + 1) {
+ f.begin = dots[dots_num - i - 1] + 1;
+ f.len = hostname->len - (dots[dots_num - i - 1] - hostname->begin + 1);
+ if (g_hash_table_lookup (t, &f) != NULL) {
+ level = dots_num - i - 1;
+ break;
}
- *host_end = result + r - slen - 1;
- g_free (hpart1);
- g_free (hpart2);
- g_free (hpart3);
- g_match_info_free (info);
- return result;
}
- g_match_info_free (info);
- *host_end = NULL;
- return NULL;
}
- else {
- if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
- g_free (part1);
- g_free (part2);
- msg_debug ("url %s is whitelisted", result);
- g_set_error (err, SURBL_ERROR, /* error domain */
- WHITELIST_ERROR, /* error code */
- "URL is whitelisted: %s", /* error message format string */
- result);
- return NULL;
+ if (level != MAX_LEVELS) {
+ if (level == 0) {
+ r = snprintf (result, len, "%*s", (int)hostname->len, hostname->begin);
}
- if (append_suffix) {
- r += snprintf (result + r, len - r, ".%s", suffix->suffix);
+ else {
+ r = snprintf (result, len, "%*s",
+ (int)(hostname->len - (dots[level - 1] - hostname->begin + 1)),
+ dots[level - 1] + 1);
+ }
+ }
+ else if (dots_num >= 2) {
+ r = snprintf (result, len, "%*s",
+ (int)(hostname->len - (dots[dots_num - 2] - hostname->begin + 1)),
+ dots[dots_num - 2] + 1);
+ for (i = 0; i < dots_num; i ++) {
+ msg_info ("dot: %d, data: %*s", i,
+ (int)(hostname->len - (dots[i] - hostname->begin + 1)),
+ dots[i] + 1);
+
}
- *host_end = result + r - slen - 1;
- msg_debug ("got normal 2-d level domain %s.%s", part1, part2);
}
- g_free (part1);
- g_free (part2);
- return result;
+ else {
+ r = snprintf (result, len, "%*s", (int)hostname->len, hostname->begin);
+ }
+ }
+
+ if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
+ msg_debug ("url %s is whitelisted", result);
+ g_set_error (err, SURBL_ERROR, /* error domain */
+ WHITELIST_ERROR, /* error code */
+ "URL is whitelisted: %s", /* error message format string */
+ result);
+ return NULL;
}
- g_match_info_free (info);
- *host_end = NULL;
- return NULL;
+
+ if (append_suffix) {
+ r += snprintf (result + r, len - r, ".%s", suffix->suffix);
+ }
+
+ msg_debug ("request: %s, dots: %d, level: %d, orig: %*s", result, dots_num, level, (int)hostname->len, hostname->begin);
+
+ return result;
}
static void
@@ -407,22 +420,19 @@ make_surbl_requests (struct uri *url, struct worker_task *task, GTree * tree, st
f_str_t f;
GError *err = NULL;
struct dns_param *param;
- char *host_end;
f.begin = url->host;
f.len = url->hostlen;
if (check_view (task->cfg->views, suffix->symbol, task)) {
- if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end, TRUE, &err)) != NULL) {
+ if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, TRUE, &err)) != NULL) {
if (g_tree_lookup (tree, surbl_req) == NULL) {
g_tree_insert (tree, surbl_req, surbl_req);
param = memory_pool_alloc (task->task_pool, sizeof (struct dns_param));
param->url = url;
param->task = task;
param->suffix = suffix;
- *host_end = '\0';
param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req);
- *host_end = '.';
debug_task ("send surbl dns request %s", surbl_req);
if (evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param) == 0) {
param->task->save.saved++;
@@ -741,7 +751,7 @@ tree_url_callback (gpointer key, gpointer value, void *data)
struct worker_task *task = param->task;
struct uri *url = value;
f_str_t f;
- char *urlstr, *host_end;
+ char *urlstr;
GError *err = NULL;
debug_task ("check url %s", struri (url));
@@ -750,7 +760,7 @@ tree_url_callback (gpointer key, gpointer value, void *data)
if (surbl_module_ctx->use_redirector) {
f.begin = url->host;
f.len = url->hostlen;
- if ((urlstr = format_surbl_request (param->task->task_pool, &f, NULL, &host_end, FALSE, &err)) != NULL) {
+ if ((urlstr = format_surbl_request (param->task->task_pool, &f, NULL, FALSE, &err)) != NULL) {
if (g_hash_table_lookup (surbl_module_ctx->redirector_hosts, urlstr) != NULL) {
register_redirector_call (url, param->task, param->tree, param->suffix);
param->task->save.saved++;
@@ -819,7 +829,6 @@ urls_command_handler (struct worker_task *task)
GError *err = NULL;
GTree *url_tree;
f_str_t f;
- char *host_end;
url_tree = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
@@ -849,7 +858,7 @@ urls_command_handler (struct worker_task *task)
g_tree_insert (url_tree, struri (url), url);
f.begin = url->host;
f.len = url->hostlen;
- if ((urlstr = format_surbl_request (task->task_pool, &f, NULL, &host_end, FALSE, &err)) != NULL) {
+ if ((urlstr = format_surbl_request (task->task_pool, &f, NULL, FALSE, &err)) != NULL) {
if (g_list_next (cur) != NULL) {
r += snprintf (outbuf + r, buflen - r - 2, "%s <\"%s\">, ", (char *)urlstr, struri (url));
}
diff --git a/src/plugins/surbl.h b/src/plugins/surbl.h
index 7169e59a1..c38e2c16b 100644
--- a/src/plugins/surbl.h
+++ b/src/plugins/surbl.h
@@ -15,6 +15,7 @@
#define DEFAULT_SURBL_URL_EXPIRE 86400
#define DEFAULT_SURBL_SYMBOL "SURBL_DNS"
#define DEFAULT_SURBL_SUFFIX "multi.surbl.org"
+#define MAX_LEVELS 10
struct surbl_ctx {
int (*filter)(struct worker_task *task);
@@ -30,15 +31,11 @@ struct surbl_ctx {
char *metric;
const char *tld2_file;
const char *whitelist_file;
- GHashTable *tld2;
+ GHashTable **exceptions;
GHashTable *whitelist;
GHashTable *redirector_hosts;
unsigned use_redirector;
memory_pool_t *surbl_pool;
- GRegex *extract_hoster_regexp;
- GRegex *extract_normal_regexp;
- GRegex *extract_ip_regexp;
- GRegex *extract_numeric_regexp;
};
struct suffix_item {