diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-09-04 18:07:41 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-09-04 18:07:41 +0400 |
commit | 32d41689d6328e9075bf4c6a822f0fe22207838b (patch) | |
tree | 64e64f4861491e9c09918a2197c25239415c009a /plugins | |
parent | b58d2b7e4d76f3c6a60dbb3d49ad782d53b239e7 (diff) | |
download | rspamd-32d41689d6328e9075bf4c6a822f0fe22207838b.tar.gz rspamd-32d41689d6328e9075bf4c6a822f0fe22207838b.zip |
* Add SURBL logic to surbl plugin
* Add whitelist and hosters list to params
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/surbl.c | 157 |
1 files changed, 156 insertions, 1 deletions
diff --git a/plugins/surbl.c b/plugins/surbl.c index 266816bef..e31f8cc5d 100644 --- a/plugins/surbl.c +++ b/plugins/surbl.c @@ -14,6 +14,8 @@ #include <fcntl.h> #include <stdlib.h> +#include <evdns.h> + #include "../config.h" #include "../main.h" #include "../modules.h" @@ -26,6 +28,7 @@ #define DEFAULT_REDIRECTOR_READ_TIMEOUT 5000 #define DEFAULT_SURBL_MAX_URLS 1000 #define DEFAULT_SURBL_URL_EXPIRE 86400 +#define DEFAULT_SURBL_SUFFIX "multi.surbl.org" struct surbl_ctx { int (*header_filter)(struct worker_task *task); @@ -39,6 +42,9 @@ struct surbl_ctx { unsigned int read_timeout; unsigned int max_urls; unsigned int url_expire; + char *suffix; + GHashTable *hosters; + GHashTable *whitelist; unsigned use_redirector:1; }; @@ -59,7 +65,9 @@ struct memcached_param { memcached_ctx_t *ctx; }; +static char *hash_fill = "1"; struct surbl_ctx *surbl_module_ctx; +GRegex *extract_hoster_regexp, *extract_normal_regexp, *extract_numeric_regexp; static int surbl_test_url (struct worker_task *task); @@ -67,6 +75,7 @@ int surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) { struct hostent *hent; + GError *err = NULL; char *value, *cur_tok, *str; @@ -128,6 +137,58 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) else { surbl_module_ctx->max_urls = DEFAULT_SURBL_MAX_URLS; } + if ((value = get_module_opt (cfg, "surbl", "suffix")) != NULL) { + surbl_module_ctx->suffix = value; + } + else { + surbl_module_ctx->suffix = DEFAULT_SURBL_SUFFIX; + } + + surbl_module_ctx->hosters = g_hash_table_new (g_str_hash, g_str_equal); + surbl_module_ctx->whitelist = g_hash_table_new (g_str_hash, g_str_equal); + if ((value = get_module_opt (cfg, "surbl", "hostings")) != NULL) { + char comment_flag = 0; + str = value; + while (*value ++) { + if (*value == '#') { + comment_flag = 1; + } + if (*value == '\r' || *value == '\n' || *value == ',') { + if (!comment_flag && str != value) { + g_hash_table_insert (surbl_module_ctx->hosters, g_strstrip(str), hash_fill); + 
str = value + 1; + } + else if (*value != ',') { + comment_flag = 0; + str = value + 1; + } + } + } + } + if ((value = get_module_opt (cfg, "surbl", "whitelist")) != NULL) { + char comment_flag = 0; + str = value; + while (*value ++) { + if (*value == '#') { + comment_flag = 1; + } + if (*value == '\r' || *value == '\n' || *value == ',') { + if (!comment_flag && str != value) { + g_hash_table_insert (surbl_module_ctx->whitelist, g_strstrip(str), hash_fill); + str = value + 1; + } + else if (*value != ',') { + comment_flag = 0; + str = value + 1; + } + } + } + } + + /* Init matching regexps */ + extract_hoster_regexp = g_regex_new ("([^.]+)\\.([^.]+)\\.([^.]+)$", G_REGEX_RAW, 0, &err); + extract_normal_regexp = g_regex_new ("([^.]+)\\.([^.]+)$", G_REGEX_RAW, 0, &err); + extract_numeric_regexp = g_regex_new ("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$", G_REGEX_RAW, 0, &err); *ctx = (struct module_ctx *)surbl_module_ctx; @@ -136,12 +197,102 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) return 0; } +static char * +format_surbl_request (char *hostname) +{ + GMatchInfo *info; + char *result; + + result = g_malloc (strlen (hostname) + strlen (surbl_module_ctx->suffix) + 1); + + /* First try to match numeric expression */ + if (g_regex_match (extract_numeric_regexp, hostname, 0, &info) == TRUE) { + gchar *octet1, *octet2, *octet3, *octet4; + octet1 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + octet2 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + octet3 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + octet4 = g_match_info_fetch (info, 0); + g_match_info_free (info); + sprintf (result, "%s.%s.%s.%s.%s", octet4, octet3, octet2, octet1, surbl_module_ctx->suffix); + g_free (octet1); + g_free (octet2); + g_free (octet3); + g_free (octet4); + return result; + } + g_match_info_free (info); + /* Try to match normal domain */ + if (g_regex_match (extract_normal_regexp, hostname, 0, 
&info) == TRUE) { + gchar *part1, *part2; + part1 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + part2 = g_match_info_fetch (info, 0); + g_match_info_free (info); + sprintf (result, "%s.%s", part1, part2); + if (g_hash_table_lookup (surbl_module_ctx->hosters, result) != NULL) { + /* Match additional part for hosters */ + g_free (part1); + g_free (part2); + if (g_regex_match (extract_hoster_regexp, hostname, 0, &info) == TRUE) { + gchar *hpart1, *hpart2, *hpart3; + hpart1 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + hpart2 = g_match_info_fetch (info, 0); + g_match_info_next (info, NULL); + hpart3 = g_match_info_fetch (info, 0); + g_match_info_free (info); + sprintf (result, "%s.%s.%s.%s", hpart1, hpart2, hpart3, surbl_module_ctx->suffix); + g_free (hpart1); + g_free (hpart2); + g_free (hpart3); + return result; + } + return NULL; + } + g_free (part1); + g_free (part2); + return result; + } + + return NULL; +} + +static void +dns_callback (int result, char type, int count, int ttl, void *addresses, void *data) +{ + struct memcached_param *param = (struct memcached_param *)data; + struct filter_result *res; + + /* If we have result from DNS server, this url exists in SURBL, so increase score */ + if (result != DNS_ERR_NONE || type != DNS_IPv4_A) { + msg_info ("surbl_check: url %s is in surbl %s", param->url->host, surbl_module_ctx->suffix); + res = TAILQ_LAST (&param->task->results, resultsq); + res->mark += surbl_module_ctx->weight; + } + + param->task->save.saved --; + if (param->task->save.saved == 0) { + /* Call other filters */ + param->task->save.saved = 1; + process_filters (param->task); + } + + g_free (param->ctx->param->buf); + g_free (param->ctx->param); + g_free (param->ctx); + g_free (param); +} + static void memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) { struct memcached_param *param = (struct memcached_param *)data; int *url_count; struct filter_result *res; + char *surbl_req; switch 
(ctx->op) { case CMD_CONNECT: @@ -201,7 +352,11 @@ memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) param->task->save.saved = 1; process_filters (param->task); } - //XXX: read http://surbl.org and add surbl request here + if ((surbl_req = format_surbl_request (param->url->host)) != NULL) { + param->task->save.saved ++; + evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); + return; + } g_free (param->ctx->param->buf); g_free (param->ctx->param); g_free (param->ctx); |