From 6f5f7e7fde16088ddfdf993c2efd1736fc83b6e0 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 3 Jun 2009 17:10:07 +0400
Subject: [PATCH] * Implement new way for parsing text lists

---
 CMakeLists.txt      |   2 +-
 rspamd.conf.sample  |   4 +-
 src/plugins/surbl.c | 141 +++++++++++++++++++++++++++++++++-----------
 src/plugins/surbl.h |   2 +-
 4 files changed, 109 insertions(+), 40 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2d55565e9..e9f5c61ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ PROJECT(rspamd C)
 
 SET(RSPAMD_VERSION_MAJOR 0)
 SET(RSPAMD_VERSION_MINOR 1)
-SET(RSPAMD_VERSION_PATCH 3)
+SET(RSPAMD_VERSION_PATCH 4)
 
 SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
 SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd")
diff --git a/rspamd.conf.sample b/rspamd.conf.sample
index 900eb0599..5184aef09 100644
--- a/rspamd.conf.sample
+++ b/rspamd.conf.sample
@@ -126,9 +126,9 @@ delivery {
 	# Metric for surbl module
 	metric = "default";
 	# List of public known hostings (for which we should use 3 components of domain name instead of 2)
-	hostings = "narod.ru,pp.ru,org.ru,net.ru";
+	2tld = "file:///some/path/file";
 	# Whitelisted urls
-	whitelist = "highsecure.ru,freebsd.org";
+	whitelist = "file:///some/other/file";
 };
 
 
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index 1514cae1c..0fccb14b2 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -40,6 +40,103 @@ GRegex *extract_hoster_regexp, *extract_normal_regexp, *extract_numeric_regexp;
 static int surbl_test_url (struct worker_task *task);
 static void dns_callback (int result, char type, int count, int ttl, void *addresses, void *data);
 
+/*
+ * Read a list of host names from the text file `filename' into `tbl'.
+ *
+ * Entries are separated by line breaks; '#' starts a comment that runs to
+ * the end of the line, and whitespace inside an entry is dropped.  Every
+ * entry is copied into surbl_module_ctx->surbl_pool and inserted into `tbl'
+ * as a key with hash_fill as its value.  Entries that do not fit into the
+ * BUFSIZ-sized scratch buffer are truncated.  If the file cannot be opened
+ * a warning is logged and `tbl' is left unchanged.
+ */
+static void
+parse_host_list (GHashTable *tbl, const char *filename)
+{
+	int fd;
+	char buf[BUFSIZ], str[BUFSIZ], *s, *p;
+	ssize_t r;
+	enum {
+		READ_SYMBOL,
+		SKIP_COMMENT,
+	} state = READ_SYMBOL;
+
+	if ((fd = open (filename, O_RDONLY)) == -1) {
+		msg_warn ("parse_host_list: cannot open file '%s': %s", filename, strerror (errno));
+		return;
+	}
+
+	s = str;
+
+	/* The parser state and the partial entry in str survive across read () calls */
+	while ((r = read (fd, buf, sizeof (buf) - 1)) > 0) {
+		buf[r] = '\0';
+		p = buf;
+		while (*p) {
+			switch (state) {
+				case READ_SYMBOL:
+					if (*p == '#') {
+						/* A comment ends the current entry; '#' itself is consumed in SKIP_COMMENT */
+						if (s != str) {
+							*s = '\0';
+							s = memory_pool_strdup (surbl_module_ctx->surbl_pool, str);
+							g_hash_table_insert (tbl, s, hash_fill);
+							s = str;
+						}
+						state = SKIP_COMMENT;
+					}
+					else if (*p == '\r' || *p == '\n') {
+						if (s != str) {
+							*s = '\0';
+							s = memory_pool_strdup (surbl_module_ctx->surbl_pool, str);
+							g_hash_table_insert (tbl, s, hash_fill);
+							s = str;
+						}
+						while (*p == '\r' || *p == '\n') {
+							p ++;
+						}
+					}
+					else if (g_ascii_isspace (*p)) {
+						p ++;
+					}
+					else {
+						/* Keep one byte for the terminator: overlong entries are truncated */
+						if (s < str + sizeof (str) - 1) {
+							*s = *p;
+							s ++;
+						}
+						p ++;
+					}
+					break;
+				case SKIP_COMMENT:
+					if (*p == '\r' || *p == '\n') {
+						while (*p == '\r' || *p == '\n') {
+							p ++;
+						}
+						s = str;
+						state = READ_SYMBOL;
+					}
+					else {
+						p ++;
+					}
+					break;
+			}
+		}
+	}
+
+	if (r == -1) {
+		msg_warn ("parse_host_list: cannot read file '%s': %s", filename, strerror (errno));
+	}
+	else if (state == READ_SYMBOL && s != str) {
+		/* The last line has no trailing line break, do not lose its entry */
+		*s = '\0';
+		s = memory_pool_strdup (surbl_module_ctx->surbl_pool, str);
+		g_hash_table_insert (tbl, s, hash_fill);
+	}
+
+	close (fd);
+}
+
 int
 surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
 {
@@ -56,9 +153,9 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
 	surbl_module_ctx->bits = NULL;
 	surbl_module_ctx->surbl_pool = memory_pool_new (1024);
 
-	surbl_module_ctx->hosters = g_hash_table_new (g_str_hash, g_str_equal);
+	surbl_module_ctx->tld2 = g_hash_table_new (g_str_hash, g_str_equal);
 	/* Register destructors */
-	memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func)g_hash_table_remove_all, surbl_module_ctx->hosters);
+	memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func)g_hash_table_remove_all, surbl_module_ctx->tld2);
 
 	surbl_module_ctx->whitelist = g_hash_table_new (g_str_hash, g_str_equal);
 	/* Register destructors */
@@ -143,42 +240,14 @@ surbl_module_config (struct config_file *cfg)
 	else {
 		surbl_module_ctx->metric = DEFAULT_METRIC;
 	}
-	if ((value = get_module_opt (cfg, "surbl", "hostings")) != NULL) {
-		char comment_flag = 0;
-		str = value;
-		while (*value ++) {
-			if (*value == '#') {
-				comment_flag = 1;
-			}
-			if (*value == '\r' || *value == '\n' || *value == ',') {
-				if (!comment_flag && str != value) {
-					g_hash_table_insert (surbl_module_ctx->hosters, g_strstrip(str), hash_fill);
-					str = value + 1;
-				}
-				else if (*value != ',') {
-					comment_flag = 0;
-					str = value + 1;
-				}
-			}
+	if ((value = get_module_opt (cfg, "surbl", "2tld")) != NULL) {
+		if (g_ascii_strncasecmp (value, "file://", sizeof ("file://") - 1) == 0) {
+			parse_host_list (surbl_module_ctx->tld2, value + sizeof ("file://") - 1);
 		}
 	}
 	if ((value = get_module_opt (cfg, "surbl", "whitelist")) != NULL) {
-		char comment_flag = 0;
-		str = value;
-		while (*value ++) {
-			if (*value == '#') {
-				comment_flag = 1;
-			}
-			if (*value == '\r' || *value == '\n' || *value == ',') {
-				if (!comment_flag && str != value) {
-					g_hash_table_insert (surbl_module_ctx->whitelist, g_strstrip(str), hash_fill);
-					str = value + 1;
-				}
-				else if (*value != ',') {
-					comment_flag = 0;
-					str = value + 1;
-				}
-			}
+		if (g_ascii_strncasecmp (value, "file://", sizeof ("file://") - 1) == 0) {
+			parse_host_list (surbl_module_ctx->whitelist, value + sizeof ("file://") - 1);
 		}
 	}
 
@@ -269,7 +338,7 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item
 		g_match_info_free (info);
 		result = memory_pool_alloc (pool, len);
 		snprintf (result, len, "%s.%s", part1, part2);
-		if (g_hash_table_lookup (surbl_module_ctx->hosters, result) != NULL) {
+		if (g_hash_table_lookup (surbl_module_ctx->tld2, result) != NULL) {
 			/* Match additional part for hosters */
 			g_free (part1);
 			g_free (part2);
diff --git a/src/plugins/surbl.h b/src/plugins/surbl.h
index 90c86a490..4849e29f0 100644
--- a/src/plugins/surbl.h
+++ b/src/plugins/surbl.h
@@ -32,7 +32,7 @@ struct surbl_ctx {
 	GList *suffixes;
 	GList *bits;
 	char *metric;
-	GHashTable *hosters;
+	GHashTable *tld2;
 	GHashTable *whitelist;
 	unsigned use_redirector;
 	memory_pool_t *surbl_pool;
-- 
2.39.5