]> source.dussan.org Git - rspamd.git/commitdiff
* Add SURBL logic to surbl plugin
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 4 Sep 2008 14:07:41 +0000 (18:07 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 4 Sep 2008 14:07:41 +0000 (18:07 +0400)
* Add whitelist and hosters list to params

plugins/surbl.c

index 266816bef55022a035508459c8067feede855b2e..e31f8cc5d66f6c779cd68d2ed0ef53da5f3811a2 100644 (file)
@@ -14,6 +14,8 @@
 #include <fcntl.h>
 #include <stdlib.h>
 
+#include <evdns.h>
+
 #include "../config.h"
 #include "../main.h"
 #include "../modules.h"
@@ -26,6 +28,7 @@
 #define DEFAULT_REDIRECTOR_READ_TIMEOUT 5000
 #define DEFAULT_SURBL_MAX_URLS 1000
 #define DEFAULT_SURBL_URL_EXPIRE 86400
+#define DEFAULT_SURBL_SUFFIX "multi.surbl.org"
 
 struct surbl_ctx {
        int (*header_filter)(struct worker_task *task);
@@ -39,6 +42,9 @@ struct surbl_ctx {
        unsigned int read_timeout;
        unsigned int max_urls;
        unsigned int url_expire;
+       char *suffix;
+       GHashTable *hosters;
+       GHashTable *whitelist;
        unsigned use_redirector:1;
 };
 
@@ -59,7 +65,9 @@ struct memcached_param {
        memcached_ctx_t *ctx;
 };
 
+static char *hash_fill = "1";
 struct surbl_ctx *surbl_module_ctx;
+GRegex *extract_hoster_regexp, *extract_normal_regexp, *extract_numeric_regexp;
 
 static int surbl_test_url (struct worker_task *task);
 
@@ -67,6 +75,7 @@ int
 surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
 {
        struct hostent *hent;
+       GError *err = NULL;
 
        char *value, *cur_tok, *str;
 
@@ -128,6 +137,58 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
        else {
                surbl_module_ctx->max_urls = DEFAULT_SURBL_MAX_URLS;
        }
+       if ((value = get_module_opt (cfg, "surbl", "suffix")) != NULL) {
+               surbl_module_ctx->suffix = value;
+       }
+       else {
+               surbl_module_ctx->suffix = DEFAULT_SURBL_SUFFIX;
+       }
+       
+       surbl_module_ctx->hosters = g_hash_table_new (g_str_hash, g_str_equal);
+       surbl_module_ctx->whitelist = g_hash_table_new (g_str_hash, g_str_equal);
+       if ((value = get_module_opt (cfg, "surbl", "hostings")) != NULL) {
+               char comment_flag = 0;
+               str = value;
+               while (*value ++) {
+                       if (*value == '#') {
+                               comment_flag = 1;
+                       }
+                       if (*value == '\r' || *value == '\n' || *value == ',') {
+                               if (!comment_flag && str != value) {
+                                       g_hash_table_insert (surbl_module_ctx->hosters, g_strstrip(str), hash_fill);
+                                       str = value + 1;
+                               }
+                               else if (*value != ',') {
+                                       comment_flag = 0;
+                                       str = value + 1;
+                               }
+                       }
+               }
+       }
+       if ((value = get_module_opt (cfg, "surbl", "whitelist")) != NULL) {
+               char comment_flag = 0;
+               str = value;
+               while (*value ++) {
+                       if (*value == '#') {
+                               comment_flag = 1;
+                       }
+                       if (*value == '\r' || *value == '\n' || *value == ',') {
+                               if (!comment_flag && str != value) {
+                                       g_hash_table_insert (surbl_module_ctx->whitelist, g_strstrip(str), hash_fill);
+                                       str = value + 1;
+                               }
+                               else if (*value != ',') {
+                                       comment_flag = 0;
+                                       str = value + 1;
+                               }
+                       }
+               }
+       }
+       
+       /* Init matching regexps */
+       extract_hoster_regexp = g_regex_new ("([^.]+)\\.([^.]+)\\.([^.]+)$", G_REGEX_RAW, 0, &err);
+       extract_normal_regexp = g_regex_new ("([^.]+)\\.([^.]+)$", G_REGEX_RAW, 0, &err);
+       extract_numeric_regexp = g_regex_new ("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$", G_REGEX_RAW, 0, &err);
 
        *ctx = (struct module_ctx *)surbl_module_ctx;
 
@@ -136,12 +197,102 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
        return 0;
 }
 
+/*
+ * Build the DNS name to look up in the SURBL zone for a URL host.
+ *
+ *   a.b.c.d (dotted quad)            -> "d.c.b.a.<suffix>"
+ *   host whose last two labels are   -> "<last three labels>.<suffix>"
+ *   listed in the hosters table
+ *   any other host                   -> "<last two labels>.<suffix>"
+ *
+ * Returns a newly allocated string that the caller releases with g_free (),
+ * or NULL when no request can be built for hostname.
+ */
+static char *
+format_surbl_request (char *hostname)
+{
+       GMatchInfo *info = NULL;
+       gchar *octet1, *octet2, *octet3, *octet4;
+       gchar *domain, *hoster_domain;
+       char *result = NULL;
+
+       /* First try to match numeric expression */
+       if (g_regex_match (extract_numeric_regexp, hostname, 0, &info) == TRUE) {
+               /* Capture groups are numbered from 1; group 0 is the whole match */
+               octet1 = g_match_info_fetch (info, 1);
+               octet2 = g_match_info_fetch (info, 2);
+               octet3 = g_match_info_fetch (info, 3);
+               octet4 = g_match_info_fetch (info, 4);
+               g_match_info_free (info);
+               /* g_strdup_printf sizes the buffer itself, separators included */
+               result = g_strdup_printf ("%s.%s.%s.%s.%s", octet4, octet3, octet2, octet1,
+                                                               surbl_module_ctx->suffix);
+               g_free (octet1);
+               g_free (octet2);
+               g_free (octet3);
+               g_free (octet4);
+               return result;
+       }
+       /* The match info is created even when nothing matched, so free it here too */
+       g_match_info_free (info);
+       info = NULL;
+
+       /* Try to match normal domain */
+       if (g_regex_match (extract_normal_regexp, hostname, 0, &info) == FALSE) {
+               g_match_info_free (info);
+               return NULL;
+       }
+       /* Whole match of the two-label pattern, i.e. "label.label" */
+       domain = g_match_info_fetch (info, 0);
+       g_match_info_free (info);
+       info = NULL;
+
+       if (g_hash_table_lookup (surbl_module_ctx->hosters, domain) == NULL) {
+               /* Ordinary domain: the query must still live under the SURBL suffix */
+               result = g_strdup_printf ("%s.%s", domain, surbl_module_ctx->suffix);
+       }
+       else if (g_regex_match (extract_hoster_regexp, hostname, 0, &info) == TRUE) {
+               /* Match additional part for hosters: whole match is three labels */
+               hoster_domain = g_match_info_fetch (info, 0);
+               result = g_strdup_printf ("%s.%s", hoster_domain, surbl_module_ctx->suffix);
+               g_free (hoster_domain);
+       }
+       /* else: hoster without a third label, nothing to query; result stays NULL */
+
+       g_match_info_free (info);
+       g_free (domain);
+
+       return result;
+}
+
+/*
+ * Completion handler passed to evdns_resolve_ipv4 () for a SURBL lookup.
+ *
+ * result is the evdns status code and type the kind of answer received;
+ * count, ttl and addresses are not used.  data is the struct memcached_param
+ * of the url being checked; it is released here together with its
+ * memcached context.
+ */
+static void
+dns_callback (int result, char type, int count, int ttl, void *addresses, void *data)
+{
+       struct memcached_param *param = (struct memcached_param *)data;
+       struct filter_result *res;
+
+       /*
+        * If we have result from DNS server, this url exists in SURBL, so increase score.
+        * A failed lookup means the name is not listed and must not add weight.
+        */
+       if (result == DNS_ERR_NONE && type == DNS_IPv4_A) {
+               msg_info ("surbl_check: url %s is in surbl %s", param->url->host, surbl_module_ctx->suffix);
+               res = TAILQ_LAST (&param->task->results, resultsq);
+               res->mark += surbl_module_ctx->weight;
+       }
+
+       /* This request is finished; resume filtering once no requests remain */
+       param->task->save.saved --;
+       if (param->task->save.saved == 0) {
+               /* Call other filters */
+               param->task->save.saved = 1;
+               process_filters (param->task);
+       }
+
+       g_free (param->ctx->param->buf);
+       g_free (param->ctx->param);
+       g_free (param->ctx);
+       g_free (param);
+}
+
 static void 
 memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data)
 {
        struct memcached_param *param = (struct memcached_param *)data;
        int *url_count;
        struct filter_result *res;
+       char *surbl_req;
 
        switch (ctx->op) {
                case CMD_CONNECT:
@@ -201,7 +352,11 @@ memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data)
                                param->task->save.saved = 1;
                                process_filters (param->task);
                        }
-                       //XXX: read http://surbl.org and add surbl request here
+                       if ((surbl_req = format_surbl_request (param->url->host)) != NULL) {
+                               param->task->save.saved ++;
+                               evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param);
+                               return;
+                       }
                        g_free (param->ctx->param->buf);
                        g_free (param->ctx->param);
                        g_free (param->ctx);