source.dussan.org Git - rspamd.git/commitdiff
* Improve logic of urls command to extract only those urls that would be checked...
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 29 Jun 2009 12:22:10 +0000 (16:22 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 29 Jun 2009 12:22:10 +0000 (16:22 +0400)
* Fix surbl whitelisting
* Fix bug with processing custom commands
* Update version to 0.2.1

CMakeLists.txt
src/plugins/surbl.c
src/plugins/surbl.h
src/protocol.c
src/protocol.h
src/worker.c

index c13721a4a84f3901a4b67d9abac0571c36246b33..94fcfdfd06d6bea0bbc36efcb0547626e341ca0f 100644 (file)
@@ -7,7 +7,7 @@ PROJECT(rspamd C)
 
 SET(RSPAMD_VERSION_MAJOR 0)
 SET(RSPAMD_VERSION_MINOR 2)
-SET(RSPAMD_VERSION_PATCH 0)
+SET(RSPAMD_VERSION_PATCH 1)
 
 SET(RSPAMD_VERSION         "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
 SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd")
index 0711d9cff20c6d540bb5a73028020844caecf3b9..486934fead63472d48623d62a9e9e0fe069ffa96 100644 (file)
@@ -38,6 +38,15 @@ static struct surbl_ctx *surbl_module_ctx = NULL;
 static int surbl_test_url (struct worker_task *task);
 static void dns_callback (int result, char type, int count, int ttl, void *addresses, void *data);
 static void process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr);
+static int  urls_command_handler (struct worker_task *task);
+
+#define SURBL_ERROR surbl_error_quark ()
+#define WHITELIST_ERROR 0
+GQuark
+surbl_error_quark (void)
+{
+       return g_quark_from_static_string ("surbl-error-quark");
+}
 
 int
 surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
@@ -75,6 +84,8 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
 
        *ctx = (struct module_ctx *)surbl_module_ctx;
 
+       register_protocol_command ("urls", urls_command_handler);
+
        return 0;
 }
 
@@ -212,14 +223,24 @@ surbl_module_reconfig (struct config_file *cfg)
        return surbl_module_config (cfg);
 }
 
+
+
 static char *
-format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item *suffix, char **host_end) 
+format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item *suffix, char **host_end, gboolean append_suffix, GError **err)
 {
        GMatchInfo *info;
        char *result = NULL;
     int len, slen, r;
-    
-       slen = strlen (suffix->suffix);
+       
+       if (suffix != NULL) {
+               slen = strlen (suffix->suffix);
+       }
+       else if (!append_suffix) {
+               slen = 0;
+       }
+       else {
+               g_assert_not_reached ();
+       }
     len = hostname->len + slen + 2;
 
        /* First try to match numeric expression */
@@ -231,7 +252,25 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item
                octet4 = g_match_info_fetch (info, 4);
                result = memory_pool_alloc (pool, len);
                msg_debug ("format_surbl_request: got numeric host for check: %s.%s.%s.%s", octet1, octet2, octet3, octet4);
-               r = snprintf (result, len, "%s.%s.%s.%s.%s", octet4, octet3, octet2, octet1, suffix->suffix);
+               r = snprintf (result, len, "%s.%s.%s.%s", octet4, octet3, octet2, octet1);
+               if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
+                       g_free (octet1);
+                       g_free (octet2);
+                       g_free (octet3);
+                       g_free (octet4);
+                       g_match_info_free (info);
+                       msg_debug ("format_surbl_request: url %s is whitelisted", result);
+                       g_set_error (err,
+                   SURBL_ERROR,                 /* error domain */
+                   WHITELIST_ERROR,                    /* error code */
+                   "URL is whitelisted: %s",   /* error message format string */
+                   result);
+
+                       return NULL;
+               }
+               if (append_suffix) {
+                       r += snprintf (result + r, len - r, ".%s", suffix->suffix);
+               }
                *host_end = result + r - slen - 1;
                g_free (octet1);
                g_free (octet2);
@@ -259,7 +298,23 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item
                                hpart2 = g_match_info_fetch (info, 2);
                                hpart3 = g_match_info_fetch (info, 3);
                                msg_debug ("format_surbl_request: got hoster 3-d level domain %s.%s.%s", hpart1, hpart2, hpart3);
-                               r = snprintf (result, len, "%s.%s.%s.%s", hpart1, hpart2, hpart3, suffix->suffix);
+                               r = snprintf (result, len, "%s.%s.%s", hpart1, hpart2, hpart3);
+                               if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
+                                       g_free (hpart1);
+                                       g_free (hpart2);
+                                       g_free (hpart3);
+                                       g_match_info_free (info);
+                                       msg_debug ("format_surbl_request: url %s is whitelisted", result);
+                                       g_set_error (err,
+                                                  SURBL_ERROR,                 /* error domain */
+                                                  WHITELIST_ERROR,             /* error code */
+                                                  "URL is whitelisted: %s",    /* error message format string */
+                                                  result);
+                                       return NULL;
+                               }
+                               if (append_suffix) {
+                                       r += snprintf (result + r, len - r, ".%s", suffix->suffix);
+                               }
                                *host_end = result + r - slen - 1;
                                g_free (hpart1);
                                g_free (hpart2);
@@ -272,7 +327,20 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item
                        return NULL;
                }
                else {
-                       r = snprintf (result, len, "%s.%s.%s", part1, part2, suffix->suffix);
+                       if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
+                               g_free (part1);
+                               g_free (part2);
+                               msg_debug ("format_surbl_request: url %s is whitelisted", result);
+                               g_set_error (err,
+                                          SURBL_ERROR,                 /* error domain */
+                                          WHITELIST_ERROR,             /* error code */
+                                          "URL is whitelisted: %s",    /* error message format string */
+                                          result);
+                               return NULL;
+                       }
+                       if (append_suffix) {
+                               r += snprintf (result + r, len - r, ".%s", suffix->suffix);
+                       }
                        *host_end = result + r - slen - 1;
                        msg_debug ("format_surbl_request: got normal 2-d level domain %s.%s", part1, part2);
                }
@@ -292,6 +360,7 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree)
        char *surbl_req;
        f_str_t f;
        GList *cur;
+       GError *err = NULL;
        struct dns_param *param;
        struct suffix_item *suffix;
        char *host_end;
@@ -302,7 +371,7 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree)
 
        while (cur) {
                suffix = (struct suffix_item *)cur->data;
-               if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end)) != NULL) {
+               if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end, TRUE, &err)) != NULL) {
                        if (g_tree_lookup (tree, surbl_req) == NULL) {
                                g_tree_insert (tree, surbl_req, surbl_req);
                                param = memory_pool_alloc (task->task_pool, sizeof (struct dns_param));
@@ -312,23 +381,16 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree)
                                *host_end = '\0';
                                param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req);
                                *host_end = '.';
-                               if (task->cmd == CMD_URLS) {
-                                       process_dns_results (task, suffix, param->host_resolve, 0);
-                                       /* Immideately break cycle */
-                                       break;
-                               }
-                               else {
-                                       msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req);
-                                       evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param);
-                                       param->task->save.saved ++;
-                               }
+                               msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req);
+                               evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param);
+                               param->task->save.saved ++;
                        }
                        else {
                                msg_debug ("make_surbl_requests: request %s is already sent", surbl_req);
                        }
                }
-               else {
-                       msg_info ("surbl_test_url: cannot format url string for surbl %s", struri (url));
+               else if (err != NULL && err->code != WHITELIST_ERROR) {
+                       msg_info ("surbl_test_url: cannot format url string for surbl %s, %s", struri (url), err->message);
                        return;
                }
                cur = g_list_next (cur);
@@ -343,12 +405,6 @@ process_dns_results (struct worker_task *task, struct suffix_item *suffix, char
        struct surbl_bit_item *bit;
        int len, found = 0;
        
-       if (task->cmd == CMD_URLS) {
-               insert_result (task, surbl_module_ctx->metric, suffix->symbol, 1, 
-                                                       g_list_prepend (NULL, memory_pool_strdup (task->task_pool, url)));
-               return;
-       }
-
        if ((c = strchr (suffix->symbol, '%')) != NULL && *(c + 1) == 'b') {
                cur = g_list_first (surbl_module_ctx->bits);
 
@@ -656,22 +712,17 @@ tree_url_callback (gpointer key, gpointer value, void *data)
 
        msg_debug ("surbl_test_url: check url %s", struri (url));
 
-       if (param->task->cmd == CMD_URLS) {
-               make_surbl_requests (url, param->task, param->tree);
+       if (surbl_module_ctx->use_redirector) {
+               register_redirector_call (url, param->task, param->tree);
+               param->task->save.saved++;
        }
        else {
-               if (surbl_module_ctx->use_redirector) {
-                       register_redirector_call (url, param->task, param->tree);
+               if (param->task->worker->srv->cfg->memcached_servers_num > 0) {
+                       register_memcached_call (url, param->task, param->tree);
                        param->task->save.saved++;
                }
                else {
-                       if (param->task->worker->srv->cfg->memcached_servers_num > 0) {
-                               register_memcached_call (url, param->task, param->tree);
-                               param->task->save.saved++;
-                       }
-                       else {
-                               make_surbl_requests (url, param->task, param->tree);
-                       }
+                       make_surbl_requests (url, param->task, param->tree);
                }
        }
 
@@ -715,6 +766,55 @@ surbl_test_url (struct worker_task *task)
        return 0;
 }
 
+static int 
+urls_command_handler (struct worker_task *task)
+{
+       GList *cur;
+       char outbuf[16384], *urlstr;
+       int r, num = 0;
+       struct uri *url;
+       GError *err = NULL;
+       GTree *url_tree;
+       f_str_t f;
+       char *host_end;
+
+       url_tree = g_tree_new ((GCompareFunc)g_ascii_strcasecmp);
+
+       r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK");
+       
+       r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "URLs: ");
+       
+       cur = g_list_first (task->urls);
+
+       while (cur) {
+               num ++;
+               url = cur->data;
+               if (g_tree_lookup (url_tree, struri (url)) == NULL) {
+                       g_tree_insert (url_tree, struri (url), url);
+                       f.begin = url->host;
+                       f.len = url->hostlen;
+                       if ((urlstr = format_surbl_request (task->task_pool, &f, NULL, &host_end, FALSE, &err)) != NULL) {
+                               if (g_list_next (cur) != NULL) {
+                                       r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s, ", (char *)urlstr);
+                               }
+                               else {
+                                       r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s", (char *)urlstr);
+                               }
+                       }
+               }
+               cur = g_list_next (cur);
+       }
+       
+       outbuf[r++] = '\r'; outbuf[r++] = '\n';
+
+       rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE);
+       msg_info ("process_message: msg ok, id: <%s>, %d urls extracted", task->message_id, num);
+       g_tree_destroy (url_tree);
+
+       return 0;
+}
+
+
 /*
  * vi:ts=4 
  */
index 27fea82106b6a968e3543f457241b8ede359869d..8f674c9ca869962509e34efd2f455189b54baff1 100644 (file)
@@ -16,7 +16,6 @@
 #define DEFAULT_SURBL_SYMBOL "SURBL_DNS"
 #define DEFAULT_SURBL_SUFFIX "multi.surbl.org"
 
-
 struct surbl_ctx {
        int (*header_filter)(struct worker_task *task);
        int (*mime_filter)(struct worker_task *task);
index 014a5fae39e819f592b9b68003d5a473bde20267..43a7217656628535b3e9816b4f94e0c05ea2161b 100644 (file)
  */
 #define MSG_CMD_PROCESS "process"
 
-/*
- * Only extract urls from message
- */
-#define MSG_CMD_URLS "urls"
-
 /*
  * spamassassin greeting:
  */
@@ -177,17 +172,6 @@ parse_command (struct worker_task *task, f_str_t *line)
                                return -1;
                        }
                        break;
-               case 'u':
-               case 'U':
-                       /* urls */
-                       if (g_ascii_strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
-                               task->cmd = CMD_URLS;
-                       }
-                       else {
-                               msg_debug ("parse_command: bad command: %s", token);
-                               return -1;
-                       }
-                       break;
                default:
                        cur = custom_commands;
                        while (cur) {
@@ -197,6 +181,7 @@ parse_command (struct worker_task *task, f_str_t *line)
                                        task->custom_cmd = cmd;
                                        break;
                                }
+                               cur = g_list_next (cur);
                        }
 
                        if (cur == NULL) {
@@ -445,12 +430,7 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data)
        GList *cur;
 
        if (s->options) {
-               if (task->cmd != CMD_URLS) {
-                       r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key);
-               }
-               else {
-                       r = snprintf (outbuf, OUTBUFSIZ, "Urls: ");
-               }
+               r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key);
                cur = s->options;
                while (cur) {
                        if (g_list_next (cur)) {
@@ -467,9 +447,7 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data)
                        outbuf[OUTBUFSIZ - 1] = '\n';
                }
        }
-       else if (task->cmd != CMD_URLS) {
-               r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s" CRLF, (char *)key);
-       }
+       r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s" CRLF, (char *)key);
        cd->log_offset += snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset,
                                                "%s,", (char *)key); 
 
@@ -616,23 +594,6 @@ write_check_reply (struct worker_task *task)
        return 0;
 }
 
-static int
-write_urls_reply (struct worker_task *task)
-{
-       int r;
-       char outbuf[OUTBUFSIZ];
-
-       r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK");
-       rspamd_dispatcher_write (task->dispatcher, outbuf, r, TRUE);
-
-       show_url_header (task);
-
-       msg_info ("process_message: msg ok, id: <%s>, %d urls extracted", task->message_id, g_list_length (task->urls));
-
-       return 0;
-}
-
-
 static int
 write_process_reply (struct worker_task *task)
 {
@@ -730,9 +691,6 @@ write_reply (struct worker_task *task)
                                r = snprintf (outbuf, sizeof (outbuf), "%s 0 PONG" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER);
                                rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE);
                                break;
-                       case CMD_URLS:
-                               return write_urls_reply (task);
-                               break;
                        case CMD_OTHER:
                                return task->custom_cmd->func (task);
                }
index bd2ac5343fe9b2bcb874762da59d5e29d2bbff2c..b4783cb30771eb1fb81a8c9c0ee288015916fe6a 100644 (file)
@@ -37,7 +37,6 @@ enum rspamd_command {
        CMD_SKIP,
        CMD_PING,
        CMD_PROCESS,
-       CMD_URLS,
        CMD_OTHER,
 };
 
index 8a9dd9d3a1c8f6ee3170405cf62d91c9dba139a5..f6846f6f737fd20611be54c3c6277a73b669f215 100644 (file)
@@ -180,7 +180,7 @@ read_socket (f_str_t *in, void *arg)
                                task->state = WRITE_ERROR;
                                write_socket (task);
             }
-                       if (task->cmd == CMD_URLS || task->cmd == CMD_OTHER) {
+                       if (task->cmd == CMD_OTHER) {
                                /* Skip filters */
                                task->state = WRITE_REPLY;
                                write_socket (task);