From b29cec5f64146efd075e43ead4644ab647ca57c9 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 29 Jun 2009 16:22:10 +0400 Subject: [PATCH] * Improve logic of urls command to extract only those urls that would be checked against surbl lists * Fix surbl whitelisting * Fix bug with processing custom commands * Update version to 0.2.1 --- CMakeLists.txt | 2 +- src/plugins/surbl.c | 172 ++++++++++++++++++++++++++++++++++---------- src/plugins/surbl.h | 1 - src/protocol.c | 48 +------------ src/protocol.h | 1 - src/worker.c | 2 +- 6 files changed, 141 insertions(+), 85 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c13721a4a..94fcfdfd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ PROJECT(rspamd C) SET(RSPAMD_VERSION_MAJOR 0) SET(RSPAMD_VERSION_MINOR 2) -SET(RSPAMD_VERSION_PATCH 0) +SET(RSPAMD_VERSION_PATCH 1) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd") diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 0711d9cff..486934fea 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -38,6 +38,15 @@ static struct surbl_ctx *surbl_module_ctx = NULL; static int surbl_test_url (struct worker_task *task); static void dns_callback (int result, char type, int count, int ttl, void *addresses, void *data); static void process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr); +static int urls_command_handler (struct worker_task *task); + +#define SURBL_ERROR surbl_error_quark () +#define WHITELIST_ERROR 0 +GQuark +surbl_error_quark (void) +{ + return g_quark_from_static_string ("surbl-error-quark"); +} int surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) @@ -75,6 +84,8 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) *ctx = (struct module_ctx *)surbl_module_ctx; + register_protocol_command ("urls", urls_command_handler); + return 0; } @@ -212,14 +223,24 @@ surbl_module_reconfig (struct config_file *cfg) return surbl_module_config (cfg); } + + static char * -format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item *suffix, char **host_end) +format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item *suffix, char **host_end, gboolean append_suffix, GError **err) { GMatchInfo *info; char *result = NULL; int len, slen, r; - - slen = strlen (suffix->suffix); + + if (suffix != NULL) { + slen = strlen (suffix->suffix); + } + else if (!append_suffix) { + slen = 0; + } + else { + g_assert_not_reached (); + } len = hostname->len + slen + 2; /* First try to match numeric expression */ @@ -231,7 +252,25 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item octet4 = g_match_info_fetch (info, 4); result = memory_pool_alloc (pool, len); msg_debug ("format_surbl_request: got numeric host for check: %s.%s.%s.%s", octet1, octet2, octet3, octet4); - r = snprintf (result, len, "%s.%s.%s.%s.%s", octet4, octet3, octet2, octet1, suffix->suffix); + r = snprintf (result, len, "%s.%s.%s.%s", octet4, octet3, octet2, octet1); + if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) { + g_free (octet1); + g_free (octet2); + g_free (octet3); + g_free (octet4); + g_match_info_free (info); + msg_debug ("format_surbl_request: url %s is whitelisted", result); + g_set_error (err, + SURBL_ERROR, /* error domain */ + WHITELIST_ERROR, /* error code */ + "URL is whitelisted: %s", /* error message format string */ + result); + + return NULL; + } + if (append_suffix) { + r += snprintf (result + r, len - r, ".%s", suffix->suffix); + } *host_end = result + r - slen - 1; g_free (octet1); g_free (octet2); @@ -259,7 +298,23 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item hpart2 = g_match_info_fetch (info, 2); hpart3 = g_match_info_fetch (info, 3); msg_debug ("format_surbl_request: got hoster 3-d level domain %s.%s.%s", hpart1, hpart2, hpart3); - r = snprintf (result, len, "%s.%s.%s.%s", hpart1, hpart2, hpart3, suffix->suffix); + r = snprintf (result, len, "%s.%s.%s", hpart1, hpart2, hpart3); + if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) { + g_free (hpart1); + g_free (hpart2); + g_free (hpart3); + g_match_info_free (info); + msg_debug ("format_surbl_request: url %s is whitelisted", result); + g_set_error (err, + SURBL_ERROR, /* error domain */ + WHITELIST_ERROR, /* error code */ + "URL is whitelisted: %s", /* error message format string */ + result); + return NULL; + } + if (append_suffix) { + r += snprintf (result + r, len - r, ".%s", suffix->suffix); + } *host_end = result + r - slen - 1; g_free (hpart1); g_free (hpart2); @@ -272,7 +327,20 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item return NULL; } else { - r = snprintf (result, len, "%s.%s.%s", part1, part2, suffix->suffix); + if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) { + g_free (part1); + g_free (part2); + msg_debug ("format_surbl_request: url %s is whitelisted", result); + g_set_error (err, + SURBL_ERROR, /* error domain */ + WHITELIST_ERROR, /* error code */ + "URL is whitelisted: %s", /* error message format string */ + result); + return NULL; + } + if (append_suffix) { + r += snprintf (result + r, len - r, ".%s", suffix->suffix); + } *host_end = result + r - slen - 1; msg_debug ("format_surbl_request: got normal 2-d level domain %s.%s", part1, part2); } @@ -292,6 +360,7 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree) char *surbl_req; f_str_t f; GList *cur; + GError *err = NULL; struct dns_param *param; struct suffix_item *suffix; char *host_end; @@ -302,7 +371,7 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree) while (cur) { suffix = (struct suffix_item *)cur->data; - if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end)) != NULL) { + if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end, TRUE, &err)) != NULL) { if (g_tree_lookup (tree, surbl_req) == NULL) { g_tree_insert (tree, surbl_req, surbl_req); param = memory_pool_alloc (task->task_pool, sizeof (struct dns_param)); @@ -312,23 +381,16 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree) *host_end = '\0'; param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req); *host_end = '.'; - if (task->cmd == CMD_URLS) { - process_dns_results (task, suffix, param->host_resolve, 0); - /* Immideately break cycle */ - break; - } - else { - msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req); - evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); - param->task->save.saved ++; - } + msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req); + evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); + param->task->save.saved ++; } else { msg_debug ("make_surbl_requests: request %s is already sent", surbl_req); } } - else { - msg_info ("surbl_test_url: cannot format url string for surbl %s", struri (url)); + else if (err != NULL && err->code != WHITELIST_ERROR) { + msg_info ("surbl_test_url: cannot format url string for surbl %s, %s", struri (url), err->message); return; } cur = g_list_next (cur); @@ -343,12 +405,6 @@ process_dns_results (struct worker_task *task, struct suffix_item *suffix, char struct surbl_bit_item *bit; int len, found = 0; - if (task->cmd == CMD_URLS) { - insert_result (task, surbl_module_ctx->metric, suffix->symbol, 1, - g_list_prepend (NULL, memory_pool_strdup (task->task_pool, url))); - return; - } - if ((c = strchr (suffix->symbol, '%')) != NULL && *(c + 1) == 'b') { cur = g_list_first (surbl_module_ctx->bits); @@ -656,22 +712,17 @@ tree_url_callback (gpointer key, gpointer value, void *data) msg_debug ("surbl_test_url: check url %s", struri (url)); - if (param->task->cmd == CMD_URLS) { - make_surbl_requests (url, param->task, param->tree); + if (surbl_module_ctx->use_redirector) { + register_redirector_call (url, param->task, param->tree); + param->task->save.saved++; } else { - if (surbl_module_ctx->use_redirector) { - register_redirector_call (url, param->task, param->tree); + if (param->task->worker->srv->cfg->memcached_servers_num > 0) { + register_memcached_call (url, param->task, param->tree); param->task->save.saved++; } else { - if (param->task->worker->srv->cfg->memcached_servers_num > 0) { - register_memcached_call (url, param->task, param->tree); - param->task->save.saved++; - } - else { - make_surbl_requests (url, param->task, param->tree); - } + make_surbl_requests (url, param->task, param->tree); } } @@ -715,6 +766,55 @@ surbl_test_url (struct worker_task *task) return 0; } +static int +urls_command_handler (struct worker_task *task) +{ + GList *cur; + char outbuf[16384], *urlstr; + int r, num = 0; + struct uri *url; + GError *err = NULL; + GTree *url_tree; + f_str_t f; + char *host_end; + + url_tree = g_tree_new ((GCompareFunc)g_ascii_strcasecmp); + + r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK"); + + r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "URLs: "); + + cur = g_list_first (task->urls); + + while (cur) { + num ++; + url = cur->data; + if (g_tree_lookup (url_tree, struri (url)) == NULL) { + g_tree_insert (url_tree, struri (url), url); + f.begin = url->host; + f.len = url->hostlen; + if ((urlstr = format_surbl_request (task->task_pool, &f, NULL, &host_end, FALSE, &err)) != NULL) { + if (g_list_next (cur) != NULL) { + r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s, ", (char *)urlstr); + } + else { + r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s", (char *)urlstr); + } + } + } + cur = g_list_next (cur); + } + + outbuf[r++] = '\r'; outbuf[r++] = '\n'; + + rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE); + msg_info ("process_message: msg ok, id: <%s>, %d urls extracted", task->message_id, num); + g_tree_destroy (url_tree); + + return 0; +} + + /* * vi:ts=4 */ diff --git a/src/plugins/surbl.h b/src/plugins/surbl.h index 27fea8210..8f674c9ca 100644 --- a/src/plugins/surbl.h +++ b/src/plugins/surbl.h @@ -16,7 +16,6 @@ #define DEFAULT_SURBL_SYMBOL "SURBL_DNS" #define DEFAULT_SURBL_SUFFIX "multi.surbl.org" - struct surbl_ctx { int (*header_filter)(struct worker_task *task); int (*mime_filter)(struct worker_task *task); diff --git a/src/protocol.c b/src/protocol.c index 014a5fae3..43a721765 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -61,11 +61,6 @@ */ #define MSG_CMD_PROCESS "process" -/* - * Only extract urls from message - */ -#define MSG_CMD_URLS "urls" - /* * spamassassin greeting: */ @@ -177,17 +172,6 @@ parse_command (struct worker_task *task, f_str_t *line) return -1; } break; - case 'u': - case 'U': - /* urls */ - if (g_ascii_strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) { - task->cmd = CMD_URLS; - } - else { - msg_debug ("parse_command: bad command: %s", token); - return -1; - } - break; default: cur = custom_commands; while (cur) { @@ -197,6 +181,7 @@ parse_command (struct worker_task *task, f_str_t *line) task->custom_cmd = cmd; break; } + cur = g_list_next (cur); } if (cur == NULL) { @@ -445,12 +430,7 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data) GList *cur; if (s->options) { - if (task->cmd != CMD_URLS) { - r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key); - } - else { - r = snprintf (outbuf, OUTBUFSIZ, "Urls: "); - } + r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key); cur = s->options; while (cur) { if (g_list_next (cur)) { @@ -467,9 +447,7 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data) outbuf[OUTBUFSIZ - 1] = '\n'; } } - else if (task->cmd != CMD_URLS) { - r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s" CRLF, (char *)key); - } + r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s" CRLF, (char *)key); cd->log_offset += snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "%s,", (char *)key); @@ -616,23 +594,6 @@ write_check_reply (struct worker_task *task) return 0; } -static int -write_urls_reply (struct worker_task *task) -{ - int r; - char outbuf[OUTBUFSIZ]; - - r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK"); - rspamd_dispatcher_write (task->dispatcher, outbuf, r, TRUE); - - show_url_header (task); - - msg_info ("process_message: msg ok, id: <%s>, %d urls extracted", task->message_id, g_list_length (task->urls)); - - return 0; -} - - static int write_process_reply (struct worker_task *task) { @@ -730,9 +691,6 @@ write_reply (struct worker_task *task) r = snprintf (outbuf, sizeof (outbuf), "%s 0 PONG" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER); rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE); break; - case CMD_URLS: - return write_urls_reply (task); - break; case CMD_OTHER: return task->custom_cmd->func (task); } diff --git a/src/protocol.h b/src/protocol.h index bd2ac5343..b4783cb30 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -37,7 +37,6 @@ enum rspamd_command { CMD_SKIP, CMD_PING, CMD_PROCESS, - CMD_URLS, CMD_OTHER, }; diff --git a/src/worker.c b/src/worker.c index 8a9dd9d3a..f6846f6f7 100644 --- a/src/worker.c +++ b/src/worker.c @@ -180,7 +180,7 @@ read_socket (f_str_t *in, void *arg) task->state = WRITE_ERROR; write_socket (task); } - if (task->cmd == CMD_URLS || task->cmd == CMD_OTHER) { + if (task->cmd == CMD_OTHER) { /* Skip filters */ task->state = WRITE_REPLY; write_socket (task); -- 2.39.5