From 58f19b06569c3ef46949874e2c3d5bb62ec54fc2 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 5 Jun 2009 19:19:44 +0400 Subject: [PATCH] * Add urls command for extracting urls --- CMakeLists.txt | 2 +- rspamc.pl.in | 2 +- src/plugins/surbl.c | 38 ++++++++++++++++++++------- src/protocol.c | 64 +++++++++++++++++++++++++++++++++++++++++++-- src/protocol.h | 1 + 5 files changed, 94 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dbec8c7d..c41477181 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ PROJECT(rspamd C) SET(RSPAMD_VERSION_MAJOR 0) SET(RSPAMD_VERSION_MINOR 1) -SET(RSPAMD_VERSION_PATCH 5) +SET(RSPAMD_VERSION_PATCH 6) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd") diff --git a/rspamc.pl.in b/rspamc.pl.in index 9d5712117..d9e5def14 100755 --- a/rspamc.pl.in +++ b/rspamc.pl.in @@ -237,7 +237,7 @@ if (defined ($args{p})) { $cfg{'port'} = $args{p}; } -if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT)/i) { +if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT|URLS)/i) { $cfg{'command'} = $1; $cfg{'control'} = 0; } diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index eb7ed2f20..020bf7764 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -37,6 +37,7 @@ static struct surbl_ctx *surbl_module_ctx = NULL; static int surbl_test_url (struct worker_task *task); static void dns_callback (int result, char type, int count, int ttl, void *addresses, void *data); +static void process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr); int surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) @@ -311,9 +312,16 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree) *host_end = '\0'; param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req); *host_end = '.'; - msg_debug 
("surbl_test_url: send surbl dns request %s", surbl_req); - evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); - param->task->save.saved ++; + if (task->cmd == CMD_URLS) { + process_dns_results (task, suffix, param->host_resolve, 0); + /* Immediately break cycle */ + break; + } + else { + msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req); + evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param); + param->task->save.saved ++; + } } else { msg_debug ("make_surbl_requests: request %s is already sent", surbl_req); @@ -334,6 +342,12 @@ process_dns_results (struct worker_task *task, struct suffix_item *suffix, char GList *cur; struct surbl_bit_item *bit; int len, found = 0; + + if (task->cmd == CMD_URLS) { + insert_result (task, surbl_module_ctx->metric, suffix->symbol, 1, + g_list_prepend (NULL, memory_pool_strdup (task->task_pool, url))); + return; + } if ((c = strchr (suffix->symbol, '%')) != NULL && *(c + 1) == 'b') { cur = g_list_first (surbl_module_ctx->bits); @@ -639,17 +653,23 @@ tree_url_callback (gpointer key, gpointer value, void *data) struct uri *url = value; msg_debug ("surbl_test_url: check url %s", struri (url)); - if (surbl_module_ctx->use_redirector) { - register_redirector_call (url, param->task, param->tree); - param->task->save.saved++; + + if (param->task->cmd == CMD_URLS) { + make_surbl_requests (url, param->task, param->tree); } else { - if (param->task->worker->srv->cfg->memcached_servers_num > 0) { - register_memcached_call (url, param->task, param->tree); + if (surbl_module_ctx->use_redirector) { + register_redirector_call (url, param->task, param->tree); param->task->save.saved++; } else { - make_surbl_requests (url, param->task, param->tree); + if (param->task->worker->srv->cfg->memcached_servers_num > 0) { + register_memcached_call (url, param->task, param->tree); + param->task->save.saved++; + } + else { + make_surbl_requests (url, param->task, param->tree); + } } } 
diff --git a/src/protocol.c b/src/protocol.c index c551bb783..0209adb0c 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -61,6 +61,11 @@ */ #define MSG_CMD_PROCESS "process" +/* + * Only extract urls from message + */ +#define MSG_CMD_URLS "urls" + /* * spamassassin greeting: */ @@ -176,6 +181,17 @@ parse_command (struct worker_task *task, f_str_t *line) return -1; } break; + case 'u': + case 'U': + /* urls */ + if (strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) { + task->cmd = CMD_URLS; + } + else { + msg_debug ("parse_command: bad command: %s", token); + return -1; + } + break; default: msg_debug ("parse_command: bad command: %s", token); return -1; @@ -419,7 +435,12 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data) GList *cur; if (s->options) { - r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key); + if (task->cmd != CMD_URLS) { + r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key); + } + else { + r = snprintf (outbuf, OUTBUFSIZ, "Urls: "); + } cur = s->options; while (cur) { if (g_list_next (cur)) { @@ -436,7 +457,7 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data) outbuf[OUTBUFSIZ - 1] = '\n'; } } - else { + else if (task->cmd != CMD_URLS) { r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s" CRLF, (char *)key); } cd->log_offset += snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, @@ -585,6 +606,42 @@ write_check_reply (struct worker_task *task) return 0; } +static int +write_urls_reply (struct worker_task *task) +{ + int r; + char outbuf[OUTBUFSIZ], logbuf[OUTBUFSIZ]; + struct metric_result *metric_res; + struct metric_callback_data cd; + + r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? 
SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK"); + rspamd_dispatcher_write (task->dispatcher, outbuf, r, TRUE); + + cd.task = task; + cd.log_buf = logbuf; + cd.log_offset = snprintf (logbuf, sizeof (logbuf), "process_message: msg ok, id: <%s>, ", task->message_id); + cd.log_size = sizeof (logbuf); + + /* Ignore metrics, just write report for 'default' metric */ + metric_res = g_hash_table_lookup (task->results, "default"); + if (metric_res == NULL) { + /* Implicit metric result */ + show_metric_result (NULL, NULL, (void *)&cd); + } + else { + g_hash_table_foreach (metric_res->symbols, metric_symbols_callback, &cd); + /* Remove last , from log buf */ + if (cd.log_buf[cd.log_offset - 1] == ',') { + cd.log_buf[--cd.log_offset] = '\0'; + } + } + msg_info ("%s", logbuf); + rspamd_dispatcher_write (task->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE); + + return 0; +} + + static int write_process_reply (struct worker_task *task) { @@ -682,6 +739,9 @@ write_reply (struct worker_task *task) r = snprintf (outbuf, sizeof (outbuf), "%s 0 PONG" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER); rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE); break; + case CMD_URLS: + return write_urls_reply (task); + break; } } diff --git a/src/protocol.h b/src/protocol.h index 74e7f7f98..ed00edc26 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -28,6 +28,7 @@ enum rspamd_command { CMD_SKIP, CMD_PING, CMD_PROCESS, + CMD_URLS, }; /** -- 2.39.5