Browse Source

* Improve the logic of the urls command so that it extracts only those URLs that would be checked against SURBL lists

* Fix surbl whitelisting
* Fix bug with processing custom commands
* Update version to 0.2.1
tags/0.2.7
Vsevolod Stakhov 15 years ago
parent
commit
b29cec5f64
6 changed files with 141 additions and 85 deletions
  1. 1
    1
      CMakeLists.txt
  2. 136
    36
      src/plugins/surbl.c
  3. 0
    1
      src/plugins/surbl.h
  4. 3
    45
      src/protocol.c
  5. 0
    1
      src/protocol.h
  6. 1
    1
      src/worker.c

+ 1
- 1
CMakeLists.txt View File

@@ -7,7 +7,7 @@ PROJECT(rspamd C)

SET(RSPAMD_VERSION_MAJOR 0)
SET(RSPAMD_VERSION_MINOR 2)
SET(RSPAMD_VERSION_PATCH 0)
SET(RSPAMD_VERSION_PATCH 1)

SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd")

+ 136
- 36
src/plugins/surbl.c View File

@@ -38,6 +38,15 @@ static struct surbl_ctx *surbl_module_ctx = NULL;
static int surbl_test_url (struct worker_task *task);
static void dns_callback (int result, char type, int count, int ttl, void *addresses, void *data);
static void process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr);
static int urls_command_handler (struct worker_task *task);

#define SURBL_ERROR surbl_error_quark ()
#define WHITELIST_ERROR 0
GQuark
surbl_error_quark (void)
{
return g_quark_from_static_string ("surbl-error-quark");
}

int
surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
@@ -75,6 +84,8 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)

*ctx = (struct module_ctx *)surbl_module_ctx;

register_protocol_command ("urls", urls_command_handler);

return 0;
}

@@ -212,14 +223,24 @@ surbl_module_reconfig (struct config_file *cfg)
return surbl_module_config (cfg);
}



static char *
format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item *suffix, char **host_end)
format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item *suffix, char **host_end, gboolean append_suffix, GError **err)
{
GMatchInfo *info;
char *result = NULL;
int len, slen, r;
slen = strlen (suffix->suffix);
if (suffix != NULL) {
slen = strlen (suffix->suffix);
}
else if (!append_suffix) {
slen = 0;
}
else {
g_assert_not_reached ();
}
len = hostname->len + slen + 2;

/* First try to match numeric expression */
@@ -231,7 +252,25 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item
octet4 = g_match_info_fetch (info, 4);
result = memory_pool_alloc (pool, len);
msg_debug ("format_surbl_request: got numeric host for check: %s.%s.%s.%s", octet1, octet2, octet3, octet4);
r = snprintf (result, len, "%s.%s.%s.%s.%s", octet4, octet3, octet2, octet1, suffix->suffix);
r = snprintf (result, len, "%s.%s.%s.%s", octet4, octet3, octet2, octet1);
if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
g_free (octet1);
g_free (octet2);
g_free (octet3);
g_free (octet4);
g_match_info_free (info);
msg_debug ("format_surbl_request: url %s is whitelisted", result);
g_set_error (err,
SURBL_ERROR, /* error domain */
WHITELIST_ERROR, /* error code */
"URL is whitelisted: %s", /* error message format string */
result);

return NULL;
}
if (append_suffix) {
r += snprintf (result + r, len - r, ".%s", suffix->suffix);
}
*host_end = result + r - slen - 1;
g_free (octet1);
g_free (octet2);
@@ -259,7 +298,23 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item
hpart2 = g_match_info_fetch (info, 2);
hpart3 = g_match_info_fetch (info, 3);
msg_debug ("format_surbl_request: got hoster 3-d level domain %s.%s.%s", hpart1, hpart2, hpart3);
r = snprintf (result, len, "%s.%s.%s.%s", hpart1, hpart2, hpart3, suffix->suffix);
r = snprintf (result, len, "%s.%s.%s", hpart1, hpart2, hpart3);
if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
g_free (hpart1);
g_free (hpart2);
g_free (hpart3);
g_match_info_free (info);
msg_debug ("format_surbl_request: url %s is whitelisted", result);
g_set_error (err,
SURBL_ERROR, /* error domain */
WHITELIST_ERROR, /* error code */
"URL is whitelisted: %s", /* error message format string */
result);
return NULL;
}
if (append_suffix) {
r += snprintf (result + r, len - r, ".%s", suffix->suffix);
}
*host_end = result + r - slen - 1;
g_free (hpart1);
g_free (hpart2);
@@ -272,7 +327,20 @@ format_surbl_request (memory_pool_t *pool, f_str_t *hostname, struct suffix_item
return NULL;
}
else {
r = snprintf (result, len, "%s.%s.%s", part1, part2, suffix->suffix);
if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
g_free (part1);
g_free (part2);
msg_debug ("format_surbl_request: url %s is whitelisted", result);
g_set_error (err,
SURBL_ERROR, /* error domain */
WHITELIST_ERROR, /* error code */
"URL is whitelisted: %s", /* error message format string */
result);
return NULL;
}
if (append_suffix) {
r += snprintf (result + r, len - r, ".%s", suffix->suffix);
}
*host_end = result + r - slen - 1;
msg_debug ("format_surbl_request: got normal 2-d level domain %s.%s", part1, part2);
}
@@ -292,6 +360,7 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree)
char *surbl_req;
f_str_t f;
GList *cur;
GError *err = NULL;
struct dns_param *param;
struct suffix_item *suffix;
char *host_end;
@@ -302,7 +371,7 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree)

while (cur) {
suffix = (struct suffix_item *)cur->data;
if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end)) != NULL) {
if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, &host_end, TRUE, &err)) != NULL) {
if (g_tree_lookup (tree, surbl_req) == NULL) {
g_tree_insert (tree, surbl_req, surbl_req);
param = memory_pool_alloc (task->task_pool, sizeof (struct dns_param));
@@ -312,23 +381,16 @@ make_surbl_requests (struct uri* url, struct worker_task *task, GTree *tree)
*host_end = '\0';
param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req);
*host_end = '.';
if (task->cmd == CMD_URLS) {
process_dns_results (task, suffix, param->host_resolve, 0);
/* Immediately break cycle */
break;
}
else {
msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req);
evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param);
param->task->save.saved ++;
}
msg_debug ("surbl_test_url: send surbl dns request %s", surbl_req);
evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param);
param->task->save.saved ++;
}
else {
msg_debug ("make_surbl_requests: request %s is already sent", surbl_req);
}
}
else {
msg_info ("surbl_test_url: cannot format url string for surbl %s", struri (url));
else if (err != NULL && err->code != WHITELIST_ERROR) {
msg_info ("surbl_test_url: cannot format url string for surbl %s, %s", struri (url), err->message);
return;
}
cur = g_list_next (cur);
@@ -343,12 +405,6 @@ process_dns_results (struct worker_task *task, struct suffix_item *suffix, char
struct surbl_bit_item *bit;
int len, found = 0;
if (task->cmd == CMD_URLS) {
insert_result (task, surbl_module_ctx->metric, suffix->symbol, 1,
g_list_prepend (NULL, memory_pool_strdup (task->task_pool, url)));
return;
}

if ((c = strchr (suffix->symbol, '%')) != NULL && *(c + 1) == 'b') {
cur = g_list_first (surbl_module_ctx->bits);

@@ -656,22 +712,17 @@ tree_url_callback (gpointer key, gpointer value, void *data)

msg_debug ("surbl_test_url: check url %s", struri (url));

if (param->task->cmd == CMD_URLS) {
make_surbl_requests (url, param->task, param->tree);
if (surbl_module_ctx->use_redirector) {
register_redirector_call (url, param->task, param->tree);
param->task->save.saved++;
}
else {
if (surbl_module_ctx->use_redirector) {
register_redirector_call (url, param->task, param->tree);
if (param->task->worker->srv->cfg->memcached_servers_num > 0) {
register_memcached_call (url, param->task, param->tree);
param->task->save.saved++;
}
else {
if (param->task->worker->srv->cfg->memcached_servers_num > 0) {
register_memcached_call (url, param->task, param->tree);
param->task->save.saved++;
}
else {
make_surbl_requests (url, param->task, param->tree);
}
make_surbl_requests (url, param->task, param->tree);
}
}

@@ -715,6 +766,55 @@ surbl_test_url (struct worker_task *task)
return 0;
}

static int
urls_command_handler (struct worker_task *task)
{
GList *cur;
char outbuf[16384], *urlstr;
int r, num = 0;
struct uri *url;
GError *err = NULL;
GTree *url_tree;
f_str_t f;
char *host_end;

url_tree = g_tree_new ((GCompareFunc)g_ascii_strcasecmp);

r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK");
r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "URLs: ");
cur = g_list_first (task->urls);

while (cur) {
num ++;
url = cur->data;
if (g_tree_lookup (url_tree, struri (url)) == NULL) {
g_tree_insert (url_tree, struri (url), url);
f.begin = url->host;
f.len = url->hostlen;
if ((urlstr = format_surbl_request (task->task_pool, &f, NULL, &host_end, FALSE, &err)) != NULL) {
if (g_list_next (cur) != NULL) {
r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s, ", (char *)urlstr);
}
else {
r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s", (char *)urlstr);
}
}
}
cur = g_list_next (cur);
}
outbuf[r++] = '\r'; outbuf[r++] = '\n';

rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE);
msg_info ("process_message: msg ok, id: <%s>, %d urls extracted", task->message_id, num);
g_tree_destroy (url_tree);

return 0;
}


/*
* vi:ts=4
*/

+ 0
- 1
src/plugins/surbl.h View File

@@ -16,7 +16,6 @@
#define DEFAULT_SURBL_SYMBOL "SURBL_DNS"
#define DEFAULT_SURBL_SUFFIX "multi.surbl.org"


struct surbl_ctx {
int (*header_filter)(struct worker_task *task);
int (*mime_filter)(struct worker_task *task);

+ 3
- 45
src/protocol.c View File

@@ -61,11 +61,6 @@
*/
#define MSG_CMD_PROCESS "process"

/*
* Only extract urls from message
*/
#define MSG_CMD_URLS "urls"

/*
* spamassassin greeting:
*/
@@ -177,17 +172,6 @@ parse_command (struct worker_task *task, f_str_t *line)
return -1;
}
break;
case 'u':
case 'U':
/* urls */
if (g_ascii_strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
task->cmd = CMD_URLS;
}
else {
msg_debug ("parse_command: bad command: %s", token);
return -1;
}
break;
default:
cur = custom_commands;
while (cur) {
@@ -197,6 +181,7 @@ parse_command (struct worker_task *task, f_str_t *line)
task->custom_cmd = cmd;
break;
}
cur = g_list_next (cur);
}

if (cur == NULL) {
@@ -445,12 +430,7 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data)
GList *cur;

if (s->options) {
if (task->cmd != CMD_URLS) {
r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key);
}
else {
r = snprintf (outbuf, OUTBUFSIZ, "Urls: ");
}
r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s; ", (char *)key);
cur = s->options;
while (cur) {
if (g_list_next (cur)) {
@@ -467,9 +447,7 @@ metric_symbols_callback (gpointer key, gpointer value, void *user_data)
outbuf[OUTBUFSIZ - 1] = '\n';
}
}
else if (task->cmd != CMD_URLS) {
r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s" CRLF, (char *)key);
}
r = snprintf (outbuf, OUTBUFSIZ, "Symbol: %s" CRLF, (char *)key);
cd->log_offset += snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset,
"%s,", (char *)key);

@@ -616,23 +594,6 @@ write_check_reply (struct worker_task *task)
return 0;
}

static int
write_urls_reply (struct worker_task *task)
{
int r;
char outbuf[OUTBUFSIZ];

r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK");
rspamd_dispatcher_write (task->dispatcher, outbuf, r, TRUE);

show_url_header (task);

msg_info ("process_message: msg ok, id: <%s>, %d urls extracted", task->message_id, g_list_length (task->urls));

return 0;
}


static int
write_process_reply (struct worker_task *task)
{
@@ -730,9 +691,6 @@ write_reply (struct worker_task *task)
r = snprintf (outbuf, sizeof (outbuf), "%s 0 PONG" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER);
rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE);
break;
case CMD_URLS:
return write_urls_reply (task);
break;
case CMD_OTHER:
return task->custom_cmd->func (task);
}

+ 0
- 1
src/protocol.h View File

@@ -37,7 +37,6 @@ enum rspamd_command {
CMD_SKIP,
CMD_PING,
CMD_PROCESS,
CMD_URLS,
CMD_OTHER,
};


+ 1
- 1
src/worker.c View File

@@ -180,7 +180,7 @@ read_socket (f_str_t *in, void *arg)
task->state = WRITE_ERROR;
write_socket (task);
}
if (task->cmd == CMD_URLS || task->cmd == CMD_OTHER) {
if (task->cmd == CMD_OTHER) {
/* Skip filters */
task->state = WRITE_REPLY;
write_socket (task);

Loading…
Cancel
Save