From: Vsevolod Stakhov Date: Mon, 15 Jun 2009 15:42:28 +0000 (+0400) Subject: * Add module for blacklisting emails (self documented in sample config) X-Git-Tag: 0.2.7~123 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=c76bd2e9ab2506ef06cc291c3e4b6b57ae08ea65;p=rspamd.git * Add module for blacklisting emails (self documented in sample config) * Add command 'emails' for extracting emails from a message * Rework protocol layout to allow expanding rspamd protocol by custom commands that can be added from anywhere in code * Allow rspamc to work without strictly parameter 'command'. Command by default is 'symbols'. * Update version to 0.1.8 --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 88b649a4b..1a4a0b042 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ PROJECT(rspamd C) SET(RSPAMD_VERSION_MAJOR 0) SET(RSPAMD_VERSION_MINOR 1) -SET(RSPAMD_VERSION_PATCH 7) +SET(RSPAMD_VERSION_PATCH 8) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd") @@ -323,7 +323,8 @@ SET(CLASSIFIERSSRC src/classifiers/classifiers.c SET(PLUGINSSRC src/plugins/surbl.c src/plugins/regexp.c - src/plugins/chartable.c) + src/plugins/chartable.c + src/plugins/emails.c) SET(TESTSRC test/rspamd_expression_test.c test/rspamd_memcached_test.c diff --git a/rspamc.pl.in b/rspamc.pl.in index d9e5def14..a1f667511 100755 --- a/rspamc.pl.in +++ b/rspamc.pl.in @@ -203,8 +203,7 @@ my $cmd = shift; my $do_parse_config = 1; if (!defined ($cmd) || $cmd eq '') { - HELP_MESSAGE(); - exit; + $cmd = 'SYMBOLS'; } if (defined ($args{c})) { @@ -237,7 +236,7 @@ if (defined ($args{p})) { $cfg{'port'} = $args{p}; } -if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT|URLS)/i) { +if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT|URLS|EMAILS)/i) { $cfg{'command'} = $1; $cfg{'control'} = 0; } diff --git a/rspamd.conf.sample b/rspamd.conf.sample index 5184aef09..2b70cbd7c 100644 
--- a/rspamd.conf.sample
+++ b/rspamd.conf.sample
@@ -146,6 +146,12 @@ $subject_blah = "Subject=/blah/H";
 	threshold = "0.1";
 };
 
+.module 'emails' {
+	metric = "default";
+	symbol = "R_BAD_EMAIL";
+	blacklist = "file:///some/path/emails.lst";
+};
+
 # If enables threat each regexp as raw regex and do not try to convert
 # each text part to utf8 encoding. Save a lot of resources but less
 # portable.
@@ -154,4 +160,4 @@ raw_mode = yes;
 
 url_filters = "surbl";
 header_filters = "regexp";
-mime_filters = "chartable";
+mime_filters = "chartable,emails";
diff --git a/src/main.h b/src/main.h
index 924fe13ed..c80efc494 100644
--- a/src/main.h
+++ b/src/main.h
@@ -166,6 +166,7 @@ struct worker_task {
 	size_t content_length; /**< length of user's input */
 	enum rspamd_protocol proto; /**< protocol (rspamc or spamc) */
 	enum rspamd_command cmd; /**< command */
+	struct custom_command *custom_cmd; /**< custom command if any */
 	int sock; /**< socket descriptor */
 	char *helo; /**< helo header value */
 	char *from; /**< from header value */
diff --git a/src/plugins/emails.c b/src/plugins/emails.c
new file mode 100644
index 000000000..2e2dcac63
--- /dev/null
+++ b/src/plugins/emails.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/***MODULE:email
+ * rspamd module that extracts emails from messages and checks them against a blacklist
+ */
+
+#include "../config.h"
+#include "../main.h"
+#include "../message.h"
+#include "../modules.h"
+#include "../cfg_file.h"
+#include "../expressions.h"
+#include "../util.h"
+
+#define DEFAULT_SYMBOL "R_BAD_EMAIL"
+
+static const char *email_re_text = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+(?:[A-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\\b";
+
+struct email_ctx {
+	int (*header_filter)(struct worker_task *task);
+	int (*mime_filter)(struct worker_task *task);
+	int (*message_filter)(struct worker_task *task);
+	int (*url_filter)(struct worker_task *task);
+	char *metric;
+	char *symbol;
+	GRegex *email_re;
+
+	GHashTable *blacklist;
+	char *blacklist_file;
+
+	memory_pool_t *email_pool;
+};
+
+static struct email_ctx *email_module_ctx = NULL;
+
+static int emails_mime_filter (struct worker_task *task);
+static int emails_command_handler (struct worker_task *task);
+
+/*
+ * Allocate the module context, compile the email regexp and register the
+ * custom 'emails' protocol command.
+ */
+int
+emails_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+	GError *err = NULL;
+
+	/* Zeroed, so fields filled only by emails_module_config start as NULL */
+	email_module_ctx = g_malloc0 (sizeof (struct email_ctx));
+
+	email_module_ctx->header_filter = NULL;
+	email_module_ctx->mime_filter = emails_mime_filter;
+	email_module_ctx->message_filter = NULL;
+	email_module_ctx->url_filter = NULL;
+	email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ());
+	email_module_ctx->email_re = g_regex_new (email_re_text, G_REGEX_RAW | G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &err);
+	if (email_module_ctx->email_re == NULL) {
+		/* extract_emails returns nothing while the regexp is missing */
+		msg_info ("emails_module_init: cannot compile email regexp: %s", err ? err->message : "unknown error");
+		g_clear_error (&err);
+	}
+	email_module_ctx->blacklist = g_hash_table_new (g_str_hash, g_str_equal);
+
+	*ctx = (struct module_ctx *)email_module_ctx;
+
+	register_protocol_command ("emails", emails_command_handler);
+
+	return 0;
+}
+
+
+/*
+ * Read the 'metric', 'symbol' and 'blacklist' options of the 'emails' module;
+ * metric and symbol fall back to DEFAULT_METRIC and DEFAULT_SYMBOL.
+ */
+int
+emails_module_config (struct config_file *cfg)
+{
+	char *value;
+	int res = TRUE;
+
+	if ((value = get_module_opt (cfg, "emails", "metric")) != NULL) {
+		email_module_ctx->metric = memory_pool_strdup (email_module_ctx->email_pool, value);
+		g_free (value);
+	}
+	else {
+		email_module_ctx->metric = DEFAULT_METRIC;
+	}
+	if ((value = get_module_opt (cfg, "emails", "symbol")) != NULL) {
+		email_module_ctx->symbol = memory_pool_strdup (email_module_ctx->email_pool, value);
+		g_free (value);
+	}
+	else {
+		email_module_ctx->symbol = DEFAULT_SYMBOL;
+	}
+	/* Forget the old path: after a reconfig it would point into the deleted pool */
+	email_module_ctx->blacklist_file = NULL;
+	if ((value = get_module_opt (cfg, "emails", "blacklist")) != NULL) {
+		/* NOTE(review): value is not g_free'd here unlike above - confirm ownership of get_module_opt result */
+		if (g_ascii_strncasecmp (value, "file://", sizeof ("file://") - 1) == 0) {
+			if (parse_host_list (email_module_ctx->email_pool, email_module_ctx->blacklist, value + sizeof ("file://") - 1)) {
+				email_module_ctx->blacklist_file = memory_pool_strdup (email_module_ctx->email_pool, value + sizeof ("file://") - 1);
+			}
+		}
+	}
+	return res;
+}
+
+/*
+ * Drop the state loaded by the previous configuration and read it again.
+ */
+int
+emails_module_reconfig (struct config_file *cfg)
+{
+	/*
+	 * Empty the blacklist first: otherwise addresses of the old list survive the
+	 * reload (and presumably keep pointing into the pool deleted below).
+	 */
+	g_hash_table_remove_all (email_module_ctx->blacklist);
+	memory_pool_delete (email_module_ctx->email_pool);
+	email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ());
+
+	return emails_module_config (cfg);
+}
+
+/*
+ * Collect all email addresses found in the text parts of a task.
+ * Destructors for the returned list and its strings are registered in task->task_pool.
+ */
+static GList *
+extract_emails (struct worker_task *task)
+{
+	GList *res = NULL, *cur;
+	GMatchInfo *info;
+	GError *err = NULL;
+	struct mime_text_part *part;
+	char *email_str;
+	int rc;
+
+	if (email_module_ctx->email_re == NULL) {
+		/* The regexp failed to compile, nothing can be matched */
+		return NULL;
+	}
+
+	cur = g_list_first (task->text_parts);
+	while (cur) {
+		part = cur->data;
+		info = NULL;
+
+		rc = g_regex_match_full (email_module_ctx->email_re, (const char *)part->orig->data, part->orig->len, 0, 0, &info, &err);
+		if (rc) {
+			while (g_match_info_matches (info)) {
+				email_str = g_match_info_fetch (info, 0);
+				if (email_str != NULL) {
+					res = g_list_prepend (res, email_str);
+					memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, email_str);
+				}
+				/* Get next match */
+				g_match_info_next (info, &err);
+			}
+		}
+		if (err != NULL) {
+			msg_debug ("extract_emails: error matching regexp: %s", err->message);
+			/* Reset the error, GLib must not be handed an already set GError */
+			g_clear_error (&err);
+		}
+		else if (!rc) {
+			msg_debug ("extract_emails: cannot find email pattern in given string");
+		}
+		g_match_info_free (info);
+
+		cur = g_list_next (cur);
+	}
+	if (res != NULL) {
+		memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, res);
+	}
+
+	return res;
+}
+
+/*
+ * Reply handler of the custom 'emails' command: writes the banner line and an
+ * "Emails: " line with the comma separated addresses found in the message.
+ */
+static int
+emails_command_handler (struct worker_task *task)
+{
+	GList *emails, *cur;
+	GString *reply;
+	char *outbuf;
+	gsize len;
+	int num = 0;
+
+	emails = extract_emails (task);
+
+	/*
+	 * Addresses come from the scanned message and their total length is not
+	 * bounded, so build the reply in a growing buffer. Accumulating snprintf
+	 * results in a fixed array ran past its end once the output was truncated.
+	 */
+	reply = g_string_sized_new (BUFSIZ);
+	g_string_append_printf (reply, "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK");
+	g_string_append (reply, "Emails: ");
+
+	cur = g_list_first (emails);
+
+	while (cur) {
+		num ++;
+		g_string_append (reply, (const char *)cur->data);
+		if (g_list_next (cur) != NULL) {
+			g_string_append (reply, ", ");
+		}
+		cur = g_list_next (cur);
+	}
+
+	g_string_append (reply, "\r\n");
+
+	/* Tie the buffer to the task pool so it stays valid after this function returns */
+	len = reply->len;
+	outbuf = g_string_free (reply, FALSE);
+	memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, outbuf);
+
+	rspamd_dispatcher_write (task->dispatcher, outbuf, len, FALSE);
+	msg_info ("process_message: msg ok, id: <%s>, %d emails extracted", task->message_id, num);
+
+	return 0;
+}
+
+/*
+ * MIME filter: inserts the configured symbol for every extracted address
+ * that is present in the blacklist.
+ */
+static int
+emails_mime_filter (struct worker_task *task)
+{
+	GList *emails, *cur;
+
+	emails = extract_emails (task);
+
+	if (email_module_ctx->blacklist && emails) {
+		cur = g_list_first (emails);
+
+		while (cur) {
+			if (g_hash_table_lookup (email_module_ctx->blacklist, cur->data) != NULL) {
+				insert_result (task, email_module_ctx->metric, email_module_ctx->symbol, 1,
+					g_list_prepend (NULL, memory_pool_strdup (task->task_pool, (char *)cur->data)));
+
+			}
+			cur = g_list_next (cur);
+		}
+	}
+
+	return 0;
+}
+
diff --git a/src/protocol.c b/src/protocol.c
index 0209adb0c..7f6c98295 100644
--- a/src/protocol.c
+++ b/src/protocol.c
@@ -86,14 +86,8 @@
 #define QUEUE_ID_HEADER "Queue-ID"
 #define ERROR_HEADER "Error"
 #define USER_HEADER "User"
-/*
- * Reply messages
- */
-#define RSPAMD_REPLY_BANNER "RSPAMD/1.0"
-#define SPAMD_REPLY_BANNER "SPAMD/1.1"
-#define SPAMD_OK "EX_OK"
-/* XXX: try to convert rspamd errors to spamd errors */
-#define SPAMD_ERROR "EX_ERROR"
+
+static GList *custom_commands = NULL;
 
 static char *
 separate_command (f_str_t *in, char c)
@@ -120,6 +114,8 @@ static int
 parse_command (struct worker_task *task, f_str_t *line)
 {
 	char *token;
+	struct custom_command *cmd;
+	GList *cur;
 
 	token = separate_command (line, ' ');
 	if (line == NULL || token == NULL) {
@@ -131,7 +127,7 @@ parse_command (struct worker_task *task,
f_str_t *line)
 		case 'c':
 		case 'C':
 			/* check */
-			if (strcasecmp (token + 1, MSG_CMD_CHECK + 1) == 0) {
+			if (g_ascii_strcasecmp (token + 1, MSG_CMD_CHECK + 1) == 0) {
 				task->cmd = CMD_CHECK;
 			}
 			else {
@@ -142,10 +138,10 @@ parse_command (struct worker_task *task, f_str_t *line)
 		case 's':
 		case 'S':
 			/* symbols, skip */
-			if (strcasecmp (token + 1, MSG_CMD_SYMBOLS + 1) == 0) {
+			if (g_ascii_strcasecmp (token + 1, MSG_CMD_SYMBOLS + 1) == 0) {
 				task->cmd = CMD_SYMBOLS;
 			}
-			else if (strcasecmp (token + 1, MSG_CMD_SKIP + 1) == 0) {
+			else if (g_ascii_strcasecmp (token + 1, MSG_CMD_SKIP + 1) == 0) {
 				task->cmd = CMD_SKIP;
 			}
 			else {
@@ -156,10 +152,10 @@ parse_command (struct worker_task *task, f_str_t *line)
 		case 'p':
 		case 'P':
 			/* ping, process */
-			if (strcasecmp (token + 1, MSG_CMD_PING + 1) == 0) {
+			if (g_ascii_strcasecmp (token + 1, MSG_CMD_PING + 1) == 0) {
 				task->cmd = CMD_PING;
 			}
-			else if (strcasecmp (token + 1, MSG_CMD_PROCESS + 1) == 0) {
+			else if (g_ascii_strcasecmp (token + 1, MSG_CMD_PROCESS + 1) == 0) {
 				task->cmd = CMD_PROCESS;
 			}
 			else {
@@ -170,10 +166,10 @@ parse_command (struct worker_task *task, f_str_t *line)
 		case 'r':
 		case 'R':
 			/* report, report_ifspam */
-			if (strcasecmp (token + 1, MSG_CMD_REPORT + 1) == 0) {
+			if (g_ascii_strcasecmp (token + 1, MSG_CMD_REPORT + 1) == 0) {
 				task->cmd = CMD_REPORT;
 			}
-			else if (strcasecmp (token + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
+			else if (g_ascii_strcasecmp (token + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
 				task->cmd = CMD_REPORT_IFSPAM;
 			}
 			else {
@@ -184,7 +180,7 @@ parse_command (struct worker_task *task, f_str_t *line)
 		case 'u':
 		case 'U':
 			/* urls */
-			if (strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
+			if (g_ascii_strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
 				task->cmd = CMD_URLS;
 			}
 			else {
@@ -193,8 +189,24 @@ parse_command (struct worker_task *task, f_str_t *line)
 			}
 			break;
 		default:
-			msg_debug ("parse_command: bad command: %s", token);
-			return -1;
+			/* Look the token up among registered custom commands, ignoring case */
+			cur = custom_commands;
+			while (cur) {
+				cmd = cur->data;
+				if (g_ascii_strcasecmp (token, cmd->name) == 0) {
+					task->cmd = CMD_OTHER;
+					task->custom_cmd = cmd;
+					break;
+				}
+				/* Advance, otherwise a non-matching first entry loops forever */
+				cur = g_list_next (cur);
+			}
+
+			if (cur == NULL) {
+				msg_debug ("parse_command: bad command: %s", token);
+				return -1;
+			}
+			break;
 	}
 
 	if (strncasecmp (line->begin, RSPAMC_GREETING, sizeof (RSPAMC_GREETING) - 1) == 0) {
@@ -742,8 +754,22 @@ write_reply (struct worker_task *task)
 			case CMD_URLS:
 				return write_urls_reply (task);
 				break;
+			case CMD_OTHER:
+				return task->custom_cmd->func (task);
 		}
 	}
 
 	return 0;
 }
+
+void
+register_protocol_command (const char *name, protocol_reply_func func)
+{
+	struct custom_command *cmd;
+
+	cmd = g_malloc (sizeof (struct custom_command));
+	cmd->name = name;
+	cmd->func = func;
+
+	custom_commands = g_list_prepend (custom_commands, cmd);
+}
diff --git a/src/protocol.h b/src/protocol.h
index ed00edc26..bd2ac5343 100644
--- a/src/protocol.h
+++ b/src/protocol.h
@@ -13,6 +13,15 @@
 #define RSPAMD_PROTOCOL_ERROR 3
 #define RSPAMD_LENGTH_ERROR 4
 
+/*
+ * Reply messages
+ */
+#define RSPAMD_REPLY_BANNER "RSPAMD/1.0"
+#define SPAMD_REPLY_BANNER "SPAMD/1.1"
+#define SPAMD_OK "EX_OK"
+/* XXX: try to convert rspamd errors to spamd errors */
+#define SPAMD_ERROR "EX_ERROR"
+
 struct worker_task;
 
 enum rspamd_protocol {
@@ -29,6 +38,15 @@ enum rspamd_command {
 	CMD_PING,
 	CMD_PROCESS,
 	CMD_URLS,
+	CMD_OTHER,
+};
+
+
+typedef int (*protocol_reply_func)(struct worker_task *task);
+
+struct custom_command {
+	const char *name;
+	protocol_reply_func func;
 };
 
 /**
@@ -46,4 +64,12 @@ int read_rspamd_input_line (struct worker_task *task, f_str_t *line);
  */
 int write_reply (struct worker_task *task);
 
+
+/**
+ * Register custom function to extend protocol
+ * @param name symbolic name of custom function
+ * @param func callback function for writing reply
+ */
+void register_protocol_command (const char *name, protocol_reply_func func);
+
 #endif
diff --git a/src/worker.c b/src/worker.c
index 99c311e1f..2da9383e9 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -180,6 +180,12 @@
read_socket (f_str_t *in, void *arg) task->state = WRITE_ERROR; write_socket (task); } + if (task->cmd == CMD_URLS || task->cmd == CMD_OTHER) { + /* Skip filters */ + task->state = WRITE_REPLY; + write_socket (task); + return; + } r = process_filters (task); if (r == -1) { task->last_error = "Filter processing error";