diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-06-15 19:42:28 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-06-15 19:42:28 +0400 |
commit | c76bd2e9ab2506ef06cc291c3e4b6b57ae08ea65 (patch) | |
tree | deb00c3ceb9659a1d8b80845a7218b93cea4cbab /src/plugins | |
parent | c9d11a65b5c801a27f154091aebe86cbd08fd319 (diff) | |
download | rspamd-c76bd2e9ab2506ef06cc291c3e4b6b57ae08ea65.tar.gz rspamd-c76bd2e9ab2506ef06cc291c3e4b6b57ae08ea65.zip |
* Add module for blacklisting emails (self documented in sample config)
* Add command 'emails' for extracting emails from a message
* Rework the protocol layout so that the rspamd protocol can be extended with custom commands registered from anywhere in the code
* Allow rspamc to work without an explicit 'command' parameter. The default command is 'symbols'.
* Update version to 0.1.8
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/emails.c | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/src/plugins/emails.c b/src/plugins/emails.c new file mode 100644 index 000000000..2e2dcac63 --- /dev/null +++ b/src/plugins/emails.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2009, Rambler media + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/***MODULE:email + * rspamd module that extracts emails from messages and check them via blacklist + */ + +#include "../config.h" +#include "../main.h" +#include "../message.h" +#include "../modules.h" +#include "../cfg_file.h" +#include "../expressions.h" +#include "../util.h" + +#define DEFAULT_SYMBOL "R_BAD_EMAIL" + +static const char *email_re_text = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+(?:[A-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\\b"; + +struct email_ctx { + int (*header_filter)(struct worker_task *task); + int (*mime_filter)(struct worker_task *task); + int (*message_filter)(struct worker_task *task); + int (*url_filter)(struct worker_task *task); + char *metric; + char *symbol; + GRegex *email_re; + + GHashTable *blacklist; + char *blacklist_file; + + memory_pool_t *email_pool; +}; + +static struct email_ctx *email_module_ctx = NULL; + +static int emails_mime_filter (struct worker_task *task); +static int emails_command_handler (struct worker_task *task); + +int +emails_module_init (struct config_file *cfg, struct module_ctx **ctx) +{ + GError *err = NULL; + + email_module_ctx = g_malloc (sizeof (struct email_ctx)); + + email_module_ctx->header_filter = NULL; + email_module_ctx->mime_filter = emails_mime_filter; + email_module_ctx->message_filter = NULL; + email_module_ctx->url_filter = NULL; + email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ()); + email_module_ctx->email_re = g_regex_new (email_re_text, G_REGEX_RAW | G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &err); + email_module_ctx->blacklist = g_hash_table_new (g_str_hash, g_str_equal); + + *ctx = (struct module_ctx *)email_module_ctx; + + register_protocol_command ("emails", emails_command_handler); + + return 0; +} + + +int +emails_module_config (struct config_file *cfg) +{ + char *value; + int res = TRUE; + + if ((value = get_module_opt (cfg, "emails", "metric")) != NULL) { + 
email_module_ctx->metric = memory_pool_strdup (email_module_ctx->email_pool, value); + g_free (value); + } + else { + email_module_ctx->metric = DEFAULT_METRIC; + } + if ((value = get_module_opt (cfg, "emails", "symbol")) != NULL) { + email_module_ctx->symbol = memory_pool_strdup (email_module_ctx->email_pool, value); + g_free (value); + } + else { + email_module_ctx->symbol = DEFAULT_SYMBOL; + } + if ((value = get_module_opt (cfg, "emails", "blacklist")) != NULL) { + if (g_ascii_strncasecmp (value, "file://", sizeof ("file://") - 1) == 0) { + if (parse_host_list (email_module_ctx->email_pool, email_module_ctx->blacklist, value + sizeof ("file://") - 1)) { + email_module_ctx->blacklist_file = memory_pool_strdup (email_module_ctx->email_pool, value + sizeof ("file://") - 1); + } + } + } + return res; +} + +int +emails_module_reconfig (struct config_file *cfg) +{ + memory_pool_delete (email_module_ctx->email_pool); + email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ()); + + return emails_module_config (cfg); +} + +static GList * +extract_emails (struct worker_task *task) +{ + GList *res = NULL, *cur; + GMatchInfo *info; + GError *err = NULL; + struct mime_text_part *part; + char *email_str; + int rc; + + cur = g_list_first (task->text_parts); + while (cur) { + part = cur->data; + + rc = g_regex_match_full (email_module_ctx->email_re, (const char *)part->orig->data, part->orig->len, 0, 0, &info, &err); + if (rc) { + while (g_match_info_matches (info)) { + email_str = g_match_info_fetch (info, 0); + if (email_str != NULL) { + res = g_list_prepend (res, email_str); + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, email_str); + } + /* Get next match */ + g_match_info_next (info, &err); + } + } + else if (err != NULL) { + msg_debug ("extract_emails: error matching regexp: %s", err->message); + } + else { + msg_debug ("extract_emails: cannot find url pattern in given string"); + } + g_match_info_free (info); + + cur = 
g_list_next (cur); + } + if (res != NULL) { + memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, res); + } + + return res; +} + +static int +emails_command_handler (struct worker_task *task) +{ + GList *emails, *cur; + char outbuf[BUFSIZ]; + int r, num = 0; + + emails = extract_emails (task); + + r = snprintf (outbuf, sizeof (outbuf), "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK"); + + r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "Emails: "); + + cur = g_list_first (emails); + + while (cur) { + num ++; + if (g_list_next (cur) != NULL) { + r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s, ", (char *)cur->data); + } + else { + r += snprintf (outbuf + r, sizeof (outbuf) - r - 2, "%s", (char *)cur->data); + } + cur = g_list_next (cur); + } + + outbuf[r++] = '\r'; outbuf[r++] = '\n'; + + rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE); + msg_info ("process_message: msg ok, id: <%s>, %d emails extracted", task->message_id, num); + + return 0; +} + +static int +emails_mime_filter (struct worker_task *task) +{ + GList *emails, *cur; + + emails = extract_emails (task); + + if (email_module_ctx->blacklist && emails) { + cur = g_list_first (emails); + + while (cur) { + if (g_hash_table_lookup (email_module_ctx->blacklist, cur->data) != NULL) { + insert_result (task, email_module_ctx->metric, email_module_ctx->symbol, 1, + g_list_prepend (NULL, memory_pool_strdup (task->task_pool, (char *)cur->data))); + + } + cur = g_list_next (cur); + } + } + + return 0; +} + |