aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-06-15 19:42:28 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-06-15 19:42:28 +0400
commitc76bd2e9ab2506ef06cc291c3e4b6b57ae08ea65 (patch)
treedeb00c3ceb9659a1d8b80845a7218b93cea4cbab
parentc9d11a65b5c801a27f154091aebe86cbd08fd319 (diff)
downloadrspamd-c76bd2e9ab2506ef06cc291c3e4b6b57ae08ea65.tar.gz
rspamd-c76bd2e9ab2506ef06cc291c3e4b6b57ae08ea65.zip
* Add module for blacklisting emails (self documented in sample config)
* Add command 'emails' for extracting emails from a message * Rework protocol layout to allow expanding rspamd protocol by custom commands that can be added from anywhere in code * Allow rspamc to work without strictly parameter 'command'. Command by default is 'symbols'. * Update version to 0.1.8
-rw-r--r--CMakeLists.txt5
-rwxr-xr-xrspamc.pl.in5
-rw-r--r--rspamd.conf.sample8
-rw-r--r--src/main.h1
-rw-r--r--src/plugins/emails.c222
-rw-r--r--src/protocol.c59
-rw-r--r--src/protocol.h26
-rw-r--r--src/worker.c6
8 files changed, 308 insertions, 24 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 88b649a4b..1a4a0b042 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ PROJECT(rspamd C)
SET(RSPAMD_VERSION_MAJOR 0)
SET(RSPAMD_VERSION_MINOR 1)
-SET(RSPAMD_VERSION_PATCH 7)
+SET(RSPAMD_VERSION_PATCH 8)
SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd")
@@ -323,7 +323,8 @@ SET(CLASSIFIERSSRC src/classifiers/classifiers.c
SET(PLUGINSSRC src/plugins/surbl.c
src/plugins/regexp.c
- src/plugins/chartable.c)
+ src/plugins/chartable.c
+ src/plugins/emails.c)
SET(TESTSRC test/rspamd_expression_test.c
test/rspamd_memcached_test.c
diff --git a/rspamc.pl.in b/rspamc.pl.in
index d9e5def14..a1f667511 100755
--- a/rspamc.pl.in
+++ b/rspamc.pl.in
@@ -203,8 +203,7 @@ my $cmd = shift;
my $do_parse_config = 1;
if (!defined ($cmd) || $cmd eq '') {
- HELP_MESSAGE();
- exit;
+ $cmd = 'SYMBOLS';
}
if (defined ($args{c})) {
@@ -237,7 +236,7 @@ if (defined ($args{p})) {
$cfg{'port'} = $args{p};
}
-if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT|URLS)/i) {
+if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT|URLS|EMAILS)/i) {
$cfg{'command'} = $1;
$cfg{'control'} = 0;
}
diff --git a/rspamd.conf.sample b/rspamd.conf.sample
index 5184aef09..2b70cbd7c 100644
--- a/rspamd.conf.sample
+++ b/rspamd.conf.sample
@@ -146,6 +146,12 @@ $subject_blah = "Subject=/blah/H";
threshold = "0.1";
};
+.module 'emails' {
+ metric = "default";
+ symbol = "R_BAD_EMAIL";
+ blacklist = "file:///some/path/emails.lst";
+};
+
# If enabled, treat each regexp as a raw regex and do not try to convert
# each text part to utf8 encoding. Saves a lot of resources but is less
# portable.
@@ -154,4 +160,4 @@ raw_mode = yes;
url_filters = "surbl";
header_filters = "regexp";
-mime_filters = "chartable";
+mime_filters = "chartable,emails";
diff --git a/src/main.h b/src/main.h
index 924fe13ed..c80efc494 100644
--- a/src/main.h
+++ b/src/main.h
@@ -166,6 +166,7 @@ struct worker_task {
size_t content_length; /**< length of user's input */
enum rspamd_protocol proto; /**< protocol (rspamc or spamc) */
enum rspamd_command cmd; /**< command */
+ struct custom_command *custom_cmd; /**< custom command if any */
int sock; /**< socket descriptor */
char *helo; /**< helo header value */
char *from; /**< from header value */
diff --git a/src/plugins/emails.c b/src/plugins/emails.c
new file mode 100644
index 000000000..2e2dcac63
--- /dev/null
+++ b/src/plugins/emails.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/***MODULE:email
+ * rspamd module that extracts emails from messages and checks them against a blacklist
+ */
+
+#include "../config.h"
+#include "../main.h"
+#include "../message.h"
+#include "../modules.h"
+#include "../cfg_file.h"
+#include "../expressions.h"
+#include "../util.h"
+
+#define DEFAULT_SYMBOL "R_BAD_EMAIL" /* symbol used when the module config has no "symbol" option */
+/* Pattern for e-mail addresses; the domain must end in a two-letter code or one of the listed generic TLDs. Compiled with G_REGEX_CASELESS in emails_module_init (). */
+static const char *email_re_text = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+(?:[A-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\\b";
+
+struct email_ctx { /* State of the emails module. NOTE(review): cast to struct module_ctx * in emails_module_init (), so the leading filter pointers presumably must mirror that struct - confirm before reordering. */
+ int (*header_filter)(struct worker_task *task); /* set to NULL in emails_module_init () */
+ int (*mime_filter)(struct worker_task *task); /* set to emails_mime_filter in emails_module_init () */
+ int (*message_filter)(struct worker_task *task); /* set to NULL in emails_module_init () */
+ int (*url_filter)(struct worker_task *task); /* set to NULL in emails_module_init () */
+ char *metric; /* metric passed to insert_result (); DEFAULT_METRIC unless configured */
+ char *symbol; /* symbol passed to insert_result (); DEFAULT_SYMBOL unless configured */
+ GRegex *email_re; /* compiled form of email_re_text */
+
+ GHashTable *blacklist; /* string-keyed table filled by parse_host_list () and looked up per extracted address */
+ char *blacklist_file; /* path of the blacklist (config value without "file://"), set only when the file was parsed */
+
+ memory_pool_t *email_pool; /* pool holding the configuration strings; recreated on reconfig */
+};
+
+static struct email_ctx *email_module_ctx = NULL; /* single module-wide context, allocated in emails_module_init () */
+
+static int emails_mime_filter (struct worker_task *task); /* MIME filter: checks extracted addresses against the blacklist */
+static int emails_command_handler (struct worker_task *task); /* reply writer for the custom "emails" protocol command */
+
+/*
+ * Allocate the module context, compile the e-mail regexp, create the
+ * (initially empty) blacklist table and register the "emails" protocol
+ * command.
+ *
+ * cfg is not used here. *ctx receives the new context cast to the generic
+ * module context type. Always returns 0.
+ */
+int
+emails_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+	GError *err = NULL;
+
+	/* Zero-filled so that metric, symbol and blacklist_file are NULL rather than garbage until emails_module_config () runs */
+	email_module_ctx = g_malloc0 (sizeof (struct email_ctx));
+
+	email_module_ctx->header_filter = NULL;
+	email_module_ctx->mime_filter = emails_mime_filter;
+	email_module_ctx->message_filter = NULL;
+	email_module_ctx->url_filter = NULL;
+	email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ());
+	email_module_ctx->email_re = g_regex_new (email_re_text, G_REGEX_RAW | G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &err);
+	if (email_module_ctx->email_re == NULL) {
+		/* Report the compile failure instead of dropping it silently, and release the error object */
+		msg_info ("emails_module_init: cannot compile email regexp: %s", err != NULL ? err->message : "unknown error");
+		if (err != NULL) {
+			g_error_free (err);
+		}
+	}
+	email_module_ctx->blacklist = g_hash_table_new (g_str_hash, g_str_equal);
+
+	*ctx = (struct module_ctx *)email_module_ctx;
+
+	register_protocol_command ("emails", emails_command_handler);
+
+	return 0;
+}
+
+
+/*
+ * Read the "emails" module options: "metric" and "symbol" (falling back to
+ * DEFAULT_METRIC / DEFAULT_SYMBOL) and "blacklist", which is honoured only
+ * when it starts with "file://" (compared case-insensitively).
+ *
+ * Always returns TRUE.
+ */
+int
+emails_module_config (struct config_file *cfg)
+{
+	static const char file_scheme[] = "file://";
+	const size_t scheme_len = sizeof (file_scheme) - 1;
+	memory_pool_t *pool = email_module_ctx->email_pool;
+	char *opt, *path;
+
+	opt = get_module_opt (cfg, "emails", "metric");
+	if (opt == NULL) {
+		email_module_ctx->metric = DEFAULT_METRIC;
+	}
+	else {
+		email_module_ctx->metric = memory_pool_strdup (pool, opt);
+		g_free (opt);
+	}
+
+	opt = get_module_opt (cfg, "emails", "symbol");
+	if (opt == NULL) {
+		email_module_ctx->symbol = DEFAULT_SYMBOL;
+	}
+	else {
+		email_module_ctx->symbol = memory_pool_strdup (pool, opt);
+		g_free (opt);
+	}
+
+	/* NOTE(review): unlike the two options above, this value is never passed to g_free () - confirm who owns the result of get_module_opt () */
+	opt = get_module_opt (cfg, "emails", "blacklist");
+	if (opt != NULL && g_ascii_strncasecmp (opt, file_scheme, scheme_len) == 0) {
+		path = opt + scheme_len;
+		/* Remember the path only when the list was actually loaded */
+		if (parse_host_list (pool, email_module_ctx->blacklist, path)) {
+			email_module_ctx->blacklist_file = memory_pool_strdup (pool, path);
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * Drop everything derived from the previous configuration and read the
+ * module options again.
+ *
+ * Returns the result of emails_module_config ().
+ */
+int
+emails_module_reconfig (struct config_file *cfg)
+{
+	/*
+	 * The table is refilled by emails_module_config (); without clearing it
+	 * first, addresses removed from the blacklist would stay blacklisted.
+	 * NOTE(review): parse_host_list () receives the pool, so the old entries
+	 * presumably also point into the pool that is deleted below - confirm.
+	 */
+	g_hash_table_remove_all (email_module_ctx->blacklist);
+	/* This string lives in the old pool and is only re-set when a blacklist is loaded again */
+	email_module_ctx->blacklist_file = NULL;
+
+	memory_pool_delete (email_module_ctx->email_pool);
+	email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ());
+
+	return emails_module_config (cfg);
+}
+
+/*
+ * Run the e-mail regexp over the original data of every text part of the
+ * task and collect all matches.
+ *
+ * Returns a list of newly allocated strings (last match first), or NULL when
+ * nothing was found. Both the strings and the list are released through
+ * destructors registered on task->task_pool, so the caller must not free
+ * them.
+ */
+static GList *
+extract_emails (struct worker_task *task)
+{
+	GList *res = NULL, *cur;
+	GMatchInfo *info;
+	GError *err = NULL;
+	struct mime_text_part *part;
+	char *email_str;
+
+	if (email_module_ctx->email_re == NULL) {
+		/* The pattern failed to compile at init time; there is nothing to match with */
+		return NULL;
+	}
+
+	for (cur = g_list_first (task->text_parts); cur != NULL; cur = g_list_next (cur)) {
+		part = cur->data;
+		info = NULL;
+
+		if (g_regex_match_full (email_module_ctx->email_re, (const char *)part->orig->data, part->orig->len, 0, 0, &info, &err)) {
+			while (g_match_info_matches (info)) {
+				email_str = g_match_info_fetch (info, 0);
+				if (email_str != NULL) {
+					res = g_list_prepend (res, email_str);
+					memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, email_str);
+				}
+				/* Get next match */
+				g_match_info_next (info, &err);
+			}
+		}
+		else if (err == NULL) {
+			msg_debug ("extract_emails: cannot find email pattern in given string");
+		}
+
+		if (err != NULL) {
+			msg_debug ("extract_emails: error matching regexp: %s", err->message);
+			/* GLib requires *err to be NULL on entry, so reset it before the next part */
+			g_clear_error (&err);
+		}
+		if (info != NULL) {
+			g_match_info_free (info);
+		}
+	}
+
+	if (res != NULL) {
+		memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, res);
+	}
+
+	return res;
+}
+
+/*
+ * Reply writer for the custom "emails" command: sends the status line
+ * followed by an "Emails: " line listing every address found in the message,
+ * separated by ", ".
+ *
+ * The reply is built in a fixed BUFSIZ buffer. Addresses that do not fit are
+ * left out of the reply (they are still counted in the log line) instead of
+ * being written past the end of the buffer.
+ *
+ * Always returns 0.
+ */
+static int
+emails_command_handler (struct worker_task *task)
+{
+	GList *emails, *cur;
+	char outbuf[BUFSIZ];
+	/* Two bytes are always held back for the closing CRLF */
+	const int limit = (int) sizeof (outbuf) - 2;
+	int r, n, num = 0;
+	gboolean truncated = FALSE;
+
+	emails = extract_emails (task);
+
+	r = snprintf (outbuf, limit, "%s 0 %s" CRLF "Emails: ", (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK");
+	if (r < 0) {
+		r = 0;
+	}
+	else if (r >= limit) {
+		/* snprintf reports the length it wanted to write, not the length it wrote */
+		r = limit - 1;
+		truncated = TRUE;
+	}
+
+	for (cur = g_list_first (emails); cur != NULL; cur = g_list_next (cur)) {
+		num ++;
+		if (truncated) {
+			continue;
+		}
+		n = snprintf (outbuf + r, limit - r, "%s%s", (char *)cur->data, (g_list_next (cur) != NULL) ? ", " : "");
+		if (n < 0) {
+			continue;
+		}
+		if (n >= limit - r) {
+			/* Entry does not fit: keep r where it was so the partial text is overwritten by the CRLF */
+			truncated = TRUE;
+			continue;
+		}
+		r += n;
+	}
+
+	if (truncated) {
+		msg_info ("emails_command_handler: reply for <%s> truncated, not all emails fit into the output buffer", task->message_id);
+	}
+
+	outbuf[r++] = '\r'; outbuf[r++] = '\n';
+
+	rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE);
+	msg_info ("process_message: msg ok, id: <%s>, %d emails extracted", task->message_id, num);
+
+	return 0;
+}
+
+/*
+ * MIME filter: extract the e-mail addresses of the message and insert the
+ * configured symbol into the configured metric once for every address that
+ * is present in the blacklist table. The matching address is attached to the
+ * result as a one-element option list.
+ *
+ * Always returns 0.
+ */
+static int
+emails_mime_filter (struct worker_task *task)
+{
+	GList *found, *node, *opts;
+	char *addr;
+
+	found = extract_emails (task);
+	if (email_module_ctx->blacklist == NULL || found == NULL) {
+		return 0;
+	}
+
+	for (node = g_list_first (found); node != NULL; node = g_list_next (node)) {
+		addr = (char *)node->data;
+		if (g_hash_table_lookup (email_module_ctx->blacklist, addr) == NULL) {
+			continue;
+		}
+		/* The option string is copied into the task pool */
+		opts = g_list_prepend (NULL, memory_pool_strdup (task->task_pool, addr));
+		insert_result (task, email_module_ctx->metric, email_module_ctx->symbol, 1, opts);
+	}
+
+	return 0;
+}
+
diff --git a/src/protocol.c b/src/protocol.c
index 0209adb0c..7f6c98295 100644
--- a/src/protocol.c
+++ b/src/protocol.c
@@ -86,14 +86,8 @@
#define QUEUE_ID_HEADER "Queue-ID"
#define ERROR_HEADER "Error"
#define USER_HEADER "User"
-/*
- * Reply messages
- */
-#define RSPAMD_REPLY_BANNER "RSPAMD/1.0"
-#define SPAMD_REPLY_BANNER "SPAMD/1.1"
-#define SPAMD_OK "EX_OK"
-/* XXX: try to convert rspamd errors to spamd errors */
-#define SPAMD_ERROR "EX_ERROR"
+
+static GList *custom_commands = NULL;
static char *
separate_command (f_str_t *in, char c)
@@ -120,6 +114,8 @@ static int
parse_command (struct worker_task *task, f_str_t *line)
{
char *token;
+ struct custom_command *cmd;
+ GList *cur;
token = separate_command (line, ' ');
if (line == NULL || token == NULL) {
@@ -131,7 +127,7 @@ parse_command (struct worker_task *task, f_str_t *line)
case 'c':
case 'C':
/* check */
- if (strcasecmp (token + 1, MSG_CMD_CHECK + 1) == 0) {
+ if (g_ascii_strcasecmp (token + 1, MSG_CMD_CHECK + 1) == 0) {
task->cmd = CMD_CHECK;
}
else {
@@ -142,10 +138,10 @@ parse_command (struct worker_task *task, f_str_t *line)
case 's':
case 'S':
/* symbols, skip */
- if (strcasecmp (token + 1, MSG_CMD_SYMBOLS + 1) == 0) {
+ if (g_ascii_strcasecmp (token + 1, MSG_CMD_SYMBOLS + 1) == 0) {
task->cmd = CMD_SYMBOLS;
}
- else if (strcasecmp (token + 1, MSG_CMD_SKIP + 1) == 0) {
+ else if (g_ascii_strcasecmp (token + 1, MSG_CMD_SKIP + 1) == 0) {
task->cmd = CMD_SKIP;
}
else {
@@ -156,10 +152,10 @@ parse_command (struct worker_task *task, f_str_t *line)
case 'p':
case 'P':
/* ping, process */
- if (strcasecmp (token + 1, MSG_CMD_PING + 1) == 0) {
+ if (g_ascii_strcasecmp (token + 1, MSG_CMD_PING + 1) == 0) {
task->cmd = CMD_PING;
}
- else if (strcasecmp (token + 1, MSG_CMD_PROCESS + 1) == 0) {
+ else if (g_ascii_strcasecmp (token + 1, MSG_CMD_PROCESS + 1) == 0) {
task->cmd = CMD_PROCESS;
}
else {
@@ -170,10 +166,10 @@ parse_command (struct worker_task *task, f_str_t *line)
case 'r':
case 'R':
/* report, report_ifspam */
- if (strcasecmp (token + 1, MSG_CMD_REPORT + 1) == 0) {
+ if (g_ascii_strcasecmp (token + 1, MSG_CMD_REPORT + 1) == 0) {
task->cmd = CMD_REPORT;
}
- else if (strcasecmp (token + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
+ else if (g_ascii_strcasecmp (token + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
task->cmd = CMD_REPORT_IFSPAM;
}
else {
@@ -184,7 +180,7 @@ parse_command (struct worker_task *task, f_str_t *line)
case 'u':
case 'U':
/* urls */
- if (strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
+ if (g_ascii_strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
task->cmd = CMD_URLS;
}
else {
@@ -193,8 +189,21 @@ parse_command (struct worker_task *task, f_str_t *line)
}
break;
default:
- msg_debug ("parse_command: bad command: %s", token);
- return -1;
+		/* Not a built-in command: search the registered custom commands (names compare case-insensitively) */
+		for (cur = custom_commands; cur != NULL; cur = g_list_next (cur)) {
+			cmd = cur->data;
+			if (g_ascii_strcasecmp (token, cmd->name) == 0) {
+				task->cmd = CMD_OTHER;
+				task->custom_cmd = cmd;
+				break;
+			}
+		}
+
+		/* The loop ends with cur == NULL only when no registered command matched */
+		if (cur == NULL) {
+			msg_debug ("parse_command: bad command: %s", token);
+			return -1;
+		}
+		break;
}
if (strncasecmp (line->begin, RSPAMC_GREETING, sizeof (RSPAMC_GREETING) - 1) == 0) {
@@ -742,8 +751,22 @@ write_reply (struct worker_task *task)
case CMD_URLS:
return write_urls_reply (task);
break;
+ case CMD_OTHER:
+ return task->custom_cmd->func (task);
}
}
return 0;
}
+
+/*
+ * Add a custom protocol command to the front of the global command list.
+ *
+ * name is stored by pointer, not copied, so it has to stay valid for as long
+ * as the command is registered; func is called to write the reply.
+ */
+void
+register_protocol_command (const char *name, protocol_reply_func func)
+{
+	struct custom_command *entry = g_malloc (sizeof (*entry));
+
+	entry->func = func;
+	entry->name = name;
+
+	custom_commands = g_list_prepend (custom_commands, entry);
+}
diff --git a/src/protocol.h b/src/protocol.h
index ed00edc26..bd2ac5343 100644
--- a/src/protocol.h
+++ b/src/protocol.h
@@ -13,6 +13,15 @@
#define RSPAMD_PROTOCOL_ERROR 3
#define RSPAMD_LENGTH_ERROR 4
+/*
+ * Reply messages
+ */
+#define RSPAMD_REPLY_BANNER "RSPAMD/1.0"
+#define SPAMD_REPLY_BANNER "SPAMD/1.1"
+#define SPAMD_OK "EX_OK"
+/* XXX: try to convert rspamd errors to spamd errors */
+#define SPAMD_ERROR "EX_ERROR"
+
struct worker_task;
enum rspamd_protocol {
@@ -29,6 +38,15 @@ enum rspamd_command {
CMD_PING,
CMD_PROCESS,
CMD_URLS,
+ CMD_OTHER,
+};
+
+
+typedef int (*protocol_reply_func)(struct worker_task *task); /* callback that writes the reply for a custom command */
+
+struct custom_command { /* one entry in the list of registered custom commands */
+ const char *name; /* command name as given to register_protocol_command () */
+ protocol_reply_func func; /* reply writer called for this command */
+};
/**
@@ -46,4 +64,12 @@ int read_rspamd_input_line (struct worker_task *task, f_str_t *line);
*/
int write_reply (struct worker_task *task);
+
+/**
+ * Register custom function to extend protocol
+ * @param name symbolic name of custom function; it is compared
+ * case-insensitively with the command sent by the client and is stored by
+ * pointer (not copied), so it must remain valid after the call
+ * @param func callback function for writing reply
+ */
+void register_protocol_command (const char *name, protocol_reply_func func);
+
#endif
diff --git a/src/worker.c b/src/worker.c
index 99c311e1f..2da9383e9 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -180,6 +180,12 @@ read_socket (f_str_t *in, void *arg)
task->state = WRITE_ERROR;
write_socket (task);
}
+ if (task->cmd == CMD_URLS || task->cmd == CMD_OTHER) {
+ /* Skip filters */
+ task->state = WRITE_REPLY;
+ write_socket (task);
+ return;
+ }
r = process_filters (task);
if (r == -1) {
task->last_error = "Filter processing error";