]> source.dussan.org Git - rspamd.git/commitdiff
* Add module for blacklisting emails (self documented in sample config)
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 15 Jun 2009 15:42:28 +0000 (19:42 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 15 Jun 2009 15:42:28 +0000 (19:42 +0400)
* Add command 'emails' for extracting emails from a message
* Rework protocol layout to allow expanding rspamd protocol by custom commands that can be added from anywhere in code
* Allow rspamc to work without a mandatory 'command' parameter. The default command is 'symbols'.
* Update version to 0.1.8

CMakeLists.txt
rspamc.pl.in
rspamd.conf.sample
src/main.h
src/plugins/emails.c [new file with mode: 0644]
src/protocol.c
src/protocol.h
src/worker.c

index 88b649a4bbd98c8f6265142ab5cce457b23e3b86..1a4a0b042aaf35dbf92c5fe206833876f7885a53 100644 (file)
@@ -7,7 +7,7 @@ PROJECT(rspamd C)
 
 SET(RSPAMD_VERSION_MAJOR 0)
 SET(RSPAMD_VERSION_MINOR 1)
-SET(RSPAMD_VERSION_PATCH 7)
+SET(RSPAMD_VERSION_PATCH 8)
 
 SET(RSPAMD_VERSION         "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
 SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd")
@@ -323,7 +323,8 @@ SET(CLASSIFIERSSRC src/classifiers/classifiers.c
 
 SET(PLUGINSSRC src/plugins/surbl.c
                                src/plugins/regexp.c
-                               src/plugins/chartable.c)
+                               src/plugins/chartable.c
+                               src/plugins/emails.c)
 
 SET(TESTSRC            test/rspamd_expression_test.c
                                test/rspamd_memcached_test.c
index d9e5def14fef8d54fe30de58bc280f1392e1ff6c..a1f6675114eb5c6888e76095607cb0d0017e0803 100755 (executable)
@@ -203,8 +203,7 @@ my $cmd = shift;
 my $do_parse_config = 1;
 
 if (!defined ($cmd) || $cmd eq '') {
-    HELP_MESSAGE();
-    exit;
+    $cmd = 'SYMBOLS';
 }
 
 if (defined ($args{c})) {
@@ -237,7 +236,7 @@ if (defined ($args{p})) {
     $cfg{'port'} = $args{p};
 }
 
-if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT|URLS)/i) {
+if ($cmd =~ /(SYMBOLS|SCAN|PROCESS|CHECK|REPORT_IFSPAM|REPORT|URLS|EMAILS)/i) {
     $cfg{'command'} = $1;
     $cfg{'control'} = 0;
 }
index 5184aef0972cfbc72eba723cae452731c20be1f5..2b70cbd7c6be9c6036ff6ca14c523836f5739949 100644 (file)
@@ -146,6 +146,12 @@ $subject_blah = "Subject=/blah/H";
        threshold = "0.1";
 };
 
+# Extract e-mail addresses from text parts and check them against a blacklist file.
+# 'metric' and 'symbol' select where a hit is recorded; 'blacklist' must be a file:// path.
+.module 'emails' {
+    metric = "default";
+    symbol = "R_BAD_EMAIL";
+    blacklist = "file:///some/path/emails.lst";
+};
+
 # If enabled, treat each regexp as a raw regex and do not try to convert
 # each text part to utf8 encoding. Saves a lot of resources but is less
 # portable.
@@ -154,4 +160,4 @@ raw_mode = yes;
 
 url_filters = "surbl";
 header_filters = "regexp";
-mime_filters = "chartable";
+mime_filters = "chartable,emails";
index 924fe13edb8e16347e8c551d0dffafff549e09c3..c80efc494e5dad0eff5fa4172dcecf34c396d3a9 100644 (file)
@@ -166,6 +166,7 @@ struct worker_task {
        size_t content_length;                                                                          /**< length of user's input                                                     */
        enum rspamd_protocol proto;                                                                     /**< protocol (rspamc or spamc)                                         */
        enum rspamd_command cmd;                                                                        /**< command                                                                            */
+       struct custom_command *custom_cmd;                                                      /**< custom command if any                                                      */      
        int sock;                                                                                                       /**< socket descriptor                                                          */
        char *helo;                                                                                                     /**< helo header value                                                          */
        char *from;                                                                                                     /**< from header value                                                          */
diff --git a/src/plugins/emails.c b/src/plugins/emails.c
new file mode 100644 (file)
index 0000000..2e2dcac
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/***MODULE:email
+ * rspamd module that extracts emails from messages and checks them against a blacklist
+ */
+
+#include "../config.h"
+#include "../main.h"
+#include "../message.h"
+#include "../modules.h"
+#include "../cfg_file.h"
+#include "../expressions.h"
+#include "../util.h"
+
+/* Symbol used when the 'emails' module section has no 'symbol' option */
+#define DEFAULT_SYMBOL "R_BAD_EMAIL"
+
+/*
+ * Pattern for an e-mail address: a dotted local part, '@', one or more
+ * dot-terminated domain labels and a final label that is either two letters
+ * or one of a fixed list of generic TLDs. The character classes are written
+ * in one case only; the pattern is compiled with G_REGEX_CASELESS in
+ * emails_module_init.
+ */
+static const char *email_re_text = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+(?:[A-Z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\\b";
+
+/*
+ * State of the emails module. A pointer to this structure is handed back to
+ * the caller of emails_module_init cast to struct module_ctx *.
+ * NOTE(review): the four leading filter pointers presumably have to mirror
+ * the layout of struct module_ctx for that cast to be valid - confirm.
+ */
+struct email_ctx {
+       /* Filter hooks; emails_module_init sets only mime_filter, the others are NULL */
+       int (*header_filter)(struct worker_task *task);
+       int (*mime_filter)(struct worker_task *task);
+       int (*message_filter)(struct worker_task *task);
+       int (*url_filter)(struct worker_task *task);
+       /* Metric and symbol passed to insert_result for a blacklisted address */
+       char *metric;
+       char *symbol;
+       /* Compiled form of email_re_text */
+       GRegex *email_re;
+
+       /* String-keyed table that extracted addresses are looked up in */
+       GHashTable *blacklist;
+       /* Path of the blacklist file (the 'blacklist' option without its file:// prefix) */
+       char *blacklist_file;
+
+       /* Pool that holds the configuration strings; recreated on reconfig */
+       memory_pool_t *email_pool;
+};
+
+/* Single module-wide context, allocated in emails_module_init */
+static struct email_ctx *email_module_ctx = NULL;
+
+/* Forward declarations: emails_module_init refers to both before they are defined */
+static int emails_mime_filter (struct worker_task *task);
+static int emails_command_handler (struct worker_task *task);
+
+/*
+ * Allocate the module context, compile the e-mail regexp, create the (empty)
+ * blacklist table and register the 'emails' protocol command.
+ *
+ * @param cfg configuration (not used here)
+ * @param ctx out: receives the module context cast to struct module_ctx *
+ * @return always 0
+ */
+int
+emails_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+       GError *err = NULL;
+
+       /* Zero-fill so that metric, symbol and blacklist_file start out as NULL
+        * instead of holding indeterminate pointers until the config is read */
+       email_module_ctx = g_malloc0 (sizeof (struct email_ctx));
+
+       email_module_ctx->header_filter = NULL;
+       email_module_ctx->mime_filter = emails_mime_filter;
+       email_module_ctx->message_filter = NULL;
+       email_module_ctx->url_filter = NULL;
+       email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ());
+       email_module_ctx->email_re = g_regex_new (email_re_text, G_REGEX_RAW | G_REGEX_OPTIMIZE | G_REGEX_CASELESS, 0, &err);
+       if (email_module_ctx->email_re == NULL) {
+               /* Report the failure and release the GError instead of leaking it */
+               msg_info ("emails_module_init: cannot compile email regexp: %s", err != NULL ? err->message : "unknown error");
+               g_clear_error (&err);
+       }
+       email_module_ctx->blacklist = g_hash_table_new (g_str_hash, g_str_equal);
+
+       *ctx = (struct module_ctx *)email_module_ctx;
+
+       register_protocol_command ("emails", emails_command_handler);
+
+       return 0;
+}
+
+
+/*
+ * Read the 'emails' module section: metric and symbol (with defaults) and an
+ * optional blacklist given as a file:// URL whose contents are loaded into
+ * the blacklist table.
+ *
+ * NOTE(review): the metric/symbol values are g_free'd after copying but the
+ * blacklist value is not - ownership of get_module_opt's result should be
+ * confirmed and the three branches made consistent.
+ *
+ * @param cfg parsed configuration
+ * @return always TRUE
+ */
+int
+emails_module_config (struct config_file *cfg)
+{
+       const size_t prefix_len = sizeof ("file://") - 1;
+       char *opt;
+       char *path;
+
+       opt = get_module_opt (cfg, "emails", "metric");
+       if (opt == NULL) {
+               email_module_ctx->metric = DEFAULT_METRIC;
+       }
+       else {
+               email_module_ctx->metric = memory_pool_strdup (email_module_ctx->email_pool, opt);
+               g_free (opt);
+       }
+
+       opt = get_module_opt (cfg, "emails", "symbol");
+       if (opt == NULL) {
+               email_module_ctx->symbol = DEFAULT_SYMBOL;
+       }
+       else {
+               email_module_ctx->symbol = memory_pool_strdup (email_module_ctx->email_pool, opt);
+               g_free (opt);
+       }
+
+       opt = get_module_opt (cfg, "emails", "blacklist");
+       if (opt != NULL && g_ascii_strncasecmp (opt, "file://", prefix_len) == 0) {
+               /* Only local files are supported; strip the scheme to get the path */
+               path = opt + prefix_len;
+               if (parse_host_list (email_module_ctx->email_pool, email_module_ctx->blacklist, path)) {
+                       email_module_ctx->blacklist_file = memory_pool_strdup (email_module_ctx->email_pool, path);
+               }
+       }
+
+       return TRUE;
+}
+
+/*
+ * Throw away the current configuration state and read the config again.
+ *
+ * The blacklist table is emptied before the pool goes away: the table was
+ * filled by parse_host_list, which is handed the pool, so its entries
+ * presumably live in that pool (confirm against parse_host_list). Emptying
+ * it also keeps addresses from a previous blacklist from surviving a reload.
+ *
+ * @param cfg new configuration
+ * @return result of emails_module_config
+ */
+int
+emails_module_reconfig (struct config_file *cfg)
+{
+       g_hash_table_remove_all (email_module_ctx->blacklist);
+       /* The old path string is owned by the pool deleted below */
+       email_module_ctx->blacklist_file = NULL;
+
+       memory_pool_delete (email_module_ctx->email_pool);
+       email_module_ctx->email_pool = memory_pool_new (memory_pool_get_size ());
+
+       return emails_module_config (cfg);
+}
+
+/*
+ * Collect every substring of the task's text parts that matches the e-mail
+ * regexp.
+ *
+ * Each matched string and the list itself are registered with the task pool
+ * destructors, so the caller must not free them.
+ *
+ * @param task task whose text_parts are scanned
+ * @return list of char * (most recent match first) or NULL if nothing matched
+ */
+static GList *
+extract_emails (struct worker_task *task)
+{
+       GList *res = NULL, *cur;
+       GMatchInfo *info;
+       GError *err = NULL;
+       struct mime_text_part *part;
+       char *email_str;
+
+       cur = g_list_first (task->text_parts);
+       while (cur) {
+               part = cur->data;
+               /* Keep the free below safe even if the match call bails out early */
+               info = NULL;
+
+               if (g_regex_match_full (email_module_ctx->email_re, (const char *)part->orig->data, part->orig->len, 0, 0, &info, &err)) {
+                       while (g_match_info_matches (info)) {
+                               email_str = g_match_info_fetch (info, 0);
+                               if (email_str != NULL) {
+                                       res = g_list_prepend (res, email_str);
+                                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, email_str);
+                               }
+                               /* Get next match */
+                               g_match_info_next (info, &err);
+                       }
+               }
+               else if (err == NULL) {
+                       msg_debug ("extract_emails: cannot find email pattern in given string");
+               }
+
+               if (err != NULL) {
+                       /* Covers failures of both the first match and g_match_info_next.
+                        * The error has to be released and reset to NULL before the
+                        * pointer is handed to GLib again for the next part. */
+                       msg_debug ("extract_emails: error matching regexp: %s", err->message);
+                       g_clear_error (&err);
+               }
+               g_match_info_free (info);
+
+               cur = g_list_next (cur);
+       }
+       if (res != NULL) {
+               memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, res);
+       }
+
+       return res;
+}
+
+/*
+ * Convert snprintf's return value into the number of bytes it really stored.
+ * snprintf reports the length the full output would have had, so on
+ * truncation only avail - 1 bytes (plus the NUL) are in the buffer.
+ */
+static int
+emails_stored_len (int ret, int avail)
+{
+       if (ret < 0) {
+               return 0;
+       }
+       if (ret >= avail) {
+               return avail > 0 ? avail - 1 : 0;
+       }
+       return ret;
+}
+
+/*
+ * Reply handler for the custom 'emails' command: writes the reply banner
+ * followed by an "Emails: " line with all extracted addresses separated by
+ * ", ". A list that does not fit into the buffer is truncated.
+ *
+ * @param task task being answered
+ * @return always 0
+ */
+static int
+emails_command_handler (struct worker_task *task)
+{
+       GList *emails, *cur;
+       char outbuf[BUFSIZ];
+       /* Two bytes are always held back for the terminating CRLF */
+       const int limit = sizeof (outbuf) - 2;
+       int r, num = 0;
+
+       emails = extract_emails (task);
+
+       /* r never exceeds limit - 1, so limit - r below is always at least 1 */
+       r = emails_stored_len (snprintf (outbuf, limit, "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK"), limit);
+
+       r += emails_stored_len (snprintf (outbuf + r, limit - r, "Emails: "), limit - r);
+
+       cur = g_list_first (emails);
+
+       while (cur) {
+               num ++;
+               if (g_list_next (cur) != NULL) {
+                       r += emails_stored_len (snprintf (outbuf + r, limit - r, "%s, ", (char *)cur->data), limit - r);
+               }
+               else {
+                       r += emails_stored_len (snprintf (outbuf + r, limit - r, "%s", (char *)cur->data), limit - r);
+               }
+               cur = g_list_next (cur);
+       }
+
+       outbuf[r++] = '\r'; outbuf[r++] = '\n';
+
+       rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE);
+       msg_info ("process_message: msg ok, id: <%s>, %d emails extracted", task->message_id, num);
+
+       return 0;
+}
+
+/*
+ * MIME filter: extract addresses from the message and insert the module's
+ * symbol once for every address that is present in the blacklist table.
+ * The matching address (copied into the task pool) is attached to the result.
+ *
+ * @param task task being filtered
+ * @return always 0
+ */
+static int
+emails_mime_filter (struct worker_task *task)
+{
+       GList *found, *it;
+
+       found = extract_emails (task);
+
+       if (email_module_ctx->blacklist == NULL || found == NULL) {
+               return 0;
+       }
+
+       for (it = g_list_first (found); it != NULL; it = g_list_next (it)) {
+               if (g_hash_table_lookup (email_module_ctx->blacklist, it->data) == NULL) {
+                       continue;
+               }
+               insert_result (task, email_module_ctx->metric, email_module_ctx->symbol, 1,
+                                       g_list_prepend (NULL, memory_pool_strdup (task->task_pool, (char *)it->data)));
+       }
+
+       return 0;
+}
+
index 0209adb0cee69214f54a82318742b07fdba43236..7f6c982955129f401a0493fe7aedd66b702046de 100644 (file)
 #define QUEUE_ID_HEADER "Queue-ID"
 #define ERROR_HEADER "Error"
 #define USER_HEADER "User"
-/*
- * Reply messages
- */
-#define RSPAMD_REPLY_BANNER "RSPAMD/1.0"
-#define SPAMD_REPLY_BANNER "SPAMD/1.1"
-#define SPAMD_OK "EX_OK"
-/* XXX: try to convert rspamd errors to spamd errors */
-#define SPAMD_ERROR "EX_ERROR"
+
+static GList *custom_commands = NULL;
 
 static char *
 separate_command (f_str_t *in, char c)
@@ -120,6 +114,8 @@ static int
 parse_command (struct worker_task *task, f_str_t *line)
 {
        char *token;
+       struct custom_command *cmd;
+       GList *cur;
 
        token = separate_command (line, ' ');
        if (line == NULL || token == NULL) {
@@ -131,7 +127,7 @@ parse_command (struct worker_task *task, f_str_t *line)
                case 'c':
                case 'C':
                        /* check */
-                       if (strcasecmp (token + 1, MSG_CMD_CHECK + 1) == 0) {
+                       if (g_ascii_strcasecmp (token + 1, MSG_CMD_CHECK + 1) == 0) {
                                task->cmd = CMD_CHECK;  
                        }
                        else {
@@ -142,10 +138,10 @@ parse_command (struct worker_task *task, f_str_t *line)
                case 's':
                case 'S':
                        /* symbols, skip */
-                       if (strcasecmp (token + 1, MSG_CMD_SYMBOLS + 1) == 0) {
+                       if (g_ascii_strcasecmp (token + 1, MSG_CMD_SYMBOLS + 1) == 0) {
                                task->cmd = CMD_SYMBOLS;
                        }
-                       else if (strcasecmp (token + 1, MSG_CMD_SKIP + 1) == 0) {
+                       else if (g_ascii_strcasecmp (token + 1, MSG_CMD_SKIP + 1) == 0) {
                                task->cmd = CMD_SKIP;
                        }
                        else {
@@ -156,10 +152,10 @@ parse_command (struct worker_task *task, f_str_t *line)
                case 'p':
                case 'P':
                        /* ping, process */
-                       if (strcasecmp (token + 1, MSG_CMD_PING + 1) == 0) {
+                       if (g_ascii_strcasecmp (token + 1, MSG_CMD_PING + 1) == 0) {
                                task->cmd = CMD_PING;
                        }
-                       else if (strcasecmp (token + 1, MSG_CMD_PROCESS + 1) == 0) {
+                       else if (g_ascii_strcasecmp (token + 1, MSG_CMD_PROCESS + 1) == 0) {
                                task->cmd = CMD_PROCESS;
                        }
                        else {
@@ -170,10 +166,10 @@ parse_command (struct worker_task *task, f_str_t *line)
                case 'r':
                case 'R':
                        /* report, report_ifspam */
-                       if (strcasecmp (token + 1, MSG_CMD_REPORT + 1) == 0) {
+                       if (g_ascii_strcasecmp (token + 1, MSG_CMD_REPORT + 1) == 0) {
                                task->cmd = CMD_REPORT;
                        }
-                       else if (strcasecmp (token + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
+                       else if (g_ascii_strcasecmp (token + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) {
                                task->cmd = CMD_REPORT_IFSPAM;
                        }
                        else {
@@ -184,7 +180,7 @@ parse_command (struct worker_task *task, f_str_t *line)
                case 'u':
                case 'U':
                        /* urls */
-                       if (strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
+                       if (g_ascii_strcasecmp (token + 1, MSG_CMD_URLS + 1) == 0) {
                                task->cmd = CMD_URLS;
                        }
                        else {
@@ -193,8 +189,21 @@ parse_command (struct worker_task *task, f_str_t *line)
                        }
                        break;
                default:
-                       msg_debug ("parse_command: bad command: %s", token);
-                       return -1;
+                       /* Not a built-in command: search the registered custom commands */
+                       cur = custom_commands;
+                       while (cur) {
+                               cmd = cur->data;
+                               if (g_ascii_strcasecmp (token, cmd->name) == 0) {
+                                       task->cmd = CMD_OTHER;
+                                       task->custom_cmd = cmd;
+                                       break;
+                               }
+                               /* Advance; without this a non-matching entry loops forever */
+                               cur = g_list_next (cur);
+                       }
+
+                       if (cur == NULL) {
+                               msg_debug ("parse_command: bad command: %s", token);
+                               return -1;
+                       }
+                       break;
        }
 
        if (strncasecmp (line->begin, RSPAMC_GREETING, sizeof (RSPAMC_GREETING) - 1) == 0) {
@@ -742,8 +751,22 @@ write_reply (struct worker_task *task)
                        case CMD_URLS:
                                return write_urls_reply (task);
                                break;
+                       case CMD_OTHER:
+                               return task->custom_cmd->func (task);
                }
        }
 
        return 0;
 }
+
+/*
+ * Add a custom protocol command to the front of the custom_commands list.
+ * The name pointer is stored as given (not copied), so it has to stay valid
+ * for as long as the command is registered.
+ */
+void
+register_protocol_command (const char *name, protocol_reply_func func)
+{
+       struct custom_command *entry = g_malloc (sizeof (*entry));
+
+       entry->func = func;
+       entry->name = name;
+       custom_commands = g_list_prepend (custom_commands, entry);
+}
index ed00edc262ea7336adfd2eb5b2b086978a6bb821..bd2ac5343fe9b2bcb874762da59d5e29d2bbff2c 100644 (file)
 #define RSPAMD_PROTOCOL_ERROR 3
 #define RSPAMD_LENGTH_ERROR 4
 
+/*
+ * Reply messages
+ */
+#define RSPAMD_REPLY_BANNER "RSPAMD/1.0"
+#define SPAMD_REPLY_BANNER "SPAMD/1.1"
+#define SPAMD_OK "EX_OK"
+/* XXX: try to convert rspamd errors to spamd errors */
+#define SPAMD_ERROR "EX_ERROR"
+
 struct worker_task;
 
 enum rspamd_protocol {
@@ -29,6 +38,15 @@ enum rspamd_command {
        CMD_PING,
        CMD_PROCESS,
        CMD_URLS,
+       CMD_OTHER,
+};
+
+
+/** Callback that writes the reply for a custom command; receives the task being answered */
+typedef int (*protocol_reply_func)(struct worker_task *task);
+
+/** One registered custom command: the name it is matched by and its reply callback */
+struct custom_command {
+       const char *name;
+       protocol_reply_func func;
 };
 
 /**
@@ -46,4 +64,12 @@ int read_rspamd_input_line (struct worker_task *task, f_str_t *line);
  */
 int write_reply (struct worker_task *task);
 
+
+/**
+ * Register a custom function to extend the protocol
+ * @param name symbolic name of custom function
+ * @param func callback function for writing reply
+ */
+void register_protocol_command (const char *name, protocol_reply_func func);
+
 #endif
index 99c311e1f1fb8814dcd94956c35096ec02e1b114..2da9383e9c3ca35e464a9c73b08a0745e223e776 100644 (file)
@@ -180,6 +180,12 @@ read_socket (f_str_t *in, void *arg)
                                task->state = WRITE_ERROR;
                                write_socket (task);
             }
+                       if (task->cmd == CMD_URLS || task->cmd == CMD_OTHER) {
+                               /* Skip filters */
+                               task->state = WRITE_REPLY;
+                               write_socket (task);
+                               return;
+                       }
                        r = process_filters (task);
                        if (r == -1) {
                                task->last_error = "Filter processing error";