]> source.dussan.org Git - rspamd.git/commitdiff
* Add initial implementation of regexp module
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 22 Oct 2008 15:41:12 +0000 (19:41 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 22 Oct 2008 15:41:12 +0000 (19:41 +0400)
configure
plugins/regexp.c [new file with mode: 0644]

index a34153948268dd280f5a0a920242c6ecbd2796d6..f255de36dcbdb758d64d0177fa0af4d4be18445f 100755 (executable)
--- a/configure
+++ b/configure
@@ -21,8 +21,8 @@ YACC_OUTPUT="cfg_yacc.c"
 LEX_OUTPUT="cfg_lex.c"
 CONFIG="config.h"
 
-SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c filter.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
-MODULES="surbl"
+SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c filter.c plugins/regexp.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
+MODULES="surbl regexp"
 
 CFLAGS="$CFLAGS -W -Wpointer-arith -Wno-unused-parameter"
 CFLAGS="$CFLAGS -Wno-unused-function -Wunused-variable -Wno-sign-compare"
diff --git a/plugins/regexp.c b/plugins/regexp.c
new file mode 100644 (file)
index 0000000..f82543a
--- /dev/null
@@ -0,0 +1,247 @@
+/***MODULE:regexp
+ * rspamd module that implements different regexp rules
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <evdns.h>
+
+#include "../config.h"
+#include "../main.h"
+#include "../modules.h"
+#include "../cfg_file.h"
+
+struct regexp_module_item {    /* one configured rule: a symbol and its parsed expression */
+       struct expression *expr;        /* head of the list returned by parse_expression */
+       int regexp_number;              /* count of EXPR_OPERAND nodes in expr */
+       int op_number;                  /* count of all other (operation) nodes in expr */
+       char *symbol;                   /* option name from the config; passed to insert_result on a match */
+};
+
+struct regexp_ctx {    /* module-wide state for the regexp plugin */
+       int (*header_filter)(struct worker_task *task);        /* set to regexp_common_filter in regexp_module_init */
+       int (*mime_filter)(struct worker_task *task);  /* left NULL by regexp_module_init */
+       int (*message_filter)(struct worker_task *task);       /* left NULL by regexp_module_init */
+       int (*url_filter)(struct worker_task *task);   /* left NULL by regexp_module_init */
+       GList *items;   /* list of struct regexp_module_item *, built by regexp_module_config */
+       char *metric;   /* metric name passed to insert_result; DEFAULT_METRIC unless configured */
+
+       memory_pool_t *regexp_pool;     /* pool that backs the items and the copied metric string */
+};
+
+static struct regexp_ctx *regexp_module_ctx = NULL;    /* single module context; allocated in regexp_module_init */
+
+static int regexp_common_filter (struct worker_task *task);    /* forward declaration; installed as header_filter */
+
+/*
+ * Allocate the module context and put it into its initial state.
+ *
+ * Only the header filter is installed (regexp_common_filter); the other
+ * filter slots are NULL. The context is zero-filled on allocation so that
+ * fields not assigned here (metric is only set by regexp_module_config)
+ * are never left indeterminate.
+ *
+ * NOTE(review): neither cfg nor ctx is used; in particular *ctx is never
+ * assigned. Confirm whether the module loader expects the context to be
+ * handed back through it.
+ *
+ * Always returns 0.
+ */
+int
+regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+       regexp_module_ctx = g_malloc0 (sizeof (struct regexp_ctx));
+
+       regexp_module_ctx->header_filter = regexp_common_filter;
+       regexp_module_ctx->mime_filter = NULL;
+       regexp_module_ctx->message_filter = NULL;
+       regexp_module_ctx->url_filter = NULL;
+       regexp_module_ctx->regexp_pool = memory_pool_new (1024);
+       regexp_module_ctx->items = NULL;
+
+       return 0;
+}
+
+/*
+ * Parse one rule line into an expression list and attach it to chain.
+ *
+ * pool  - memory pool handed to both parse_expression and parse_regexp
+ * chain - item that receives the list head and the node counters
+ * line  - textual rule taken from the configuration
+ *
+ * Every EXPR_OPERAND node has its operand replaced by the result of
+ * parse_regexp and bumps chain->regexp_number; every other node bumps
+ * chain->op_number. The counters are only incremented, so the caller is
+ * expected to pass a zeroed item.
+ */
+static void
+read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, char *line)
+{
+       struct expression *cur;
+
+       /* Use the caller's pool for both parsers instead of reaching for the module global */
+       chain->expr = parse_expression (pool, line);
+
+       for (cur = chain->expr; cur != NULL; cur = cur->next) {
+               if (cur->type == EXPR_OPERAND) {
+                       cur->content.operand = parse_regexp (pool, cur->content.operand);
+                       chain->regexp_number ++;
+               }
+               else {
+                       chain->op_number ++;
+               }
+       }
+}
+
+/*
+ * Load the module settings from cfg.
+ *
+ * The "metric" option selects the metric name (copied into the module pool);
+ * without it DEFAULT_METRIC is used. Every other option of the "regexp"
+ * module is treated as a rule: the option name becomes the symbol and the
+ * option value is parsed as an expression. Each rule is prepended to the
+ * module's item list.
+ *
+ * Always returns 0.
+ */
+int
+regexp_module_config (struct config_file *cfg)
+{
+       LIST_HEAD (moduleoptq, module_opt) *opts = NULL;
+       struct module_opt *opt;
+       struct regexp_module_item *item;
+       char *metric_name;
+
+       metric_name = get_module_opt (cfg, "regexp", "metric");
+       if (metric_name == NULL) {
+               regexp_module_ctx->metric = DEFAULT_METRIC;
+       }
+       else {
+               regexp_module_ctx->metric = memory_pool_strdup (regexp_module_ctx->regexp_pool, metric_name);
+               g_free (metric_name);
+       }
+
+       opts = g_hash_table_lookup (cfg->modules_opts, "regexp");
+       if (opts == NULL) {
+               /* No options for this module, so there are no rules to read */
+               return 0;
+       }
+
+       LIST_FOREACH (opt, opts, next) {
+               /* "metric" was consumed above; everything else is a rule */
+               if (strcmp (opt->param, "metric") != 0) {
+                       item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item));
+                       item->symbol = opt->param;
+                       read_regexp_expression (regexp_module_ctx->regexp_pool, item, opt->value);
+                       regexp_module_ctx->items = g_list_prepend (regexp_module_ctx->items, item);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Drop the current configuration and read it again from cfg.
+ *
+ * The items stored in the list are allocated from regexp_pool, so the list
+ * is released and reset before the pool is deleted; otherwise the fresh
+ * configuration would be prepended to nodes pointing into freed memory.
+ *
+ * Returns the result of regexp_module_config.
+ */
+int
+regexp_module_reconfig (struct config_file *cfg)
+{
+       /* Only the list nodes are freed here; the item data goes away with the pool */
+       g_list_free (regexp_module_ctx->items);
+       regexp_module_ctx->items = NULL;
+
+       memory_pool_delete (regexp_module_ctx->regexp_pool);
+       regexp_module_ctx->regexp_pool = memory_pool_new (1024);
+
+       return regexp_module_config (cfg);
+}
+
+/*
+ * Evaluate one regexp against the part of the task selected by re->type.
+ *
+ * REGEXP_HEADER  - the header named re->header; when re->regexp is NULL the
+ *                  mere presence of the header counts as a match
+ * REGEXP_MIME    - the content of every entry in task->parts
+ * REGEXP_MESSAGE - the buffer task->msg->buf
+ * REGEXP_URL     - struri () of every entry in task->urls
+ *
+ * Returns 1 on the first match and 0 otherwise. A NULL re yields 0
+ * (presumably what parse_regexp produces for an operand it cannot parse -
+ * confirm), as does a missing compiled regexp for the non-header types.
+ */
+static gsize
+process_regexp (struct rspamd_regexp *re, struct worker_task *task)
+{
+       const char *headerv;
+       struct mime_part *part;
+       struct uri *url;
+
+       if (re == NULL) {
+               return 0;
+       }
+
+       switch (re->type) {
+               case REGEXP_NONE:
+                       return 0;
+               case REGEXP_HEADER:
+                       if (re->header == NULL) {
+                               msg_info ("process_regexp: header regexp without header name");
+                               return 0;
+                       }
+                       msg_debug ("process_regexp: checking header regexp: %s = /%s/", re->header, re->regexp_text);
+                       headerv = g_mime_message_get_header (task->message, re->header);
+                       if (headerv == NULL) {
+                               return 0;
+                       }
+                       if (re->regexp == NULL) {
+                               msg_debug ("process_regexp: regexp contains only header and it is found %s", re->header);
+                               return 1;
+                       }
+                       if (g_regex_match (re->regexp, headerv, 0, NULL) == TRUE) {
+                               return 1;
+                       }
+                       return 0;
+               case REGEXP_MIME:
+                       msg_debug ("process_regexp: checking mime regexp: /%s/", re->regexp_text);
+                       if (re->regexp == NULL) {
+                               return 0;
+                       }
+                       TAILQ_FOREACH (part, &task->parts, next) {
+                               if (g_regex_match_full (re->regexp, part->content->data, part->content->len, 0, 0, NULL, NULL) == TRUE) {
+                                       return 1;
+                               }
+                       }
+                       return 0;
+               case REGEXP_MESSAGE:
+                       msg_debug ("process_regexp: checking message regexp: /%s/", re->regexp_text);
+                       if (re->regexp == NULL) {
+                               return 0;
+                       }
+                       if (g_regex_match_full (re->regexp, task->msg->buf->begin, task->msg->buf->len, 0, 0, NULL, NULL) == TRUE) {
+                               return 1;
+                       }
+                       return 0;
+               case REGEXP_URL:
+                       msg_debug ("process_regexp: checking url regexp: /%s/", re->regexp_text);
+                       if (re->regexp == NULL) {
+                               return 0;
+                       }
+                       TAILQ_FOREACH (url, &task->urls, next) {
+                               if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) {
+                                       return 1;
+                               }
+                       }
+                       return 0;
+               default:
+                       break;
+       }
+
+       /* Unknown regexp type: treat as no match */
+       return 0;
+}
+
+/*
+ * Evaluate the expression of one rule against task and, when the result is
+ * true, report the rule's symbol through insert_result.
+ *
+ * The expression list is processed with a value stack: operands push the
+ * outcome of process_regexp, '!' replaces the top value with its negation,
+ * '&' and '|' replace the two top values with their logical AND / OR.
+ * Operations other than these three are ignored. If an operation does not
+ * find enough operands on the stack the rule is abandoned without a result.
+ */
+static void
+process_regexp_item (struct regexp_module_item *item, struct worker_task *task)
+{
+       GQueue *stack;
+       gsize cur, op1, op2;
+       struct expression *it;
+
+       stack = g_queue_new ();
+
+       for (it = item->expr; it != NULL; it = it->next) {
+               if (it->type == EXPR_OPERAND) {
+                       cur = process_regexp ((struct rspamd_regexp *)it->content.operand, task);
+                       msg_debug ("process_regexp_item: regexp %s found", cur ? "is" : "is not");
+                       g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
+                       continue;
+               }
+
+               if (g_queue_is_empty (stack)) {
+                       /* Queue has no operands for operation, exiting */
+                       g_queue_free (stack);
+                       return;
+               }
+
+               switch (it->content.operation) {
+                       case '!':
+                               op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                               op1 = !op1;
+                               g_queue_push_head (stack, GSIZE_TO_POINTER (op1));
+                               break;
+                       case '&':
+                       case '|':
+                               if (g_queue_get_length (stack) < 2) {
+                                       /* Binary operation with a single operand, exiting */
+                                       g_queue_free (stack);
+                                       return;
+                               }
+                               op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                               op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+                               if (it->content.operation == '&') {
+                                       g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2));
+                               }
+                               else {
+                                       g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2));
+                               }
+                               break;
+                       default:
+                               /* Unknown operation: skip it and leave the stack untouched */
+                               break;
+               }
+       }
+
+       if (!g_queue_is_empty (stack)) {
+               op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
+               if (op1) {
+                       /* Add symbol to results */
+                       insert_result (task, regexp_module_ctx->metric, item->symbol, op1);
+               }
+       }
+
+       g_queue_free (stack);
+}
+
+/*
+ * Filter entry point: run every configured rule against task.
+ *
+ * Always returns 0. The function is declared int and is stored in a filter
+ * slot of that type, so it must not fall off the end without a value.
+ */
+static int
+regexp_common_filter (struct worker_task *task)
+{
+       GList *cur_expr;
+
+       for (cur_expr = g_list_first (regexp_module_ctx->items); cur_expr != NULL; cur_expr = g_list_next (cur_expr)) {
+               process_regexp_item ((struct regexp_module_item *)cur_expr->data, task);
+       }
+
+       return 0;
+}