From 9d6f80f8a3fbdc7d8079f6a60d936532098e27a4 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 22 Oct 2008 19:41:12 +0400 Subject: [PATCH] * Add initial implementation of regexp module --- configure | 4 +- plugins/regexp.c | 247 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+), 2 deletions(-) create mode 100644 plugins/regexp.c diff --git a/configure b/configure index a34153948..f255de36d 100755 --- a/configure +++ b/configure @@ -21,8 +21,8 @@ YACC_OUTPUT="cfg_yacc.c" LEX_OUTPUT="cfg_lex.c" CONFIG="config.h" -SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c filter.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}" -MODULES="surbl" +SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c filter.c plugins/regexp.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}" +MODULES="surbl regexp" CFLAGS="$CFLAGS -W -Wpointer-arith -Wno-unused-parameter" CFLAGS="$CFLAGS -Wno-unused-function -Wunused-variable -Wno-sign-compare" diff --git a/plugins/regexp.c b/plugins/regexp.c new file mode 100644 index 000000000..f82543a7e --- /dev/null +++ b/plugins/regexp.c @@ -0,0 +1,247 @@ +/***MODULE:regexp + * rspamd module that implements different regexp rules + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../config.h" +#include "../main.h" +#include "../modules.h" +#include "../cfg_file.h" + +struct regexp_module_item { + struct expression *expr; + int regexp_number; + int op_number; + char *symbol; +}; + +struct regexp_ctx { + int (*header_filter)(struct worker_task *task); + int (*mime_filter)(struct worker_task *task); + int (*message_filter)(struct worker_task *task); + int (*url_filter)(struct worker_task *task); + GList *items; + char *metric; + + memory_pool_t *regexp_pool; +}; + +static struct regexp_ctx *regexp_module_ctx = NULL; + +static int regexp_common_filter (struct worker_task *task); + +int +regexp_module_init (struct config_file *cfg, struct module_ctx **ctx) +{ + regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx)); + + regexp_module_ctx->header_filter = regexp_common_filter; + regexp_module_ctx->mime_filter = NULL; + regexp_module_ctx->message_filter = NULL; + regexp_module_ctx->url_filter = NULL; + regexp_module_ctx->regexp_pool = memory_pool_new (1024); + regexp_module_ctx->items = NULL; + + return 0; +} + +static void +read_regexp_expression (memory_pool_t *pool, struct regexp_module_item *chain, char *line) +{ + struct expression *e, *cur; + + e = parse_expression (regexp_module_ctx->regexp_pool, line); + chain->expr = e; + cur = e; + while (cur) { + if (cur->type == EXPR_OPERAND) { + cur->content.operand = parse_regexp (pool, cur->content.operand); + chain->regexp_number ++; + } + else { + chain->op_number ++; + } + cur = cur->next; + } +} + +int +regexp_module_config (struct config_file *cfg) +{ + LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; + struct module_opt *cur; + struct regexp_module_item *cur_item; + char *value; + + if ((value = get_module_opt (cfg, "regexp", "metric")) != NULL) { + regexp_module_ctx->metric = memory_pool_strdup (regexp_module_ctx->regexp_pool, value); + g_free (value); + } + else { + regexp_module_ctx->metric = DEFAULT_METRIC; + } + + cur_module_opt = g_hash_table_lookup (cfg->modules_opts, "regexp"); + if (cur_module_opt != NULL) { + LIST_FOREACH (cur, cur_module_opt, next) { + if (strcmp (cur->param, "metric") == 0) { + continue; + } + cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item)); + cur_item->symbol = cur->param; + read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->value); + regexp_module_ctx->items = g_list_prepend (regexp_module_ctx->items, cur_item); + } + } + + return 0; +} + +int +regexp_module_reconfig (struct config_file *cfg) +{ + memory_pool_delete (regexp_module_ctx->regexp_pool); + regexp_module_ctx->regexp_pool = memory_pool_new (1024); + + return regexp_module_config (cfg); +} + +static gsize +process_regexp (struct rspamd_regexp *re, struct worker_task *task) +{ + char *headerv; + struct mime_part *part; + struct uri *url; + + switch (re->type) { + case REGEXP_NONE: + return 0; + case REGEXP_HEADER: + if (re->header == NULL) { + msg_info ("process_regexp: header regexp without header name"); + return 0; + } + msg_debug ("process_regexp: checking header regexp: %s = /%s/", re->header, re->regexp_text); + headerv = (char *)g_mime_message_get_header (task->message, re->header); + if (headerv == NULL) { + return 0; + } + else { + if (re->regexp == NULL) { + msg_debug ("process_regexp: regexp contains only header and it is found %s", re->header); + return 1; + } + if (g_regex_match (re->regexp, headerv, 0, NULL) == TRUE) { + return 1; + } + else { + return 0; + } + } + break; + case REGEXP_MIME: + msg_debug ("process_regexp: checking mime regexp: /%s/", re->regexp_text); + TAILQ_FOREACH (part, &task->parts, next) { + if (g_regex_match_full (re->regexp, part->content->data, part->content->len, 0, 0, NULL, NULL) == TRUE) { + return 1; + } + } + return 0; + case REGEXP_MESSAGE: + msg_debug ("process_message: checking mime regexp: /%s/", re->regexp_text); + if (g_regex_match_full (re->regexp, task->msg->buf->begin, task->msg->buf->len, 0, 0, NULL, NULL) == TRUE) { + return 1; + } + return 0; + case REGEXP_URL: + msg_debug ("process_url: checking mime regexp: /%s/", re->regexp_text); + TAILQ_FOREACH (url, &task->urls, next) { + if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) { + return 1; + } + } + return 0; + } + + /* Not reached */ + return 0; +} + +static void +process_regexp_item (struct regexp_module_item *item, struct worker_task *task) +{ + GQueue *stack; + gsize cur, op1, op2; + struct expression *it = item->expr; + + stack = g_queue_new (); + + while (it) { + if (it->type == EXPR_OPERAND) { + /* Find corresponding symbol */ + cur = process_regexp ((struct rspamd_regexp *)it->content.operand, task); + msg_debug ("process_regexp_item: regexp %s found", cur ? "is" : "is not"); + g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); + } + else { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + g_queue_free (stack); + return; + } + switch (it->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + default: + it = it->next; + continue; + } + } + it = it->next; + } + if (!g_queue_is_empty (stack)) { + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + if (op1) { + /* Add symbol to results */ + insert_result (task, regexp_module_ctx->metric, item->symbol, op1); + } + } + + g_queue_free (stack); +} + +static int +regexp_common_filter (struct worker_task *task) +{ + GList *cur_expr = g_list_first (regexp_module_ctx->items); + + while (cur_expr) { + process_regexp_item ((struct regexp_module_item *)cur_expr->data, task); + cur_expr = g_list_next (cur_expr); + } +} -- 2.39.5