aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-06-01 19:40:59 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-06-01 19:40:59 +0400
commit 6be6bdef362e1120dedfe2f45fdb3a74e092f3aa (patch)
tree 78e2662b887a813d4c15d74a4229ca971b6c4d96
parent 8c89dee54e1be32e8191cc0e8cd6b680bafadae1 (diff)
download rspamd-6be6bdef362e1120dedfe2f45fdb3a74e092f3aa.tar.gz
rspamd-6be6bdef362e1120dedfe2f45fdb3a74e092f3aa.zip
* Add experimental support of dynamic rules to regexp module
-rw-r--r-- CMakeLists.txt 5
-rw-r--r-- src/plugins/regexp.c 311
-rw-r--r-- src/symbols_cache.c 45
-rw-r--r-- src/symbols_cache.h 6
4 files changed, 317 insertions, 50 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 681bb252d..75cd5083a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -133,11 +133,6 @@ ELSE(NOT GMIME2_FOUND)
LINK_DIRECTORIES(${GMIME2_LIBRARY_DIRS})
ENDIF(NOT GMIME2_FOUND)
-
-IF(NOT GLIB2_FOUND OR (NOT GMIME2_FOUND AND NOT GMIME24_FOUND))
- MESSAGE(FATAL_ERROR "Error: gmime2 and glib2 are required for rspamd")
-ENDIF(NOT GLIB2_FOUND OR (NOT GMIME2_FOUND AND NOT GMIME24_FOUND))
-
# Make from ; separated list normal space separated list
# Glib2
FOREACH(arg ${GLIB2_CFLAGS})
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 33aa60f29..636b26c03 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -34,10 +34,12 @@
#include "../message.h"
#include "../modules.h"
#include "../cfg_file.h"
+#include "../map.h"
#include "../util.h"
#include "../expressions.h"
#include "../view.h"
#include "../lua/lua_common.h"
+#include "../json/jansson.h"
#define DEFAULT_STATFILE_PREFIX "./"
@@ -61,6 +63,14 @@ struct regexp_ctx {
char *statfile_prefix;
memory_pool_t *regexp_pool;
+ memory_pool_t *dynamic_pool;
+};
+
+struct regexp_json_buf {
+ u_char *buf;
+ u_char *pos;
+ size_t buflen;
+ struct config_file *cfg;
};
static struct regexp_ctx *regexp_module_ctx = NULL;
@@ -72,29 +82,96 @@ static gboolean rspamd_check_smtp_data (struct worker_task *task
static void process_regexp_item (struct worker_task *task, void *user_data);
-int
-regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
+static void
+regexp_dynamic_insert_result (struct worker_task *task, void *user_data)
{
- regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx));
+ char *symbol = user_data;
+
+ insert_result (task, regexp_module_ctx->metric, symbol, 1, NULL);
+}
- regexp_module_ctx->filter = regexp_common_filter;
- regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
- regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
+static gboolean
+parse_regexp_ipmask (const char *begin, struct dynamic_map_item *addr)
+{
+ const char *pos;
+ char ip_buf[sizeof ("255.255.255.255")], mask_buf[3], *p;
+ int state = 0, dots = 0;
+
+ bzero (ip_buf, sizeof (ip_buf));
+ bzero (mask_buf, sizeof (mask_buf));
+ pos = begin;
+ p = ip_buf;
+
+ while (*pos) {
+ switch (state) {
+ case 0:
+ state = 1;
+ p = ip_buf;
+ dots = 0;
+ break;
+ case 1:
+ /* Begin parse ip */
+ if (p - ip_buf >= sizeof (ip_buf) || dots > 3) {
+ return FALSE;
+ }
+ if (g_ascii_isdigit (*pos)) {
+ *p ++ = *pos ++;
+ }
+ else if (*pos == '.') {
+ *p ++ = *pos ++;
+ dots ++;
+ }
+ else if (*pos == '/') {
+ pos ++;
+ p = mask_buf;
+ state = 2;
+ }
+ else {
+ /* Invalid character */
+ return FALSE;
+ }
+ break;
+ case 2:
+ /* Parse mask */
+ if (p - mask_buf > 2) {
+ return FALSE;
+ }
+ if (g_ascii_isdigit (*pos)) {
+ *p ++ = *pos ++;
+ }
+ else {
+ return FALSE;
+ }
+ break;
+ }
+ }
- *ctx = (struct module_ctx *)regexp_module_ctx;
- register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
- register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
- register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
+ if (!inet_aton (ip_buf, &addr->addr)) {
+ return FALSE;
+ }
+ if (state == 2) {
+ /* Also parse mask */
+ addr->mask = (mask_buf[0] - '0') * 10 + mask_buf[1] - '0';
+ if (addr->mask > 32) {
+ msg_info ("bad ipmask value: '%s'", begin);
+ return FALSE;
+ }
+ }
+ else {
+ addr->mask = 32;
+ }
+
+ return TRUE;
- return 0;
}
+/* Process regexp expression */
static gboolean
-read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, struct config_file *cfg)
+read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, gboolean raw_mode)
{
struct expression *e, *cur;
- e = parse_expression (regexp_module_ctx->regexp_pool, line);
+ e = parse_expression (pool, line);
if (e == NULL) {
msg_warn ("%s = \"%s\" is invalid regexp expression", symbol, line);
return FALSE;
@@ -103,7 +180,7 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain,
cur = e;
while (cur) {
if (cur->type == EXPR_REGEXP) {
- cur->content.operand = parse_regexp (pool, cur->content.operand, cfg->raw_mode);
+ cur->content.operand = parse_regexp (pool, cur->content.operand, raw_mode);
if (cur->content.operand == NULL) {
msg_warn ("cannot parse regexp, skip expression %s = \"%s\"", symbol, line);
return FALSE;
@@ -116,6 +193,197 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain,
return TRUE;
}
+
+/* Callbacks for reading json dynamic rules */
+u_char *
+json_regexp_read_cb (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data)
+{
+ struct regexp_json_buf *jb;
+ size_t free, off;
+
+ if (data->cur_data == NULL) {
+ jb = g_malloc (sizeof (struct regexp_json_buf));
+ jb->cfg = ((struct regexp_json_buf *)data->prev_data)->cfg;
+ jb->buf = NULL;
+ jb->pos = NULL;
+ data->cur_data = jb;
+ }
+ else {
+ jb = data->cur_data;
+ }
+
+ if (jb->buf == NULL) {
+ /* Allocate memory for buffer */
+ jb->buflen = len * 2;
+ jb->buf = g_malloc (jb->buflen);
+ jb->pos = jb->buf;
+ }
+
+ off = jb->pos - jb->buf;
+ free = jb->buflen - off;
+
+ if (free < len) {
+ jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2);
+ jb->buf = g_realloc (jb->buf, jb->buflen);
+ jb->pos = jb->buf + off;
+ }
+
+ memcpy (jb->pos, chunk, len);
+ jb->pos += len;
+
+ /* Say not to copy any part of this buffer */
+ return NULL;
+}
+
+void
+json_regexp_fin_cb (memory_pool_t * pool, struct map_cb_data *data)
+{
+ struct regexp_json_buf *jb;
+ int nelts, i, j;
+ json_t *js, *cur_elt, *cur_nm, *it_val;
+ json_error_t je;
+ char *cur_rule, *cur_symbol;
+ double score;
+ struct regexp_module_item *cur_item;
+ GList *cur_networks = NULL;
+ struct dynamic_map_item *cur_nitem;
+ memory_pool_t *new_pool;
+ struct metric *metric;
+
+ if (data->prev_data) {
+ jb = data->prev_data;
+ /* Clean prev data */
+ if (jb->buf) {
+ g_free (jb->buf);
+ }
+ g_free (jb);
+ }
+
+ /* Now parse json */
+ if (data->cur_data) {
+ jb = data->cur_data;
+ }
+ else {
+ msg_err ("no data read");
+ return;
+ }
+ if (jb->buf == NULL) {
+ msg_err ("no data read");
+ return;
+ }
+ /* NULL terminate current buf */
+ *jb->pos = '\0';
+
+ js = json_loads (jb->buf, &je);
+ if (!js) {
+ msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
+ return;
+ }
+
+ if (!json_is_array (js)) {
+ json_decref (js);
+ msg_err ("loaded json is not an array");
+ return;
+ }
+
+ new_pool = memory_pool_new (memory_pool_get_size ());
+ metric = g_hash_table_lookup (jb->cfg->metrics, regexp_module_ctx->metric);
+ if (metric == NULL) {
+ msg_err ("cannot find metric definition %s", regexp_module_ctx->metric);
+ return;
+ }
+
+ remove_dynamic_rules (metric->cache);
+ if (regexp_module_ctx->dynamic_pool != NULL) {
+ memory_pool_delete (regexp_module_ctx->dynamic_pool);
+ }
+ regexp_module_ctx->dynamic_pool = new_pool;
+
+ nelts = json_array_size (js);
+ for (i = 0; i < nelts; i++) {
+ cur_networks = NULL;
+ cur_rule = NULL;
+
+ cur_elt = json_array_get (js, i);
+ if (!cur_elt || !json_is_object (cur_elt)) {
+ msg_err ("loaded json is not an object");
+ continue;
+ }
+ /* Factor param */
+ cur_nm = json_object_get (cur_elt, "factor");
+ if (cur_nm == NULL || !json_is_number (cur_nm)) {
+ msg_err ("factor is not a number or not exists, but is required");
+ continue;
+ }
+ score = json_number_value (cur_nm);
+ /* Symbol param */
+ cur_nm = json_object_get (cur_elt, "symbol");
+ if (cur_nm == NULL || !json_is_string (cur_nm)) {
+ msg_err ("symbol is not a string or not exists, but is required");
+ continue;
+ }
+ cur_symbol = memory_pool_strdup (new_pool, json_string_value (cur_nm));
+ /* Now check other settings */
+ /* Rule */
+ cur_nm = json_object_get (cur_elt, "rule");
+ if (cur_nm != NULL && json_is_string (cur_nm)) {
+ cur_rule = memory_pool_strdup (new_pool, json_string_value (cur_nm));
+ }
+ /* Networks array */
+ cur_nm = json_object_get (cur_elt, "networks");
+ if (cur_nm != NULL && json_is_array (cur_nm)) {
+ for (j = 0; j < json_array_size (cur_nm); j++) {
+ it_val = json_array_get (cur_nm, i);
+ if (it_val && json_is_string (it_val)) {
+ cur_nitem = memory_pool_alloc (new_pool, sizeof (struct dynamic_map_item));
+ if (parse_regexp_ipmask (json_string_value (it_val), cur_nitem)) {
+ cur_networks = g_list_prepend (cur_networks, cur_nitem);
+ }
+ }
+ }
+ }
+ if (cur_rule) {
+ /* Dynamic rule has rule option */
+ cur_item = memory_pool_alloc0 (new_pool, sizeof (struct regexp_module_item));
+ cur_item->symbol = cur_symbol;
+ if (read_regexp_expression (new_pool, cur_item, cur_symbol, cur_rule, jb->cfg->raw_mode)) {
+ register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, process_regexp_item, cur_item, cur_networks);
+ }
+ else {
+ msg_warn ("cannot parse dynamic rule");
+ }
+ }
+ else {
+ /* Just rule that is allways true (for whitelisting for example) */
+ register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, regexp_dynamic_insert_result, cur_symbol, cur_networks);
+ }
+ if (cur_networks) {
+ g_list_free (cur_networks);
+ }
+ }
+ json_decref (js);
+}
+
+/* Init function */
+int
+regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+ regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx));
+
+ regexp_module_ctx->filter = regexp_common_filter;
+ regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
+ regexp_module_ctx->dynamic_pool = NULL;
+ regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
+
+ *ctx = (struct module_ctx *)regexp_module_ctx;
+ register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
+ register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
+ register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
+
+ return 0;
+}
+
+
/*
* Parse string in format:
* SYMBOL:statfile:weight
@@ -157,6 +425,7 @@ regexp_module_config (struct config_file *cfg)
char *value;
int res = TRUE;
double *w;
+ struct regexp_json_buf *jb, **pjb;
if ((value = get_module_opt (cfg, "regexp", "metric")) != NULL) {
regexp_module_ctx->metric = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
@@ -170,6 +439,16 @@ regexp_module_config (struct config_file *cfg)
else {
regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
}
+ if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) {
+ jb = g_malloc (sizeof (struct regexp_json_buf));
+ pjb = g_malloc (sizeof (struct regexp_json_buf *));
+ jb->buf = NULL;
+ jb->cfg = cfg;
+ *pjb = jb;
+ if (!add_map (value, json_regexp_read_cb, json_regexp_fin_cb, (void **)pjb)) {
+ msg_err ("cannot add map %s", value);
+ }
+ }
metric = g_hash_table_lookup (cfg->metrics, regexp_module_ctx->metric);
if (metric == NULL) {
@@ -190,7 +469,7 @@ regexp_module_config (struct config_file *cfg)
cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item));
cur_item->symbol = cur->param;
if (cur->is_lua && cur->lua_type == LUA_VAR_STRING) {
- if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg)) {
+ if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg->raw_mode)) {
res = FALSE;
}
}
@@ -198,7 +477,7 @@ regexp_module_config (struct config_file *cfg)
cur_item->lua_function = cur->actual_data;
}
else if (! cur->is_lua) {
- if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg)) {
+ if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg->raw_mode)) {
res = FALSE;
}
}
diff --git a/src/symbols_cache.c b/src/symbols_cache.c
index a632152e4..07aabf6d4 100644
--- a/src/symbols_cache.c
+++ b/src/symbols_cache.c
@@ -278,15 +278,16 @@ register_symbol (struct symbols_cache **cache, const char *name, double weight,
}
void
-register_dynamic_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func,
- gpointer user_data, struct dynamic_map_item *networks, gsize network_count)
+register_dynamic_symbol (memory_pool_t *dynamic_pool, struct symbols_cache **cache,
+ const char *name, double weight, symbol_func_t func,
+ gpointer user_data, GList *networks)
{
struct cache_item *item = NULL;
struct symbols_cache *pcache = *cache;
- GList **target, *t;
- gsize i;
+ GList **target, *t, *cur;
uintptr_t r;
uint32_t mask = 0xFFFFFFFF;
+ struct dynamic_map_item *it;
if (*cache == NULL) {
pcache = g_new0 (struct symbols_cache, 1);
@@ -294,11 +295,8 @@ register_dynamic_symbol (struct symbols_cache **cache, const char *name, double
pcache->static_pool = memory_pool_new (memory_pool_get_size ());
}
- if (pcache->dynamic_pool == NULL) {
- pcache->dynamic_pool = memory_pool_new (memory_pool_get_size ());
- }
- item = memory_pool_alloc0 (pcache->dynamic_pool, sizeof (struct cache_item));
- item->s = memory_pool_alloc (pcache->dynamic_pool, sizeof (struct saved_cache_item));
+ item = memory_pool_alloc0 (dynamic_pool, sizeof (struct cache_item));
+ item->s = memory_pool_alloc (dynamic_pool, sizeof (struct saved_cache_item));
g_strlcpy (item->s->symbol, name, sizeof (item->s->symbol));
item->func = func;
item->user_data = user_data;
@@ -309,39 +307,41 @@ register_dynamic_symbol (struct symbols_cache **cache, const char *name, double
msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name);
set_counter (item->s->symbol, 0);
- if (network_count == 0 || networks == NULL) {
+ if (networks == NULL) {
target = &pcache->dynamic_items;
}
else {
if (pcache->dynamic_map == NULL) {
pcache->dynamic_map = radix_tree_create ();
}
- for (i = 0; i < network_count; i ++) {
- mask = mask << (32 - networks[i].mask);
- r = ntohl (networks[i].addr.s_addr & mask);
+ cur = networks;
+ while (cur) {
+ it = cur->data;
+ mask = mask << (32 - it->mask);
+ r = ntohl (it->addr.s_addr & mask);
if ((r = radix32tree_find (pcache->dynamic_map, r)) != RADIX_NO_VALUE) {
t = (GList *)((gpointer)r);
target = &t;
}
else {
t = g_list_prepend (NULL, item);
- memory_pool_add_destructor (pcache->dynamic_pool, (pool_destruct_func)g_list_free, t);
- r = radix32tree_insert (pcache->dynamic_map, ntohl (networks[i].addr.s_addr), mask, (uintptr_t)t);
+ memory_pool_add_destructor (dynamic_pool, (pool_destruct_func)g_list_free, t);
+ r = radix32tree_insert (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t);
if (r == -1) {
- msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (networks[i].addr), mask);
+ msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
}
else if (r == 1) {
- msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (networks[i].addr), mask);
+ msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask);
}
- return;
}
+ cur = g_list_next (cur);
}
}
*target = g_list_prepend (*target, item);
}
void
-remove_dynamic_items (struct symbols_cache *cache)
+remove_dynamic_rules (struct symbols_cache *cache)
{
if (cache->dynamic_items) {
g_list_free (cache->dynamic_items);
@@ -351,10 +351,6 @@ remove_dynamic_items (struct symbols_cache *cache)
if (cache->dynamic_map) {
radix_tree_free (cache->dynamic_map);
}
-
- /* Do magic */
- memory_pool_delete (cache->dynamic_pool);
- cache->dynamic_pool = NULL;
}
static void
@@ -380,9 +376,6 @@ free_cache (gpointer arg)
}
memory_pool_delete (cache->static_pool);
- if (cache->dynamic_pool) {
- memory_pool_delete (cache->dynamic_pool);
- }
g_free (cache);
}
diff --git a/src/symbols_cache.h b/src/symbols_cache.h
index af92b4891..dfd5672ad 100644
--- a/src/symbols_cache.h
+++ b/src/symbols_cache.h
@@ -51,7 +51,6 @@ struct symbols_cache {
GList *dynamic_items;
memory_pool_t *static_pool;
- memory_pool_t *dynamic_pool;
guint cur_items;
guint used_items;
@@ -79,8 +78,9 @@ void register_symbol (struct symbols_cache **cache, const char *name, double wei
* @param func pointer to handler
* @param user_data pointer to user_data
*/
-void register_dynamic_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func,
- gpointer user_data, struct dynamic_map_item *networks, gsize network_count);
+void register_dynamic_symbol (memory_pool_t *pool, struct symbols_cache **cache, const char *name,
+ double weight, symbol_func_t func,
+ gpointer user_data, GList *networks);
/**
* Call function for cached symbol using saved callback