diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-06-01 19:40:59 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-06-01 19:40:59 +0400 |
commit | 6be6bdef362e1120dedfe2f45fdb3a74e092f3aa (patch) | |
tree | 78e2662b887a813d4c15d74a4229ca971b6c4d96 | |
parent | 8c89dee54e1be32e8191cc0e8cd6b680bafadae1 (diff) | |
download | rspamd-6be6bdef362e1120dedfe2f45fdb3a74e092f3aa.tar.gz rspamd-6be6bdef362e1120dedfe2f45fdb3a74e092f3aa.zip |
* Add experimental support for dynamic rules to the regexp module
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | src/plugins/regexp.c | 311 | ||||
-rw-r--r-- | src/symbols_cache.c | 45 | ||||
-rw-r--r-- | src/symbols_cache.h | 6 |
4 files changed, 317 insertions, 50 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 681bb252d..75cd5083a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,11 +133,6 @@ ELSE(NOT GMIME2_FOUND) LINK_DIRECTORIES(${GMIME2_LIBRARY_DIRS}) ENDIF(NOT GMIME2_FOUND) - -IF(NOT GLIB2_FOUND OR (NOT GMIME2_FOUND AND NOT GMIME24_FOUND)) - MESSAGE(FATAL_ERROR "Error: gmime2 and glib2 are required for rspamd") -ENDIF(NOT GLIB2_FOUND OR (NOT GMIME2_FOUND AND NOT GMIME24_FOUND)) - # Make from ; separated list normal space separated list # Glib2 FOREACH(arg ${GLIB2_CFLAGS}) diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 33aa60f29..636b26c03 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -34,10 +34,12 @@ #include "../message.h" #include "../modules.h" #include "../cfg_file.h" +#include "../map.h" #include "../util.h" #include "../expressions.h" #include "../view.h" #include "../lua/lua_common.h" +#include "../json/jansson.h" #define DEFAULT_STATFILE_PREFIX "./" @@ -61,6 +63,14 @@ struct regexp_ctx { char *statfile_prefix; memory_pool_t *regexp_pool; + memory_pool_t *dynamic_pool; +}; + +struct regexp_json_buf { + u_char *buf; + u_char *pos; + size_t buflen; + struct config_file *cfg; }; static struct regexp_ctx *regexp_module_ctx = NULL; @@ -72,29 +82,96 @@ static gboolean rspamd_check_smtp_data (struct worker_task *task static void process_regexp_item (struct worker_task *task, void *user_data); -int -regexp_module_init (struct config_file *cfg, struct module_ctx **ctx) +static void +regexp_dynamic_insert_result (struct worker_task *task, void *user_data) { - regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx)); + char *symbol = user_data; + + insert_result (task, regexp_module_ctx->metric, symbol, 1, NULL); +} - regexp_module_ctx->filter = regexp_common_filter; - regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ()); - regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal); +static gboolean +parse_regexp_ipmask (const char 
*begin, struct dynamic_map_item *addr) +{ + const char *pos; + char ip_buf[sizeof ("255.255.255.255")], mask_buf[3], *p; + int state = 0, dots = 0; + + bzero (ip_buf, sizeof (ip_buf)); + bzero (mask_buf, sizeof (mask_buf)); + pos = begin; + p = ip_buf; + + while (*pos) { + switch (state) { + case 0: + state = 1; + p = ip_buf; + dots = 0; + break; + case 1: + /* Begin parse ip */ + if (p - ip_buf >= sizeof (ip_buf) || dots > 3) { + return FALSE; + } + if (g_ascii_isdigit (*pos)) { + *p ++ = *pos ++; + } + else if (*pos == '.') { + *p ++ = *pos ++; + dots ++; + } + else if (*pos == '/') { + pos ++; + p = mask_buf; + state = 2; + } + else { + /* Invalid character */ + return FALSE; + } + break; + case 2: + /* Parse mask */ + if (p - mask_buf > 2) { + return FALSE; + } + if (g_ascii_isdigit (*pos)) { + *p ++ = *pos ++; + } + else { + return FALSE; + } + break; + } + } - *ctx = (struct module_ctx *)regexp_module_ctx; - register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL); - register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL); - register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL); + if (!inet_aton (ip_buf, &addr->addr)) { + return FALSE; + } + if (state == 2) { + /* Also parse mask */ + addr->mask = (mask_buf[0] - '0') * 10 + mask_buf[1] - '0'; + if (addr->mask > 32) { + msg_info ("bad ipmask value: '%s'", begin); + return FALSE; + } + } + else { + addr->mask = 32; + } + + return TRUE; - return 0; } +/* Process regexp expression */ static gboolean -read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, struct config_file *cfg) +read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, gboolean raw_mode) { struct expression *e, *cur; - e = parse_expression (regexp_module_ctx->regexp_pool, line); + e = parse_expression (pool, line); if (e == NULL) { msg_warn ("%s = \"%s\" is invalid 
regexp expression", symbol, line); return FALSE; @@ -103,7 +180,7 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, cur = e; while (cur) { if (cur->type == EXPR_REGEXP) { - cur->content.operand = parse_regexp (pool, cur->content.operand, cfg->raw_mode); + cur->content.operand = parse_regexp (pool, cur->content.operand, raw_mode); if (cur->content.operand == NULL) { msg_warn ("cannot parse regexp, skip expression %s = \"%s\"", symbol, line); return FALSE; @@ -116,6 +193,197 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, return TRUE; } + +/* Callbacks for reading json dynamic rules */ +u_char * +json_regexp_read_cb (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data) +{ + struct regexp_json_buf *jb; + size_t free, off; + + if (data->cur_data == NULL) { + jb = g_malloc (sizeof (struct regexp_json_buf)); + jb->cfg = ((struct regexp_json_buf *)data->prev_data)->cfg; + jb->buf = NULL; + jb->pos = NULL; + data->cur_data = jb; + } + else { + jb = data->cur_data; + } + + if (jb->buf == NULL) { + /* Allocate memory for buffer */ + jb->buflen = len * 2; + jb->buf = g_malloc (jb->buflen); + jb->pos = jb->buf; + } + + off = jb->pos - jb->buf; + free = jb->buflen - off; + + if (free < len) { + jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2); + jb->buf = g_realloc (jb->buf, jb->buflen); + jb->pos = jb->buf + off; + } + + memcpy (jb->pos, chunk, len); + jb->pos += len; + + /* Say not to copy any part of this buffer */ + return NULL; +} + +void +json_regexp_fin_cb (memory_pool_t * pool, struct map_cb_data *data) +{ + struct regexp_json_buf *jb; + int nelts, i, j; + json_t *js, *cur_elt, *cur_nm, *it_val; + json_error_t je; + char *cur_rule, *cur_symbol; + double score; + struct regexp_module_item *cur_item; + GList *cur_networks = NULL; + struct dynamic_map_item *cur_nitem; + memory_pool_t *new_pool; + struct metric *metric; + + if (data->prev_data) { + jb = data->prev_data; + /* 
Clean prev data */ + if (jb->buf) { + g_free (jb->buf); + } + g_free (jb); + } + + /* Now parse json */ + if (data->cur_data) { + jb = data->cur_data; + } + else { + msg_err ("no data read"); + return; + } + if (jb->buf == NULL) { + msg_err ("no data read"); + return; + } + /* NULL terminate current buf */ + *jb->pos = '\0'; + + js = json_loads (jb->buf, &je); + if (!js) { + msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line); + return; + } + + if (!json_is_array (js)) { + json_decref (js); + msg_err ("loaded json is not an array"); + return; + } + + new_pool = memory_pool_new (memory_pool_get_size ()); + metric = g_hash_table_lookup (jb->cfg->metrics, regexp_module_ctx->metric); + if (metric == NULL) { + msg_err ("cannot find metric definition %s", regexp_module_ctx->metric); + return; + } + + remove_dynamic_rules (metric->cache); + if (regexp_module_ctx->dynamic_pool != NULL) { + memory_pool_delete (regexp_module_ctx->dynamic_pool); + } + regexp_module_ctx->dynamic_pool = new_pool; + + nelts = json_array_size (js); + for (i = 0; i < nelts; i++) { + cur_networks = NULL; + cur_rule = NULL; + + cur_elt = json_array_get (js, i); + if (!cur_elt || !json_is_object (cur_elt)) { + msg_err ("loaded json is not an object"); + continue; + } + /* Factor param */ + cur_nm = json_object_get (cur_elt, "factor"); + if (cur_nm == NULL || !json_is_number (cur_nm)) { + msg_err ("factor is not a number or not exists, but is required"); + continue; + } + score = json_number_value (cur_nm); + /* Symbol param */ + cur_nm = json_object_get (cur_elt, "symbol"); + if (cur_nm == NULL || !json_is_string (cur_nm)) { + msg_err ("symbol is not a string or not exists, but is required"); + continue; + } + cur_symbol = memory_pool_strdup (new_pool, json_string_value (cur_nm)); + /* Now check other settings */ + /* Rule */ + cur_nm = json_object_get (cur_elt, "rule"); + if (cur_nm != NULL && json_is_string (cur_nm)) { + cur_rule = memory_pool_strdup (new_pool, 
json_string_value (cur_nm)); + } + /* Networks array */ + cur_nm = json_object_get (cur_elt, "networks"); + if (cur_nm != NULL && json_is_array (cur_nm)) { + for (j = 0; j < json_array_size (cur_nm); j++) { + it_val = json_array_get (cur_nm, i); + if (it_val && json_is_string (it_val)) { + cur_nitem = memory_pool_alloc (new_pool, sizeof (struct dynamic_map_item)); + if (parse_regexp_ipmask (json_string_value (it_val), cur_nitem)) { + cur_networks = g_list_prepend (cur_networks, cur_nitem); + } + } + } + } + if (cur_rule) { + /* Dynamic rule has rule option */ + cur_item = memory_pool_alloc0 (new_pool, sizeof (struct regexp_module_item)); + cur_item->symbol = cur_symbol; + if (read_regexp_expression (new_pool, cur_item, cur_symbol, cur_rule, jb->cfg->raw_mode)) { + register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, process_regexp_item, cur_item, cur_networks); + } + else { + msg_warn ("cannot parse dynamic rule"); + } + } + else { + /* Just rule that is allways true (for whitelisting for example) */ + register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, regexp_dynamic_insert_result, cur_symbol, cur_networks); + } + if (cur_networks) { + g_list_free (cur_networks); + } + } + json_decref (js); +} + +/* Init function */ +int +regexp_module_init (struct config_file *cfg, struct module_ctx **ctx) +{ + regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx)); + + regexp_module_ctx->filter = regexp_common_filter; + regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ()); + regexp_module_ctx->dynamic_pool = NULL; + regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal); + + *ctx = (struct module_ctx *)regexp_module_ctx; + register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL); + register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL); + register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL); + + return 0; +} + 
+ /* * Parse string in format: * SYMBOL:statfile:weight @@ -157,6 +425,7 @@ regexp_module_config (struct config_file *cfg) char *value; int res = TRUE; double *w; + struct regexp_json_buf *jb, **pjb; if ((value = get_module_opt (cfg, "regexp", "metric")) != NULL) { regexp_module_ctx->metric = memory_pool_strdup (regexp_module_ctx->regexp_pool, value); @@ -170,6 +439,16 @@ regexp_module_config (struct config_file *cfg) else { regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX; } + if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) { + jb = g_malloc (sizeof (struct regexp_json_buf)); + pjb = g_malloc (sizeof (struct regexp_json_buf *)); + jb->buf = NULL; + jb->cfg = cfg; + *pjb = jb; + if (!add_map (value, json_regexp_read_cb, json_regexp_fin_cb, (void **)pjb)) { + msg_err ("cannot add map %s", value); + } + } metric = g_hash_table_lookup (cfg->metrics, regexp_module_ctx->metric); if (metric == NULL) { @@ -190,7 +469,7 @@ regexp_module_config (struct config_file *cfg) cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item)); cur_item->symbol = cur->param; if (cur->is_lua && cur->lua_type == LUA_VAR_STRING) { - if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg)) { + if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg->raw_mode)) { res = FALSE; } } @@ -198,7 +477,7 @@ regexp_module_config (struct config_file *cfg) cur_item->lua_function = cur->actual_data; } else if (! 
cur->is_lua) { - if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg)) { + if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg->raw_mode)) { res = FALSE; } } diff --git a/src/symbols_cache.c b/src/symbols_cache.c index a632152e4..07aabf6d4 100644 --- a/src/symbols_cache.c +++ b/src/symbols_cache.c @@ -278,15 +278,16 @@ register_symbol (struct symbols_cache **cache, const char *name, double weight, } void -register_dynamic_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func, - gpointer user_data, struct dynamic_map_item *networks, gsize network_count) +register_dynamic_symbol (memory_pool_t *dynamic_pool, struct symbols_cache **cache, + const char *name, double weight, symbol_func_t func, + gpointer user_data, GList *networks) { struct cache_item *item = NULL; struct symbols_cache *pcache = *cache; - GList **target, *t; - gsize i; + GList **target, *t, *cur; uintptr_t r; uint32_t mask = 0xFFFFFFFF; + struct dynamic_map_item *it; if (*cache == NULL) { pcache = g_new0 (struct symbols_cache, 1); @@ -294,11 +295,8 @@ register_dynamic_symbol (struct symbols_cache **cache, const char *name, double pcache->static_pool = memory_pool_new (memory_pool_get_size ()); } - if (pcache->dynamic_pool == NULL) { - pcache->dynamic_pool = memory_pool_new (memory_pool_get_size ()); - } - item = memory_pool_alloc0 (pcache->dynamic_pool, sizeof (struct cache_item)); - item->s = memory_pool_alloc (pcache->dynamic_pool, sizeof (struct saved_cache_item)); + item = memory_pool_alloc0 (dynamic_pool, sizeof (struct cache_item)); + item->s = memory_pool_alloc (dynamic_pool, sizeof (struct saved_cache_item)); g_strlcpy (item->s->symbol, name, sizeof (item->s->symbol)); item->func = func; item->user_data = user_data; @@ -309,39 +307,41 @@ register_dynamic_symbol (struct symbols_cache **cache, const char *name, double msg_debug ("used items: %d, added symbol: %s", 
(*cache)->used_items, name); set_counter (item->s->symbol, 0); - if (network_count == 0 || networks == NULL) { + if (networks == NULL) { target = &pcache->dynamic_items; } else { if (pcache->dynamic_map == NULL) { pcache->dynamic_map = radix_tree_create (); } - for (i = 0; i < network_count; i ++) { - mask = mask << (32 - networks[i].mask); - r = ntohl (networks[i].addr.s_addr & mask); + cur = networks; + while (cur) { + it = cur->data; + mask = mask << (32 - it->mask); + r = ntohl (it->addr.s_addr & mask); if ((r = radix32tree_find (pcache->dynamic_map, r)) != RADIX_NO_VALUE) { t = (GList *)((gpointer)r); target = &t; } else { t = g_list_prepend (NULL, item); - memory_pool_add_destructor (pcache->dynamic_pool, (pool_destruct_func)g_list_free, t); - r = radix32tree_insert (pcache->dynamic_map, ntohl (networks[i].addr.s_addr), mask, (uintptr_t)t); + memory_pool_add_destructor (dynamic_pool, (pool_destruct_func)g_list_free, t); + r = radix32tree_insert (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); if (r == -1) { - msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (networks[i].addr), mask); + msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); } else if (r == 1) { - msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (networks[i].addr), mask); + msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask); } - return; } + cur = g_list_next (cur); } } *target = g_list_prepend (*target, item); } void -remove_dynamic_items (struct symbols_cache *cache) +remove_dynamic_rules (struct symbols_cache *cache) { if (cache->dynamic_items) { g_list_free (cache->dynamic_items); @@ -351,10 +351,6 @@ remove_dynamic_items (struct symbols_cache *cache) if (cache->dynamic_map) { radix_tree_free (cache->dynamic_map); } - - /* Do magic */ - memory_pool_delete (cache->dynamic_pool); - cache->dynamic_pool = NULL; } static void @@ -380,9 +376,6 @@ free_cache (gpointer arg) } memory_pool_delete 
(cache->static_pool); - if (cache->dynamic_pool) { - memory_pool_delete (cache->dynamic_pool); - } g_free (cache); } diff --git a/src/symbols_cache.h b/src/symbols_cache.h index af92b4891..dfd5672ad 100644 --- a/src/symbols_cache.h +++ b/src/symbols_cache.h @@ -51,7 +51,6 @@ struct symbols_cache { GList *dynamic_items; memory_pool_t *static_pool; - memory_pool_t *dynamic_pool; guint cur_items; guint used_items; @@ -79,8 +78,9 @@ void register_symbol (struct symbols_cache **cache, const char *name, double wei * @param func pointer to handler * @param user_data pointer to user_data */ -void register_dynamic_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func, - gpointer user_data, struct dynamic_map_item *networks, gsize network_count); +void register_dynamic_symbol (memory_pool_t *pool, struct symbols_cache **cache, const char *name, + double weight, symbol_func_t func, + gpointer user_data, GList *networks); /** * Call function for cached symbol using saved callback |