source.dussan.org Git - rspamd.git/commitdiff
* Add experimental support of dynamic rules to regexp module
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 1 Jun 2010 15:40:59 +0000 (19:40 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 1 Jun 2010 15:40:59 +0000 (19:40 +0400)
CMakeLists.txt
src/plugins/regexp.c
src/symbols_cache.c
src/symbols_cache.h

index 681bb252d8fbfa66ad48bb07601acb1f3adde285..75cd5083ad49e1b3bebce3ba616129bcc8a8bf66 100644 (file)
@@ -133,11 +133,6 @@ ELSE(NOT GMIME2_FOUND)
        LINK_DIRECTORIES(${GMIME2_LIBRARY_DIRS})
 ENDIF(NOT GMIME2_FOUND)
 
-
-IF(NOT GLIB2_FOUND OR (NOT GMIME2_FOUND AND NOT GMIME24_FOUND))
-       MESSAGE(FATAL_ERROR "Error: gmime2 and glib2 are required for rspamd")
-ENDIF(NOT GLIB2_FOUND OR (NOT GMIME2_FOUND AND NOT GMIME24_FOUND))
-
 # Make from ; separated list normal space separated list
 # Glib2
 FOREACH(arg ${GLIB2_CFLAGS})
index 33aa60f298f64de44f6447951113f70182755399..636b26c03866bda3868a7cee2ead43d7f28d5b4d 100644 (file)
 #include "../message.h"
 #include "../modules.h"
 #include "../cfg_file.h"
+#include "../map.h"
 #include "../util.h"
 #include "../expressions.h"
 #include "../view.h"
 #include "../lua/lua_common.h"
+#include "../json/jansson.h"
 
 #define DEFAULT_STATFILE_PREFIX "./"
 
@@ -61,6 +63,14 @@ struct regexp_ctx {
        char                           *statfile_prefix;
 
        memory_pool_t                  *regexp_pool;
+       memory_pool_t                  *dynamic_pool;
+};
+
+/*
+ * Accumulation buffer for the JSON text of the dynamic rules map: it is
+ * filled chunk by chunk in json_regexp_read_cb and parsed in
+ * json_regexp_fin_cb.
+ */
+struct regexp_json_buf {
+       u_char                         *buf;           /* start of the heap buffer, NULL until data is stored */
+       u_char                         *pos;           /* write cursor, the next chunk is copied here */
+       size_t                          buflen;        /* allocated size of buf in bytes */
+       struct config_file             *cfg;           /* config used to look up the metric and raw_mode */
 };
 
 static struct regexp_ctx       *regexp_module_ctx = NULL;
@@ -72,29 +82,96 @@ static gboolean                 rspamd_check_smtp_data (struct worker_task *task
 static void                     process_regexp_item (struct worker_task *task, void *user_data);
 
 
-int
-regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
+/*
+ * Symbol callback for dynamic rules that have no expression: inserts the
+ * symbol into the module metric unconditionally.
+ */
+static void
+regexp_dynamic_insert_result (struct worker_task *task, void *user_data)
 {
-       regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx));
+       /* user_data is the symbol name this callback was registered with */
+       insert_result (task, regexp_module_ctx->metric, (char *)user_data, 1, NULL);
+}
 
-       regexp_module_ctx->filter = regexp_common_filter;
-       regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
-       regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
+/*
+ * Parse an IPv4 address with an optional prefix length ("a.b.c.d" or
+ * "a.b.c.d/NN" with NN in 0..32) from `begin` into `addr`.
+ *
+ * Only digits, dots and a single '/' are accepted.  Without a prefix
+ * length the mask defaults to 32.
+ *
+ * Returns TRUE on success; FALSE on any syntax error, in which case
+ * addr->mask is not written.
+ */
+static gboolean
+parse_regexp_ipmask (const char *begin, struct dynamic_map_item *addr)
+{
+       /* Zero filled; the last byte of each buffer is never written, so both stay NUL terminated */
+       char                            ip_buf[sizeof ("255.255.255.255")] = "", mask_buf[3] = "";
+       char                           *p = ip_buf;
+       const char                     *pos;
+       gboolean                        in_mask = FALSE;
+       int                             dots = 0, mask = 32;
+
+       for (pos = begin; *pos != '\0'; pos ++) {
+               if (in_mask) {
+                       /* Prefix length: digits only, at most two of them */
+                       if (!g_ascii_isdigit (*pos) || (size_t)(p - mask_buf) >= sizeof (mask_buf) - 1) {
+                               return FALSE;
+                       }
+                       *p ++ = *pos;
+               }
+               else if (*pos == '/') {
+                       /* Switch from the address part to the prefix length */
+                       in_mask = TRUE;
+                       p = mask_buf;
+               }
+               else if (g_ascii_isdigit (*pos) || *pos == '.') {
+                       /* Address part: keep room for the terminating NUL */
+                       if ((size_t)(p - ip_buf) >= sizeof (ip_buf) - 1) {
+                               return FALSE;
+                       }
+                       if (*pos == '.' && ++ dots > 3) {
+                               return FALSE;
+                       }
+                       *p ++ = *pos;
+               }
+               else {
+                       /* Invalid character */
+                       return FALSE;
+               }
+       }
 
-       *ctx = (struct module_ctx *)regexp_module_ctx;
-       register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
-       register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
-       register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
+       if (!inet_aton (ip_buf, &addr->addr)) {
+               return FALSE;
+       }
+       if (in_mask) {
+               /* Convert the one or two collected digits; a bare "/" is an error */
+               mask = 0;
+               for (p = mask_buf; *p != '\0'; p ++) {
+                       mask = mask * 10 + (*p - '0');
+               }
+               if (p == mask_buf || mask > 32) {
+                       msg_info ("bad ipmask value: '%s'", begin);
+                       return FALSE;
+               }
+       }
+       addr->mask = mask;
+
+       return TRUE;
 
-       return 0;
 }
 
+/* Process regexp expression */
 static                          gboolean
-read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, struct config_file *cfg)
+read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, gboolean raw_mode)
 {
        struct expression              *e, *cur;
 
-       e = parse_expression (regexp_module_ctx->regexp_pool, line);
+       e = parse_expression (pool, line);
        if (e == NULL) {
                msg_warn ("%s = \"%s\" is invalid regexp expression", symbol, line);
                return FALSE;
@@ -103,7 +180,7 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain,
        cur = e;
        while (cur) {
                if (cur->type == EXPR_REGEXP) {
-                       cur->content.operand = parse_regexp (pool, cur->content.operand, cfg->raw_mode);
+                       cur->content.operand = parse_regexp (pool, cur->content.operand, raw_mode);
                        if (cur->content.operand == NULL) {
                                msg_warn ("cannot parse regexp, skip expression %s = \"%s\"", symbol, line);
                                return FALSE;
@@ -116,6 +193,197 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain,
        return TRUE;
 }
 
+
+/* Callbacks for reading json dynamic rules */
+/*
+ * Map read callback: appends one chunk of map contents to the JSON
+ * accumulation buffer kept in data->cur_data.  The buffer object is created
+ * on the first chunk and inherits its config pointer from data->prev_data.
+ *
+ * One spare byte is always kept after the stored text so that
+ * json_regexp_fin_cb can NUL terminate it in place.
+ *
+ * Always returns NULL, i.e. no part of `chunk` has to be kept by the caller.
+ */
+u_char                         *
+json_regexp_read_cb (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data)
+{
+       struct regexp_json_buf         *jb, *prev;
+       size_t                          used, avail;
+
+       if (data->cur_data == NULL) {
+               prev = data->prev_data;
+               if (prev == NULL) {
+                       /* Nothing to inherit the config from, the rules could not be registered anyway */
+                       msg_err ("no previous data for dynamic rules map, skip chunk");
+                       return NULL;
+               }
+               jb = g_malloc (sizeof (struct regexp_json_buf));
+               jb->cfg = prev->cfg;
+               jb->buf = NULL;
+               jb->pos = NULL;
+               jb->buflen = 0;
+               data->cur_data = jb;
+       }
+       else {
+               jb = data->cur_data;
+       }
+
+       if (jb->buf == NULL) {
+               /* Nothing stored yet: g_realloc below performs the initial allocation */
+               jb->buflen = 0;
+               used = 0;
+       }
+       else {
+               used = jb->pos - jb->buf;
+       }
+       avail = jb->buflen - used;
+
+       /* Grow when the chunk plus the terminating NUL does not fit */
+       if (avail < len + 1) {
+               jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2 + 1);
+               jb->buf = g_realloc (jb->buf, jb->buflen);
+               jb->pos = jb->buf + used;
+       }
+
+       memcpy (jb->pos, chunk, len);
+       jb->pos += len;
+
+       /* Say not to copy any part of this buffer */
+       return NULL;
+}
+
+/*
+ * Map finalization callback: parses the accumulated JSON text and replaces
+ * the dynamic rules of the regexp module with the rules it describes.
+ *
+ * The document must be an array of objects.  Every object needs a numeric
+ * "factor" (used as the symbol weight) and a string "symbol".  Optional
+ * members are "rule" (a regexp expression; without it the symbol is inserted
+ * unconditionally) and "networks" (an array of "ip[/mask]" strings).
+ * Invalid elements are logged and skipped.
+ *
+ * The buffer of the previous map version (data->prev_data) is released here.
+ */
+void
+json_regexp_fin_cb (memory_pool_t * pool, struct map_cb_data *data)
+{
+       struct regexp_json_buf         *jb;
+       int                             nelts, nnets, i, j;
+       size_t                          used;
+       json_t                         *js, *cur_elt, *cur_nm, *it_val;
+       json_error_t                    je;
+       char                           *cur_rule, *cur_symbol;
+       double                          score;
+       struct regexp_module_item      *cur_item;
+       GList                          *cur_networks = NULL;
+       struct dynamic_map_item        *cur_nitem;
+       memory_pool_t                  *new_pool;
+       struct metric                  *metric;
+
+       if (data->prev_data) {
+               jb = data->prev_data;
+               /* Clean prev data */
+               if (jb->buf) {
+                       g_free (jb->buf);
+               }
+               g_free (jb);
+       }
+
+       /* Now parse json */
+       jb = data->cur_data;
+       if (jb == NULL || jb->buf == NULL) {
+               msg_err ("no data read");
+               return;
+       }
+       /* NUL terminate current buf; grow it first if the text fills it completely */
+       used = jb->pos - jb->buf;
+       if (used >= jb->buflen) {
+               jb->buflen = used + 1;
+               jb->buf = g_realloc (jb->buf, jb->buflen);
+               jb->pos = jb->buf + used;
+       }
+       *jb->pos = '\0';
+
+       js = json_loads ((const char *)jb->buf, &je);
+       if (!js) {
+               msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
+               return;
+       }
+
+       if (!json_is_array (js)) {
+               json_decref (js);
+               msg_err ("loaded json is not an array");
+               return;
+       }
+
+       /* Look the metric up before allocating anything that would need cleanup */
+       metric = g_hash_table_lookup (jb->cfg->metrics, regexp_module_ctx->metric);
+       if (metric == NULL) {
+               msg_err ("cannot find metric definition %s", regexp_module_ctx->metric);
+               json_decref (js);
+               return;
+       }
+
+       /* Drop the old rule set, then the pool its items were allocated from */
+       if (metric->cache != NULL) {
+               remove_dynamic_rules (metric->cache);
+       }
+       if (regexp_module_ctx->dynamic_pool != NULL) {
+               memory_pool_delete (regexp_module_ctx->dynamic_pool);
+       }
+       new_pool = memory_pool_new (memory_pool_get_size ());
+       regexp_module_ctx->dynamic_pool = new_pool;
+
+       nelts = json_array_size (js);
+       for (i = 0; i < nelts; i++) {
+               cur_networks = NULL;
+               cur_rule = NULL;
+
+               cur_elt = json_array_get (js, i);
+               if (!cur_elt || !json_is_object (cur_elt)) {
+                       msg_err ("loaded json is not an object");
+                       continue;
+               }
+               /* Factor param */
+               cur_nm = json_object_get (cur_elt, "factor");
+               if (cur_nm == NULL || !json_is_number (cur_nm)) {
+                       msg_err ("factor is not a number or not exists, but is required");
+                       continue;
+               }
+               score = json_number_value (cur_nm);
+               /* Symbol param */
+               cur_nm = json_object_get (cur_elt, "symbol");
+               if (cur_nm == NULL || !json_is_string (cur_nm)) {
+                       msg_err ("symbol is not a string or not exists, but is required");
+                       continue;
+               }
+               cur_symbol = memory_pool_strdup (new_pool, json_string_value (cur_nm));
+               /* Now check other settings */
+               /* Rule */
+               cur_nm = json_object_get (cur_elt, "rule");
+               if (cur_nm != NULL && json_is_string (cur_nm)) {
+                       cur_rule = memory_pool_strdup (new_pool, json_string_value (cur_nm));
+               }
+               /* Networks array */
+               cur_nm = json_object_get (cur_elt, "networks");
+               if (cur_nm != NULL && json_is_array (cur_nm)) {
+                       nnets = json_array_size (cur_nm);
+                       for (j = 0; j < nnets; j++) {
+                               /* Index with j so that every entry of this rule's own list is read */
+                               it_val = json_array_get (cur_nm, j);
+                               if (it_val && json_is_string (it_val)) {
+                                       cur_nitem = memory_pool_alloc (new_pool, sizeof (struct dynamic_map_item));
+                                       if (parse_regexp_ipmask (json_string_value (it_val), cur_nitem)) {
+                                               cur_networks = g_list_prepend (cur_networks, cur_nitem);
+                                       }
+                               }
+                       }
+               }
+               if (cur_rule) {
+                       /* Dynamic rule has rule option */
+                       cur_item = memory_pool_alloc0 (new_pool, sizeof (struct regexp_module_item));
+                       cur_item->symbol = cur_symbol;
+                       if (read_regexp_expression (new_pool, cur_item, cur_symbol, cur_rule, jb->cfg->raw_mode)) {
+                               register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, process_regexp_item, cur_item, cur_networks);
+                       }
+                       else {
+                               msg_warn ("cannot parse dynamic rule");
+                       }
+               }
+               else {
+                       /* Just rule that is always true (for whitelisting for example) */
+                       register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, regexp_dynamic_insert_result, cur_symbol, cur_networks);
+               }
+               /* Only the list cells are freed here, the items themselves live in new_pool */
+               if (cur_networks) {
+                       g_list_free (cur_networks);
+               }
+       }
+       json_decref (js);
+}
+
+/*
+ * Init function: allocates the module context, creates the static regexp
+ * pool and the autolearn symbols table, and registers the expression
+ * functions provided by this module.
+ *
+ * The context is returned through `ctx`; the function always returns 0.
+ */
+int
+regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+       /* Zero filled, so fields that are only set later by regexp_module_config never hold garbage */
+       regexp_module_ctx = g_malloc0 (sizeof (struct regexp_ctx));
+
+       regexp_module_ctx->filter = regexp_common_filter;
+       regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
+       /* No dynamic rules are loaded yet, the pool is created by json_regexp_fin_cb */
+       regexp_module_ctx->dynamic_pool = NULL;
+       regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
+
+       *ctx = (struct module_ctx *)regexp_module_ctx;
+       register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
+       register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
+       register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
+
+       return 0;
+}
+
+
 /* 
  * Parse string in format:
  * SYMBOL:statfile:weight
@@ -157,6 +425,7 @@ regexp_module_config (struct config_file *cfg)
        char                           *value;
        int                             res = TRUE;
        double                         *w;
+       struct regexp_json_buf         *jb, **pjb;
 
        if ((value = get_module_opt (cfg, "regexp", "metric")) != NULL) {
                regexp_module_ctx->metric = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
@@ -170,6 +439,16 @@ regexp_module_config (struct config_file *cfg)
        else {
                regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
        }
+       if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) {
+               jb = g_malloc (sizeof (struct regexp_json_buf));
+               pjb = g_malloc (sizeof (struct regexp_json_buf *));
+               jb->buf = NULL;
+               jb->cfg = cfg;
+               *pjb = jb;
+               if (!add_map (value, json_regexp_read_cb, json_regexp_fin_cb, (void **)pjb)) {
+                       msg_err ("cannot add map %s", value);
+               }
+       }
 
        metric = g_hash_table_lookup (cfg->metrics, regexp_module_ctx->metric);
        if (metric == NULL) {
@@ -190,7 +469,7 @@ regexp_module_config (struct config_file *cfg)
                cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item));
                cur_item->symbol = cur->param;
                if (cur->is_lua && cur->lua_type == LUA_VAR_STRING) {
-                       if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg)) {
+                       if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg->raw_mode)) {
                                res = FALSE;
                        }
                }
@@ -198,7 +477,7 @@ regexp_module_config (struct config_file *cfg)
                        cur_item->lua_function = cur->actual_data;
                }
                else if (! cur->is_lua) {
-                       if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg)) {
+                       if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg->raw_mode)) {
                                res = FALSE;
                        }
                }
index a632152e437e35b293ca6edf074e560ba3e24c0f..07aabf6d41332703b0af2dac90cbaeec836f420c 100644 (file)
@@ -278,15 +278,16 @@ register_symbol (struct symbols_cache **cache, const char *name, double weight,
 }
 
 void
-register_dynamic_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func, 
-               gpointer user_data, struct dynamic_map_item *networks, gsize network_count)
+register_dynamic_symbol (memory_pool_t *dynamic_pool, struct symbols_cache **cache,
+               const char *name, double weight, symbol_func_t func, 
+               gpointer user_data, GList *networks)
 {
        struct cache_item              *item = NULL;
        struct symbols_cache           *pcache = *cache;
-       GList                         **target, *t;
-       gsize                           i;
+       GList                         **target, *t, *cur;
        uintptr_t                       r;
        uint32_t                        mask = 0xFFFFFFFF;
+       struct dynamic_map_item        *it;
 
        if (*cache == NULL) {
                pcache = g_new0 (struct symbols_cache, 1);
@@ -294,11 +295,8 @@ register_dynamic_symbol (struct symbols_cache **cache, const char *name, double
                pcache->static_pool = memory_pool_new (memory_pool_get_size ());
        }
        
-       if (pcache->dynamic_pool == NULL) {
-               pcache->dynamic_pool = memory_pool_new (memory_pool_get_size ());
-       }
-       item = memory_pool_alloc0 (pcache->dynamic_pool, sizeof (struct cache_item));
-       item->s = memory_pool_alloc (pcache->dynamic_pool, sizeof (struct saved_cache_item));
+       item = memory_pool_alloc0 (dynamic_pool, sizeof (struct cache_item));
+       item->s = memory_pool_alloc (dynamic_pool, sizeof (struct saved_cache_item));
        g_strlcpy (item->s->symbol, name, sizeof (item->s->symbol));
        item->func = func;
        item->user_data = user_data;
@@ -309,39 +307,41 @@ register_dynamic_symbol (struct symbols_cache **cache, const char *name, double
        msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name);
        set_counter (item->s->symbol, 0);
        
-       if (network_count == 0 || networks == NULL) {
+       if (networks == NULL) {
                target = &pcache->dynamic_items;
        }
        else {
                if (pcache->dynamic_map == NULL) {
                        pcache->dynamic_map = radix_tree_create ();
                }
-               for (i = 0; i < network_count; i ++) {
-                       mask = mask << (32 - networks[i].mask);
-                       r = ntohl (networks[i].addr.s_addr & mask);
+               cur = networks;
+               while (cur) {
+                       it = cur->data;
+                       mask = mask << (32 - it->mask);
+                       r = ntohl (it->addr.s_addr & mask);
                        if ((r = radix32tree_find (pcache->dynamic_map, r)) != RADIX_NO_VALUE) {
                                t = (GList *)((gpointer)r);
                                target = &t;
                        }
                        else {
                                t = g_list_prepend (NULL, item);
-                               memory_pool_add_destructor (pcache->dynamic_pool, (pool_destruct_func)g_list_free, t);
-                               r = radix32tree_insert (pcache->dynamic_map, ntohl (networks[i].addr.s_addr), mask, (uintptr_t)t);
+                               memory_pool_add_destructor (dynamic_pool, (pool_destruct_func)g_list_free, t);
+                               r = radix32tree_insert (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t);
                                if (r == -1) {
-                                       msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (networks[i].addr), mask);
+                                       msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
                                }
                                else if (r == 1) {
-                                       msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (networks[i].addr), mask);
+                                       msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask);
                                }
-                               return;
                        }
+                       cur = g_list_next (cur);
                }
        }
        *target = g_list_prepend (*target, item);
 }
 
 void
-remove_dynamic_items (struct symbols_cache *cache)
+remove_dynamic_rules (struct symbols_cache *cache)
 {
        if (cache->dynamic_items) {
                g_list_free (cache->dynamic_items);
@@ -351,10 +351,6 @@ remove_dynamic_items (struct symbols_cache *cache)
        if (cache->dynamic_map) {
                radix_tree_free (cache->dynamic_map);
        }
-
-       /* Do magic */
-       memory_pool_delete (cache->dynamic_pool);
-       cache->dynamic_pool = NULL;
 }
 
 static void
@@ -380,9 +376,6 @@ free_cache (gpointer arg)
        }
 
        memory_pool_delete (cache->static_pool);
-       if (cache->dynamic_pool) {
-               memory_pool_delete (cache->dynamic_pool);
-       }
 
        g_free (cache);
 }
index af92b48916448426ae7e8f9caad2244e211258bb..dfd5672ad0162b802007ecfa60fea3f61b4f0ed7 100644 (file)
@@ -51,7 +51,6 @@ struct symbols_cache {
        GList *dynamic_items;
 
        memory_pool_t *static_pool;
-       memory_pool_t *dynamic_pool;
 
        guint cur_items;
        guint used_items;
@@ -79,8 +78,9 @@ void register_symbol (struct symbols_cache **cache, const char *name, double wei
  * @param func pointer to handler
  * @param user_data pointer to user_data
  */
-void register_dynamic_symbol (struct symbols_cache **cache, const char *name, double weight, symbol_func_t func, 
-                                               gpointer user_data, struct dynamic_map_item *networks, gsize network_count);
+void register_dynamic_symbol (memory_pool_t *pool, struct symbols_cache **cache, const char *name, 
+                                               double weight, symbol_func_t func, 
+                                               gpointer user_data, GList *networks);
 
 /**
  * Call function for cached symbol using saved callback