]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add support of maps with multiple regexps matches
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 21 Apr 2017 11:37:58 +0000 (12:37 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 21 Apr 2017 11:37:58 +0000 (12:37 +0100)
src/libutil/map.c
src/libutil/map.h
src/lua/lua_map.c

index f57597fd8f93a081f96ed14b966662f1ba537ad2..436f7e17e3a8bd2d0f70dadc2dae2cd91f588c4f 100644 (file)
@@ -2360,11 +2360,16 @@ rspamd_radix_fin (struct map_cb_data *data)
        }
 }
 
+enum rspamd_regexp_map_flags { /* bit flags stored in rspamd_regexp_map.map_flags */
+       RSPAMD_REGEXP_FLAG_UTF = (1 << 0), /* set by rspamd_re_map_insert_helper when a regexp has the PCRE UTF8 flag; matchers then validate input with g_utf8_validate */
+       RSPAMD_REGEXP_FLAG_MULTIPLE = (1 << 1) /* passed to rspamd_regexp_map_create by rspamd_regexp_list_read_multiple */
+};
+
 struct rspamd_regexp_map {
        struct rspamd_map *map;
        GPtrArray *regexps;
        GPtrArray *values;
-       gboolean has_utf;
+       enum rspamd_regexp_map_flags map_flags;
 #ifdef WITH_HYPERSCAN
        hs_database_t *hs_db;
        hs_scratch_t *hs_scratch;
@@ -2375,7 +2380,8 @@ struct rspamd_regexp_map {
 };
 
 static struct rspamd_regexp_map *
-rspamd_regexp_map_create (struct rspamd_map *map)
+rspamd_regexp_map_create (struct rspamd_map *map,
+               enum rspamd_regexp_map_flags flags)
 {
        struct rspamd_regexp_map *re_map;
 
@@ -2383,6 +2389,7 @@ rspamd_regexp_map_create (struct rspamd_map *map)
        re_map->values = g_ptr_array_new ();
        re_map->regexps = g_ptr_array_new ();
        re_map->map = map;
+       re_map->map_flags = flags;
 
        return re_map;
 }
@@ -2453,7 +2460,7 @@ rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value
 
 #ifndef WITH_PCRE2
        if (pcre_flags & PCRE_FLAG(UTF8)) {
-               re_map->has_utf = TRUE;
+               re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
        }
 #else
        if (pcre_flags & PCRE_FLAG(UTF)) {
@@ -2557,16 +2564,39 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map)
 }
 
 gchar *
-rspamd_regexp_list_read (
-       gchar *chunk,
-       gint len,
-       struct map_cb_data *data,
-       gboolean final)
+rspamd_regexp_list_read_single (
+               gchar *chunk,
+               gint len,
+               struct map_cb_data *data,
+               gboolean final)
+{
+       struct rspamd_regexp_map *re_map;
+
+       if (data->cur_data == NULL) { /* no regexp map attached to this read yet: create one */
+               re_map = rspamd_regexp_map_create (data->map, 0); /* 0: no flags, i.e. RSPAMD_REGEXP_FLAG_MULTIPLE is not set */
+               data->cur_data = re_map;
+       }
+
+       return rspamd_parse_kv_list ( /* delegate parsing of the chunk; the result is returned unchanged */
+                       chunk,
+                       len,
+                       data,
+                       rspamd_re_map_insert_helper, /* NOTE(review): presumably invoked for each parsed key/value pair - confirm in rspamd_parse_kv_list */
+                       hash_fill,
+                       final);
+}
+
+gchar *
+rspamd_regexp_list_read_multiple (
+               gchar *chunk,
+               gint len,
+               struct map_cb_data *data,
+               gboolean final)
 {
        struct rspamd_regexp_map *re_map;
 
        if (data->cur_data == NULL) {
-               re_map = rspamd_regexp_map_create (data->map);
+               re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_MULTIPLE);
                data->cur_data = re_map;
        }
 
@@ -2610,7 +2640,7 @@ rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
 }
 
 gpointer
-rspamd_match_regexp_map (struct rspamd_regexp_map *map,
+rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
                const gchar *in, gsize len)
 {
        guint i;
@@ -2625,7 +2655,7 @@ rspamd_match_regexp_map (struct rspamd_regexp_map *map,
                return NULL;
        }
 
-       if (map->has_utf) {
+       if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) {
                if (g_utf8_validate (in, len, NULL)) {
                        validated = TRUE;
                }
@@ -2666,3 +2696,84 @@ rspamd_match_regexp_map (struct rspamd_regexp_map *map,
 
        return ret;
 }
+
+static int
+rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
+               unsigned long long to,
+               unsigned int flags, void *context)
+{
+       guint *i = context;
+       /* Always return zero as we need all matches here */
+
+       *i = id;
+
+       return 0;
+}
+
+gpointer
+rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
+               const gchar *in, gsize len)
+{
+       guint i;
+       rspamd_regexp_t *re;
+       GPtrArray *ret;
+       gint res = 0;
+       gboolean validated = FALSE;
+
+       g_assert (in != NULL);
+
+       if (map == NULL || len == 0) {
+               return NULL;
+       }
+
+       if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) {
+               if (g_utf8_validate (in, len, NULL)) {
+                       validated = TRUE;
+               }
+       }
+       else {
+               validated = TRUE;
+       }
+
+       ret = g_ptr_array_new ();
+
+#ifdef WITH_HYPERSCAN
+       if (map->hs_db && map->hs_scratch) {
+
+               if (validated) {
+                       res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
+                                       rspamd_match_hs_single_handler, (void *)&i);
+
+                       if (res == HS_SUCCESS) {
+                               return ret;
+                       }
+                       else {
+                               g_ptr_array_free (ret, TRUE);
+
+                               return NULL;
+                       }
+               }
+       }
+#endif
+
+       if (!res) {
+               /* PCRE version */
+               for (i = 0; i < map->regexps->len; i ++) {
+                       re = g_ptr_array_index (map->regexps, i);
+
+                       if (rspamd_regexp_search (re, in, len, NULL, NULL,
+                                       !validated, NULL)) {
+                               g_ptr_array_add (ret, g_ptr_array_index (map->values, i));
+                       }
+               }
+       }
+
+       if (ret->len > 0) {
+
+               return ret;
+       }
+
+       g_ptr_array_free (ret, TRUE);
+
+       return NULL;
+}
\ No newline at end of file
index 3b6439efb3c82a6045449dde26ad1e78888b2823..b25ef5c5405e23cb8093e27c8c90b2b78ec39b05 100644 (file)
@@ -120,11 +120,16 @@ void rspamd_kv_list_fin (struct map_cb_data *data);
  */
 struct rspamd_regexp_map;
 
-gchar * rspamd_regexp_list_read (
-       gchar *chunk,
-       gint len,
-       struct map_cb_data *data,
-       gboolean final);
+gchar * rspamd_regexp_list_read_single (
+               gchar *chunk,
+               gint len,
+               struct map_cb_data *data,
+               gboolean final);
+gchar * rspamd_regexp_list_read_multiple (
+               gchar *chunk,
+               gint len,
+               struct map_cb_data *data,
+               gboolean final);
 void rspamd_regexp_list_fin (struct map_cb_data *data);
 
 /**
@@ -147,7 +152,18 @@ rspamd_parse_kv_list (
  * @param len
  * @return
  */
-gpointer rspamd_match_regexp_map (struct rspamd_regexp_map *map,
+gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
+               const gchar *in, gsize len);
+
+/**
+ * Find all regexps matching the specified text. Returns NULL if no matches
+ * are found; otherwise returns a GPtrArray that *must* be freed by the caller
+ * @param map
+ * @param in
+ * @param len
+ * @return
+ */
+gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
                const gchar *in, gsize len);
 
 #endif
index 05eb1d00712597b6c8ee159ecc16057f1995a718..c8130ede8ad41dd5f8eeb98d968d85593a0ff01b 100644 (file)
@@ -467,9 +467,9 @@ lua_config_add_map (lua_State *L)
                        map->type = RSPAMD_LUA_MAP_REGEXP;
 
                        if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description,
-                                       rspamd_regexp_list_read,
+                                       rspamd_regexp_list_read_single,
                                        rspamd_regexp_list_fin,
-                                       (void **)&map->data.re_map)) == NULL) {
+                                       (void **) &map->data.re_map)) == NULL) {
                                lua_pushnil (L);
                                ucl_object_unref (map_obj);
 
@@ -606,7 +606,8 @@ lua_map_get_key (lua_State * L)
                        key = lua_map_process_string_key (L, 2, &len);
 
                        if (key && map->data.re_map) {
-                               value = rspamd_match_regexp_map (map->data.re_map, key, len);
+                               value = rspamd_match_regexp_map_single (map->data.re_map, key,
+                                               len);
 
                                if (value) {
                                        lua_pushstring (L, value);