From 7301151c6d6351e0bfe32947e153571f6ba8c22b Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 21 Apr 2017 12:37:58 +0100
Subject: [PATCH] [Feature] Add support of maps with multiple regexps matches

---
 src/libutil/map.c | 152 ++++++++++++++++++++++++++++++++++++++++++----
 src/libutil/map.h |  28 +++++++---
 src/lua/lua_map.c |   7 +--
 3 files changed, 167 insertions(+), 20 deletions(-)

diff --git a/src/libutil/map.c b/src/libutil/map.c
index f57597fd8..436f7e17e 100644
--- a/src/libutil/map.c
+++ b/src/libutil/map.c
@@ -2360,11 +2360,16 @@ rspamd_radix_fin (struct map_cb_data *data)
 	}
 }
 
+enum rspamd_regexp_map_flags {
+	RSPAMD_REGEXP_FLAG_UTF = (1 << 0),
+	RSPAMD_REGEXP_FLAG_MULTIPLE = (1 << 1)
+};
+
 struct rspamd_regexp_map {
 	struct rspamd_map *map;
 	GPtrArray *regexps;
 	GPtrArray *values;
-	gboolean has_utf;
+	enum rspamd_regexp_map_flags map_flags;
 #ifdef WITH_HYPERSCAN
 	hs_database_t *hs_db;
 	hs_scratch_t *hs_scratch;
@@ -2375,7 +2380,8 @@ struct rspamd_regexp_map {
 };
 
 static struct rspamd_regexp_map *
-rspamd_regexp_map_create (struct rspamd_map *map)
+rspamd_regexp_map_create (struct rspamd_map *map,
+		enum rspamd_regexp_map_flags flags)
 {
 	struct rspamd_regexp_map *re_map;
 
@@ -2383,6 +2389,7 @@ rspamd_regexp_map_create (struct rspamd_map *map)
 	re_map->values = g_ptr_array_new ();
 	re_map->regexps = g_ptr_array_new ();
 	re_map->map = map;
+	re_map->map_flags = flags;
 
 	return re_map;
 }
@@ -2453,7 +2460,7 @@ rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value
 
 #ifndef WITH_PCRE2
 	if (pcre_flags & PCRE_FLAG(UTF8)) {
-		re_map->has_utf = TRUE;
+		re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
 	}
 #else
 	if (pcre_flags & PCRE_FLAG(UTF)) {
@@ -2557,16 +2564,39 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map)
 }
 
 gchar *
-rspamd_regexp_list_read (
-		gchar *chunk,
-		gint len,
-		struct map_cb_data *data,
-		gboolean final)
+rspamd_regexp_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final)
+{
+	struct rspamd_regexp_map *re_map;
+
+	if (data->cur_data == NULL) {
+		re_map = rspamd_regexp_map_create (data->map, 0);
+		data->cur_data = re_map;
+	}
+
+	return rspamd_parse_kv_list (
+			chunk,
+			len,
+			data,
+			rspamd_re_map_insert_helper,
+			hash_fill,
+			final);
+}
+
+gchar *
+rspamd_regexp_list_read_multiple (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final)
 {
 	struct rspamd_regexp_map *re_map;
 
 	if (data->cur_data == NULL) {
-		re_map = rspamd_regexp_map_create (data->map);
+		re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_MULTIPLE);
 		data->cur_data = re_map;
 	}
 
@@ -2610,7 +2640,7 @@ rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
 }
 
 gpointer
-rspamd_match_regexp_map (struct rspamd_regexp_map *map,
+rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
 		const gchar *in, gsize len)
 {
 	guint i;
@@ -2625,7 +2655,7 @@ rspamd_match_regexp_map (struct rspamd_regexp_map *map,
 		return NULL;
 	}
 
-	if (map->has_utf) {
+	if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) {
 		if (g_utf8_validate (in, len, NULL)) {
 			validated = TRUE;
 		}
@@ -2666,3 +2696,103 @@ rspamd_match_regexp_map (struct rspamd_regexp_map *map,
 
 	return ret;
 }
+
+/* Context passed to hyperscan when all matches must be collected */
+struct rspamd_multiple_cbdata {
+	GPtrArray *ar;
+	struct rspamd_regexp_map *map;
+};
+
+/*
+ * Hyperscan match callback: appends the value stored for pattern `id` to the
+ * result array carried in the callback data
+ */
+static int
+rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
+		unsigned long long to,
+		unsigned int flags, void *context)
+{
+	struct rspamd_multiple_cbdata *cbd = context;
+
+	/*
+	 * NOTE(review): assumes hyperscan ids are indices into map->values and
+	 * that each pattern is reported once; confirm in rspamd_re_map_finalize
+	 */
+	if (id < cbd->map->values->len) {
+		g_ptr_array_add (cbd->ar, g_ptr_array_index (cbd->map->values, id));
+	}
+
+	/* Always return zero as we need all matches here */
+	return 0;
+}
+
+/*
+ * Returns a newly allocated GPtrArray holding the values of all regexps that
+ * match `in`, or NULL if nothing matched; the caller frees the array
+ */
+gpointer
+rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
+		const gchar *in, gsize len)
+{
+	guint i;
+	rspamd_regexp_t *re;
+	GPtrArray *ret;
+	gboolean hs_done = FALSE;
+	gboolean validated = FALSE;
+
+	g_assert (in != NULL);
+
+	if (map == NULL || len == 0) {
+		return NULL;
+	}
+
+	if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) {
+		if (g_utf8_validate (in, len, NULL)) {
+			validated = TRUE;
+		}
+	}
+	else {
+		validated = TRUE;
+	}
+
+	ret = g_ptr_array_new ();
+
+#ifdef WITH_HYPERSCAN
+	if (map->hs_db && map->hs_scratch && validated) {
+		struct rspamd_multiple_cbdata cbd;
+
+		cbd.ar = ret;
+		cbd.map = map;
+
+		if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
+				rspamd_match_hs_multiple_handler, &cbd) != HS_SUCCESS) {
+			g_ptr_array_free (ret, TRUE);
+
+			return NULL;
+		}
+
+		hs_done = TRUE;
+	}
+#endif
+
+	if (!hs_done) {
+		/* PCRE version */
+		for (i = 0; i < map->regexps->len; i ++) {
+			re = g_ptr_array_index (map->regexps, i);
+
+			if (rspamd_regexp_search (re, in, len, NULL, NULL,
+					!validated, NULL)) {
+				g_ptr_array_add (ret, g_ptr_array_index (map->values, i));
+			}
+		}
+	}
+
+	/* An empty result is reported as NULL, whichever engine produced it */
+	if (ret->len > 0) {
+		return ret;
+	}
+
+	g_ptr_array_free (ret, TRUE);
+
+	return NULL;
+}
diff --git a/src/libutil/map.h b/src/libutil/map.h
index 3b6439efb..b25ef5c54 100644
--- a/src/libutil/map.h
+++ b/src/libutil/map.h
@@ -120,11 +120,16 @@ void rspamd_kv_list_fin (struct map_cb_data *data);
  */
 struct rspamd_regexp_map;
 
-gchar * rspamd_regexp_list_read (
-		gchar *chunk,
-		gint len,
-		struct map_cb_data *data,
-		gboolean final);
+gchar * rspamd_regexp_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+gchar * rspamd_regexp_list_read_multiple (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
 void rspamd_regexp_list_fin (struct map_cb_data *data);
 
 /**
@@ -147,7 +152,18 @@ rspamd_parse_kv_list (
  * @param len
  * @return
  */
-gpointer rspamd_match_regexp_map (struct rspamd_regexp_map *map,
+gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
+		const gchar *in, gsize len);
+
+/**
+ * Find all regexps matching the specified text. Returns a GPtrArray of the
+ * matched values that *must* be freed by the caller, or NULL if nothing matched
+ * @param map
+ * @param in
+ * @param len
+ * @return
+ */
+gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
 		const gchar *in, gsize len);
 
 #endif
diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c
index 05eb1d007..c8130ede8 100644
--- a/src/lua/lua_map.c
+++ b/src/lua/lua_map.c
@@ -467,9 +467,9 @@ lua_config_add_map (lua_State *L)
 		map->type = RSPAMD_LUA_MAP_REGEXP;
 
 		if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description,
-				rspamd_regexp_list_read,
+				rspamd_regexp_list_read_single,
 				rspamd_regexp_list_fin,
-				(void **)&map->data.re_map)) == NULL) {
+				(void **) &map->data.re_map)) == NULL) {
 			lua_pushnil (L);
 			ucl_object_unref (map_obj);
 
@@ -606,7 +606,8 @@ lua_map_get_key (lua_State * L)
 			key = lua_map_process_string_key (L, 2, &len);
 
 			if (key && map->data.re_map) {
-				value = rspamd_match_regexp_map (map->data.re_map, key, len);
+				value = rspamd_match_regexp_map_single (map->data.re_map, key,
+						len);
 
 				if (value) {
 					lua_pushstring (L, value);
-- 
2.39.5