aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-04-21 12:37:58 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-04-21 12:37:58 +0100
commit 7301151c6d6351e0bfe32947e153571f6ba8c22b (patch)
tree fa2057f2da17f4e22ed71bf45bb69ee4836a2966 /src/libutil
parent e6a60ab0da437564434969e0f3b63990c6aabbfa (diff)
download rspamd-7301151c6d6351e0bfe32947e153571f6ba8c22b.tar.gz
rspamd-7301151c6d6351e0bfe32947e153571f6ba8c22b.zip
[Feature] Add support of maps with multiple regexps matches
Diffstat (limited to 'src/libutil')
-rw-r--r-- src/libutil/map.c 133
-rw-r--r-- src/libutil/map.h 28
2 files changed, 144 insertions, 17 deletions
diff --git a/src/libutil/map.c b/src/libutil/map.c
index f57597fd8..436f7e17e 100644
--- a/src/libutil/map.c
+++ b/src/libutil/map.c
@@ -2360,11 +2360,16 @@ rspamd_radix_fin (struct map_cb_data *data)
}
}
+enum rspamd_regexp_map_flags { /* Bit flags kept in rspamd_regexp_map.map_flags */
+ RSPAMD_REGEXP_FLAG_UTF = (1 << 0), /* Set by the insert helper when a regexp has the PCRE UTF8 flag; the matchers then run g_utf8_validate on the input first */
+ RSPAMD_REGEXP_FLAG_MULTIPLE = (1 << 1) /* Passed to rspamd_regexp_map_create by rspamd_regexp_list_read_multiple */
+};
+
struct rspamd_regexp_map {
struct rspamd_map *map;
GPtrArray *regexps;
GPtrArray *values;
- gboolean has_utf;
+ enum rspamd_regexp_map_flags map_flags;
#ifdef WITH_HYPERSCAN
hs_database_t *hs_db;
hs_scratch_t *hs_scratch;
@@ -2375,7 +2380,8 @@ struct rspamd_regexp_map {
};
static struct rspamd_regexp_map *
-rspamd_regexp_map_create (struct rspamd_map *map)
+rspamd_regexp_map_create (struct rspamd_map *map,
+ enum rspamd_regexp_map_flags flags)
{
struct rspamd_regexp_map *re_map;
@@ -2383,6 +2389,7 @@ rspamd_regexp_map_create (struct rspamd_map *map)
re_map->values = g_ptr_array_new ();
re_map->regexps = g_ptr_array_new ();
re_map->map = map;
+ re_map->map_flags = flags;
return re_map;
}
@@ -2453,7 +2460,7 @@ rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value
#ifndef WITH_PCRE2
if (pcre_flags & PCRE_FLAG(UTF8)) {
- re_map->has_utf = TRUE;
+ re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
}
#else
if (pcre_flags & PCRE_FLAG(UTF)) {
@@ -2557,16 +2564,39 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map)
}
gchar *
-rspamd_regexp_list_read (
- gchar *chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final)
+rspamd_regexp_list_read_single ( /* Chunk reader that builds a regexp map created with no flags set */
+ gchar *chunk, /* forwarded unchanged to rspamd_parse_kv_list */
+ gint len, /* forwarded unchanged to rspamd_parse_kv_list */
+ struct map_cb_data *data, /* data->cur_data holds the regexp map being filled */
+ gboolean final) /* forwarded unchanged to rspamd_parse_kv_list */
+{
+ struct rspamd_regexp_map *re_map;
+
+ if (data->cur_data == NULL) { /* no map attached yet: create one bound to data->map with flags = 0 */
+ re_map = rspamd_regexp_map_create (data->map, 0);
+ data->cur_data = re_map;
+ }
+
+ return rspamd_parse_kv_list ( /* the parser's return value is handed back to the caller as is */
+ chunk,
+ len,
+ data,
+ rspamd_re_map_insert_helper, /* presumably invoked for each parsed entry - confirm in rspamd_parse_kv_list */
+ hash_fill,
+ final);
+}
+
+gchar *
+rspamd_regexp_list_read_multiple (
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
{
struct rspamd_regexp_map *re_map;
if (data->cur_data == NULL) {
- re_map = rspamd_regexp_map_create (data->map);
+ re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_MULTIPLE);
data->cur_data = re_map;
}
@@ -2610,7 +2640,7 @@ rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
}
gpointer
-rspamd_match_regexp_map (struct rspamd_regexp_map *map,
+rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
const gchar *in, gsize len)
{
guint i;
@@ -2625,7 +2655,7 @@ rspamd_match_regexp_map (struct rspamd_regexp_map *map,
return NULL;
}
- if (map->has_utf) {
+ if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) {
if (g_utf8_validate (in, len, NULL)) {
validated = TRUE;
}
@@ -2666,3 +2696,84 @@ rspamd_match_regexp_map (struct rspamd_regexp_map *map,
return ret;
}
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Context handed to hyperscan for "match all" scans: the map being scanned
+ * and the array that collects the value of every matching expression.
+ */
+struct rspamd_multiple_cbdata {
+	GPtrArray *ar;
+	struct rspamd_regexp_map *map;
+};
+
+/*
+ * Hyperscan match callback used by rspamd_match_regexp_map_all.
+ * Appends the value associated with the matched expression to the result
+ * array carried in the context.
+ *
+ * @param id identifier of the matched expression
+ * @param from unused
+ * @param to unused
+ * @param flags unused
+ * @param context pointer to struct rspamd_multiple_cbdata
+ * @return always 0, so that hyperscan keeps scanning for further matches
+ */
+static int
+rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
+		unsigned long long to,
+		unsigned int flags, void *context)
+{
+	struct rspamd_multiple_cbdata *cbd = context;
+
+	/*
+	 * NOTE(review): assumes hyperscan expression ids are indices into
+	 * map->values, in the same way the PCRE path pairs regexps[i] with
+	 * values[i] - confirm against the code that compiles the database.
+	 * The bounds check keeps a mismatch from reading past the array.
+	 * NOTE(review): if the database is not compiled with
+	 * HS_FLAG_SINGLEMATCH, the same id may be reported more than once and
+	 * its value would then be added repeatedly - confirm.
+	 */
+	if (id < cbd->map->values->len) {
+		g_ptr_array_add (cbd->ar, g_ptr_array_index (cbd->map->values, id));
+	}
+
+	/* Always return zero as we need all matches here */
+	return 0;
+}
+#endif
+
+/*
+ * Collect the values of all regexps in the map that match the input.
+ *
+ * If the map has the UTF flag set, the input is validated as UTF-8 first;
+ * input that fails validation is never passed to hyperscan and is matched
+ * by PCRE in raw mode instead. When hyperscan is available and the scan
+ * succeeds, its matches are used; if the scan fails, partial results are
+ * discarded and the PCRE regexps are tried one by one.
+ *
+ * @param map regexp map, may be NULL
+ * @param in input text, must not be NULL
+ * @param len length of the input in bytes
+ * @return NULL if map is NULL, len is 0 or nothing matched; otherwise a
+ * non-empty GPtrArray of matched values that the caller must free
+ */
+gpointer
+rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
+		const gchar *in, gsize len)
+{
+	guint i;
+	rspamd_regexp_t *re;
+	GPtrArray *ret;
+	gboolean scanned = FALSE;
+	gboolean validated = FALSE;
+
+	g_assert (in != NULL);
+
+	if (map == NULL || len == 0) {
+		return NULL;
+	}
+
+	if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) {
+		if (g_utf8_validate (in, len, NULL)) {
+			validated = TRUE;
+		}
+	}
+	else {
+		validated = TRUE;
+	}
+
+	ret = g_ptr_array_new ();
+
+#ifdef WITH_HYPERSCAN
+	if (map->hs_db && map->hs_scratch && validated) {
+		struct rspamd_multiple_cbdata cbd;
+
+		cbd.ar = ret;
+		cbd.map = map;
+
+		if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
+				rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
+			scanned = TRUE;
+		}
+		else {
+			/* Drop partial results so that the PCRE fallback starts clean */
+			g_ptr_array_set_size (ret, 0);
+		}
+	}
+#endif
+
+	if (!scanned) {
+		/* PCRE version */
+		for (i = 0; i < map->regexps->len; i ++) {
+			re = g_ptr_array_index (map->regexps, i);
+
+			if (rspamd_regexp_search (re, in, len, NULL, NULL,
+					!validated, NULL)) {
+				g_ptr_array_add (ret, g_ptr_array_index (map->values, i));
+			}
+		}
+	}
+
+	if (ret->len > 0) {
+		return ret;
+	}
+
+	/* Nothing matched: honour the documented contract and return NULL */
+	g_ptr_array_free (ret, TRUE);
+
+	return NULL;
+} \ No newline at end of file
diff --git a/src/libutil/map.h b/src/libutil/map.h
index 3b6439efb..b25ef5c54 100644
--- a/src/libutil/map.h
+++ b/src/libutil/map.h
@@ -120,11 +120,16 @@ void rspamd_kv_list_fin (struct map_cb_data *data);
*/
struct rspamd_regexp_map;
-gchar * rspamd_regexp_list_read (
- gchar *chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final);
+gchar * rspamd_regexp_list_read_single (
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
+gchar * rspamd_regexp_list_read_multiple (
+ gchar *chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final);
void rspamd_regexp_list_fin (struct map_cb_data *data);
/**
@@ -147,7 +152,18 @@ rspamd_parse_kv_list (
* @param len
* @return
*/
-gpointer rspamd_match_regexp_map (struct rspamd_regexp_map *map,
+gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
+ const gchar *in, gsize len);
+
+/**
+ * Find all regexps in the map that match the specified text. Returns NULL if
+ * no matches are found; otherwise a GPtrArray that *must* be freed by the caller
+ * @param map
+ * @param in
+ * @param len
+ * @return
+ */
+gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
const gchar *in, gsize len);
#endif