From 43ce13764ceb1afbb504ca1d9e802f0f4aaafcca Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 23 Apr 2018 16:05:56 +0100 Subject: [PATCH] [Project] Implement helpers for major map types --- src/libutil/map_helpers.c | 442 +++++++++++++++++++++++--------------- src/libutil/map_helpers.h | 33 ++- 2 files changed, 285 insertions(+), 190 deletions(-) diff --git a/src/libutil/map_helpers.c b/src/libutil/map_helpers.c index f06174237..bf99e4294 100644 --- a/src/libutil/map_helpers.c +++ b/src/libutil/map_helpers.c @@ -18,6 +18,7 @@ #include "map_private.h" #include "khash.h" #include "radix.h" +#include "rspamd.h" #ifdef WITH_HYPERSCAN #include "hs.h" @@ -32,18 +33,20 @@ static const gchar *hash_fill = "1"; struct rspamd_map_helper_value { gsize hits; + gconstpointer key; gchar value[]; /* Null terminated */ }; +KHASH_INIT (rspamd_map_hash, const gchar *, + struct rspamd_map_helper_value *, true, + rspamd_strcase_hash, rspamd_strcase_equal); + struct rspamd_radix_map_helper { rspamd_mempool_t *pool; + khash_t(rspamd_map_hash) *htb; radix_compressed_t *trie; }; -KHASH_INIT (rspamd_map_hash, const gchar *, - struct rspamd_map_helper_value *, true, - rspamd_strcase_hash, rspamd_strcase_equal); - struct rspamd_hash_map_helper { rspamd_mempool_t *pool; khash_t(rspamd_map_hash) *htb; @@ -51,7 +54,8 @@ struct rspamd_hash_map_helper { enum rspamd_regexp_map_flags { RSPAMD_REGEXP_FLAG_UTF = (1u << 0), - RSPAMD_REGEXP_FLAG_MULTIPLE = (1u << 1) + RSPAMD_REGEXP_FLAG_MULTIPLE = (1u << 1), + RSPAMD_REGEXP_FLAG_GLOB = (1u << 2), }; struct rspamd_regexp_map_helper { @@ -59,6 +63,7 @@ struct rspamd_regexp_map_helper { struct rspamd_map *map; GPtrArray *regexps; GPtrArray *values; + khash_t(rspamd_map_hash) *htb; enum rspamd_regexp_map_flags map_flags; #ifdef WITH_HYPERSCAN hs_database_t *hs_db; @@ -419,133 +424,158 @@ rspamd_parse_kv_list ( static void radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value) { - radix_compressed_t *tree = 
(radix_compressed_t *)st; - rspamd_mempool_t *pool; - gpointer nvalue; + struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st; + struct rspamd_map_helper_value *val; + gsize vlen; + khiter_t k; + gconstpointer nk; + gint res; + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, r->htb, key); + + if (k == kh_end (r->htb)) { + nk = rspamd_mempool_strdup (r->pool, key); + k = kh_put (rspamd_map_hash, r->htb, nk, &res); + } - pool = radix_get_pool (tree); - nvalue = rspamd_mempool_strdup (pool, value); - rspamd_radix_add_iplist (key, ",", tree, nvalue, FALSE); + nk = kh_key (r->htb, k); + val->key = nk; + kh_value (r->htb, k) = val; + rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE); } static void hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value) { - GHashTable *ht = st; - gpointer k, v; + struct rspamd_hash_map_helper *ht = st; + struct rspamd_map_helper_value *val; + khiter_t k; + gconstpointer nk; + gsize vlen; + gint r; + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, ht->htb, key); + + if (k == kh_end (ht->htb)) { + nk = rspamd_mempool_strdup (ht->pool, key); + k = kh_put (rspamd_map_hash, ht->htb, nk, &r); + } - k = g_strdup (key); - v = g_strdup (value); - g_hash_table_replace (ht, k, v); + nk = kh_key (ht->htb, k); + val->key = nk; + kh_value (ht->htb, k) = val; } -/* Helpers */ -gchar * -rspamd_hosts_read ( - gchar * chunk, - gint len, - struct map_cb_data *data, - gboolean final) +static void +rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) { - if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, g_free, g_free); - } - return rspamd_parse_kv_list ( - chunk, - len, - data, - 
hash_insert_helper, - hash_fill, - final); -} + struct rspamd_regexp_map_helper *re_map = st; + struct rspamd_map *map; + rspamd_regexp_t *re; + gchar *escaped; + GError *err = NULL; + gint pcre_flags; + gsize escaped_len; + struct rspamd_map_helper_value *val; + khiter_t k; + gconstpointer nk; + gsize vlen; + gint r; -void -rspamd_hosts_fin (struct map_cb_data *data) -{ - struct rspamd_map *map = data->map; + map = re_map->map; - if (data->prev_data) { - g_hash_table_unref (data->prev_data); + if (re_map->map_flags & RSPAMD_REGEXP_FLAG_GLOB) { + escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len, + TRUE); + re = rspamd_regexp_new (escaped, NULL, &err); + g_free (escaped); } - if (data->cur_data) { - msg_info_map ("read hash of %d elements", g_hash_table_size - (data->cur_data)); + else { + re = rspamd_regexp_new (key, NULL, &err); } -} -gchar * -rspamd_kv_list_read ( - gchar * chunk, - gint len, - struct map_cb_data *data, - gboolean final) -{ - if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, g_free, g_free); + if (re == NULL) { + msg_err_map ("cannot parse regexp %s: %e", key, err); + + if (err) { + g_error_free (err); + } + + return; } - return rspamd_parse_kv_list ( - chunk, - len, - data, - hash_insert_helper, - "", - final); -} -void -rspamd_kv_list_fin (struct map_cb_data *data) -{ - struct rspamd_map *map = data->map; + vlen = strlen (value); + val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); - if (data->prev_data) { - g_hash_table_unref (data->prev_data); + k = kh_get (rspamd_map_hash, re_map->htb, key); + + if (k == kh_end (re_map->htb)) { + nk = rspamd_mempool_strdup (re_map->pool, key); + k = kh_put (rspamd_map_hash, re_map->htb, nk, &r); } - if (data->cur_data) { - msg_info_map ("read hash of %d elements", g_hash_table_size - (data->cur_data)); + + nk = kh_key (re_map->htb, k); + val->key = nk; + kh_value 
(re_map->htb, k) = val; + + pcre_flags = rspamd_regexp_get_pcre_flags (re); + +#ifndef WITH_PCRE2 + if (pcre_flags & PCRE_FLAG(UTF8)) { + re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; } +#else + if (pcre_flags & PCRE_FLAG(UTF)) { + re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; + } +#endif + + g_ptr_array_add (re_map->regexps, re); + g_ptr_array_add (re_map->values, val); } -gchar * -rspamd_radix_read ( - gchar * chunk, - gint len, - struct map_cb_data *data, - gboolean final) + +static struct rspamd_hash_map_helper * +rspamd_map_helper_new_hash (struct rspamd_map *map) { - radix_compressed_t *tree; - rspamd_mempool_t *rpool; - struct rspamd_map *map = data->map; + struct rspamd_hash_map_helper *htb; + rspamd_mempool_t *pool; - if (data->cur_data == NULL) { - tree = radix_create_compressed (); - rpool = radix_get_pool (tree); - memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid)); - data->cur_data = tree; - } - return rspamd_parse_kv_list ( - chunk, - len, - data, - radix_tree_insert_helper, - hash_fill, - final); + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag); + htb = rspamd_mempool_alloc0 (pool, sizeof (*htb)); + htb->htb = kh_init (rspamd_map_hash); + htb->pool = pool; + + return htb; } -void -rspamd_radix_fin (struct map_cb_data *data) +static struct rspamd_radix_map_helper * +rspamd_map_helper_new_radix (struct rspamd_map *map) { - struct rspamd_map *map = data->map; + struct rspamd_radix_map_helper *r; + rspamd_mempool_t *pool; - if (data->prev_data) { - radix_destroy_compressed (data->prev_data); - } - if (data->cur_data) { - msg_info_map ("read radix trie of %z elements: %s", - radix_get_size (data->cur_data), radix_get_info (data->cur_data)); - } + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag); + r = rspamd_mempool_alloc0 (pool, sizeof (*r)); + r->trie = radix_create_compressed_with_pool (pool); + r->htb = kh_init (rspamd_map_hash); + r->pool = pool; + + return r; } static struct 
rspamd_regexp_map_helper * @@ -553,12 +583,18 @@ rspamd_regexp_map_create (struct rspamd_map *map, enum rspamd_regexp_map_flags flags) { struct rspamd_regexp_map_helper *re_map; + rspamd_mempool_t *pool; - re_map = g_malloc0 (sizeof (*re_map)); + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag); + + re_map = rspamd_mempool_alloc0 (pool, sizeof (*re_map)); + re_map->pool = pool; re_map->values = g_ptr_array_new (); re_map->regexps = g_ptr_array_new (); re_map->map = map; re_map->map_flags = flags; + re_map->htb = kh_init (rspamd_map_hash); return re_map; } @@ -575,12 +611,9 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map_helper *re_map) rspamd_regexp_unref (re); } - for (i = 0; i < re_map->values->len; i ++) { - g_free (g_ptr_array_index (re_map->values, i)); - } - g_ptr_array_free (re_map->regexps, TRUE); g_ptr_array_free (re_map->values, TRUE); + kh_destroy (rspamd_map_hash, re_map->htb); #ifdef WITH_HYPERSCAN if (re_map->hs_scratch) { @@ -600,87 +633,88 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map_helper *re_map) } #endif - g_free (re_map); + rspamd_mempool_delete (re_map->pool); } -static void -rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) +gchar * +rspamd_kv_list_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) { - struct rspamd_regexp_map_helper *re_map = st; - struct rspamd_map *map; - rspamd_regexp_t *re; - GError *err = NULL; - gint pcre_flags; - - map = re_map->map; - re = rspamd_regexp_new (key, NULL, &err); + if (data->cur_data == NULL) { + data->cur_data = rspamd_map_helper_new_hash (data->map); + } - if (re == NULL) { - msg_err_map ("cannot parse regexp %s: %e", key, err); + return rspamd_parse_kv_list ( + chunk, + len, + data, + hash_insert_helper, + "", + final); +} - if (err) { - g_error_free (err); - } +void +rspamd_kv_list_fin (struct map_cb_data *data) +{ + struct rspamd_map *map = data->map; + struct rspamd_hash_map_helper *htb; - return; 
+ if (data->prev_data) { + htb = (struct rspamd_hash_map_helper *)data->prev_data; + kh_destroy (rspamd_map_hash, htb->htb); + rspamd_mempool_delete (htb->pool); } - pcre_flags = rspamd_regexp_get_pcre_flags (re); - -#ifndef WITH_PCRE2 - if (pcre_flags & PCRE_FLAG(UTF8)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; - } -#else - if (pcre_flags & PCRE_FLAG(UTF)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; + if (data->cur_data) { + htb = (struct rspamd_hash_map_helper *)data->cur_data; + msg_info_map ("read hash of %d elements", kh_size (htb->htb)); } -#endif - - g_ptr_array_add (re_map->regexps, re); - g_ptr_array_add (re_map->values, g_strdup (value)); } -static void -rspamd_glob_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) +gchar * +rspamd_radix_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) { - struct rspamd_regexp_map_helper *re_map = st; - struct rspamd_map *map; - rspamd_regexp_t *re; - gchar *escaped; - GError *err = NULL; - gint pcre_flags; - gsize escaped_len; + struct rspamd_radix_map_helper *r; + struct rspamd_map *map = data->map; - map = re_map->map; - escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len, TRUE); - re = rspamd_regexp_new (escaped, NULL, &err); - g_free (escaped); + if (data->cur_data == NULL) { + r = rspamd_map_helper_new_radix (map); + data->cur_data = r; + } - if (re == NULL) { - msg_err_map ("cannot parse regexp %s: %e", key, err); + return rspamd_parse_kv_list ( + chunk, + len, + data, + radix_tree_insert_helper, + hash_fill, + final); +} - if (err) { - g_error_free (err); - } +void +rspamd_radix_fin (struct map_cb_data *data) +{ + struct rspamd_map *map = data->map; + struct rspamd_radix_map_helper *r; - return; + if (data->prev_data) { + r = (struct rspamd_radix_map_helper *)data->prev_data; + kh_destroy (rspamd_map_hash, r->htb); + rspamd_mempool_delete (r->pool); } - pcre_flags = rspamd_regexp_get_pcre_flags (re); - -#ifndef WITH_PCRE2 - if 
(pcre_flags & PCRE_FLAG(UTF8)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; - } -#else - if (pcre_flags & PCRE_FLAG(UTF)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; + if (data->cur_data) { + r = (struct rspamd_radix_map_helper *)data->cur_data; + msg_info_map ("read radix trie of %z elements: %s", + radix_get_size (r->trie), radix_get_info (r->trie)); } -#endif - - g_ptr_array_add (re_map->regexps, re); - g_ptr_array_add (re_map->values, g_strdup (value)); } static void @@ -807,7 +841,7 @@ rspamd_glob_list_read_single ( struct rspamd_regexp_map_helper *re_map; if (data->cur_data == NULL) { - re_map = rspamd_regexp_map_create (data->map, 0); + re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_GLOB); data->cur_data = re_map; } @@ -815,7 +849,7 @@ rspamd_glob_list_read_single ( chunk, len, data, - rspamd_glob_map_insert_helper, + rspamd_re_map_insert_helper, hash_fill, final); } @@ -875,7 +909,7 @@ rspamd_match_hs_single_handler (unsigned int id, unsigned long long from, } #endif -gpointer +gconstpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, const gchar *in, gsize len) { @@ -883,6 +917,7 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, rspamd_regexp_t *re; gint res = 0; gpointer ret = NULL; + struct rspamd_map_helper_value *val; gboolean validated = FALSE; g_assert (in != NULL); @@ -910,7 +945,10 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, if (res == HS_SCAN_TERMINATED) { res = 1; - ret = g_ptr_array_index (map->values, i); + val = g_ptr_array_index (map->values, i); + + ret = val->value; + val->hits ++; } return ret; @@ -924,7 +962,10 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, re = g_ptr_array_index (map->regexps, i); if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) { - ret = g_ptr_array_index (map->values, i); + val = g_ptr_array_index (map->values, i); + + ret = val->value; + val->hits ++; break; } } @@ 
-945,9 +986,13 @@ rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from, unsigned int flags, void *context) { struct rspamd_multiple_cbdata *cbd = context; + struct rspamd_map_helper_value *val; + if (id < cbd->map->values->len) { - g_ptr_array_add (cbd->ar, g_ptr_array_index (cbd->map->values, id)); + val = g_ptr_array_index (cbd->map->values, id); + val->hits ++; + g_ptr_array_add (cbd->ar, val->value); } /* Always return zero as we need all matches here */ @@ -955,7 +1000,7 @@ rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from, } #endif -gpointer +gconstpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, const gchar *in, gsize len) { @@ -964,6 +1009,7 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, GPtrArray *ret; gint res = 0; gboolean validated = FALSE; + struct rspamd_map_helper_value *val; g_assert (in != NULL); @@ -1006,7 +1052,9 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) { - g_ptr_array_add (ret, g_ptr_array_index (map->values, i)); + val = g_ptr_array_index (map->values, i); + val->hits ++; + g_ptr_array_add (ret, val->value); } } } @@ -1019,3 +1067,39 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, return NULL; } + +gconstpointer +rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in) +{ + khiter_t k; + struct rspamd_map_helper_value *val; + + k = kh_get (rspamd_map_hash, map->htb, in); + + if (k != kh_end (map->htb)) { + val = kh_value (map->htb, k); + val->hits ++; + + return val->value; + } + + return NULL; +} + +gconstpointer +rspamd_match_radix_map (struct rspamd_radix_map_helper *map, + const guchar *in, gsize inlen) +{ + struct rspamd_map_helper_value *val; + + val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie, + in, inlen); + + if (val) { + val->hits ++; + + return val->value; + } + + return NULL; 
+} \ No newline at end of file diff --git a/src/libutil/map_helpers.h b/src/libutil/map_helpers.h index cbdb80478..da98c1e97 100644 --- a/src/libutil/map_helpers.h +++ b/src/libutil/map_helpers.h @@ -46,15 +46,6 @@ gchar * rspamd_radix_read ( gboolean final); void rspamd_radix_fin (struct map_cb_data *data); -/** - * Host list is an ordinal list of hosts or domains - */ -gchar * rspamd_hosts_read ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final); -void rspamd_hosts_fin (struct map_cb_data *data); /** * Kv list is an ordinal list of keys and values separated by whitespace @@ -107,7 +98,7 @@ rspamd_parse_kv_list ( * @param len * @return */ -gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map, +gconstpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, const gchar *in, gsize len); /** @@ -118,7 +109,27 @@ gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map, * @param len * @return */ -gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map *map, +gconstpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, const gchar *in, gsize len); +/** + * Find value matching specific key in a hash map + * @param map + * @param in + * @param len + * @return + */ +gconstpointer rspamd_match_hash_map (struct rspamd_hash_map_helper *map, + const gchar *in); + +/** + * Find value matching specific IP address in a radix map + * @param map + * @param in raw ip address + * @param inlen ip address length (4 for IPv4 and 16 for IPv6) + * @return + */ +gconstpointer rspamd_match_radix_map (struct rspamd_radix_map_helper *map, + const guchar *in, gsize inlen); + #endif -- 2.39.5