]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Implement helpers for major map types
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 23 Apr 2018 15:05:56 +0000 (16:05 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 23 Apr 2018 15:05:56 +0000 (16:05 +0100)
src/libutil/map_helpers.c
src/libutil/map_helpers.h

index f06174237de8642c6e2e37d14a800117e22e47de..bf99e429496580f92e306dd7387d5a5cefb8209b 100644 (file)
@@ -18,6 +18,7 @@
 #include "map_private.h"
 #include "khash.h"
 #include "radix.h"
+#include "rspamd.h"
 
 #ifdef WITH_HYPERSCAN
 #include "hs.h"
@@ -32,18 +33,20 @@ static const gchar *hash_fill = "1";
 
 struct rspamd_map_helper_value {
        gsize hits;
+       gconstpointer key;
        gchar value[]; /* Null terminated */
 };
 
+KHASH_INIT (rspamd_map_hash, const gchar *,
+               struct rspamd_map_helper_value *, true,
+               rspamd_strcase_hash, rspamd_strcase_equal);
+
 struct rspamd_radix_map_helper {
        rspamd_mempool_t *pool;
+       khash_t(rspamd_map_hash) *htb;
        radix_compressed_t *trie;
 };
 
-KHASH_INIT (rspamd_map_hash, const gchar *,
-               struct rspamd_map_helper_value *, true,
-               rspamd_strcase_hash, rspamd_strcase_equal);
-
 struct rspamd_hash_map_helper {
        rspamd_mempool_t *pool;
        khash_t(rspamd_map_hash) *htb;
@@ -51,7 +54,8 @@ struct rspamd_hash_map_helper {
 
 enum rspamd_regexp_map_flags {
        RSPAMD_REGEXP_FLAG_UTF = (1u << 0),
-       RSPAMD_REGEXP_FLAG_MULTIPLE = (1u << 1)
+       RSPAMD_REGEXP_FLAG_MULTIPLE = (1u << 1),
+       RSPAMD_REGEXP_FLAG_GLOB = (1u << 2),
 };
 
 struct rspamd_regexp_map_helper {
@@ -59,6 +63,7 @@ struct rspamd_regexp_map_helper {
        struct rspamd_map *map;
        GPtrArray *regexps;
        GPtrArray *values;
+       khash_t(rspamd_map_hash) *htb;
        enum rspamd_regexp_map_flags map_flags;
 #ifdef WITH_HYPERSCAN
        hs_database_t *hs_db;
@@ -419,133 +424,158 @@ rspamd_parse_kv_list (
 static void
 radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
 {
-       radix_compressed_t *tree = (radix_compressed_t *)st;
-       rspamd_mempool_t *pool;
-       gpointer nvalue;
+       struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
+       struct rspamd_map_helper_value *val;
+       gsize vlen;
+       khiter_t k;
+       gconstpointer nk;
+       gint res;
+
+       vlen = strlen (value);
+       val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
+                       vlen + 1);
+       memcpy (val->value, value, vlen);
+
+       k = kh_get (rspamd_map_hash, r->htb, key);
+
+       if (k == kh_end (r->htb)) {
+               nk = rspamd_mempool_strdup (r->pool, key);
+               k = kh_put (rspamd_map_hash, r->htb, nk, &res);
+       }
 
-       pool = radix_get_pool (tree);
-       nvalue = rspamd_mempool_strdup (pool, value);
-       rspamd_radix_add_iplist (key, ",", tree, nvalue, FALSE);
+       nk = kh_key (r->htb, k);
+       val->key = nk;
+       kh_value (r->htb, k) = val;
+       rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE);
 }
 
 static void
 hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
 {
-       GHashTable *ht = st;
-       gpointer k, v;
+       struct rspamd_hash_map_helper *ht = st;
+       struct rspamd_map_helper_value *val;
+       khiter_t k;
+       gconstpointer nk;
+       gsize vlen;
+       gint r;
+
+       vlen = strlen (value);
+       val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) +
+                       vlen + 1);
+       memcpy (val->value, value, vlen);
+
+       k = kh_get (rspamd_map_hash, ht->htb, key);
+
+       if (k == kh_end (ht->htb)) {
+               nk = rspamd_mempool_strdup (ht->pool, key);
+               k = kh_put (rspamd_map_hash, ht->htb, nk, &r);
+       }
 
-       k = g_strdup (key);
-       v = g_strdup (value);
-       g_hash_table_replace (ht, k, v);
+       nk = kh_key (ht->htb, k);
+       val->key = nk;
+       kh_value (ht->htb, k) = val;
 }
 
-/* Helpers */
-gchar *
-rspamd_hosts_read (
-               gchar * chunk,
-               gint len,
-               struct map_cb_data *data,
-               gboolean final)
+static void
+rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
 {
-       if (data->cur_data == NULL) {
-               data->cur_data = g_hash_table_new_full (rspamd_strcase_hash,
-                               rspamd_strcase_equal, g_free, g_free);
-       }
-       return rspamd_parse_kv_list (
-                       chunk,
-                       len,
-                       data,
-                       hash_insert_helper,
-                       hash_fill,
-                       final);
-}
+       struct rspamd_regexp_map_helper *re_map = st;
+       struct rspamd_map *map;
+       rspamd_regexp_t *re;
+       gchar *escaped;
+       GError *err = NULL;
+       gint pcre_flags;
+       gsize escaped_len;
+       struct rspamd_map_helper_value *val;
+       khiter_t k;
+       gconstpointer nk;
+       gsize vlen;
+       gint r;
 
-void
-rspamd_hosts_fin (struct map_cb_data *data)
-{
-       struct rspamd_map *map = data->map;
+       map = re_map->map;
 
-       if (data->prev_data) {
-               g_hash_table_unref (data->prev_data);
+       if (re_map->map_flags & RSPAMD_REGEXP_FLAG_GLOB) {
+               escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len,
+                               TRUE);
+               re = rspamd_regexp_new (escaped, NULL, &err);
+               g_free (escaped);
        }
-       if (data->cur_data) {
-               msg_info_map ("read hash of %d elements", g_hash_table_size
-                               (data->cur_data));
+       else {
+               re = rspamd_regexp_new (key, NULL, &err);
        }
-}
 
-gchar *
-rspamd_kv_list_read (
-               gchar * chunk,
-               gint len,
-               struct map_cb_data *data,
-               gboolean final)
-{
-       if (data->cur_data == NULL) {
-               data->cur_data = g_hash_table_new_full (rspamd_strcase_hash,
-                               rspamd_strcase_equal, g_free, g_free);
+       if (re == NULL) {
+               msg_err_map ("cannot parse regexp %s: %e", key, err);
+
+               if (err) {
+                       g_error_free (err);
+               }
+
+               return;
        }
-       return rspamd_parse_kv_list (
-                       chunk,
-                       len,
-                       data,
-                       hash_insert_helper,
-                       "",
-                       final);
-}
 
-void
-rspamd_kv_list_fin (struct map_cb_data *data)
-{
-       struct rspamd_map *map = data->map;
+       vlen = strlen (value);
+       val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) +
+                       vlen + 1);
+       memcpy (val->value, value, vlen);
 
-       if (data->prev_data) {
-               g_hash_table_unref (data->prev_data);
+       k = kh_get (rspamd_map_hash, re_map->htb, key);
+
+       if (k == kh_end (re_map->htb)) {
+               nk = rspamd_mempool_strdup (re_map->pool, key);
+               k = kh_put (rspamd_map_hash, re_map->htb, nk, &r);
        }
-       if (data->cur_data) {
-               msg_info_map ("read hash of %d elements", g_hash_table_size
-                               (data->cur_data));
+
+       nk = kh_key (re_map->htb, k);
+       val->key = nk;
+       kh_value (re_map->htb, k) = val;
+
+       pcre_flags = rspamd_regexp_get_pcre_flags (re);
+
+#ifndef WITH_PCRE2
+       if (pcre_flags & PCRE_FLAG(UTF8)) {
+               re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
        }
+#else
+       if (pcre_flags & PCRE_FLAG(UTF)) {
+               re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
+       }
+#endif
+
+       g_ptr_array_add (re_map->regexps, re);
+       g_ptr_array_add (re_map->values, val);
 }
 
-gchar *
-rspamd_radix_read (
-               gchar * chunk,
-               gint len,
-               struct map_cb_data *data,
-               gboolean final)
+
+static struct rspamd_hash_map_helper *
+rspamd_map_helper_new_hash (struct rspamd_map *map)
 {
-       radix_compressed_t *tree;
-       rspamd_mempool_t *rpool;
-       struct rspamd_map *map = data->map;
+       struct rspamd_hash_map_helper *htb;
+       rspamd_mempool_t *pool;
 
-       if (data->cur_data == NULL) {
-               tree = radix_create_compressed ();
-               rpool = radix_get_pool (tree);
-               memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid));
-               data->cur_data = tree;
-       }
-       return rspamd_parse_kv_list (
-                       chunk,
-                       len,
-                       data,
-                       radix_tree_insert_helper,
-                       hash_fill,
-                       final);
+       pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
+                       map->tag);
+       htb = rspamd_mempool_alloc0 (pool, sizeof (*htb));
+       htb->htb = kh_init (rspamd_map_hash);
+       htb->pool = pool;
+
+       return htb;
 }
 
-void
-rspamd_radix_fin (struct map_cb_data *data)
+static struct rspamd_radix_map_helper *
+rspamd_map_helper_new_radix (struct rspamd_map *map)
 {
-       struct rspamd_map *map = data->map;
+       struct rspamd_radix_map_helper *r;
+       rspamd_mempool_t *pool;
 
-       if (data->prev_data) {
-               radix_destroy_compressed (data->prev_data);
-       }
-       if (data->cur_data) {
-               msg_info_map ("read radix trie of %z elements: %s",
-                               radix_get_size (data->cur_data), radix_get_info (data->cur_data));
-       }
+       pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
+                       map->tag);
+       r = rspamd_mempool_alloc0 (pool, sizeof (*r));
+       r->trie = radix_create_compressed_with_pool (pool);
+       r->htb = kh_init (rspamd_map_hash);
+       r->pool = pool;
+
+       return r;
 }
 
 static struct rspamd_regexp_map_helper *
@@ -553,12 +583,18 @@ rspamd_regexp_map_create (struct rspamd_map *map,
                enum rspamd_regexp_map_flags flags)
 {
        struct rspamd_regexp_map_helper *re_map;
+       rspamd_mempool_t *pool;
 
-       re_map = g_malloc0 (sizeof (*re_map));
+       pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
+                       map->tag);
+
+       re_map = rspamd_mempool_alloc0 (pool, sizeof (*re_map));
+       re_map->pool = pool;
        re_map->values = g_ptr_array_new ();
        re_map->regexps = g_ptr_array_new ();
        re_map->map = map;
        re_map->map_flags = flags;
+       re_map->htb = kh_init (rspamd_map_hash);
 
        return re_map;
 }
@@ -575,12 +611,9 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map_helper *re_map)
                rspamd_regexp_unref (re);
        }
 
-       for (i = 0; i < re_map->values->len; i ++) {
-               g_free (g_ptr_array_index (re_map->values, i));
-       }
-
        g_ptr_array_free (re_map->regexps, TRUE);
        g_ptr_array_free (re_map->values, TRUE);
+       kh_destroy (rspamd_map_hash, re_map->htb);
 
 #ifdef WITH_HYPERSCAN
        if (re_map->hs_scratch) {
@@ -600,87 +633,88 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map_helper *re_map)
        }
 #endif
 
-       g_free (re_map);
+       rspamd_mempool_delete (re_map->pool);
 }
 
-static void
-rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
+gchar *
+rspamd_kv_list_read (
+               gchar * chunk,
+               gint len,
+               struct map_cb_data *data,
+               gboolean final)
 {
-       struct rspamd_regexp_map_helper *re_map = st;
-       struct rspamd_map *map;
-       rspamd_regexp_t *re;
-       GError *err = NULL;
-       gint pcre_flags;
-
-       map = re_map->map;
-       re = rspamd_regexp_new (key, NULL, &err);
+       if (data->cur_data == NULL) {
+               data->cur_data = rspamd_map_helper_new_hash (data->map);
+       }
 
-       if (re == NULL) {
-               msg_err_map ("cannot parse regexp %s: %e", key, err);
+       return rspamd_parse_kv_list (
+                       chunk,
+                       len,
+                       data,
+                       hash_insert_helper,
+                       "",
+                       final);
+}
 
-               if (err) {
-                       g_error_free (err);
-               }
+void
+rspamd_kv_list_fin (struct map_cb_data *data)
+{
+       struct rspamd_map *map = data->map;
+       struct rspamd_hash_map_helper *htb;
 
-               return;
+       if (data->prev_data) {
+               htb = (struct rspamd_hash_map_helper *)data->prev_data;
+               kh_destroy (rspamd_map_hash, htb->htb);
+               rspamd_mempool_delete (htb->pool);
        }
 
-       pcre_flags = rspamd_regexp_get_pcre_flags (re);
-
-#ifndef WITH_PCRE2
-       if (pcre_flags & PCRE_FLAG(UTF8)) {
-               re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
-       }
-#else
-       if (pcre_flags & PCRE_FLAG(UTF)) {
-               re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
+       if (data->cur_data) {
+               htb = (struct rspamd_hash_map_helper *)data->cur_data;
+               msg_info_map ("read hash of %d elements", kh_size (htb->htb));
        }
-#endif
-
-       g_ptr_array_add (re_map->regexps, re);
-       g_ptr_array_add (re_map->values, g_strdup (value));
 }
 
-static void
-rspamd_glob_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
+gchar *
+rspamd_radix_read (
+               gchar * chunk,
+               gint len,
+               struct map_cb_data *data,
+               gboolean final)
 {
-       struct rspamd_regexp_map_helper *re_map = st;
-       struct rspamd_map *map;
-       rspamd_regexp_t *re;
-       gchar *escaped;
-       GError *err = NULL;
-       gint pcre_flags;
-       gsize escaped_len;
+       struct rspamd_radix_map_helper *r;
+       struct rspamd_map *map = data->map;
 
-       map = re_map->map;
-       escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len, TRUE);
-       re = rspamd_regexp_new (escaped, NULL, &err);
-       g_free (escaped);
+       if (data->cur_data == NULL) {
+               r = rspamd_map_helper_new_radix (map);
+               data->cur_data = r;
+       }
 
-       if (re == NULL) {
-               msg_err_map ("cannot parse regexp %s: %e", key, err);
+       return rspamd_parse_kv_list (
+                       chunk,
+                       len,
+                       data,
+                       radix_tree_insert_helper,
+                       hash_fill,
+                       final);
+}
 
-               if (err) {
-                       g_error_free (err);
-               }
+void
+rspamd_radix_fin (struct map_cb_data *data)
+{
+       struct rspamd_map *map = data->map;
+       struct rspamd_radix_map_helper *r;
 
-               return;
+       if (data->prev_data) {
+               r = (struct rspamd_radix_map_helper *)data->prev_data;
+               kh_destroy (rspamd_map_hash, r->htb);
+               rspamd_mempool_delete (r->pool);
        }
 
-       pcre_flags = rspamd_regexp_get_pcre_flags (re);
-
-#ifndef WITH_PCRE2
-       if (pcre_flags & PCRE_FLAG(UTF8)) {
-               re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
-       }
-#else
-       if (pcre_flags & PCRE_FLAG(UTF)) {
-               re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
+       if (data->cur_data) {
+               r = (struct rspamd_radix_map_helper *)data->cur_data;
+               msg_info_map ("read radix trie of %z elements: %s",
+                               radix_get_size (r->trie), radix_get_info (r->trie));
        }
-#endif
-
-       g_ptr_array_add (re_map->regexps, re);
-       g_ptr_array_add (re_map->values, g_strdup (value));
 }
 
 static void
@@ -807,7 +841,7 @@ rspamd_glob_list_read_single (
        struct rspamd_regexp_map_helper *re_map;
 
        if (data->cur_data == NULL) {
-               re_map = rspamd_regexp_map_create (data->map, 0);
+               re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_GLOB);
                data->cur_data = re_map;
        }
 
@@ -815,7 +849,7 @@ rspamd_glob_list_read_single (
                        chunk,
                        len,
                        data,
-                       rspamd_glob_map_insert_helper,
+                       rspamd_re_map_insert_helper,
                        hash_fill,
                        final);
 }
@@ -875,7 +909,7 @@ rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
 }
 #endif
 
-gpointer
+gconstpointer
 rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
                const gchar *in, gsize len)
 {
@@ -883,6 +917,7 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
        rspamd_regexp_t *re;
        gint res = 0;
        gpointer ret = NULL;
+       struct rspamd_map_helper_value *val;
        gboolean validated = FALSE;
 
        g_assert (in != NULL);
@@ -910,7 +945,10 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
 
                        if (res == HS_SCAN_TERMINATED) {
                                res = 1;
-                               ret = g_ptr_array_index (map->values, i);
+                               val = g_ptr_array_index (map->values, i);
+
+                               ret = val->value;
+                               val->hits ++;
                        }
 
                        return ret;
@@ -924,7 +962,10 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
                        re = g_ptr_array_index (map->regexps, i);
 
                        if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) {
-                               ret = g_ptr_array_index (map->values, i);
+                               val = g_ptr_array_index (map->values, i);
+
+                               ret = val->value;
+                               val->hits ++;
                                break;
                        }
                }
@@ -945,9 +986,13 @@ rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
                unsigned int flags, void *context)
 {
        struct rspamd_multiple_cbdata *cbd = context;
+       struct rspamd_map_helper_value *val;
+
 
        if (id < cbd->map->values->len) {
-               g_ptr_array_add (cbd->ar, g_ptr_array_index (cbd->map->values, id));
+               val = g_ptr_array_index (cbd->map->values, id);
+               val->hits ++;
+               g_ptr_array_add (cbd->ar, val->value);
        }
 
        /* Always return zero as we need all matches here */
@@ -955,7 +1000,7 @@ rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
 }
 #endif
 
-gpointer
+gconstpointer
 rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
                const gchar *in, gsize len)
 {
@@ -964,6 +1009,7 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
        GPtrArray *ret;
        gint res = 0;
        gboolean validated = FALSE;
+       struct rspamd_map_helper_value *val;
 
        g_assert (in != NULL);
 
@@ -1006,7 +1052,9 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
 
                        if (rspamd_regexp_search (re, in, len, NULL, NULL,
                                        !validated, NULL)) {
-                               g_ptr_array_add (ret, g_ptr_array_index (map->values, i));
+                               val = g_ptr_array_index (map->values, i);
+                               val->hits ++;
+                               g_ptr_array_add (ret, val->value);
                        }
                }
        }
@@ -1019,3 +1067,39 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
 
        return NULL;
 }
+
+gconstpointer
+rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in)
+{
+       khiter_t k;
+       struct rspamd_map_helper_value *val;
+
+       k = kh_get (rspamd_map_hash, map->htb, in);
+
+       if (k != kh_end (map->htb)) {
+               val = kh_value (map->htb, k);
+               val->hits ++;
+
+               return val->value;
+       }
+
+       return NULL;
+}
+
+gconstpointer
+rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
+               const guchar *in, gsize inlen)
+{
+       struct rspamd_map_helper_value *val;
+
+       val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie,
+                       in, inlen);
+
+       if (val) {
+               val->hits ++;
+
+               return val->value;
+       }
+
+       return NULL;
+}
\ No newline at end of file
index cbdb80478f8284f3148f7664962fbcfe153399b9..da98c1e97638b823caf505abfbd3548c6033d2ab 100644 (file)
@@ -46,15 +46,6 @@ gchar * rspamd_radix_read (
                gboolean final);
 void rspamd_radix_fin (struct map_cb_data *data);
 
-/**
- * Host list is an ordinal list of hosts or domains
- */
-gchar * rspamd_hosts_read (
-               gchar *chunk,
-               gint len,
-               struct map_cb_data *data,
-               gboolean final);
-void rspamd_hosts_fin (struct map_cb_data *data);
 
 /**
  * Kv list is an ordinal list of keys and values separated by whitespace
@@ -107,7 +98,7 @@ rspamd_parse_kv_list (
  * @param len
  * @return
  */
-gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
+gconstpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
                const gchar *in, gsize len);
 
 /**
@@ -118,7 +109,27 @@ gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
  * @param len
  * @return
  */
-gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
+gconstpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
                const gchar *in, gsize len);
 
+/**
+ * Find value matching specific key in a hash map
+ * @param map
+ * @param in
+ * @param len
+ * @return
+ */
+gconstpointer rspamd_match_hash_map (struct rspamd_hash_map_helper *map,
+               const gchar *in);
+
+/**
+ * Find value matching specific key in a hash map
+ * @param map
+ * @param in raw ip address
+ * @param inlen ip address length (4 for IPv4 and 16 for IPv6)
+ * @return
+ */
+gconstpointer rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
+               const guchar *in, gsize inlen);
+
 #endif