summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-04-23 16:05:56 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-04-23 16:05:56 +0100
commit 43ce13764ceb1afbb504ca1d9e802f0f4aaafcca (patch)
tree 6f21d3b8050fc16cd735f4ce2f1082233187e0db /src
parent 3856d40776b033e4f9b249b87bc97b66be40b30e (diff)
download rspamd-43ce13764ceb1afbb504ca1d9e802f0f4aaafcca.tar.gz
rspamd-43ce13764ceb1afbb504ca1d9e802f0f4aaafcca.zip
[Project] Implement helpers for major map types
Diffstat (limited to 'src')
-rw-r--r-- src/libutil/map_helpers.c 442
-rw-r--r-- src/libutil/map_helpers.h 33
2 files changed, 285 insertions, 190 deletions
diff --git a/src/libutil/map_helpers.c b/src/libutil/map_helpers.c
index f06174237..bf99e4294 100644
--- a/src/libutil/map_helpers.c
+++ b/src/libutil/map_helpers.c
@@ -18,6 +18,7 @@
#include "map_private.h"
#include "khash.h"
#include "radix.h"
+#include "rspamd.h"
#ifdef WITH_HYPERSCAN
#include "hs.h"
@@ -32,18 +33,20 @@ static const gchar *hash_fill = "1";
struct rspamd_map_helper_value {
gsize hits;
+ gconstpointer key;
gchar value[]; /* Null terminated */
};
+KHASH_INIT (rspamd_map_hash, const gchar *,
+ struct rspamd_map_helper_value *, true,
+ rspamd_strcase_hash, rspamd_strcase_equal);
+
struct rspamd_radix_map_helper {
rspamd_mempool_t *pool;
+ khash_t(rspamd_map_hash) *htb;
radix_compressed_t *trie;
};
-KHASH_INIT (rspamd_map_hash, const gchar *,
- struct rspamd_map_helper_value *, true,
- rspamd_strcase_hash, rspamd_strcase_equal);
-
struct rspamd_hash_map_helper {
rspamd_mempool_t *pool;
khash_t(rspamd_map_hash) *htb;
@@ -51,7 +54,8 @@ struct rspamd_hash_map_helper {
enum rspamd_regexp_map_flags {
RSPAMD_REGEXP_FLAG_UTF = (1u << 0),
- RSPAMD_REGEXP_FLAG_MULTIPLE = (1u << 1)
+ RSPAMD_REGEXP_FLAG_MULTIPLE = (1u << 1),
+ RSPAMD_REGEXP_FLAG_GLOB = (1u << 2),
};
struct rspamd_regexp_map_helper {
@@ -59,6 +63,7 @@ struct rspamd_regexp_map_helper {
struct rspamd_map *map;
GPtrArray *regexps;
GPtrArray *values;
+ khash_t(rspamd_map_hash) *htb;
enum rspamd_regexp_map_flags map_flags;
#ifdef WITH_HYPERSCAN
hs_database_t *hs_db;
@@ -419,133 +424,158 @@ rspamd_parse_kv_list (
static void
radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
{
- radix_compressed_t *tree = (radix_compressed_t *)st;
- rspamd_mempool_t *pool;
- gpointer nvalue;
+ struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
+ struct rspamd_map_helper_value *val;
+ gsize vlen;
+ khiter_t k;
+ gconstpointer nk;
+ gint res;
+
+ vlen = strlen (value);
+ val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
+ vlen + 1);
+ memcpy (val->value, value, vlen);
+
+ k = kh_get (rspamd_map_hash, r->htb, key);
+
+ if (k == kh_end (r->htb)) {
+ nk = rspamd_mempool_strdup (r->pool, key);
+ k = kh_put (rspamd_map_hash, r->htb, nk, &res);
+ }
- pool = radix_get_pool (tree);
- nvalue = rspamd_mempool_strdup (pool, value);
- rspamd_radix_add_iplist (key, ",", tree, nvalue, FALSE);
+ nk = kh_key (r->htb, k);
+ val->key = nk;
+ kh_value (r->htb, k) = val;
+ rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE);
}
static void
hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
{
- GHashTable *ht = st;
- gpointer k, v;
+ struct rspamd_hash_map_helper *ht = st;
+ struct rspamd_map_helper_value *val;
+ khiter_t k;
+ gconstpointer nk;
+ gsize vlen;
+ gint r;
+
+ vlen = strlen (value);
+ val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) +
+ vlen + 1);
+ memcpy (val->value, value, vlen);
+
+ k = kh_get (rspamd_map_hash, ht->htb, key);
+
+ if (k == kh_end (ht->htb)) {
+ nk = rspamd_mempool_strdup (ht->pool, key);
+ k = kh_put (rspamd_map_hash, ht->htb, nk, &r);
+ }
- k = g_strdup (key);
- v = g_strdup (value);
- g_hash_table_replace (ht, k, v);
+ nk = kh_key (ht->htb, k);
+ val->key = nk;
+ kh_value (ht->htb, k) = val;
}
-/* Helpers */
-gchar *
-rspamd_hosts_read (
- gchar * chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final)
+static void
+rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
{
- if (data->cur_data == NULL) {
- data->cur_data = g_hash_table_new_full (rspamd_strcase_hash,
- rspamd_strcase_equal, g_free, g_free);
- }
- return rspamd_parse_kv_list (
- chunk,
- len,
- data,
- hash_insert_helper,
- hash_fill,
- final);
-}
+ struct rspamd_regexp_map_helper *re_map = st;
+ struct rspamd_map *map;
+ rspamd_regexp_t *re;
+ gchar *escaped;
+ GError *err = NULL;
+ gint pcre_flags;
+ gsize escaped_len;
+ struct rspamd_map_helper_value *val;
+ khiter_t k;
+ gconstpointer nk;
+ gsize vlen;
+ gint r;
-void
-rspamd_hosts_fin (struct map_cb_data *data)
-{
- struct rspamd_map *map = data->map;
+ map = re_map->map;
- if (data->prev_data) {
- g_hash_table_unref (data->prev_data);
+ if (re_map->map_flags & RSPAMD_REGEXP_FLAG_GLOB) {
+ escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len,
+ TRUE);
+ re = rspamd_regexp_new (escaped, NULL, &err);
+ g_free (escaped);
}
- if (data->cur_data) {
- msg_info_map ("read hash of %d elements", g_hash_table_size
- (data->cur_data));
+ else {
+ re = rspamd_regexp_new (key, NULL, &err);
}
-}
-gchar *
-rspamd_kv_list_read (
- gchar * chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final)
-{
- if (data->cur_data == NULL) {
- data->cur_data = g_hash_table_new_full (rspamd_strcase_hash,
- rspamd_strcase_equal, g_free, g_free);
+ if (re == NULL) {
+ msg_err_map ("cannot parse regexp %s: %e", key, err);
+
+ if (err) {
+ g_error_free (err);
+ }
+
+ return;
}
- return rspamd_parse_kv_list (
- chunk,
- len,
- data,
- hash_insert_helper,
- "",
- final);
-}
-void
-rspamd_kv_list_fin (struct map_cb_data *data)
-{
- struct rspamd_map *map = data->map;
+ vlen = strlen (value);
+ val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) +
+ vlen + 1);
+ memcpy (val->value, value, vlen);
- if (data->prev_data) {
- g_hash_table_unref (data->prev_data);
+ k = kh_get (rspamd_map_hash, re_map->htb, key);
+
+ if (k == kh_end (re_map->htb)) {
+ nk = rspamd_mempool_strdup (re_map->pool, key);
+ k = kh_put (rspamd_map_hash, re_map->htb, nk, &r);
}
- if (data->cur_data) {
- msg_info_map ("read hash of %d elements", g_hash_table_size
- (data->cur_data));
+
+ nk = kh_key (re_map->htb, k);
+ val->key = nk;
+ kh_value (re_map->htb, k) = val;
+
+ pcre_flags = rspamd_regexp_get_pcre_flags (re);
+
+#ifndef WITH_PCRE2
+ if (pcre_flags & PCRE_FLAG(UTF8)) {
+ re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
}
+#else
+ if (pcre_flags & PCRE_FLAG(UTF)) {
+ re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
+ }
+#endif
+
+ g_ptr_array_add (re_map->regexps, re);
+ g_ptr_array_add (re_map->values, val);
}
-gchar *
-rspamd_radix_read (
- gchar * chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final)
+
+static struct rspamd_hash_map_helper *
+rspamd_map_helper_new_hash (struct rspamd_map *map)
{
- radix_compressed_t *tree;
- rspamd_mempool_t *rpool;
- struct rspamd_map *map = data->map;
+ struct rspamd_hash_map_helper *htb;
+ rspamd_mempool_t *pool;
- if (data->cur_data == NULL) {
- tree = radix_create_compressed ();
- rpool = radix_get_pool (tree);
- memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid));
- data->cur_data = tree;
- }
- return rspamd_parse_kv_list (
- chunk,
- len,
- data,
- radix_tree_insert_helper,
- hash_fill,
- final);
+ pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
+ map->tag);
+ htb = rspamd_mempool_alloc0 (pool, sizeof (*htb));
+ htb->htb = kh_init (rspamd_map_hash);
+ htb->pool = pool;
+
+ return htb;
}
-void
-rspamd_radix_fin (struct map_cb_data *data)
+static struct rspamd_radix_map_helper *
+rspamd_map_helper_new_radix (struct rspamd_map *map)
{
- struct rspamd_map *map = data->map;
+ struct rspamd_radix_map_helper *r;
+ rspamd_mempool_t *pool;
- if (data->prev_data) {
- radix_destroy_compressed (data->prev_data);
- }
- if (data->cur_data) {
- msg_info_map ("read radix trie of %z elements: %s",
- radix_get_size (data->cur_data), radix_get_info (data->cur_data));
- }
+ pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
+ map->tag);
+ r = rspamd_mempool_alloc0 (pool, sizeof (*r));
+ r->trie = radix_create_compressed_with_pool (pool);
+ r->htb = kh_init (rspamd_map_hash);
+ r->pool = pool;
+
+ return r;
}
static struct rspamd_regexp_map_helper *
@@ -553,12 +583,18 @@ rspamd_regexp_map_create (struct rspamd_map *map,
enum rspamd_regexp_map_flags flags)
{
struct rspamd_regexp_map_helper *re_map;
+ rspamd_mempool_t *pool;
- re_map = g_malloc0 (sizeof (*re_map));
+ pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
+ map->tag);
+
+ re_map = rspamd_mempool_alloc0 (pool, sizeof (*re_map));
+ re_map->pool = pool;
re_map->values = g_ptr_array_new ();
re_map->regexps = g_ptr_array_new ();
re_map->map = map;
re_map->map_flags = flags;
+ re_map->htb = kh_init (rspamd_map_hash);
return re_map;
}
@@ -575,12 +611,9 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map_helper *re_map)
rspamd_regexp_unref (re);
}
- for (i = 0; i < re_map->values->len; i ++) {
- g_free (g_ptr_array_index (re_map->values, i));
- }
-
g_ptr_array_free (re_map->regexps, TRUE);
g_ptr_array_free (re_map->values, TRUE);
+ kh_destroy (rspamd_map_hash, re_map->htb);
#ifdef WITH_HYPERSCAN
if (re_map->hs_scratch) {
@@ -600,87 +633,88 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map_helper *re_map)
}
#endif
- g_free (re_map);
+ rspamd_mempool_delete (re_map->pool);
}
-static void
-rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
+gchar *
+rspamd_kv_list_read (
+ gchar * chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
{
- struct rspamd_regexp_map_helper *re_map = st;
- struct rspamd_map *map;
- rspamd_regexp_t *re;
- GError *err = NULL;
- gint pcre_flags;
-
- map = re_map->map;
- re = rspamd_regexp_new (key, NULL, &err);
+ if (data->cur_data == NULL) {
+ data->cur_data = rspamd_map_helper_new_hash (data->map);
+ }
- if (re == NULL) {
- msg_err_map ("cannot parse regexp %s: %e", key, err);
+ return rspamd_parse_kv_list (
+ chunk,
+ len,
+ data,
+ hash_insert_helper,
+ "",
+ final);
+}
- if (err) {
- g_error_free (err);
- }
+void
+rspamd_kv_list_fin (struct map_cb_data *data)
+{
+ struct rspamd_map *map = data->map;
+ struct rspamd_hash_map_helper *htb;
- return;
+ if (data->prev_data) {
+ htb = (struct rspamd_hash_map_helper *)data->prev_data;
+ kh_destroy (rspamd_map_hash, htb->htb);
+ rspamd_mempool_delete (htb->pool);
}
- pcre_flags = rspamd_regexp_get_pcre_flags (re);
-
-#ifndef WITH_PCRE2
- if (pcre_flags & PCRE_FLAG(UTF8)) {
- re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
- }
-#else
- if (pcre_flags & PCRE_FLAG(UTF)) {
- re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
+ if (data->cur_data) {
+ htb = (struct rspamd_hash_map_helper *)data->cur_data;
+ msg_info_map ("read hash of %d elements", kh_size (htb->htb));
}
-#endif
-
- g_ptr_array_add (re_map->regexps, re);
- g_ptr_array_add (re_map->values, g_strdup (value));
}
-static void
-rspamd_glob_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
+gchar *
+rspamd_radix_read (
+ gchar * chunk,
+ gint len,
+ struct map_cb_data *data,
+ gboolean final)
{
- struct rspamd_regexp_map_helper *re_map = st;
- struct rspamd_map *map;
- rspamd_regexp_t *re;
- gchar *escaped;
- GError *err = NULL;
- gint pcre_flags;
- gsize escaped_len;
+ struct rspamd_radix_map_helper *r;
+ struct rspamd_map *map = data->map;
- map = re_map->map;
- escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len, TRUE);
- re = rspamd_regexp_new (escaped, NULL, &err);
- g_free (escaped);
+ if (data->cur_data == NULL) {
+ r = rspamd_map_helper_new_radix (map);
+ data->cur_data = r;
+ }
- if (re == NULL) {
- msg_err_map ("cannot parse regexp %s: %e", key, err);
+ return rspamd_parse_kv_list (
+ chunk,
+ len,
+ data,
+ radix_tree_insert_helper,
+ hash_fill,
+ final);
+}
- if (err) {
- g_error_free (err);
- }
+void
+rspamd_radix_fin (struct map_cb_data *data)
+{
+ struct rspamd_map *map = data->map;
+ struct rspamd_radix_map_helper *r;
- return;
+ if (data->prev_data) {
+ r = (struct rspamd_radix_map_helper *)data->prev_data;
+ kh_destroy (rspamd_map_hash, r->htb);
+ rspamd_mempool_delete (r->pool);
}
- pcre_flags = rspamd_regexp_get_pcre_flags (re);
-
-#ifndef WITH_PCRE2
- if (pcre_flags & PCRE_FLAG(UTF8)) {
- re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
- }
-#else
- if (pcre_flags & PCRE_FLAG(UTF)) {
- re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF;
+ if (data->cur_data) {
+ r = (struct rspamd_radix_map_helper *)data->cur_data;
+ msg_info_map ("read radix trie of %z elements: %s",
+ radix_get_size (r->trie), radix_get_info (r->trie));
}
-#endif
-
- g_ptr_array_add (re_map->regexps, re);
- g_ptr_array_add (re_map->values, g_strdup (value));
}
static void
@@ -807,7 +841,7 @@ rspamd_glob_list_read_single (
struct rspamd_regexp_map_helper *re_map;
if (data->cur_data == NULL) {
- re_map = rspamd_regexp_map_create (data->map, 0);
+ re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_GLOB);
data->cur_data = re_map;
}
@@ -815,7 +849,7 @@ rspamd_glob_list_read_single (
chunk,
len,
data,
- rspamd_glob_map_insert_helper,
+ rspamd_re_map_insert_helper,
hash_fill,
final);
}
@@ -875,7 +909,7 @@ rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
}
#endif
-gpointer
+gconstpointer
rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
const gchar *in, gsize len)
{
@@ -883,6 +917,7 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
rspamd_regexp_t *re;
gint res = 0;
gpointer ret = NULL;
+ struct rspamd_map_helper_value *val;
gboolean validated = FALSE;
g_assert (in != NULL);
@@ -910,7 +945,10 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
if (res == HS_SCAN_TERMINATED) {
res = 1;
- ret = g_ptr_array_index (map->values, i);
+ val = g_ptr_array_index (map->values, i);
+
+ ret = val->value;
+ val->hits ++;
}
return ret;
@@ -924,7 +962,10 @@ rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
re = g_ptr_array_index (map->regexps, i);
if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) {
- ret = g_ptr_array_index (map->values, i);
+ val = g_ptr_array_index (map->values, i);
+
+ ret = val->value;
+ val->hits ++;
break;
}
}
@@ -945,9 +986,13 @@ rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
unsigned int flags, void *context)
{
struct rspamd_multiple_cbdata *cbd = context;
+ struct rspamd_map_helper_value *val;
+
if (id < cbd->map->values->len) {
- g_ptr_array_add (cbd->ar, g_ptr_array_index (cbd->map->values, id));
+ val = g_ptr_array_index (cbd->map->values, id);
+ val->hits ++;
+ g_ptr_array_add (cbd->ar, val->value);
}
/* Always return zero as we need all matches here */
@@ -955,7 +1000,7 @@ rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
}
#endif
-gpointer
+gconstpointer
rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
const gchar *in, gsize len)
{
@@ -964,6 +1009,7 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
GPtrArray *ret;
gint res = 0;
gboolean validated = FALSE;
+ struct rspamd_map_helper_value *val;
g_assert (in != NULL);
@@ -1006,7 +1052,9 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
if (rspamd_regexp_search (re, in, len, NULL, NULL,
!validated, NULL)) {
- g_ptr_array_add (ret, g_ptr_array_index (map->values, i));
+ val = g_ptr_array_index (map->values, i);
+ val->hits ++;
+ g_ptr_array_add (ret, val->value);
}
}
}
@@ -1019,3 +1067,39 @@ rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
return NULL;
}
+
+gconstpointer
+rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in)
+{
+ khiter_t k;
+ struct rspamd_map_helper_value *val;
+
+ k = kh_get (rspamd_map_hash, map->htb, in);
+
+ if (k != kh_end (map->htb)) {
+ val = kh_value (map->htb, k);
+ val->hits ++;
+
+ return val->value;
+ }
+
+ return NULL;
+}
+
+gconstpointer
+rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
+ const guchar *in, gsize inlen)
+{
+ struct rspamd_map_helper_value *val;
+
+ val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie,
+ in, inlen);
+
+ if (val) {
+ val->hits ++;
+
+ return val->value;
+ }
+
+ return NULL;
+}
\ No newline at end of file
diff --git a/src/libutil/map_helpers.h b/src/libutil/map_helpers.h
index cbdb80478..da98c1e97 100644
--- a/src/libutil/map_helpers.h
+++ b/src/libutil/map_helpers.h
@@ -46,15 +46,6 @@ gchar * rspamd_radix_read (
gboolean final);
void rspamd_radix_fin (struct map_cb_data *data);
-/**
- * Host list is an ordinal list of hosts or domains
- */
-gchar * rspamd_hosts_read (
- gchar *chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final);
-void rspamd_hosts_fin (struct map_cb_data *data);
/**
* Kv list is an ordinal list of keys and values separated by whitespace
@@ -107,7 +98,7 @@ rspamd_parse_kv_list (
* @param len
* @return
*/
-gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
+gconstpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
const gchar *in, gsize len);
/**
@@ -118,7 +109,27 @@ gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map,
* @param len
* @return
*/
-gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map *map,
+gconstpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
const gchar *in, gsize len);
+/**
+ * Find value matching specific key in a hash map
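+ * @param map hash map helper to search
+ * @param in null-terminated key to look up
+ * @return value stored for the key, or NULL
+ *         if the key is not present in the map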
+ */
+gconstpointer rspamd_match_hash_map (struct rspamd_hash_map_helper *map,
+ const gchar *in);
+
+/**
+ * Find value matching specific IP address in a radix map
+ * @param map
+ * @param in raw ip address
+ * @param inlen ip address length (4 for IPv4 and 16 for IPv6)
+ * @return
+ */
+gconstpointer rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
+ const guchar *in, gsize inlen);
+
#endif