From c2603043a10ba22dca7f80928a87a544d0f4e573 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 3 Dec 2015 15:15:22 +0000
Subject: [PATCH] Add more logic for the new re cache system

---
Review notes (commentary only; not part of the commit message or the diff):
 * rspamd_re_cache_process: the slow path only looks up the re class; it
   never sets bits in rt->checked / rt->results and falls through to
   `return FALSE`, so no regexp is evaluated by this function yet.
 * rspamd_re_cache_destroy: each regexp is unreffed and the class struct is
   freed, but re_class->all_re and re_class->re_ids themselves are not
   released here. NOTE(review): looks like a leak; confirm against the code
   that allocates them (not shown in this patch).
 * rspamd_re_cache_runtime_destroy sizes its frees from rt->cache->nre at
   destroy time, while rspamd_re_cache_runtime_new sized the allocations
   from cache->nre at creation time. NOTE(review): assumes nothing is added
   to the cache while a runtime exists; confirm.

 src/libserver/re_cache.c | 166 ++++++++++++++++++++++++++++++++-------
 1 file changed, 139 insertions(+), 27 deletions(-)

diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index d6ed367b9..f4884e6b9 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -24,6 +24,8 @@
 
 #include "re_cache.h"
 #include "xxhash.h"
+#include "cryptobox.h"
+#include "ref.h"
 
 struct rspamd_re_class {
 	guint64 id;
@@ -32,10 +34,19 @@ struct rspamd_re_class {
 	gsize type_len;
 	GHashTable *re_ids;
 	GPtrArray *all_re;
+	gchar hash[rspamd_cryptobox_HASHBYTES * 2 + 1];
 };
 
 struct rspamd_re_cache {
 	GHashTable *re_classes;
+	ref_entry_t ref;
+	guint nre;
+};
+
+struct rspamd_re_runtime {
+	guchar *checked;
+	guchar *results;
+	struct rspamd_re_cache *cache;
 };
 
 static guint64
@@ -62,6 +73,10 @@ rspamd_re_cache_new (void)
 
 	cache = g_slice_alloc (sizeof (*cache));
 	cache->re_classes = g_hash_table_new (g_int64_hash, g_int64_equal);
+	cache->nre = 0;
+	REF_INIT_RETAIN (cache, rspamd_re_cache_destroy);
+
+	return cache;
 }
 
 void
@@ -95,44 +110,141 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
 	}
 
 	g_ptr_array_add (re_class->all_re, rspamd_regexp_ref (re));
+	/*
+	 * We set re id based on the global position in the cache
+	 */
+	rspamd_regexp_set_cache_id (re, cache->nre ++);
 }
 
-/**
- * Initialize and optimize re cache structure
- */
 void
 rspamd_re_cache_init (struct rspamd_re_cache *cache)
 {
+	GHashTableIter it;
+	gpointer k, v;
+	struct rspamd_re_class *re_class;
+	rspamd_cryptobox_hash_state_t st;
+	rspamd_regexp_t *re;
+	guint i;
+	guchar hash_out[rspamd_cryptobox_HASHBYTES];
+
+	g_assert (cache != NULL);
+
+	g_hash_table_iter_init (&it, cache->re_classes);
 
+	while (g_hash_table_iter_next (&it, &k, &v)) {
+		re_class = v;
+		rspamd_cryptobox_hash_init (&st, NULL, 0);
+		rspamd_cryptobox_hash_update (&st, (gpointer)&re_class->id,
+				sizeof (re_class->id));
+
+		for (i = 0; i < re_class->all_re->len; i ++) {
+			re = g_ptr_array_index (re_class->all_re, i);
+			rspamd_cryptobox_hash_update (&st, rspamd_regexp_get_id (re),
+					rspamd_cryptobox_HASHBYTES);
+		}
+
+		rspamd_cryptobox_hash_final (&st, hash_out);
+		rspamd_snprintf (re_class->hash, sizeof (re_class->hash), "%*xs",
+				(gint)rspamd_cryptobox_HASHBYTES, hash_out);
+	}
 }
 
-/**
- * Get runtime data for a cache
- */
-struct rspamd_re_runtime *rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);
-
-/**
- * Process regexp runtime and return the result for a specific regexp
- * @param task task object
- * @param rt cache runtime object
- * @param re regexp object
- * @param type type of object
- * @param type_data associated data with the type (e.g. header name)
- * @param datalen associated data length
- */
-gboolean rspamd_re_cache_process (struct rspamd_task *task,
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache)
+{
+	struct rspamd_re_runtime *rt;
+	g_assert (cache != NULL);
+
+	rt = g_slice_alloc (sizeof (*rt));
+	rt->cache = cache;
+	REF_RETAIN (cache);
+	rt->checked = g_slice_alloc0 (NBYTES (cache->nre));
+	rt->results = g_slice_alloc0 (NBYTES (cache->nre));
+
+	return rt;
+}
+
+gboolean
+rspamd_re_cache_process (struct rspamd_task *task,
 		struct rspamd_re_runtime *rt,
 		rspamd_regexp_t *re,
 		enum rspamd_re_type type,
 		gpointer type_data,
-		gsize datalen);
+		gsize datalen)
+{
+	guint64 class_id, re_id;
+	struct rspamd_re_class *re_class;
+	struct rspamd_re_cache *cache;
 
-/**
- * Destroy runtime data
- */
-void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);
+	g_assert (rt != NULL);
+	g_assert (task != NULL);
+	g_assert (re != NULL);
 
-/**
- * Destroy re cache
- */
-void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);
+	re_id = rspamd_regexp_get_cache_id (re);
+
+	if (re_id == RSPAMD_INVALID_ID) {
+		msg_err_task ("re '%s' has no valid id for the cache",
+				rspamd_regexp_get_pattern (re));
+		return FALSE;
+	}
+
+	if (isset (rt->checked, re_id)) {
+		/* Fast path */
+		return isset (rt->results, re_id);
+	}
+	else {
+		/* Slow path */
+		cache = rt->cache;
+		class_id = rspamd_re_cache_class_id (type, type_data, datalen);
+		re_class = g_hash_table_lookup (cache->re_classes, &class_id);
+
+		if (re_class == NULL) {
+			msg_err_task ("cannot find re class for regexp '%s'",
+					rspamd_regexp_get_pattern (re));
+			return FALSE;
+		}
+	}
+
+	return FALSE;
+}
+
+void
+rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt)
+{
+	g_assert (rt != NULL);
+
+	g_slice_free1 (NBYTES (rt->cache->nre), rt->checked);
+	g_slice_free1 (NBYTES (rt->cache->nre), rt->results);
+	REF_RELEASE (rt->cache);
+	g_slice_free1 (sizeof (*rt), rt);
+}
+
+void
+rspamd_re_cache_destroy (struct rspamd_re_cache *cache)
+{
+	GHashTableIter it;
+	gpointer k, v;
+	struct rspamd_re_class *re_class;
+	rspamd_regexp_t *re;
+	guint i;
+
+	g_assert (cache != NULL);
+	g_hash_table_iter_init (&it, cache->re_classes);
+
+	while (g_hash_table_iter_next (&it, &k, &v)) {
+		re_class = v;
+		g_hash_table_iter_steal (&it);
+
+		for (i = 0; i < re_class->all_re->len; i++) {
+			re = g_ptr_array_index (re_class->all_re, i);
+
+			rspamd_regexp_set_cache_id (re, RSPAMD_INVALID_ID);
+			rspamd_regexp_unref (re);
+		}
+
+		g_slice_free1 (sizeof (*re_class), re_class);
+	}
+
+	g_hash_table_unref (cache->re_classes);
+	g_slice_free1 (sizeof (*cache), cache);
+}
-- 
2.39.5