summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-03 15:15:22 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-03 15:15:22 +0000
commit c2603043a10ba22dca7f80928a87a544d0f4e573 (patch)
tree 4695f8d2dd05f1b2a7b6ce65b3d8760755f82fa4
parent 5936aed13e97ac84e44e38a00dcfde16ec9fd173 (diff)
download rspamd-c2603043a10ba22dca7f80928a87a544d0f4e573.tar.gz
rspamd-c2603043a10ba22dca7f80928a87a544d0f4e573.zip
Add more logic for the new re cache system
-rw-r--r-- src/libserver/re_cache.c 166
1 files changed, 139 insertions, 27 deletions
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index d6ed367b9..f4884e6b9 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -24,6 +24,8 @@
#include "re_cache.h"
#include "xxhash.h"
+#include "cryptobox.h"
+#include "ref.h"
struct rspamd_re_class {
guint64 id;
@@ -32,10 +34,19 @@ struct rspamd_re_class {
gsize type_len;
GHashTable *re_ids;
GPtrArray *all_re;
+ gchar hash[rspamd_cryptobox_HASHBYTES * 2 + 1];
};
struct rspamd_re_cache {
GHashTable *re_classes;
+ ref_entry_t ref;
+ guint nre;
+};
+
+struct rspamd_re_runtime {
+ guchar *checked;
+ guchar *results;
+ struct rspamd_re_cache *cache;
};
static guint64
@@ -62,6 +73,10 @@ rspamd_re_cache_new (void)
cache = g_slice_alloc (sizeof (*cache));
cache->re_classes = g_hash_table_new (g_int64_hash, g_int64_equal);
+ cache->nre = 0;
+ REF_INIT_RETAIN (cache, rspamd_re_cache_destroy);
+
+ return cache;
}
void
@@ -95,44 +110,141 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
}
g_ptr_array_add (re_class->all_re, rspamd_regexp_ref (re));
+ /*
+ * We set re id based on the global position in the cache
+ */
+ rspamd_regexp_set_cache_id (re, cache->nre ++);
}
-/**
- * Initialize and optimize re cache structure
- */
void
rspamd_re_cache_init (struct rspamd_re_cache *cache)
{
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_re_class *re_class;
+ rspamd_cryptobox_hash_state_t st;
+ rspamd_regexp_t *re;
+ guint i;
+ guchar hash_out[rspamd_cryptobox_HASHBYTES];
+
+ g_assert (cache != NULL);
+
+ g_hash_table_iter_init (&it, cache->re_classes);
+ while (g_hash_table_iter_next (&it, &k, &v)) {
+ re_class = v;
+ rspamd_cryptobox_hash_init (&st, NULL, 0);
+ rspamd_cryptobox_hash_update (&st, (gpointer)&re_class->id,
+ sizeof (re_class->id));
+
+ for (i = 0; i < re_class->all_re->len; i ++) {
+ re = g_ptr_array_index (re_class->all_re, i);
+ rspamd_cryptobox_hash_update (&st, rspamd_regexp_get_id (re),
+ rspamd_cryptobox_HASHBYTES);
+ }
+
+ rspamd_cryptobox_hash_final (&st, hash_out);
+ rspamd_snprintf (re_class->hash, sizeof (re_class->hash), "%*xs",
+ (gint)rspamd_cryptobox_HASHBYTES, hash_out);
+ }
}
-/**
- * Get runtime data for a cache
- */
-struct rspamd_re_runtime *rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);
-
-/**
- * Process regexp runtime and return the result for a specific regexp
- * @param task task object
- * @param rt cache runtime object
- * @param re regexp object
- * @param type type of object
- * @param type_data associated data with the type (e.g. header name)
- * @param datalen associated data length
- */
-gboolean rspamd_re_cache_process (struct rspamd_task *task,
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache)
+{
+ struct rspamd_re_runtime *rt;
+ g_assert (cache != NULL);
+
+ rt = g_slice_alloc (sizeof (*rt));
+ rt->cache = cache;
+ REF_RETAIN (cache);
+ rt->checked = g_slice_alloc0 (NBYTES (cache->nre));
+ rt->results = g_slice_alloc0 (NBYTES (cache->nre));
+
+ return rt;
+}
+
+gboolean
+rspamd_re_cache_process (struct rspamd_task *task,
struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
enum rspamd_re_type type,
gpointer type_data,
- gsize datalen);
+ gsize datalen)
+{
+ guint64 class_id, re_id;
+ struct rspamd_re_class *re_class;
+ struct rspamd_re_cache *cache;
-/**
- * Destroy runtime data
- */
-void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);
+ g_assert (rt != NULL);
+ g_assert (task != NULL);
+ g_assert (re != NULL);
-/**
- * Destroy re cache
- */
-void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);
+ re_id = rspamd_regexp_get_cache_id (re);
+
+ if (re_id == RSPAMD_INVALID_ID) {
+ msg_err_task ("re '%s' has no valid id for the cache",
+ rspamd_regexp_get_pattern (re));
+ return FALSE;
+ }
+
+ if (isset (rt->checked, re_id)) {
+ /* Fast path */
+ return isset (rt->results, re_id);
+ }
+ else {
+ /* Slow path */
+ cache = rt->cache;
+ class_id = rspamd_re_cache_class_id (type, type_data, datalen);
+ re_class = g_hash_table_lookup (cache->re_classes, &class_id);
+
+ if (re_class == NULL) {
+ msg_err_task ("cannot find re class for regexp '%s'",
+ rspamd_regexp_get_pattern (re));
+ return FALSE;
+ }
+ }
+
+ return FALSE;
+}
+
+void
+rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt)
+{
+ g_assert (rt != NULL);
+
+ g_slice_free1 (NBYTES (rt->cache->nre), rt->checked);
+ g_slice_free1 (NBYTES (rt->cache->nre), rt->results);
+ REF_RELEASE (rt->cache);
+ g_slice_free1 (sizeof (*rt), rt);
+}
+
+void
+rspamd_re_cache_destroy (struct rspamd_re_cache *cache)
+{
+ GHashTableIter it;
+ gpointer k, v;
+ struct rspamd_re_class *re_class;
+ rspamd_regexp_t *re;
+ guint i;
+
+ g_assert (cache != NULL);
+ g_hash_table_iter_init (&it, cache->re_classes);
+
+ while (g_hash_table_iter_next (&it, &k, &v)) {
+ re_class = v;
+ g_hash_table_iter_steal (&it);
+
+ for (i = 0; i < re_class->all_re->len; i++) {
+ re = g_ptr_array_index (re_class->all_re, i);
+
+ rspamd_regexp_set_cache_id (re, RSPAMD_INVALID_ID);
+ rspamd_regexp_unref (re);
+ }
+
+ g_slice_free1 (sizeof (*re_class), re_class);
+ }
+
+ g_hash_table_unref (cache->re_classes);
+ g_slice_free1 (sizeof (*cache), cache);
+}