aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-01 17:09:06 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-12-01 17:09:06 +0000
commite9d134bb883087391bcc998042be89d79f668683 (patch)
tree71c7160332e9dd966019c00faa2fe9505f0b6c3f
parente50ff44d757e06bf0c17f7bd6a7b834a3285d37d (diff)
downloadrspamd-e9d134bb883087391bcc998042be89d79f668683.tar.gz
rspamd-e9d134bb883087391bcc998042be89d79f668683.zip
Start new regexp cache optimizer
-rw-r--r--src/libserver/CMakeLists.txt1
-rw-r--r--src/libserver/re_cache.c138
-rw-r--r--src/libserver/re_cache.h93
3 files changed, 232 insertions, 0 deletions
diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt
index 1f8df6c13..83006e835 100644
--- a/src/libserver/CMakeLists.txt
+++ b/src/libserver/CMakeLists.txt
@@ -12,6 +12,7 @@ SET(LIBRSPAMDSERVERSRC
${CMAKE_CURRENT_SOURCE_DIR}/html.c
${CMAKE_CURRENT_SOURCE_DIR}/protocol.c
${CMAKE_CURRENT_SOURCE_DIR}/proxy.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/re_cache.c
${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c
${CMAKE_CURRENT_SOURCE_DIR}/spf.c
${CMAKE_CURRENT_SOURCE_DIR}/symbols_cache.c
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
new file mode 100644
index 000000000..d6ed367b9
--- /dev/null
+++ b/src/libserver/re_cache.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "re_cache.h"
+#include "xxhash.h"
+
+struct rspamd_re_class {
+ guint64 id;
+ enum rspamd_re_type type;
+ gpointer type_data;
+ gsize type_len;
+ GHashTable *re_ids;
+ GPtrArray *all_re;
+};
+
+struct rspamd_re_cache {
+ GHashTable *re_classes;
+};
+
+static guint64
+rspamd_re_cache_class_id (enum rspamd_re_type type,
+ gpointer type_data,
+ gsize datalen)
+{
+ XXH64_state_t st;
+
+ XXH64_reset (&st, rspamd_hash_seed ());
+ XXH64_update (&st, &type, sizeof (type));
+
+ if (datalen > 0) {
+ XXH64_update (&st, type_data, datalen);
+ }
+
+ return XXH64_digest (&st);
+}
+
+struct rspamd_re_cache *
+rspamd_re_cache_new (void)
+{
+ struct rspamd_re_cache *cache;
+
+ cache = g_slice_alloc (sizeof (*cache));
+ cache->re_classes = g_hash_table_new (g_int64_hash, g_int64_equal);
+}
+
+void
+rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
+ enum rspamd_re_type type, gpointer type_data, gsize datalen)
+{
+ guint64 class_id;
+ struct rspamd_re_class *re_class;
+
+ g_assert (cache != NULL);
+ g_assert (re != NULL);
+
+ class_id = rspamd_re_cache_class_id (type, type_data, datalen);
+ re_class = g_hash_table_lookup (cache->re_classes, &class_id);
+
+ if (re_class == NULL) {
+ re_class = g_slice_alloc0 (sizeof (*re_class));
+ re_class->id = class_id;
+ re_class->type_len = datalen;
+ re_class->type = type;
+ re_class->re_ids = g_hash_table_new (rspamd_regexp_hash,
+ rspamd_regexp_equal);
+
+ if (datalen > 0) {
+ re_class->type_data = g_slice_alloc (datalen);
+ memcpy (re_class->type_data, type_data, datalen);
+ }
+
+ re_class->all_re = g_ptr_array_new ();
+ g_hash_table_insert (cache->re_classes, &re_class->id, re_class);
+ }
+
+ g_ptr_array_add (re_class->all_re, rspamd_regexp_ref (re));
+}
+
+/**
+ * Initialize and optimize re cache structure
+ */
+void
+rspamd_re_cache_init (struct rspamd_re_cache *cache)
+{
+
+}
+
+/**
+ * Get runtime data for a cache
+ */
+struct rspamd_re_runtime *rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);
+
+/**
+ * Process regexp runtime and return the result for a specific regexp
+ * @param task task object
+ * @param rt cache runtime object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ */
+gboolean rspamd_re_cache_process (struct rspamd_task *task,
+ struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gpointer type_data,
+ gsize datalen);
+
+/**
+ * Destroy runtime data
+ */
+void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);
+
+/**
+ * Destroy re cache
+ */
+void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
new file mode 100644
index 000000000..0cc77eae1
--- /dev/null
+++ b/src/libserver/re_cache.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef RSPAMD_RE_CACHE_H
+#define RSPAMD_RE_CACHE_H
+
+#include "config.h"
+#include "libserver/task.h"
+#include "libutil/regexp.h"
+
+struct rspamd_re_cache;
+struct rspamd_re_runtime;
+
+enum rspamd_re_type {
+ RSPAMD_RE_HEADER,
+ RSPAMD_RE_MIME,
+ RSPAMD_RE_URL,
+ RSPAMD_RE_BODY
+};
+
+/**
+ * Initialize re_cache persistent structure
+ */
+struct rspamd_re_cache *rspamd_re_cache_new (void);
+
+/**
+ * Add the existing regexp to the cache
+ * @param cache cache object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ */
+void rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
+ enum rspamd_re_type type, gpointer type_data, gsize datalen);
+
+/**
+ * Initialize and optimize re cache structure
+ */
+void rspamd_re_cache_init (struct rspamd_re_cache *cache);
+
+/**
+ * Get runtime data for a cache
+ */
+struct rspamd_re_runtime* rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);
+
+/**
+ * Process regexp runtime and return the result for a specific regexp
+ * @param task task object
+ * @param rt cache runtime object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ */
+gboolean rspamd_re_cache_process (struct rspamd_task *task,
+ struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gpointer type_data,
+ gsize datalen);
+
+/**
+ * Destroy runtime data
+ */
+void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);
+
+/**
+ * Destroy re cache
+ */
+void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);
+
+#endif