]> source.dussan.org Git - rspamd.git/commitdiff
Start new regexp cache optimizer
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 1 Dec 2015 17:09:06 +0000 (17:09 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 1 Dec 2015 17:09:06 +0000 (17:09 +0000)
src/libserver/CMakeLists.txt
src/libserver/re_cache.c [new file with mode: 0644]
src/libserver/re_cache.h [new file with mode: 0644]

index 1f8df6c13cff64bd2a52d7604dcc54147d997f43..83006e8358acb3c63519c1ecf833cd6ccee8b57a 100644 (file)
@@ -12,6 +12,7 @@ SET(LIBRSPAMDSERVERSRC
                                ${CMAKE_CURRENT_SOURCE_DIR}/html.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/protocol.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/proxy.c
+                               ${CMAKE_CURRENT_SOURCE_DIR}/re_cache.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/spf.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/symbols_cache.c
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
new file mode 100644 (file)
index 0000000..d6ed367
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "re_cache.h"
+#include "xxhash.h"
+
+struct rspamd_re_class {
+       guint64 id;
+       enum rspamd_re_type type;
+       gpointer type_data;
+       gsize type_len;
+       GHashTable *re_ids;
+       GPtrArray *all_re;
+};
+
+struct rspamd_re_cache {
+       GHashTable *re_classes;
+};
+
+static guint64
+rspamd_re_cache_class_id (enum rspamd_re_type type,
+               gpointer type_data,
+               gsize datalen)
+{
+       XXH64_state_t st;
+
+       XXH64_reset (&st, rspamd_hash_seed ());
+       XXH64_update (&st, &type, sizeof (type));
+
+       if (datalen > 0) {
+               XXH64_update (&st, type_data, datalen);
+       }
+
+       return XXH64_digest (&st);
+}
+
+struct rspamd_re_cache *
+rspamd_re_cache_new (void)
+{
+       struct rspamd_re_cache *cache;
+
+       cache = g_slice_alloc (sizeof (*cache));
+       cache->re_classes = g_hash_table_new (g_int64_hash, g_int64_equal);
+}
+
+void
+rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
+               enum rspamd_re_type type, gpointer type_data, gsize datalen)
+{
+       guint64 class_id;
+       struct rspamd_re_class *re_class;
+
+       g_assert (cache != NULL);
+       g_assert (re != NULL);
+
+       class_id = rspamd_re_cache_class_id (type, type_data, datalen);
+       re_class = g_hash_table_lookup (cache->re_classes, &class_id);
+
+       if (re_class == NULL) {
+               re_class = g_slice_alloc0 (sizeof (*re_class));
+               re_class->id = class_id;
+               re_class->type_len = datalen;
+               re_class->type = type;
+               re_class->re_ids = g_hash_table_new (rspamd_regexp_hash,
+                               rspamd_regexp_equal);
+
+               if (datalen > 0) {
+                       re_class->type_data = g_slice_alloc (datalen);
+                       memcpy (re_class->type_data, type_data, datalen);
+               }
+
+               re_class->all_re = g_ptr_array_new ();
+               g_hash_table_insert (cache->re_classes, &re_class->id, re_class);
+       }
+
+       g_ptr_array_add (re_class->all_re, rspamd_regexp_ref (re));
+}
+
+/**
+ * Initialize and optimize re cache structure
+ */
+void
+rspamd_re_cache_init (struct rspamd_re_cache *cache)
+{
+
+}
+
+/**
+ * Get runtime data for a cache
+ */
+struct rspamd_re_runtime *rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);
+
+/**
+ * Process regexp runtime and return the result for a specific regexp
+ * @param task task object
+ * @param rt cache runtime object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ */
+gboolean rspamd_re_cache_process (struct rspamd_task *task,
+               struct rspamd_re_runtime *rt,
+               rspamd_regexp_t *re,
+               enum rspamd_re_type type,
+               gpointer type_data,
+               gsize datalen);
+
+/**
+ * Destroy runtime data
+ */
+void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);
+
+/**
+ * Destroy re cache
+ */
+void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
new file mode 100644 (file)
index 0000000..0cc77ea
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef RSPAMD_RE_CACHE_H
+#define RSPAMD_RE_CACHE_H
+
+#include "config.h"
+#include "libserver/task.h"
+#include "libutil/regexp.h"
+
+struct rspamd_re_cache;
+struct rspamd_re_runtime;
+
+enum rspamd_re_type {
+       RSPAMD_RE_HEADER,
+       RSPAMD_RE_MIME,
+       RSPAMD_RE_URL,
+       RSPAMD_RE_BODY
+};
+
+/**
+ * Initialize re_cache persistent structure
+ */
+struct rspamd_re_cache *rspamd_re_cache_new (void);
+
+/**
+ * Add the existing regexp to the cache
+ * @param cache cache object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ */
+void rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
+               enum rspamd_re_type type, gpointer type_data, gsize datalen);
+
+/**
+ * Initialize and optimize re cache structure
+ */
+void rspamd_re_cache_init (struct rspamd_re_cache *cache);
+
+/**
+ * Get runtime data for a cache
+ */
+struct rspamd_re_runtime* rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);
+
+/**
+ * Process regexp runtime and return the result for a specific regexp
+ * @param task task object
+ * @param rt cache runtime object
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ */
+gboolean rspamd_re_cache_process (struct rspamd_task *task,
+               struct rspamd_re_runtime *rt,
+               rspamd_regexp_t *re,
+               enum rspamd_re_type type,
+               gpointer type_data,
+               gsize datalen);
+
+/**
+ * Destroy runtime data
+ */
+void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);
+
+/**
+ * Destroy re cache
+ */
+void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);
+
+#endif