From: Vsevolod Stakhov Date: Tue, 1 Dec 2015 17:09:06 +0000 (+0000) Subject: Start new regexp cache optimizer X-Git-Tag: 1.1.0~437 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=e9d134bb883087391bcc998042be89d79f668683;p=rspamd.git Start new regexp cache optimizer --- diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index 1f8df6c13..83006e835 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -12,6 +12,7 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/html.c ${CMAKE_CURRENT_SOURCE_DIR}/protocol.c ${CMAKE_CURRENT_SOURCE_DIR}/proxy.c + ${CMAKE_CURRENT_SOURCE_DIR}/re_cache.c ${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c ${CMAKE_CURRENT_SOURCE_DIR}/spf.c ${CMAKE_CURRENT_SOURCE_DIR}/symbols_cache.c diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c new file mode 100644 index 000000000..d6ed367b9 --- /dev/null +++ b/src/libserver/re_cache.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "re_cache.h" +#include "xxhash.h" + +struct rspamd_re_class { + guint64 id; + enum rspamd_re_type type; + gpointer type_data; + gsize type_len; + GHashTable *re_ids; + GPtrArray *all_re; +}; + +struct rspamd_re_cache { + GHashTable *re_classes; +}; + +static guint64 +rspamd_re_cache_class_id (enum rspamd_re_type type, + gpointer type_data, + gsize datalen) +{ + XXH64_state_t st; + + XXH64_reset (&st, rspamd_hash_seed ()); + XXH64_update (&st, &type, sizeof (type)); + + if (datalen > 0) { + XXH64_update (&st, type_data, datalen); + } + + return XXH64_digest (&st); +} + +struct rspamd_re_cache * +rspamd_re_cache_new (void) +{ + struct rspamd_re_cache *cache; + + cache = g_slice_alloc (sizeof (*cache)); + cache->re_classes = g_hash_table_new (g_int64_hash, g_int64_equal); +} + +void +rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re, + enum rspamd_re_type type, gpointer type_data, gsize datalen) +{ + guint64 class_id; + struct rspamd_re_class *re_class; + + g_assert (cache != NULL); + g_assert (re != NULL); + + class_id = rspamd_re_cache_class_id (type, type_data, datalen); + re_class = g_hash_table_lookup (cache->re_classes, &class_id); + + if (re_class == NULL) { + re_class = g_slice_alloc0 (sizeof (*re_class)); + re_class->id = class_id; + re_class->type_len = datalen; + re_class->type = type; + re_class->re_ids = g_hash_table_new (rspamd_regexp_hash, + rspamd_regexp_equal); + + if (datalen > 0) { + re_class->type_data = g_slice_alloc (datalen); + memcpy (re_class->type_data, type_data, datalen); + } + + re_class->all_re = g_ptr_array_new (); + g_hash_table_insert (cache->re_classes, &re_class->id, re_class); + } + + g_ptr_array_add (re_class->all_re, rspamd_regexp_ref (re)); +} + +/** + * Initialize and optimize re cache structure + */ +void +rspamd_re_cache_init (struct rspamd_re_cache *cache) +{ + +} + +/** + * Get runtime data for a cache + */ +struct rspamd_re_runtime *rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache); + +/** + * Process regexp runtime and return the result for a specific regexp + * @param task task object + * @param rt cache runtime object + * @param re regexp object + * @param type type of object + * @param type_data associated data with the type (e.g. header name) + * @param datalen associated data length + */ +gboolean rspamd_re_cache_process (struct rspamd_task *task, + struct rspamd_re_runtime *rt, + rspamd_regexp_t *re, + enum rspamd_re_type type, + gpointer type_data, + gsize datalen); + +/** + * Destroy runtime data + */ +void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt); + +/** + * Destroy re cache + */ +void rspamd_re_cache_destroy (struct rspamd_re_cache *cache); diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h new file mode 100644 index 000000000..0cc77eae1 --- /dev/null +++ b/src/libserver/re_cache.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef RSPAMD_RE_CACHE_H +#define RSPAMD_RE_CACHE_H + +#include "config.h" +#include "libserver/task.h" +#include "libutil/regexp.h" + +struct rspamd_re_cache; +struct rspamd_re_runtime; + +enum rspamd_re_type { + RSPAMD_RE_HEADER, + RSPAMD_RE_MIME, + RSPAMD_RE_URL, + RSPAMD_RE_BODY +}; + +/** + * Initialize re_cache persistent structure + */ +struct rspamd_re_cache *rspamd_re_cache_new (void); + +/** + * Add the existing regexp to the cache + * @param cache cache object + * @param re regexp object + * @param type type of object + * @param type_data associated data with the type (e.g. header name) + * @param datalen associated data length + */ +void rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re, + enum rspamd_re_type type, gpointer type_data, gsize datalen); + +/** + * Initialize and optimize re cache structure + */ +void rspamd_re_cache_init (struct rspamd_re_cache *cache); + +/** + * Get runtime data for a cache + */ +struct rspamd_re_runtime* rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache); + +/** + * Process regexp runtime and return the result for a specific regexp + * @param task task object + * @param rt cache runtime object + * @param re regexp object + * @param type type of object + * @param type_data associated data with the type (e.g. header name) + * @param datalen associated data length + */ +gboolean rspamd_re_cache_process (struct rspamd_task *task, + struct rspamd_re_runtime *rt, + rspamd_regexp_t *re, + enum rspamd_re_type type, + gpointer type_data, + gsize datalen); + +/** + * Destroy runtime data + */ +void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt); + +/** + * Destroy re cache + */ +void rspamd_re_cache_destroy (struct rspamd_re_cache *cache); + +#endif