Browse Source

Start new regexp cache optimizer

tags/1.1.0
Vsevolod Stakhov 8 years ago
parent
commit
e9d134bb88
3 changed files with 232 additions and 0 deletions
  1. 1
    0
      src/libserver/CMakeLists.txt
  2. 138
    0
      src/libserver/re_cache.c
  3. 93
    0
      src/libserver/re_cache.h

+ 1
- 0
src/libserver/CMakeLists.txt View File

@@ -12,6 +12,7 @@ SET(LIBRSPAMDSERVERSRC
${CMAKE_CURRENT_SOURCE_DIR}/html.c
${CMAKE_CURRENT_SOURCE_DIR}/protocol.c
${CMAKE_CURRENT_SOURCE_DIR}/proxy.c
${CMAKE_CURRENT_SOURCE_DIR}/re_cache.c
${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c
${CMAKE_CURRENT_SOURCE_DIR}/spf.c
${CMAKE_CURRENT_SOURCE_DIR}/symbols_cache.c

+ 138
- 0
src/libserver/re_cache.c View File

@@ -0,0 +1,138 @@
/*
* Copyright (c) 2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "re_cache.h"
#include "xxhash.h"

struct rspamd_re_class {
guint64 id;
enum rspamd_re_type type;
gpointer type_data;
gsize type_len;
GHashTable *re_ids;
GPtrArray *all_re;
};

struct rspamd_re_cache {
GHashTable *re_classes;
};

static guint64
rspamd_re_cache_class_id (enum rspamd_re_type type,
gpointer type_data,
gsize datalen)
{
XXH64_state_t st;

XXH64_reset (&st, rspamd_hash_seed ());
XXH64_update (&st, &type, sizeof (type));

if (datalen > 0) {
XXH64_update (&st, type_data, datalen);
}

return XXH64_digest (&st);
}

struct rspamd_re_cache *
rspamd_re_cache_new (void)
{
struct rspamd_re_cache *cache;

cache = g_slice_alloc (sizeof (*cache));
cache->re_classes = g_hash_table_new (g_int64_hash, g_int64_equal);
}

void
rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
enum rspamd_re_type type, gpointer type_data, gsize datalen)
{
guint64 class_id;
struct rspamd_re_class *re_class;

g_assert (cache != NULL);
g_assert (re != NULL);

class_id = rspamd_re_cache_class_id (type, type_data, datalen);
re_class = g_hash_table_lookup (cache->re_classes, &class_id);

if (re_class == NULL) {
re_class = g_slice_alloc0 (sizeof (*re_class));
re_class->id = class_id;
re_class->type_len = datalen;
re_class->type = type;
re_class->re_ids = g_hash_table_new (rspamd_regexp_hash,
rspamd_regexp_equal);

if (datalen > 0) {
re_class->type_data = g_slice_alloc (datalen);
memcpy (re_class->type_data, type_data, datalen);
}

re_class->all_re = g_ptr_array_new ();
g_hash_table_insert (cache->re_classes, &re_class->id, re_class);
}

g_ptr_array_add (re_class->all_re, rspamd_regexp_ref (re));
}

/**
* Initialize and optimize re cache structure
*/
void
rspamd_re_cache_init (struct rspamd_re_cache *cache)
{

}

/**
* Get runtime data for a cache
*/
struct rspamd_re_runtime *rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);

/**
* Process regexp runtime and return the result for a specific regexp
* @param task task object
* @param rt cache runtime object
* @param re regexp object
* @param type type of object
* @param type_data associated data with the type (e.g. header name)
* @param datalen associated data length
*/
gboolean rspamd_re_cache_process (struct rspamd_task *task,
struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
enum rspamd_re_type type,
gpointer type_data,
gsize datalen);

/**
* Destroy runtime data
*/
void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);

/**
* Destroy re cache
*/
void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);

+ 93
- 0
src/libserver/re_cache.h View File

@@ -0,0 +1,93 @@
/*
* Copyright (c) 2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RSPAMD_RE_CACHE_H
#define RSPAMD_RE_CACHE_H

#include "config.h"
#include "libserver/task.h"
#include "libutil/regexp.h"

struct rspamd_re_cache;
struct rspamd_re_runtime;

enum rspamd_re_type {
RSPAMD_RE_HEADER,
RSPAMD_RE_MIME,
RSPAMD_RE_URL,
RSPAMD_RE_BODY
};

/**
* Initialize re_cache persistent structure
*/
struct rspamd_re_cache *rspamd_re_cache_new (void);

/**
* Add the existing regexp to the cache
* @param cache cache object
* @param re regexp object
* @param type type of object
* @param type_data associated data with the type (e.g. header name)
* @param datalen associated data length
*/
void rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
enum rspamd_re_type type, gpointer type_data, gsize datalen);

/**
* Initialize and optimize re cache structure
*/
void rspamd_re_cache_init (struct rspamd_re_cache *cache);

/**
* Get runtime data for a cache
*/
struct rspamd_re_runtime* rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache);

/**
* Process regexp runtime and return the result for a specific regexp
* @param task task object
* @param rt cache runtime object
* @param re regexp object
* @param type type of object
* @param type_data associated data with the type (e.g. header name)
* @param datalen associated data length
*/
gboolean rspamd_re_cache_process (struct rspamd_task *task,
struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
enum rspamd_re_type type,
gpointer type_data,
gsize datalen);

/**
* Destroy runtime data
*/
void rspamd_re_cache_runtime_destroy (struct rspamd_re_runtime *rt);

/**
* Destroy re cache
*/
void rspamd_re_cache_destroy (struct rspamd_re_cache *cache);

#endif

Loading…
Cancel
Save