diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-03-04 21:59:48 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-03-04 21:59:48 +0000 |
commit | 58ff3a43b71928263415a8a874943de9de158018 (patch) | |
tree | 1ff189aea7cf8c72d4d0953e93daa965ebec9c7f /src/libutil | |
parent | 7b00e0c737ca83763135b4e212c0b59b0610ac3f (diff) | |
download | rspamd-58ff3a43b71928263415a8a874943de9de158018.tar.gz rspamd-58ff3a43b71928263415a8a874943de9de158018.zip |
Write skeleton of rspamd fast regexps.
Diffstat (limited to 'src/libutil')
-rw-r--r-- | src/libutil/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/libutil/regexp.c | 101 | ||||
-rw-r--r-- | src/libutil/regexp.h | 108 |
3 files changed, 210 insertions, 0 deletions
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt
index e7ebe2a47..3e8fd87a7 100644
--- a/src/libutil/CMakeLists.txt
+++ b/src/libutil/CMakeLists.txt
@@ -14,6 +14,7 @@ SET(LIBRSPAMDUTILSRC
 	${CMAKE_CURRENT_SOURCE_DIR}/mem_pool.c
 	${CMAKE_CURRENT_SOURCE_DIR}/printf.c
 	${CMAKE_CURRENT_SOURCE_DIR}/radix.c
+	${CMAKE_CURRENT_SOURCE_DIR}/regexp.c
 	${CMAKE_CURRENT_SOURCE_DIR}/rrd.c
 	${CMAKE_CURRENT_SOURCE_DIR}/shingles.c
 	${CMAKE_CURRENT_SOURCE_DIR}/trie.c
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
new file mode 100644
index 000000000..e4fbdef12
--- /dev/null
+++ b/src/libutil/regexp.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "regexp.h"
+#include "blake2.h"
+#include "ref.h"
+#include <pcre.h>
+
+struct rspamd_regexp_s {
+	gdouble exec_time; /**< average execution time */
+	pcre *re;
+	pcre_extra *extra;
+	pcre *raw_re;
+	pcre_extra *raw_extra;
+	guchar id[BLAKE2B_OUTBYTES / 2];
+	ref_entry_t ref;
+};
+
+struct rspamd_regexp_cache {
+	GHashTable *tbl;
+};
+
+static struct rspamd_regexp_cache *global_re_cache = NULL;
+
+rspamd_regexp_t*
+rspamd_regexp_new (const gchar *pattern, const gchar *flags,
+		GError **err)
+{
+	return NULL;
+}
+
+gboolean
+rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len)
+{
+	return FALSE;
+}
+
+gboolean
+rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len)
+{
+	return FALSE;
+}
+
+void
+rspamd_regexp_unref (rspamd_regexp_t *re)
+{
+	REF_RELEASE (re);
+}
+
+struct rspamd_regexp_cache*
+rspamd_regexp_cache_new (void)
+{
+	return NULL;
+}
+
+
+rspamd_regexp_t*
+rspamd_regexp_cache_query (struct rspamd_regexp_cache* cache,
+		const gchar *pattern,
+		const gchar *flags)
+{
+	return NULL;
+}
+
+
+rspamd_regexp_t*
+rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
+		const gchar *pattern,
+		const gchar *flags, GError **err)
+{
+	return NULL;
+}
+
+void
+rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache)
+{
+
+}
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
new file mode 100644
index 000000000..4ca4196a0
--- /dev/null
+++ b/src/libutil/regexp.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef REGEXP_H_
+#define REGEXP_H_
+
+#include "config.h"
+
+typedef struct rspamd_regexp_s rspamd_regexp_t;
+struct rspamd_regexp_cache;
+
+/**
+ * Create new rspamd regexp
+ * @param pattern regexp pattern
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new regexp object
+ */
+rspamd_regexp_t* rspamd_regexp_new (const gchar *pattern, const gchar *flags,
+		GError **err);
+
+/**
+ * Search the specified regexp in the text
+ * @param re
+ * @param text
+ * @param len
+ * @return
+ */
+gboolean rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len);
+
+
+/**
+ * Exact match of the specified text against the regexp
+ * @param re
+ * @param text
+ * @param len
+ * @return
+ */
+gboolean rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len);
+
+/**
+ * Increase refcount for a regexp object
+ */
+rspamd_regexp_t* rspamd_regexp_ref (rspamd_regexp_t *re);
+
+/**
+ * Unref regexp object
+ * @param re
+ */
+void rspamd_regexp_unref (rspamd_regexp_t *re);
+
+/**
+ * Create new regexp cache
+ * @return
+ */
+struct rspamd_regexp_cache* rspamd_regexp_cache_new (void);
+
+/**
+ * Query rspamd cache for a specified regexp
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param pattern
+ * @param flags
+ * @return
+ */
+rspamd_regexp_t* rspamd_regexp_cache_query (struct rspamd_regexp_cache* cache,
+		const gchar *pattern,
+		const gchar *flags);
+
+/**
+ * Create or get cached regexp from the specified cache
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param pattern regexp pattern
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new regexp object
+ */
+rspamd_regexp_t* rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
+		const gchar *pattern,
+		const gchar *flags, GError **err);
+
+/**
+ * Destroy regexp cache and unref all elements inside it
+ * @param cache
+ */
+void rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache);
+
+#endif /* REGEXP_H_ */