summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-03-04 21:59:48 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-03-04 21:59:48 +0000
commit58ff3a43b71928263415a8a874943de9de158018 (patch)
tree1ff189aea7cf8c72d4d0953e93daa965ebec9c7f /src
parent7b00e0c737ca83763135b4e212c0b59b0610ac3f (diff)
downloadrspamd-58ff3a43b71928263415a8a874943de9de158018.tar.gz
rspamd-58ff3a43b71928263415a8a874943de9de158018.zip
Write skeleton of rspamd fast regexps.
Diffstat (limited to 'src')
-rw-r--r--src/libmime/expressions.c8
-rw-r--r--src/libmime/expressions.h8
-rw-r--r--src/libserver/cfg_file.h2
-rw-r--r--src/libutil/CMakeLists.txt1
-rw-r--r--src/libutil/regexp.c101
-rw-r--r--src/libutil/regexp.h108
-rw-r--r--src/plugins/regexp.c28
7 files changed, 233 insertions, 23 deletions
diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c
index 07253a2bc..769b7dc14 100644
--- a/src/libmime/expressions.c
+++ b/src/libmime/expressions.c
@@ -647,12 +647,12 @@ parse_expression (rspamd_mempool_t * pool, gchar *line)
/*
* Rspamd regexp utility functions
*/
-struct rspamd_regexp *
+struct rspamd_regexp_element *
parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
{
const gchar *begin, *end, *p, *src, *start;
gchar *dbegin, *dend;
- struct rspamd_regexp *result, *check;
+ struct rspamd_regexp_element *result, *check;
gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE;
GError *err = NULL;
@@ -662,7 +662,7 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
}
src = line;
- result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp));
+ result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_element));
/* Skip whitespaces */
while (g_ascii_isspace (*line)) {
line++;
@@ -815,7 +815,7 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
/* Avoid multiply regexp structures for similar regexps */
if ((check =
- (struct rspamd_regexp *)re_cache_check (result->regexp_text,
+ (struct rspamd_regexp_element *)re_cache_check (result->regexp_text,
pool)) != NULL) {
/* Additional check for headers */
if (result->type == REGEXP_HEADER || result->type ==
diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h
index 1ba02d956..469cc690d 100644
--- a/src/libmime/expressions.h
+++ b/src/libmime/expressions.h
@@ -10,7 +10,7 @@
#include <lua.h>
struct rspamd_task;
-struct rspamd_regexp;
+struct rspamd_regexp_element;
/**
* Rspamd expression function
@@ -60,7 +60,7 @@ typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args,
* @param line incoming line
* @return regexp structure or NULL in case of error
*/
-struct rspamd_regexp * parse_regexp (rspamd_mempool_t *pool,
+struct rspamd_regexp_element * parse_regexp (rspamd_mempool_t *pool,
const gchar *line,
gboolean raw_mode);
@@ -119,7 +119,7 @@ void re_cache_del (const gchar *line, rspamd_mempool_t *pool);
* @param result numeric result of this regexp
*/
void task_cache_add (struct rspamd_task *task,
- struct rspamd_regexp *re,
+ struct rspamd_regexp_element *re,
gint32 result);
/**
@@ -128,7 +128,7 @@ void task_cache_add (struct rspamd_task *task,
* @param pointer regexp data
* @return numeric result if value exists or -1 if not
*/
-gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp *re);
+gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re);
/**
* Parse and return a single function argument for a function (may recurse)
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 8af3c542f..8c58a4941 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -65,7 +65,7 @@ enum rspamd_log_type {
/**
* Regexp structure
*/
-struct rspamd_regexp {
+struct rspamd_regexp_element {
enum rspamd_regexp_type type; /**< regexp type */
gchar *regexp_text; /**< regexp text representation */
GRegex *regexp; /**< glib regexp structure */
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt
index e7ebe2a47..3e8fd87a7 100644
--- a/src/libutil/CMakeLists.txt
+++ b/src/libutil/CMakeLists.txt
@@ -14,6 +14,7 @@ SET(LIBRSPAMDUTILSRC
${CMAKE_CURRENT_SOURCE_DIR}/mem_pool.c
${CMAKE_CURRENT_SOURCE_DIR}/printf.c
${CMAKE_CURRENT_SOURCE_DIR}/radix.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/regexp.c
${CMAKE_CURRENT_SOURCE_DIR}/rrd.c
${CMAKE_CURRENT_SOURCE_DIR}/shingles.c
${CMAKE_CURRENT_SOURCE_DIR}/trie.c
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
new file mode 100644
index 000000000..e4fbdef12
--- /dev/null
+++ b/src/libutil/regexp.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "regexp.h"
+#include "blake2.h"
+#include "ref.h"
+#include <pcre.h>
+
+struct rspamd_regexp_s {
+ gdouble exec_time; /**< average execution time */
+ pcre *re; /**< compiled PCRE pattern */
+ pcre_extra *extra; /**< PCRE extra data accompanying re (presumably from pcre_study; TODO confirm) */
+ pcre *raw_re; /**< second compiled pattern; presumably a raw (non-UTF) variant, TODO confirm */
+ pcre_extra *raw_extra; /**< PCRE extra data accompanying raw_re */
+ guchar id[BLAKE2B_OUTBYTES / 2]; /**< identifier bytes; sized to half of BLAKE2B_OUTBYTES */
+ ref_entry_t ref; /**< reference-counting entry; presumably what REF_RELEASE operates on */
+};
+
+struct rspamd_regexp_cache {
+ GHashTable *tbl; /**< hash table backing the cache; key/value layout is not defined by this skeleton */
+};
+
+static struct rspamd_regexp_cache *global_re_cache = NULL; /* presumably the "superglobal" cache regexp.h mentions for cache == NULL; not referenced anywhere else in this file yet */
+
+rspamd_regexp_t*
+rspamd_regexp_new (const gchar *pattern, const gchar *flags,
+ GError **err)
+{
+ return NULL; /* skeleton: nothing is compiled yet; pattern and flags are ignored and err is left untouched */
+}
+
+gboolean
+rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len)
+{
+ return FALSE; /* skeleton: no searching is performed; all arguments are ignored */
+}
+
+gboolean
+rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len)
+{
+ return FALSE; /* skeleton: no matching is performed; all arguments are ignored */
+}
+
+void
+rspamd_regexp_unref (rspamd_regexp_t *re)
+{
+ REF_RELEASE (re); /* NOTE(review): re is handed to the macro without a NULL check here; confirm REF_RELEASE tolerates NULL or that callers never pass it */
+}
+
+struct rspamd_regexp_cache*
+rspamd_regexp_cache_new (void)
+{
+ return NULL; /* skeleton: no cache object is allocated yet */
+}
+
+
+rspamd_regexp_t*
+rspamd_regexp_cache_query (struct rspamd_regexp_cache* cache,
+ const gchar *pattern,
+ const gchar *flags)
+{
+ return NULL; /* skeleton: always returns NULL; the cache is not consulted */
+}
+
+
+rspamd_regexp_t*
+rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
+ const gchar *pattern,
+ const gchar *flags, GError **err)
+{
+ return NULL; /* skeleton: nothing is created or cached; err is left untouched */
+}
+
+void
+rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache)
+{
+ /* skeleton: intentionally empty for now; cache is neither freed nor its elements unreferenced */
+}
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
new file mode 100644
index 000000000..4ca4196a0
--- /dev/null
+++ b/src/libutil/regexp.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef REGEXP_H_
+#define REGEXP_H_
+
+#include "config.h"
+
+typedef struct rspamd_regexp_s rspamd_regexp_t;
+struct rspamd_regexp_cache;
+
+/**
+ * Create new rspamd regexp
+ * @param pattern regexp pattern
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new regexp object (NOTE(review): regexp.c currently stubs this out, always returning NULL and leaving err unset)
+ */
+rspamd_regexp_t* rspamd_regexp_new (const gchar *pattern, const gchar *flags,
+ GError **err);
+
+/**
+ * Search the specified regexp in the text
+ * @param re regexp object to search with
+ * @param text text to search in
+ * @param len length of text (presumably in bytes; TODO confirm)
+ * @return presumably TRUE if the regexp is found in text (NOTE(review): regexp.c currently stubs this out and always returns FALSE)
+ */
+gboolean rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len);
+
+
+/**
+ * Exact match of the specified text against the regexp
+ * @param re regexp object to match with
+ * @param text text to match
+ * @param len length of text (presumably in bytes; TODO confirm)
+ * @return presumably TRUE if the whole text matches (NOTE(review): regexp.c currently stubs this out and always returns FALSE)
+ */
+gboolean rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len);
+
+/**
+ * Increase refcount for a regexp object (NOTE(review): declared here, but regexp.c does not define this function yet)
+ */
+rspamd_regexp_t* rspamd_regexp_ref (rspamd_regexp_t *re);
+
+/**
+ * Unref regexp object
+ * @param re regexp object to release (regexp.c passes it straight to REF_RELEASE)
+ */
+void rspamd_regexp_unref (rspamd_regexp_t *re);
+
+/**
+ * Create new regexp cache
+ * @return new cache object (NOTE(review): regexp.c currently stubs this out and always returns NULL)
+ */
+struct rspamd_regexp_cache* rspamd_regexp_cache_new (void);
+
+/**
+ * Query rspamd cache for a specified regexp
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param pattern regexp pattern to look up
+ * @param flags flags to look up together with the pattern
+ * @return presumably the cached regexp, or NULL if there is none (NOTE(review): regexp.c currently stubs this out and always returns NULL)
+ */
+rspamd_regexp_t* rspamd_regexp_cache_query (struct rspamd_regexp_cache* cache,
+ const gchar *pattern,
+ const gchar *flags);
+
+/**
+ * Create or get cached regexp from the specified cache
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param pattern regexp pattern
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new or cached regexp object (NOTE(review): regexp.c currently stubs this out, always returning NULL and leaving err unset)
+ */
+rspamd_regexp_t* rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
+ const gchar *pattern,
+ const gchar *flags, GError **err);
+
+/**
+ * Destroy regexp cache and unref all elements inside it
+ * @param cache cache to destroy (NOTE(review): regexp.c currently stubs this out as an empty function)
+ */
+void rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache);
+
+#endif /* REGEXP_H_ */
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 47cdc649f..848fdfdb2 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -120,7 +120,7 @@ G_LOCK_DEFINE (task_cache_mtx);
void
task_cache_add (struct rspamd_task *task,
- struct rspamd_regexp *re,
+ struct rspamd_regexp_element *re,
gint32 result)
{
if (result == 0) {
@@ -142,7 +142,7 @@ task_cache_add (struct rspamd_task *task,
}
gint32
-task_cache_check (struct rspamd_task *task, struct rspamd_regexp *re)
+task_cache_check (struct rspamd_task *task, struct rspamd_regexp_element *re)
{
gpointer res;
gint32 r;
@@ -370,7 +370,7 @@ regexp_module_reconfig (struct rspamd_config *cfg)
struct url_regexp_param {
struct rspamd_task *task;
GRegex *regexp;
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
gboolean found;
};
@@ -405,7 +405,7 @@ tree_url_callback (gpointer key, gpointer value, void *data)
}
static gsize
-process_regexp (struct rspamd_regexp *re,
+process_regexp (struct rspamd_regexp_element *re,
struct rspamd_task *task,
const gchar *additional,
gint limit,
@@ -855,7 +855,7 @@ process_regexp_expression (struct expression *expr,
GQueue *stack;
gsize cur, op1, op2;
struct expression *it = expr;
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
gboolean try_optimize = TRUE;
stack = g_queue_new ();
@@ -863,7 +863,7 @@ process_regexp_expression (struct expression *expr,
while (it) {
if (it->type == EXPR_REGEXP_PARSED) {
/* Find corresponding symbol */
- cur = process_regexp ((struct rspamd_regexp *)it->content.operand,
+ cur = process_regexp ((struct rspamd_regexp_element *)it->content.operand,
task,
additional,
0,
@@ -1202,7 +1202,7 @@ rspamd_regexp_occurs_number (struct rspamd_task *task,
{
gint limit;
struct expression_argument *arg;
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
gchar *param, *err_str, op;
int_compare_func f = NULL;
@@ -1289,7 +1289,7 @@ match_smtp_data (struct rspamd_task *task,
const gchar *re_text,
const gchar *what)
{
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
gint r;
if (*re_text == '/') {
@@ -1471,7 +1471,7 @@ lua_regexp_match (lua_State *L)
void *ud = luaL_checkudata (L, 1, "rspamd{task}");
struct rspamd_task *task;
const gchar *re_text;
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
gint r = 0;
luaL_argcheck (L, ud != NULL, 1, "'task' expected");
@@ -1504,7 +1504,7 @@ rspamd_content_type_compare_param (struct rspamd_task * task,
{
gchar *param_name, *param_pattern;
const gchar *param_data;
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
struct expression_argument *arg, *arg1;
GMimeObject *part;
GMimeContentType *ct;
@@ -1701,7 +1701,7 @@ rspamd_content_type_is_subtype (struct rspamd_task *task,
void *unused)
{
gchar *param_pattern;
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
struct expression_argument *arg, *arg1;
GMimeObject *part;
GMimeContentType *ct;
@@ -1806,7 +1806,7 @@ rspamd_content_type_is_type (struct rspamd_task * task,
void *unused)
{
gchar *param_pattern;
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
struct expression_argument *arg, *arg1;
GMimeObject *part;
GMimeContentType *ct;
@@ -1909,7 +1909,7 @@ static gboolean
compare_subtype (struct rspamd_task *task, GMimeContentType * ct,
gchar *subtype)
{
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
gint r;
if (subtype == NULL || ct == NULL) {
@@ -1974,7 +1974,7 @@ common_has_content_part (struct rspamd_task * task,
gint min_len,
gint max_len)
{
- struct rspamd_regexp *re;
+ struct rspamd_regexp_element *re;
struct mime_part *part;
GList *cur;
GMimeContentType *ct;