about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-03-07 22:00:14 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-03-07 22:00:14 +0000
commit 908df97327a0e7e0c21053a67ac22e66610d30d1 (patch)
tree 1ed77671e1cb25f39693aa3d915042cda779fd2d
parent 15200f78979740022c0a58ace91447fd99a3075b (diff)
download rspamd-908df97327a0e7e0c21053a67ac22e66610d30d1.tar.gz
rspamd-908df97327a0e7e0c21053a67ac22e66610d30d1.zip
Start moving to the rspamd regexps.
-rw-r--r-- src/libmime/expressions.c 83
-rw-r--r-- src/libserver/cfg_file.h 5
-rw-r--r-- src/libutil/regexp.c 4
-rw-r--r-- src/plugins/regexp.c 91
4 files changed, 44 insertions, 139 deletions
diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c
index 769b7dc14..aab78df5c 100644
--- a/src/libmime/expressions.c
+++ b/src/libmime/expressions.c
@@ -652,9 +652,9 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
{
const gchar *begin, *end, *p, *src, *start;
gchar *dbegin, *dend;
- struct rspamd_regexp_element *result, *check;
- gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE;
+ struct rspamd_regexp_element *result;
GError *err = NULL;
+ GString *re_flags;
if (line == NULL) {
msg_err ("cannot parse NULL line");
@@ -727,35 +727,20 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
}
/* Parse flags */
p = end + 1;
+ re_flags = g_string_sized_new (32);
while (p != NULL) {
switch (*p) {
case 'i':
- regexp_flags |= G_REGEX_CASELESS;
- p++;
- break;
case 'm':
- regexp_flags |= G_REGEX_MULTILINE;
- p++;
- break;
case 's':
- regexp_flags |= G_REGEX_DOTALL;
- p++;
- break;
case 'x':
- regexp_flags |= G_REGEX_EXTENDED;
- p++;
- break;
case 'u':
- regexp_flags |= G_REGEX_UNGREEDY;
+ case 'O':
+ case 'r':
+ g_string_append_c (re_flags, *p);
p++;
break;
case 'o':
- regexp_flags |= G_REGEX_OPTIMIZE;
- p++;
- break;
- case 'r':
- regexp_flags |= G_REGEX_RAW;
- result->is_raw = TRUE;
p++;
break;
/* Type flags */
@@ -810,61 +795,27 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
*dend = '\0';
if (raw_mode) {
- regexp_flags |= G_REGEX_RAW;
- }
-
- /* Avoid multiply regexp structures for similar regexps */
- if ((check =
- (struct rspamd_regexp_element *)re_cache_check (result->regexp_text,
- pool)) != NULL) {
- /* Additional check for headers */
- if (result->type == REGEXP_HEADER || result->type ==
- REGEXP_RAW_HEADER) {
- if (result->header && check->header) {
- if (strcmp (result->header, check->header) == 0) {
- return check;
- }
- }
- }
- else {
- return check;
- }
- }
- result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err);
- if ((regexp_flags & G_REGEX_RAW) != 0) {
- result->raw_regexp = result->regexp;
- }
- else {
- result->raw_regexp = g_regex_new (dbegin,
- regexp_flags | G_REGEX_RAW,
- 0,
- &err);
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t) g_regex_unref,
- (void *)result->raw_regexp);
+ g_string_append_c (re_flags, 'r');
}
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t) g_regex_unref,
- (void *)result->regexp);
- *dend = '/';
+ result->regexp = rspamd_regexp_cache_create (NULL, dbegin, re_flags->str,
+ &err);
+
+ g_string_free (re_flags, TRUE);
if (result->regexp == NULL || err != NULL) {
msg_warn ("could not read regexp: %s while reading regexp %s",
err ? err->message : "unknown error",
- src);
+ src);
return NULL;
}
- if (result->raw_regexp == NULL || err != NULL) {
- msg_warn ("could not read raw regexp: %s while reading regexp %s",
- err ? err->message : "unknown error",
- src);
- return NULL;
- }
+ rspamd_mempool_add_destructor (pool,
+ (rspamd_mempool_destruct_t) rspamd_regexp_unref,
+ (void *)result->regexp);
+
+ *dend = '/';
- /* Add to cache for further usage */
- re_cache_add (result->regexp_text, result, pool);
return result;
}
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 8c58a4941..44728afe8 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -12,6 +12,7 @@
#include "symbols_cache.h"
#include "cfg_rcl.h"
#include "ucl.h"
+#include "regexp.h"
#define DEFAULT_BIND_PORT 11333
#define DEFAULT_CONTROL_PORT 11334
@@ -68,11 +69,9 @@ enum rspamd_log_type {
struct rspamd_regexp_element {
enum rspamd_regexp_type type; /**< regexp type */
gchar *regexp_text; /**< regexp text representation */
- GRegex *regexp; /**< glib regexp structure */
- GRegex *raw_regexp; /**< glib regexp structure for raw matching */
+ rspamd_regexp_t *regexp; /**< regexp structure */
gchar *header; /**< header name for header regexps */
gboolean is_test; /**< true if this expression must be tested */
- gboolean is_raw; /**< true if this regexp is done by raw matching */
gboolean is_strong; /**< true if headers search must be case sensitive */
};
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index 6da0a663e..3b76d15e0 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -296,6 +296,10 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
g_assert (re != NULL);
g_assert (text != NULL);
+ if (len == 0) {
+ len = strlen (text);
+ }
+
if (end != NULL && *end != NULL) {
/* Incremental search */
mt = (*end);
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 848fdfdb2..224d40b21 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -369,7 +369,7 @@ regexp_module_reconfig (struct rspamd_config *cfg)
struct url_regexp_param {
struct rspamd_task *task;
- GRegex *regexp;
+ rspamd_regexp_t *regexp;
struct rspamd_regexp_element *re;
gboolean found;
};
@@ -379,10 +379,9 @@ tree_url_callback (gpointer key, gpointer value, void *data)
{
struct url_regexp_param *param = data;
struct rspamd_url *url = value;
- GError *err = NULL;
- if (g_regex_match_full (param->regexp, struri (url), -1, 0, 0, NULL,
- &err) == TRUE) {
+ if (rspamd_regexp_search (param->regexp, struri (url), 0, NULL, NULL, FALSE)
+ == TRUE) {
if (G_UNLIKELY (param->re->is_test)) {
msg_info ("process test regexp %s for url %s returned TRUE",
struri (url));
@@ -395,11 +394,6 @@ tree_url_callback (gpointer key, gpointer value, void *data)
msg_info ("process test regexp %s for url %s returned FALSE",
struri (url));
}
- if (err != NULL) {
- msg_info ("error occured while processing regexp \"%s\": %s",
- param->re->regexp_text,
- err->message);
- }
return FALSE;
}
@@ -413,14 +407,12 @@ process_regexp (struct rspamd_regexp_element *re,
{
guint8 *ct;
gsize clen;
- gint r, passed = 0, start, end, old;
- gboolean matched = FALSE;
- const gchar *in;
+ gint r, passed = 0;
+ gboolean matched = FALSE, raw = FALSE;
+ const gchar *in, *start, *end;
GList *cur, *headerlist;
- GRegex *regexp;
- GMatchInfo *info;
- GError *err = NULL;
+ rspamd_regexp_t *regexp;
struct url_regexp_param callback_param = {
.task = task,
.re = re,
@@ -449,8 +441,8 @@ process_regexp (struct rspamd_regexp_element *re,
re->regexp_text,
additional);
}
- if (g_regex_match_full (re->regexp, additional, strlen (additional), 0,
- 0, NULL, NULL) == TRUE) {
+ if (rspamd_regexp_search (re->regexp, additional, 0, NULL, NULL,
+ FALSE) == TRUE) {
if (G_UNLIKELY (re->is_test)) {
msg_info ("result of regexp %s is true", re->regexp_text);
}
@@ -513,7 +505,7 @@ process_regexp (struct rspamd_regexp_element *re,
re->header, rh->decoded);
if (re->type == REGEXP_RAW_HEADER) {
in = rh->value;
- regexp = re->raw_regexp;
+ raw = TRUE;
}
else {
in = rh->decoded;
@@ -527,8 +519,7 @@ process_regexp (struct rspamd_regexp_element *re,
/* Match re */
if (in &&
- g_regex_match_full (regexp, in, -1, 0, 0, NULL,
- &err) == TRUE) {
+ rspamd_regexp_search (regexp, in, 0, NULL, NULL, raw)) {
if (G_UNLIKELY (re->is_test)) {
msg_info (
"process test regexp %s for header %s with value '%s' returned TRUE",
@@ -555,12 +546,6 @@ process_regexp (struct rspamd_regexp_element *re,
re->header,
in);
}
- if (err != NULL) {
- msg_info (
- "error occured while processing regexp \"%s\": %s",
- re->regexp_text,
- err->message);
- }
cur = g_list_next (cur);
}
task_cache_add (task, re, 0);
@@ -589,14 +574,14 @@ process_regexp (struct rspamd_regexp_element *re,
}
/* Check raw flags */
if (part->is_raw) {
- regexp = re->raw_regexp;
+ raw = TRUE;
}
else {
/* This time there is no need to validate anything as conversion succeed only for valid characters */
regexp = re->regexp;
}
/* Select data for regexp */
- if (re->is_raw) {
+ if (raw) {
ct = part->orig->data;
clen = part->orig->len;
}
@@ -607,9 +592,10 @@ process_regexp (struct rspamd_regexp_element *re,
/* If we have limit, apply regexp so much times as we can */
if (f != NULL && limit > 1) {
end = 0;
+ start = NULL;
+ end = NULL;
while ((matched =
- g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0,
- 0, &info, &err)) == TRUE) {
+ rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
if (G_UNLIKELY (re->is_test)) {
msg_info (
"process test regexp %s for mime part of length %d returned TRUE",
@@ -621,22 +607,10 @@ process_regexp (struct rspamd_regexp_element *re,
task_cache_add (task, re, 1);
return 1;
}
- else {
- /* Match not found, skip further cycles */
- old = end;
- if (!g_match_info_fetch_pos (info, 0, &start,
- &end) || end <= 0) {
- break;
- }
- end += old;
- }
- g_match_info_free (info);
}
- g_match_info_free (info);
}
else {
- if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL,
- &err) == TRUE) {
+ if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
if (G_UNLIKELY (re->is_test)) {
msg_info (
"process test regexp %s for mime part of length %d returned TRUE",
@@ -654,18 +628,13 @@ process_regexp (struct rspamd_regexp_element *re,
re->regexp_text,
(gint)clen);
}
- if (err != NULL) {
- msg_info ("error occured while processing regexp \"%s\": %s",
- re->regexp_text,
- err->message);
- }
cur = g_list_next (cur);
}
task_cache_add (task, re, 0);
break;
case REGEXP_MESSAGE:
debug_task ("checking message regexp: %s", re->regexp_text);
- regexp = re->raw_regexp;
+ raw = TRUE;
ct = (guint8 *)task->msg.start;
clen = task->msg.len;
@@ -676,10 +645,9 @@ process_regexp (struct rspamd_regexp_element *re,
}
/* If we have limit, apply regexp so much times as we can */
if (f != NULL && limit > 1) {
- end = 0;
+ start = end = NULL;
while ((matched =
- g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0,
- &info, &err)) == TRUE) {
+ rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
if (G_UNLIKELY (re->is_test)) {
msg_info (
"process test regexp %s for mime part of length %d returned TRUE",
@@ -690,22 +658,10 @@ process_regexp (struct rspamd_regexp_element *re,
task_cache_add (task, re, 1);
return 1;
}
- else {
- /* Match not found, skip further cycles */
- old = end;
- if (!g_match_info_fetch_pos (info, 0, &start,
- &end) || end <= 0) {
- break;
- }
- old += end;
- }
- g_match_info_free (info);
}
- g_match_info_free (info);
}
else {
- if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL,
- &err) == TRUE) {
+ if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
if (G_UNLIKELY (re->is_test)) {
msg_info (
"process test regexp %s for message part of length %d returned TRUE",
@@ -723,11 +679,6 @@ process_regexp (struct rspamd_regexp_element *re,
re->regexp_text,
(gint)clen);
}
- if (err != NULL) {
- msg_info ("error occured while processing regexp \"%s\": %s",
- re->regexp_text,
- err->message);
- }
task_cache_add (task, re, 0);
break;
case REGEXP_URL: