summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libmime/mime_expressions.c13
-rw-r--r--src/libserver/re_cache.c58
-rw-r--r--src/libserver/re_cache.h4
-rw-r--r--src/lua/lua_task.c10
4 files changed, 45 insertions, 40 deletions
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index 8d42bf9a8..bc13859c1 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -342,6 +342,13 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line)
result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
&err);
+ /* Compilation may have failed: the NULL check only happens below */
+ if (result->regexp != NULL) {
+ /* 0 means no hit limit, 1 asks for a single hit */
+ rspamd_regexp_set_maxhits (result->regexp,
+ result->is_multiple ? 0 : 1);
+ }
+
g_string_free (re_flags, TRUE);
if (result->regexp == NULL || err != NULL) {
@@ -687,8 +694,7 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
re->type,
re->header,
strlen (re->header),
- re->is_strong,
- re->is_multiple);
+ re->is_strong);
}
else {
ret = rspamd_re_cache_process (task,
@@ -697,8 +703,7 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
re->type,
NULL,
0,
- re->is_strong,
- re->is_multiple);
+ re->is_strong);
}
if (re->is_test) {
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index c29bc7a31..c310c7fb8 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -399,10 +399,11 @@ rspamd_re_cache_runtime_new (struct rspamd_re_cache *cache)
static guint
rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
rspamd_regexp_t *re, const guchar *in, gsize len,
- gboolean is_raw, gboolean is_multiple)
+ gboolean is_raw)
{
guint r = 0;
const gchar *start = NULL, *end = NULL;
+ guint max_hits = rspamd_regexp_get_maxhits (re);
if (len == 0) {
len = strlen (in);
@@ -421,7 +422,7 @@ rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
NULL)) {
r++;
- if (!is_multiple || r >= 0xFF) {
+ if (max_hits > 0 && r >= max_hits) { /* limit reached; 0 = no limit */
break;
}
}
@@ -446,29 +447,32 @@ rspamd_re_cache_hyperscan_cb (unsigned int id,
struct rspamd_re_hyperscan_cbdata *cbdata = ud;
struct rspamd_re_runtime *rt;
struct rspamd_re_cache_elt *pcre_elt;
- guint ret;
+ guint ret, maxhits;
rt = cbdata->rt;
+ pcre_elt = g_ptr_array_index (rt->cache->re, id);
if (flags & HS_FLAG_PREFILTER) {
if (!isset (rt->checked, id)) {
/* We need to match the corresponding pcre first */
- pcre_elt = g_ptr_array_index (rt->cache->re, id);
ret = rspamd_re_cache_process_pcre (rt,
pcre_elt->re,
cbdata->in + from,
to - from,
- FALSE,
- TRUE);
+ FALSE);
setbit (rt->checked, id);
rt->results[id] = ret;
}
}
else {
+ maxhits = rspamd_regexp_get_maxhits (pcre_elt->re);
setbit (rt->checked, id);
- rt->results[id] ++;
+
+ if (maxhits == 0 || rt->results[id] < maxhits) {
+ rt->results[id]++;
+ }
}
return 0;
@@ -479,7 +483,7 @@ static guint
rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
const guchar *in, gsize len,
- gboolean is_raw, gboolean is_multiple)
+ gboolean is_raw)
{
struct rspamd_re_cache_elt *elt;
struct rspamd_re_class *re_class;
@@ -491,14 +495,14 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
re_class = rspamd_regexp_get_class (re);
#ifndef WITH_HYPERSCAN
- ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+ ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw);
setbit (rt->checked, re_id);
rt->results[re_id] = ret;
#else
struct rspamd_re_hyperscan_cbdata cbdata;
if (elt->match_type == RSPAMD_RE_CACHE_PCRE) {
- ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+ ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw);
setbit (rt->checked, re_id);
rt->results[re_id] = ret;
}
@@ -560,8 +564,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
struct rspamd_re_class *re_class,
- gboolean is_strong,
- gboolean is_multiple)
+ gboolean is_strong)
{
guint ret = 0, i;
GList *cur, *headerlist;
@@ -604,7 +607,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
/* Match re */
if (in) {
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- strlen (in), raw, is_multiple);
+ strlen (in), raw);
debug_task ("checking header %s regexp: %s -> %d",
re_class->type_data,
rspamd_regexp_get_pattern (re), ret);
@@ -619,7 +622,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
in = task->raw_headers_content.begin;
len = task->raw_headers_content.len;
ret = rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
debug_task ("checking allheader regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
break;
@@ -649,7 +652,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
if (len > 0) {
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
debug_task ("checking mime regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
}
@@ -665,7 +668,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
raw = FALSE;
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
}
g_hash_table_iter_init (&it, task->emails);
@@ -677,7 +680,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
raw = FALSE;
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
}
debug_task ("checking url regexp: %s -> %d",
@@ -689,7 +692,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
len = task->msg.len;
ret = rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
debug_task ("checking rawbody regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
break;
@@ -711,8 +714,7 @@ rspamd_re_cache_process (struct rspamd_task *task,
enum rspamd_re_type type,
gpointer type_data,
gsize datalen,
- gboolean is_strong,
- gboolean is_multiple)
+ gboolean is_strong)
{
guint64 re_id;
struct rspamd_re_class *re_class;
@@ -733,12 +735,7 @@ rspamd_re_cache_process (struct rspamd_task *task,
if (isset (rt->checked, re_id)) {
/* Fast path */
- if (is_multiple) {
- return rt->results[re_id];
- }
- else {
- return rt->results[re_id] ? 1 : 0;
- }
+ return rt->results[re_id];
}
else {
/* Slow path */
@@ -751,7 +748,7 @@ rspamd_re_cache_process (struct rspamd_task *task,
}
return rspamd_re_cache_exec_re (task, rt, re, re_class,
- is_strong, is_multiple);
+ is_strong);
}
return 0;
@@ -999,12 +996,19 @@ rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
hs_flags[i] = 0;
pcre_flags = rspamd_regexp_get_pcre_flags (re);
+
if (pcre_flags & PCRE_UTF8) {
hs_flags[i] |= HS_FLAG_UTF8;
}
if (pcre_flags & PCRE_CASELESS) {
hs_flags[i] |= HS_FLAG_CASELESS;
}
+ if (pcre_flags & PCRE_MULTILINE) {
+ hs_flags[i] |= HS_FLAG_MULTILINE;
+ }
+ if (rspamd_regexp_get_maxhits (re) == 1) {
+ hs_flags[i] |= HS_FLAG_SINGLEMATCH;
+ }
if (hs_compile (rspamd_regexp_get_pattern (re),
hs_flags[i],
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index 310056515..c812b8ef3 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -86,7 +86,6 @@ struct rspamd_re_runtime* rspamd_re_cache_runtime_new (struct rspamd_re_cache *c
* @param type_data associated data with the type (e.g. header name)
* @param datalen associated data length
* @param is_strong use case sensitive match when looking for headers
- * @param is_multiple return multiple possible occurrences of the specified re
*/
gint rspamd_re_cache_process (struct rspamd_task *task,
struct rspamd_re_runtime *rt,
@@ -94,8 +93,7 @@ gint rspamd_re_cache_process (struct rspamd_task *task,
enum rspamd_re_type type,
gpointer type_data,
gsize datalen,
- gboolean is_strong,
- gboolean is_multiple);
+ gboolean is_strong);
/**
* Destroy runtime data
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 8246d921b..8529489f6 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -440,7 +440,6 @@ LUA_FUNCTION_DEF (task, set_settings);
* + `url`: url regexp
* - `header`: for header and rawheader regexp means the name of header
* - `strong`: case sensitive match for headers
- * - `multiple`: allow multiple matches
* @return {number} number of regexp occurences in the task (limited by 255 so far)
*/
LUA_FUNCTION_DEF (task, process_regexp);
@@ -2007,7 +2006,7 @@ lua_task_process_regexp (lua_State *L)
{
struct rspamd_task *task = lua_check_task (L, 1);
struct rspamd_lua_regexp *re = NULL;
- gboolean strong = FALSE, multiple = FALSE;
+ gboolean strong = FALSE;
const gchar *type_str = NULL, *header_str = NULL;
gsize header_len = 0;
GError *err = NULL;
@@ -2024,13 +2023,12 @@ lua_task_process_regexp (lua_State *L)
* + `url`: url regexp
* - `header`: for header and rawheader regexp means the name of header
* - `strong`: case sensitive match for headers
- * - `multiple`: allow multiple matches
*/
if (task != NULL) {
if (!rspamd_lua_parse_table_arguments (L, 2, &err,
- "*re=U{regexp};*type=S;header=V;strong=B;multiple=B",
+ "*re=U{regexp};*type=S;header=V;strong=B",
&re, &type_str, &header_len, &header_str,
- &strong, &multiple)) {
+ &strong)) {
msg_err_task ("cannot get parameters list: %e", err);
if (err) {
@@ -2047,7 +2045,7 @@ lua_task_process_regexp (lua_State *L)
}
else {
ret = rspamd_re_cache_process (task, task->re_rt, re->re, type,
- (gpointer) header_str, header_len, strong, multiple);
+ (gpointer) header_str, header_len, strong);
}
}
}