about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rspamd.com> 2024-03-24 22:22:56 +0600
committer GitHub <noreply@github.com> 2024-03-24 22:22:56 +0600
commit e539b43bdb23c47aacb36eb90c055a8e2c2e6f62 (patch)
tree 2a8ed15b3f25e1336997f3a553e0168ab71b59ab
parent db02d917d4a0d853938a02ce56b5c4d914fca50f (diff)
parent 417cdf6aa30054c703da5ce8a3185412ed532477 (diff)
download rspamd-e539b43bdb23c47aacb36eb90c055a8e2c2e6f62.tar.gz
rspamd-e539b43bdb23c47aacb36eb90c055a8e2c2e6f62.zip
Merge pull request #4887 from rspamd/vstakhov-regexp-fix
Protect regexp matcher from regexps with empty patterns
-rw-r--r-- src/libserver/re_cache.c | 5
-rw-r--r-- src/libutil/multipattern.c | 4
-rw-r--r-- src/libutil/regexp.c | 16
-rw-r--r-- src/lua/lua_regexp.c | 7
-rw-r--r-- test/lua/unit/url.lua | 11
5 files changed, 31 insertions, 12 deletions
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 647375bcd..0644980da 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -632,6 +632,11 @@ rspamd_re_cache_process_pcre(struct rspamd_re_runtime *rt,
if (max_hits > 0 && r >= max_hits) {
break;
}
+
+ if (start >= end) {
+ /* We found all matches, so no more hits are possible (protect from empty patterns) */
+ break;
+ }
}
rt->results[id] += r;
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
index 3c9be0df3..9ae798bb9 100644
--- a/src/libutil/multipattern.c
+++ b/src/libutil/multipattern.c
@@ -717,6 +717,10 @@ int rspamd_multipattern_lookup(struct rspamd_multipattern *mp,
&end,
TRUE,
NULL)) {
+ if (start >= end) {
+ /* We found all matches, so no more hits are possible (protect from empty patterns) */
+ break;
+ }
if (rspamd_multipattern_acism_cb(i, end - in, &cbd)) {
goto out;
}
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index b97e66a03..9e98699fe 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -567,7 +567,8 @@ rspamd_regexp_search(const rspamd_regexp_t *re, const char *text, gsize len,
g_assert(text != NULL);
if (len == 0) {
- len = strlen(text);
+ /* No length, no match! */
+ return FALSE;
}
if (re->match_limit > 0 && len > re->match_limit) {
@@ -727,7 +728,8 @@ rspamd_regexp_search(const rspamd_regexp_t *re, const char *text, gsize len,
g_assert(text != NULL);
if (len == 0) {
- len = strlen(text);
+ /* No length, no match! */
+ return FALSE;
}
if (re->match_limit > 0 && len > re->match_limit) {
@@ -948,10 +950,6 @@ rspamd_regexp_match(const rspamd_regexp_t *re, const char *text, gsize len,
g_assert(re != NULL);
g_assert(text != NULL);
- if (len == 0) {
- len = strlen(text);
- }
-
if (rspamd_regexp_search(re, text, len, &start, &end, raw, NULL)) {
if (start == text && end == text + len) {
return TRUE;
@@ -1253,10 +1251,6 @@ rspamd_regexp_from_glob(const char *gl, gsize sz, GError **err)
g_assert(gl != NULL);
- if (sz == 0) {
- sz = strlen(gl);
- }
-
end = gl + sz;
out = g_string_sized_new(sz + 2);
g_string_append_c(out, '^');
diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c
index 6e2b0dc22..4a209057e 100644
--- a/src/lua/lua_regexp.c
+++ b/src/lua/lua_regexp.c
@@ -543,6 +543,11 @@ lua_regexp_search(lua_State *L)
}
matched = TRUE;
+
+ if (start >= end) {
+ /* We found all matches, so no more hits are possible (protect from empty patterns) */
+ break;
+ }
}
if (!matched) {
@@ -749,7 +754,7 @@ lua_regexp_split(lua_State *L)
lua_rawseti(L, -2, ++i);
matched = TRUE;
}
- else if (start == end) {
+ else if (start >= end) {
break;
}
old_start = end;
diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua
index 52b88d25a..e25005eca 100644
--- a/test/lua/unit/url.lua
+++ b/test/lua/unit/url.lua
@@ -250,4 +250,15 @@ context("URL check functions", function()
assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1])
end)
end
+
+ test("URL regexp issue", function()
+ local rspamd_regexp = require "rspamd_regexp"
+ local u = url.create(pool,
+ 'https://cls21.bullhornstaffing.com/MailerUnsubscribe.cfm?privateLabelID=3D26028&email=xpto&updKey=3D%3B%28U%2B%2F%200T%3EI%3B%2FQEI%5E%29%25XR%3FZ%40%5B%2EGJY%3CF%23%3F%25%22%29%5D%2D%0A')
+ assert_not_nil(u, "we are able to parse url")
+ local re = rspamd_regexp.create_cached("^$|^[?].*|^[#].*|[^#?]+")
+ assert_not_nil(re, "regexp is valid")
+ local res = re:search('/' .. u:get_path() .. '?' .. u:get_query())
+ assert_equal(res[#res], '')
+ end)
end)