diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-03-24 22:22:56 +0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-24 22:22:56 +0600 |
commit | e539b43bdb23c47aacb36eb90c055a8e2c2e6f62 (patch) | |
tree | 2a8ed15b3f25e1336997f3a553e0168ab71b59ab | |
parent | db02d917d4a0d853938a02ce56b5c4d914fca50f (diff) | |
parent | 417cdf6aa30054c703da5ce8a3185412ed532477 (diff) | |
download | rspamd-e539b43bdb23c47aacb36eb90c055a8e2c2e6f62.tar.gz rspamd-e539b43bdb23c47aacb36eb90c055a8e2c2e6f62.zip |
Merge pull request #4887 from rspamd/vstakhov-regexp-fix
Protect regexp matcher from regexps with empty patterns
-rw-r--r-- | src/libserver/re_cache.c | 5 | ||||
-rw-r--r-- | src/libutil/multipattern.c | 4 | ||||
-rw-r--r-- | src/libutil/regexp.c | 16 | ||||
-rw-r--r-- | src/lua/lua_regexp.c | 7 | ||||
-rw-r--r-- | test/lua/unit/url.lua | 11 |
5 files changed, 31 insertions, 12 deletions
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 647375bcd..0644980da 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -632,6 +632,11 @@ rspamd_re_cache_process_pcre(struct rspamd_re_runtime *rt, if (max_hits > 0 && r >= max_hits) { break; } + + if (start >= end) { + /* We found all matches, so no more hits are possible (protect from empty patterns) */ + break; + } } rt->results[id] += r; diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c index 3c9be0df3..9ae798bb9 100644 --- a/src/libutil/multipattern.c +++ b/src/libutil/multipattern.c @@ -717,6 +717,10 @@ int rspamd_multipattern_lookup(struct rspamd_multipattern *mp, &end, TRUE, NULL)) { + if (start >= end) { + /* We found all matches, so no more hits are possible (protect from empty patterns) */ + break; + } if (rspamd_multipattern_acism_cb(i, end - in, &cbd)) { goto out; } diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c index b97e66a03..9e98699fe 100644 --- a/src/libutil/regexp.c +++ b/src/libutil/regexp.c @@ -1,5 +1,5 @@ /* - * Copyright 2023 Vsevolod Stakhov + * Copyright 2024 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -567,7 +567,8 @@ rspamd_regexp_search(const rspamd_regexp_t *re, const char *text, gsize len, g_assert(text != NULL); if (len == 0) { - len = strlen(text); + /* No length, no match! */ + return FALSE; } if (re->match_limit > 0 && len > re->match_limit) { @@ -727,7 +728,8 @@ rspamd_regexp_search(const rspamd_regexp_t *re, const char *text, gsize len, g_assert(text != NULL); if (len == 0) { - len = strlen(text); + /* No length, no match! */ + return FALSE; } if (re->match_limit > 0 && len > re->match_limit) { @@ -948,10 +950,6 @@ rspamd_regexp_match(const rspamd_regexp_t *re, const char *text, gsize len, g_assert(re != NULL); g_assert(text != NULL); - if (len == 0) { - len = strlen(text); - } - if (rspamd_regexp_search(re, text, len, &start, &end, raw, NULL)) { if (start == text && end == text + len) { return TRUE; @@ -1253,10 +1251,6 @@ rspamd_regexp_from_glob(const char *gl, gsize sz, GError **err) g_assert(gl != NULL); - if (sz == 0) { - sz = strlen(gl); - } - end = gl + sz; out = g_string_sized_new(sz + 2); g_string_append_c(out, '^'); diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c index 6e2b0dc22..4a209057e 100644 --- a/src/lua/lua_regexp.c +++ b/src/lua/lua_regexp.c @@ -543,6 +543,11 @@ lua_regexp_search(lua_State *L) } matched = TRUE; + + if (start >= end) { + /* We found all matches, so no more hits are possible (protect from empty patterns) */ + break; + } } if (!matched) { @@ -749,7 +754,7 @@ lua_regexp_split(lua_State *L) lua_rawseti(L, -2, ++i); matched = TRUE; } - else if (start == end) { + else if (start >= end) { break; } old_start = end; diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua index 52b88d25a..e25005eca 100644 --- a/test/lua/unit/url.lua +++ b/test/lua/unit/url.lua @@ -250,4 +250,15 @@ context("URL check functions", function() assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1]) end) end + + test("URL regexp issue", function() + local rspamd_regexp = require "rspamd_regexp" + local u = url.create(pool, + 'https://cls21.bullhornstaffing.com/MailerUnsubscribe.cfm?privateLabelID=3D26028&email=xpto&updKey=3D%3B%28U%2B%2F%200T%3EI%3B%2FQEI%5E%29%25XR%3FZ%40%5B%2EGJY%3CF%23%3F%25%22%29%5D%2D%0A') + assert_not_nil(u, "we are able to parse url") + local re = rspamd_regexp.create_cached("^$|^[?].*|^[#].*|[^#?]+") + assert_not_nil(re, "regexp is valid") + local res = re:search('/' .. u:get_path() .. '?' .. u:get_query()) + assert_equal(res[#res], '') + end) end) |