Browse Source

Merge pull request #4887 from rspamd/vstakhov-regexp-fix

Protect regexp matcher from regexps with empty patterns
pull/4891/head
Vsevolod Stakhov 1 month ago
parent
commit
e539b43bdb
No account linked to committer's email address
5 changed files with 31 additions and 12 deletions
  1. 5
    0
      src/libserver/re_cache.c
  2. 4
    0
      src/libutil/multipattern.c
  3. 5
    11
      src/libutil/regexp.c
  4. 6
    1
      src/lua/lua_regexp.c
  5. 11
    0
      test/lua/unit/url.lua

+ 5
- 0
src/libserver/re_cache.c View File

@@ -632,6 +632,11 @@ rspamd_re_cache_process_pcre(struct rspamd_re_runtime *rt,
if (max_hits > 0 && r >= max_hits) {
break;
}

if (start >= end) {
/* We found all matches, so no more hits are possible (protect from empty patterns) */
break;
}
}

rt->results[id] += r;

+ 4
- 0
src/libutil/multipattern.c View File

@@ -717,6 +717,10 @@ int rspamd_multipattern_lookup(struct rspamd_multipattern *mp,
&end,
TRUE,
NULL)) {
if (start >= end) {
/* We found all matches, so no more hits are possible (protect from empty patterns) */
break;
}
if (rspamd_multipattern_acism_cb(i, end - in, &cbd)) {
goto out;
}

+ 5
- 11
src/libutil/regexp.c View File

@@ -1,5 +1,5 @@
/*
* Copyright 2023 Vsevolod Stakhov
* Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -567,7 +567,8 @@ rspamd_regexp_search(const rspamd_regexp_t *re, const char *text, gsize len,
g_assert(text != NULL);

if (len == 0) {
len = strlen(text);
/* No length, no match! */
return FALSE;
}

if (re->match_limit > 0 && len > re->match_limit) {
@@ -727,7 +728,8 @@ rspamd_regexp_search(const rspamd_regexp_t *re, const char *text, gsize len,
g_assert(text != NULL);

if (len == 0) {
len = strlen(text);
/* No length, no match! */
return FALSE;
}

if (re->match_limit > 0 && len > re->match_limit) {
@@ -948,10 +950,6 @@ rspamd_regexp_match(const rspamd_regexp_t *re, const char *text, gsize len,
g_assert(re != NULL);
g_assert(text != NULL);

if (len == 0) {
len = strlen(text);
}

if (rspamd_regexp_search(re, text, len, &start, &end, raw, NULL)) {
if (start == text && end == text + len) {
return TRUE;
@@ -1253,10 +1251,6 @@ rspamd_regexp_from_glob(const char *gl, gsize sz, GError **err)

g_assert(gl != NULL);

if (sz == 0) {
sz = strlen(gl);
}

end = gl + sz;
out = g_string_sized_new(sz + 2);
g_string_append_c(out, '^');

+ 6
- 1
src/lua/lua_regexp.c View File

@@ -543,6 +543,11 @@ lua_regexp_search(lua_State *L)
}

matched = TRUE;

if (start >= end) {
/* We found all matches, so no more hits are possible (protect from empty patterns) */
break;
}
}

if (!matched) {
@@ -749,7 +754,7 @@ lua_regexp_split(lua_State *L)
lua_rawseti(L, -2, ++i);
matched = TRUE;
}
else if (start == end) {
else if (start >= end) {
break;
}
old_start = end;

+ 11
- 0
test/lua/unit/url.lua View File

@@ -250,4 +250,15 @@ context("URL check functions", function()
assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1])
end)
end

-- Exercises a regexp whose first alternative (`^$`) can match the empty
-- string against a "/path?query" subject rebuilt from a parsed URL.
-- NOTE(review): presumably a regression check for the empty-pattern match
-- handling in the regexp search loop — confirm against the C side.
test("URL regexp issue", function()
  local rspamd_regexp = require("rspamd_regexp")

  local parsed_url = url.create(pool,
    'https://cls21.bullhornstaffing.com/MailerUnsubscribe.cfm?privateLabelID=3D26028&email=xpto&updKey=3D%3B%28U%2B%2F%200T%3EI%3B%2FQEI%5E%29%25XR%3FZ%40%5B%2EGJY%3CF%23%3F%25%22%29%5D%2D%0A')
  assert_not_nil(parsed_url, "we are able to parse url")

  local matcher = rspamd_regexp.create_cached("^$|^[?].*|^[#].*|[^#?]+")
  assert_not_nil(matcher, "regexp is valid")

  -- Fetch path first, then query, keeping the original call order.
  local path = parsed_url:get_path()
  local query = parsed_url:get_query()
  local subject = '/' .. path .. '?' .. query

  -- The last entry returned by search is expected to be the empty string.
  local matches = matcher:search(subject)
  assert_equal(matches[#matches], '')
end)
end)

Loading…
Cancel
Save