Protect regexp matcher from regexps with empty patterns (pull/4891/head)
if (max_hits > 0 && r >= max_hits) { | if (max_hits > 0 && r >= max_hits) { | ||||
break; | break; | ||||
} | } | ||||
if (start >= end) { | |||||
/* We found all matches, so no more hits are possible (protect from empty patterns) */ | |||||
break; | |||||
} | |||||
} | } | ||||
rt->results[id] += r; | rt->results[id] += r; |
&end, | &end, | ||||
TRUE, | TRUE, | ||||
NULL)) { | NULL)) { | ||||
if (start >= end) { | |||||
/* We found all matches, so no more hits are possible (protect from empty patterns) */ | |||||
break; | |||||
} | |||||
if (rspamd_multipattern_acism_cb(i, end - in, &cbd)) { | if (rspamd_multipattern_acism_cb(i, end - in, &cbd)) { | ||||
goto out; | goto out; | ||||
} | } |
/* | /* | ||||
* Copyright 2023 Vsevolod Stakhov | |||||
* Copyright 2024 Vsevolod Stakhov | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
g_assert(text != NULL); | g_assert(text != NULL); | ||||
if (len == 0) { | if (len == 0) { | ||||
len = strlen(text); | |||||
/* No length, no match! */ | |||||
return FALSE; | |||||
} | } | ||||
if (re->match_limit > 0 && len > re->match_limit) { | if (re->match_limit > 0 && len > re->match_limit) { | ||||
g_assert(text != NULL); | g_assert(text != NULL); | ||||
if (len == 0) { | if (len == 0) { | ||||
len = strlen(text); | |||||
/* No length, no match! */ | |||||
return FALSE; | |||||
} | } | ||||
if (re->match_limit > 0 && len > re->match_limit) { | if (re->match_limit > 0 && len > re->match_limit) { | ||||
g_assert(re != NULL); | g_assert(re != NULL); | ||||
g_assert(text != NULL); | g_assert(text != NULL); | ||||
if (len == 0) { | |||||
len = strlen(text); | |||||
} | |||||
if (rspamd_regexp_search(re, text, len, &start, &end, raw, NULL)) { | if (rspamd_regexp_search(re, text, len, &start, &end, raw, NULL)) { | ||||
if (start == text && end == text + len) { | if (start == text && end == text + len) { | ||||
return TRUE; | return TRUE; | ||||
g_assert(gl != NULL); | g_assert(gl != NULL); | ||||
if (sz == 0) { | |||||
sz = strlen(gl); | |||||
} | |||||
end = gl + sz; | end = gl + sz; | ||||
out = g_string_sized_new(sz + 2); | out = g_string_sized_new(sz + 2); | ||||
g_string_append_c(out, '^'); | g_string_append_c(out, '^'); |
} | } | ||||
matched = TRUE; | matched = TRUE; | ||||
if (start >= end) { | |||||
/* We found all matches, so no more hits are possible (protect from empty patterns) */ | |||||
break; | |||||
} | |||||
} | } | ||||
if (!matched) { | if (!matched) { | ||||
lua_rawseti(L, -2, ++i); | lua_rawseti(L, -2, ++i); | ||||
matched = TRUE; | matched = TRUE; | ||||
} | } | ||||
else if (start == end) { | |||||
else if (start >= end) { | |||||
break; | break; | ||||
} | } | ||||
old_start = end; | old_start = end; |
assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1]) | assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1]) | ||||
end) | end) | ||||
end | end | ||||
test("URL regexp issue", function() | |||||
local rspamd_regexp = require "rspamd_regexp" | |||||
local u = url.create(pool, | |||||
'https://cls21.bullhornstaffing.com/MailerUnsubscribe.cfm?privateLabelID=3D26028&email=xpto&updKey=3D%3B%28U%2B%2F%200T%3EI%3B%2FQEI%5E%29%25XR%3FZ%40%5B%2EGJY%3CF%23%3F%25%22%29%5D%2D%0A') | |||||
assert_not_nil(u, "we are able to parse url") | |||||
local re = rspamd_regexp.create_cached("^$|^[?].*|^[#].*|[^#?]+") | |||||
assert_not_nil(re, "regexp is valid") | |||||
local res = re:search('/' .. u:get_path() .. '?' .. u:get_query()) | |||||
assert_equal(res[#res], '') | |||||
end) | |||||
end) | end) |