aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-03-06 13:59:07 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-03-06 13:59:07 +0000
commit 9bc218c25f142a973206869d57275f3cae9a2184 (patch)
tree 10f4a2abcba2f89dba64b8e2d4d39b84f0c6b79a
parent 453645aa3766bd1e5df8d9bb076f78bf3604e1d8 (diff)
download rspamd-9bc218c25f142a973206869d57275f3cae9a2184.tar.gz
rspamd-9bc218c25f142a973206869d57275f3cae9a2184.zip
Implement lua regexp split, write test case.
-rw-r--r-- src/libutil/regexp.c 15
-rw-r--r-- src/libutil/regexp.h 9
-rw-r--r-- src/lua/lua_regexp.c 31
-rw-r--r-- test/lua/unit/regxep.lua 25
4 files changed, 65 insertions, 15 deletions
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index 444496f45..6da0a663e 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -488,12 +488,27 @@ rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
res = rspamd_regexp_new (pattern, flags, err);
if (res) {
+ REF_RETAIN (res);
g_hash_table_insert (cache->tbl, res->id, res);
}
return res;
}
+/*
+ * Remove `re` from `cache`, using `re->id` as the hash table key.
+ * A NULL `cache` selects `global_re_cache`. Both the effective cache and
+ * `re` must be non-NULL (checked with g_assert).
+ * Returns whatever g_hash_table_remove() reports for that key.
+ */
+gboolean
+rspamd_regexp_cache_remove (struct rspamd_regexp_cache *cache,
+	rspamd_regexp_t *re)
+{
+	gboolean removed;
+
+	/* No explicit cache given: fall back to the global one */
+	cache = (cache != NULL) ? cache : global_re_cache;
+
+	g_assert (cache != NULL);
+	g_assert (re != NULL);
+
+	removed = g_hash_table_remove (cache->tbl, re->id);
+
+	return removed;
+}
+
void
rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache)
{
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
index 6e8aa7295..41835af84 100644
--- a/src/libutil/regexp.h
+++ b/src/libutil/regexp.h
@@ -112,6 +112,15 @@ rspamd_regexp_t* rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
const gchar *flags, GError **err);
/**
+ * Remove regexp from the cache
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param re re to remove
+ * @return TRUE if a regexp has been removed
+ */
+gboolean rspamd_regexp_cache_remove (struct rspamd_regexp_cache *cache,
+ rspamd_regexp_t *re);
+
+/**
* Destroy regexp cache and unref all elements inside it
* @param cache
*/
diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c
index a67bed334..6fa0de772 100644
--- a/src/lua/lua_regexp.c
+++ b/src/lua/lua_regexp.c
@@ -322,26 +322,29 @@ lua_regexp_split (lua_State *L)
{
struct rspamd_lua_regexp *re = lua_check_regexp (L);
const gchar *data;
- gchar **parts;
+ gboolean matched = FALSE;
+ gsize len;
+ const gchar *start = NULL, *end = NULL, *old_start;
gint i;
if (re) {
- data = luaL_checkstring (L, 2);
+ data = luaL_checklstring (L, 2, &len);
if (data) {
- if ((re->re_flags & G_REGEX_RAW) == 0) {
- /* Validate input */
- if (!g_utf8_validate (data, -1, NULL)) {
- lua_pushnil (L);
- return 1;
+ lua_newtable (L);
+ i = 0;
+ old_start = data;
+ while (rspamd_regexp_search (re->re, data, len, &start, &end, FALSE)) {
+ if (start - old_start > 0) {
+ lua_pushlstring (L, old_start, start - old_start);
+ lua_rawseti (L, -2, ++i);
+ matched = TRUE;
}
+ old_start = end;
}
- parts = g_regex_split (re->re, data, 0);
- lua_newtable (L);
- for (i = 1; parts[i - 1] != NULL; i++) {
- lua_pushstring (L, parts[i - 1]);
- lua_rawseti (L, -2, i);
+ if (!matched) {
+ lua_pop (L, 1);
+ lua_pushnil (L);
}
- g_strfreev (parts);
return 1;
}
}
@@ -362,7 +365,7 @@ lua_regexp_destroy (lua_State *L)
struct rspamd_lua_regexp *to_del = lua_check_regexp (L);
if (to_del) {
- re_cache_del (to_del->re_pattern, regexp_static_pool);
+ rspamd_regexp_cache_remove (NULL, to_del->re);
rspamd_regexp_unref (to_del->re);
g_slice_free1 (sizeof (struct rspamd_lua_regexp), to_del);
}
diff --git a/test/lua/unit/regxep.lua b/test/lua/unit/regxep.lua
index 81234ddb6..1277cce11 100644
--- a/test/lua/unit/regxep.lua
+++ b/test/lua/unit/regxep.lua
@@ -19,12 +19,35 @@ context("Regexp unit tests", function()
for _,c in ipairs(cases) do
local r = re.create_cached(c[1])
- assert_not_nil(r)
+ assert_not_nil(r, "cannot parse " .. c[1])
local res = r:match(c[2])
assert_equal(res, c[3], string.format("'%s' doesn't match with '%s'",
c[2], c[1]))
end
end)
+
+  -- Checks r:split() against a table of {pattern, input, expected parts}.
+  test("Regexp split", function()
+    local cases = {
+      {'\\s', 'one two', {'one', 'two'}}, -- trivial
+      {'\\s', 'one two', {'one', 'two'}}, -- multiple delimiters
+      {'\\s', ' one two ', {'one', 'two'}}, -- leading and trailing delimiters
+      {'\\s', ' one ', {'one'}}, -- single part between delimiters
+      {'[:,]', ',,,:::one,two,,', {'one', 'two'}}, -- delimiter runs from a character class
+    }
+
+    for _,c in ipairs(cases) do
+      local r = re.create_cached(c[1])
+      assert_not_nil(r, "cannot parse " .. c[1])
+
+      local res = r:split(c[2])
+      assert_not_nil(res, "cannot split " .. c[2])
+
+      -- NOTE(review): only the parts that split() returned are compared, so a
+      -- part missing from the end of `res` is not detected here; consider also
+      -- asserting #res == #c[3].
+      for i,part in ipairs(res) do
+        assert_equal(part, c[3][i], string.format(
+          "part %d of '%s' split by '%s' is wrong", i, c[2], c[1]))
+      end
+    end
+  end)
+
end
) \ No newline at end of file