From 9bc218c25f142a973206869d57275f3cae9a2184 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 Mar 2015 13:59:07 +0000 Subject: [PATCH] Implement lua regexp split, write test case. --- src/libutil/regexp.c | 15 +++++++++++++++ src/libutil/regexp.h | 9 +++++++++ src/lua/lua_regexp.c | 31 +++++++++++++++++-------------- test/lua/unit/regxep.lua | 25 ++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c index 444496f45..6da0a663e 100644 --- a/src/libutil/regexp.c +++ b/src/libutil/regexp.c @@ -488,12 +488,27 @@ rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache, res = rspamd_regexp_new (pattern, flags, err); if (res) { + REF_RETAIN (res); g_hash_table_insert (cache->tbl, res->id, res); } return res; } +gboolean +rspamd_regexp_cache_remove (struct rspamd_regexp_cache *cache, + rspamd_regexp_t *re) +{ + if (cache == NULL) { + cache = global_re_cache; + } + + g_assert (cache != NULL); + g_assert (re != NULL); + + return g_hash_table_remove (cache->tbl, re->id); +} + void rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache) { diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h index 6e8aa7295..41835af84 100644 --- a/src/libutil/regexp.h +++ b/src/libutil/regexp.h @@ -111,6 +111,15 @@ rspamd_regexp_t* rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache, const gchar *pattern, const gchar *flags, GError **err); +/** + * Remove regexp from the cache + * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe) + * @param re re to remove + * @return TRUE if a regexp has been removed + */ +gboolean rspamd_regexp_cache_remove (struct rspamd_regexp_cache *cache, + rspamd_regexp_t *re); + /** * Destroy regexp cache and unref all elements inside it * @param cache diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c index a67bed334..6fa0de772 100644 --- a/src/lua/lua_regexp.c +++ b/src/lua/lua_regexp.c @@ -322,26 +322,29 @@ lua_regexp_split (lua_State *L) { struct rspamd_lua_regexp *re = lua_check_regexp (L); const gchar *data; - gchar **parts; + gboolean matched = FALSE; + gsize len; + const gchar *start = NULL, *end = NULL, *old_start; gint i; if (re) { - data = luaL_checkstring (L, 2); + data = luaL_checklstring (L, 2, &len); if (data) { - if ((re->re_flags & G_REGEX_RAW) == 0) { - /* Validate input */ - if (!g_utf8_validate (data, -1, NULL)) { - lua_pushnil (L); - return 1; + lua_newtable (L); + i = 0; + old_start = data; + while (rspamd_regexp_search (re->re, data, len, &start, &end, FALSE)) { + if (start - old_start > 0) { + lua_pushlstring (L, old_start, start - old_start); + lua_rawseti (L, -2, ++i); + matched = TRUE; } + old_start = end; } - parts = g_regex_split (re->re, data, 0); - lua_newtable (L); - for (i = 1; parts[i - 1] != NULL; i++) { - lua_pushstring (L, parts[i - 1]); - lua_rawseti (L, -2, i); + if (!matched) { + lua_pop (L, 1); + lua_pushnil (L); } - g_strfreev (parts); return 1; } } @@ -362,7 +365,7 @@ lua_regexp_destroy (lua_State *L) struct rspamd_lua_regexp *to_del = lua_check_regexp (L); if (to_del) { - re_cache_del (to_del->re_pattern, regexp_static_pool); + rspamd_regexp_cache_remove (NULL, to_del->re); rspamd_regexp_unref (to_del->re); g_slice_free1 (sizeof (struct rspamd_lua_regexp), to_del); } diff --git a/test/lua/unit/regxep.lua b/test/lua/unit/regxep.lua index 81234ddb6..1277cce11 100644 --- a/test/lua/unit/regxep.lua +++ b/test/lua/unit/regxep.lua @@ -19,12 +19,35 @@ context("Regexp unit tests", function() for _,c in ipairs(cases) do local r = re.create_cached(c[1]) - assert_not_nil(r) + assert_not_nil(r, "cannot parse " .. c[1]) local res = r:match(c[2]) assert_equal(res, c[3], string.format("'%s' doesn't match with '%s'", c[2], c[1])) end end) + + test("Regexp split", function() + local cases = { + {'\\s', 'one two', {'one', 'two'}}, -- trivial + {'\\s', 'one two', {'one', 'two'}}, -- multiple delimiters + {'\\s', ' one two ', {'one', 'two'}}, -- multiple delimiters + {'\\s', ' one ', {'one', 'two'}}, -- multiple delimiters + {'[:,]', ',,,:::one,two,,', {'one', 'two'}}, -- multiple delimiters + } + + for _,c in ipairs(cases) do + local r = re.create_cached(c[1]) + assert_not_nil(r, "cannot parse " .. c[1]) + + local res = r:split(c[2]) + assert_not_nil(res, "cannot split " .. c[2]) + + for i,r in ipairs(res) do + assert_equal(r, c[3][i]) + end + end + end) + end ) \ No newline at end of file -- 2.39.5