diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-02-15 19:48:21 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-02-15 19:48:21 +0300 |
commit | 21596f21a35a4d0e110c04824dcda5ef37450003 (patch) | |
tree | 2228e8678b8f72d07b5f84b3efbc3d20cda6c5fc | |
parent | 17a938b2aeffb3448c8d61f3d8dd7d012eb910cc (diff) | |
download | rspamd-21596f21a35a4d0e110c04824dcda5ef37450003.tar.gz rspamd-21596f21a35a4d0e110c04824dcda5ef37450003.zip |
* Add a new Lua module, regexp, for using GLib regular expressions (PCRE-compatible)
-rw-r--r-- | src/expressions.c | 17 | ||||
-rw-r--r-- | src/expressions.h | 10 | ||||
-rw-r--r-- | src/lua/CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/lua/lua_common.c | 3 | ||||
-rw-r--r-- | src/lua/lua_common.h | 1 | ||||
-rw-r--r-- | src/lua/lua_regexp.c | 248 |
6 files changed, 275 insertions, 7 deletions
diff --git a/src/expressions.c b/src/expressions.c index 87ce59337..da17912ce 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -106,7 +106,7 @@ re_cache_check (const gchar *line, memory_pool_t *pool) } void -re_cache_add (gchar *line, void *pointer, memory_pool_t *pool) +re_cache_add (const gchar *line, void *pointer, memory_pool_t *pool) { GHashTable *re_cache; @@ -117,7 +117,20 @@ re_cache_add (gchar *line, void *pointer, memory_pool_t *pool) memory_pool_set_variable (pool, "re_cache", re_cache, (pool_destruct_func)g_hash_table_destroy); } - g_hash_table_insert (re_cache, line, pointer); + g_hash_table_insert (re_cache, (gpointer)line, pointer); +} + +void +re_cache_del (const gchar *line, memory_pool_t *pool) +{ + GHashTable *re_cache; + + re_cache = memory_pool_get_variable (pool, "re_cache"); + + if (re_cache != NULL) { + g_hash_table_remove (re_cache, line); + } + } /* Task cache functions */ diff --git a/src/expressions.h b/src/expressions.h index 6a25dd9e1..0c1576b36 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -88,14 +88,20 @@ void register_expression_function (const gchar *name, rspamd_internal_func_t fun * @param line symbolic representation * @param pointer regexp data */ -void re_cache_add (gchar *line, void *pointer, memory_pool_t *pool); +void re_cache_add (const gchar *line, void *pointer, memory_pool_t *pool); /** * Check regexp in cache * @param line symbolic representation * @return pointer to regexp data or NULL if regexp is not found */ -void * re_cache_check (const gchar *line, memory_pool_t *pool); +void * re_cache_check (const const gchar *line, memory_pool_t *pool); + +/** + * Remove regexp from regexp cache + * @param line symbolic representation + */ +void re_cache_del (const gchar *line, memory_pool_t *pool); /** * Add regexp to regexp task cache diff --git a/src/lua/CMakeLists.txt b/src/lua/CMakeLists.txt index c17936862..3ec763539 100644 --- a/src/lua/CMakeLists.txt +++ b/src/lua/CMakeLists.txt @@ -4,7 +4,8 @@ 
SET(LUASRC lua_common.c lua_message.c lua_config.c lua_classifier.c - lua_cfg_file.c) + lua_cfg_file.c + lua_regexp.c) ADD_LIBRARY(rspamd_lua STATIC ${LUASRC}) TARGET_LINK_LIBRARIES(rspamd_lua ${LUALIB}) diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 961a31d76..14bd6877a 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -232,13 +232,12 @@ init_lua (struct config_file *cfg) (void)luaopen_message (L); (void)luaopen_classifier (L); (void)luaopen_statfile (L); + (void)luaopen_glib_regexp (L); cfg->lua_state = L; memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)lua_close, L); } - - gboolean init_lua_filters (struct config_file *cfg) { diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index 4851dc1a6..f6b69e0e0 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -35,6 +35,7 @@ gint luaopen_image (lua_State *L); gint luaopen_url (lua_State *L); gint luaopen_classifier (lua_State *L); gint luaopen_statfile (lua_State * L); +gint luaopen_glib_regexp (lua_State *L); void init_lua (struct config_file *cfg); gboolean init_lua_filters (struct config_file *cfg); diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c new file mode 100644 index 000000000..ee8579dec --- /dev/null +++ b/src/lua/lua_regexp.c @@ -0,0 +1,248 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "lua_common.h" +#include "../expressions.h" + +LUA_FUNCTION_DEF (regexp, create); +LUA_FUNCTION_DEF (regexp, get_cached); +LUA_FUNCTION_DEF (regexp, get_pattern); +LUA_FUNCTION_DEF (regexp, match); +LUA_FUNCTION_DEF (regexp, split); +LUA_FUNCTION_DEF (regexp, destroy); + +static const struct luaL_reg regexplib_m[] = { + LUA_INTERFACE_DEF (regexp, get_pattern), + LUA_INTERFACE_DEF (regexp, match), + LUA_INTERFACE_DEF (regexp, split), + LUA_INTERFACE_DEF (regexp, destroy), + {"__tostring", lua_class_tostring}, + {NULL, NULL} +}; +static const struct luaL_reg regexplib_f[] = { + LUA_INTERFACE_DEF (regexp, create), + LUA_INTERFACE_DEF (regexp, get_cached), + {NULL, NULL} +}; + +memory_pool_t *regexp_static_pool = NULL; + +static GRegex * +lua_check_regexp (lua_State * L) +{ + void *ud = luaL_checkudata (L, 1, "rspamd{regexp}"); + + luaL_argcheck (L, ud != NULL, 1, "'regexp' expected"); + return *((GRegex **)ud); +} + +static int +lua_regexp_create (lua_State *L) +{ + gint regexp_flags = 0; + GRegex *new, **pnew; + const gchar *string, *flags_str = NULL; + GError *err = NULL; + + string = luaL_checkstring (L, 1); + if (lua_gettop (L) == 2) { + flags_str = luaL_checkstring (L, 2); + } + + if (flags_str) { + while (*flags_str) { + switch 
(*flags_str) { + case 'i': + regexp_flags |= G_REGEX_CASELESS; + break; + case 'm': + regexp_flags |= G_REGEX_MULTILINE; + break; + case 's': + regexp_flags |= G_REGEX_DOTALL; + break; + case 'x': + regexp_flags |= G_REGEX_EXTENDED; + break; + case 'u': + regexp_flags |= G_REGEX_UNGREEDY; + break; + case 'o': + regexp_flags |= G_REGEX_OPTIMIZE; + break; + case 'r': + regexp_flags |= G_REGEX_RAW; + break; + default: + msg_info ("invalid regexp flag: %c", *flags_str); + break; + } + flags_str ++; + } + } + + new = g_regex_new (string, regexp_flags, 0, &err); + if (new == NULL) { + lua_pushnil (L); + msg_info ("cannot parse regexp: %s, error: %s", string, err == NULL ? "undefined" : err->message); + } + else { + pnew = lua_newuserdata (L, sizeof (GRegex *)); + lua_setclass (L, "rspamd{regexp}", -1); + *pnew = new; + re_cache_add (g_regex_get_pattern (new), new, regexp_static_pool); + } + + return 1; +} + +static int +lua_regexp_get_cached (lua_State *L) +{ + GRegex *new, **pnew; + const gchar *line; + + line = luaL_checkstring (L, 1); + new = re_cache_check (line, regexp_static_pool); + if (new) { + pnew = lua_newuserdata (L, sizeof (GRegex *)); + lua_setclass (L, "rspamd{regexp}", -1); + *pnew = new; + } + else { + lua_pushnil (L); + } + + return 1; +} + +static int +lua_regexp_get_pattern (lua_State *L) +{ + GRegex *re = lua_check_regexp (L); + + if (re) { + lua_pushstring (L, g_regex_get_pattern (re)); + } + + return 1; +} + +static int +lua_regexp_match (lua_State *L) +{ + GRegex *re = lua_check_regexp (L); + GMatchInfo *mi; + const gchar *data; + gchar **matches; + gint i; + + if (re) { + data = luaL_checkstring (L, 2); + if (data) { + if (g_regex_match_full (re, data, -1, 0, 0, &mi, NULL)) { + matches = g_match_info_fetch_all (mi); + lua_newtable (L); + for (i = 1; matches[i - 1] != NULL; i ++) { + lua_pushstring (L, matches[i - 1]); + lua_rawseti (L, -2, i); + } + g_strfreev (matches); + } + else { + lua_pushnil (L); + } + g_match_info_free (mi); + return 1; + 
} + } + + lua_pushnil (L); + return 1; +} + +static int +lua_regexp_split (lua_State *L) +{ + GRegex *re = lua_check_regexp (L); + const gchar *data; + gchar **parts; + gint i; + + if (re) { + data = luaL_checkstring (L, 2); + if (data) { + parts = g_regex_split (re, data, 0); + lua_newtable (L); + for (i = 1; parts[i - 1] != NULL; i ++) { + lua_pushstring (L, parts[i - 1]); + lua_rawseti (L, -2, i); + } + g_strfreev (parts); + return 1; + } + } + + lua_pushnil (L); + return 1; +} + +/* + * We are not using __gc metamethod as it is usually a good idea to have + * compiled regexps to be stored permanently, so this method can be used + * for avoiding memory leaks for temporary regexps + * + * + */ +static gint +lua_regexp_destroy (lua_State *L) +{ + GRegex *to_del = lua_check_regexp (L); + + if (to_del) { + re_cache_del (g_regex_get_pattern (to_del), regexp_static_pool); + g_regex_unref (to_del); + } + + return 0; +} + +gint +luaopen_glib_regexp (lua_State * L) +{ + luaL_newmetatable (L, "rspamd{regexp}"); + lua_pushstring (L, "__index"); + lua_pushvalue (L, -2); + lua_settable (L, -3); + + lua_pushstring (L, "class"); + lua_pushstring (L, "rspamd{regexp}"); + lua_rawset (L, -3); + + luaL_openlib (L, NULL, regexplib_m, 0); + luaL_openlib(L, "regexp", regexplib_f, 0); + + regexp_static_pool = memory_pool_new (memory_pool_get_size ()); + + return 1; +} |