diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-16 12:09:19 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-16 12:09:19 +0100 |
commit | 5e8eda10b5e40694952d0969bdab430468ea1ba1 (patch) | |
tree | 369090c4dbd0cbf3a452a4119a0ed525e92f20cc /src/lua | |
parent | 97b6ced5f59fae9a7c97b2ab93a9386025f2a603 (diff) | |
download | rspamd-5e8eda10b5e40694952d0969bdab430468ea1ba1.tar.gz rspamd-5e8eda10b5e40694952d0969bdab430468ea1ba1.zip |
Add Lua method to extract text from HTML.
Diffstat (limited to 'src/lua')
-rw-r--r-- | src/lua/lua_util.c | 57 |
1 file changed, 57 insertions, 0 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index ff061a927..b17eef9ae 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -24,6 +24,7 @@ #include "lua_common.h" #include "task.h" #include "main.h" +#include "html.h" #include "cfg_rcl.h" #include "tokenizers/tokenizers.h" @@ -77,6 +78,14 @@ LUA_FUNCTION_DEF (util, process_message); */ LUA_FUNCTION_DEF (util, tanh); +/*** + * @function util.parse_html(input) + * Parses HTML and returns the according text + * @param {string|text} in input HTML + * @return {rspamd_text} processed text with no HTML tags + */ +LUA_FUNCTION_DEF (util, parse_html); + static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, create_event_base), LUA_INTERFACE_DEF (util, load_rspamd_config), @@ -86,6 +95,7 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, decode_base64), LUA_INTERFACE_DEF (util, tokenize_text), LUA_INTERFACE_DEF (util, tanh), + LUA_INTERFACE_DEF (util, parse_html), {NULL, NULL} }; @@ -444,6 +454,53 @@ lua_util_tanh (lua_State *L) } static gint +lua_util_parse_html (lua_State *L) +{ + struct rspamd_lua_text *t; + const gchar *start = NULL; + gsize len; + GByteArray *res, *in; + rspamd_mempool_t *pool; + struct html_content *hc; + + if (lua_type (L, 1) == LUA_TUSERDATA) { + t = lua_check_text (L, 1); + + if (t != NULL) { + start = t->start; + len = t->len; + } + } + else if (lua_type (L, 1) == LUA_TSTRING) { + start = luaL_checklstring (L, 1, &len); + } + + if (start != NULL) { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + hc = rspamd_mempool_alloc0 (pool, sizeof (*hc)); + in = g_byte_array_sized_new (len); + g_byte_array_append (in, start, len); + + res = rspamd_html_process_part (pool, hc, in); + + t = lua_newuserdata (L, sizeof (*t)); + rspamd_lua_setclass (L, "rspamd{text}", -1); + t->start = res->data; + t->len = res->len; + t->own = TRUE; + + g_byte_array_free (res, FALSE); + g_byte_array_free (in, TRUE); + rspamd_mempool_delete (pool); + } + else { + 
lua_pushnil (L); + } + + return 1; +} + +static gint lua_load_util (lua_State * L) { lua_newtable (L); |