From 67a6d822c639ba55546fbcb19db267cee6ac11b2 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 30 Nov 2018 13:36:07 +0000 Subject: [Feature] Allow to get all types of words from Lua --- src/lua/lua_mimepart.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) (limited to 'src/lua/lua_mimepart.c') diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index d2ff7e8e4..14111f760 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -132,8 +132,16 @@ LUA_FUNCTION_DEF (textpart, get_stats); LUA_FUNCTION_DEF (textpart, get_words_count); /*** - * @method mime_part:get_words() - * Get words in the part + * @method mime_part:get_words([how]) + * Get words in the part. Optional `how` argument defines type of words returned: + * - `stem`: stemmed words (default) + * - `norm`: normalised words (utf normalised + lowercased) + * - `raw`: raw words in utf (if possible) + * - `full`: list of tables, each table has the following fields: + * - [1] - stemmed word + * - [2] - normalised word + * - [3] - raw word + * - [4] - flags (table of strings) * @return {table/strings} words in the part */ LUA_FUNCTION_DEF (textpart, get_words); @@ -759,8 +767,7 @@ lua_textpart_get_words (lua_State *L) { LUA_TRACE_POINT; struct rspamd_mime_text_part *part = lua_check_textpart (L); - rspamd_stat_token_t *w; - guint i; + enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM; if (part == NULL) { return luaL_error (L, "invalid arguments"); @@ -770,14 +777,27 @@ lua_textpart_get_words (lua_State *L) lua_createtable (L, 0, 0); } else { - lua_createtable (L, part->utf_words->len, 0); - - for (i = 0; i < part->utf_words->len; i ++) { - w = &g_array_index (part->utf_words, rspamd_stat_token_t, i); + if (lua_type (L, 2) == LUA_TSTRING) { + const gchar *how_str = lua_tostring (L, 2); - lua_pushlstring (L, w->stemmed.begin, w->stemmed.len); - lua_rawseti (L, -2, i + 1); + if (strcmp (how_str, "stem") == 0) { + how = RSPAMD_LUA_WORDS_STEM; + } + else if (strcmp (how_str, "norm") == 0) { + how = RSPAMD_LUA_WORDS_NORM; + } + else if (strcmp (how_str, "raw") == 0) { + how = RSPAMD_LUA_WORDS_RAW; + } + else if (strcmp (how_str, "full") == 0) { + how = RSPAMD_LUA_WORDS_FULL; + } + else { + return luaL_error (L, "unknown words type: %s", how_str); + } } + + return rspamd_lua_push_words (L, part->utf_words, how); } return 1; -- cgit v1.2.3