diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-30 13:36:07 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-30 13:36:07 +0000 |
commit | 67a6d822c639ba55546fbcb19db267cee6ac11b2 (patch) | |
tree | ac0ba38746416317d437c6c5162e4270d299ff3f /src/lua/lua_mimepart.c | |
parent | 3f147877af03e70bac4cb9786108c2238d578038 (diff) | |
download | rspamd-67a6d822c639ba55546fbcb19db267cee6ac11b2.tar.gz rspamd-67a6d822c639ba55546fbcb19db267cee6ac11b2.zip |
[Feature] Allow to get all types of words from Lua
Diffstat (limited to 'src/lua/lua_mimepart.c')
-rw-r--r-- | src/lua/lua_mimepart.c | 40 |
1 files changed, 30 insertions, 10 deletions
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index d2ff7e8e4..14111f760 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -132,8 +132,16 @@ LUA_FUNCTION_DEF (textpart, get_stats); LUA_FUNCTION_DEF (textpart, get_words_count); /*** - * @method mime_part:get_words() - * Get words in the part + * @method mime_part:get_words([how]) + * Get words in the part. Optional `how` argument defines type of words returned: + * - `stem`: stemmed words (default) + * - `norm`: normalised words (utf normalised + lowercased) + * - `raw`: raw words in utf (if possible) + * - `full`: list of tables, each table has the following fields: + * - [1] - stemmed word + * - [2] - normalised word + * - [3] - raw word + * - [4] - flags (table of strings) * @return {table/strings} words in the part */ LUA_FUNCTION_DEF (textpart, get_words); @@ -759,8 +767,7 @@ lua_textpart_get_words (lua_State *L) { LUA_TRACE_POINT; struct rspamd_mime_text_part *part = lua_check_textpart (L); - rspamd_stat_token_t *w; - guint i; + enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM; if (part == NULL) { return luaL_error (L, "invalid arguments"); @@ -770,14 +777,27 @@ lua_textpart_get_words (lua_State *L) lua_createtable (L, 0, 0); } else { - lua_createtable (L, part->utf_words->len, 0); - - for (i = 0; i < part->utf_words->len; i ++) { - w = &g_array_index (part->utf_words, rspamd_stat_token_t, i); + if (lua_type (L, 2) == LUA_TSTRING) { + const gchar *how_str = lua_tostring (L, 2); - lua_pushlstring (L, w->stemmed.begin, w->stemmed.len); - lua_rawseti (L, -2, i + 1); + if (strcmp (how_str, "stem") == 0) { + how = RSPAMD_LUA_WORDS_STEM; + } + else if (strcmp (how_str, "norm") == 0) { + how = RSPAMD_LUA_WORDS_NORM; + } + else if (strcmp (how_str, "raw") == 0) { + how = RSPAMD_LUA_WORDS_RAW; + } + else if (strcmp (how_str, "full") == 0) { + how = RSPAMD_LUA_WORDS_FULL; + } + else { + return luaL_error (L, "unknown words type: %s", how_str); + } } + + return rspamd_lua_push_words (L, part->utf_words, how); } return 1; |