summaryrefslogtreecommitdiffstats
path: root/src/lua/lua_mimepart.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2018-11-30 13:36:07 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2018-11-30 13:36:07 +0000
commit67a6d822c639ba55546fbcb19db267cee6ac11b2 (patch)
treeac0ba38746416317d437c6c5162e4270d299ff3f /src/lua/lua_mimepart.c
parent3f147877af03e70bac4cb9786108c2238d578038 (diff)
downloadrspamd-67a6d822c639ba55546fbcb19db267cee6ac11b2.tar.gz
rspamd-67a6d822c639ba55546fbcb19db267cee6ac11b2.zip
[Feature] Allow to get all types of words from Lua
Diffstat (limited to 'src/lua/lua_mimepart.c')
-rw-r--r--src/lua/lua_mimepart.c40
1 files changed, 30 insertions, 10 deletions
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index d2ff7e8e4..14111f760 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -132,8 +132,16 @@ LUA_FUNCTION_DEF (textpart, get_stats);
LUA_FUNCTION_DEF (textpart, get_words_count);
/***
- * @method mime_part:get_words()
- * Get words in the part
+ * @method mime_part:get_words([how])
+ * Get words in the part. Optional `how` argument defines type of words returned:
+ * - `stem`: stemmed words (default)
+ * - `norm`: normalised words (utf normalised + lowercased)
+ * - `raw`: raw words in utf (if possible)
+ * - `full`: list of tables, each table has the following fields:
+ * - [1] - stemmed word
+ * - [2] - normalised word
+ * - [3] - raw word
+ * - [4] - flags (table of strings)
* @return {table/strings} words in the part
*/
LUA_FUNCTION_DEF (textpart, get_words);
@@ -759,8 +767,7 @@ lua_textpart_get_words (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_mime_text_part *part = lua_check_textpart (L);
- rspamd_stat_token_t *w;
- guint i;
+ enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
if (part == NULL) {
return luaL_error (L, "invalid arguments");
@@ -770,14 +777,27 @@ lua_textpart_get_words (lua_State *L)
lua_createtable (L, 0, 0);
}
else {
- lua_createtable (L, part->utf_words->len, 0);
-
- for (i = 0; i < part->utf_words->len; i ++) {
- w = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
+ if (lua_type (L, 2) == LUA_TSTRING) {
+ const gchar *how_str = lua_tostring (L, 2);
- lua_pushlstring (L, w->stemmed.begin, w->stemmed.len);
- lua_rawseti (L, -2, i + 1);
+ if (strcmp (how_str, "stem") == 0) {
+ how = RSPAMD_LUA_WORDS_STEM;
+ }
+ else if (strcmp (how_str, "norm") == 0) {
+ how = RSPAMD_LUA_WORDS_NORM;
+ }
+ else if (strcmp (how_str, "raw") == 0) {
+ how = RSPAMD_LUA_WORDS_RAW;
+ }
+ else if (strcmp (how_str, "full") == 0) {
+ how = RSPAMD_LUA_WORDS_FULL;
+ }
+ else {
+ return luaL_error (L, "unknown words type: %s", how_str);
+ }
}
+
+ return rspamd_lua_push_words (L, part->utf_words, how);
}
return 1;