diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-12-05 18:06:12 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-12-05 18:06:12 +0000 |
commit | e1b043f8bf7970278f55ae7ca1a106dee6c4fa98 (patch) | |
tree | 7c4e4205a7eb9341fa1196d8fda5997d20b6976c /src/lua/lua_common.c | |
parent | d027dca0f2c45caa9d8a26e476d44f94a92f639e (diff) | |
download | rspamd-e1b043f8bf7970278f55ae7ca1a106dee6c4fa98.tar.gz rspamd-e1b043f8bf7970278f55ae7ca1a106dee6c4fa98.zip |
[Feature] Add method task:lookup_words
Diffstat (limited to 'src/lua/lua_common.c')
-rw-r--r-- | src/lua/lua_common.c | 153 |
1 files changed, 80 insertions, 73 deletions
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 7bb45f347..01d5dc869 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -2408,12 +2408,90 @@ rspamd_lua_try_load_redis (lua_State *L, const ucl_object_t *obj, return FALSE; } +void +rspamd_lua_push_full_word (lua_State *L, rspamd_stat_token_t *w) +{ + gint fl_cnt; + + lua_createtable (L, 4, 0); + + if (w->stemmed.len > 0) { + lua_pushlstring (L, w->stemmed.begin, w->stemmed.len); + lua_rawseti (L, -2, 1); + } + else { + lua_pushstring (L, ""); + lua_rawseti (L, -2, 1); + } + + if (w->normalized.len > 0) { + lua_pushlstring (L, w->normalized.begin, w->normalized.len); + lua_rawseti (L, -2, 2); + } + else { + lua_pushstring (L, ""); + lua_rawseti (L, -2, 2); + } + + if (w->original.len > 0) { + lua_pushlstring (L, w->original.begin, w->original.len); + lua_rawseti (L, -2, 3); + } + else { + lua_pushstring (L, ""); + lua_rawseti (L, -2, 3); + } + + /* Flags part */ + fl_cnt = 1; + lua_createtable (L, 4, 0); + + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_NORMALISED) { + lua_pushstring (L, "normalised"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE) { + lua_pushstring (L, "broken_unicode"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) { + lua_pushstring (L, "utf"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) { + lua_pushstring (L, "text"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_HEADER) { + lua_pushstring (L, "header"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_META|RSPAMD_STAT_TOKEN_FLAG_LUA_META)) { + lua_pushstring (L, "meta"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_STOP_WORD) { + lua_pushstring (L, "stop_word"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES) { + lua_pushstring (L, "invisible_spaces"); + lua_rawseti (L, -2, fl_cnt ++); + } + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_STEMMED) { + lua_pushstring (L, "stemmed"); + lua_rawseti (L, -2, fl_cnt ++); + } + + lua_rawseti (L, -2, 4); +} + gint rspamd_lua_push_words (lua_State *L, GArray *words, enum rspamd_lua_words_type how) { rspamd_stat_token_t *w; - guint i, cnt, fl_cnt; + guint i, cnt; lua_createtable (L, words->len, 0); @@ -2440,78 +2518,7 @@ rspamd_lua_push_words (lua_State *L, GArray *words, } break; case RSPAMD_LUA_WORDS_FULL: - lua_createtable (L, 4, 0); - - if (w->stemmed.len > 0) { - lua_pushlstring (L, w->stemmed.begin, w->stemmed.len); - lua_rawseti (L, -2, 1); - } - else { - lua_pushstring (L, ""); - lua_rawseti (L, -2, 1); - } - - if (w->normalized.len > 0) { - lua_pushlstring (L, w->normalized.begin, w->normalized.len); - lua_rawseti (L, -2, 2); - } - else { - lua_pushstring (L, ""); - lua_rawseti (L, -2, 2); - } - - if (w->original.len > 0) { - lua_pushlstring (L, w->original.begin, w->original.len); - lua_rawseti (L, -2, 3); - } - else { - lua_pushstring (L, ""); - lua_rawseti (L, -2, 3); - } - - /* Flags part */ - fl_cnt = 1; - lua_createtable (L, 4, 0); - - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_NORMALISED) { - lua_pushstring (L, "normalised"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE) { - lua_pushstring (L, "broken_unicode"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) { - lua_pushstring (L, "utf"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) { - lua_pushstring (L, "text"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_HEADER) { - lua_pushstring (L, "header"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_META|RSPAMD_STAT_TOKEN_FLAG_LUA_META)) { - lua_pushstring (L, "meta"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_STOP_WORD) { - lua_pushstring (L, "stop_word"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES) { - lua_pushstring (L, "invisible_spaces"); - lua_rawseti (L, -2, fl_cnt ++); - } - if (w->flags & RSPAMD_STAT_TOKEN_FLAG_STEMMED) { - lua_pushstring (L, "stemmed"); - lua_rawseti (L, -2, fl_cnt ++); - } - - lua_rawseti (L, -2, 4); - + rspamd_lua_push_full_word (L, w); /* Push to the resulting vector */ lua_rawseti (L, -2, cnt ++); break; |