#include "ottery.h"
#include "rspamd_control.h"
#include "lua_thread_pool.h"
+#include "libstat/stat_api.h"
+#include "libserver/rspamd_control.h"
+
#include <math.h>
#include <sys/wait.h>
-#include <src/libserver/rspamd_control.h>
+
/* Lua module init function */
#define MODULE_INIT_FUNC "module_init"
}
return FALSE;
+}
+
+/**
+ * Push an array of tokenised words to the Lua stack as a single table.
+ *
+ * A new table is created on top of the stack and filled according to `how`:
+ *  - RSPAMD_LUA_WORDS_STEM, RSPAMD_LUA_WORDS_NORM, RSPAMD_LUA_WORDS_RAW:
+ *    an array of strings holding the stemmed, normalised or original form of
+ *    each word; words whose selected form is empty are skipped;
+ *  - RSPAMD_LUA_WORDS_FULL: an array of tables, one per word, laid out as
+ *    { stemmed, normalised, original, flags }, where an empty form is stored
+ *    as "" and `flags` is an array of flag names.
+ *
+ * @param L Lua state
+ * @param words array of rspamd_stat_token_t; NULL is treated as an empty array
+ * @param how which representation of each word is pushed
+ * @return number of values left on the Lua stack (always 1: the table)
+ */
+gint
+rspamd_lua_push_words (lua_State *L, GArray *words,
+		enum rspamd_lua_words_type how)
+{
+	rspamd_stat_token_t *w;
+	guint i, cnt, fl_cnt;
+
+	if (words == NULL) {
+		/* Nothing to iterate over, but Lua still expects a table back */
+		lua_createtable (L, 0, 0);
+
+		return 1;
+	}
+
+	lua_createtable (L, words->len, 0);
+
+	/* `cnt` is the next free 1-based slot in the resulting Lua array */
+	for (i = 0, cnt = 1; i < words->len; i ++) {
+		w = &g_array_index (words, rspamd_stat_token_t, i);
+
+		switch (how) {
+		case RSPAMD_LUA_WORDS_STEM:
+			if (w->stemmed.len > 0) {
+				lua_pushlstring (L, w->stemmed.begin, w->stemmed.len);
+				lua_rawseti (L, -2, cnt ++);
+			}
+			break;
+		case RSPAMD_LUA_WORDS_NORM:
+			if (w->normalized.len > 0) {
+				lua_pushlstring (L, w->normalized.begin, w->normalized.len);
+				lua_rawseti (L, -2, cnt ++);
+			}
+			break;
+		case RSPAMD_LUA_WORDS_RAW:
+			if (w->original.len > 0) {
+				lua_pushlstring (L, w->original.begin, w->original.len);
+				lua_rawseti (L, -2, cnt ++);
+			}
+			break;
+		case RSPAMD_LUA_WORDS_FULL:
+			/* Per-word table: [1] stemmed, [2] normalised, [3] raw, [4] flags */
+			lua_createtable (L, 4, 0);
+
+			if (w->stemmed.len > 0) {
+				lua_pushlstring (L, w->stemmed.begin, w->stemmed.len);
+				lua_rawseti (L, -2, 1);
+			}
+			else {
+				lua_pushstring (L, "");
+				lua_rawseti (L, -2, 1);
+			}
+
+			if (w->normalized.len > 0) {
+				lua_pushlstring (L, w->normalized.begin, w->normalized.len);
+				lua_rawseti (L, -2, 2);
+			}
+			else {
+				lua_pushstring (L, "");
+				lua_rawseti (L, -2, 2);
+			}
+
+			if (w->original.len > 0) {
+				lua_pushlstring (L, w->original.begin, w->original.len);
+				lua_rawseti (L, -2, 3);
+			}
+			else {
+				lua_pushstring (L, "");
+				lua_rawseti (L, -2, 3);
+			}
+
+			/* Flags part */
+			fl_cnt = 1;
+			lua_createtable (L, 4, 0);
+
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_NORMALISED) {
+				lua_pushstring (L, "normalised");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE) {
+				lua_pushstring (L, "broken_unicode");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) {
+				lua_pushstring (L, "utf");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
+				lua_pushstring (L, "text");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_HEADER) {
+				lua_pushstring (L, "header");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_META|RSPAMD_STAT_TOKEN_FLAG_LUA_META)) {
+				lua_pushstring (L, "meta");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_STOP_WORD) {
+				lua_pushstring (L, "stop_word");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES) {
+				lua_pushstring (L, "invisible_spaces");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_STEMMED) {
+				lua_pushstring (L, "stemmed");
+				lua_rawseti (L, -2, fl_cnt ++);
+			}
+
+			/* Attach the flags table as the fourth element of the word table */
+			lua_rawseti (L, -2, 4);
+
+			/* Push to the resulting vector */
+			lua_rawseti (L, -2, cnt ++);
+			break;
+		}
+	}
+
+	/*
+	 * Must stay outside the loop: every word has to be visited before the
+	 * table is handed back, and an empty array must still return a value.
+	 */
+	return 1;
}
\ No newline at end of file
LUA_FUNCTION_DEF (textpart, get_words_count);
/***
- * @method mime_part:get_words()
- * Get words in the part
+ * @method mime_part:get_words([how])
+ * Get words in the part. Optional `how` argument defines type of words returned:
+ * - `stem`: stemmed words (default)
+ * - `norm`: normalised words (utf normalised + lowercased)
+ * - `raw`: raw words in utf (if possible)
+ * - `full`: list of tables, each table has the following fields:
+ * - [1] - stemmed word
+ * - [2] - normalised word
+ * - [3] - raw word
+ * - [4] - flags (table of strings)
* @return {table/strings} words in the part
*/
LUA_FUNCTION_DEF (textpart, get_words);
{
LUA_TRACE_POINT;
struct rspamd_mime_text_part *part = lua_check_textpart (L);
- rspamd_stat_token_t *w;
- guint i;
+ enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
if (part == NULL) {
return luaL_error (L, "invalid arguments");
lua_createtable (L, 0, 0);
}
else {
- lua_createtable (L, part->utf_words->len, 0);
-
- for (i = 0; i < part->utf_words->len; i ++) {
- w = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
+ if (lua_type (L, 2) == LUA_TSTRING) {
+ const gchar *how_str = lua_tostring (L, 2);
- lua_pushlstring (L, w->stemmed.begin, w->stemmed.len);
- lua_rawseti (L, -2, i + 1);
+ if (strcmp (how_str, "stem") == 0) {
+ how = RSPAMD_LUA_WORDS_STEM;
+ }
+ else if (strcmp (how_str, "norm") == 0) {
+ how = RSPAMD_LUA_WORDS_NORM;
+ }
+ else if (strcmp (how_str, "raw") == 0) {
+ how = RSPAMD_LUA_WORDS_RAW;
+ }
+ else if (strcmp (how_str, "full") == 0) {
+ how = RSPAMD_LUA_WORDS_FULL;
+ }
+ else {
+ return luaL_error (L, "unknown words type: %s", how_str);
+ }
}
+
+ return rspamd_lua_push_words (L, part->utf_words, how);
}
return 1;