From: Vsevolod Stakhov Date: Tue, 23 Jul 2019 16:51:19 +0000 (+0100) Subject: [Feature] Lua_selectors: Add `words` selector X-Git-Tag: 2.0~506 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=0de3deeadb49872e4b2eecf74faa2741d45a2bab;p=rspamd.git [Feature] Lua_selectors: Add `words` selector --- diff --git a/lualib/lua_selectors.lua b/lualib/lua_selectors.lua index 2a4782e8b..efe332878 100644 --- a/lualib/lua_selectors.lua +++ b/lualib/lua_selectors.lua @@ -358,7 +358,35 @@ The first argument must be header name.]], The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]], ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(), ts.string:is_optional()} - } + }, + -- Get text words from a message + ['words'] = { + ['get_value'] = function(task, args) + local how = args[1] or 'stem' + local tp = task:get_text_parts() + + if tp then + local rtype = 'string_list' + if how == 'full' then + rtype = 'table_list' + end + + return lua_util.flatten( + fun.map(function(p) + return p:get_words(how) + end, tp)), rtype + end + + return nil + end, + ['description'] = [[Get words from text parts + - `stem`: stemmed words (default) + - `raw`: raw words + - `norm`: normalised words (lowercased) + - `full`: list of tables + ]], + ['args_schema'] = { ts.one_of { 'stem', 'raw', 'norm', 'full' }:is_optional()}, + }, } local function pure_type(ltype) diff --git a/test/lua/unit/selectors.lua b/test/lua/unit/selectors.lua index b5152e347..2fb839be1 100644 --- a/test/lua/unit/selectors.lua +++ b/test/lua/unit/selectors.lua @@ -69,7 +69,7 @@ context("Selectors test", function() ["digest"] = { selector = "digest", - expect = {"1649c0cbbd127660095d4f44e15e8b60"} + expect = {"f46ccafe448fe4d7b46076938749695e"} }, ["user"] = { @@ -267,6 +267,10 @@ context("Selectors test", function() selector = "list('key', 'key1', 'key2', 'key3').apply_map(test_map).uniq", expect = {{'value1', 'value'}} }, + ["words"] = { + selector = "words('norm')", + expect = {{'hello', 'world', 'mail', 'me'}} + }, } for case_name, case in pairs(cases) do @@ -295,7 +299,7 @@ Content-Type: multipart/alternative; --_000_6be055295eab48a5af7ad4022f33e2d0_ Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: base64 +Content-Transfer-Encoding: 7bit Hello world