about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-06-25 16:28:43 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-06-25 16:28:43 +0100
commit 592ce3ab0a40fe016b0d90424d8eacc1e3025412 (patch)
tree 249cdb4b5d6d2117c697edcf8ae33966a25fd21c
parent 3c353232c9f9c56efa5b9e7f58402111cc3524b8 (diff)
download rspamd-592ce3ab0a40fe016b0d90424d8eacc1e3025412.tar.gz
download rspamd-592ce3ab0a40fe016b0d90424d8eacc1e3025412.zip
[Feature] Allow to extract words in `rspamadm mime`
-rw-r--r-- lualib/rspamadm/mime.lua 24
1 files changed, 22 insertions, 2 deletions
diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua
index 300dd43ae..ceb2894c5 100644
--- a/lualib/rspamadm/mime.lua
+++ b/lualib/rspamadm/mime.lua
@@ -72,6 +72,8 @@ extract:option "-o --output"
decoded_utf = "raw_utf"
}
:default "content"
+extract:flag "-w --words"
+ :description "Extracts words"
local stat = parser:command "stat st s"
@@ -185,19 +187,37 @@ end
local function extract_handler(opts)
local out_elts = {}
+
+ if opts.words then
+ -- Enable stemming
+ rspamd_config:init_subsystem('langdet')
+ end
+
for _,fname in ipairs(opts.file) do
local task = load_task(opts, fname)
out_elts[fname] = {}
+ if not opts.text and not opts.html then
+ parser:error('please select html or text part to be extracted')
+ end
+
if opts.text or opts.html then
local tp = task:get_text_parts() or {}
for _,part in ipairs(tp) do
local how = opts.output
if opts.text and not part:is_html() then
- table.insert(out_elts[fname], tostring(part:get_content(how)))
+ if opts.words then
+ table.insert(out_elts[fname], table.concat(part:get_words(), ' '))
+ else
+ table.insert(out_elts[fname], tostring(part:get_content(how)))
+ end
elseif opts.html and part:is_html() then
- table.insert(out_elts[fname], tostring(part:get_content(how)))
+ if opts.words then
+ table.insert(out_elts[fname], table.concat(part:get_words(), ' '))
+ else
+ table.insert(out_elts[fname], tostring(part:get_content(how)))
+ end
end
end
end