From 6c9aa44cf660d804345a6881c7ac6bf66d65b07a Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 19 Jun 2018 16:42:03 +0100 Subject: [PATCH] [Feature] Allow rspamadm mime to process multiple files --- lualib/rspamadm/mime.lua | 284 +++++++++++++++++++++++---------------- 1 file changed, 168 insertions(+), 116 deletions(-) diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua index 37a7d66de..dc46a2a06 100644 --- a/lualib/rspamadm/mime.lua +++ b/lualib/rspamadm/mime.lua @@ -15,6 +15,7 @@ limitations under the License. ]]-- local argparse = require "argparse" +local ansicolors = require "ansicolors" --local rspamd_util = require "rspamd_util" local rspamd_task = require "rspamd_task" local rspamd_logger = require "rspamd_logger" @@ -42,7 +43,7 @@ local extract = parser:command "extract ex e" extract:argument "file" :description "File to process" :argname "" - :args "1" + :args "+" extract:mutex( extract:flag "-t --text" @@ -61,13 +62,15 @@ extract:option "-o --output" decoded_utf = "raw_utf" } :default "content" +extract:flag "--no-file" + :description "Do not print filename" local stat = parser:command "stat st s" :description "Extracts statistical data from MIME messages" stat:argument "file" :description "File to process" :argname "" - :args "1" + :args "+" stat:mutex( stat:flag "-m --meta" :description "Lua metatokens", @@ -76,13 +79,15 @@ stat:mutex( stat:flag "-F --fuzzy" :description "Fuzzy hashes" ) +stat:flag "--no-file" + :description "Do not print filename" local urls = parser:command "urls url u" :description "Extracts URLs from MIME messages" urls:argument "file" :description "File to process" :argname "" - :args "1" + :args "+" urls:mutex( urls:flag "-t --tld" :description "Get TLDs only", @@ -100,6 +105,8 @@ urls:flag "-c --count" :description "Print count of each printed element" urls:flag "-r --reverse" :description "Reverse sort order" +urls:flag "--no-file" + :description "Do not print filename" local function load_config(opts) 
@@ -117,182 +124,221 @@ local function load_config(opts) end end -local function load_task(opts) - if not opts.file then +local function load_task(opts, fname) + if not fname then parser:error('no file specified') end - local res,task = rspamd_task.load_from_file(opts.file) + local res,task = rspamd_task.load_from_file(fname) if not res then - parser:error(string.format('cannot read message from %s: %s', opts.file, + parser:error(string.format('cannot read message from %s: %s', fname, task)) end if not task:process_message() then - parser:error(string.format('cannot read message from %s: %s', opts.file, + parser:error(string.format('cannot read message from %s: %s', fname, 'failed to parse')) end return task end +local function highlight(fmt, ...) + return ansicolors.white .. string.format(fmt, ...) .. ansicolors.reset +end + +local function maybe_print_fname(opts, fname) + if not opts.json and not (opts.no_file or opts['no-file']) then + rspamd_logger.messagex(highlight('File: %s', fname)) + end +end + local function extract_handler(opts) - local task = load_task(opts) - - if opts.text or opts.html then - local tp = task:get_text_parts() or {} - - for _,part in ipairs(tp) do - local how = opts.output - if opts.text and not part:is_html() then - part:get_content(how):write() - io.write('\n') - elseif opts.html and part:is_html() then - part:get_content(how):write() - io.write('\n') + for i,fname in ipairs(opts.file) do + local task = load_task(opts, fname) + + maybe_print_fname(opts, fname) + + if opts.text or opts.html then + local tp = task:get_text_parts() or {} + + for _,part in ipairs(tp) do + local how = opts.output + if opts.text and not part:is_html() then + part:get_content(how):write() + io.write('\n') + elseif opts.html and part:is_html() then + part:get_content(how):write() + io.write('\n') + end end end - end - task:destroy() -- No automatic dtor + task:destroy() -- No automatic dtor + end end local function stat_handler(opts) load_config(opts) 
rspamd_url.init(rspamd_config:get_tld_path()) rspamd_config:init_subsystem('langdet,stat') -- Needed to gen stat tokens - local task = load_task(opts) - if opts.meta then - local mt = lua_meta.gen_metatokens_table(task) - for k,v in pairs(mt) do - rspamd_logger.messagex('%s = %s', k, v) + for i,fname in ipairs(opts.file) do + local task = load_task(opts, fname) + + maybe_print_fname(opts, fname) + if opts.meta then + local mt = lua_meta.gen_metatokens_table(task) + for k,v in pairs(mt) do + rspamd_logger.messagex('%s = %s', k, v) + end end - end - task:destroy() -- No automatic dtor + task:destroy() -- No automatic dtor + + if i > 1 then + rspamd_logger.messagex('') + end + end end local function urls_handler(opts) load_config(opts) rspamd_url.init(rspamd_config:get_tld_path()) - local task = load_task(opts) - local elts = {} - - local function process_url(u) - local s - if opts.tld then - s = u:get_tld() - elseif opts.host then - s = u:get_host() - elseif opts.json then - s = u:get_text() - else - s = u:get_text() - end - if opts.unique then - if elts[s] then - elts[s].count = elts[s].count + 1 + if opts.json then rspamd_logger.messagex('[') end + + for i,fname in ipairs(opts.file) do + local task = load_task(opts, fname) + + maybe_print_fname(opts, fname) + if opts.json then rspamd_logger.messagex('{"file":"%s",', fname) end + local elts = {} + + local function process_url(u) + local s + if opts.tld then + s = u:get_tld() + elseif opts.host then + s = u:get_host() + elseif opts.json then + s = u:get_text() else - elts[s] = { - count = 1, - url = u - } + s = u:get_text() end - else - if opts.json then - table.insert(elts, u) + + if opts.unique then + if elts[s] then + elts[s].count = elts[s].count + 1 + else + elts[s] = { + count = 1, + url = u + } + end else - table.insert(elts, s) + if opts.json then + table.insert(elts, u) + else + table.insert(elts, s) + end end end - end - for _,u in ipairs(task:get_urls(true)) do - 
process_url(u) - end + for _,u in ipairs(task:get_urls(true)) do + process_url(u) + end - local json_elts = {} + local json_elts = {} - local function process_elt(s, u) - if opts.unique then - -- s is string, u is {url = url, count = count } - if not opts.json then - if opts.count then - rspamd_logger.messagex('%s : %s', s, u.count) + local function process_elt(s, u) + if opts.unique then + -- s is string, u is {url = url, count = count } + if not opts.json then + if opts.count then + rspamd_logger.messagex('%s : %s', s, u.count) + else + rspamd_logger.messagex('%s', s) + end else - rspamd_logger.messagex('%s', s) + local tb = u.url:to_table() + tb.count = u.count + table.insert(json_elts, tb) end else - local tb = u.url:to_table() - tb.count = u.count - table.insert(json_elts, tb) - end - else - -- s is index, u is url or string - if opts.json then - local tb = u:to_table() - table.insert(json_elts, tb) - else - rspamd_logger.messagex('%s', u) + -- s is index, u is url or string + if opts.json then + local tb = u:to_table() + table.insert(json_elts, tb) + else + rspamd_logger.messagex('%s', u) + end end end - end - if opts.sort then - local sfunc - if opts.unique then - sfunc = function(t, a, b) - if t[a].count ~= t[b].count then - if opts.reverse then - return t[a].count > t[b].count + if opts.sort then + local sfunc + if opts.unique then + sfunc = function(t, a, b) + if t[a].count ~= t[b].count then + if opts.reverse then + return t[a].count > t[b].count + else + return t[a].count < t[b].count + end else - return t[a].count < t[b].count + -- Sort lexicography + if opts.reverse then + return a > b + else + return a < b + end + end + end + else + sfunc = function(t, a, b) + local va, vb + if opts.json then + va = t[a]:get_text() + vb = t[b]:get_text() + else + va = t[a] + vb = t[b] end - else - -- Sort lexicography if opts.reverse then - return a > b + return va > vb else - return a < b + return va < vb end end end + + + for s,u in lua_util.spairs(elts, sfunc) do 
+ process_elt(s, u) + end else - sfunc = function(t, a, b) - local va, vb - if opts.json then - va = t[a]:get_text() - vb = t[b]:get_text() - else - va = t[a] - vb = t[b] - end - if opts.reverse then - return va > vb - else - return va < vb - end + for s,u in pairs(elts) do + process_elt(s, u) end end - - for s,u in lua_util.spairs(elts, sfunc) do - process_elt(s, u) + if opts.json then + rspamd_logger.messagex('"urls": %s', ucl.to_format(json_elts, 'json')) end - else - for s,u in pairs(elts) do - process_elt(s, u) + + if opts.json then + if i == #opts.file then + rspamd_logger.messagex('}') + else + rspamd_logger.messagex('},') + end end - end - if opts.json then - rspamd_logger.messagex('%s', ucl.to_format(json_elts, 'json')) + task:destroy() -- No automatic dtor end - - task:destroy() -- No automatic dtor + if opts.json then rspamd_logger.messagex(']') end end local function handler(args) @@ -300,6 +346,12 @@ local function handler(args) local command = opts.command + if type(opts.file) == 'string' then + opts.file = {opts.file} + elseif type(opts.file) == 'nil' then + opts.file = {} + end + if command == 'extract' then extract_handler(opts) elseif command == 'stat' then -- 2.39.5