diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-02 19:09:43 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-02 19:09:43 +0000 |
commit | d264b1dc674d09026135e7421f136447f44e14ca (patch) | |
tree | bc24289124c526781947f69dfb85f4eb1bda24c5 /src/plugins/lua/clickhouse.lua | |
parent | da780b0c63818bf1bf4e1a940850f079d1cd1f9e (diff) | |
download | rspamd-d264b1dc674d09026135e7421f136447f44e14ca.tar.gz rspamd-d264b1dc674d09026135e7421f136447f44e14ca.zip |
[Fix] Fix urls insertion in Clickhouse module
Diffstat (limited to 'src/plugins/lua/clickhouse.lua')
-rw-r--r-- | src/plugins/lua/clickhouse.lua | 23 |
1 file changed, 12 insertions, 11 deletions
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua index 87ae8dff6..68a435c6c 100644 --- a/src/plugins/lua/clickhouse.lua +++ b/src/plugins/lua/clickhouse.lua @@ -489,26 +489,26 @@ local function clickhouse_collect(task) table.insert(row, {}) end - local flatten_urls = function(...) - return fun.totable(fun.map(function(k,_) return k end, ...)) + local flatten_urls = function(f, ...) + return fun.totable(fun.map(function(k,v) return f(k,v) end, ...)) end -- Urls step - local urls_tlds = {} local urls_urls = {} if task:has_urls(false) then for _,u in ipairs(task:get_urls(false)) do - urls_tlds[u:get_tld()] = true if settings['full_urls'] then - urls_urls[u:get_text()] = true + urls_urls[u:get_text()] = u else - urls_urls[u:get_host()] = true + urls_urls[u:get_host()] = u end end - table.insert(row, flatten_urls(urls_tlds)) - table.insert(row, flatten_urls(urls_urls)) + -- Get tlds + table.insert(row, flatten_urls(function(_,u) return u:get_tld() end, urls_urls)) + -- Get hosts/full urls + table.insert(row, flatten_urls(function(k, _) return k end, urls_urls)) else table.insert(row, {}) table.insert(row, {}) @@ -516,9 +516,10 @@ local function clickhouse_collect(task) -- Emails step if task:has_urls(true) then - table.insert(row, flatten_urls(fun.map(function(u) - return string.format('%s@%s', u:get_user(), u:get_host()),true - end, task:get_emails()))) + table.insert(row, flatten_urls(function(k, _) return k end, + fun.map(function(u) + return string.format('%s@%s', u:get_user(), u:get_host()),true + end, task:get_emails()))) else table.insert(row, {}) end |