summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-10-13 14:57:56 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-10-13 14:57:56 +0100
commit 1fa77c23ac5d47b5cc1df15a6901ddedcbd2ec41 (patch)
tree 1e4a774c22091a02e40438908b235a00696062e8 /src
parent 581ce37bc844409b3df8287e9f9c181a274cac1b (diff)
download rspamd-1fa77c23ac5d47b5cc1df15a6901ddedcbd2ec41.tar.gz
rspamd-1fa77c23ac5d47b5cc1df15a6901ddedcbd2ec41.zip
[Feature] Add only unique elements to Clickhouse url arrays
Diffstat (limited to 'src')
-rw-r--r-- src/plugins/lua/clickhouse.lua 18
1 files changed, 11 insertions, 7 deletions
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 346ea2e97..d95325b24 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -493,18 +493,22 @@ local function clickhouse_collect(task)
local urls_urls = {}
if task:has_urls(false) then
for _,u in ipairs(task:get_urls(false)) do
- table.insert(urls_tlds, u:get_tld())
+ urls_tlds[u:get_tld()] = true
if settings['full_urls'] then
- table.insert(urls_urls, u:get_text())
+ urls_urls[u:get_text()] = true
else
- table.insert(urls_urls, u:get_host())
+ urls_urls[u:get_host()] = true
end
end
end
+ local flatten_urls = function(...)
+ return fun.totable(fun.map(function(k,_) return k end, ...))
+ end
+
if #urls_tlds > 0 then
- table.insert(row, urls_tlds)
- table.insert(row, urls_urls)
+ table.insert(row, flatten_urls(urls_tlds))
+ table.insert(row, flatten_urls(urls_urls))
else
table.insert(row, {})
table.insert(row, {})
@@ -512,8 +516,8 @@ local function clickhouse_collect(task)
-- Emails step
if task:has_urls(true) then
- table.insert(row, fun.totable(fun.map(function(u)
- return string.format('%s@%s', u:get_user(), u:get_host())
+ table.insert(row, flatten_urls(fun.map(function(u)
+ return string.format('%s@%s', u:get_user(), u:get_host()),true
end, task:get_emails())))
else
table.insert(row, {})