diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-10-13 14:57:56 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-10-13 14:57:56 +0100 |
commit | 1fa77c23ac5d47b5cc1df15a6901ddedcbd2ec41 (patch) | |
tree | 1e4a774c22091a02e40438908b235a00696062e8 /src/plugins | |
parent | 581ce37bc844409b3df8287e9f9c181a274cac1b (diff) | |
download | rspamd-1fa77c23ac5d47b5cc1df15a6901ddedcbd2ec41.tar.gz rspamd-1fa77c23ac5d47b5cc1df15a6901ddedcbd2ec41.zip |
[Feature] Add only unique elements to Clickhouse url arrays
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/lua/clickhouse.lua | 18 |
1 files changed, 11 insertions, 7 deletions
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 346ea2e97..d95325b24 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -493,18 +493,22 @@ local function clickhouse_collect(task)
   local urls_urls = {}
   if task:has_urls(false) then
     for _,u in ipairs(task:get_urls(false)) do
-      table.insert(urls_tlds, u:get_tld())
+      urls_tlds[u:get_tld()] = true
       if settings['full_urls'] then
-        table.insert(urls_urls, u:get_text())
+        urls_urls[u:get_text()] = true
       else
-        table.insert(urls_urls, u:get_host())
+        urls_urls[u:get_host()] = true
       end
     end
   end
 
+  local flatten_urls = function(...)
+    return fun.totable(fun.map(function(k,_) return k end, ...))
+  end
+
   if #urls_tlds > 0 then
-    table.insert(row, urls_tlds)
-    table.insert(row, urls_urls)
+    table.insert(row, flatten_urls(urls_tlds))
+    table.insert(row, flatten_urls(urls_urls))
   else
     table.insert(row, {})
     table.insert(row, {})
@@ -512,8 +516,8 @@ local function clickhouse_collect(task)
 
   -- Emails step
   if task:has_urls(true) then
-    table.insert(row, fun.totable(fun.map(function(u)
-      return string.format('%s@%s', u:get_user(), u:get_host())
+    table.insert(row, flatten_urls(fun.map(function(u)
+      return string.format('%s@%s', u:get_user(), u:get_host()),true
     end, task:get_emails())))
   else
     table.insert(row, {})