aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-02-22 13:46:47 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-02-22 13:46:47 +0000
commit 0f5ed273b4fd6a9fb4efc44e3de334d5487e5be7 (patch)
tree d41a184185f1d689abf1c310717d0e922ae1e6f4 /src
parent d2ceb9d5560a68dff834abcb28608f9be42982c3 (diff)
download rspamd-0f5ed273b4fd6a9fb4efc44e3de334d5487e5be7.tar.gz
rspamd-0f5ed273b4fd6a9fb4efc44e3de334d5487e5be7.zip
[Minor] Clickhouse: Restore old behaviour for full_urls
Diffstat (limited to 'src')
-rw-r--r-- src/plugins/lua/clickhouse.lua 51
1 files changed, 45 insertions, 6 deletions
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index fcf9177a4..23ed65a85 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -767,16 +767,55 @@ local function clickhouse_collect(task)
local urls_tlds = {}
local urls_flags = {}
- for i,u in ipairs(task_urls) do
- if settings['full_urls'] then
+ if settings.full_urls then
+ for i,u in ipairs(task_urls) do
urls_urls[i] = u:get_text()
- else
- urls_urls[i] = u:get_host()
+ urls_tlds[i] = u:get_tld() or u:get_host()
+ urls_flags[i] = u:get_flags_num()
+ end
+ else
+ -- We need to store unique URLs only (deduplicated by host + flags)
+ local mt = {
+ ord_tbl = {}, -- ordered list of urls
+ idx_tbl = {}, -- indexed by host + flags, reference to an index in ord_tbl
+ __newindex = function(t, k, v)
+ local idx = getmetatable(t).idx_tbl
+ local ord = getmetatable(t).ord_tbl
+ local key = k:get_host() .. tostring(k:get_flags_num())
+ if idx[key] then
+ ord[idx[key]] = v -- replace
+ else
+ ord[#ord + 1] = v
+ idx[key] = #ord
+ end
+ end,
+ __index = function(t, k)
+ local ord = getmetatable(t).ord_tbl
+ if type(k) == 'number' then
+ return ord[k]
+ else
+ local idx = getmetatable(t).idx_tbl
+ local key = k:get_host() .. tostring(k:get_flags_num())
+ if idx[key] then
+ return ord[idx[key]]
+ end
+ end
+ end,
+ }
+ -- Extra index needed for making this unique
+ local urls_idx = {}
+ setmetatable(urls_idx, mt)
+ for _,u in ipairs(task_urls) do
+ if not urls_idx[u] then
+ urls_idx[u] = u
+ urls_urls[#urls_urls + 1] = u:get_host()
+ urls_tlds[#urls_tlds + 1] = u:get_tld() or u:get_host()
+ urls_flags[#urls_flags + 1] = u:get_flags_num()
+ end
end
- urls_tlds[i] = u:get_tld() or u:get_host()
- urls_flags[i] = u:get_flags_num()
end
+
-- Get tlds
table.insert(row, urls_tlds)
-- Get hosts/full urls