aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/lua/clickhouse.lua
diff options
context:
space:
mode:
authorAnton Yuzhaninov <citrin+github@citrin.ru>2019-06-26 11:25:40 +0100
committerAnton Yuzhaninov <citrin+github@citrin.ru>2019-06-26 11:25:40 +0100
commit2fd8ae45023bc225bdb2970581452a9c700555db (patch)
tree6ee5a66101b6ca28c811e68c306422a3e352e7bd /src/plugins/lua/clickhouse.lua
parent3c4d34b9441e6cd1ed4135db2a336e3f7ed4d72c (diff)
downloadrspamd-2fd8ae45023bc225bdb2970581452a9c700555db.tar.gz
rspamd-2fd8ae45023bc225bdb2970581452a9c700555db.zip
[Rework] Do not lowercase all data sent to ClickHouse
A lot of strings stored in ClickHouse are case sensitive according to standards - store them in original case. We can always use 'lower(field)' in a ClickHouse query, but if a string was lowercased by Rspamd, nothing can be done to recover the lost information. Lowercase domain parts of addresses - domains are not case sensitive and storing them in lower case will simplify queries.
Diffstat (limited to 'src/plugins/lua/clickhouse.lua')
-rw-r--r--src/plugins/lua/clickhouse.lua12
1 files changed, 7 insertions, 5 deletions
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 9c8f7b631..f62bda2c6 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -426,7 +426,7 @@ local function clickhouse_collect(task)
local from = task:get_from('smtp')[1]
if from then
- from_domain = from['domain']
+ from_domain = from['domain']:lower()
from_user = from['user']
end
@@ -446,15 +446,17 @@ local function clickhouse_collect(task)
if task:has_from('mime') then
local from = task:get_from({'mime','orig'})[1]
if from then
- mime_domain = from['domain']
+ mime_domain = from['domain']:lower()
mime_user = from['user']
end
end
local mime_rcpt = {}
if task:has_recipients('mime') then
- local from = task:get_recipients({'mime','orig'})
- mime_rcpt = fun.totable(fun.map(function (f) return f.addr or '' end, from))
+ local recipients = task:get_recipients({'mime','orig'})
+ for _, rcpt in ipairs(recipients) do
+ table.insert(mime_rcpt, rcpt['user'] .. '@' .. rcpt['domain']:lower())
+ end
end
local ip_str = 'undefined'
@@ -474,7 +476,7 @@ local function clickhouse_collect(task)
if task:has_recipients('smtp') then
local rcpt = task:get_recipients('smtp')[1]
rcpt_user = rcpt['user']
- rcpt_domain = rcpt['domain']
+ rcpt_domain = rcpt['domain']:lower()
end
local list_id = task:get_header('List-Id') or ''