diff options
author | Anton Yuzhaninov <citrin+github@citrin.ru> | 2019-06-26 11:25:40 +0100 |
---|---|---|
committer | Anton Yuzhaninov <citrin+github@citrin.ru> | 2019-06-26 11:25:40 +0100 |
commit | 2fd8ae45023bc225bdb2970581452a9c700555db (patch) | |
tree | 6ee5a66101b6ca28c811e68c306422a3e352e7bd /src/plugins/lua/clickhouse.lua | |
parent | 3c4d34b9441e6cd1ed4135db2a336e3f7ed4d72c (diff) | |
download | rspamd-2fd8ae45023bc225bdb2970581452a9c700555db.tar.gz rspamd-2fd8ae45023bc225bdb2970581452a9c700555db.zip |
[Rework] Do not lowercase all data sent to ClickHouse
A lot of strings stored in ClickHouse are case sensitive according to
standards - store them in their original case. We can always use
'lower(field)' in a ClickHouse query, but if a string was lowercased by
Rspamd, nothing can be done to recover the lost information.
Lowercase domain parts of addresses - domains are not case sensitive and
storing them in lower case will simplify queries.
Diffstat (limited to 'src/plugins/lua/clickhouse.lua')
-rw-r--r-- | src/plugins/lua/clickhouse.lua | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua index 9c8f7b631..f62bda2c6 100644 --- a/src/plugins/lua/clickhouse.lua +++ b/src/plugins/lua/clickhouse.lua @@ -426,7 +426,7 @@ local function clickhouse_collect(task) local from = task:get_from('smtp')[1] if from then - from_domain = from['domain'] + from_domain = from['domain']:lower() from_user = from['user'] end @@ -446,15 +446,17 @@ local function clickhouse_collect(task) if task:has_from('mime') then local from = task:get_from({'mime','orig'})[1] if from then - mime_domain = from['domain'] + mime_domain = from['domain']:lower() mime_user = from['user'] end end local mime_rcpt = {} if task:has_recipients('mime') then - local from = task:get_recipients({'mime','orig'}) - mime_rcpt = fun.totable(fun.map(function (f) return f.addr or '' end, from)) + local recipients = task:get_recipients({'mime','orig'}) + for _, rcpt in ipairs(recipients) do + table.insert(mime_rcpt, rcpt['user'] .. '@' .. rcpt['domain']:lower()) + end end local ip_str = 'undefined' @@ -474,7 +476,7 @@ local function clickhouse_collect(task) if task:has_recipients('smtp') then local rcpt = task:get_recipients('smtp')[1] rcpt_user = rcpt['user'] - rcpt_domain = rcpt['domain'] + rcpt_domain = rcpt['domain']:lower() end local list_id = task:get_header('List-Id') or '' |