From 2fd8ae45023bc225bdb2970581452a9c700555db Mon Sep 17 00:00:00 2001 From: Anton Yuzhaninov Date: Wed, 26 Jun 2019 11:25:40 +0100 Subject: [PATCH] [Rework] Do not lowercase all data sent to ClickHouse A lot of strings stored in ClickHouse are case sensitive according to standards - store them in original case. We can always use 'lower(field)' in a ClickHouse query, but if string was lowercased by Rspamd nothing can be done to recover lost information. Lowercase domain parts of addresses - domains are not case sensitive and storing them in lower case will simplify queries. --- lualib/lua_clickhouse.lua | 4 ++-- src/plugins/lua/clickhouse.lua | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lualib/lua_clickhouse.lua b/lualib/lua_clickhouse.lua index ad5b51dce..4a57afd3f 100644 --- a/lualib/lua_clickhouse.lua +++ b/lualib/lua_clickhouse.lua @@ -49,7 +49,7 @@ local function clickhouse_quote(str) ['\\'] = [[\\]], ['\n'] = [[\n]], ['\t'] = [[\t]], - }):lower() + }) end return '' @@ -503,4 +503,4 @@ exports.generic_sync = function (upstream, settings, params, query) end end -return exports \ No newline at end of file +return exports diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua index 9c8f7b631..f62bda2c6 100644 --- a/src/plugins/lua/clickhouse.lua +++ b/src/plugins/lua/clickhouse.lua @@ -426,7 +426,7 @@ local function clickhouse_collect(task) local from = task:get_from('smtp')[1] if from then - from_domain = from['domain'] + from_domain = from['domain']:lower() from_user = from['user'] end @@ -446,15 +446,17 @@ local function clickhouse_collect(task) if task:has_from('mime') then local from = task:get_from({'mime','orig'})[1] if from then - mime_domain = from['domain'] + mime_domain = from['domain']:lower() mime_user = from['user'] end end local mime_rcpt = {} if task:has_recipients('mime') then - local from = task:get_recipients({'mime','orig'}) - mime_rcpt = fun.totable(fun.map(function (f) return 
f.addr or '' end, from)) + local recipients = task:get_recipients({'mime','orig'}) + for _, rcpt in ipairs(recipients) do + table.insert(mime_rcpt, rcpt['user'] .. '@' .. rcpt['domain']:lower()) + end end local ip_str = 'undefined' @@ -474,7 +476,7 @@ local function clickhouse_collect(task) if task:has_recipients('smtp') then local rcpt = task:get_recipients('smtp')[1] rcpt_user = rcpt['user'] - rcpt_domain = rcpt['domain'] + rcpt_domain = rcpt['domain']:lower() end local list_id = task:get_header('List-Id') or '' -- 2.39.5