aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/lua/clickhouse.lua
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-06-22 08:37:46 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-06-22 08:37:46 +0100
commita6fb96a19b357ddf83c0b94eeefdcabe3aca6332 (patch)
treec4caf983f82e3f6426581d0bbecbb9cde822249d /src/plugins/lua/clickhouse.lua
parent251da4f0155ce29b82dc3b9a29b140039f2c4226 (diff)
downloadrspamd-a6fb96a19b357ddf83c0b94eeefdcabe3aca6332.tar.gz
rspamd-a6fb96a19b357ddf83c0b94eeefdcabe3aca6332.zip
[Feature] Allow to store symbols data in Clickhouse
Diffstat (limited to 'src/plugins/lua/clickhouse.lua')
-rw-r--r--src/plugins/lua/clickhouse.lua75
1 files changed, 74 insertions, 1 deletions
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 50808812b..dc6d7be66 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -28,6 +28,7 @@ local attachment_rows = {}
local urls_rows = {}
local specific_rows = {}
local asn_rows = {}
+local symbols_rows = {}
local nrows = 0
local settings = {
@@ -45,10 +46,12 @@ local settings = {
table = 'rspamd',
attachments_table = 'rspamd_attachments',
urls_table = 'rspamd_urls',
+ symbols_table = 'rspamd_symbols',
ipmask = 19,
ipmask6 = 48,
full_urls = false,
- from_tables = nil
+ from_tables = nil,
+ enable_symbols = false,
}
--[[
@@ -103,6 +106,14 @@ CREATE TABLE rspamd_asn (
Country FixedString(2),
IPNet String
) ENGINE = MergeTree(Date, Digest, 8192)
+
+CREATE TABLE rspamd_symbols (
+ Date Date,
+ Digest FixedString(32),
+ `Symbols.Names` Array(String),
+ `Symbols.Scores` Array(Float64)
+ `Symbols.Options` Array(String)
+) ENGINE = MergeTree(Date, Digest, 8192)
]]
local function clickhouse_main_row(tname)
@@ -162,6 +173,19 @@ local function clickhouse_urls_row(tname)
return elt
end
+local function clickhouse_symbols_row(tname)
+ local symbols_fields = {
+ 'Date',
+ 'Digest',
+ 'Symbols.Names',
+ 'Symbols.Scores',
+ 'Symbols.Options',
+ }
+ local elt = string.format('INSERT INTO %s (%s) VALUES ',
+ tname, table.concat(symbols_fields, ','))
+ return elt
+end
+
local function clickhouse_asn_row(tname)
local asn_fields = {
'Date',
@@ -189,6 +213,10 @@ local function clickhouse_first_row()
table.insert(asn_rows,
clickhouse_asn_row(settings['asn_table']))
end
+ if settings.enable_symbols and settings['symbols_table'] then
+ table.insert(symbols_rows,
+ clickhouse_symbols_row(settings['symbols_table']))
+ end
end
local function clickhouse_check_symbol(task, symbols, need_score)
@@ -273,6 +301,21 @@ local function clickhouse_send_data(task)
end
end
+ if #symbols_rows > 1 then
+ body = table.concat(symbols_rows, ' ')
+ if not rspamd_http.request({
+ task = task,
+ url = 'http://' .. settings['server'],
+ body = body,
+ callback = http_cb,
+ mime_type = 'text/plain',
+ timeout = settings['timeout'],
+ }) then
+ rspamd_logger.errx(task, "cannot send symbols info to clickhouse server %s: cannot make request",
+ settings['server'])
+ end
+ end
+
for k,specific in pairs(specific_rows) do
if #specific > 1 then
body = table.concat(specific, ' ')
@@ -538,6 +581,35 @@ local function clickhouse_collect(task)
table.insert(asn_rows, elt)
end
+ -- Symbols info
+ if settings.enable_symbols and settings['symbols_table'] then
+ local symbols = task:get_symbols_all()
+ local syms_tab = {}
+ local scores_tab = {}
+ local options_tab = {}
+
+ for _,s in ipairs(symbols) do
+ table.insert(syms_tab, string.format("'%s'",
+ clickhouse_quote(s.name or '')))
+ table.insert(scores_tab, string.format('%.3f', s.score))
+
+ if s.options then
+ table.insert(options_tab, string.format("'%s'",
+ clickhouse_quote(table.concat(s.options, ','))))
+ else
+ table.insert(options_tab, "''");
+ end
+ end
+
+ elt = string.format("(today(),'%s',[%s],[%s],[%s])",
+ task:get_digest(),
+ table.concat(syms_tab, ','),
+ table.concat(scores_tab, ','),
+ table.concat(options_tab, ','))
+
+ table.insert(urls_rows, elt)
+ end
+
nrows = nrows + 1
if nrows > settings['limit'] then
@@ -548,6 +620,7 @@ local function clickhouse_collect(task)
urls_rows = {}
specific_rows = {}
asn_rows = {}
+ symbols_rows = {}
clickhouse_first_row()
end
end