You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_clickhouse.lua 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Copyright (c) 2018, Mikhail Galanin <mgalanin@mimecast.com>
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. ]]--
  14. --[[[
  15. -- @module lua_clickhouse
  16. -- This module contains Clickhouse access functions
  17. --]]
  18. local rspamd_logger = require "rspamd_logger"
  19. local rspamd_http = require "rspamd_http"
  20. local lua_util = require "lua_util"
  21. local rspamd_text = require "rspamd_text"
  22. local exports = {}
  23. local N = 'clickhouse'
  24. local default_timeout = 10.0
  --- Percent-encodes whitespace so that a value can be placed into a request URL.
  -- Every character matching the Lua class `%s` becomes `%20`; nothing else is
  -- escaped.
  -- @param {string} query text to embed into the URL
  -- @return {string} the text with each whitespace character replaced by `%20`
  local function escape_spaces(query)
    -- gsub returns (string, count): keep only the string so that the count
    -- cannot leak into a caller's argument list (e.g. string.format)
    local escaped = query:gsub('%s', '%%20')
    return escaped
  end
  --- Renders a number as text for Clickhouse.
  -- A value that survives adding and subtracting 2^52 unchanged is treated as
  -- an integer and printed through math.floor; anything else is printed with
  -- a plain tostring.
  -- @param {number} a value to render
  -- @return {string} textual representation of the number
  local function ch_number(a)
    local looks_integral = ((a + 2^52) - 2^52 == a)
    if not looks_integral then
      return tostring(a)
    end
    -- Integer
    return tostring(math.floor(a))
  end
  --- Escapes a value so it can be embedded into Clickhouse text input.
  -- Single quotes, backslashes, newlines, tabs and carriage returns are replaced
  -- with their backslash escaped forms.
  -- @param {any} str value to escape; nil and false yield an empty string,
  --   other non-string values are converted with tostring first
  -- @return {string} escaped text (always exactly one return value)
  local function clickhouse_quote(str)
    if not str then
      return ''
    end
    if type(str) ~= 'string' then
      -- e.g. a boolean coming from the fallback branch of row_to_tsv: indexing
      -- it with :gsub would raise, so stringify it first
      str = tostring(str)
    end
    -- gsub returns (string, count); the count must not leak to callers
    local quoted = str:gsub('[\'\\\n\t\r]', {
      ['\''] = [[\']],
      ['\\'] = [[\\]],
      ['\n'] = [[\n]],
      ['\t'] = [[\t]],
      ['\r'] = [[\r]],
    })
    return quoted
  end
  -- Renders an array as a comma separated list of Clickhouse literals.
  -- Strings and userdata (stringified with tostring) are escaped and wrapped in
  -- single quotes, numbers go through ch_number, elements of any other type are
  -- left untouched. The elements of `ar` are rewritten in place.
  local function array_to_string(ar)
    for idx, item in ipairs(ar) do
      local kind = type(item)
      if kind == 'number' then
        ar[idx] = ch_number(item)
      elseif kind == 'string' or kind == 'userdata' then
        local text = item
        if kind == 'userdata' then
          text = tostring(item)
        end
        ar[idx] = string.format('\'%s\'', clickhouse_quote(text))
      end
    end
    return table.concat(ar, ',')
  end
  -- Serialises one row as tab separated values.
  -- Nested tables become bracketed arrays (see array_to_string), numbers go
  -- through ch_number, userdata is stringified and escaped, everything else is
  -- escaped as is. The cells of `row` are rewritten in place and then joined
  -- with tabs by rspamd_text.fromtable.
  local function row_to_tsv(row)
    for idx, cell in ipairs(row) do
      local kind = type(cell)
      local encoded
      if kind == 'table' then
        encoded = '[' .. array_to_string(cell) .. ']'
      elseif kind == 'number' then
        encoded = ch_number(cell)
      elseif kind == 'userdata' then
        encoded = clickhouse_quote(tostring(cell))
      else
        encoded = clickhouse_quote(cell)
      end
      row[idx] = encoded
    end
    return rspamd_text.fromtable(row, '\t')
  end
  exports.row_to_tsv = row_to_tsv
  -- Parses a JSONEachRow reply from CH: one JSON document per line.
  -- Lines of length 0 or 1 are skipped and lines that fail to parse are logged
  -- and dropped. When `row_cb` is given it is called with every parsed row and
  -- the returned table stays empty; otherwise the parsed rows are collected and
  -- returned.
  local function parse_clickhouse_response_json_eachrow(params, data, row_cb)
    local ucl = require "ucl"

    if data == nil then
      -- clickhouse returned no data (i.e. empty result set): exiting
      return {}
    end

    -- Decodes a single line (Lua string or text object) into an object
    local function decode_line(line)
      local parser = ucl.parser()
      local ok, parse_err
      if type(line) ~= 'string' then
        ok, parse_err = parser:parse_text(line)
      else
        ok, parse_err = parser:parse_string(line)
      end
      if ok then
        return parser:get_object()
      end
      rspamd_logger.errx(params.log_obj, 'Parser error: %s', parse_err)
      return nil
    end

    -- iterate over rows and parse
    local collected = {}
    for line in data:lines() do
      if line and #line > 1 then
        local obj = decode_line(line)
        if obj then
          if row_cb then
            row_cb(obj)
          else
            table.insert(collected, obj)
          end
        end
      end
    end

    return collected
  end
  -- Parses a JSON reply from CH as a single document.
  -- Returns two values: an error string (nil on success) and the parsed object.
  -- A nil `data` is treated as a valid empty reply and yields `nil, {}`; a reply
  -- that cannot be parsed yields `'bad json', {}`.
  local function parse_clickhouse_response_json(params, data)
    local ucl = require "ucl"

    if data == nil then
      -- clickhouse returned no data (i.e. empty result set) considered valid!
      return nil, {}
    end

    local parser = ucl.parser()
    local ok, parse_err
    if type(data) ~= 'string' then
      ok, parse_err = parser:parse_text(data)
    else
      ok, parse_err = parser:parse_string(data)
    end

    if not ok then
      rspamd_logger.errx(params.log_obj, 'Parser error: %s', parse_err)
      return 'bad json', {}
    end

    local json = parser:get_object()
    if json then
      return nil, json
    end
    return 'bad json', {}
  end
  -- Helper to generate the HTTP callback used by select requests.
  -- The returned closure marks the upstream as failed or ok, parses the reply as
  -- JSONEachRow and then calls `ok_cb(params, rows)` on success or
  -- `fail_cb(params, reason, data)` on failure. A failure without `fail_cb` is
  -- logged together with the server address; a success without `ok_cb` is only
  -- reported through a debug message.
  local function mk_http_select_cb(upstream, params, ok_cb, fail_cb, row_cb)
    -- Single place for failure reporting: user callback if any, log otherwise
    local function report_failure(reason, data)
      if fail_cb then
        fail_cb(params, reason, data)
      else
        local ip_addr = upstream:get_addr():to_string(true)
        rspamd_logger.errx(params.log_obj,
            "request failed on clickhouse server %s: %s",
            ip_addr, reason)
      end
    end

    local function http_cb(err_message, code, data, _)
      if code ~= 200 or err_message then
        -- without a transport error the reply body is the best description
        report_failure(err_message or data, data)
        upstream:fail()
        return
      end

      upstream:ok()
      local rows = parse_clickhouse_response_json_eachrow(params, data, row_cb)

      if not rows then
        -- defensive: kept in case the parser ever reports a failure with nil
        report_failure('failed to parse reply', data)
        return
      end

      if ok_cb then
        ok_cb(params, rows)
      else
        -- data may be nil (empty reply), so stringify before flattening
        -- whitespace: calling :gsub directly on nil would raise here
        local flat = tostring(data):gsub('[\n%s]+', ' ')
        lua_util.debugm(N, params.log_obj,
            "http_select_cb ok: %s, %s, %s, %s", err_message, code,
            flat, _)
      end
    end

    return http_cb
  end
  -- Helper to generate the HTTP callback used by insert and generic requests.
  -- The returned closure marks the upstream as failed or ok. When `ok_cb` is
  -- given, the reply is parsed as a single JSON document and passed to
  -- `ok_cb(params, parsed)`; an empty reply is passed as an empty table.
  -- Failures go to `fail_cb(params, reason, data)` or, when `fail_cb` is not
  -- set, to the error log together with the server address.
  local function mk_http_insert_cb(upstream, params, ok_cb, fail_cb)
    -- Single place for failure reporting: user callback if any, log otherwise
    local function report_failure(reason, data)
      if fail_cb then
        fail_cb(params, reason, data)
      else
        local ip_addr = upstream:get_addr():to_string(true)
        rspamd_logger.errx(params.log_obj,
            "request failed on clickhouse server %s: %s",
            ip_addr, reason)
      end
    end

    local function http_cb(err_message, code, data, _)
      if code ~= 200 or err_message then
        -- without a transport error the reply body is the best description
        report_failure(err_message or data, data)
        upstream:fail()
        return
      end

      upstream:ok()

      if not ok_cb then
        -- data may be nil (empty reply), so stringify before flattening
        -- whitespace: calling :gsub directly on nil would raise here
        local flat = tostring(data):gsub('[\n%s]+', ' ')
        lua_util.debugm(N, params.log_obj,
            "http_insert_cb ok: %s, %s, %s, %s", err_message, code,
            flat, _)
        return
      end

      local err, parsed
      if data == nil or #data == 0 then
        -- an empty body is a valid reply and must not be reported as bad json
        parsed = {}
      else
        -- the parser signature is (params, data): passing data alone shifted
        -- the arguments, so the reply was never actually parsed
        err, parsed = parse_clickhouse_response_json(params, data)
      end

      if err then
        -- fail_cb is optional, so route through the nil-safe reporter
        report_failure(err, data)
      else
        ok_cb(params, parsed)
      end
    end

    return http_cb
  end
  --[[[
  -- @function lua_clickhouse.select(upstream, settings, params, query,
        ok_cb, fail_cb, row_cb)
  -- Make an asynchronous select request to clickhouse
  -- @param {upstream} upstream clickhouse server upstream
  -- @param {table} settings global settings table:
  --   * use_gzip: use gzip compression
  --   * use_https: build an https:// URL instead of http://
  --   * database: database name (`default` when not set)
  --   * timeout: request timeout
  --   * no_ssl_verify: skip SSL verification
  --   * user: HTTP user
  --   * password: HTTP password
  -- @param {table} params HTTP request params, copied into the request; `task`
  --   or `config` is used as the logging object and a preset `url` is kept
  -- @param {string} query select query (passed in HTTP body)
  -- @param {function} ok_cb callback to be called in case of success
  -- @param {function} fail_cb callback to be called in case of some error
  -- @param {function} row_cb optional callback to be called on each parsed data row (instead of table insertion)
  -- @return whatever rspamd_http.request returns for the request
  -- @example
  --
  --]]
  exports.select = function (upstream, settings, params, query, ok_cb, fail_cb, row_cb)
    local req = {}
    for key, value in pairs(params) do
      req[key] = value
    end

    -- request level values always override what came from `params`
    req.log_obj = params.task or params.config
    req.body = query
    req.opaque_body = true
    req.mime_type = 'text/plain'
    req.gzip = settings.use_gzip
    req.timeout = settings.timeout or default_timeout
    req.no_ssl_verify = settings.no_ssl_verify
    req.user = settings.user
    req.password = settings.password
    -- the callback receives the request table itself as its `params`
    req.callback = mk_http_select_cb(upstream, req, ok_cb, fail_cb, row_cb)

    lua_util.debugm(N, req.log_obj, "clickhouse select request: %s", req.body)

    if not req.url then
      local scheme = settings.use_https and 'https://' or "http://"
      local host = upstream:get_addr():to_string(true)
      local db_name = settings.database or 'default'
      req.url = string.format('%s%s/?database=%s&default_format=JSONEachRow',
          scheme, host, escape_spaces(db_name))
    end

    return rspamd_http.request(req)
  end
  --[[[
  -- @function lua_clickhouse.select_sync(upstream, settings, params, query,
        row_cb)
  -- Make a select request to clickhouse and return its result directly
  -- @param {upstream} upstream clickhouse server upstream
  -- @param {table} settings global settings table:
  --   * use_gzip: use gzip compression
  --   * use_https: build an https:// URL instead of http://
  --   * database: database name (`default` when not set)
  --   * timeout: request timeout
  --   * no_ssl_verify: skip SSL verification
  --   * user: HTTP user
  --   * password: HTTP password
  -- @param {table} params HTTP request params, copied into the request; `task`
  --   or `config` is used as the logging object and a preset `url` is kept
  -- @param {string} query select query (passed in HTTP body)
  -- @param {function} row_cb optional callback to be called on each parsed data row (instead of table insertion)
  -- @return
  --      {string} error message if exists (the reply body for non-200 replies)
  --      nil | {rows} | {http_response}
  -- @example
  --
  --]]
  exports.select_sync = function (upstream, settings, params, query, row_cb)
    local http_params = {}

    for k,v in pairs(params) do http_params[k] = v end

    http_params.gzip = settings.use_gzip
    http_params.mime_type = 'text/plain'
    http_params.timeout = settings.timeout or default_timeout
    http_params.no_ssl_verify = settings.no_ssl_verify
    http_params.user = settings.user
    http_params.password = settings.password
    http_params.body = query
    http_params.log_obj = params.task or params.config
    http_params.opaque_body = true

    lua_util.debugm(N, http_params.log_obj, "clickhouse select request: %s", http_params.body)

    if not http_params.url then
      local connect_prefix = "http://"
      if settings.use_https then
        connect_prefix = 'https://'
      end
      local ip_addr = upstream:get_addr():to_string(true)
      local database = settings.database or 'default'
      http_params.url = string.format('%s%s/?database=%s&default_format=JSONEachRow',
          connect_prefix, ip_addr, escape_spaces(database))
    end

    local err, response = rspamd_http.request(http_params)

    if err then
      return err, nil
    end
    if response.code ~= 200 then
      return response.content, response
    end

    lua_util.debugm(N, http_params.log_obj, "clickhouse select response: %1", response)
    -- The parser logs through `<ctx>.log_obj`. The logging object is computed
    -- on http_params above (as the asynchronous select does), so hand that
    -- table to the parser and fall back to the caller's table only when no
    -- logging object could be derived.
    local log_ctx = http_params.log_obj and http_params or params
    local rows = parse_clickhouse_response_json_eachrow(log_ctx, response.content, row_cb)
    return nil, rows
  end
  --[[[
  -- @function lua_clickhouse.insert(upstream, settings, params, query, rows,
        ok_cb, fail_cb)
  -- Insert data rows to clickhouse
  -- @param {upstream} upstream clickhouse server upstream
  -- @param {table} settings global settings table:
  --   * use_gzip: use gzip compression
  --   * use_https: build an https:// URL instead of http://
  --   * database: database name (`default` when not set)
  --   * timeout: request timeout
  --   * no_ssl_verify: skip SSL verification
  --   * user: HTTP user
  --   * password: HTTP password
  -- @param {table} params HTTP request params, copied into the request; `task`
  --   or `config` is used as the logging object and a preset `url` is kept
  -- @param {string} query insert query (passed in `query` request element with spaces escaped)
  -- @param {table} rows rows to send; they are joined with newlines by
  --   rspamd_text.fromtable and sent as the POST body in TabSeparated format
  -- @param {function} ok_cb callback to be called in case of success
  -- @param {function} fail_cb callback to be called in case of some error
  -- @return whatever rspamd_http.request returns for the request
  -- @example
  --
  --]]
  exports.insert = function (upstream, settings, params, query, rows,
                             ok_cb, fail_cb)
    local req = {}
    for key, value in pairs(params) do
      req[key] = value
    end

    -- request level values always override what came from `params`
    req.log_obj = params.task or params.config
    req.method = 'POST'
    req.body = {rspamd_text.fromtable(rows, '\n'), '\n'}
    req.mime_type = 'text/plain'
    req.gzip = settings.use_gzip
    req.timeout = settings.timeout or default_timeout
    req.no_ssl_verify = settings.no_ssl_verify
    req.user = settings.user
    req.password = settings.password
    -- the callback receives the request table itself as its `params`
    req.callback = mk_http_insert_cb(upstream, req, ok_cb, fail_cb)

    if not req.url then
      local scheme = settings.use_https and 'https://' or "http://"
      local host = upstream:get_addr():to_string(true)
      local db_name = settings.database or 'default'
      req.url = string.format('%s%s/?database=%s&query=%s%%20FORMAT%%20TabSeparated',
          scheme,
          host,
          escape_spaces(db_name),
          escape_spaces(query))
    end

    return rspamd_http.request(req)
  end
  --[[[
  -- @function lua_clickhouse.generic(upstream, settings, params, query,
        ok_cb, fail_cb)
  -- Make a generic asynchronous request to Clickhouse (e.g. alter)
  -- @param {upstream} upstream clickhouse server upstream
  -- @param {table} settings global settings table:
  --   * use_gzip: use gzip compression
  --   * use_https: build an https:// URL instead of http://
  --   * database: database name (`default` when not set)
  --   * timeout: request timeout
  --   * no_ssl_verify: skip SSL verification
  --   * user: HTTP user
  --   * password: HTTP password
  -- @param {table} params HTTP request params, copied into the request; `task`
  --   or `config` is used as the logging object and a preset `url` is kept
  -- @param {string} query Clickhouse query (passed in HTTP body)
  -- @param {function} ok_cb callback to be called in case of success
  -- @param {function} fail_cb callback to be called in case of some error
  -- @return whatever rspamd_http.request returns for the request
  -- @example
  --
  --]]
  exports.generic = function (upstream, settings, params, query,
                              ok_cb, fail_cb)
    local http_params = {}

    for k,v in pairs(params) do http_params[k] = v end

    http_params.callback = mk_http_insert_cb(upstream, http_params, ok_cb, fail_cb)
    http_params.gzip = settings.use_gzip
    http_params.mime_type = 'text/plain'
    http_params.timeout = settings.timeout or default_timeout
    http_params.no_ssl_verify = settings.no_ssl_verify
    http_params.user = settings.user
    http_params.password = settings.password
    http_params.log_obj = params.task or params.config
    http_params.body = query

    if not http_params.url then
      local connect_prefix = "http://"
      if settings.use_https then
        connect_prefix = 'https://'
      end
      local ip_addr = upstream:get_addr():to_string(true)
      local database = settings.database or 'default'
      -- The reply is handled by mk_http_insert_cb, which parses it as one JSON
      -- document, so request the JSON format (as generic_sync does) rather
      -- than JSONEachRow, which yields one document per line.
      http_params.url = string.format('%s%s/?database=%s&default_format=JSON',
          connect_prefix, ip_addr, escape_spaces(database))
    end

    return rspamd_http.request(http_params)
  end
  --[[[
  -- @function lua_clickhouse.generic_sync(upstream, settings, params, query)
  -- Make a generic request to Clickhouse (e.g. alter) and return its result
  -- directly
  -- @param {upstream} upstream clickhouse server upstream
  -- @param {table} settings global settings table:
  --   * use_gzip: use gzip compression
  --   * use_https: build an https:// URL instead of http://
  --   * database: database name (`default` when not set)
  --   * timeout: request timeout
  --   * no_ssl_verify: skip SSL verification
  --   * user: HTTP user
  --   * password: HTTP password
  -- @param {table} params HTTP request params, copied into the request; `task`
  --   or `config` is used as the logging object and a preset `url` is kept
  -- @param {string} query Clickhouse query (passed in HTTP body)
  -- @return
  --      {string} error message if exists (the reply body for non-200 replies)
  --      nil | {parsed JSON reply} | {http_response}
  -- @example
  --
  --]]
  exports.generic_sync = function (upstream, settings, params, query)
    local http_params = {}

    for k,v in pairs(params) do http_params[k] = v end

    http_params.gzip = settings.use_gzip
    http_params.mime_type = 'text/plain'
    http_params.timeout = settings.timeout or default_timeout
    http_params.no_ssl_verify = settings.no_ssl_verify
    http_params.user = settings.user
    http_params.password = settings.password
    http_params.log_obj = params.task or params.config
    http_params.body = query

    if not http_params.url then
      local connect_prefix = "http://"
      if settings.use_https then
        connect_prefix = 'https://'
      end
      local ip_addr = upstream:get_addr():to_string(true)
      local database = settings.database or 'default'
      http_params.url = string.format('%s%s/?database=%s&default_format=JSON',
          connect_prefix, ip_addr, escape_spaces(database))
    end

    local err, response = rspamd_http.request(http_params)

    if err then
      return err, nil
    end
    if response.code ~= 200 then
      return response.content, response
    end

    lua_util.debugm(N, http_params.log_obj, "clickhouse generic response: %1", response)
    -- The parser logs through `<ctx>.log_obj`. The logging object is computed
    -- on http_params above, so hand that table to the parser and fall back to
    -- the caller's table only when no logging object could be derived.
    local log_ctx = http_params.log_obj and http_params or params
    local e, obj = parse_clickhouse_response_json(log_ctx, response.content)

    if e then
      return e, nil
    end
    return nil, obj
  end
  460. return exports