You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

extractors.lua 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. --[[
  2. Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. local fun = require 'fun'
  14. local meta_functions = require "lua_meta"
  15. local lua_util = require "lua_util"
  16. local rspamd_url = require "rspamd_url"
  17. local common = require "lua_selectors/common"
  18. local ts = require("tableshape").types
  -- Shared empty table: lets expressions such as `(x or E)[1]` index safely
  -- when `x` is nil.
  local E = {}
  -- Names of all known URL flags (the keys of rspamd_url.flags)
  local url_flag_names = lua_util.keys(rspamd_url.flags)
  -- Schema for an optional array whose items are each one of those flag names
  local url_flags_ts = ts.array_of(ts.one_of(url_flag_names)):is_optional()
  --- Build a predicate that rejects URLs carrying any of the given flags.
  -- @param exclude_flags array of flag names to reject
  -- @return function(u) returning false when `u:get_flags()` has a truthy
  --         entry for at least one listed flag, true otherwise
  local function gen_exclude_flags_filter(exclude_flags)
    return function(u)
      local present = u:get_flags()
      local allowed = true
      for _, name in ipairs(exclude_flags) do
        if present[name] then
          -- One excluded flag is enough to drop the URL
          allowed = false
          break
        end
      end
      return allowed
    end
  end
  30. local extractors = {
  -- Identity extractor: hands its first argument back to the caller
  ['id'] = {
    ['get_value'] = function(_, args)
      local value = args[1]
      if not value then
        -- No usable first argument: fall back to the empty string
        return '', 'string'
      end
      return value, 'string'
    end,
    ['description'] = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- List constructor: turns the arguments into a list of strings
  ['list'] = {
    ['get_value'] = function(_, args)
      if not args[1] then
        -- Nothing was passed: return a plain empty table
        return {}, 'string_list'
      end
      -- Each argument goes through tostring via fun.map
      local as_strings = fun.map(tostring, args)
      return as_strings, 'string_list'
    end,
    ['description'] = [[Return a list from function's arguments or an empty list,
For example, `list('foo', 'bar')` returns a list {'foo', 'bar'}]],
  },
  -- Source IP address of the task (only when present and valid)
  ['ip'] = {
    ['get_value'] = function(task)
      local addr = task:get_ip()
      if not (addr and addr:is_valid()) then
        return nil
      end
      return addr, 'userdata'
    end,
    ['description'] = [[Get source IP address]],
  },
  -- Sender address: first entry of task:get_from()
  ['from'] = {
    ['get_value'] = function(task, args)
      local senders
      if type(args) ~= 'table' then
        -- No argument table: ask for any kind of sender
        senders = task:get_from(0)
      else
        senders = task:get_from(args)
      end
      -- E keeps the lookups safe when the list or its first entry is missing
      local first = (senders or E)[1]
      if (first or E).addr then
        return first, 'table'
      end
      return nil
    end,
    ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
  },
  -- Recipients: the whole list returned by task:get_recipients()
  ['rcpts'] = {
    ['get_value'] = function(task, args)
      local recipients
      if type(args) ~= 'table' then
        -- No argument table: ask for any kind of recipients
        recipients = task:get_recipients(0)
      else
        recipients = task:get_recipients(args)
      end
      -- The list is only returned when its first entry has an address
      local first = (recipients or E)[1]
      if (first or E).addr then
        return recipients, 'table_list'
      end
      return nil
    end,
    ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
  },
  -- Country, read from the 'country' mempool variable
  -- (ASN module must be executed first)
  ['country'] = {
    ['get_value'] = function(task)
      local country = task:get_mempool():get_variable('country')
      if country then
        return country, 'string'
      end
      return nil
    end,
    ['description'] = [[Get country (ASN module must be executed first)]],
  },
  -- AS number, read from the 'asn' mempool variable
  ['asn'] = {
    ['type'] = 'string',
    ['get_value'] = function(task)
      local asn = task:get_mempool():get_variable('asn')
      if asn then
        return asn, 'string'
      end
      return nil
    end,
    ['description'] = [[Get AS number (ASN module must be executed first)]],
  },
  -- Authenticated user name, if the task has one
  ['user'] = {
    ['get_value'] = function(task)
      local username = task:get_user()
      if username then
        return username, 'string'
      end
      return nil
    end,
    ['description'] = 'Get authenticated user name',
  },
  -- Principal recipient as reported by the task
  ['to'] = {
    ['get_value'] = function(task)
      -- The value is passed through unchecked; the type tag is always 'string'
      local principal = task:get_principal_recipient()
      return principal, 'string'
    end,
    ['description'] = 'Get principal recipient',
  },
  -- Content digest as reported by the task
  ['digest'] = {
    ['get_value'] = function(task)
      -- The value is passed through unchecked; the type tag is always 'string'
      local content_digest = task:get_digest()
      return content_digest, 'string'
    end,
    ['description'] = 'Get content digest',
  },
  -- Digests of every part that reports itself as an attachment
  ['attachments'] = {
    ['get_value'] = function(task, args)
      local digests = {}
      for idx, part in ipairs(task:get_parts() or E) do
        if part:is_attachment() then
          -- args (encoding / hash type) are forwarded to the shared digest helper
          table.insert(digests, common.get_cached_or_raw_digest(task, idx, part, args))
        end
      end
      if #digests == 0 then
        return nil
      end
      return digests, 'string_list'
    end,
    ['description'] = [[Get list of all attachments digests.
The first optional argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
    ['args_schema'] = common.digest_schema()
  },
  -- File names of all parts that have one
  ['files'] = {
    ['get_value'] = function(task)
      local names = {}
      for _, part in ipairs(task:get_parts() or E) do
        local fname = part:get_filename()
        if fname then
          names[#names + 1] = fname
        end
      end
      if #names == 0 then
        return nil
      end
      return names, 'string_list'
    end,
    ['description'] = 'Get all attachments files',
  },
  -- Languages reported by the text parts (parts without one are skipped)
  ['languages'] = {
    ['get_value'] = function(task)
      local found = {}
      for _, part in ipairs(task:get_text_parts() or E) do
        local lang = part:get_language()
        if lang then
          found[#found + 1] = lang
        end
      end
      if #found == 0 then
        return nil
      end
      return found, 'string_list'
    end,
    ['description'] = 'Get languages for text parts',
  },
  -- HELO value as reported by the task
  ['helo'] = {
    ['get_value'] = function(task)
      -- The value is passed through unchecked; the type tag is always 'string'
      local helo = task:get_helo()
      return helo, 'string'
    end,
    ['description'] = 'Get helo value',
  },
  -- Header lookup by name (first argument). The optional second argument is a
  -- flags string searched for the substrings 'strong' and 'full'.
  ['header'] = {
    ['get_value'] = function(task, args)
      local name, flags = args[1], args[2]
      if not flags then
        -- No flags: plain lookup
        return task:get_header(name), 'string'
      end
      -- 'strong' requests the strong (case sensitive) name match
      local strong = flags:match('strong') ~= nil
      if flags:match('full') then
        -- 'full' switches to the full header tables
        return task:get_header_full(name, strong), 'table_list'
      end
      return task:get_header(name, strong), 'string'
    end,
    ['description'] = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
    ['args_schema'] = {ts.string,
      (ts.pattern("strong") + ts.pattern("full")):is_optional()}
  },
  -- Received headers: either the raw list of tables or one field of each
  ['received'] = {
    ['get_value'] = function(task, args)
      local headers = task:get_received_headers()
      if not headers[1] then
        return nil
      end
      if not args[1] then
        -- No field requested: hand back the tables untouched
        return headers, 'table_list'
      end
      -- Project every received header onto the requested field
      local function pick_field(hdr)
        return hdr[args[1]]
      end
      return fun.map(pick_field, headers), 'string_list'
    end,
    ['description'] = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
  },
  -- All URLs of the task: url objects, or the result of one method on each
  ['urls'] = {
    ['get_value'] = function(task, args)
      local all_urls = task:get_urls()
      if not all_urls[1] then
        return nil
      end
      if not args[1] then
        return all_urls, 'userdata_list'
      end
      -- args[1] names a method looked up on each url object and invoked with
      -- that object as its only argument
      local function call_method(url)
        return url[args[1]](url)
      end
      return fun.map(call_method, all_urls), 'string_list'
    end,
    ['description'] = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
  },
  -- Most specific urls, selected by lua_util.extract_specific_urls
  ['specific_urls'] = {
    ['get_value'] = function(task, args)
      -- Work on a shallow copy of the caller's options. Writing `task`,
      -- `no_cache` and `filter` straight into args[1] would mutate a table this
      -- function does not own and keep a reference to the task inside it after
      -- the call returns.
      local params = {}
      for key, value in pairs(args[1] or E) do
        params[key] = value
      end
      params.task = task
      params.no_cache = true
      if params.exclude_flags then
        -- Turn the list of excluded flag names into a per-url predicate
        params.filter = gen_exclude_flags_filter(params.exclude_flags)
      end
      local urls = lua_util.extract_specific_urls(params)
      if not urls[1] then
        return nil
      end
      return urls, 'userdata_list'
    end,
    ['description'] = [[Get most specific urls. Arguments are equal to the Lua API function]],
    ['args_schema'] = {ts.shape{
      limit = ts.number + ts.string / tonumber,
      esld_limit = (ts.number + ts.string / tonumber):is_optional(),
      exclude_flags = url_flags_ts,
      flags = url_flags_ts,
      flags_mode = ts.one_of{'explicit'}:is_optional(),
      prefix = ts.string:is_optional(),
      need_content = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      need_emails = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      need_images = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      ignore_redirected = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
    }}
  },
  -- URLs filtered by include/exclude flag lists
  ['urls_filtered'] = {
    ['get_value'] = function(task, args)
      local include_flags, exclude_flags = args[1], args[2]
      local urls = task:get_urls_filtered(include_flags, exclude_flags)
      if not urls[1] then
        return nil
      end
      return urls, 'userdata_list'
    end,
    ['description'] = [[Get list of all urls filtered by flags_include/exclude
(see rspamd_task:get_urls_filtered for description)]],
    -- NOTE(review): this schema has a single positional entry (an array_of
    -- built from a plain table), while get_value reads both args[1] and
    -- args[2] - confirm this validates what is intended.
    ['args_schema'] = {ts.array_of{
      url_flags_ts:is_optional(), url_flags_ts:is_optional()
    }}
  },
  -- All emails of the task: url objects, or the result of one method on each
  ['emails'] = {
    ['get_value'] = function(task, args)
      local all_emails = task:get_emails()
      if not all_emails[1] then
        return nil
      end
      if not args[1] then
        return all_emails, 'userdata_list'
      end
      -- args[1] names a method looked up on each object and invoked with that
      -- object as its only argument
      local function call_method(email)
        return email[args[1]](email)
      end
      return fun.map(call_method, all_emails), 'string_list'
    end,
    ['description'] = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
  },
  -- Read a mempool variable. args[1] is the variable name; args[2] optionally
  -- names its type ('string' when omitted) and doubles as the returned type tag.
  ['pool_var'] = {
    ['get_value'] = function(task, args)
      -- Named var_type so the builtin type() is not shadowed
      local var_type = args[2] or 'string'
      local value = task:get_mempool():get_variable(args[1], var_type)
      return value, var_type
    end,
    ['description'] = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
    ['args_schema'] = {ts.string, ts.string:is_optional()}
  },
  -- Look up a key (args[1]) in the task cache
  ['task_cache'] = {
    ['get_value'] = function(task, args)
      local cached = task:cache_get(args[1])
      if not cached then
        return
      end
      if type(cached) ~= 'table' then
        -- Non-table values are tagged as a single string
        return cached, 'string'
      end
      if not cached[1] then
        -- A table with no first element yields nothing
        return
      end
      return cached, 'string_list'
    end,
    ['description'] = [[Get value of specific key from task cache. The first argument must be
the key name]],
    ['args_schema'] = {ts.string}
  },
  -- HTTP request header of the task; args[1] is the header name
  ['request_header'] = {
    ['get_value'] = function(task, args)
      local value = task:get_request_header(args[1])
      if not value then
        return nil
      end
      return value, 'string'
    end,
    ['description'] = [[Get specific HTTP request header.
The first argument must be header name.]],
    ['args_schema'] = {ts.string}
  },
  -- Task date, optionally formatted with os.date
  ['time'] = {
    ['get_value'] = function(task, args)
      -- Timestamp source; 'message' is used when no type is given
      local what = args[1] or 'message'
      local dt = task:get_date{format = what, gmt = true}
      if dt then
        if args[2] then
          -- Should be in format !xxx, as dt is in GMT
          return os.date(args[2], dt), 'string'
        end
        return tostring(dt), 'string'
      end
      return nil
    end,
    -- The "(default)" marker sits on `message` because that is the value
    -- get_value falls back to.
    ['description'] = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
    ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
      ts.string:is_optional()}
  },
  -- Words of all text parts, flattened into one list
  ['words'] = {
    ['get_value'] = function(task, args)
      local how = args[1] or 'stem'
      local parts = task:get_text_parts()
      if not parts then
        return nil
      end
      -- 'full' yields tables per word; every other mode yields strings
      local rtype = (how == 'full') and 'table_list' or 'string_list'
      local function part_words(p)
        return p:get_words(how)
      end
      return lua_util.flatten(fun.map(part_words, parts)), rtype
    end,
    ['description'] = [[Get words from text parts
- `stem`: stemmed words (default)
- `raw`: raw words
- `norm`: normalised words (lowercased)
- `full`: list of tables
]],
    ['args_schema'] = { ts.one_of { 'stem', 'raw', 'norm', 'full' }:is_optional()},
  },
  -- Queue ID of the task, when it has one
  ['queueid'] = {
    ['get_value'] = function(task)
      local qid = task:get_queue_id()
      if not qid then
        return nil
      end
      return qid, 'string'
    end,
    ['description'] = [[Get queue ID]],
  },
  -- ID of the task being processed, when available
  ['uid'] = {
    ['get_value'] = function(task)
      local task_uid = task:get_uid()
      if not task_uid then
        return nil
      end
      return task_uid, 'string'
    end,
    ['description'] = [[Get ID of the task being processed]],
  },
  -- Message ID of the task being processed, when available
  ['messageid'] = {
    ['get_value'] = function(task)
      local message_id = task:get_message_id()
      if not message_id then
        return nil
      end
      return message_id, 'string'
    end,
    ['description'] = [[Get message ID]],
  },
  -- A single symbol: args[1] is the symbol name, args[2] an optional shadow
  -- result name
  ['symbol'] = {
    ['get_value'] = function(task, args)
      local found = task:get_symbol(args[1], args[2])
      if not found then
        return
      end
      -- Only the first element of the get_symbol result is exposed
      return found[1], 'table'
    end,
    ['description'] = 'Get specific symbol. The first argument must be the symbol name. ' ..
      'The second argument is an optional shadow result name. ' ..
      'Returns the symbol table. See task:get_symbol()',
    ['args_schema'] = {ts.string, ts.string:is_optional()}
  },
  -- Full scan result; args[1] optionally names a shadow result
  ['scan_result'] = {
    ['get_value'] = function(task, args)
      local res = task:get_metric_result(args[1])
      if res then
        return res, 'table'
      end
    end,
    -- The first fragment ends with '. ' so the two concatenated sentences
    -- do not run together.
    ['description'] = 'Get full scan result (either default or shadow if shadow result name is specified). ' ..
      'Returns the result table. See task:get_metric_result()',
    ['args_schema'] = {ts.string:is_optional()}
  },
  -- Metatokens of the message, each converted with tostring
  ['metatokens'] = {
    ['get_value'] = function(task)
      local tokens = meta_functions.gen_metatokens(task)
      if not tokens[1] then
        return nil
      end
      local as_strings = {}
      for _, token in ipairs(tokens) do
        as_strings[#as_strings + 1] = tostring(token)
      end
      return as_strings, 'string_list'
    end,
    ['description'] = 'Get metatokens for a message as strings',
  },
  482. }
  483. return extractors