You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

extractors.lua 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. --[[
  2. Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. local fun = require 'fun'
  14. local meta_functions = require "lua_meta"
  15. local lua_util = require "lua_util"
  16. local common = require "lua_selectors/common"
  17. local ts = require("tableshape").types
  18. local E = {}
  -- Schema for an optional array of URL flag names. Each element has to be one
  -- of the names enumerated below; it validates both the `flags` and the
  -- `exclude_flags` arguments of the `specific_urls` extractor.
  local url_flags_ts = ts.array_of(ts.one_of({
    "content",
    "has_port",
    "has_user",
    "host_encoded",
    "html_displayed",
    "idn",
    "image",
    "missing_slahes", -- sic: spelling is kept exactly as in the original list
    "no_tld",
    "numeric",
    "obscured",
    "path_encoded",
    "phished",
    "query",
    "query_encoded",
    "redirected",
    "schema_encoded",
    "schemaless",
    "subject",
    "text",
    "unnormalised",
    "url_displayed",
    "zw_spaces",
  })):is_optional()
  --- Build a predicate that rejects urls carrying any of the given flags.
  -- @param exclude_flags array of flag names to reject
  -- @return function(u): `false` when `u:get_flags()` has a truthy entry for
  --         at least one excluded flag, `true` otherwise
  local function gen_exclude_flags_filter(exclude_flags)
    local function url_is_allowed(u)
      local url_flags = u:get_flags()
      local allowed = true

      for _, name in ipairs(exclude_flags) do
        if url_flags[name] then
          -- One excluded flag is enough to drop the url
          allowed = false
          break
        end
      end

      return allowed
    end

    return url_is_allowed
  end
  53. local extractors = {
  -- Identity extractor: hands its first argument back unchanged (typed as
  -- 'string'), or an empty string when no argument is given
  id = {
    get_value = function(_, args)
      local value = args[1]
      if not value then
        return '', 'string'
      end
      return value, 'string'
    end,
    description = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
    args_schema = { ts.string:is_optional() }
  },
  -- Same idea as `id`, but yields a list: every argument is mapped through
  -- tostring; with no arguments an empty table is returned
  list = {
    get_value = function(_, args)
      if not args[1] then
        return {}, 'string_list'
      end
      -- fun.map is not the last expression, so only its first result is kept
      return fun.map(tostring, args), 'string_list'
    end,
    description = [[Return a list from function's arguments or an empty list,
For example, `list('foo', 'bar')` returns a list {'foo', 'bar'}]],
  },
  -- Source IP address of the task; nil unless the address object reports
  -- itself as valid
  ip = {
    get_value = function(task)
      local addr = task:get_ip()
      if not (addr and addr:is_valid()) then
        return nil
      end
      return addr, 'userdata'
    end,
    description = 'Get source IP address',
  },
  -- Sender address: returns the first element of task:get_from() as a table,
  -- provided that element has an `addr` field
  from = {
    get_value = function(task, args)
      -- A table of arguments is forwarded as-is; anything else means "0"
      local selector = 0
      if type(args) == 'table' then
        selector = args
      end

      local addrs = task:get_from(selector)
      local first = addrs and addrs[1]
      if first and first.addr then
        return first, 'table'
      end

      return nil
    end,
    description = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
  },
  -- Recipients: returns the whole list from task:get_recipients() when its
  -- first element has an `addr` field
  rcpts = {
    get_value = function(task, args)
      -- A table of arguments is forwarded as-is; anything else means "0"
      local selector = 0
      if type(args) == 'table' then
        selector = args
      end

      local found = task:get_recipients(selector)
      local first = found and found[1]
      if first and first.addr then
        return found, 'table_list'
      end

      return nil
    end,
    description = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
  },
  -- Country, read from the 'country' mempool variable
  -- (the ASN module must have been executed first)
  country = {
    get_value = function(task)
      local pool = task:get_mempool()
      local value = pool:get_variable('country')
      if value then
        return value, 'string'
      end
      return nil
    end,
    description = 'Get country (ASN module must be executed first)',
  },
  -- AS number, read from the 'asn' mempool variable
  -- (the ASN module must have been executed first)
  asn = {
    type = 'string',
    get_value = function(task)
      local pool = task:get_mempool()
      local value = pool:get_variable('asn')
      if value then
        return value, 'string'
      end
      return nil
    end,
    description = 'Get AS number (ASN module must be executed first)',
  },
  -- Authenticated user name as reported by task:get_user(); nil when absent
  user = {
    get_value = function(task)
      local name = task:get_user()
      if name then
        return name, 'string'
      end
      return nil
    end,
    description = [[Get authenticated user name]],
  },
  -- Principal recipient; the value of task:get_principal_recipient() is
  -- passed through without a nil check
  to = {
    get_value = function(task)
      local rcpt = task:get_principal_recipient()
      return rcpt, 'string'
    end,
    description = [[Get principal recipient]],
  },
  -- Content digest; the value of task:get_digest() is passed through
  -- without a nil check
  digest = {
    get_value = function(task)
      local value = task:get_digest()
      return value, 'string'
    end,
    description = [[Get content digest]],
  },
  -- Digests of every part that reports itself as an attachment;
  -- nil when no digest was collected
  attachments = {
    get_value = function(task, args)
      local found = {}

      for idx, part in ipairs(task:get_parts() or E) do
        if part:is_attachment() then
          -- Encoding and hash type are taken from `args` by the common helper
          table.insert(found, common.get_cached_or_raw_digest(task, idx, part, args))
        end
      end

      if #found == 0 then
        return nil
      end

      return found, 'string_list'
    end,
    description = [[Get list of all attachments digests.
The first optional argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
    args_schema = common.digest_schema()
  },
  -- File names of all parts that have one; nil when no part has a file name
  files = {
    get_value = function(task)
      local names = {}

      for _, part in ipairs(task:get_parts() or E) do
        local name = part:get_filename()
        if name then
          names[#names + 1] = name
        end
      end

      if #names == 0 then
        return nil
      end

      return names, 'string_list'
    end,
    description = [[Get all attachments files]],
  },
  -- Languages reported by the text parts; nil when none of them has one
  languages = {
    get_value = function(task)
      local langs = {}

      for _, part in ipairs(task:get_text_parts() or E) do
        local lang = part:get_language()
        if lang then
          langs[#langs + 1] = lang
        end
      end

      if #langs == 0 then
        return nil
      end

      return langs, 'string_list'
    end,
    description = [[Get languages for text parts]],
  },
  -- HELO value; the value of task:get_helo() is passed through without a
  -- nil check
  helo = {
    get_value = function(task)
      local value = task:get_helo()
      return value, 'string'
    end,
    description = [[Get helo value]],
  },
  -- Header lookup by name (first argument). The optional second argument is a
  -- flags string: `full` switches to task:get_header_full() and a 'table_list'
  -- result, `strong` requests case sensitive name matching
  header = {
    get_value = function(task, args)
      local name, flags = args[1], args[2]

      if not flags then
        return task:get_header(name), 'string'
      end

      local strong = flags:match('strong') ~= nil
      if flags:match('full') then
        return task:get_header_full(name, strong), 'table_list'
      end

      return task:get_header(name, strong), 'string'
    end,
    description = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
    args_schema = {
      ts.string,
      (ts.pattern("strong") + ts.pattern("full")):is_optional(),
    }
  },
  -- Received headers: either the raw list of tables, or (when an argument is
  -- given) that single field picked from every header
  received = {
    get_value = function(task, args)
      local hdrs = task:get_received_headers()
      if not hdrs[1] then
        return nil
      end

      if not args[1] then
        return hdrs, 'table_list'
      end

      local function pick_field(hdr)
        return hdr[args[1]]
      end

      return fun.map(pick_field, hdrs), 'string_list'
    end,
    description = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
  },
  -- All urls of the task: either the url objects themselves, or (when an
  -- argument is given) the result of calling that method on every url
  urls = {
    get_value = function(task, args)
      local found = task:get_urls()
      if not found[1] then
        return nil
      end

      if not args[1] then
        return found, 'userdata_list'
      end

      local function call_method(url)
        -- args[1] names the method, e.g. `get_tld`
        return url[args[1]](url)
      end

      return fun.map(call_method, found), 'string_list'
    end,
    description = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
  },
  -- Most specific urls, selected through lua_util.extract_specific_urls().
  -- The first argument is a table of options (see `args_schema`).
  -- Returns the url list as 'userdata_list', or nil when nothing was extracted.
  ['specific_urls'] = {
    ['get_value'] = function(task, args)
      -- Work on a shallow copy: the arguments table is supplied by the caller
      -- and must not accumulate per-task state (`task`, `filter`) between
      -- invocations.
      local params = {}
      for key, value in pairs(args[1] or E) do
        params[key] = value
      end

      params.task = task
      params.no_cache = true

      if params.exclude_flags then
        params.filter = gen_exclude_flags_filter(params.exclude_flags)
      end

      local urls = lua_util.extract_specific_urls(params)
      if not urls[1] then
        return nil
      end

      return urls,'userdata_list'
    end,
    ['description'] = [[Get most specific urls. Arguments are equal to the Lua API function]],
    ['args_schema'] = {ts.shape{
      limit = ts.number + ts.string / tonumber,
      esld_limit = (ts.number + ts.string / tonumber):is_optional(),
      exclude_flags = url_flags_ts,
      flags = url_flags_ts,
      flags_mode = ts.one_of{'explicit'}:is_optional(),
      prefix = ts.string:is_optional(),
      need_content = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      need_emails = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      need_images = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      ignore_redirected = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
    }}
  },
  -- All emails of the task: either the objects returned by task:get_emails(),
  -- or (when an argument is given) the result of calling that method on each
  emails = {
    get_value = function(task, args)
      local found = task:get_emails()
      if not found[1] then
        return nil
      end

      if not args[1] then
        return found, 'userdata_list'
      end

      local function call_method(email)
        -- args[1] names the method, e.g. `get_user`
        return email[args[1]](email)
      end

      return fun.map(call_method, found), 'string_list'
    end,
    description = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
  },
  -- Get specific pool var. The first argument must be variable name,
  -- the second argument is optional and defines the type (string by default).
  -- The requested type is also returned as the selector result type.
  ['pool_var'] = {
    ['get_value'] = function(task, args)
      -- Named `var_type` so that the built-in `type()` is not shadowed
      local var_type = args[2] or 'string'
      return task:get_mempool():get_variable(args[1], var_type), var_type
    end,
    ['description'] = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
    ['args_schema'] = {ts.string, ts.string:is_optional()}
  },
  -- Value stored in the task cache under the key given as first argument.
  -- Tables are returned as 'string_list' (nothing when their first slot is
  -- empty), any other truthy value as 'string'
  task_cache = {
    get_value = function(task, args)
      local cached = task:cache_get(args[1])
      if not cached then
        return
      end

      if type(cached) ~= 'table' then
        return cached, 'string'
      end

      if not cached[1] then
        return
      end

      return cached, 'string_list'
    end,
    description = [[Get value of specific key from task cache. The first argument must be
the key name]],
    args_schema = { ts.string }
  },
  -- HTTP request header whose name is given as the first argument;
  -- nil when task:get_request_header() yields nothing
  request_header = {
    get_value = function(task, args)
      local value = task:get_request_header(args[1])
      if not value then
        return nil
      end
      return value, 'string'
    end,
    description = [[Get specific HTTP request header.
The first argument must be header name.]],
    args_schema = { ts.string }
  },
  -- Get task date, optionally formatted.
  -- The first argument selects the timestamp source and defaults to 'message';
  -- the second argument, when present, is passed to os.date() as the format.
  -- Returns the timestamp as a string, or nil when task:get_date() gives none.
  ['time'] = {
    ['get_value'] = function(task, args)
      local what = args[1] or 'message'
      local dt = task:get_date{format = what, gmt = true}

      if dt then
        if args[2] then
          -- Should be in format !xxx, as dt is in GMT
          return os.date(args[2], dt),'string'
        end

        return tostring(dt),'string'
      end

      return nil
    end,
    -- The description names `message` as the default, matching the fallback
    -- used in get_value above
    ['description'] = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
    ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
                       ts.string:is_optional()}
  },
  -- Words of all text parts, flattened into a single list. The first argument
  -- picks the word form ('stem' when omitted); 'full' yields a 'table_list',
  -- every other form a 'string_list'
  words = {
    get_value = function(task, args)
      local mode = args[1] or 'stem'
      local parts = task:get_text_parts()

      if not parts then
        return nil
      end

      local result_type = (mode == 'full') and 'table_list' or 'string_list'

      local function part_words(part)
        return part:get_words(mode)
      end

      return lua_util.flatten(fun.map(part_words, parts)), result_type
    end,
    description = [[Get words from text parts
- `stem`: stemmed words (default)
- `raw`: raw words
- `norm`: normalised words (lowercased)
- `full`: list of tables
]],
    args_schema = { ts.one_of({ 'stem', 'raw', 'norm', 'full' }):is_optional() },
  },
  -- Queue ID as reported by task:get_queue_id(); nil when absent
  queueid = {
    get_value = function(task)
      local id = task:get_queue_id()
      if not id then
        return nil
      end
      return id, 'string'
    end,
    description = 'Get queue ID',
  },
  -- ID of the task being processed, from task:get_uid(); nil when absent
  uid = {
    get_value = function(task)
      local id = task:get_uid()
      if not id then
        return nil
      end
      return id, 'string'
    end,
    description = 'Get ID of the task being processed',
  },
  -- Message ID of the task being processed, from task:get_message_id();
  -- nil when absent
  messageid = {
    get_value = function(task)
      local id = task:get_message_id()
      if not id then
        return nil
      end
      return id, 'string'
    end,
    description = 'Get message ID',
  },
  -- Specific symbol: first element of task:get_symbol(name, shadow_result),
  -- returned as a table; nothing is returned when the lookup yields no value
  symbol = {
    get_value = function(task, args)
      local results = task:get_symbol(args[1], args[2])
      if not results then
        return
      end
      return results[1], 'table'
    end,
    description = 'Get specific symbol. The first argument must be the symbol name. ' ..
      'The second argument is an optional shadow result name. ' ..
      'Returns the symbol table. See task:get_symbol()',
    args_schema = { ts.string, ts.string:is_optional() }
  },
  -- Get full scan result.
  -- The optional first argument is forwarded to task:get_metric_result() and
  -- names a shadow result. Returns the result as 'table'; nothing is returned
  -- when the lookup yields no value.
  ['scan_result'] = {
    ['get_value'] = function(task, args)
      local res = task:get_metric_result(args[1])
      if res then
        return res,'table'
      end
    end,
    -- The first fragment ends with '. ' so the two sentences do not run
    -- together after concatenation
    ['description'] = 'Get full scan result (either default or shadow if shadow result name is specified). ' ..
      'Returns the result table. See task:get_metric_result()',
    ['args_schema'] = {ts.string:is_optional()}
  },
  -- Metatokens produced by meta_functions.gen_metatokens(), each converted
  -- with tostring; nil when the token list is empty
  metatokens = {
    get_value = function(task)
      local tokens = meta_functions.gen_metatokens(task)
      if not tokens[1] then
        return nil
      end

      local as_strings = {}
      for idx, token in ipairs(tokens) do
        as_strings[idx] = tostring(token)
      end

      return as_strings, 'string_list'
    end,
    description = [[Get metatokens for a message as strings]],
  },
  490. }
  491. return extractors