You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

extractors.lua 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. --[[
  2. Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. local fun = require 'fun'
  14. local lua_util = require "lua_util"
  15. local ts = require("tableshape").types
  16. local E = {}
  17. local extractors = {
  -- Identity extractor: hands its first argument straight back
  id = {
    -- Yields `args[1]` when it is truthy and the empty string otherwise;
    -- the reported type is always 'string'. The task argument is ignored.
    get_value = function(_, args)
      return args[1] or '', 'string'
    end,
    description = [[Return value from function's argument or an empty string,
For example, `id('Something')` returns a string 'Something']],
    -- A single optional string argument is accepted
    args_schema = { ts.string:is_optional() }
  },
  -- List constructor: like `id`, but produces a list out of all arguments
  list = {
    -- Without a first argument an empty table is returned; otherwise every
    -- argument is mapped through `tostring` with `fun.map`.
    -- The reported type is 'string_list' in both cases.
    get_value = function(_, args)
      if not args[1] then
        return {}, 'string_list'
      end
      return fun.map(tostring, args), 'string_list'
    end,
    description = [[Return a list from function's arguments or an empty list,
For example, `list('foo', 'bar')` returns a list {'foo', 'bar'}]],
  },
  -- Source IP address of the task
  ip = {
    -- Returns the object obtained from `task:get_ip()` (typed 'userdata')
    -- when it exists and reports itself as valid; nil otherwise.
    get_value = function(task)
      local addr = task:get_ip()
      if not addr or not addr:is_valid() then
        return nil
      end
      return addr, 'userdata'
    end,
    description = [[Get source IP address]],
  },
  -- Sender address (MIME or SMTP)
  from = {
    -- Calls `task:get_from` with the requested kind (`args[1]`, or 0 when
    -- omitted) and returns the first element, typed 'table', provided that
    -- element carries an `addr` field. Returns nil in every other case.
    get_value = function(task, args)
      local senders = task:get_from(args[1] or 0)
      local first = senders and senders[1]
      if first and first.addr then
        return first, 'table'
      end
      return nil
    end,
    description = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
uses any type by default)]],
  },
  -- Recipient addresses (MIME or SMTP)
  rcpts = {
    -- Calls `task:get_recipients` with the requested kind (`args[1]`, or 0
    -- when omitted). The whole list is returned, typed 'table_list', as long
    -- as its first element carries an `addr` field; otherwise nil.
    get_value = function(task, args)
      local recipients = task:get_recipients(args[1] or 0)
      local head = recipients and recipients[1]
      if not (head and head.addr) then
        return nil
      end
      return recipients, 'table_list'
    end,
    description = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
uses any type by default)]],
  },
  -- Country of the sender (the ASN module must have run beforehand)
  country = {
    -- Reads the 'country' variable from the task memory pool and returns it
    -- typed 'string'; returns nil when the variable is not set.
    get_value = function(task)
      local value = task:get_mempool():get_variable('country')
      if value then
        return value, 'string'
      end
      return nil
    end,
    description = [[Get country (ASN module must be executed first)]],
  },
  -- AS number of the sender (the ASN module must have run beforehand)
  asn = {
    type = 'string',
    -- Reads the 'asn' variable from the task memory pool and returns it
    -- typed 'string'; returns nil when the variable is not set.
    get_value = function(task)
      local value = task:get_mempool():get_variable('asn')
      if value then
        return value, 'string'
      end
      return nil
    end,
    description = [[Get AS number (ASN module must be executed first)]],
  },
  -- Authenticated user name
  user = {
    -- Returns the result of `task:get_user()` typed 'string', or nil when
    -- that call yields nothing.
    get_value = function(task)
      local name = task:get_user()
      if name then
        return name, 'string'
      end
      return nil
    end,
    description = 'Get authenticated user name',
  },
  -- Principal recipient of the message
  to = {
    -- Forwards the first result of `task:get_principal_recipient()` together
    -- with the type tag 'string'; no nil check is performed here.
    get_value = function(task)
      local recipient = task:get_principal_recipient()
      return recipient, 'string'
    end,
    description = 'Get principal recipient',
  },
  -- Digest of the message content
  digest = {
    -- Forwards the first result of `task:get_digest()` together with the
    -- type tag 'string'; no nil check is performed here.
    get_value = function(task)
      local value = task:get_digest()
      return value, 'string'
    end,
    description = 'Get content digest',
  },
  -- Digests of every attachment (i.e. every part that has a filename)
  attachments = {
    -- Without arguments the digest stored on each part (`part:get_digest()`)
    -- is collected. With arguments, each attachment's 'raw_parsed' content is
    -- hashed with `rspamd_cryptobox_hash` using the requested hash type
    -- (`args[2]`, 'blake2' when omitted) and rendered in the requested
    -- encoding (`args[1]`).
    -- Returns the collected list typed 'string_list', or nil when it is empty.
    get_value = function(task, args)
      local digests = {}
      local parts = task:get_parts() or E

      if #args == 0 then
        -- Plain mode: reuse the digest already attached to the part
        for _, part in ipairs(parts) do
          if part:get_filename() then
            table.insert(digests, part:get_digest())
          end
        end
      else
        local hash_lib = require "rspamd_cryptobox_hash"
        local encoding = args[1] or 'hex'
        local hash_type = args[2] or 'blake2'
        -- Encoding name -> name of the hash object's method producing it
        local encoder_methods = { hex = 'hex', base32 = 'base32', base64 = 'base64' }
        local method = encoder_methods[encoding]
        -- Declared outside the loop on purpose: it is only reassigned when
        -- the encoding is one of the known names
        local encoded

        for _, part in ipairs(parts) do
          if part:get_filename() then
            local hash = hash_lib.create_specific(hash_type, part:get_content('raw_parsed'))
            if method then
              encoded = hash[method](hash)
            end
            table.insert(digests, encoded)
          end
        end
      end

      if #digests == 0 then
        return nil
      end
      return digests, 'string_list'
    end,
    description = [[Get list of all attachments digests.
The first optional argument is encoding (`hex`, `base32`, `base64`),
the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
    args_schema = {
      ts.one_of{'hex', 'base32', 'base64'}:is_optional(),
      ts.one_of{'blake2', 'sha256', 'sha1', 'sha512', 'md5'}:is_optional()
    }
  },
  -- File names of all attachments
  files = {
    -- Walks over `task:get_parts()` and gathers every non-nil filename.
    -- Returns the names typed 'string_list', or nil when none were found.
    get_value = function(task)
      local names = {}
      for _, part in ipairs(task:get_parts() or E) do
        local fname = part:get_filename()
        if fname then
          names[#names + 1] = fname
        end
      end
      if #names == 0 then
        return nil
      end
      return names, 'string_list'
    end,
    description = 'Get all attachments files',
  },
  -- Languages of the text parts
  languages = {
    -- Walks over `task:get_text_parts()` and gathers every language reported
    -- by `part:get_language()`.
    -- Returns them typed 'string_list', or nil when none were found.
    get_value = function(task)
      local found = {}
      for _, part in ipairs(task:get_text_parts() or E) do
        local lang = part:get_language()
        if lang then
          found[#found + 1] = lang
        end
      end
      if #found == 0 then
        return nil
      end
      return found, 'string_list'
    end,
    description = 'Get languages for text parts',
  },
  -- HELO value of the session
  helo = {
    -- Forwards the first result of `task:get_helo()` together with the type
    -- tag 'string'; no nil check is performed here.
    get_value = function(task)
      local value = task:get_helo()
      return value, 'string'
    end,
    description = 'Get helo value',
  },
  -- Header lookup by name; the name is taken from the first argument
  header = {
    -- `args[1]` is the header name, `args[2]` an optional flags string:
    --   * contains 'strong' -> the lookup is performed with strong = true
    --   * contains 'full'   -> `task:get_header_full` is used and the result
    --                          is typed 'table_list'
    -- In all other cases `task:get_header` is used and the result is typed
    -- 'string'. When no flags are given the `strong` argument is not passed.
    get_value = function(task, args)
      local name, flags = args[1], args[2]

      if not flags then
        return task:get_header(name), 'string'
      end

      local strong = flags:match('strong') and true or false
      if flags:match('full') then
        return task:get_header_full(name, strong), 'table_list'
      end
      return task:get_header(name, strong), 'string'
    end,
    description = [[Get header with the name that is expected as an argument.
The optional second argument accepts list of flags:
- `full`: returns all headers with this name with all data (like task:get_header_full())
- `strong`: use case sensitive match when matching header's name]],
    args_schema = {
      ts.string,
      (ts.pattern("strong") + ts.pattern("full")):is_optional()
    }
  },
  -- Received headers of the message
  received = {
    -- With no argument (or when the task has no received headers) the value
    -- of `task:get_received_headers()` is returned as is, typed 'table_list'.
    -- With an argument, the field named by `args[1]` is picked from every
    -- header via `fun.map`, typed 'string_list'.
    get_value = function(task, args)
      local headers = task:get_received_headers()
      if not (args[1] and headers) then
        return headers, 'table_list'
      end

      local function pick_field(rcvd)
        return rcvd[args[1]]
      end
      return fun.map(pick_field, headers), 'string_list'
    end,
    description = [[Get list of received headers.
If no arguments specified, returns list of tables. Otherwise, selects a specific element,
e.g. `by_hostname`]],
  },
  -- All urls found in the message
  urls = {
    -- With no argument (or when there are no urls) the value of
    -- `task:get_urls()` is returned as is, typed 'userdata_list'.
    -- With an argument, the method named by `args[1]` is invoked on every
    -- url object via `fun.map`, typed 'string_list'.
    get_value = function(task, args)
      local found = task:get_urls()
      if not (args[1] and found) then
        return found, 'userdata_list'
      end

      local function call_method(url)
        return url[args[1]](url)
      end
      return fun.map(call_method, found), 'string_list'
    end,
    description = [[Get list of all urls.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_tld`]],
  },
  -- Most specific urls, selected by `lua_util.extract_specific_urls`
  specific_urls = {
    -- `args[1]` is an optional table of parameters for
    -- `lua_util.extract_specific_urls` (its accepted fields are listed in
    -- `args_schema` below). The call is always made with `task` set to the
    -- current task and `no_cache` set to true.
    -- Returns the first result of that call, typed 'userdata_list'.
    get_value = function(task, args)
      -- Work on a per-call shallow copy: writing `task`/`no_cache` into the
      -- caller-supplied table would modify the caller's arguments and leave
      -- a reference to this task inside them after the call returns.
      local params = {}
      for key, value in pairs(args[1] or {}) do
        params[key] = value
      end
      params.task = task
      params.no_cache = true

      local urls = lua_util.extract_specific_urls(params)
      return urls, 'userdata_list'
    end,
    description = [[Get most specific urls. Arguments are equal to the Lua API function]],
    -- Numeric and boolean fields may also be given as strings; they are
    -- converted with `tonumber` / `lua_util.toboolean` respectively
    args_schema = { ts.shape{
      limit = ts.number + ts.string / tonumber,
      esld_limit = (ts.number + ts.string / tonumber):is_optional(),
      prefix = ts.string:is_optional(),
      need_emails = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      need_images = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
      ignore_redirected = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
    }}
  },
  -- All email addresses found in the message
  emails = {
    -- With no argument (or when there are no emails) the value of
    -- `task:get_emails()` is returned as is, typed 'userdata_list'.
    -- With an argument, the method named by `args[1]` is invoked on every
    -- returned object via `fun.map`, typed 'string_list'.
    get_value = function(task, args)
      local found = task:get_emails()
      if not (args[1] and found) then
        return found, 'userdata_list'
      end

      local function call_method(email)
        return email[args[1]](email)
      end
      return fun.map(call_method, found), 'string_list'
    end,
    description = [[Get list of all emails.
If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
e.g. `get_user`]],
  },
  -- Arbitrary memory pool variable. The first argument is the variable name,
  -- the optional second one is its type ('string' when omitted)
  pool_var = {
    -- Looks the variable up with `task:get_mempool():get_variable(name, type)`
    -- and returns its first result; the requested type name is echoed back
    -- as the second return value.
    get_value = function(task, args)
      local var_name = args[1]
      local var_type = args[2] or 'string'
      local value = task:get_mempool():get_variable(var_name, var_type)
      return value, var_type
    end,
    description = [[Get specific pool var. The first argument must be variable name,
the second argument is optional and defines the type (string by default)]],
    args_schema = { ts.string, ts.string:is_optional() }
  },
  -- HTTP request header; the first argument is the header name
  request_header = {
    -- Returns `tostring` of the value given by `task:get_request_header`,
    -- typed 'string', or nil when the header is not available.
    get_value = function(task, args)
      local value = task:get_request_header(args[1])
      if not value then
        return nil
      end
      return tostring(value), 'string'
    end,
    description = [[Get specific HTTP request header.
The first argument must be header name.]],
    args_schema = { ts.string }
  },
  -- Task date, optionally formatted
  time = {
    -- `args[1]` selects which timestamp to read ('connect' or 'message';
    -- 'message' is used when it is omitted). The value is requested from
    -- `task:get_date` with `gmt = true`.
    -- `args[2]` is an optional `os.date` format string.
    -- Returns the formatted date, or `tostring` of the raw value when no
    -- format is given, typed 'string'; nil when no date is available.
    get_value = function(task, args)
      local what = args[1] or 'message'
      local dt = task:get_date{format = what, gmt = true}

      if not dt then
        return nil
      end

      if args[2] then
        -- Should be in format !xxx, as dt is in GMT
        return os.date(args[2], dt), 'string'
      end
      return tostring(dt), 'string'
    end,
    -- The text below marks `message` as the default, because that is what
    -- `get_value` falls back to when no type is given
    description = [[Get task timestamp. The first argument is type:
- `connect`: connection timestamp
- `message`: timestamp as defined by `Date` header (default)
The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
    args_schema = {
      ts.one_of{'connect', 'message'}:is_optional(),
      ts.string:is_optional()
    }
  },
  -- Words of the message text parts
  words = {
    -- `args[1]` chooses the word representation passed to `part:get_words`
    -- ('stem' when omitted). The per-part results are mapped with `fun.map`
    -- and merged by `lua_util.flatten`.
    -- The result is typed 'table_list' for 'full' and 'string_list'
    -- otherwise; nil is returned when the task has no text parts.
    get_value = function(task, args)
      local mode = args[1] or 'stem'
      local parts = task:get_text_parts()
      if not parts then
        return nil
      end

      local rtype = (mode == 'full') and 'table_list' or 'string_list'
      local function words_of(part)
        return part:get_words(mode)
      end
      local merged = lua_util.flatten(fun.map(words_of, parts))
      return merged, rtype
    end,
    description = [[Get words from text parts
- `stem`: stemmed words (default)
- `raw`: raw words
- `norm`: normalised words (lowercased)
- `full`: list of tables
]],
    args_schema = { ts.one_of { 'stem', 'raw', 'norm', 'full' }:is_optional() },
  },
  360. }
  361. return extractors