You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lua_meta.lua 8.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. --[[
  2. Copyright (c) 2017, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  -- Module name handed to rspamd_logger.debugm when metatokens are traced.
  local N = "metatokens"
  -- Table of public functions; it is what this module returns.
  local exports = {}
  -- Metafunctions
  --- Map the message size onto a normalised bucket position.
  -- The value of `task:get_size()` is compared against an ascending list of
  -- thresholds; the 1-based position of the first threshold that is not
  -- smaller than the size is divided by the number of thresholds.
  -- @param task object exposing `get_size()`
  -- @return single-element table: the bucket position in (0, 1], or 0 when
  --   the size is larger than every threshold
  local function meta_size_function(task)
    local thresholds = {
      100, 200, 500, 1000,
      2000, 4000, 10000, 20000,
      30000, 100000, 200000, 400000,
      800000, 1000000, 2000000, 8000000,
    }
    local nbuckets = #thresholds
    local msg_size = task:get_size()
    local bucket = 0

    for idx, limit in ipairs(thresholds) do
      if limit >= msg_size then
        bucket = (1.0 * idx) / nbuckets
        break
      end
    end

    return {bucket}
  end
  --- Summarise the images attached to a message.
  -- An image counts as "large" when width + height exceeds 256 and as "small"
  -- otherwise; images whose width or height is not positive are counted in
  -- neither group.
  -- @param task object exposing `get_images()`
  -- @return table of five numbers, in this order:
  --   1 - total number of images,
  --   2 - share of jpeg images,
  --   3 - share of png images,
  --   4 - share of large images,
  --   5 - share of small images
  --   (shares are left at 0 when there are no images)
  local function meta_images_function(task)
    local total, jpeg, png, large, small = 0, 0, 0, 0, 0

    for _, image in ipairs(task:get_images() or {}) do
      local kind = image:get_type()
      if kind == 'png' then
        png = png + 1
      elseif kind == 'jpeg' then
        jpeg = jpeg + 1
      end

      local width = image:get_width()
      local height = image:get_height()
      if width > 0 and height > 0 then
        if width + height > 256 then
          large = large + 1
        else
          small = small + 1
        end
      end

      total = total + 1
    end

    if total > 0 then
      -- Turn the per-kind counters into shares of the total.
      jpeg = 1.0 * jpeg / total
      png = 1.0 * png / total
      large = 1.0 * large / total
      small = 1.0 * small / total
    end

    return {total, jpeg, png, large, small}
  end
  --- Relate the number of text parts and attachments to the part count.
  -- A part is treated as an attachment when `get_filename()` returns a
  -- truthy value. The denominator is the number of parts plus one, so it is
  -- never zero.
  -- @param task object exposing `get_text_parts()` and `get_parts()`
  -- @return table of two numbers:
  --   1 - text parts / (parts + 1),
  --   2 - attachments / (parts + 1)
  local function meta_nparts_function(task)
    local text_parts = task:get_text_parts()
    local text_count = 0
    if text_parts then
      text_count = #text_parts
    end

    local attachments = 0
    local denominator = 1
    local all_parts = task:get_parts()
    for _, part in ipairs(all_parts or {}) do
      if part:get_filename() then
        attachments = attachments + 1
      end
      denominator = denominator + 1
    end

    return {
      (1.0 * text_count) / denominator,
      (1.0 * attachments) / denominator,
    }
  end
  --- Compute the share of text parts that are UTF and that are not.
  -- @param task object exposing `get_text_parts()`
  -- @return table of two numbers:
  --   1 - share of text parts for which `is_utf()` is truthy,
  --   2 - share of the remaining text parts;
  --   both are 0 when there are no text parts
  local function meta_encoding_function(task)
    local text_parts = task:get_text_parts()
    if not text_parts or #text_parts == 0 then
      return {0, 0}
    end

    local utf_count, other_count = 0, 0
    for _, part in ipairs(text_parts) do
      if part:is_utf() then
        utf_count = utf_count + 1
      else
        other_count = other_count + 1
      end
    end

    local nparts = #text_parts
    return {utf_count / nparts, other_count / nparts}
  end
  --- Produce inverse recipient counts for the MIME and SMTP recipient lists.
  -- @param task object exposing `has_recipients(kind)` and
  --   `get_recipients(kind)`
  -- @return table of two numbers:
  --   1 - 1 / number of 'mime' recipients,
  --   2 - 1 / number of 'smtp' recipients;
  --   an entry is 0 when the task has no recipients of that kind
  local function meta_recipients_function(task)
    -- Inverse of the recipient count for one kind, 0 when there are none.
    local function inverse_count(kind)
      if not task:has_recipients(kind) then
        return 0
      end
      local count = #(task:get_recipients(kind))
      if count > 0 then
        return 1.0 / count
      end
      return 0
    end

    return {inverse_count('mime'), inverse_count('smtp')}
  end
  --- Derive features from the Received headers of a message.
  -- Headers whose `flags.artificial` is set are skipped entirely.
  -- @param task object exposing `get_received_headers()`
  -- @return table of four numbers:
  --   1 - 1 / number of considered headers,
  --   2 - share of considered headers that have no `by_hostname`,
  --   3 - 1 / sum of absolute timestamp differences between successive
  --       considered headers that carry a timestamp (0 when that sum is 0),
  --   4 - share of considered headers flagged `ssl` or `authenticated`;
  --   every entry is 0 when there is no header to consider
  local function meta_received_function(task)
    local count_factor = 0
    local invalid_factor = 0
    local time_factor = 0
    local secure_factor = 0
    local rh = task:get_received_headers()

    if rh and #rh > 0 then
      local ntotal = 0
      local init_time = 0

      for _, rc in ipairs(rh) do
        local flags = rc.flags
        if not (flags and flags['artificial']) then
          ntotal = ntotal + 1

          if not rc.by_hostname then
            invalid_factor = invalid_factor + 1.0
          end

          if init_time == 0 and rc.timestamp then
            -- First usable timestamp: nothing to compare against yet.
            init_time = rc.timestamp
          elseif rc.timestamp then
            time_factor = time_factor + math.abs(init_time - rc.timestamp)
            init_time = rc.timestamp
          end

          if flags and (flags['ssl'] or flags['authenticated']) then
            secure_factor = secure_factor + 1.0
          end
        end
      end

      -- Every header may have been skipped as artificial; dividing by zero
      -- here would yield NaN/inf features, so leave the factors at 0 then.
      if ntotal > 0 then
        invalid_factor = invalid_factor / ntotal
        secure_factor = secure_factor / ntotal
        count_factor = 1.0 / ntotal
      end

      if time_factor ~= 0 then
        time_factor = 1.0 / time_factor
      end
    end

    return {count_factor, invalid_factor, time_factor, secure_factor}
  end
  --- Produce the inverse of the number of URLs found in a message.
  -- @param task object exposing `has_urls()` and `get_urls()`
  -- @return single-element table: 1 / number of URLs, or 0 when the task
  --   reports no URLs or the URL list is missing or empty
  local function meta_urls_function(task)
    if task:has_urls() then
      local urls = task:get_urls()
      local nurls = urls and #urls or 0
      -- Guard the division: an empty list would otherwise give 1/0 = inf.
      if nurls > 0 then
        return {1.0 / nurls}
      end
    end

    return {0}
  end
  --- Build word- and character-level features for a message.
  -- Reads the "avg_words_len" and "short_words_cnt" variables (as "double")
  -- from the task mempool and averages per-part character statistics over
  -- all text parts.
  -- @param task object exposing `get_mempool()` and `get_text_parts()`
  -- @return table of nine numbers, in this order:
  --   1 - the "short_words_cnt" variable (0 when unset),
  --   2 - bucket position of the average word length in (0, 1], or 0 when
  --       the average exceeds every bucket,
  --   3 - spaces rate,
  --   4 - double spaces rate,
  --   5 - non spaces rate,
  --   6 - ascii characters rate,
  --   7 - non-ascii characters rate,
  --   8 - capital letters rate,
  --   9 - numeric characters rate
  local function meta_words_function(task)
    local pool = task:get_mempool()
    local avg_len = pool:get_variable("avg_words_len", "double") or 0.0
    local short_words = pool:get_variable("short_words_cnt", "double") or 0.0

    local lens = {2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20}
    local ret_len = 0
    for i = 1, #lens do
      if lens[i] >= avg_len then
        ret_len = (1.0 * i) / #lens
        break
      end
    end

    -- A task may have no text parts at all; treat that as an empty list
    -- instead of failing on ipairs(nil) / #nil.
    local tp = task:get_text_parts() or {}

    -- Keys of the per-part stats table, in output order.
    local stat_keys = {
      'spaces',
      'double_spaces',
      'non_spaces',
      'ascii_characters',
      'non_ascii_characters',
      'capital_letters',
      'numeric_characters',
    }
    local wres = {0, 0, 0, 0, 0, 0, 0}

    for _, p in ipairs(tp) do
      local stats = p:get_stats()
      local len = p:get_length()
      -- Parts of zero length contribute nothing (and would divide by zero).
      if len > 0 then
        for i, key in ipairs(stat_keys) do
          wres[i] = wres[i] + stats[key] / len
        end
      end
    end

    -- Average the accumulated rates over the number of text parts.
    local divisor = 1.0
    if #tp > 0 then
      divisor = #tp
    end

    local ret = {short_words, ret_len}
    for _, wr in ipairs(wres) do
      ret[#ret + 1] = wr / divisor
    end

    return ret
  end
  -- Registry of metatoken generators. Each entry holds:
  --   cb      - function(task) returning a list of numbers,
  --   ninputs - how many numbers `cb` returns,
  --   desc    - one name per returned number, in the order `cb` returns them.
  -- The order of the entries and of the names defines the layout of the
  -- metatoken vector, so `desc` must follow the callbacks' return order.
  local metafunctions = {
    {
      cb = meta_size_function,
      ninputs = 1,
      -- 1 - bucketed message size
      desc = {
        "size"
      }
    },
    {
      cb = meta_images_function,
      ninputs = 5,
      -- 1 - number of images,
      -- 2 - share of jpeg images
      -- 3 - share of png images
      -- 4 - share of large images (width + height > 256)
      -- 5 - share of small images (width + height <= 256)
      desc = {
        'nimages',
        'njpeg_images',
        'npng_images',
        'nlarge_images',
        'nsmall_images'
      }
    },
    {
      cb = meta_nparts_function,
      ninputs = 2,
      -- 1 - text parts relative to (parts + 1)
      -- 2 - attachments relative to (parts + 1)
      desc = {
        'ntext_parts',
        'nattachments'
      }
    },
    {
      cb = meta_encoding_function,
      ninputs = 2,
      -- 1 - share of utf text parts
      -- 2 - share of non-utf text parts
      desc = {
        'nutf_parts',
        'nascii_parts'
      }
    },
    {
      cb = meta_recipients_function,
      ninputs = 2,
      -- 1 - 1 / number of mime rcpt
      -- 2 - 1 / number of smtp rcpt
      desc = {
        'nmime_rcpt',
        'nsmtp_rcpt'
      }
    },
    {
      cb = meta_received_function,
      ninputs = 4,
      -- 1 - 1 / number of received headers
      -- 2 - share of received headers without by_hostname
      -- 3 - 1 / accumulated timestamp difference between received headers
      -- 4 - share of received headers flagged ssl or authenticated
      desc = {
        'nreceived',
        'nreceived_invalid',
        'nreceived_bad_time',
        'nreceived_secure'
      }
    },
    {
      cb = meta_urls_function,
      ninputs = 1,
      -- 1 - 1 / number of urls
      desc = {
        'nurls'
      }
    },
    {
      cb = meta_words_function,
      ninputs = 9,
      -- 1 - short words counter
      -- 2 - bucketed average word length
      -- 3..9 - character class rates averaged over text parts
      desc = {
        'nshort_words',
        'avg_words_len',
        'spaces_rate',
        'double_spaces_rate',
        'non_spaces_rate',
        'ascii_characters_rate',
        'non_ascii_characters_rate',
        'capital_characters_rate',
        -- The misspelling is kept on purpose: the name is used as a table key.
        'numeric_cahracters'
      }
    },
  }
  --- Build (or fetch from the task cache) the flat list of metatokens.
  -- Every callback registered in `metafunctions` is invoked with the task and
  -- its results are appended in order; each value is also traced through
  -- `rspamd_logger.debugm`. The finished list is stored in the task cache
  -- under the key 'metatokens' and reused on later calls.
  -- @param task object exposing `cache_get` and `cache_set`, passed on to
  --   every callback
  -- @return list of metatoken values
  local function rspamd_gen_metatokens(task)
    local logger = require "rspamd_logger"

    local from_cache = task:cache_get('metatokens')
    if from_cache then
      return from_cache
    end

    local tokens = {}
    for _, meta in ipairs(metafunctions) do
      for idx, value in ipairs(meta.cb(task)) do
        logger.debugm(N, task, "metatoken: %s = %s", meta.desc[idx], value)
        tokens[#tokens + 1] = value
      end
    end

    task:cache_set('metatokens', tokens)
    return tokens
  end
  -- Publish the list generator under its short and its prefixed name.
  exports.gen_metatokens = rspamd_gen_metatokens
  exports.rspamd_gen_metatokens = rspamd_gen_metatokens
  --- Build a name -> value table of metatokens.
  -- Every callback registered in `metafunctions` is invoked with the task and
  -- each returned value is stored under the name found at the same position
  -- in that entry's `desc` list. The task cache is not consulted.
  -- @param task passed on to every callback
  -- @return table mapping metatoken names to values
  local function rspamd_gen_metatokens_table(task)
    local named = {}

    for _, meta in ipairs(metafunctions) do
      local values = meta.cb(task)
      for idx, value in ipairs(values) do
        local name = meta.desc[idx]
        named[name] = value
      end
    end

    return named
  end
  -- Publish the table generator under its short and its prefixed name.
  exports.gen_metatokens_table = rspamd_gen_metatokens_table
  exports.rspamd_gen_metatokens_table = rspamd_gen_metatokens_table
  --- Count how many metatokens the registered callbacks produce in total.
  -- @return sum of the `ninputs` fields of all entries in `metafunctions`
  local function rspamd_count_metatokens()
    local sum = 0
    for idx = 1, #metafunctions do
      sum = sum + metafunctions[idx].ninputs
    end
    return sum
  end
  -- Publish the counter under its short and its prefixed name.
  exports.count_metatokens = rspamd_count_metatokens
  exports.rspamd_count_metatokens = rspamd_count_metatokens
  355. return exports