You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_mimepart.c 54KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "lua_url.h"
  18. #include "libmime/message.h"
  19. #include "libmime/lang_detection.h"
  20. #include "libstat/stat_api.h"
  21. #include "libcryptobox/cryptobox.h"
  22. #include "libutil/shingles.h"
  23. #include "contrib/uthash/utlist.h"
  24. /* Textpart methods */
  25. /***
  26. * @module rspamd_textpart
  27. * This module provides different methods to manipulate text parts data. Text parts
  28. * could be obtained from the `rspamd_task` by using of method `task:get_text_parts()`
  29. @example
  30. rspamd_config.R_EMPTY_IMAGE = function (task)
  31. parts = task:get_text_parts()
  32. if parts then
  33. for _,part in ipairs(parts) do
  34. if part:is_empty() then
  35. texts = task:get_texts()
  36. if texts then
  37. return true
  38. end
  39. return false
  40. end
  41. end
  42. end
  43. return false
  44. end
  45. */
  46. /***
  47. * @method text_part:is_utf()
  48. * Return TRUE if part is a valid utf text
  49. * @return {boolean} true if part is valid `UTF8` part
  50. */
  51. LUA_FUNCTION_DEF (textpart, is_utf);
  52. /***
  53. * @method text_part:has_8bit_raw()
  54. * Return TRUE if a part has raw 8bit characters
  55. * @return {boolean} true if a part has raw 8bit characters
  56. */
  57. LUA_FUNCTION_DEF (textpart, has_8bit_raw);
  58. /***
  59. * @method text_part:has_8bit()
  60. * Return TRUE if a part has encoded 8bit characters
  61. * @return {boolean} true if a part has encoded 8bit characters
  62. */
  63. LUA_FUNCTION_DEF (textpart, has_8bit);
  64. /***
  65. * @method text_part:get_content([type])
  66. * Get the text of the part (html tags stripped). Optional `type` defines type of content to get:
  67. * - `content` (default): utf8 content with HTML tags stripped and newlines preserved
  68. * - `content_oneline`: utf8 content with HTML tags and newlines stripped
  69. * - `raw`: raw content, not mime decoded nor utf8 converted
  70. * - `raw_parsed`: raw content, mime decoded, not utf8 converted
  71. * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
  72. * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
  73. */
  74. LUA_FUNCTION_DEF (textpart, get_content);
  75. /***
  76. * @method text_part:get_raw_content()
  77. * Get the original text of the part
  78. * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
  79. */
  80. LUA_FUNCTION_DEF (textpart, get_raw_content);
  81. /***
  82. * @method text_part:get_content_oneline()
  83. * Get the text of the part (html tags and newlines stripped)
  84. * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
  85. */
  86. LUA_FUNCTION_DEF (textpart, get_content_oneline);
  87. /***
  88. * @method text_part:get_length()
  89. * Get length of the text of the part
  90. * @return {integer} length of part in **bytes**
  91. */
  92. LUA_FUNCTION_DEF (textpart, get_length);
  93. /***
  94. * @method text_part:get_raw_length()
  95. * Get length of the **raw** content of the part (e.g. HTML with tags unstripped)
  96. * @return {integer} length of part in **bytes**
  97. */
  98. LUA_FUNCTION_DEF (textpart, get_raw_length);
  99. /***
  100. * @method text_part:get_urls_length()
  101. * Get length of the urls within the part
  102. * @return {integer} length of urls in **bytes**
  103. */
  104. LUA_FUNCTION_DEF (textpart, get_urls_length);
  105. /***
  106. * @method text_part:get_lines_count()
  107. * Get lines number in the part
  108. * @return {integer} number of lines in the part
  109. */
  110. LUA_FUNCTION_DEF (textpart, get_lines_count);
  111. /***
  112. * @method text_part:get_stats()
  113. * Returns a table with the following data:
  114. * - `lines`: number of lines
  115. * - `spaces`: number of spaces
  116. * - `double_spaces`: double spaces
  117. * - `empty_lines`: number of empty lines
  118. * - `non_ascii_characters`: number of non ascii characters
  119. * - `ascii_characters`: number of ascii characters
  120. * @return {table} table of stats
  121. */
  122. LUA_FUNCTION_DEF (textpart, get_stats);
  123. /***
  124. * @method text_part:get_words_count()
  125. * Get words number in the part
  126. * @return {integer} number of words in the part
  127. */
  128. LUA_FUNCTION_DEF (textpart, get_words_count);
  129. /***
  130. * @method text_part:get_words([how])
  131. * Get words in the part. Optional `how` argument defines type of words returned:
  132. * - `stem`: stemmed words (default)
  133. * - `norm`: normalised words (utf normalised + lowercased)
  134. * - `raw`: raw words in utf (if possible)
  135. * - `full`: list of tables, each table has the following fields:
  136. * - [1] - stemmed word
  137. * - [2] - normalised word
  138. * - [3] - raw word
  139. * - [4] - flags (table of strings)
  140. * @return {table/strings} words in the part
  141. */
  142. LUA_FUNCTION_DEF (textpart, get_words);
  143. /***
  144. * @method text_part:filter_words(regexp[, how[, max]])
  145. * Filter words using some regexp:
  146. * - `stem`: stemmed words (default)
  147. * - `norm`: normalised words (utf normalised + lowercased)
  148. * - `raw`: raw words in utf (if possible)
  149. * - `full`: list of tables, each table has the following fields:
  150. * - [1] - stemmed word
  151. * - [2] - normalised word
  152. * - [3] - raw word
  153. * - [4] - flags (table of strings)
  154. * @param {rspamd_regexp} regexp regexp to match
  155. * @param {string} how what words to extract
  156. * @param {number} max maximum number of hits returned (all hits if <= 0 or nil)
  157. * @return {table/strings} words matching regexp
  158. */
  159. LUA_FUNCTION_DEF (textpart, filter_words);
  160. /***
  161. * @method text_part:is_empty()
  162. * Returns `true` if the specified part is empty
  163. * @return {bool} whether a part is empty
  164. */
  165. LUA_FUNCTION_DEF (textpart, is_empty);
  166. /***
  167. * @method text_part:is_html()
  168. * Returns `true` if the specified part has HTML content
  169. * @return {bool} whether a part is HTML part
  170. */
  171. LUA_FUNCTION_DEF (textpart, is_html);
  172. /***
  173. * @method text_part:get_html()
  174. * Returns html content of the specified part
  175. * @return {html} html content
  176. */
  177. LUA_FUNCTION_DEF (textpart, get_html);
  178. /***
  179. * @method text_part:get_language()
  180. * Returns the code of the most used unicode script in the text part. Does not work with raw parts
  181. * @return {string} short abbreviation (such as `ru`) for the script's language
  182. */
  183. LUA_FUNCTION_DEF (textpart, get_language);
  184. /***
  185. * @method text_part:get_charset()
  186. * Returns part real charset
  187. * @return {string} charset of the part
  188. */
  189. LUA_FUNCTION_DEF (textpart, get_charset);
  190. /***
  191. * @method text_part:get_languages()
  192. * Returns array of tables of all languages detected for a part:
  193. * - 'code': language code (short string)
  194. * - 'prob': logarithm of probability
  195. * @return {array|tables} all languages detected for the part
  196. */
  197. LUA_FUNCTION_DEF (textpart, get_languages);
  198. /***
  199. * @method text_part:get_fuzzy_hashes(mempool)
  200. * @param {rspamd_mempool} mempool - memory pool (usually task pool)
  201. * Returns direct hash of textpart as a string and array [1..32] of shingles each represented as a following table:
  202. * - [1] - 64 bit fuzzy hash represented as a string
  203. * - [2..4] - strings used to generate this hash
  204. * @return {string,array|tables} fuzzy hashes calculated
  205. */
  206. LUA_FUNCTION_DEF (textpart, get_fuzzy_hashes);
  207. /***
  208. * @method text_part:get_mimepart()
  209. * Returns the mime part object corresponding to this text part
  210. * @return {mimepart} mimepart object
  211. */
  212. LUA_FUNCTION_DEF (textpart, get_mimepart);
  /*
   * Method table for text part objects: one LUA_INTERFACE_DEF entry per
   * `textpart` handler declared with LUA_FUNCTION_DEF earlier in this file,
   * plus a `__tostring` entry served by rspamd_lua_class_tostring.
   * The trailing {NULL, NULL} pair ends the list (the usual luaL_reg
   * terminator). NOTE(review): the registration call that consumes this
   * table is not visible in this part of the file.
   */
  213. static const struct luaL_reg textpartlib_m[] = {
  214. LUA_INTERFACE_DEF (textpart, is_utf),
  215. LUA_INTERFACE_DEF (textpart, has_8bit_raw),
  216. LUA_INTERFACE_DEF (textpart, has_8bit),
  217. LUA_INTERFACE_DEF (textpart, get_content),
  218. LUA_INTERFACE_DEF (textpart, get_raw_content),
  219. LUA_INTERFACE_DEF (textpart, get_content_oneline),
  220. LUA_INTERFACE_DEF (textpart, get_length),
  221. LUA_INTERFACE_DEF (textpart, get_raw_length),
  222. LUA_INTERFACE_DEF (textpart, get_urls_length),
  223. LUA_INTERFACE_DEF (textpart, get_lines_count),
  224. LUA_INTERFACE_DEF (textpart, get_words_count),
  225. LUA_INTERFACE_DEF (textpart, get_words),
  226. LUA_INTERFACE_DEF (textpart, filter_words),
  227. LUA_INTERFACE_DEF (textpart, is_empty),
  228. LUA_INTERFACE_DEF (textpart, is_html),
  229. LUA_INTERFACE_DEF (textpart, get_html),
  230. LUA_INTERFACE_DEF (textpart, get_language),
  231. LUA_INTERFACE_DEF (textpart, get_charset),
  232. LUA_INTERFACE_DEF (textpart, get_languages),
  233. LUA_INTERFACE_DEF (textpart, get_mimepart),
  234. LUA_INTERFACE_DEF (textpart, get_stats),
  235. LUA_INTERFACE_DEF (textpart, get_fuzzy_hashes),
  236. {"__tostring", rspamd_lua_class_tostring},
  237. {NULL, NULL}
  238. };
  239. /* Mimepart methods */
  240. /***
  241. * @module rspamd_mimepart
  242. * This module provides access to mime parts found in a message
  243. @example
  244. rspamd_config.MISSING_CONTENT_TYPE = function(task)
  245. local parts = task:get_parts()
  246. if parts and #parts > 1 then
  247. -- We have more than one part
  248. for _,p in ipairs(parts) do
  249. local ct = p:get_header('Content-Type')
  250. -- And some parts have no Content-Type header
  251. if not ct then
  252. return true
  253. end
  254. end
  255. end
  256. return false
  257. end
  258. */
  259. /***
  260. * @method mime_part:get_header(name[, case_sensitive])
  261. * Get decoded value of a header specified with optional case_sensitive flag.
  262. * By default headers are searched in caseless manner.
  263. * @param {string} name name of header to get
  264. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  265. * @return {string} decoded value of a header
  266. */
  267. LUA_FUNCTION_DEF (mimepart, get_header);
  268. /***
  269. * @method mime_part:get_header_raw(name[, case_sensitive])
  270. * Get raw value of a header specified with optional case_sensitive flag.
  271. * By default headers are searched in caseless manner.
  272. * @param {string} name name of header to get
  273. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  274. * @return {string} raw value of a header
  275. */
  276. LUA_FUNCTION_DEF (mimepart, get_header_raw);
  277. /***
  278. * @method mime_part:get_header_full(name[, case_sensitive])
  279. * Get raw value of a header specified with optional case_sensitive flag.
  280. * By default headers are searched in caseless manner. This method returns more
  281. * information about the header as a list of tables with the following structure:
  282. *
  283. * - `name` - name of a header
  284. * - `value` - raw value of a header
  285. * - `decoded` - decoded value of a header
  286. * - `tab_separated` - `true` if a header and a value are separated by `tab` character
  287. * - `empty_separator` - `true` if there are no separator between a header and a value
  288. * @param {string} name name of header to get
  289. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  290. * @return {list of tables} all values of a header as specified above
  291. @example
  292. function check_header_delimiter_tab(task, header_name)
  293. for _,rh in ipairs(task:get_header_full(header_name)) do
  294. if rh['tab_separated'] then return true end
  295. end
  296. return false
  297. end
  298. */
  299. LUA_FUNCTION_DEF (mimepart, get_header_full);
  300. /***
  301. * @method mimepart:get_header_count(name[, case_sensitive])
  302. * Lightweight version if you need just a header's count
  303. * By default headers are searched in caseless manner.
  304. * @param {string} name name of header to get
  305. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  306. * @return {number} number of header's occurrences or 0 if not found
  307. */
  308. LUA_FUNCTION_DEF (mimepart, get_header_count);
  309. /***
  310. * @method mimepart:get_raw_headers()
  311. * Get all undecoded headers of a mime part as a string
  312. * @return {rspamd_text} all raw headers for a message as opaque text
  313. */
  314. LUA_FUNCTION_DEF (mimepart, get_raw_headers);
  315. /***
  316. * @method mime_part:get_content()
  317. * Get the parsed content of part
  318. * @return {text} opaque text object (zero-copy if not casted to lua string)
  319. */
  320. LUA_FUNCTION_DEF (mimepart, get_content);
  321. /***
  322. * @method mime_part:get_raw_content()
  323. * Get the raw content of part
  324. * @return {text} opaque text object (zero-copy if not casted to lua string)
  325. */
  326. LUA_FUNCTION_DEF (mimepart, get_raw_content);
  327. /***
  328. * @method mime_part:get_length()
  329. * Get length of the content of the part
  330. * @return {integer} length of part in **bytes**
  331. */
  332. LUA_FUNCTION_DEF (mimepart, get_length);
  333. /***
  334. * @method mime_part:get_type()
  335. * Extract content-type string of the mime part
  336. * @return {string,string} content type in form 'type','subtype'
  337. */
  338. LUA_FUNCTION_DEF (mimepart, get_type);
  339. /***
  340. * @method mime_part:get_type_full()
  341. * Extract content-type string of the mime part with all attributes
  342. * @return {string,string,table} content type in form 'type','subtype', {attrs}
  343. */
  344. LUA_FUNCTION_DEF (mimepart, get_type_full);
  345. /***
  346. * @method mime_part:get_detected_type()
  347. * Extract content-type string of the mime part. Use lua_magic detection
  348. * @return {string,string} content type in form 'type','subtype'
  349. */
  350. LUA_FUNCTION_DEF (mimepart, get_detected_type);
  351. /***
  352. * @method mime_part:get_detected_type_full()
  353. * Extract content-type string of the mime part with all attributes. Use lua_magic detection
  354. * @return {string,string,table} content type in form 'type','subtype', {attrs}
  355. */
  356. LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
  357. /***
  358. * @method mime_part:get_detected_ext()
  359. * Returns a msdos extension name according to lua_magic detection
  360. * @return {string} detected extension (see lua_magic.types)
  361. */
  362. LUA_FUNCTION_DEF (mimepart, get_detected_ext);
  363. /***
  364. * @method mime_part:get_cte()
  365. * Extract content-transfer-encoding for a part
  366. * @return {string} content transfer encoding (e.g. `base64` or `7bit`)
  367. */
  368. LUA_FUNCTION_DEF (mimepart, get_cte);
  369. /***
  370. * @method mime_part:get_filename()
  371. * Extract filename associated with mime part if it is an attachment
  372. * @return {string} filename or `nil` if no file is associated with this part
  373. */
  374. LUA_FUNCTION_DEF (mimepart, get_filename);
  375. /***
  376. * @method mime_part:is_image()
  377. * Returns true if mime part is an image
  378. * @return {bool} true if a part is an image
  379. */
  380. LUA_FUNCTION_DEF (mimepart, is_image);
  381. /***
  382. * @method mime_part:get_image()
  383. * Returns rspamd_image structure associated with this part. This structure has
  384. * the following methods:
  385. *
  386. * * `get_width` - return width of an image in pixels
  387. * * `get_height` - return height of an image in pixels
  388. * * `get_type` - return string representation of image's type (e.g. 'jpeg')
  389. * * `get_filename` - return string with image's file name
  390. * * `get_size` - return size in bytes
  391. * @return {rspamd_image} image structure or nil if a part is not an image
  392. */
  393. LUA_FUNCTION_DEF (mimepart, get_image);
  394. /***
  395. * @method mime_part:is_archive()
  396. * Returns true if mime part is an archive
  397. * @return {bool} true if a part is an archive
  398. */
  399. LUA_FUNCTION_DEF (mimepart, is_archive);
  400. /***
  401. * @method mime_part:is_attachment()
  402. * Returns true if mime part looks like an attachment
  403. * @return {bool} true if a part looks like an attachment
  404. */
  405. LUA_FUNCTION_DEF (mimepart, is_attachment);
  406. /***
  407. * @method mime_part:get_archive()
  408. * Returns rspamd_archive structure associated with this part. This structure has
  409. * the following methods:
  410. *
  411. * * `get_files` - return list of strings with filenames inside archive
  412. * * `get_files_full` - return list of tables with all information about files
  413. * * `is_encrypted` - return true if an archive is encrypted
  414. * * `get_type` - return string representation of archive's type (e.g. 'zip')
  415. * * `get_filename` - return string with archive's file name
  416. * * `get_size` - return size in bytes
  417. * @return {rspamd_archive} archive structure or nil if a part is not an archive
  418. */
  419. LUA_FUNCTION_DEF (mimepart, get_archive);
  420. /***
  421. * @method mime_part:is_multipart()
  422. * Returns true if mime part is a multipart part
  423. * @return {bool} true if a part is a multipart part
  424. */
  425. LUA_FUNCTION_DEF (mimepart, is_multipart);
  426. /***
  427. * @method mime_part:is_message()
  428. * Returns true if mime part is a message part (message/rfc822)
  429. * @return {bool} true if a part is a message part
  430. */
  431. LUA_FUNCTION_DEF (mimepart, is_message);
  432. /***
  433. * @method mime_part:get_boundary()
  434. * Returns boundary for a part (extracted from parent multipart for normal parts and
  435. * from the part itself for multipart)
  436. * @return {string} boundary value or nil
  437. */
  438. LUA_FUNCTION_DEF (mimepart, get_boundary);
  439. /***
  440. * @method mime_part:get_children()
  441. * Returns rspamd_mimepart table of part's children. Returns nil if mime part is not multipart
  442. * or a message part.
  443. * @return {rspamd_mimepart} table of children
  444. */
  445. LUA_FUNCTION_DEF (mimepart, get_children);
  446. /***
  447. * @method mime_part:is_text()
  448. * Returns true if mime part is a text part
  449. * @return {bool} true if a part is a text part
  450. */
  451. LUA_FUNCTION_DEF (mimepart, is_text);
  452. /***
  453. * @method mime_part:get_text()
  454. * Returns rspamd_textpart structure associated with this part.
  455. * @return {rspamd_textpart} textpart structure or nil if a part is not a text part
  456. */
  457. LUA_FUNCTION_DEF (mimepart, get_text);
  458. /***
  459. * @method mime_part:get_digest()
  460. * Returns the unique digest for this mime part
  461. * @return {string} 128 characters hex string with digest of the part
  462. */
  463. LUA_FUNCTION_DEF (mimepart, get_digest);
  464. /***
  465. * @method mime_part:get_id()
  466. * Returns the order of the part in parts list
  467. * @return {number} index of the part (starting from 1 as it is Lua API)
  468. */
  469. LUA_FUNCTION_DEF (mimepart, get_id);
  470. /***
  471. * @method mime_part:is_broken()
  472. * Returns true if mime part has incorrectly specified content type
  473. * @return {bool} true if a part has bad content type
  474. */
  475. LUA_FUNCTION_DEF (mimepart, is_broken);
  476. /***
  477. * @method mime_part:headers_foreach(callback, [params])
  478. * This method calls `callback` for each header that satisfies some condition.
  479. * By default, all headers are iterated unless `callback` returns `true`. Nil or
  480. * false means continue of iterations.
  481. * Params could be as following:
  482. *
  483. * - `full`: header value is full table of all attributes @see task:get_header_full for details
  484. * - `regexp`: return headers that satisfies the specified regexp
  485. * @param {function} callback function from header name and header value
  486. * @param {table} params optional parameters
  487. */
  488. LUA_FUNCTION_DEF (mimepart, headers_foreach);
  489. /***
  490. * @method mime_part:get_parent()
  491. * Returns parent part for this part
  492. * @return {rspamd_mimepart} parent part or nil
  493. */
  494. LUA_FUNCTION_DEF (mimepart, get_parent);
  495. /***
  496. * @method mime_part:get_specific()
  497. * Returns specific lua content for this part
  498. * @return {any} specific lua content
  499. */
  500. LUA_FUNCTION_DEF (mimepart, get_specific);
  501. /***
  502. * @method mime_part:set_specific(<any>)
  503. * Sets a specific content for this part
  504. * @return {any} previous specific lua content (or nil)
  505. */
  506. LUA_FUNCTION_DEF (mimepart, set_specific);
  507. /***
  508. * @method mime_part:is_specific(<any>)
  509. * Returns true if part has specific lua content
  510. * @return {boolean} flag
  511. */
  512. LUA_FUNCTION_DEF (mimepart, is_specific);
  513. /***
  514. * @method mime_part:get_urls([need_emails|list_protos][, need_images])
  515. * Get all URLs found in a mime part. Telephone urls and emails are not included unless explicitly asked in `list_protos`
  516. * @param {boolean} need_emails if `true` then return also email urls, this can be a comma separated string of protocols desired or a table (e.g. `mailto` or `telephone`)
  517. * @param {boolean} need_images return urls from images (<img src=...>) as well
  518. * @return {table rspamd_url} list of all urls found
  519. */
  520. LUA_FUNCTION_DEF (mimepart, get_urls);
  /*
   * Method table for mime part objects: one LUA_INTERFACE_DEF entry per
   * `mimepart` handler declared with LUA_FUNCTION_DEF earlier in this file,
   * plus a `__tostring` entry served by rspamd_lua_class_tostring.
   * The trailing {NULL, NULL} pair ends the list (the usual luaL_reg
   * terminator). NOTE(review): the registration call that consumes this
   * table is not visible in this part of the file.
   */
  521. static const struct luaL_reg mimepartlib_m[] = {
  522. LUA_INTERFACE_DEF (mimepart, get_content),
  523. LUA_INTERFACE_DEF (mimepart, get_raw_content),
  524. LUA_INTERFACE_DEF (mimepart, get_length),
  525. LUA_INTERFACE_DEF (mimepart, get_type),
  526. LUA_INTERFACE_DEF (mimepart, get_type_full),
  527. LUA_INTERFACE_DEF (mimepart, get_detected_type),
  528. LUA_INTERFACE_DEF (mimepart, get_detected_ext),
  529. LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
  530. LUA_INTERFACE_DEF (mimepart, get_cte),
  531. LUA_INTERFACE_DEF (mimepart, get_filename),
  532. LUA_INTERFACE_DEF (mimepart, get_boundary),
  533. LUA_INTERFACE_DEF (mimepart, get_header),
  534. LUA_INTERFACE_DEF (mimepart, get_header_raw),
  535. LUA_INTERFACE_DEF (mimepart, get_header_full),
  536. LUA_INTERFACE_DEF (mimepart, get_header_count),
  537. LUA_INTERFACE_DEF (mimepart, get_raw_headers),
  538. LUA_INTERFACE_DEF (mimepart, is_image),
  539. LUA_INTERFACE_DEF (mimepart, get_image),
  540. LUA_INTERFACE_DEF (mimepart, is_archive),
  541. LUA_INTERFACE_DEF (mimepart, get_archive),
  542. LUA_INTERFACE_DEF (mimepart, is_multipart),
  543. LUA_INTERFACE_DEF (mimepart, is_message),
  544. LUA_INTERFACE_DEF (mimepart, get_children),
  545. LUA_INTERFACE_DEF (mimepart, get_parent),
  546. LUA_INTERFACE_DEF (mimepart, get_urls),
  547. LUA_INTERFACE_DEF (mimepart, is_text),
  548. LUA_INTERFACE_DEF (mimepart, is_broken),
  549. LUA_INTERFACE_DEF (mimepart, is_attachment),
  550. LUA_INTERFACE_DEF (mimepart, get_text),
  551. LUA_INTERFACE_DEF (mimepart, get_digest),
  552. LUA_INTERFACE_DEF (mimepart, get_id),
  553. LUA_INTERFACE_DEF (mimepart, headers_foreach),
  554. LUA_INTERFACE_DEF (mimepart, get_specific),
  555. LUA_INTERFACE_DEF (mimepart, set_specific),
  556. LUA_INTERFACE_DEF (mimepart, is_specific),
  557. {"__tostring", rspamd_lua_class_tostring},
  558. {NULL, NULL}
  559. };
  560. static struct rspamd_mime_text_part *
  561. lua_check_textpart (lua_State * L)
  562. {
  563. void *ud = rspamd_lua_check_udata (L, 1, "rspamd{textpart}");
  564. luaL_argcheck (L, ud != NULL, 1, "'textpart' expected");
  565. return ud ? *((struct rspamd_mime_text_part **)ud) : NULL;
  566. }
  567. static struct rspamd_mime_part *
  568. lua_check_mimepart (lua_State * L)
  569. {
  570. void *ud = rspamd_lua_check_udata (L, 1, "rspamd{mimepart}");
  571. luaL_argcheck (L, ud != NULL, 1, "'mimepart' expected");
  572. return ud ? *((struct rspamd_mime_part **)ud) : NULL;
  573. }
  574. static gint
  575. lua_textpart_is_utf (lua_State * L)
  576. {
  577. LUA_TRACE_POINT;
  578. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  579. if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
  580. lua_pushboolean (L, FALSE);
  581. return 1;
  582. }
  583. lua_pushboolean (L, IS_TEXT_PART_UTF (part));
  584. return 1;
  585. }
  586. static gint
  587. lua_textpart_has_8bit_raw (lua_State * L)
  588. {
  589. LUA_TRACE_POINT;
  590. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  591. if (part) {
  592. if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW) {
  593. lua_pushboolean (L, TRUE);
  594. }
  595. else {
  596. lua_pushboolean (L, FALSE);
  597. }
  598. }
  599. else {
  600. return luaL_error (L, "invalid arguments");
  601. }
  602. return 1;
  603. }
  604. static gint
  605. lua_textpart_has_8bit (lua_State * L)
  606. {
  607. LUA_TRACE_POINT;
  608. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  609. if (part) {
  610. if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED) {
  611. lua_pushboolean (L, TRUE);
  612. }
  613. else {
  614. lua_pushboolean (L, FALSE);
  615. }
  616. }
  617. else {
  618. return luaL_error (L, "invalid arguments");
  619. }
  620. return 1;
  621. }
  622. static gint
  623. lua_textpart_get_content (lua_State * L)
  624. {
  625. LUA_TRACE_POINT;
  626. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  627. struct rspamd_lua_text *t;
  628. gsize len;
  629. const gchar *start, *type = NULL;
  630. if (part == NULL) {
  631. lua_pushnil (L);
  632. return 1;
  633. }
  634. if (lua_type (L, 2) == LUA_TSTRING) {
  635. type = lua_tostring (L, 2);
  636. }
  637. if (!type) {
  638. if (IS_TEXT_PART_EMPTY (part)) {
  639. lua_pushnil (L);
  640. return 1;
  641. }
  642. start = part->utf_content.begin;
  643. len = part->utf_content.len;
  644. }
  645. else if (strcmp (type, "content") == 0) {
  646. if (IS_TEXT_PART_EMPTY (part)) {
  647. lua_pushnil (L);
  648. return 1;
  649. }
  650. start = part->utf_content.begin;
  651. len = part->utf_content.len;
  652. }
  653. else if (strcmp (type, "content_oneline") == 0) {
  654. if (IS_TEXT_PART_EMPTY (part)) {
  655. lua_pushnil (L);
  656. return 1;
  657. }
  658. start = part->utf_stripped_content->data;
  659. len = part->utf_stripped_content->len;
  660. }
  661. else if (strcmp (type, "raw_parsed") == 0) {
  662. if (part->parsed.len == 0) {
  663. lua_pushnil (L);
  664. return 1;
  665. }
  666. start = part->parsed.begin;
  667. len = part->parsed.len;
  668. }
  669. else if (strcmp (type, "raw_utf") == 0) {
  670. if (part->utf_raw_content == NULL || part->utf_raw_content->len == 0) {
  671. lua_pushnil (L);
  672. return 1;
  673. }
  674. start = part->utf_raw_content->data;
  675. len = part->utf_raw_content->len;
  676. }
  677. else if (strcmp (type, "raw") == 0) {
  678. if (part->raw.len == 0) {
  679. lua_pushnil (L);
  680. return 1;
  681. }
  682. start = part->raw.begin;
  683. len = part->raw.len;
  684. }
  685. else {
  686. return luaL_error (L, "invalid content type: %s", type);
  687. }
  688. t = lua_newuserdata (L, sizeof (*t));
  689. rspamd_lua_setclass (L, "rspamd{text}", -1);
  690. t->start = start;
  691. t->len = len;
  692. t->flags = 0;
  693. return 1;
  694. }
  695. static gint
  696. lua_textpart_get_raw_content (lua_State * L)
  697. {
  698. LUA_TRACE_POINT;
  699. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  700. struct rspamd_lua_text *t;
  701. if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
  702. lua_pushnil (L);
  703. return 1;
  704. }
  705. t = lua_newuserdata (L, sizeof (*t));
  706. rspamd_lua_setclass (L, "rspamd{text}", -1);
  707. t->start = part->raw.begin;
  708. t->len = part->raw.len;
  709. t->flags = 0;
  710. return 1;
  711. }
  712. static gint
  713. lua_textpart_get_content_oneline (lua_State * L)
  714. {
  715. LUA_TRACE_POINT;
  716. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  717. struct rspamd_lua_text *t;
  718. if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
  719. lua_pushnil (L);
  720. return 1;
  721. }
  722. t = lua_newuserdata (L, sizeof (*t));
  723. rspamd_lua_setclass (L, "rspamd{text}", -1);
  724. t->start = part->utf_stripped_content->data;
  725. t->len = part->utf_stripped_content->len;
  726. t->flags = 0;
  727. return 1;
  728. }
  729. static gint
  730. lua_textpart_get_length (lua_State * L)
  731. {
  732. LUA_TRACE_POINT;
  733. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  734. if (part == NULL) {
  735. lua_pushnil (L);
  736. return 1;
  737. }
  738. if (IS_TEXT_PART_EMPTY (part) || part->utf_content.len == 0) {
  739. lua_pushinteger (L, 0);
  740. }
  741. else {
  742. lua_pushinteger (L, part->utf_content.len);
  743. }
  744. return 1;
  745. }
  746. static gint
  747. lua_textpart_get_raw_length (lua_State * L)
  748. {
  749. LUA_TRACE_POINT;
  750. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  751. if (part == NULL) {
  752. lua_pushnil (L);
  753. return 1;
  754. }
  755. lua_pushinteger (L, part->raw.len);
  756. return 1;
  757. }
  758. static gint
  759. lua_textpart_get_urls_length (lua_State * L)
  760. {
  761. LUA_TRACE_POINT;
  762. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  763. GList *cur;
  764. guint total = 0;
  765. struct rspamd_process_exception *ex;
  766. if (part == NULL) {
  767. lua_pushnil (L);
  768. return 1;
  769. }
  770. for (cur = part->exceptions; cur != NULL; cur = g_list_next (cur)) {
  771. ex = cur->data;
  772. if (ex->type == RSPAMD_EXCEPTION_URL) {
  773. total += ex->len;
  774. }
  775. }
  776. lua_pushinteger (L, total);
  777. return 1;
  778. }
  779. static gint
  780. lua_textpart_get_lines_count (lua_State * L)
  781. {
  782. LUA_TRACE_POINT;
  783. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  784. if (part == NULL) {
  785. lua_pushnil (L);
  786. return 1;
  787. }
  788. if (IS_TEXT_PART_EMPTY (part)) {
  789. lua_pushinteger (L, 0);
  790. }
  791. else {
  792. lua_pushinteger (L, part->nlines);
  793. }
  794. return 1;
  795. }
  796. static gint
  797. lua_textpart_get_words_count (lua_State *L)
  798. {
  799. LUA_TRACE_POINT;
  800. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  801. if (part == NULL) {
  802. lua_pushnil (L);
  803. return 1;
  804. }
  805. if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
  806. lua_pushinteger (L, 0);
  807. }
  808. else {
  809. lua_pushinteger (L, part->nwords);
  810. }
  811. return 1;
  812. }
  813. static inline enum rspamd_lua_words_type
  814. word_extract_type_from_string (const gchar *how_str)
  815. {
  816. enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_MAX;
  817. if (strcmp (how_str, "stem") == 0) {
  818. how = RSPAMD_LUA_WORDS_STEM;
  819. }
  820. else if (strcmp (how_str, "norm") == 0) {
  821. how = RSPAMD_LUA_WORDS_NORM;
  822. }
  823. else if (strcmp (how_str, "raw") == 0) {
  824. how = RSPAMD_LUA_WORDS_RAW;
  825. }
  826. else if (strcmp (how_str, "full") == 0) {
  827. how = RSPAMD_LUA_WORDS_FULL;
  828. }
  829. return how;
  830. }
  831. static gint
  832. lua_textpart_get_words (lua_State *L)
  833. {
  834. LUA_TRACE_POINT;
  835. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  836. enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
  837. if (part == NULL) {
  838. return luaL_error (L, "invalid arguments");
  839. }
  840. if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
  841. lua_createtable (L, 0, 0);
  842. }
  843. else {
  844. if (lua_type (L, 2) == LUA_TSTRING) {
  845. const gchar *how_str = lua_tostring (L, 2);
  846. how = word_extract_type_from_string (how_str);
  847. if (how == RSPAMD_LUA_WORDS_MAX) {
  848. return luaL_error (L, "invalid extraction type: %s", how_str);
  849. }
  850. }
  851. return rspamd_lua_push_words (L, part->utf_words, how);
  852. }
  853. return 1;
  854. }
  855. static gint
  856. lua_textpart_filter_words (lua_State *L)
  857. {
  858. LUA_TRACE_POINT;
  859. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  860. struct rspamd_lua_regexp *re = lua_check_regexp (L, 2);
  861. gint lim = -1;
  862. enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
  863. if (part == NULL || re == NULL) {
  864. return luaL_error (L, "invalid arguments");
  865. }
  866. if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
  867. lua_createtable (L, 0, 0);
  868. }
  869. else {
  870. if (lua_type (L, 3) == LUA_TSTRING) {
  871. const gchar *how_str = lua_tostring (L, 3);
  872. how = word_extract_type_from_string (how_str);
  873. if (how == RSPAMD_LUA_WORDS_MAX) {
  874. return luaL_error (L, "invalid extraction type: %s", how_str);
  875. }
  876. }
  877. if (lua_type (L, 4) == LUA_TNUMBER) {
  878. lim = lua_tointeger (L, 4);
  879. }
  880. guint cnt, i;
  881. lua_createtable (L, 8, 0);
  882. for (i = 0, cnt = 1; i < part->utf_words->len; i ++) {
  883. rspamd_stat_token_t *w = &g_array_index (part->utf_words,
  884. rspamd_stat_token_t, i);
  885. switch (how) {
  886. case RSPAMD_LUA_WORDS_STEM:
  887. if (w->stemmed.len > 0) {
  888. if (rspamd_regexp_match (re->re, w->stemmed.begin,
  889. w->stemmed.len, FALSE)) {
  890. lua_pushlstring (L, w->stemmed.begin, w->stemmed.len);
  891. lua_rawseti (L, -2, cnt++);
  892. }
  893. }
  894. break;
  895. case RSPAMD_LUA_WORDS_NORM:
  896. if (w->normalized.len > 0) {
  897. if (rspamd_regexp_match (re->re, w->normalized.begin,
  898. w->normalized.len, FALSE)) {
  899. lua_pushlstring (L, w->normalized.begin, w->normalized.len);
  900. lua_rawseti (L, -2, cnt++);
  901. }
  902. }
  903. break;
  904. case RSPAMD_LUA_WORDS_RAW:
  905. if (w->original.len > 0) {
  906. if (rspamd_regexp_match (re->re, w->original.begin,
  907. w->original.len, TRUE)) {
  908. lua_pushlstring (L, w->original.begin, w->original.len);
  909. lua_rawseti (L, -2, cnt++);
  910. }
  911. }
  912. break;
  913. case RSPAMD_LUA_WORDS_FULL:
  914. if (rspamd_regexp_match (re->re, w->normalized.begin,
  915. w->normalized.len, FALSE)) {
  916. rspamd_lua_push_full_word (L, w);
  917. /* Push to the resulting vector */
  918. lua_rawseti (L, -2, cnt++);
  919. }
  920. break;
  921. default:
  922. break;
  923. }
  924. if (lim > 0 && cnt >= lim) {
  925. break;
  926. }
  927. }
  928. }
  929. return 1;
  930. }
  931. static gint
  932. lua_textpart_is_empty (lua_State * L)
  933. {
  934. LUA_TRACE_POINT;
  935. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  936. if (part == NULL) {
  937. lua_pushnil (L);
  938. return 1;
  939. }
  940. lua_pushboolean (L, IS_TEXT_PART_EMPTY (part));
  941. return 1;
  942. }
  943. static gint
  944. lua_textpart_is_html (lua_State * L)
  945. {
  946. LUA_TRACE_POINT;
  947. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  948. if (part == NULL) {
  949. lua_pushnil (L);
  950. return 1;
  951. }
  952. lua_pushboolean (L, IS_TEXT_PART_HTML (part));
  953. return 1;
  954. }
  955. static gint
  956. lua_textpart_get_html (lua_State * L)
  957. {
  958. LUA_TRACE_POINT;
  959. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  960. struct html_content **phc;
  961. if (part == NULL || part->html == NULL) {
  962. lua_pushnil (L);
  963. }
  964. else {
  965. phc = lua_newuserdata (L, sizeof (*phc));
  966. rspamd_lua_setclass (L, "rspamd{html}", -1);
  967. *phc = part->html;
  968. }
  969. return 1;
  970. }
  971. static gint
  972. lua_textpart_get_language (lua_State * L)
  973. {
  974. LUA_TRACE_POINT;
  975. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  976. if (part != NULL) {
  977. if (part->language != NULL && part->language[0] != '\0') {
  978. lua_pushstring (L, part->language);
  979. return 1;
  980. }
  981. else {
  982. lua_pushnil (L);
  983. }
  984. }
  985. else {
  986. return luaL_error (L, "invalid arguments");
  987. }
  988. return 1;
  989. }
  990. static gint
  991. lua_textpart_get_charset (lua_State * L)
  992. {
  993. LUA_TRACE_POINT;
  994. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  995. if (part != NULL) {
  996. if (part->real_charset != NULL) {
  997. lua_pushstring (L, part->real_charset);
  998. return 1;
  999. }
  1000. else {
  1001. lua_pushnil (L);
  1002. }
  1003. }
  1004. else {
  1005. return luaL_error (L, "invalid arguments");
  1006. }
  1007. return 1;
  1008. }
  1009. static gint
  1010. lua_textpart_get_languages (lua_State * L)
  1011. {
  1012. LUA_TRACE_POINT;
  1013. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  1014. guint i;
  1015. struct rspamd_lang_detector_res *cur;
  1016. if (part != NULL) {
  1017. if (part->languages != NULL) {
  1018. lua_createtable (L, part->languages->len, 0);
  1019. PTR_ARRAY_FOREACH (part->languages, i, cur) {
  1020. lua_createtable (L, 0, 2);
  1021. lua_pushstring (L, "code");
  1022. lua_pushstring (L, cur->lang);
  1023. lua_settable (L, -3);
  1024. lua_pushstring (L, "prob");
  1025. lua_pushnumber (L, cur->prob);
  1026. lua_settable (L, -3);
  1027. lua_rawseti (L, -2, i + 1);
  1028. }
  1029. }
  1030. else {
  1031. lua_newtable (L);
  1032. }
  1033. }
  1034. else {
  1035. luaL_error (L, "invalid arguments");
  1036. }
  1037. return 1;
  1038. }
  1039. struct lua_shingle_data {
  1040. guint64 hash;
  1041. rspamd_ftok_t t1;
  1042. rspamd_ftok_t t2;
  1043. rspamd_ftok_t t3;
  1044. };
  1045. struct lua_shingle_filter_cbdata {
  1046. struct rspamd_mime_text_part *part;
  1047. rspamd_mempool_t *pool;
  1048. };
  1049. #define STORE_TOKEN(i, t) do { \
  1050. if ((i) < part->utf_words->len) { \
  1051. word = &g_array_index (part->utf_words, rspamd_stat_token_t, (i)); \
  1052. sd->t.begin = word->stemmed.begin; \
  1053. sd->t.len = word->stemmed.len; \
  1054. } \
  1055. }while (0)
  1056. static guint64
  1057. lua_shingles_filter (guint64 *input, gsize count,
  1058. gint shno, const guchar *key, gpointer ud)
  1059. {
  1060. guint64 minimal = G_MAXUINT64;
  1061. gsize i, min_idx = 0;
  1062. struct lua_shingle_data *sd;
  1063. rspamd_stat_token_t *word;
  1064. struct lua_shingle_filter_cbdata *cbd = (struct lua_shingle_filter_cbdata *)ud;
  1065. struct rspamd_mime_text_part *part;
  1066. part = cbd->part;
  1067. for (i = 0; i < count; i ++) {
  1068. if (minimal > input[i]) {
  1069. minimal = input[i];
  1070. min_idx = i;
  1071. }
  1072. }
  1073. sd = rspamd_mempool_alloc0 (cbd->pool, sizeof (*sd));
  1074. sd->hash = minimal;
  1075. STORE_TOKEN (min_idx, t1);
  1076. STORE_TOKEN (min_idx + 1, t2);
  1077. STORE_TOKEN (min_idx + 2, t3);
  1078. return GPOINTER_TO_SIZE (sd);
  1079. }
  1080. #undef STORE_TOKEN
  1081. static gint
  1082. lua_textpart_get_fuzzy_hashes (lua_State * L)
  1083. {
  1084. LUA_TRACE_POINT;
  1085. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  1086. rspamd_mempool_t *pool = rspamd_lua_check_mempool (L, 2);
  1087. guchar key[rspamd_cryptobox_HASHBYTES], digest[rspamd_cryptobox_HASHBYTES],
  1088. hexdigest[rspamd_cryptobox_HASHBYTES * 2 + 1], numbuf[64];
  1089. struct rspamd_shingle *sgl;
  1090. guint i;
  1091. struct lua_shingle_data *sd;
  1092. rspamd_cryptobox_hash_state_t st;
  1093. rspamd_stat_token_t *word;
  1094. struct lua_shingle_filter_cbdata cbd;
  1095. if (part == NULL || pool == NULL) {
  1096. return luaL_error (L, "invalid arguments");
  1097. }
  1098. if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
  1099. lua_pushnil (L);
  1100. lua_pushnil (L);
  1101. }
  1102. else {
  1103. /* TODO: add keys and algorithms support */
  1104. rspamd_cryptobox_hash (key, "rspamd", strlen ("rspamd"), NULL, 0);
  1105. /* TODO: add short text support */
  1106. /* Calculate direct hash */
  1107. rspamd_cryptobox_hash_init (&st, key, rspamd_cryptobox_HASHKEYBYTES);
  1108. for (i = 0; i < part->utf_words->len; i ++) {
  1109. word = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
  1110. rspamd_cryptobox_hash_update (&st,
  1111. word->stemmed.begin, word->stemmed.len);
  1112. }
  1113. rspamd_cryptobox_hash_final (&st, digest);
  1114. rspamd_encode_hex_buf (digest, sizeof (digest), hexdigest,
  1115. sizeof (hexdigest));
  1116. lua_pushlstring (L, hexdigest, sizeof (hexdigest) - 1);
  1117. cbd.pool = pool;
  1118. cbd.part = part;
  1119. sgl = rspamd_shingles_from_text (part->utf_words, key,
  1120. pool, lua_shingles_filter, &cbd, RSPAMD_SHINGLES_MUMHASH);
  1121. if (sgl == NULL) {
  1122. lua_pushnil (L);
  1123. }
  1124. else {
  1125. lua_createtable (L, G_N_ELEMENTS (sgl->hashes), 0);
  1126. for (i = 0; i < G_N_ELEMENTS (sgl->hashes); i ++) {
  1127. sd = GSIZE_TO_POINTER (sgl->hashes[i]);
  1128. lua_createtable (L, 4, 0);
  1129. rspamd_snprintf (numbuf, sizeof (numbuf), "%uL", sd->hash);
  1130. lua_pushstring (L, numbuf);
  1131. lua_rawseti (L, -2, 1);
  1132. /* Tokens */
  1133. lua_pushlstring (L, sd->t1.begin, sd->t1.len);
  1134. lua_rawseti (L, -2, 2);
  1135. lua_pushlstring (L, sd->t2.begin, sd->t2.len);
  1136. lua_rawseti (L, -2, 3);
  1137. lua_pushlstring (L, sd->t3.begin, sd->t3.len);
  1138. lua_rawseti (L, -2, 4);
  1139. lua_rawseti (L, -2, i + 1); /* Store table */
  1140. }
  1141. }
  1142. }
  1143. return 2;
  1144. }
  1145. static gint
  1146. lua_textpart_get_mimepart (lua_State * L)
  1147. {
  1148. LUA_TRACE_POINT;
  1149. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  1150. struct rspamd_mime_part **pmime;
  1151. if (part != NULL) {
  1152. if (part->mime_part != NULL) {
  1153. pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
  1154. rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
  1155. *pmime = part->mime_part;
  1156. return 1;
  1157. }
  1158. }
  1159. lua_pushnil (L);
  1160. return 1;
  1161. }
  1162. /***
  1163. * @method mime_part:get_stats()
  1164. * Returns a table with the following data:
  1165. * -
  1166. * - `lines`: number of lines
  1167. * - `spaces`: number of spaces
  1168. * - `double_spaces`: double spaces
  1169. * - `empty_lines`: number of empty lines
  1170. * - `non_ascii_characters`: number of non ascii characters
  1171. * - `ascii_characters`: number of ascii characters
  1172. * @return {table} table of stats
  1173. */
  1174. static gint
  1175. lua_textpart_get_stats (lua_State * L)
  1176. {
  1177. LUA_TRACE_POINT;
  1178. struct rspamd_mime_text_part *part = lua_check_textpart (L);
  1179. if (part != NULL) {
  1180. lua_createtable (L, 0, 9);
  1181. lua_pushstring (L, "lines");
  1182. lua_pushinteger (L, part->nlines);
  1183. lua_settable (L, -3);
  1184. lua_pushstring (L, "empty_lines");
  1185. lua_pushinteger (L, part->empty_lines);
  1186. lua_settable (L, -3);
  1187. lua_pushstring (L, "spaces");
  1188. lua_pushinteger (L, part->spaces);
  1189. lua_settable (L, -3);
  1190. lua_pushstring (L, "non_spaces");
  1191. lua_pushinteger (L, part->non_spaces);
  1192. lua_settable (L, -3);
  1193. lua_pushstring (L, "double_spaces");
  1194. lua_pushinteger (L, part->double_spaces);
  1195. lua_settable (L, -3);
  1196. lua_pushstring (L, "ascii_characters");
  1197. lua_pushinteger (L, part->ascii_chars);
  1198. lua_settable (L, -3);
  1199. lua_pushstring (L, "non_ascii_characters");
  1200. lua_pushinteger (L, part->non_ascii_chars);
  1201. lua_settable (L, -3);
  1202. lua_pushstring (L, "capital_letters");
  1203. lua_pushinteger (L, part->capital_letters);
  1204. lua_settable (L, -3);
  1205. lua_pushstring (L, "numeric_characters");
  1206. lua_pushinteger (L, part->numeric_characters);
  1207. lua_settable (L, -3);
  1208. }
  1209. else {
  1210. return luaL_error (L, "invalid arguments");
  1211. }
  1212. return 1;
  1213. }
  1214. /* Mimepart implementation */
  1215. static gint
  1216. lua_mimepart_get_content (lua_State * L)
  1217. {
  1218. LUA_TRACE_POINT;
  1219. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1220. struct rspamd_lua_text *t;
  1221. if (part == NULL) {
  1222. lua_pushnil (L);
  1223. return 1;
  1224. }
  1225. t = lua_newuserdata (L, sizeof (*t));
  1226. rspamd_lua_setclass (L, "rspamd{text}", -1);
  1227. t->start = part->parsed_data.begin;
  1228. t->len = part->parsed_data.len;
  1229. t->flags = 0;
  1230. return 1;
  1231. }
  1232. static gint
  1233. lua_mimepart_get_raw_content (lua_State * L)
  1234. {
  1235. LUA_TRACE_POINT;
  1236. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1237. struct rspamd_lua_text *t;
  1238. if (part == NULL) {
  1239. lua_pushnil (L);
  1240. return 1;
  1241. }
  1242. t = lua_newuserdata (L, sizeof (*t));
  1243. rspamd_lua_setclass (L, "rspamd{text}", -1);
  1244. t->start = part->raw_data.begin;
  1245. t->len = part->raw_data.len;
  1246. t->flags = 0;
  1247. return 1;
  1248. }
  1249. static gint
  1250. lua_mimepart_get_length (lua_State * L)
  1251. {
  1252. LUA_TRACE_POINT;
  1253. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1254. if (part == NULL) {
  1255. lua_pushnil (L);
  1256. return 1;
  1257. }
  1258. lua_pushinteger (L, part->parsed_data.len);
  1259. return 1;
  1260. }
  1261. static gint
  1262. lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
  1263. gboolean full)
  1264. {
  1265. GHashTableIter it;
  1266. gpointer k, v;
  1267. struct rspamd_content_type_param *param;
  1268. if (ct == NULL) {
  1269. lua_pushnil (L);
  1270. lua_pushnil (L);
  1271. return 2;
  1272. }
  1273. lua_pushlstring (L, ct->type.begin, ct->type.len);
  1274. lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);
  1275. if (!full) {
  1276. return 2;
  1277. }
  1278. lua_createtable (L, 0, 2 + (ct->attrs ?
  1279. g_hash_table_size (ct->attrs) : 0));
  1280. if (ct->charset.len > 0) {
  1281. lua_pushstring (L, "charset");
  1282. lua_pushlstring (L, ct->charset.begin, ct->charset.len);
  1283. lua_settable (L, -3);
  1284. }
  1285. if (ct->boundary.len > 0) {
  1286. lua_pushstring (L, "boundary");
  1287. lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
  1288. lua_settable (L, -3);
  1289. }
  1290. if (ct->attrs) {
  1291. g_hash_table_iter_init (&it, ct->attrs);
  1292. while (g_hash_table_iter_next (&it, &k, &v)) {
  1293. param = v;
  1294. if (param->name.len > 0 && param->value.len > 0) {
  1295. /* TODO: think about multiple values here */
  1296. lua_pushlstring (L, param->name.begin, param->name.len);
  1297. lua_pushlstring (L, param->value.begin, param->value.len);
  1298. lua_settable (L, -3);
  1299. }
  1300. }
  1301. }
  1302. return 3;
  1303. }
  1304. static gint
  1305. lua_mimepart_get_type (lua_State * L)
  1306. {
  1307. LUA_TRACE_POINT;
  1308. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1309. if (part == NULL) {
  1310. return luaL_error (L, "invalid arguments");
  1311. }
  1312. return lua_mimepart_get_type_common (L, part->ct, FALSE);
  1313. }
  1314. static gint
  1315. lua_mimepart_get_type_full (lua_State * L)
  1316. {
  1317. LUA_TRACE_POINT;
  1318. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1319. if (part == NULL) {
  1320. return luaL_error (L, "invalid arguments");
  1321. }
  1322. return lua_mimepart_get_type_common (L, part->ct, TRUE);
  1323. }
  1324. static gint
  1325. lua_mimepart_get_detected_type (lua_State * L)
  1326. {
  1327. LUA_TRACE_POINT;
  1328. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1329. if (part == NULL) {
  1330. return luaL_error (L, "invalid arguments");
  1331. }
  1332. return lua_mimepart_get_type_common (L, part->detected_ct, FALSE);
  1333. }
  1334. static gint
  1335. lua_mimepart_get_detected_type_full (lua_State * L)
  1336. {
  1337. LUA_TRACE_POINT;
  1338. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1339. if (part == NULL) {
  1340. return luaL_error (L, "invalid arguments");
  1341. }
  1342. return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
  1343. }
  1344. static gint
  1345. lua_mimepart_get_detected_ext (lua_State * L)
  1346. {
  1347. LUA_TRACE_POINT;
  1348. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1349. if (part == NULL) {
  1350. return luaL_error (L, "invalid arguments");
  1351. }
  1352. if (part->detected_ext) {
  1353. lua_pushstring (L, part->detected_ext);
  1354. }
  1355. else {
  1356. lua_pushnil (L);
  1357. }
  1358. return 1;
  1359. }
  1360. static gint
  1361. lua_mimepart_get_cte (lua_State * L)
  1362. {
  1363. LUA_TRACE_POINT;
  1364. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1365. if (part == NULL) {
  1366. lua_pushnil (L);
  1367. return 1;
  1368. }
  1369. lua_pushstring (L, rspamd_cte_to_string (part->cte));
  1370. return 1;
  1371. }
  1372. static gint
  1373. lua_mimepart_get_filename (lua_State * L)
  1374. {
  1375. LUA_TRACE_POINT;
  1376. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1377. if (part == NULL || part->cd == NULL || part->cd->filename.len == 0) {
  1378. lua_pushnil (L);
  1379. return 1;
  1380. }
  1381. lua_pushlstring (L, part->cd->filename.begin, part->cd->filename.len);
  1382. return 1;
  1383. }
  1384. static gint
  1385. lua_mimepart_get_boundary (lua_State * L)
  1386. {
  1387. LUA_TRACE_POINT;
  1388. struct rspamd_mime_part *part = lua_check_mimepart (L), *parent;
  1389. if (part == NULL) {
  1390. return luaL_error (L, "invalid arguments");
  1391. }
  1392. if (IS_PART_MULTIPART (part)) {
  1393. lua_pushlstring (L, part->specific.mp->boundary.begin,
  1394. part->specific.mp->boundary.len);
  1395. }
  1396. else {
  1397. parent = part->parent_part;
  1398. if (!parent || !IS_PART_MULTIPART (parent)) {
  1399. lua_pushnil (L);
  1400. }
  1401. else {
  1402. lua_pushlstring (L, parent->specific.mp->boundary.begin,
  1403. parent->specific.mp->boundary.len);
  1404. }
  1405. }
  1406. return 1;
  1407. }
  1408. static gint
  1409. lua_mimepart_get_header_common (lua_State *L, enum rspamd_lua_task_header_type how)
  1410. {
  1411. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1412. const gchar *name;
  1413. gboolean strong = FALSE;
  1414. name = luaL_checkstring (L, 2);
  1415. if (name && part) {
  1416. if (lua_isboolean (L, 3)) {
  1417. strong = lua_toboolean (L, 3);
  1418. }
  1419. return rspamd_lua_push_header_array (L,
  1420. name,
  1421. rspamd_message_get_header_from_hash(part->raw_headers, name, FALSE),
  1422. how,
  1423. strong);
  1424. }
  1425. lua_pushnil (L);
  1426. return 1;
  1427. }
  1428. static gint
  1429. lua_mimepart_get_header_full (lua_State * L)
  1430. {
  1431. LUA_TRACE_POINT;
  1432. return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_FULL);
  1433. }
  1434. static gint
  1435. lua_mimepart_get_header (lua_State * L)
  1436. {
  1437. LUA_TRACE_POINT;
  1438. return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_SIMPLE);
  1439. }
  1440. static gint
  1441. lua_mimepart_get_header_raw (lua_State * L)
  1442. {
  1443. LUA_TRACE_POINT;
  1444. return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_RAW);
  1445. }
  1446. static gint
  1447. lua_mimepart_get_header_count (lua_State * L)
  1448. {
  1449. LUA_TRACE_POINT;
  1450. return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_COUNT);
  1451. }
  1452. static gint
  1453. lua_mimepart_get_raw_headers (lua_State *L)
  1454. {
  1455. LUA_TRACE_POINT;
  1456. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1457. struct rspamd_lua_text *t;
  1458. if (part) {
  1459. t = lua_newuserdata (L, sizeof (*t));
  1460. rspamd_lua_setclass (L, "rspamd{text}", -1);
  1461. t->start = part->raw_headers_str;
  1462. t->len = part->raw_headers_len;
  1463. t->flags = 0;
  1464. }
  1465. else {
  1466. return luaL_error (L, "invalid arguments");
  1467. }
  1468. return 1;
  1469. }
  1470. static gint
  1471. lua_mimepart_is_image (lua_State * L)
  1472. {
  1473. LUA_TRACE_POINT;
  1474. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1475. if (part == NULL) {
  1476. return luaL_error (L, "invalid arguments");
  1477. }
  1478. lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_IMAGE);
  1479. return 1;
  1480. }
  1481. static gint
  1482. lua_mimepart_is_archive (lua_State * L)
  1483. {
  1484. LUA_TRACE_POINT;
  1485. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1486. if (part == NULL) {
  1487. return luaL_error (L, "invalid arguments");
  1488. }
  1489. lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_ARCHIVE);
  1490. return 1;
  1491. }
  1492. static gint
  1493. lua_mimepart_is_multipart (lua_State * L)
  1494. {
  1495. LUA_TRACE_POINT;
  1496. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1497. if (part == NULL) {
  1498. return luaL_error (L, "invalid arguments");
  1499. }
  1500. lua_pushboolean (L, IS_PART_MULTIPART (part) ? true : false);
  1501. return 1;
  1502. }
  1503. static gint
  1504. lua_mimepart_is_message (lua_State * L)
  1505. {
  1506. LUA_TRACE_POINT;
  1507. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1508. if (part == NULL) {
  1509. return luaL_error (L, "invalid arguments");
  1510. }
  1511. lua_pushboolean (L, IS_PART_MESSAGE (part) ? true : false);
  1512. return 1;
  1513. }
  1514. static gint
  1515. lua_mimepart_is_attachment (lua_State * L)
  1516. {
  1517. LUA_TRACE_POINT;
  1518. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1519. if (part == NULL) {
  1520. return luaL_error (L, "invalid arguments");
  1521. }
  1522. if (part->cd && part->cd->type == RSPAMD_CT_ATTACHMENT) {
  1523. lua_pushboolean (L, true);
  1524. }
  1525. else {
  1526. /* if has_name and not (image and Content-ID_header_present) */
  1527. if (part->cd && part->cd->filename.len > 0) {
  1528. if (part->part_type != RSPAMD_MIME_PART_IMAGE &&
  1529. rspamd_message_get_header_from_hash(part->raw_headers,
  1530. "Content-Id", FALSE) == NULL) {
  1531. /* Filename is presented but no content id and not image */
  1532. lua_pushboolean (L, true);
  1533. }
  1534. else {
  1535. /* Image or an embeded object */
  1536. lua_pushboolean (L, false);
  1537. }
  1538. }
  1539. else {
  1540. /* No filename */
  1541. lua_pushboolean (L, false);
  1542. }
  1543. }
  1544. return 1;
  1545. }
  1546. static gint
  1547. lua_mimepart_is_text (lua_State * L)
  1548. {
  1549. LUA_TRACE_POINT;
  1550. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1551. if (part == NULL) {
  1552. return luaL_error (L, "invalid arguments");
  1553. }
  1554. lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_TEXT);
  1555. return 1;
  1556. }
  1557. static gint
  1558. lua_mimepart_is_broken (lua_State * L)
  1559. {
  1560. LUA_TRACE_POINT;
  1561. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1562. if (part == NULL) {
  1563. return luaL_error (L, "invalid arguments");
  1564. }
  1565. if (part->ct) {
  1566. lua_pushboolean (L, (part->ct->flags & RSPAMD_CONTENT_TYPE_BROKEN) ?
  1567. true : false);
  1568. }
  1569. else {
  1570. lua_pushboolean (L, false);
  1571. }
  1572. return 1;
  1573. }
  1574. static gint
  1575. lua_mimepart_get_image (lua_State * L)
  1576. {
  1577. LUA_TRACE_POINT;
  1578. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1579. struct rspamd_image **pimg;
  1580. if (part == NULL) {
  1581. return luaL_error (L, "invalid arguments");
  1582. }
  1583. if (part->part_type != RSPAMD_MIME_PART_IMAGE || part->specific.img == NULL) {
  1584. lua_pushnil (L);
  1585. }
  1586. else {
  1587. pimg = lua_newuserdata (L, sizeof (*pimg));
  1588. *pimg = part->specific.img;
  1589. rspamd_lua_setclass (L, "rspamd{image}", -1);
  1590. }
  1591. return 1;
  1592. }
  1593. static gint
  1594. lua_mimepart_get_archive (lua_State * L)
  1595. {
  1596. LUA_TRACE_POINT;
  1597. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1598. struct rspamd_archive **parch;
  1599. if (part == NULL) {
  1600. return luaL_error (L, "invalid arguments");
  1601. }
  1602. if (part->part_type != RSPAMD_MIME_PART_ARCHIVE || part->specific.arch == NULL) {
  1603. lua_pushnil (L);
  1604. }
  1605. else {
  1606. parch = lua_newuserdata (L, sizeof (*parch));
  1607. *parch = part->specific.arch;
  1608. rspamd_lua_setclass (L, "rspamd{archive}", -1);
  1609. }
  1610. return 1;
  1611. }
  1612. static gint
  1613. lua_mimepart_get_children (lua_State * L)
  1614. {
  1615. LUA_TRACE_POINT;
  1616. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1617. struct rspamd_mime_part **pcur, *cur;
  1618. guint i;
  1619. if (part == NULL) {
  1620. return luaL_error (L, "invalid arguments");
  1621. }
  1622. if (!IS_PART_MULTIPART (part) || part->specific.mp->children == NULL) {
  1623. lua_pushnil (L);
  1624. }
  1625. else {
  1626. lua_createtable (L, part->specific.mp->children->len, 0);
  1627. PTR_ARRAY_FOREACH (part->specific.mp->children, i, cur) {
  1628. pcur = lua_newuserdata (L, sizeof (*pcur));
  1629. *pcur = cur;
  1630. rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
  1631. lua_rawseti (L, -2, i + 1);
  1632. }
  1633. }
  1634. return 1;
  1635. }
  1636. static gint
  1637. lua_mimepart_get_parent (lua_State * L)
  1638. {
  1639. LUA_TRACE_POINT;
  1640. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1641. struct rspamd_mime_part **pparent;
  1642. if (part == NULL) {
  1643. return luaL_error (L, "invalid arguments");
  1644. }
  1645. if (part->parent_part) {
  1646. pparent = lua_newuserdata (L, sizeof (*pparent));
  1647. *pparent = part->parent_part;
  1648. rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
  1649. }
  1650. else {
  1651. lua_pushnil (L);
  1652. }
  1653. return 1;
  1654. }
  1655. static gint
  1656. lua_mimepart_get_text (lua_State * L)
  1657. {
  1658. LUA_TRACE_POINT;
  1659. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1660. struct rspamd_mime_text_part **ppart;
  1661. if (part == NULL) {
  1662. return luaL_error (L, "invalid arguments");
  1663. }
  1664. if (part->part_type != RSPAMD_MIME_PART_TEXT || part->specific.txt == NULL) {
  1665. lua_pushnil (L);
  1666. }
  1667. else {
  1668. ppart = lua_newuserdata (L, sizeof (*ppart));
  1669. *ppart = part->specific.txt;
  1670. rspamd_lua_setclass (L, "rspamd{textpart}", -1);
  1671. }
  1672. return 1;
  1673. }
  1674. static gint
  1675. lua_mimepart_get_digest (lua_State * L)
  1676. {
  1677. LUA_TRACE_POINT;
  1678. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1679. gchar digestbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  1680. if (part == NULL) {
  1681. return luaL_error (L, "invalid arguments");
  1682. }
  1683. memset (digestbuf, 0, sizeof (digestbuf));
  1684. rspamd_encode_hex_buf (part->digest, sizeof (part->digest),
  1685. digestbuf, sizeof (digestbuf));
  1686. lua_pushstring (L, digestbuf);
  1687. return 1;
  1688. }
  1689. static gint
  1690. lua_mimepart_get_id (lua_State * L)
  1691. {
  1692. LUA_TRACE_POINT;
  1693. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1694. if (part == NULL) {
  1695. return luaL_error (L, "invalid arguments");
  1696. }
  1697. lua_pushinteger (L, part->part_number);
  1698. return 1;
  1699. }
  1700. static gint
  1701. lua_mimepart_headers_foreach (lua_State *L)
  1702. {
  1703. LUA_TRACE_POINT;
  1704. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1705. enum rspamd_lua_task_header_type how = RSPAMD_TASK_HEADER_PUSH_SIMPLE;
  1706. struct rspamd_lua_regexp *re = NULL;
  1707. struct rspamd_mime_header *hdr, *cur;
  1708. gint old_top;
  1709. if (part && lua_isfunction (L, 2)) {
  1710. if (lua_istable (L, 3)) {
  1711. lua_pushstring (L, "full");
  1712. lua_gettable (L, 3);
  1713. if (lua_isboolean (L, -1) && lua_toboolean (L, -1)) {
  1714. how = RSPAMD_TASK_HEADER_PUSH_FULL;
  1715. }
  1716. lua_pop (L, 1);
  1717. lua_pushstring (L, "raw");
  1718. lua_gettable (L, 3);
  1719. if (lua_isboolean (L, -1) && lua_toboolean (L, -1)) {
  1720. how = RSPAMD_TASK_HEADER_PUSH_RAW;
  1721. }
  1722. lua_pop (L, 1);
  1723. lua_pushstring (L, "regexp");
  1724. lua_gettable (L, 3);
  1725. if (lua_isuserdata (L, -1)) {
  1726. re = *(struct rspamd_lua_regexp **)
  1727. rspamd_lua_check_udata (L, -1, "rspamd{regexp}");
  1728. }
  1729. lua_pop (L, 1);
  1730. }
  1731. if (part->headers_order) {
  1732. hdr = part->headers_order;
  1733. LL_FOREACH2 (hdr, cur, ord_next) {
  1734. if (re && re->re) {
  1735. if (!rspamd_regexp_match (re->re, cur->name,
  1736. strlen (cur->name),FALSE)) {
  1737. continue;
  1738. }
  1739. }
  1740. old_top = lua_gettop (L);
  1741. lua_pushvalue (L, 2);
  1742. lua_pushstring (L, cur->name);
  1743. rspamd_lua_push_header (L, cur, how);
  1744. if (lua_pcall (L, 2, LUA_MULTRET, 0) != 0) {
  1745. msg_err ("call to header_foreach failed: %s",
  1746. lua_tostring (L, -1));
  1747. lua_settop (L, old_top);
  1748. break;
  1749. }
  1750. else {
  1751. if (lua_gettop (L) > old_top) {
  1752. if (lua_isboolean (L, old_top + 1)) {
  1753. if (lua_toboolean (L, old_top + 1)) {
  1754. lua_settop (L, old_top);
  1755. break;
  1756. }
  1757. }
  1758. }
  1759. }
  1760. lua_settop (L, old_top);
  1761. }
  1762. }
  1763. }
  1764. return 0;
  1765. }
  1766. static gint
  1767. lua_mimepart_get_specific (lua_State * L)
  1768. {
  1769. LUA_TRACE_POINT;
  1770. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1771. if (part == NULL) {
  1772. return luaL_error (L, "invalid arguments");
  1773. }
  1774. if (part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
  1775. lua_pushnil (L);
  1776. }
  1777. else {
  1778. lua_rawgeti (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
  1779. }
  1780. return 1;
  1781. }
  1782. static gint
  1783. lua_mimepart_get_urls (lua_State * L)
  1784. {
  1785. LUA_TRACE_POINT;
  1786. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1787. if (part == NULL) {
  1788. return luaL_error (L, "invalid arguments");
  1789. }
  1790. struct lua_tree_cb_data cb;
  1791. struct rspamd_url *u;
  1792. static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
  1793. PROTOCOL_FILE|PROTOCOL_FTP;
  1794. gsize sz, max_urls = 0, i;
  1795. if (part->urls == NULL) {
  1796. lua_newtable (L);
  1797. return 1;
  1798. }
  1799. if (!lua_url_cbdata_fill (L, 2, &cb, default_protocols_mask,
  1800. ~(0), max_urls)) {
  1801. return luaL_error (L, "invalid arguments");
  1802. }
  1803. sz = part->urls->len;
  1804. lua_createtable (L, sz, 0);
  1805. PTR_ARRAY_FOREACH (part->urls, i, u) {
  1806. lua_tree_url_callback (u, u, &cb);
  1807. }
  1808. lua_url_cbdata_dtor (&cb);
  1809. return 1;
  1810. }
  1811. static gint
  1812. lua_mimepart_is_specific (lua_State * L)
  1813. {
  1814. LUA_TRACE_POINT;
  1815. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1816. if (part == NULL) {
  1817. return luaL_error (L, "invalid arguments");
  1818. }
  1819. lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA);
  1820. return 1;
  1821. }
  1822. static gint
  1823. lua_mimepart_set_specific (lua_State * L)
  1824. {
  1825. LUA_TRACE_POINT;
  1826. struct rspamd_mime_part *part = lua_check_mimepart (L);
  1827. if (part == NULL || lua_isnil (L, 2)) {
  1828. return luaL_error (L, "invalid arguments");
  1829. }
  1830. if (part->part_type != RSPAMD_MIME_PART_UNDEFINED &&
  1831. part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
  1832. return luaL_error (L,
  1833. "internal error: trying to set specific lua content on part of type %d",
  1834. part->part_type);
  1835. }
  1836. if (part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
  1837. /* Push old specific data */
  1838. lua_rawgeti (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
  1839. luaL_unref (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
  1840. }
  1841. else {
  1842. part->part_type = RSPAMD_MIME_PART_CUSTOM_LUA;
  1843. lua_pushnil (L);
  1844. }
  1845. /* Now, we push argument on the position 2 and save its reference */
  1846. lua_pushvalue (L, 2);
  1847. part->specific.lua_specific.cbref = luaL_ref (L, LUA_REGISTRYINDEX);
  1848. /* Now stack has just a return value as luaL_ref removes value from stack */
  1849. gint ltype = lua_type (L, 2);
  1850. switch (ltype) {
  1851. case LUA_TTABLE:
  1852. part->specific.lua_specific.type = RSPAMD_LUA_PART_TABLE;
  1853. break;
  1854. case LUA_TSTRING:
  1855. part->specific.lua_specific.type = RSPAMD_LUA_PART_STRING;
  1856. break;
  1857. case LUA_TUSERDATA:
  1858. if (rspamd_lua_check_udata_maybe (L, 2, "rspamd{text}")) {
  1859. part->specific.lua_specific.type = RSPAMD_LUA_PART_TEXT;
  1860. }
  1861. else {
  1862. part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
  1863. }
  1864. break;
  1865. case LUA_TFUNCTION:
  1866. part->specific.lua_specific.type = RSPAMD_LUA_PART_FUNCTION;
  1867. break;
  1868. default:
  1869. part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
  1870. break;
  1871. }
  1872. return 1;
  1873. }
  1874. void
  1875. luaopen_textpart (lua_State * L)
  1876. {
  1877. rspamd_lua_new_class (L, "rspamd{textpart}", textpartlib_m);
  1878. lua_pop (L, 1);
  1879. }
  1880. void
  1881. luaopen_mimepart (lua_State * L)
  1882. {
  1883. rspamd_lua_new_class (L, "rspamd{mimepart}", mimepartlib_m);
  1884. lua_pop (L, 1);
  1885. }