You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_mimepart.c 56KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "lua_url.h"
  18. #include "libmime/message.h"
  19. #include "libmime/lang_detection.h"
  20. #include "libstat/stat_api.h"
  21. #include "libcryptobox/cryptobox.h"
  22. #include "libutil/shingles.h"
  23. #include "contrib/uthash/utlist.h"
  24. /* Textpart methods */
  25. /***
  26. * @module rspamd_textpart
  27. * This module provides different methods to manipulate text parts data. Text parts
  28. * could be obtained from the `rspamd_task` by using of method `task:get_text_parts()`
  29. @example
  30. rspamd_config.R_EMPTY_IMAGE = function (task)
  31. parts = task:get_text_parts()
  32. if parts then
  33. for _,part in ipairs(parts) do
  34. if part:is_empty() then
  35. texts = task:get_texts()
  36. if texts then
  37. return true
  38. end
  39. return false
  40. end
  41. end
  42. end
  43. return false
  44. end
  45. */
  46. /***
  47. * @method text_part:is_utf()
  48. * Return TRUE if part is a valid utf text
  49. * @return {boolean} true if part is valid `UTF8` part
  50. */
  51. LUA_FUNCTION_DEF(textpart, is_utf);
  52. /***
  53. * @method text_part:has_8bit_raw()
  54. * Return TRUE if a part has raw 8bit characters
  55. * @return {boolean} true if a part has raw 8bit characters
  56. */
  57. LUA_FUNCTION_DEF(textpart, has_8bit_raw);
  58. /***
  59. * @method text_part:has_8bit()
  60. * Return TRUE if a part has encoded 8bit characters
  61. * @return {boolean} true if a part has encoded 8bit characters
  62. */
  63. LUA_FUNCTION_DEF(textpart, has_8bit);
  64. /***
  65. * @method text_part:get_content([type])
  66. * Get the text of the part (html tags stripped). Optional `type` defines type of content to get:
  67. * - `content` (default): utf8 content with HTML tags stripped and newlines preserved
  68. * - `content_oneline`: utf8 content with HTML tags and newlines stripped
  69. * - `raw`: raw content, not mime decoded nor utf8 converted
  70. * - `raw_parsed`: raw content, mime decoded, not utf8 converted
  71. * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
  72. * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
  73. */
  74. LUA_FUNCTION_DEF(textpart, get_content);
  75. /***
  76. * @method text_part:get_raw_content()
  77. * Get the original text of the part
  78. * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
  79. */
  80. LUA_FUNCTION_DEF(textpart, get_raw_content);
  81. /***
  82. * @method text_part:get_content_oneline()
  83. * Get the text of the part (html tags and newlines stripped)
  84. * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
  85. */
  86. LUA_FUNCTION_DEF(textpart, get_content_oneline);
  87. /***
  88. * @method text_part:get_length()
  89. * Get length of the text of the part
  90. * @return {integer} length of part in **bytes**
  91. */
  92. LUA_FUNCTION_DEF(textpart, get_length);
  93. /***
  94. * @method text_part:get_raw_length()
  95. * Get length of the **raw** content of the part (e.g. HTML with tags unstripped)
  96. * @return {integer} length of part in **bytes**
  97. */
  98. LUA_FUNCTION_DEF(textpart, get_raw_length);
  99. /***
  100. * @method text_part:get_urls_length()
  101. * Get length of the urls within the part
  102. * @return {integer} length of urls in **bytes**
  103. */
  104. LUA_FUNCTION_DEF(textpart, get_urls_length);
  105. /***
  106. * @method text_part:get_lines_count()
  107. * Get lines number in the part
  108. * @return {integer} number of lines in the part
  109. */
  110. LUA_FUNCTION_DEF(textpart, get_lines_count);
  111. /***
  112. * @method text_part:get_stats()
  113. * Returns a table with the following data:
  114. * - `lines`: number of lines
  115. * - `spaces`: number of spaces
  116. * - `double_spaces`: double spaces
  117. * - `empty_lines`: number of empty lines
  118. * - `non_ascii_characters`: number of non ascii characters
  119. * - `ascii_characters`: number of ascii characters
  120. * @return {table} table of stats
  121. */
  122. LUA_FUNCTION_DEF(textpart, get_stats);
  123. /***
  124. * @method text_part:get_words_count()
  125. * Get words number in the part
  126. * @return {integer} number of words in the part
  127. */
  128. LUA_FUNCTION_DEF(textpart, get_words_count);
  129. /***
  130. * @method text_part:get_words([how])
  131. * Get words in the part. Optional `how` argument defines type of words returned:
  132. * - `stem`: stemmed words (default)
  133. * - `norm`: normalised words (utf normalised + lowercased)
  134. * - `raw`: raw words in utf (if possible)
  135. * - `full`: list of tables, each table has the following fields:
  136. * - [1] - stemmed word
  137. * - [2] - normalised word
  138. * - [3] - raw word
  139. * - [4] - flags (table of strings)
  140. * @return {table/strings} words in the part
  141. */
  142. LUA_FUNCTION_DEF(textpart, get_words);
  143. /***
  144. * @method text_part:filter_words(regexp[, how[, max]])
  145. * Filter words using some regexp:
  146. * - `stem`: stemmed words (default)
  147. * - `norm`: normalised words (utf normalised + lowercased)
  148. * - `raw`: raw words in utf (if possible)
  149. * - `full`: list of tables, each table has the following fields:
  150. * - [1] - stemmed word
  151. * - [2] - normalised word
  152. * - [3] - raw word
  153. * - [4] - flags (table of strings)
  154. * @param {rspamd_regexp} regexp regexp to match
  155. * @param {string} how what words to extract
  156. * @param {number} max maximum number of hits returned (all hits if <= 0 or nil)
  157. * @return {table/strings} words matching regexp
  158. */
  159. LUA_FUNCTION_DEF(textpart, filter_words);
  160. /***
  161. * @method text_part:is_empty()
  162. * Returns `true` if the specified part is empty
  163. * @return {bool} whether a part is empty
  164. */
  165. LUA_FUNCTION_DEF(textpart, is_empty);
  166. /***
  167. * @method text_part:is_html()
  168. * Returns `true` if the specified part has HTML content
  169. * @return {bool} whether a part is HTML part
  170. */
  171. LUA_FUNCTION_DEF(textpart, is_html);
  172. /***
  173. * @method text_part:get_html()
  174. * Returns html content of the specified part
  175. * @return {html} html content
  176. */
  177. LUA_FUNCTION_DEF(textpart, get_html);
  178. /***
  179. * @method text_part:get_language()
  180. * Returns the code of the most used unicode script in the text part. Does not work with raw parts
  181. * @return {string} short abbreviation (such as `ru`) for the script's language
  182. */
  183. LUA_FUNCTION_DEF(textpart, get_language);
  184. /***
  185. * @method text_part:get_charset()
  186. * Returns part real charset
  187. * @return {string} charset of the part
  188. */
  189. LUA_FUNCTION_DEF(textpart, get_charset);
  190. /***
  191. * @method text_part:get_languages()
  192. * Returns array of tables of all languages detected for a part:
  193. * - 'code': language code (short string)
  194. * - 'prob': logarithm of probability
  195. * @return {array|tables} all languages detected for the part
  196. */
  197. LUA_FUNCTION_DEF(textpart, get_languages);
  198. /***
  199. * @method text_part:get_fuzzy_hashes(mempool)
  200. * @param {rspamd_mempool} mempool - memory pool (usually task pool)
  201. * Returns direct hash of textpart as a string and array [1..32] of shingles each represented as a following table:
  202. * - [1] - 64 bit fuzzy hash represented as a string
  203. * - [2..4] - strings used to generate this hash
  204. * @return {string,array|tables} fuzzy hashes calculated
  205. */
  206. LUA_FUNCTION_DEF(textpart, get_fuzzy_hashes);
  207. /***
  208. * @method text_part:get_mimepart()
  209. * Returns the mime part object corresponding to this text part
  210. * @return {mimepart} mimepart object
  211. */
  212. LUA_FUNCTION_DEF(textpart, get_mimepart);
  213. static const struct luaL_reg textpartlib_m[] = {
  214. LUA_INTERFACE_DEF(textpart, is_utf),
  215. LUA_INTERFACE_DEF(textpart, has_8bit_raw),
  216. LUA_INTERFACE_DEF(textpart, has_8bit),
  217. LUA_INTERFACE_DEF(textpart, get_content),
  218. LUA_INTERFACE_DEF(textpart, get_raw_content),
  219. LUA_INTERFACE_DEF(textpart, get_content_oneline),
  220. LUA_INTERFACE_DEF(textpart, get_length),
  221. LUA_INTERFACE_DEF(textpart, get_raw_length),
  222. LUA_INTERFACE_DEF(textpart, get_urls_length),
  223. LUA_INTERFACE_DEF(textpart, get_lines_count),
  224. LUA_INTERFACE_DEF(textpart, get_words_count),
  225. LUA_INTERFACE_DEF(textpart, get_words),
  226. LUA_INTERFACE_DEF(textpart, filter_words),
  227. LUA_INTERFACE_DEF(textpart, is_empty),
  228. LUA_INTERFACE_DEF(textpart, is_html),
  229. LUA_INTERFACE_DEF(textpart, get_html),
  230. LUA_INTERFACE_DEF(textpart, get_language),
  231. LUA_INTERFACE_DEF(textpart, get_charset),
  232. LUA_INTERFACE_DEF(textpart, get_languages),
  233. LUA_INTERFACE_DEF(textpart, get_mimepart),
  234. LUA_INTERFACE_DEF(textpart, get_stats),
  235. LUA_INTERFACE_DEF(textpart, get_fuzzy_hashes),
  236. {"__tostring", rspamd_lua_class_tostring},
  237. {NULL, NULL}};
  238. /* Mimepart methods */
  239. /***
  240. * @module rspamd_mimepart
  241. * This module provides access to mime parts found in a message
  242. @example
  243. rspamd_config.MISSING_CONTENT_TYPE = function(task)
  244. local parts = task:get_parts()
  245. if parts and #parts > 1 then
  246. -- We have more than one part
  247. for _,p in ipairs(parts) do
  248. local ct = p:get_header('Content-Type')
  249. -- And some parts have no Content-Type header
  250. if not ct then
  251. return true
  252. end
  253. end
  254. end
  255. return false
  256. end
  257. */
  258. /***
  259. * @method mime_part:get_header(name[, case_sensitive])
  260. * Get decoded value of a header specified with optional case_sensitive flag.
  261. * By default headers are searched in a caseless manner.
  262. * @param {string} name name of header to get
  263. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  264. * @return {string} decoded value of a header
  265. */
  266. LUA_FUNCTION_DEF(mimepart, get_header);
  267. /***
  268. * @method mime_part:get_header_raw(name[, case_sensitive])
  269. * Get raw value of a header specified with optional case_sensitive flag.
  270. * By default headers are searched in a caseless manner.
  271. * @param {string} name name of header to get
  272. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  273. * @return {string} raw value of a header
  274. */
  275. LUA_FUNCTION_DEF(mimepart, get_header_raw);
  276. /***
  277. * @method mime_part:get_header_full(name[, case_sensitive])
  278. * Get raw value of a header specified with optional case_sensitive flag.
  279. * By default headers are searched in a caseless manner. This method returns more
  280. * information about the header as a list of tables with the following structure:
  281. *
  282. * - `name` - name of a header
  283. * - `value` - raw value of a header
  284. * - `decoded` - decoded value of a header
  285. * - `tab_separated` - `true` if a header and a value are separated by `tab` character
  286. * - `empty_separator` - `true` if there are no separator between a header and a value
  287. * @param {string} name name of header to get
  288. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  289. * @return {list of tables} all values of a header as specified above
  290. @example
  291. function check_header_delimiter_tab(task, header_name)
  292. for _,rh in ipairs(task:get_header_full(header_name)) do
  293. if rh['tab_separated'] then return true end
  294. end
  295. return false
  296. end
  297. */
  298. LUA_FUNCTION_DEF(mimepart, get_header_full);
  299. /***
  300. * @method mimepart:get_header_count(name[, case_sensitive])
  301. * Lightweight version if you need just a header's count
  302. * By default headers are searched in a caseless manner.
  303. * @param {string} name name of header to get
  304. * @param {boolean} case_sensitive case sensitiveness flag to search for a header
  305. * @return {number} number of header's occurrences or 0 if not found
  306. */
  307. LUA_FUNCTION_DEF(mimepart, get_header_count);
  308. /***
  309. * @method mimepart:get_raw_headers()
  310. * Get all undecoded headers of a mime part as a string
  311. * @return {rspamd_text} all raw headers for a message as opaque text
  312. */
  313. LUA_FUNCTION_DEF(mimepart, get_raw_headers);
  314. /***
  315. * @method mimepart:get_headers()
  316. * Get all undecoded headers of a mime part as a string
  317. * @return {rspamd_text} all raw headers for a message as opaque text
  318. */
  319. LUA_FUNCTION_DEF(mimepart, get_headers);
  320. /***
  321. * @method mime_part:get_content()
  322. * Get the parsed content of part
  323. * @return {text} opaque text object (zero-copy if not casted to lua string)
  324. */
  325. LUA_FUNCTION_DEF(mimepart, get_content);
  326. /***
  327. * @method mime_part:get_raw_content()
  328. * Get the raw content of part
  329. * @return {text} opaque text object (zero-copy if not casted to lua string)
  330. */
  331. LUA_FUNCTION_DEF(mimepart, get_raw_content);
  332. /***
  333. * @method mime_part:get_length()
  334. * Get length of the content of the part
  335. * @return {integer} length of part in **bytes**
  336. */
  337. LUA_FUNCTION_DEF(mimepart, get_length);
  338. /***
  339. * @method mime_part:get_type()
  340. * Extract content-type string of the mime part
  341. * @return {string,string} content type in form 'type','subtype'
  342. */
  343. LUA_FUNCTION_DEF(mimepart, get_type);
  344. /***
  345. * @method mime_part:get_type_full()
  346. * Extract content-type string of the mime part with all attributes
  347. * @return {string,string,table} content type in form 'type','subtype', {attrs}
  348. */
  349. LUA_FUNCTION_DEF(mimepart, get_type_full);
  350. /***
  351. * @method mime_part:get_detected_type()
  352. * Extract content-type string of the mime part. Use lua_magic detection
  353. * @return {string,string} content type in form 'type','subtype'
  354. */
  355. LUA_FUNCTION_DEF(mimepart, get_detected_type);
  356. /***
  357. * @method mime_part:get_detected_type_full()
  358. * Extract content-type string of the mime part with all attributes. Use lua_magic detection
  359. * @return {string,string,table} content type in form 'type','subtype', {attrs}
  360. */
  361. LUA_FUNCTION_DEF(mimepart, get_detected_type_full);
  362. /***
  363. * @method mime_part:get_detected_ext()
  364. * Returns a msdos extension name according to lua_magic detection
  365. * @return {string} detected extension (see lua_magic.types)
  366. */
  367. LUA_FUNCTION_DEF(mimepart, get_detected_ext);
  368. /***
  369. * @method mime_part:get_cte()
  370. * Extract content-transfer-encoding for a part
  371. * @return {string} content transfer encoding (e.g. `base64` or `7bit`)
  372. */
  373. LUA_FUNCTION_DEF(mimepart, get_cte);
  374. /***
  375. * @method mime_part:get_filename()
  376. * Extract filename associated with mime part if it is an attachment
  377. * @return {string} filename or `nil` if no file is associated with this part
  378. */
  379. LUA_FUNCTION_DEF(mimepart, get_filename);
  380. /***
  381. * @method mime_part:is_image()
  382. * Returns true if mime part is an image
  383. * @return {bool} true if a part is an image
  384. */
  385. LUA_FUNCTION_DEF(mimepart, is_image);
  386. /***
  387. * @method mime_part:get_image()
  388. * Returns rspamd_image structure associated with this part. This structure has
  389. * the following methods:
  390. *
  391. * * `get_width` - return width of an image in pixels
  392. * * `get_height` - return height of an image in pixels
  393. * * `get_type` - return string representation of image's type (e.g. 'jpeg')
  394. * * `get_filename` - return string with image's file name
  395. * * `get_size` - return size in bytes
  396. * @return {rspamd_image} image structure or nil if a part is not an image
  397. */
  398. LUA_FUNCTION_DEF(mimepart, get_image);
  399. /***
  400. * @method mime_part:is_archive()
  401. * Returns true if mime part is an archive
  402. * @return {bool} true if a part is an archive
  403. */
  404. LUA_FUNCTION_DEF(mimepart, is_archive);
  405. /***
  406. * @method mime_part:is_attachment()
  407. * Returns true if mime part looks like an attachment
  408. * @return {bool} true if a part looks like an attachment
  409. */
  410. LUA_FUNCTION_DEF(mimepart, is_attachment);
  411. /***
  412. * @method mime_part:get_archive()
  413. * Returns rspamd_archive structure associated with this part. This structure has
  414. * the following methods:
  415. *
  416. * * `get_files` - return list of strings with filenames inside archive
  417. * * `get_files_full` - return list of tables with all information about files
  418. * * `is_encrypted` - return true if an archive is encrypted
  419. * * `get_type` - return string representation of archive's type (e.g. 'zip')
  420. * * `get_filename` - return string with archive's file name
  421. * * `get_size` - return size in bytes
  422. * @return {rspamd_archive} archive structure or nil if a part is not an archive
  423. */
  424. LUA_FUNCTION_DEF(mimepart, get_archive);
  425. /***
  426. * @method mime_part:is_multipart()
  427. * Returns true if mime part is a multipart part
  428. * @return {bool} true if a part is a multipart part
  429. */
  430. LUA_FUNCTION_DEF(mimepart, is_multipart);
  431. /***
  432. * @method mime_part:is_message()
  433. * Returns true if mime part is a message part (message/rfc822)
  434. * @return {bool} true if a part is a message part
  435. */
  436. LUA_FUNCTION_DEF(mimepart, is_message);
  437. /***
  438. * @method mime_part:get_boundary()
  439. * Returns boundary for a part (extracted from parent multipart for normal parts and
  440. * from the part itself for multipart)
  441. * @return {string} boundary value or nil
  442. */
  443. LUA_FUNCTION_DEF(mimepart, get_boundary);
  444. /***
  445. * @method mime_part:get_enclosing_boundary()
  446. * Returns an enclosing boundary for a part even for multiparts. For normal parts
  447. * this method is identical to `get_boundary`
  448. * @return {string} boundary value or nil
  449. */
  450. LUA_FUNCTION_DEF(mimepart, get_enclosing_boundary);
  451. /***
  452. * @method mime_part:get_children()
  453. * Returns rspamd_mimepart table of part's children. Returns nil if mime part is not multipart
  454. * or a message part.
  455. * @return {rspamd_mimepart} table of children
  456. */
  457. LUA_FUNCTION_DEF(mimepart, get_children);
  458. /***
  459. * @method mime_part:is_text()
  460. * Returns true if mime part is a text part
  461. * @return {bool} true if a part is a text part
  462. */
  463. LUA_FUNCTION_DEF(mimepart, is_text);
  464. /***
  465. * @method mime_part:get_text()
  466. * Returns rspamd_textpart structure associated with this part.
  467. * @return {rspamd_textpart} textpart structure or nil if a part is not a text part
  468. */
  469. LUA_FUNCTION_DEF(mimepart, get_text);
  470. /***
  471. * @method mime_part:get_digest()
  472. * Returns the unique digest for this mime part
  473. * @return {string} 128 characters hex string with digest of the part
  474. */
  475. LUA_FUNCTION_DEF(mimepart, get_digest);
  476. /***
  477. * @method mime_part:get_id()
  478. * Returns the order of the part in parts list
  479. * @return {number} index of the part (starting from 1 as it is Lua API)
  480. */
  481. LUA_FUNCTION_DEF(mimepart, get_id);
  482. /***
  483. * @method mime_part:is_broken()
  484. * Returns true if mime part has incorrectly specified content type
  485. * @return {bool} true if a part has bad content type
  486. */
  487. LUA_FUNCTION_DEF(mimepart, is_broken);
  488. /***
  489. * @method mime_part:headers_foreach(callback, [params])
  490. * This method calls `callback` for each header that satisfies some condition.
  491. * By default, all headers are iterated unless `callback` returns `true`. Nil or
  492. * false means continue of iterations.
  493. * Params could be as following:
  494. *
  495. * - `full`: header value is full table of all attributes @see task:get_header_full for details
  496. * - `regexp`: return headers that satisfy the specified regexp
  497. * @param {function} callback function from header name and header value
  498. * @param {table} params optional parameters
  499. */
  500. LUA_FUNCTION_DEF(mimepart, headers_foreach);
  501. /***
  502. * @method mime_part:get_parent()
  503. * Returns parent part for this part
  504. * @return {rspamd_mimepart} parent part or nil
  505. */
  506. LUA_FUNCTION_DEF(mimepart, get_parent);
  507. /***
  508. * @method mime_part:get_specific()
  509. * Returns specific lua content for this part
  510. * @return {any} specific lua content
  511. */
  512. LUA_FUNCTION_DEF(mimepart, get_specific);
  513. /***
  514. * @method mime_part:set_specific(<any>)
  515. * Sets a specific content for this part
  516. * @return {any} previous specific lua content (or nil)
  517. */
  518. LUA_FUNCTION_DEF(mimepart, set_specific);
  519. /***
  520. * @method mime_part:is_specific(<any>)
  521. * Returns true if part has specific lua content
  522. * @return {boolean} flag
  523. */
  524. LUA_FUNCTION_DEF(mimepart, is_specific);
  525. /***
  526. * @method mime_part:get_urls([need_emails|list_protos][, need_images])
  527. * Get all URLs found in a mime part. Telephone urls and emails are not included unless explicitly asked in `list_protos`
  528. * @param {boolean} need_emails if `true` then return also email urls, this can be a comma separated string of protocols desired or a table (e.g. `mailto` or `telephone`)
  529. * @param {boolean} need_images return urls from images (<img src=...>) as well
  530. * @return {table rspamd_url} list of all urls found
  531. */
  532. LUA_FUNCTION_DEF(mimepart, get_urls);
/*
 * Method table for the mimepart Lua class.
 * Each LUA_INTERFACE_DEF entry registers one mime_part method (presumably
 * mapping the method name to the matching lua_mimepart_* function - confirm
 * against the macro definition). `__tostring` is served by the generic
 * rspamd_lua_class_tostring, and the {NULL, NULL} pair terminates the table.
 */
static const struct luaL_reg mimepartlib_m[] = {
	LUA_INTERFACE_DEF(mimepart, get_content),
	LUA_INTERFACE_DEF(mimepart, get_raw_content),
	LUA_INTERFACE_DEF(mimepart, get_length),
	LUA_INTERFACE_DEF(mimepart, get_type),
	LUA_INTERFACE_DEF(mimepart, get_type_full),
	LUA_INTERFACE_DEF(mimepart, get_detected_type),
	LUA_INTERFACE_DEF(mimepart, get_detected_ext),
	LUA_INTERFACE_DEF(mimepart, get_detected_type_full),
	LUA_INTERFACE_DEF(mimepart, get_cte),
	LUA_INTERFACE_DEF(mimepart, get_filename),
	LUA_INTERFACE_DEF(mimepart, get_boundary),
	LUA_INTERFACE_DEF(mimepart, get_enclosing_boundary),
	LUA_INTERFACE_DEF(mimepart, get_header),
	LUA_INTERFACE_DEF(mimepart, get_header_raw),
	LUA_INTERFACE_DEF(mimepart, get_header_full),
	LUA_INTERFACE_DEF(mimepart, get_header_count),
	LUA_INTERFACE_DEF(mimepart, get_raw_headers),
	LUA_INTERFACE_DEF(mimepart, get_headers),
	LUA_INTERFACE_DEF(mimepart, is_image),
	LUA_INTERFACE_DEF(mimepart, get_image),
	LUA_INTERFACE_DEF(mimepart, is_archive),
	LUA_INTERFACE_DEF(mimepart, get_archive),
	LUA_INTERFACE_DEF(mimepart, is_multipart),
	LUA_INTERFACE_DEF(mimepart, is_message),
	LUA_INTERFACE_DEF(mimepart, get_children),
	LUA_INTERFACE_DEF(mimepart, get_parent),
	LUA_INTERFACE_DEF(mimepart, get_urls),
	LUA_INTERFACE_DEF(mimepart, is_text),
	LUA_INTERFACE_DEF(mimepart, is_broken),
	LUA_INTERFACE_DEF(mimepart, is_attachment),
	LUA_INTERFACE_DEF(mimepart, get_text),
	LUA_INTERFACE_DEF(mimepart, get_digest),
	LUA_INTERFACE_DEF(mimepart, get_id),
	LUA_INTERFACE_DEF(mimepart, headers_foreach),
	LUA_INTERFACE_DEF(mimepart, get_specific),
	LUA_INTERFACE_DEF(mimepart, set_specific),
	LUA_INTERFACE_DEF(mimepart, is_specific),
	{"__tostring", rspamd_lua_class_tostring},
	{NULL, NULL}};
  573. static struct rspamd_mime_text_part *
  574. lua_check_textpart(lua_State *L)
  575. {
  576. void *ud = rspamd_lua_check_udata(L, 1, rspamd_textpart_classname);
  577. luaL_argcheck(L, ud != NULL, 1, "'textpart' expected");
  578. return ud ? *((struct rspamd_mime_text_part **) ud) : NULL;
  579. }
  580. static struct rspamd_mime_part *
  581. lua_check_mimepart(lua_State *L)
  582. {
  583. void *ud = rspamd_lua_check_udata(L, 1, rspamd_mimepart_classname);
  584. luaL_argcheck(L, ud != NULL, 1, "'mimepart' expected");
  585. return ud ? *((struct rspamd_mime_part **) ud) : NULL;
  586. }
  587. static gint
  588. lua_textpart_is_utf(lua_State *L)
  589. {
  590. LUA_TRACE_POINT;
  591. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  592. if (part == NULL || IS_TEXT_PART_EMPTY(part)) {
  593. lua_pushboolean(L, FALSE);
  594. return 1;
  595. }
  596. lua_pushboolean(L, IS_TEXT_PART_UTF(part));
  597. return 1;
  598. }
  599. static gint
  600. lua_textpart_has_8bit_raw(lua_State *L)
  601. {
  602. LUA_TRACE_POINT;
  603. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  604. if (part) {
  605. if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW) {
  606. lua_pushboolean(L, TRUE);
  607. }
  608. else {
  609. lua_pushboolean(L, FALSE);
  610. }
  611. }
  612. else {
  613. return luaL_error(L, "invalid arguments");
  614. }
  615. return 1;
  616. }
  617. static gint
  618. lua_textpart_has_8bit(lua_State *L)
  619. {
  620. LUA_TRACE_POINT;
  621. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  622. if (part) {
  623. if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED) {
  624. lua_pushboolean(L, TRUE);
  625. }
  626. else {
  627. lua_pushboolean(L, FALSE);
  628. }
  629. }
  630. else {
  631. return luaL_error(L, "invalid arguments");
  632. }
  633. return 1;
  634. }
  635. static gint
  636. lua_textpart_get_content(lua_State *L)
  637. {
  638. LUA_TRACE_POINT;
  639. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  640. struct rspamd_lua_text *t;
  641. gsize len;
  642. const gchar *start, *type = NULL;
  643. if (part == NULL) {
  644. lua_pushnil(L);
  645. return 1;
  646. }
  647. if (lua_type(L, 2) == LUA_TSTRING) {
  648. type = lua_tostring(L, 2);
  649. }
  650. if (!type) {
  651. if (IS_TEXT_PART_EMPTY(part)) {
  652. lua_pushnil(L);
  653. return 1;
  654. }
  655. start = part->utf_content.begin;
  656. len = part->utf_content.len;
  657. }
  658. else if (strcmp(type, "content") == 0) {
  659. if (IS_TEXT_PART_EMPTY(part)) {
  660. lua_pushnil(L);
  661. return 1;
  662. }
  663. start = part->utf_content.begin;
  664. len = part->utf_content.len;
  665. }
  666. else if (strcmp(type, "content_oneline") == 0) {
  667. if (IS_TEXT_PART_EMPTY(part)) {
  668. lua_pushnil(L);
  669. return 1;
  670. }
  671. start = part->utf_stripped_content->data;
  672. len = part->utf_stripped_content->len;
  673. }
  674. else if (strcmp(type, "raw_parsed") == 0) {
  675. if (part->parsed.len == 0) {
  676. lua_pushnil(L);
  677. return 1;
  678. }
  679. start = part->parsed.begin;
  680. len = part->parsed.len;
  681. }
  682. else if (strcmp(type, "raw_utf") == 0) {
  683. if (part->utf_raw_content == NULL || part->utf_raw_content->len == 0) {
  684. lua_pushnil(L);
  685. return 1;
  686. }
  687. start = part->utf_raw_content->data;
  688. len = part->utf_raw_content->len;
  689. }
  690. else if (strcmp(type, "raw") == 0) {
  691. if (part->raw.len == 0) {
  692. lua_pushnil(L);
  693. return 1;
  694. }
  695. start = part->raw.begin;
  696. len = part->raw.len;
  697. }
  698. else {
  699. return luaL_error(L, "invalid content type: %s", type);
  700. }
  701. t = lua_newuserdata(L, sizeof(*t));
  702. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  703. t->start = start;
  704. t->len = len;
  705. t->flags = 0;
  706. return 1;
  707. }
  708. static gint
  709. lua_textpart_get_raw_content(lua_State *L)
  710. {
  711. LUA_TRACE_POINT;
  712. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  713. struct rspamd_lua_text *t;
  714. if (part == NULL || IS_TEXT_PART_EMPTY(part)) {
  715. lua_pushnil(L);
  716. return 1;
  717. }
  718. t = lua_newuserdata(L, sizeof(*t));
  719. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  720. t->start = part->raw.begin;
  721. t->len = part->raw.len;
  722. t->flags = 0;
  723. return 1;
  724. }
  725. static gint
  726. lua_textpart_get_content_oneline(lua_State *L)
  727. {
  728. LUA_TRACE_POINT;
  729. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  730. if (part == NULL || IS_TEXT_PART_EMPTY(part)) {
  731. lua_pushnil(L);
  732. return 1;
  733. }
  734. lua_new_text(L, part->utf_stripped_content->data, part->utf_stripped_content->len, FALSE);
  735. return 1;
  736. }
  737. static gint
  738. lua_textpart_get_length(lua_State *L)
  739. {
  740. LUA_TRACE_POINT;
  741. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  742. if (part == NULL) {
  743. lua_pushnil(L);
  744. return 1;
  745. }
  746. if (IS_TEXT_PART_EMPTY(part) || part->utf_content.len == 0) {
  747. lua_pushinteger(L, 0);
  748. }
  749. else {
  750. lua_pushinteger(L, part->utf_content.len);
  751. }
  752. return 1;
  753. }
  754. static gint
  755. lua_textpart_get_raw_length(lua_State *L)
  756. {
  757. LUA_TRACE_POINT;
  758. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  759. if (part == NULL) {
  760. lua_pushnil(L);
  761. return 1;
  762. }
  763. lua_pushinteger(L, part->raw.len);
  764. return 1;
  765. }
  766. static gint
  767. lua_textpart_get_urls_length(lua_State *L)
  768. {
  769. LUA_TRACE_POINT;
  770. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  771. GList *cur;
  772. guint total = 0;
  773. struct rspamd_process_exception *ex;
  774. if (part == NULL) {
  775. lua_pushnil(L);
  776. return 1;
  777. }
  778. for (cur = part->exceptions; cur != NULL; cur = g_list_next(cur)) {
  779. ex = cur->data;
  780. if (ex->type == RSPAMD_EXCEPTION_URL) {
  781. total += ex->len;
  782. }
  783. }
  784. lua_pushinteger(L, total);
  785. return 1;
  786. }
  787. static gint
  788. lua_textpart_get_lines_count(lua_State *L)
  789. {
  790. LUA_TRACE_POINT;
  791. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  792. if (part == NULL) {
  793. lua_pushnil(L);
  794. return 1;
  795. }
  796. if (IS_TEXT_PART_EMPTY(part)) {
  797. lua_pushinteger(L, 0);
  798. }
  799. else {
  800. lua_pushinteger(L, part->nlines);
  801. }
  802. return 1;
  803. }
  804. static gint
  805. lua_textpart_get_words_count(lua_State *L)
  806. {
  807. LUA_TRACE_POINT;
  808. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  809. if (part == NULL) {
  810. lua_pushnil(L);
  811. return 1;
  812. }
  813. if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
  814. lua_pushinteger(L, 0);
  815. }
  816. else {
  817. lua_pushinteger(L, part->nwords);
  818. }
  819. return 1;
  820. }
  821. static inline enum rspamd_lua_words_type
  822. word_extract_type_from_string(const gchar *how_str)
  823. {
  824. enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_MAX;
  825. if (strcmp(how_str, "stem") == 0) {
  826. how = RSPAMD_LUA_WORDS_STEM;
  827. }
  828. else if (strcmp(how_str, "norm") == 0) {
  829. how = RSPAMD_LUA_WORDS_NORM;
  830. }
  831. else if (strcmp(how_str, "raw") == 0) {
  832. how = RSPAMD_LUA_WORDS_RAW;
  833. }
  834. else if (strcmp(how_str, "full") == 0) {
  835. how = RSPAMD_LUA_WORDS_FULL;
  836. }
  837. return how;
  838. }
  839. static gint
  840. lua_textpart_get_words(lua_State *L)
  841. {
  842. LUA_TRACE_POINT;
  843. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  844. enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
  845. if (part == NULL) {
  846. return luaL_error(L, "invalid arguments");
  847. }
  848. if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
  849. lua_createtable(L, 0, 0);
  850. }
  851. else {
  852. if (lua_type(L, 2) == LUA_TSTRING) {
  853. const gchar *how_str = lua_tostring(L, 2);
  854. how = word_extract_type_from_string(how_str);
  855. if (how == RSPAMD_LUA_WORDS_MAX) {
  856. return luaL_error(L, "invalid extraction type: %s", how_str);
  857. }
  858. }
  859. return rspamd_lua_push_words(L, part->utf_words, how);
  860. }
  861. return 1;
  862. }
  863. static gint
  864. lua_textpart_filter_words(lua_State *L)
  865. {
  866. LUA_TRACE_POINT;
  867. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  868. struct rspamd_lua_regexp *re = lua_check_regexp(L, 2);
  869. gint lim = -1;
  870. enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
  871. if (part == NULL || re == NULL) {
  872. return luaL_error(L, "invalid arguments");
  873. }
  874. if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
  875. lua_createtable(L, 0, 0);
  876. }
  877. else {
  878. if (lua_type(L, 3) == LUA_TSTRING) {
  879. const gchar *how_str = lua_tostring(L, 3);
  880. how = word_extract_type_from_string(how_str);
  881. if (how == RSPAMD_LUA_WORDS_MAX) {
  882. return luaL_error(L, "invalid extraction type: %s", how_str);
  883. }
  884. }
  885. if (lua_type(L, 4) == LUA_TNUMBER) {
  886. lim = lua_tointeger(L, 4);
  887. }
  888. guint cnt, i;
  889. lua_createtable(L, 8, 0);
  890. for (i = 0, cnt = 1; i < part->utf_words->len; i++) {
  891. rspamd_stat_token_t *w = &g_array_index(part->utf_words,
  892. rspamd_stat_token_t, i);
  893. switch (how) {
  894. case RSPAMD_LUA_WORDS_STEM:
  895. if (w->stemmed.len > 0) {
  896. if (rspamd_regexp_match(re->re, w->stemmed.begin,
  897. w->stemmed.len, FALSE)) {
  898. lua_pushlstring(L, w->stemmed.begin, w->stemmed.len);
  899. lua_rawseti(L, -2, cnt++);
  900. }
  901. }
  902. break;
  903. case RSPAMD_LUA_WORDS_NORM:
  904. if (w->normalized.len > 0) {
  905. if (rspamd_regexp_match(re->re, w->normalized.begin,
  906. w->normalized.len, FALSE)) {
  907. lua_pushlstring(L, w->normalized.begin, w->normalized.len);
  908. lua_rawseti(L, -2, cnt++);
  909. }
  910. }
  911. break;
  912. case RSPAMD_LUA_WORDS_RAW:
  913. if (w->original.len > 0) {
  914. if (rspamd_regexp_match(re->re, w->original.begin,
  915. w->original.len, TRUE)) {
  916. lua_pushlstring(L, w->original.begin, w->original.len);
  917. lua_rawseti(L, -2, cnt++);
  918. }
  919. }
  920. break;
  921. case RSPAMD_LUA_WORDS_FULL:
  922. if (rspamd_regexp_match(re->re, w->normalized.begin,
  923. w->normalized.len, FALSE)) {
  924. rspamd_lua_push_full_word(L, w);
  925. /* Push to the resulting vector */
  926. lua_rawseti(L, -2, cnt++);
  927. }
  928. break;
  929. default:
  930. break;
  931. }
  932. if (lim > 0 && cnt >= lim) {
  933. break;
  934. }
  935. }
  936. }
  937. return 1;
  938. }
  939. static gint
  940. lua_textpart_is_empty(lua_State *L)
  941. {
  942. LUA_TRACE_POINT;
  943. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  944. if (part == NULL) {
  945. lua_pushnil(L);
  946. return 1;
  947. }
  948. lua_pushboolean(L, IS_TEXT_PART_EMPTY(part));
  949. return 1;
  950. }
  951. static gint
  952. lua_textpart_is_html(lua_State *L)
  953. {
  954. LUA_TRACE_POINT;
  955. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  956. if (part == NULL) {
  957. lua_pushnil(L);
  958. return 1;
  959. }
  960. lua_pushboolean(L, IS_TEXT_PART_HTML(part));
  961. return 1;
  962. }
  963. static gint
  964. lua_textpart_get_html(lua_State *L)
  965. {
  966. LUA_TRACE_POINT;
  967. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  968. struct html_content **phc;
  969. if (part == NULL || part->html == NULL) {
  970. lua_pushnil(L);
  971. }
  972. else {
  973. phc = lua_newuserdata(L, sizeof(*phc));
  974. rspamd_lua_setclass(L, rspamd_html_classname, -1);
  975. *phc = part->html;
  976. }
  977. return 1;
  978. }
  979. static gint
  980. lua_textpart_get_language(lua_State *L)
  981. {
  982. LUA_TRACE_POINT;
  983. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  984. if (part != NULL) {
  985. if (part->language != NULL && part->language[0] != '\0') {
  986. lua_pushstring(L, part->language);
  987. return 1;
  988. }
  989. else {
  990. lua_pushnil(L);
  991. }
  992. }
  993. else {
  994. return luaL_error(L, "invalid arguments");
  995. }
  996. return 1;
  997. }
  998. static gint
  999. lua_textpart_get_charset(lua_State *L)
  1000. {
  1001. LUA_TRACE_POINT;
  1002. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  1003. if (part != NULL) {
  1004. if (part->real_charset != NULL) {
  1005. lua_pushstring(L, part->real_charset);
  1006. return 1;
  1007. }
  1008. else {
  1009. lua_pushnil(L);
  1010. }
  1011. }
  1012. else {
  1013. return luaL_error(L, "invalid arguments");
  1014. }
  1015. return 1;
  1016. }
  1017. static gint
  1018. lua_textpart_get_languages(lua_State *L)
  1019. {
  1020. LUA_TRACE_POINT;
  1021. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  1022. guint i;
  1023. struct rspamd_lang_detector_res *cur;
  1024. if (part != NULL) {
  1025. if (part->languages != NULL) {
  1026. lua_createtable(L, part->languages->len, 0);
  1027. PTR_ARRAY_FOREACH(part->languages, i, cur)
  1028. {
  1029. lua_createtable(L, 0, 2);
  1030. lua_pushstring(L, "code");
  1031. lua_pushstring(L, cur->lang);
  1032. lua_settable(L, -3);
  1033. lua_pushstring(L, "prob");
  1034. lua_pushnumber(L, cur->prob);
  1035. lua_settable(L, -3);
  1036. lua_rawseti(L, -2, i + 1);
  1037. }
  1038. }
  1039. else {
  1040. lua_newtable(L);
  1041. }
  1042. }
  1043. else {
  1044. luaL_error(L, "invalid arguments");
  1045. }
  1046. return 1;
  1047. }
/*
 * Per-shingle record produced by lua_shingles_filter: the selected hash
 * plus up to three consecutive stemmed tokens starting at the position of
 * that hash in the filter window.
 */
struct lua_shingle_data {
	uint64_t hash;    /* minimal hash value of the window */
	rspamd_ftok_t t1; /* stemmed token at the minimal hash position */
	rspamd_ftok_t t2; /* the following token, if within the words array */
	rspamd_ftok_t t3; /* the token after that, if within the words array */
};
/*
 * User data handed to lua_shingles_filter.
 */
struct lua_shingle_filter_cbdata {
	struct rspamd_mime_text_part *part; /* part whose utf_words supply the tokens */
	rspamd_mempool_t *pool;             /* pool used to allocate lua_shingle_data */
};
  1058. #define STORE_TOKEN(i, t) \
  1059. do { \
  1060. if ((i) < part->utf_words->len) { \
  1061. word = &g_array_index(part->utf_words, rspamd_stat_token_t, (i)); \
  1062. sd->t.begin = word->stemmed.begin; \
  1063. sd->t.len = word->stemmed.len; \
  1064. } \
  1065. } while (0)
  1066. static uint64_t
  1067. lua_shingles_filter(uint64_t *input, gsize count,
  1068. gint shno, const guchar *key, gpointer ud)
  1069. {
  1070. uint64_t minimal = G_MAXUINT64;
  1071. gsize i, min_idx = 0;
  1072. struct lua_shingle_data *sd;
  1073. rspamd_stat_token_t *word;
  1074. struct lua_shingle_filter_cbdata *cbd = (struct lua_shingle_filter_cbdata *) ud;
  1075. struct rspamd_mime_text_part *part;
  1076. part = cbd->part;
  1077. for (i = 0; i < count; i++) {
  1078. if (minimal > input[i]) {
  1079. minimal = input[i];
  1080. min_idx = i;
  1081. }
  1082. }
  1083. sd = rspamd_mempool_alloc0(cbd->pool, sizeof(*sd));
  1084. sd->hash = minimal;
  1085. STORE_TOKEN(min_idx, t1);
  1086. STORE_TOKEN(min_idx + 1, t2);
  1087. STORE_TOKEN(min_idx + 2, t3);
  1088. return GPOINTER_TO_SIZE(sd);
  1089. }
  1090. #undef STORE_TOKEN
  1091. static gint
  1092. lua_textpart_get_fuzzy_hashes(lua_State *L)
  1093. {
  1094. LUA_TRACE_POINT;
  1095. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  1096. rspamd_mempool_t *pool = rspamd_lua_check_mempool(L, 2);
  1097. guchar key[rspamd_cryptobox_HASHBYTES], digest[rspamd_cryptobox_HASHBYTES],
  1098. hexdigest[rspamd_cryptobox_HASHBYTES * 2 + 1], numbuf[64];
  1099. struct rspamd_shingle *sgl;
  1100. guint i;
  1101. struct lua_shingle_data *sd;
  1102. rspamd_cryptobox_hash_state_t st;
  1103. rspamd_stat_token_t *word;
  1104. struct lua_shingle_filter_cbdata cbd;
  1105. if (part == NULL || pool == NULL) {
  1106. return luaL_error(L, "invalid arguments");
  1107. }
  1108. if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL) {
  1109. lua_pushnil(L);
  1110. lua_pushnil(L);
  1111. }
  1112. else {
  1113. /* TODO: add keys and algorithms support */
  1114. rspamd_cryptobox_hash(key, "rspamd", strlen("rspamd"), NULL, 0);
  1115. /* TODO: add short text support */
  1116. /* Calculate direct hash */
  1117. rspamd_cryptobox_hash_init(&st, key, rspamd_cryptobox_HASHKEYBYTES);
  1118. for (i = 0; i < part->utf_words->len; i++) {
  1119. word = &g_array_index(part->utf_words, rspamd_stat_token_t, i);
  1120. rspamd_cryptobox_hash_update(&st,
  1121. word->stemmed.begin, word->stemmed.len);
  1122. }
  1123. rspamd_cryptobox_hash_final(&st, digest);
  1124. rspamd_encode_hex_buf(digest, sizeof(digest), hexdigest,
  1125. sizeof(hexdigest));
  1126. lua_pushlstring(L, hexdigest, sizeof(hexdigest) - 1);
  1127. cbd.pool = pool;
  1128. cbd.part = part;
  1129. sgl = rspamd_shingles_from_text(part->utf_words, key,
  1130. pool, lua_shingles_filter, &cbd, RSPAMD_SHINGLES_MUMHASH);
  1131. if (sgl == NULL) {
  1132. lua_pushnil(L);
  1133. }
  1134. else {
  1135. lua_createtable(L, G_N_ELEMENTS(sgl->hashes), 0);
  1136. for (i = 0; i < G_N_ELEMENTS(sgl->hashes); i++) {
  1137. sd = GSIZE_TO_POINTER(sgl->hashes[i]);
  1138. lua_createtable(L, 4, 0);
  1139. rspamd_snprintf(numbuf, sizeof(numbuf), "%uL", sd->hash);
  1140. lua_pushstring(L, numbuf);
  1141. lua_rawseti(L, -2, 1);
  1142. /* Tokens */
  1143. lua_pushlstring(L, sd->t1.begin, sd->t1.len);
  1144. lua_rawseti(L, -2, 2);
  1145. lua_pushlstring(L, sd->t2.begin, sd->t2.len);
  1146. lua_rawseti(L, -2, 3);
  1147. lua_pushlstring(L, sd->t3.begin, sd->t3.len);
  1148. lua_rawseti(L, -2, 4);
  1149. lua_rawseti(L, -2, i + 1); /* Store table */
  1150. }
  1151. }
  1152. }
  1153. return 2;
  1154. }
  1155. static gint
  1156. lua_textpart_get_mimepart(lua_State *L)
  1157. {
  1158. LUA_TRACE_POINT;
  1159. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  1160. struct rspamd_mime_part **pmime;
  1161. if (part != NULL) {
  1162. if (part->mime_part != NULL) {
  1163. pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
  1164. rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
  1165. *pmime = part->mime_part;
  1166. return 1;
  1167. }
  1168. }
  1169. lua_pushnil(L);
  1170. return 1;
  1171. }
  1172. /***
  1173. * @method mime_part:get_stats()
  1174. * Returns a table with the following data:
  1175. * -
  1176. * - `lines`: number of lines
  1177. * - `spaces`: number of spaces
  1178. * - `double_spaces`: double spaces
  1179. * - `empty_lines`: number of empty lines
  1180. * - `non_ascii_characters`: number of non ascii characters
  1181. * - `ascii_characters`: number of ascii characters
  1182. * @return {table} table of stats
  1183. */
  1184. static gint
  1185. lua_textpart_get_stats(lua_State *L)
  1186. {
  1187. LUA_TRACE_POINT;
  1188. struct rspamd_mime_text_part *part = lua_check_textpart(L);
  1189. if (part != NULL) {
  1190. lua_createtable(L, 0, 9);
  1191. lua_pushstring(L, "lines");
  1192. lua_pushinteger(L, part->nlines);
  1193. lua_settable(L, -3);
  1194. lua_pushstring(L, "empty_lines");
  1195. lua_pushinteger(L, part->empty_lines);
  1196. lua_settable(L, -3);
  1197. lua_pushstring(L, "spaces");
  1198. lua_pushinteger(L, part->spaces);
  1199. lua_settable(L, -3);
  1200. lua_pushstring(L, "non_spaces");
  1201. lua_pushinteger(L, part->non_spaces);
  1202. lua_settable(L, -3);
  1203. lua_pushstring(L, "double_spaces");
  1204. lua_pushinteger(L, part->double_spaces);
  1205. lua_settable(L, -3);
  1206. lua_pushstring(L, "ascii_characters");
  1207. lua_pushinteger(L, part->ascii_chars);
  1208. lua_settable(L, -3);
  1209. lua_pushstring(L, "non_ascii_characters");
  1210. lua_pushinteger(L, part->non_ascii_chars);
  1211. lua_settable(L, -3);
  1212. lua_pushstring(L, "capital_letters");
  1213. lua_pushinteger(L, part->capital_letters);
  1214. lua_settable(L, -3);
  1215. lua_pushstring(L, "numeric_characters");
  1216. lua_pushinteger(L, part->numeric_characters);
  1217. lua_settable(L, -3);
  1218. }
  1219. else {
  1220. return luaL_error(L, "invalid arguments");
  1221. }
  1222. return 1;
  1223. }
  1224. /* Mimepart implementation */
  1225. static gint
  1226. lua_mimepart_get_content(lua_State *L)
  1227. {
  1228. LUA_TRACE_POINT;
  1229. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1230. struct rspamd_lua_text *t;
  1231. if (part == NULL) {
  1232. lua_pushnil(L);
  1233. return 1;
  1234. }
  1235. t = lua_newuserdata(L, sizeof(*t));
  1236. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  1237. t->start = part->parsed_data.begin;
  1238. t->len = part->parsed_data.len;
  1239. t->flags = 0;
  1240. if (lua_is_text_binary(t)) {
  1241. t->flags |= RSPAMD_TEXT_FLAG_BINARY;
  1242. }
  1243. return 1;
  1244. }
  1245. static gint
  1246. lua_mimepart_get_raw_content(lua_State *L)
  1247. {
  1248. LUA_TRACE_POINT;
  1249. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1250. struct rspamd_lua_text *t;
  1251. if (part == NULL) {
  1252. lua_pushnil(L);
  1253. return 1;
  1254. }
  1255. t = lua_newuserdata(L, sizeof(*t));
  1256. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  1257. t->start = part->raw_data.begin;
  1258. t->len = part->raw_data.len;
  1259. t->flags = 0;
  1260. return 1;
  1261. }
  1262. static gint
  1263. lua_mimepart_get_length(lua_State *L)
  1264. {
  1265. LUA_TRACE_POINT;
  1266. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1267. if (part == NULL) {
  1268. lua_pushnil(L);
  1269. return 1;
  1270. }
  1271. lua_pushinteger(L, part->parsed_data.len);
  1272. return 1;
  1273. }
  1274. static gint
  1275. lua_mimepart_get_type_common(lua_State *L, struct rspamd_content_type *ct,
  1276. gboolean full)
  1277. {
  1278. GHashTableIter it;
  1279. gpointer k, v;
  1280. struct rspamd_content_type_param *param;
  1281. if (ct == NULL) {
  1282. lua_pushnil(L);
  1283. lua_pushnil(L);
  1284. return 2;
  1285. }
  1286. lua_pushlstring(L, ct->type.begin, ct->type.len);
  1287. lua_pushlstring(L, ct->subtype.begin, ct->subtype.len);
  1288. if (!full) {
  1289. return 2;
  1290. }
  1291. lua_createtable(L, 0, 2 + (ct->attrs ? g_hash_table_size(ct->attrs) : 0));
  1292. if (ct->charset.len > 0) {
  1293. lua_pushstring(L, "charset");
  1294. lua_pushlstring(L, ct->charset.begin, ct->charset.len);
  1295. lua_settable(L, -3);
  1296. }
  1297. if (ct->boundary.len > 0) {
  1298. lua_pushstring(L, "boundary");
  1299. lua_pushlstring(L, ct->boundary.begin, ct->boundary.len);
  1300. lua_settable(L, -3);
  1301. }
  1302. if (ct->attrs) {
  1303. g_hash_table_iter_init(&it, ct->attrs);
  1304. while (g_hash_table_iter_next(&it, &k, &v)) {
  1305. param = v;
  1306. if (param->name.len > 0 && param->value.len > 0) {
  1307. /* TODO: think about multiple values here */
  1308. lua_pushlstring(L, param->name.begin, param->name.len);
  1309. lua_pushlstring(L, param->value.begin, param->value.len);
  1310. lua_settable(L, -3);
  1311. }
  1312. }
  1313. }
  1314. return 3;
  1315. }
  1316. static gint
  1317. lua_mimepart_get_type(lua_State *L)
  1318. {
  1319. LUA_TRACE_POINT;
  1320. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1321. if (part == NULL) {
  1322. return luaL_error(L, "invalid arguments");
  1323. }
  1324. return lua_mimepart_get_type_common(L, part->ct, FALSE);
  1325. }
  1326. static gint
  1327. lua_mimepart_get_type_full(lua_State *L)
  1328. {
  1329. LUA_TRACE_POINT;
  1330. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1331. if (part == NULL) {
  1332. return luaL_error(L, "invalid arguments");
  1333. }
  1334. return lua_mimepart_get_type_common(L, part->ct, TRUE);
  1335. }
  1336. static gint
  1337. lua_mimepart_get_detected_type(lua_State *L)
  1338. {
  1339. LUA_TRACE_POINT;
  1340. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1341. if (part == NULL) {
  1342. return luaL_error(L, "invalid arguments");
  1343. }
  1344. return lua_mimepart_get_type_common(L, part->detected_ct, FALSE);
  1345. }
  1346. static gint
  1347. lua_mimepart_get_detected_type_full(lua_State *L)
  1348. {
  1349. LUA_TRACE_POINT;
  1350. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1351. if (part == NULL) {
  1352. return luaL_error(L, "invalid arguments");
  1353. }
  1354. return lua_mimepart_get_type_common(L, part->detected_ct, TRUE);
  1355. }
  1356. static gint
  1357. lua_mimepart_get_detected_ext(lua_State *L)
  1358. {
  1359. LUA_TRACE_POINT;
  1360. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1361. if (part == NULL) {
  1362. return luaL_error(L, "invalid arguments");
  1363. }
  1364. if (part->detected_ext) {
  1365. lua_pushstring(L, part->detected_ext);
  1366. }
  1367. else {
  1368. lua_pushnil(L);
  1369. }
  1370. return 1;
  1371. }
  1372. static gint
  1373. lua_mimepart_get_cte(lua_State *L)
  1374. {
  1375. LUA_TRACE_POINT;
  1376. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1377. if (part == NULL) {
  1378. lua_pushnil(L);
  1379. return 1;
  1380. }
  1381. lua_pushstring(L, rspamd_cte_to_string(part->cte));
  1382. return 1;
  1383. }
  1384. static gint
  1385. lua_mimepart_get_filename(lua_State *L)
  1386. {
  1387. LUA_TRACE_POINT;
  1388. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1389. if (part == NULL || part->cd == NULL || part->cd->filename.len == 0) {
  1390. lua_pushnil(L);
  1391. return 1;
  1392. }
  1393. lua_pushlstring(L, part->cd->filename.begin, part->cd->filename.len);
  1394. return 1;
  1395. }
  1396. static gint
  1397. lua_mimepart_get_boundary(lua_State *L)
  1398. {
  1399. LUA_TRACE_POINT;
  1400. struct rspamd_mime_part *part = lua_check_mimepart(L), *parent;
  1401. if (part == NULL) {
  1402. return luaL_error(L, "invalid arguments");
  1403. }
  1404. if (IS_PART_MULTIPART(part)) {
  1405. lua_pushlstring(L, part->specific.mp->boundary.begin,
  1406. part->specific.mp->boundary.len);
  1407. }
  1408. else {
  1409. parent = part->parent_part;
  1410. if (!parent || !IS_PART_MULTIPART(parent)) {
  1411. lua_pushnil(L);
  1412. }
  1413. else {
  1414. lua_pushlstring(L, parent->specific.mp->boundary.begin,
  1415. parent->specific.mp->boundary.len);
  1416. }
  1417. }
  1418. return 1;
  1419. }
  1420. static gint
  1421. lua_mimepart_get_enclosing_boundary(lua_State *L)
  1422. {
  1423. LUA_TRACE_POINT;
  1424. struct rspamd_mime_part *part = lua_check_mimepart(L), *parent;
  1425. if (part == NULL) {
  1426. return luaL_error(L, "invalid arguments");
  1427. }
  1428. parent = part->parent_part;
  1429. if (!parent || !IS_PART_MULTIPART(parent)) {
  1430. lua_pushnil(L);
  1431. }
  1432. else {
  1433. lua_pushlstring(L, parent->specific.mp->boundary.begin,
  1434. parent->specific.mp->boundary.len);
  1435. }
  1436. return 1;
  1437. }
  1438. static gint
  1439. lua_mimepart_get_header_common(lua_State *L, enum rspamd_lua_task_header_type how)
  1440. {
  1441. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1442. const gchar *name;
  1443. gboolean strong = FALSE;
  1444. name = luaL_checkstring(L, 2);
  1445. if (name && part) {
  1446. if (lua_isboolean(L, 3)) {
  1447. strong = lua_toboolean(L, 3);
  1448. }
  1449. return rspamd_lua_push_header_array(L,
  1450. name,
  1451. rspamd_message_get_header_from_hash(part->raw_headers, name, FALSE),
  1452. how,
  1453. strong);
  1454. }
  1455. lua_pushnil(L);
  1456. return 1;
  1457. }
  1458. static gint
  1459. lua_mimepart_get_header_full(lua_State *L)
  1460. {
  1461. LUA_TRACE_POINT;
  1462. return lua_mimepart_get_header_common(L, RSPAMD_TASK_HEADER_PUSH_FULL);
  1463. }
  1464. static gint
  1465. lua_mimepart_get_header(lua_State *L)
  1466. {
  1467. LUA_TRACE_POINT;
  1468. return lua_mimepart_get_header_common(L, RSPAMD_TASK_HEADER_PUSH_SIMPLE);
  1469. }
  1470. static gint
  1471. lua_mimepart_get_header_raw(lua_State *L)
  1472. {
  1473. LUA_TRACE_POINT;
  1474. return lua_mimepart_get_header_common(L, RSPAMD_TASK_HEADER_PUSH_RAW);
  1475. }
  1476. static gint
  1477. lua_mimepart_get_header_count(lua_State *L)
  1478. {
  1479. LUA_TRACE_POINT;
  1480. return lua_mimepart_get_header_common(L, RSPAMD_TASK_HEADER_PUSH_COUNT);
  1481. }
  1482. static gint
  1483. lua_mimepart_get_raw_headers(lua_State *L)
  1484. {
  1485. LUA_TRACE_POINT;
  1486. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1487. struct rspamd_lua_text *t;
  1488. if (part) {
  1489. t = lua_newuserdata(L, sizeof(*t));
  1490. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  1491. t->start = part->raw_headers_str;
  1492. t->len = part->raw_headers_len;
  1493. t->flags = 0;
  1494. }
  1495. else {
  1496. return luaL_error(L, "invalid arguments");
  1497. }
  1498. return 1;
  1499. }
  1500. static gint
  1501. lua_mimepart_get_headers(lua_State *L)
  1502. {
  1503. LUA_TRACE_POINT;
  1504. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1505. bool need_modified = lua_isnoneornil(L, 2) ? false : lua_toboolean(L, 2);
  1506. if (part) {
  1507. struct rspamd_mime_header *cur;
  1508. int i = 1;
  1509. lua_createtable(L, rspamd_mime_headers_count(part->raw_headers), 0);
  1510. LL_FOREACH2(part->headers_order, cur, ord_next)
  1511. {
  1512. if (need_modified && cur->modified_chain) {
  1513. struct rspamd_mime_header *cur_modified;
  1514. LL_FOREACH(cur->modified_chain, cur_modified)
  1515. {
  1516. rspamd_lua_push_header(L, cur_modified, RSPAMD_TASK_HEADER_PUSH_FULL);
  1517. lua_rawseti(L, -2, i++);
  1518. }
  1519. }
  1520. else {
  1521. rspamd_lua_push_header(L, cur, RSPAMD_TASK_HEADER_PUSH_FULL);
  1522. lua_rawseti(L, -2, i++);
  1523. }
  1524. }
  1525. }
  1526. else {
  1527. return luaL_error(L, "invalid arguments");
  1528. }
  1529. return 1;
  1530. }
  1531. static gint
  1532. lua_mimepart_is_image(lua_State *L)
  1533. {
  1534. LUA_TRACE_POINT;
  1535. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1536. if (part == NULL) {
  1537. return luaL_error(L, "invalid arguments");
  1538. }
  1539. lua_pushboolean(L, part->part_type == RSPAMD_MIME_PART_IMAGE);
  1540. return 1;
  1541. }
  1542. static gint
  1543. lua_mimepart_is_archive(lua_State *L)
  1544. {
  1545. LUA_TRACE_POINT;
  1546. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1547. if (part == NULL) {
  1548. return luaL_error(L, "invalid arguments");
  1549. }
  1550. lua_pushboolean(L, part->part_type == RSPAMD_MIME_PART_ARCHIVE);
  1551. return 1;
  1552. }
  1553. static gint
  1554. lua_mimepart_is_multipart(lua_State *L)
  1555. {
  1556. LUA_TRACE_POINT;
  1557. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1558. if (part == NULL) {
  1559. return luaL_error(L, "invalid arguments");
  1560. }
  1561. lua_pushboolean(L, IS_PART_MULTIPART(part) ? true : false);
  1562. return 1;
  1563. }
  1564. static gint
  1565. lua_mimepart_is_message(lua_State *L)
  1566. {
  1567. LUA_TRACE_POINT;
  1568. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1569. if (part == NULL) {
  1570. return luaL_error(L, "invalid arguments");
  1571. }
  1572. lua_pushboolean(L, IS_PART_MESSAGE(part) ? true : false);
  1573. return 1;
  1574. }
  1575. static gint
  1576. lua_mimepart_is_attachment(lua_State *L)
  1577. {
  1578. LUA_TRACE_POINT;
  1579. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1580. if (part == NULL) {
  1581. return luaL_error(L, "invalid arguments");
  1582. }
  1583. if (part->cd && part->cd->type == RSPAMD_CT_ATTACHMENT) {
  1584. lua_pushboolean(L, true);
  1585. }
  1586. else {
  1587. /* if has_name and not (image and Content-ID_header_present) */
  1588. if (part->cd && part->cd->filename.len > 0) {
  1589. if (part->part_type != RSPAMD_MIME_PART_IMAGE &&
  1590. rspamd_message_get_header_from_hash(part->raw_headers,
  1591. "Content-Id", FALSE) == NULL) {
  1592. /* Filename is presented but no content id and not image */
  1593. lua_pushboolean(L, true);
  1594. }
  1595. else {
  1596. /* Image or an embedded object */
  1597. lua_pushboolean(L, false);
  1598. }
  1599. }
  1600. else {
  1601. /* No filename */
  1602. lua_pushboolean(L, false);
  1603. }
  1604. }
  1605. return 1;
  1606. }
  1607. static gint
  1608. lua_mimepart_is_text(lua_State *L)
  1609. {
  1610. LUA_TRACE_POINT;
  1611. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1612. if (part == NULL) {
  1613. return luaL_error(L, "invalid arguments");
  1614. }
  1615. lua_pushboolean(L, part->part_type == RSPAMD_MIME_PART_TEXT);
  1616. return 1;
  1617. }
  1618. static gint
  1619. lua_mimepart_is_broken(lua_State *L)
  1620. {
  1621. LUA_TRACE_POINT;
  1622. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1623. if (part == NULL) {
  1624. return luaL_error(L, "invalid arguments");
  1625. }
  1626. if (part->ct) {
  1627. lua_pushboolean(L, (part->ct->flags & RSPAMD_CONTENT_TYPE_BROKEN) ? true : false);
  1628. }
  1629. else {
  1630. lua_pushboolean(L, false);
  1631. }
  1632. return 1;
  1633. }
  1634. static gint
  1635. lua_mimepart_get_image(lua_State *L)
  1636. {
  1637. LUA_TRACE_POINT;
  1638. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1639. struct rspamd_image **pimg;
  1640. if (part == NULL) {
  1641. return luaL_error(L, "invalid arguments");
  1642. }
  1643. if (part->part_type != RSPAMD_MIME_PART_IMAGE || part->specific.img == NULL) {
  1644. lua_pushnil(L);
  1645. }
  1646. else {
  1647. pimg = lua_newuserdata(L, sizeof(*pimg));
  1648. *pimg = part->specific.img;
  1649. rspamd_lua_setclass(L, rspamd_image_classname, -1);
  1650. }
  1651. return 1;
  1652. }
  1653. static gint
  1654. lua_mimepart_get_archive(lua_State *L)
  1655. {
  1656. LUA_TRACE_POINT;
  1657. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1658. struct rspamd_archive **parch;
  1659. if (part == NULL) {
  1660. return luaL_error(L, "invalid arguments");
  1661. }
  1662. if (part->part_type != RSPAMD_MIME_PART_ARCHIVE || part->specific.arch == NULL) {
  1663. lua_pushnil(L);
  1664. }
  1665. else {
  1666. parch = lua_newuserdata(L, sizeof(*parch));
  1667. *parch = part->specific.arch;
  1668. rspamd_lua_setclass(L, rspamd_archive_classname, -1);
  1669. }
  1670. return 1;
  1671. }
  1672. static gint
  1673. lua_mimepart_get_children(lua_State *L)
  1674. {
  1675. LUA_TRACE_POINT;
  1676. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1677. struct rspamd_mime_part **pcur, *cur;
  1678. guint i;
  1679. if (part == NULL) {
  1680. return luaL_error(L, "invalid arguments");
  1681. }
  1682. if (!IS_PART_MULTIPART(part) || part->specific.mp->children == NULL) {
  1683. lua_pushnil(L);
  1684. }
  1685. else {
  1686. lua_createtable(L, part->specific.mp->children->len, 0);
  1687. PTR_ARRAY_FOREACH(part->specific.mp->children, i, cur)
  1688. {
  1689. pcur = lua_newuserdata(L, sizeof(*pcur));
  1690. *pcur = cur;
  1691. rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
  1692. lua_rawseti(L, -2, i + 1);
  1693. }
  1694. }
  1695. return 1;
  1696. }
  1697. static gint
  1698. lua_mimepart_get_parent(lua_State *L)
  1699. {
  1700. LUA_TRACE_POINT;
  1701. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1702. struct rspamd_mime_part **pparent;
  1703. if (part == NULL) {
  1704. return luaL_error(L, "invalid arguments");
  1705. }
  1706. if (part->parent_part) {
  1707. pparent = lua_newuserdata(L, sizeof(*pparent));
  1708. *pparent = part->parent_part;
  1709. rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
  1710. }
  1711. else {
  1712. lua_pushnil(L);
  1713. }
  1714. return 1;
  1715. }
  1716. static gint
  1717. lua_mimepart_get_text(lua_State *L)
  1718. {
  1719. LUA_TRACE_POINT;
  1720. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1721. struct rspamd_mime_text_part **ppart;
  1722. if (part == NULL) {
  1723. return luaL_error(L, "invalid arguments");
  1724. }
  1725. if (part->part_type != RSPAMD_MIME_PART_TEXT || part->specific.txt == NULL) {
  1726. lua_pushnil(L);
  1727. }
  1728. else {
  1729. ppart = lua_newuserdata(L, sizeof(*ppart));
  1730. *ppart = part->specific.txt;
  1731. rspamd_lua_setclass(L, rspamd_textpart_classname, -1);
  1732. }
  1733. return 1;
  1734. }
  1735. static gint
  1736. lua_mimepart_get_digest(lua_State *L)
  1737. {
  1738. LUA_TRACE_POINT;
  1739. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1740. gchar digestbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  1741. if (part == NULL) {
  1742. return luaL_error(L, "invalid arguments");
  1743. }
  1744. memset(digestbuf, 0, sizeof(digestbuf));
  1745. rspamd_encode_hex_buf(part->digest, sizeof(part->digest),
  1746. digestbuf, sizeof(digestbuf));
  1747. lua_pushstring(L, digestbuf);
  1748. return 1;
  1749. }
  1750. static gint
  1751. lua_mimepart_get_id(lua_State *L)
  1752. {
  1753. LUA_TRACE_POINT;
  1754. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1755. if (part == NULL) {
  1756. return luaL_error(L, "invalid arguments");
  1757. }
  1758. lua_pushinteger(L, part->part_number);
  1759. return 1;
  1760. }
  1761. static gint
  1762. lua_mimepart_headers_foreach(lua_State *L)
  1763. {
  1764. LUA_TRACE_POINT;
  1765. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1766. enum rspamd_lua_task_header_type how = RSPAMD_TASK_HEADER_PUSH_SIMPLE;
  1767. struct rspamd_lua_regexp *re = NULL;
  1768. struct rspamd_mime_header *hdr, *cur;
  1769. gint old_top;
  1770. if (part && lua_isfunction(L, 2)) {
  1771. if (lua_istable(L, 3)) {
  1772. lua_pushstring(L, "full");
  1773. lua_gettable(L, 3);
  1774. if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) {
  1775. how = RSPAMD_TASK_HEADER_PUSH_FULL;
  1776. }
  1777. lua_pop(L, 1);
  1778. lua_pushstring(L, "raw");
  1779. lua_gettable(L, 3);
  1780. if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) {
  1781. how = RSPAMD_TASK_HEADER_PUSH_RAW;
  1782. }
  1783. lua_pop(L, 1);
  1784. lua_pushstring(L, "regexp");
  1785. lua_gettable(L, 3);
  1786. if (lua_isuserdata(L, -1)) {
  1787. RSPAMD_LUA_CHECK_UDATA_PTR_OR_RETURN(L, -1, rspamd_regexp_classname,
  1788. struct rspamd_lua_regexp, re);
  1789. }
  1790. lua_pop(L, 1);
  1791. }
  1792. if (part->headers_order) {
  1793. hdr = part->headers_order;
  1794. LL_FOREACH2(hdr, cur, ord_next)
  1795. {
  1796. if (re && re->re) {
  1797. if (!rspamd_regexp_match(re->re, cur->name,
  1798. strlen(cur->name), FALSE)) {
  1799. continue;
  1800. }
  1801. }
  1802. old_top = lua_gettop(L);
  1803. lua_pushvalue(L, 2);
  1804. lua_pushstring(L, cur->name);
  1805. rspamd_lua_push_header(L, cur, how);
  1806. if (lua_pcall(L, 2, LUA_MULTRET, 0) != 0) {
  1807. msg_err("call to header_foreach failed: %s",
  1808. lua_tostring(L, -1));
  1809. lua_settop(L, old_top);
  1810. break;
  1811. }
  1812. else {
  1813. if (lua_gettop(L) > old_top) {
  1814. if (lua_isboolean(L, old_top + 1)) {
  1815. if (lua_toboolean(L, old_top + 1)) {
  1816. lua_settop(L, old_top);
  1817. break;
  1818. }
  1819. }
  1820. }
  1821. }
  1822. lua_settop(L, old_top);
  1823. }
  1824. }
  1825. }
  1826. return 0;
  1827. }
  1828. static gint
  1829. lua_mimepart_get_specific(lua_State *L)
  1830. {
  1831. LUA_TRACE_POINT;
  1832. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1833. if (part == NULL) {
  1834. return luaL_error(L, "invalid arguments");
  1835. }
  1836. if (part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
  1837. lua_pushnil(L);
  1838. }
  1839. else {
  1840. lua_rawgeti(L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
  1841. }
  1842. return 1;
  1843. }
  1844. static gint
  1845. lua_mimepart_get_urls(lua_State *L)
  1846. {
  1847. LUA_TRACE_POINT;
  1848. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1849. if (part == NULL) {
  1850. return luaL_error(L, "invalid arguments");
  1851. }
  1852. struct lua_tree_cb_data cb;
  1853. struct rspamd_url *u;
  1854. static const gint default_protocols_mask = PROTOCOL_HTTP | PROTOCOL_HTTPS |
  1855. PROTOCOL_FILE | PROTOCOL_FTP;
  1856. gsize sz, max_urls = 0, i;
  1857. if (part->urls == NULL) {
  1858. lua_newtable(L);
  1859. return 1;
  1860. }
  1861. if (!lua_url_cbdata_fill(L, 2, &cb, default_protocols_mask,
  1862. ~(0), max_urls)) {
  1863. return luaL_error(L, "invalid arguments");
  1864. }
  1865. sz = part->urls->len;
  1866. lua_createtable(L, sz, 0);
  1867. PTR_ARRAY_FOREACH(part->urls, i, u)
  1868. {
  1869. lua_tree_url_callback(u, u, &cb);
  1870. }
  1871. lua_url_cbdata_dtor(&cb);
  1872. return 1;
  1873. }
  1874. static gint
  1875. lua_mimepart_is_specific(lua_State *L)
  1876. {
  1877. LUA_TRACE_POINT;
  1878. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1879. if (part == NULL) {
  1880. return luaL_error(L, "invalid arguments");
  1881. }
  1882. lua_pushboolean(L, part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA);
  1883. return 1;
  1884. }
  1885. static gint
  1886. lua_mimepart_set_specific(lua_State *L)
  1887. {
  1888. LUA_TRACE_POINT;
  1889. struct rspamd_mime_part *part = lua_check_mimepart(L);
  1890. if (part == NULL || lua_isnil(L, 2)) {
  1891. return luaL_error(L, "invalid arguments");
  1892. }
  1893. if (part->part_type != RSPAMD_MIME_PART_UNDEFINED &&
  1894. part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
  1895. return luaL_error(L,
  1896. "internal error: trying to set specific lua content on part of type %d",
  1897. part->part_type);
  1898. }
  1899. if (part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
  1900. /* Push old specific data */
  1901. lua_rawgeti(L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
  1902. luaL_unref(L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
  1903. }
  1904. else {
  1905. part->part_type = RSPAMD_MIME_PART_CUSTOM_LUA;
  1906. lua_pushnil(L);
  1907. }
  1908. /* Now, we push argument on the position 2 and save its reference */
  1909. lua_pushvalue(L, 2);
  1910. part->specific.lua_specific.cbref = luaL_ref(L, LUA_REGISTRYINDEX);
  1911. /* Now stack has just a return value as luaL_ref removes value from stack */
  1912. gint ltype = lua_type(L, 2);
  1913. switch (ltype) {
  1914. case LUA_TTABLE:
  1915. part->specific.lua_specific.type = RSPAMD_LUA_PART_TABLE;
  1916. break;
  1917. case LUA_TSTRING:
  1918. part->specific.lua_specific.type = RSPAMD_LUA_PART_STRING;
  1919. break;
  1920. case LUA_TUSERDATA:
  1921. if (rspamd_lua_check_udata_maybe(L, 2, rspamd_text_classname)) {
  1922. part->specific.lua_specific.type = RSPAMD_LUA_PART_TEXT;
  1923. }
  1924. else {
  1925. part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
  1926. }
  1927. break;
  1928. case LUA_TFUNCTION:
  1929. part->specific.lua_specific.type = RSPAMD_LUA_PART_FUNCTION;
  1930. break;
  1931. default:
  1932. part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
  1933. break;
  1934. }
  1935. return 1;
  1936. }
  1937. void luaopen_textpart(lua_State *L)
  1938. {
  1939. rspamd_lua_new_class(L, rspamd_textpart_classname, textpartlib_m);
  1940. lua_pop(L, 1);
  1941. }
  1942. void luaopen_mimepart(lua_State *L)
  1943. {
  1944. rspamd_lua_new_class(L, rspamd_mimepart_classname, mimepartlib_m);
  1945. lua_pop(L, 1);
  1946. }