Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

lua_text.c 35KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633
  1. /*-
  2. * Copyright 2019 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "libcryptobox/cryptobox.h"
  18. #include "contrib/fastutf8/fastutf8.h"
  19. #include "unix-std.h"
  20. /***
  21. * @module rspamd_text
  22. * This module provides access to opaque text structures used widely to prevent
  23. * copying between Lua and C for various concerns: performance, security etc...
  24. *
  25. * You can convert rspamd_text into string but it will copy data.
  26. */
  27. /***
  28. * @function rspamd_text.fromstring(str)
  29. * Creates rspamd_text from Lua string (copied to the text)
  30. * @param {string} str string to use
  31. * @return {rspamd_text} resulting text
  32. */
  33. LUA_FUNCTION_DEF (text, fromstring);
  34. /***
  35. * @function rspamd_text.null()
  36. * Creates rspamd_text with NULL pointer for testing purposes
  37. * This function takes no arguments
  38. * @return {rspamd_text} resulting text
  39. */
  40. LUA_FUNCTION_DEF (text, null);
  41. /***
  42. * @function rspamd_text.randombytes(nbytes)
  43. * Creates rspamd_text with random bytes inside (raw bytes)
  44. * @param {number} nbytes number of random bytes generated
  45. * @return {rspamd_text} random bytes text
  46. */
  47. LUA_FUNCTION_DEF (text, randombytes);
  48. /***
  49. * @function rspamd_text.fromtable(tbl[, delim])
  50. * Same as `table.concat` but generates rspamd_text instead of the Lua string
  51. * @param {table} tbl table to use
  52. * @param {string} delim optional delimiter
  53. * @return {rspamd_text} resulting text
  54. */
  55. LUA_FUNCTION_DEF (text, fromtable);
  56. /***
  57. * @method rspamd_text:len()
  58. * Returns length of a string
  59. * @return {number} length of string in **bytes**
  60. */
  61. LUA_FUNCTION_DEF (text, len);
  62. /***
  63. * @method rspamd_text:str()
  64. * Converts text to string by copying its content
  65. * @return {string} copy of text as Lua string
  66. */
  67. LUA_FUNCTION_DEF (text, str);
  68. /***
  69. * @method rspamd_text:ptr()
  70. * Converts text to lightuserdata
  71. * @return {lightuserdata} pointer value of rspamd_text
  72. */
  73. LUA_FUNCTION_DEF (text, ptr);
  74. /***
  75. * @method rspamd_text:save_in_file(fname[, mode])
  76. * Saves text in file
  77. * @return {boolean} true if save has been completed
  78. */
  79. LUA_FUNCTION_DEF (text, save_in_file);
  80. /***
  81. * @method rspamd_text:span(start[, len])
  82. * Returns a span for lua_text starting at pos [start] (1 indexed) and with
  83. * length `len` (or to the end of the text)
  84. * @param {integer} start start index
  85. * @param {integer} len length of span
  86. * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
  87. */
  88. LUA_FUNCTION_DEF (text, span);
  89. /***
  90. * @method rspamd_text:sub(start[, end])
  91. * Returns a substring for lua_text similar to string.sub from Lua
  92. * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
  93. */
  94. LUA_FUNCTION_DEF (text, sub);
  95. /***
  96. * @method rspamd_text:lines([stringify])
  97. * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
  98. * @param {boolean} stringify stringify lines
  99. * @return {iterator} iterator triplet
  100. */
  101. LUA_FUNCTION_DEF (text, lines);
  102. /***
  103. * @method rspamd_text:split(regexp, [stringify])
  104. * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
  105. * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
  106. * @param {boolean} stringify stringify lines
  107. * @return {iterator} iterator triplet
  108. */
  109. LUA_FUNCTION_DEF (text, split);
  110. /***
  111. * @method rspamd_text:at(pos)
  112. * Returns a byte at the position `pos`
  113. * @param {integer} pos index
  114. * @return {integer} byte at the position `pos` or nil if pos out of bound
  115. */
  116. LUA_FUNCTION_DEF (text, at);
  117. /***
  118. * @method rspamd_text:memchr(chr, [reverse])
  119. * Returns the first or the last position of the character `chr` in the text or
  120. * -1 in case if a character has not been found. Indexes start from `1`
  121. * @param {string/number} chr character or a character code to find
  122. * @param {boolean} reverse last character if `true`
  123. * @return {integer} position of the character or `-1`
  124. */
  125. LUA_FUNCTION_DEF (text, memchr);
  126. /***
  127. * @method rspamd_text:bytes()
  128. * Converts text to an array of bytes
  129. * @return {table|integer} bytes in the array (as unsigned char)
  130. */
  131. LUA_FUNCTION_DEF (text, bytes);
  132. /***
  133. * @method rspamd_text:lower([is_utf, [inplace]])
  134. * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
  135. * @param {boolean} is_utf apply utf8 lowercase
  136. * @param {boolean} inplace lowercase the original text
  137. * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
  138. */
  139. LUA_FUNCTION_DEF (text, lower);
  140. LUA_FUNCTION_DEF (text, take_ownership);
  141. /***
  142. * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
  143. * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
  144. * where all chars from `set_to_exclude` are removed
  145. * Patterns supported:
  146. *
  147. * - %s - all space characters
  148. * - %n - all newline characters
  149. * - %c - all control characters (it includes 8bit characters and spaces)
  150. * - %8 - all 8 bit characters
  151. * - %% - just a percent character
  152. *
  153. * @param {string} set_to_exclude characters to exclude
  154. * @param {boolean} always_copy always copy the source text
  155. * @return {rspamd_text} modified or copied text
  156. */
  157. LUA_FUNCTION_DEF (text, exclude_chars);
  158. /***
  159. * @method rspamd_text:oneline([always_copy])
  160. * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
  161. * where the following transformations are made:
  162. * - All spaces sequences are replaced with a single space
  163. * - All newlines sequences are replaced with a single space
  164. * - Trailing and leading spaces are removed
  165. * - Control characters are excluded
  166. * - UTF8 sequences are normalised
  167. *
  168. * @param {boolean} always_copy always copy the source text
  169. * @return {rspamd_text} modified or copied text
  170. */
  171. LUA_FUNCTION_DEF (text, oneline);
  172. /***
  173. * @method rspamd_text:base32([b32type])
  174. * Returns a text encoded in base32 (new rspamd_text is allocated)
  175. *
  176. * @param {string} b32type base32 type (default, bleach, rfc)
  177. * @return {rspamd_text} new text encoded in base32
  178. */
  179. LUA_FUNCTION_DEF (text, base32);
  180. /***
  181. * @method rspamd_text:base64([line_length, [nline, [fold]]])
  182. * Returns a text encoded in base64 (new rspamd_text is allocated)
  183. *
  184. * @param {number} line_length return text split with newlines up to this attribute
  185. * @param {string} nline newline type: `cr`, `lf`, `crlf`
  186. * @param {boolean} fold use folding when splitting into lines (false by default)
  187. * @return {rspamd_text} new text encoded in base64
  188. */
  189. LUA_FUNCTION_DEF (text, base64);
  190. /***
  191. * @method rspamd_text:hex()
  192. * Returns a text encoded in hex (new rspamd_text is allocated)
  193. *
  194. * @return {rspamd_text} new text encoded in hex
  195. */
  196. LUA_FUNCTION_DEF (text, hex);
  197. LUA_FUNCTION_DEF (text, gc);
  198. LUA_FUNCTION_DEF (text, eq);
  199. LUA_FUNCTION_DEF (text, lt);
  200. LUA_FUNCTION_DEF (text, concat);
  201. static const struct luaL_reg textlib_f[] = {
  202. LUA_INTERFACE_DEF (text, fromstring),
  203. {"from_string", lua_text_fromstring},
  204. LUA_INTERFACE_DEF (text, fromtable),
  205. {"from_table", lua_text_fromtable},
  206. LUA_INTERFACE_DEF (text, null),
  207. LUA_INTERFACE_DEF (text, randombytes),
  208. {NULL, NULL}
  209. };
  210. static const struct luaL_reg textlib_m[] = {
  211. LUA_INTERFACE_DEF (text, len),
  212. LUA_INTERFACE_DEF (text, str),
  213. LUA_INTERFACE_DEF (text, ptr),
  214. LUA_INTERFACE_DEF (text, take_ownership),
  215. LUA_INTERFACE_DEF (text, save_in_file),
  216. LUA_INTERFACE_DEF (text, span),
  217. LUA_INTERFACE_DEF (text, sub),
  218. LUA_INTERFACE_DEF (text, lines),
  219. LUA_INTERFACE_DEF (text, split),
  220. LUA_INTERFACE_DEF (text, at),
  221. LUA_INTERFACE_DEF (text, memchr),
  222. LUA_INTERFACE_DEF (text, bytes),
  223. LUA_INTERFACE_DEF (text, lower),
  224. LUA_INTERFACE_DEF (text, exclude_chars),
  225. LUA_INTERFACE_DEF (text, oneline),
  226. LUA_INTERFACE_DEF (text, base32),
  227. LUA_INTERFACE_DEF (text, base64),
  228. LUA_INTERFACE_DEF (text, hex),
  229. {"write", lua_text_save_in_file},
  230. {"__len", lua_text_len},
  231. {"__tostring", lua_text_str},
  232. {"__gc", lua_text_gc},
  233. {"__eq", lua_text_eq},
  234. {"__lt", lua_text_lt},
  235. {"__concat", lua_text_concat},
  236. {NULL, NULL}
  237. };
  238. struct rspamd_lua_text *
  239. lua_check_text (lua_State * L, gint pos)
  240. {
  241. void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
  242. luaL_argcheck (L, ud != NULL, pos, "'text' expected");
  243. return ud ? (struct rspamd_lua_text *)ud : NULL;
  244. }
  245. struct rspamd_lua_text *
  246. lua_check_text_or_string (lua_State * L, gint pos)
  247. {
  248. gint pos_type = lua_type (L, pos);
  249. if (pos_type == LUA_TUSERDATA) {
  250. void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
  251. luaL_argcheck (L, ud != NULL, pos, "'text' expected");
  252. return ud ? (struct rspamd_lua_text *) ud : NULL;
  253. }
  254. else if (pos_type == LUA_TSTRING) {
  255. /* Fake static lua_text */
  256. static struct rspamd_lua_text fake_text;
  257. gsize len;
  258. fake_text.start = lua_tolstring (L, pos, &len);
  259. if (len >= G_MAXUINT) {
  260. return NULL;
  261. }
  262. fake_text.len = len;
  263. fake_text.flags = RSPAMD_TEXT_FLAG_FAKE;
  264. return &fake_text;
  265. }
  266. return NULL;
  267. }
  268. struct rspamd_lua_text *
  269. lua_new_text (lua_State *L, const gchar *start, gsize len, gboolean own)
  270. {
  271. struct rspamd_lua_text *t;
  272. t = lua_newuserdata (L, sizeof (*t));
  273. t->flags = 0;
  274. if (own) {
  275. gchar *storage;
  276. if (len > 0) {
  277. storage = g_malloc (len);
  278. if (start != NULL) {
  279. memcpy (storage, start, len);
  280. }
  281. t->start = storage;
  282. t->flags = RSPAMD_TEXT_FLAG_OWN;
  283. }
  284. else {
  285. t->start = "";
  286. }
  287. }
  288. else {
  289. t->start = start;
  290. }
  291. t->len = len;
  292. rspamd_lua_setclass (L, "rspamd{text}", -1);
  293. return t;
  294. }
  295. static gint
  296. lua_text_fromstring (lua_State *L)
  297. {
  298. LUA_TRACE_POINT;
  299. const gchar *str;
  300. gsize l = 0;
  301. gboolean transparent = FALSE;
  302. str = luaL_checklstring (L, 1, &l);
  303. if (str) {
  304. if (lua_isboolean (L, 2)) {
  305. transparent = lua_toboolean (L, 2);
  306. }
  307. lua_new_text (L, str, l, !transparent);
  308. }
  309. else {
  310. return luaL_error (L, "invalid arguments");
  311. }
  312. return 1;
  313. }
  314. static gint
  315. lua_text_null (lua_State *L)
  316. {
  317. LUA_TRACE_POINT;
  318. lua_new_text (L, NULL, 0, false);
  319. return 1;
  320. }
  321. static gint
  322. lua_text_randombytes (lua_State *L)
  323. {
  324. LUA_TRACE_POINT;
  325. guint nbytes = luaL_checkinteger (L, 1);
  326. struct rspamd_lua_text *out;
  327. out = lua_new_text (L, NULL, nbytes, TRUE);
  328. randombytes_buf ((char *)out->start, nbytes);
  329. out->len = nbytes;
  330. return 1;
  331. }
  332. #define MAX_REC 10
  333. static void
  334. lua_text_tbl_length (lua_State *L, gsize dlen, gsize *dest, guint rec)
  335. {
  336. gsize tblen, stlen;
  337. struct rspamd_lua_text *elt;
  338. if (rec > MAX_REC) {
  339. luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
  340. return;
  341. }
  342. tblen = rspamd_lua_table_size (L, -1);
  343. for (gsize i = 0; i < tblen; i ++) {
  344. lua_rawgeti (L, -1, i + 1);
  345. if (lua_type (L, -1) == LUA_TSTRING) {
  346. #if LUA_VERSION_NUM >= 502
  347. stlen = lua_rawlen (L, -1);
  348. #else
  349. stlen = lua_objlen (L, -1);
  350. #endif
  351. (*dest) += stlen;
  352. }
  353. else if (lua_type (L, -1) == LUA_TUSERDATA){
  354. elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
  355. if (elt) {
  356. (*dest) += elt->len;
  357. }
  358. }
  359. else if (lua_type (L, -1) == LUA_TTABLE) {
  360. lua_text_tbl_length (L, dlen, dest, rec + 1);
  361. }
  362. if (i != tblen - 1) {
  363. (*dest) += dlen;
  364. }
  365. lua_pop (L, 1);
  366. }
  367. }
  368. static void
  369. lua_text_tbl_append (lua_State *L,
  370. const gchar *delim,
  371. gsize dlen,
  372. gchar **dest,
  373. guint rec)
  374. {
  375. const gchar *st;
  376. gsize tblen, stlen;
  377. struct rspamd_lua_text *elt;
  378. if (rec > MAX_REC) {
  379. luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
  380. return;
  381. }
  382. tblen = rspamd_lua_table_size (L, -1);
  383. for (guint i = 0; i < tblen; i ++) {
  384. lua_rawgeti (L, -1, i + 1);
  385. if (lua_type (L, -1) == LUA_TSTRING) {
  386. st = lua_tolstring (L, -1, &stlen);
  387. memcpy ((*dest), st, stlen);
  388. (*dest) += stlen;
  389. }
  390. else if (lua_type (L, -1) == LUA_TUSERDATA){
  391. elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
  392. if (elt) {
  393. memcpy ((*dest), elt->start, elt->len);
  394. (*dest) += elt->len;
  395. }
  396. }
  397. else if (lua_type (L, -1) == LUA_TTABLE) {
  398. lua_text_tbl_append (L, delim, dlen, dest, rec + 1);
  399. }
  400. if (dlen && i != tblen - 1) {
  401. memcpy ((*dest), delim, dlen);
  402. (*dest) += dlen;
  403. }
  404. lua_pop (L, 1);
  405. }
  406. }
  407. static gint
  408. lua_text_fromtable (lua_State *L)
  409. {
  410. LUA_TRACE_POINT;
  411. const gchar *delim = "";
  412. struct rspamd_lua_text *t;
  413. gsize textlen = 0, dlen, oldtop = lua_gettop (L);
  414. gchar *dest;
  415. if (!lua_istable (L, 1)) {
  416. return luaL_error (L, "invalid arguments");
  417. }
  418. if (lua_type (L, 2) == LUA_TSTRING) {
  419. delim = lua_tolstring (L, 2, &dlen);
  420. }
  421. else {
  422. dlen = 0;
  423. }
  424. /* Calculate length needed */
  425. lua_pushvalue (L, 1);
  426. lua_text_tbl_length (L, dlen, &textlen, 0);
  427. lua_pop (L, 1);
  428. /* Allocate new text */
  429. t = lua_newuserdata (L, sizeof (*t));
  430. dest = g_malloc (textlen);
  431. t->start = dest;
  432. t->len = textlen;
  433. t->flags = RSPAMD_TEXT_FLAG_OWN;
  434. rspamd_lua_setclass (L, "rspamd{text}", -1);
  435. lua_pushvalue (L, 1);
  436. lua_text_tbl_append (L, delim, dlen, &dest, 0);
  437. lua_pop (L, 1); /* Table arg */
  438. gint newtop = lua_gettop (L);
  439. g_assert ( newtop== oldtop + 1);
  440. return 1;
  441. }
  442. static gint
  443. lua_text_len (lua_State *L)
  444. {
  445. LUA_TRACE_POINT;
  446. struct rspamd_lua_text *t = lua_check_text (L, 1);
  447. gsize l = 0;
  448. if (t != NULL) {
  449. l = t->len;
  450. }
  451. else {
  452. return luaL_error (L, "invalid arguments");
  453. }
  454. lua_pushinteger (L, l);
  455. return 1;
  456. }
  457. static gint
  458. lua_text_str (lua_State *L)
  459. {
  460. LUA_TRACE_POINT;
  461. struct rspamd_lua_text *t = lua_check_text (L, 1);
  462. if (t != NULL) {
  463. lua_pushlstring (L, t->start, t->len);
  464. }
  465. else {
  466. return luaL_error (L, "invalid arguments");
  467. }
  468. return 1;
  469. }
  470. static gint
  471. lua_text_ptr (lua_State *L)
  472. {
  473. LUA_TRACE_POINT;
  474. struct rspamd_lua_text *t = lua_check_text (L, 1);
  475. if (t != NULL) {
  476. lua_pushlightuserdata (L, (gpointer)t->start);
  477. }
  478. else {
  479. return luaL_error (L, "invalid arguments");
  480. }
  481. return 1;
  482. }
  483. static gint
  484. lua_text_take_ownership (lua_State *L)
  485. {
  486. LUA_TRACE_POINT;
  487. struct rspamd_lua_text *t = lua_check_text (L, 1);
  488. gchar *dest;
  489. if (t != NULL) {
  490. if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  491. /* We already own it */
  492. lua_pushboolean (L, true);
  493. }
  494. else {
  495. dest = g_malloc (t->len);
  496. memcpy (dest, t->start, t->len);
  497. t->start = dest;
  498. t->flags |= RSPAMD_TEXT_FLAG_OWN;
  499. lua_pushboolean (L, true);
  500. }
  501. }
  502. else {
  503. return luaL_error (L, "invalid arguments");
  504. }
  505. return 1;
  506. }
  507. static gint
  508. lua_text_span (lua_State *L)
  509. {
  510. LUA_TRACE_POINT;
  511. struct rspamd_lua_text *t = lua_check_text (L, 1);
  512. gint64 start = lua_tointeger (L, 2), len = -1;
  513. if (t && start >= 1 && start <= t->len) {
  514. if (lua_isnumber (L, 3)) {
  515. len = lua_tonumber (L, 3);
  516. }
  517. if (len == -1) {
  518. len = t->len - (start - 1);
  519. }
  520. if (len < 0 || (len > (t->len - (start - 1)))) {
  521. return luaL_error (L, "invalid length");
  522. }
  523. lua_new_text (L, t->start + (start - 1), len, FALSE);
  524. }
  525. else {
  526. if (!t) {
  527. return luaL_error (L, "invalid arguments, text required");
  528. }
  529. else {
  530. return luaL_error (L, "invalid arguments: start offset %d "
  531. "is larger than text len %d", (int)start, (int)t->len);
  532. }
  533. }
  534. return 1;
  535. }
  536. /* Helpers to behave exactly as Lua does */
  537. static inline gsize
  538. relative_pos_start (gint pos, gsize len)
  539. {
  540. if (pos > 0) {
  541. return pos;
  542. }
  543. else if (pos == 0) {
  544. return 1;
  545. }
  546. else if (pos < -((gint) len)) {
  547. return 1;
  548. }
  549. /* Negative pos inside str */
  550. return len + ((gsize)pos) + 1;
  551. }
  552. static inline gsize
  553. relative_pos_end (gint pos, gsize len)
  554. {
  555. if (pos > (gint)len) {
  556. return len;
  557. }
  558. else if (pos >= 0) {
  559. return (size_t) pos;
  560. }
  561. else if (pos < -((gint)len)) {
  562. return 0;
  563. }
  564. return len + ((gsize)pos) + 1;
  565. }
  566. static gint
  567. lua_text_sub (lua_State *L)
  568. {
  569. LUA_TRACE_POINT;
  570. struct rspamd_lua_text *t = lua_check_text (L, 1);
  571. if (t) {
  572. size_t start = relative_pos_start (luaL_checkinteger (L, 2),
  573. t->len);
  574. size_t end = relative_pos_end (luaL_optinteger (L, 3, -1),
  575. t->len);
  576. if (start <= end) {
  577. lua_new_text (L, t->start + (start - 1),
  578. (end - start) + 1, FALSE);
  579. }
  580. else {
  581. lua_new_text (L, "", 0, TRUE);
  582. }
  583. }
  584. else {
  585. return luaL_error (L, "invalid arguments");
  586. }
  587. return 1;
  588. }
  589. static gint64
  590. rspamd_lua_text_push_line (lua_State *L,
  591. struct rspamd_lua_text *t,
  592. gint64 start_offset,
  593. const gchar *sep_pos,
  594. gboolean stringify)
  595. {
  596. const gchar *start;
  597. gsize len;
  598. gint64 ret;
  599. start = t->start + start_offset;
  600. len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
  601. ret = start_offset + len;
  602. /* Trim line */
  603. while (len > 0) {
  604. if (start[len - 1] == '\r' || start[len - 1] == '\n') {
  605. len --;
  606. }
  607. else {
  608. break;
  609. }
  610. }
  611. if (stringify) {
  612. lua_pushlstring (L, start, len);
  613. }
  614. else {
  615. struct rspamd_lua_text *ntext;
  616. ntext = lua_newuserdata (L, sizeof (*ntext));
  617. rspamd_lua_setclass (L, "rspamd{text}", -1);
  618. ntext->start = start;
  619. ntext->len = len;
  620. ntext->flags = 0; /* Not own as it must be owned by a top object */
  621. }
  622. return ret;
  623. }
  624. static gint
  625. rspamd_lua_text_readline (lua_State *L)
  626. {
  627. struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1));
  628. gboolean stringify = lua_toboolean (L, lua_upvalueindex (2));
  629. gint64 pos = lua_tointeger (L, lua_upvalueindex (3));
  630. if (pos < 0) {
  631. return luaL_error (L, "invalid pos: %d", (gint)pos);
  632. }
  633. if (pos >= t->len) {
  634. /* We are done */
  635. return 0;
  636. }
  637. const gchar *sep_pos;
  638. /* We look just for `\n` ignoring `\r` as it is very rare nowadays */
  639. sep_pos = memchr (t->start + pos, '\n', t->len - pos);
  640. if (sep_pos == NULL) {
  641. /* Either last `\n` or `\r` separated text */
  642. sep_pos = memchr (t->start + pos, '\r', t->len - pos);
  643. }
  644. pos = rspamd_lua_text_push_line (L, t, pos, sep_pos, stringify);
  645. /* Skip separators */
  646. while (pos < t->len) {
  647. if (t->start[pos] == '\n' || t->start[pos] == '\r') {
  648. pos ++;
  649. }
  650. else {
  651. break;
  652. }
  653. }
  654. /* Update pos */
  655. lua_pushinteger (L, pos);
  656. lua_replace (L, lua_upvalueindex (3));
  657. return 1;
  658. }
  659. static gint
  660. lua_text_lines (lua_State *L)
  661. {
  662. LUA_TRACE_POINT;
  663. struct rspamd_lua_text *t = lua_check_text (L, 1);
  664. gboolean stringify = FALSE;
  665. if (t) {
  666. if (lua_isboolean (L, 2)) {
  667. stringify = lua_toboolean (L, 2);
  668. }
  669. lua_pushvalue (L, 1);
  670. lua_pushboolean (L, stringify);
  671. lua_pushinteger (L, 0); /* Current pos */
  672. lua_pushcclosure (L, rspamd_lua_text_readline, 3);
  673. }
  674. else {
  675. return luaL_error (L, "invalid arguments");
  676. }
  677. return 1;
  678. }
  679. static gint
  680. rspamd_lua_text_regexp_split (lua_State *L) {
  681. struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1)),
  682. *new_t;
  683. struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
  684. lua_touserdata (L, lua_upvalueindex (2));
  685. gboolean stringify = lua_toboolean (L, lua_upvalueindex (3));
  686. gint64 pos = lua_tointeger (L, lua_upvalueindex (4));
  687. gboolean matched;
  688. if (pos < 0) {
  689. return luaL_error (L, "invalid pos: %d", (gint) pos);
  690. }
  691. if (pos >= t->len) {
  692. /* We are done */
  693. return 0;
  694. }
  695. const gchar *start, *end, *old_start;
  696. end = t->start + pos;
  697. for (;;) {
  698. old_start = end;
  699. matched = rspamd_regexp_search (re->re, t->start, t->len, &start, &end, FALSE,
  700. NULL);
  701. if (matched) {
  702. if (start - old_start > 0) {
  703. if (stringify) {
  704. lua_pushlstring (L, old_start, start - old_start);
  705. }
  706. else {
  707. new_t = lua_newuserdata (L, sizeof (*t));
  708. rspamd_lua_setclass (L, "rspamd{text}", -1);
  709. new_t->start = old_start;
  710. new_t->len = start - old_start;
  711. new_t->flags = 0;
  712. }
  713. break;
  714. }
  715. else {
  716. if (start == end) {
  717. matched = FALSE;
  718. break;
  719. }
  720. /*
  721. * All match separators (e.g. starting separator,
  722. * we need to skip it). Continue iterations.
  723. */
  724. }
  725. }
  726. else {
  727. /* No match, stop */
  728. break;
  729. }
  730. }
  731. if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
  732. /* No more matches, but we might need to push the last element */
  733. if (end == NULL) {
  734. end = t->start;
  735. }
  736. /* No separators, need to push the whole remaining part */
  737. if (stringify) {
  738. lua_pushlstring (L, end, (t->start + t->len) - end);
  739. }
  740. else {
  741. new_t = lua_newuserdata (L, sizeof (*t));
  742. rspamd_lua_setclass (L, "rspamd{text}", -1);
  743. new_t->start = end;
  744. new_t->len = (t->start + t->len) - end;
  745. new_t->flags = 0;
  746. }
  747. pos = t->len;
  748. }
  749. else {
  750. pos = end - t->start;
  751. }
  752. /* Update pos */
  753. lua_pushinteger (L, pos);
  754. lua_replace (L, lua_upvalueindex (4));
  755. return 1;
  756. }
  757. static gint
  758. lua_text_split (lua_State *L)
  759. {
  760. LUA_TRACE_POINT;
  761. struct rspamd_lua_text *t = lua_check_text (L, 1);
  762. struct rspamd_lua_regexp *re;
  763. gboolean stringify = FALSE, own_re = FALSE;
  764. if (lua_type (L, 2) == LUA_TUSERDATA) {
  765. re = lua_check_regexp (L, 2);
  766. }
  767. else {
  768. rspamd_regexp_t *c_re;
  769. GError *err = NULL;
  770. c_re = rspamd_regexp_new (lua_tostring (L, 2), NULL, &err);
  771. if (c_re == NULL) {
  772. gint ret = luaL_error (L, "cannot parse regexp: %s, error: %s",
  773. lua_tostring (L, 2),
  774. err == NULL ? "undefined" : err->message);
  775. if (err) {
  776. g_error_free (err);
  777. }
  778. return ret;
  779. }
  780. re = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  781. re->re = c_re;
  782. re->re_pattern = g_strdup (lua_tostring (L, 2));
  783. re->module = rspamd_lua_get_module_name (L);
  784. own_re = TRUE;
  785. }
  786. if (t && re) {
  787. if (lua_isboolean (L, 3)) {
  788. stringify = lua_toboolean (L, 3);
  789. }
  790. /* Upvalues */
  791. lua_pushvalue (L, 1); /* text */
  792. if (own_re) {
  793. struct rspamd_lua_regexp **pre;
  794. pre = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  795. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  796. *pre = re;
  797. }
  798. else {
  799. lua_pushvalue (L, 2); /* regexp */
  800. }
  801. lua_pushboolean (L, stringify);
  802. lua_pushinteger (L, 0); /* Current pos */
  803. lua_pushcclosure (L, rspamd_lua_text_regexp_split, 4);
  804. }
  805. else {
  806. return luaL_error (L, "invalid arguments");
  807. }
  808. return 1;
  809. }
  810. static gint
  811. lua_text_at (lua_State *L)
  812. {
  813. LUA_TRACE_POINT;
  814. struct rspamd_lua_text *t = lua_check_text (L, 1);
  815. gint pos = lua_tointeger (L, 2);
  816. if (t) {
  817. if (pos > 0 && pos <= t->len) {
  818. lua_pushinteger (L, t->start[pos - 1]);
  819. }
  820. else {
  821. lua_pushnil (L);
  822. }
  823. }
  824. else {
  825. return luaL_error (L, "invalid arguments");
  826. }
  827. return 1;
  828. }
  829. static gint
  830. lua_text_memchr (lua_State *L)
  831. {
  832. LUA_TRACE_POINT;
  833. struct rspamd_lua_text *t = lua_check_text (L, 1);
  834. int c;
  835. bool reverse = false;
  836. if (lua_isnumber (L, 2)) {
  837. c = lua_tonumber (L, 2);
  838. }
  839. else {
  840. gsize l;
  841. const gchar *str = lua_tolstring (L, 2, &l);
  842. if (str) {
  843. c = str[0];
  844. if (l != 1) {
  845. return luaL_error (L, "need exactly one character to search");
  846. }
  847. }
  848. else {
  849. return luaL_error (L, "invalid arguments");
  850. }
  851. }
  852. if (t) {
  853. void *f;
  854. if (lua_isboolean (L, 3)) {
  855. reverse = lua_toboolean (L, 3);
  856. }
  857. if (reverse) {
  858. f = rspamd_memrchr (t->start, c, t->len);
  859. }
  860. else {
  861. f = memchr (t->start, c, t->len);
  862. }
  863. if (f) {
  864. lua_pushinteger (L, ((const char *)f) - t->start + 1);
  865. }
  866. else {
  867. lua_pushinteger (L, -1);
  868. }
  869. }
  870. else {
  871. return luaL_error (L, "invalid arguments");
  872. }
  873. return 1;
  874. }
  875. static gint
  876. lua_text_bytes (lua_State *L)
  877. {
  878. LUA_TRACE_POINT;
  879. struct rspamd_lua_text *t = lua_check_text (L, 1);
  880. if (t) {
  881. lua_createtable (L, t->len, 0);
  882. for (gsize i = 0; i < t->len; i ++) {
  883. lua_pushinteger (L, (guchar)t->start[i]);
  884. lua_rawseti (L, -2, i + 1);
  885. }
  886. }
  887. else {
  888. return luaL_error (L, "invalid arguments");
  889. }
  890. return 1;
  891. }
  892. static gint
  893. lua_text_save_in_file (lua_State *L)
  894. {
  895. LUA_TRACE_POINT;
  896. struct rspamd_lua_text *t = lua_check_text (L, 1);
  897. const gchar *fname = NULL;
  898. guint mode = 00644;
  899. gint fd = -1;
  900. gboolean need_close = FALSE;
  901. if (t != NULL) {
  902. if (lua_type (L, 2) == LUA_TSTRING) {
  903. fname = luaL_checkstring (L, 2);
  904. if (lua_type (L, 3) == LUA_TNUMBER) {
  905. mode = lua_tonumber (L, 3);
  906. }
  907. }
  908. else if (lua_type (L, 2) == LUA_TNUMBER) {
  909. /* Created fd */
  910. fd = lua_tonumber (L, 2);
  911. }
  912. if (fd == -1) {
  913. if (fname) {
  914. fd = rspamd_file_xopen (fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);
  915. if (fd == -1) {
  916. lua_pushboolean (L, false);
  917. lua_pushstring (L, strerror (errno));
  918. return 2;
  919. }
  920. need_close = TRUE;
  921. }
  922. else {
  923. fd = STDOUT_FILENO;
  924. }
  925. }
  926. if (write (fd, t->start, t->len) == -1) {
  927. if (fd != STDOUT_FILENO) {
  928. close (fd);
  929. }
  930. lua_pushboolean (L, false);
  931. lua_pushstring (L, strerror (errno));
  932. return 2;
  933. }
  934. if (need_close) {
  935. close (fd);
  936. }
  937. lua_pushboolean (L, true);
  938. }
  939. else {
  940. return luaL_error (L, "invalid arguments");
  941. }
  942. return 1;
  943. }
  944. static gint
  945. lua_text_gc (lua_State *L)
  946. {
  947. LUA_TRACE_POINT;
  948. struct rspamd_lua_text *t = lua_check_text (L, 1);
  949. if (t != NULL) {
  950. g_assert (!(t->flags & RSPAMD_TEXT_FLAG_FAKE));
  951. if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  952. if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
  953. rspamd_explicit_memzero ((guchar *)t->start, t->len);
  954. }
  955. if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
  956. munmap ((gpointer)t->start, t->len);
  957. }
  958. else {
  959. if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
  960. free ((gpointer) t->start);
  961. }
  962. else {
  963. g_free ((gpointer) t->start);
  964. }
  965. }
  966. }
  967. }
  968. return 0;
  969. }
  970. static gint
  971. lua_text_eq (lua_State *L)
  972. {
  973. LUA_TRACE_POINT;
  974. struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
  975. *t2 = lua_check_text_or_string (L, 2);
  976. if (t1->len == t2->len) {
  977. lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) == 0);
  978. }
  979. else {
  980. lua_pushboolean (L, false);
  981. }
  982. return 1;
  983. }
  984. static gint
  985. lua_text_lt (lua_State *L)
  986. {
  987. LUA_TRACE_POINT;
  988. struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
  989. *t2 = lua_check_text_or_string (L, 2);
  990. if (t1 && t2) {
  991. if (t1->len == t2->len) {
  992. lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) < 0);
  993. }
  994. else {
  995. lua_pushboolean (L, t1->len < t2->len);
  996. }
  997. }
  998. return 1;
  999. }
  1000. static gint
  1001. lua_text_concat (lua_State *L)
  1002. {
  1003. LUA_TRACE_POINT;
  1004. struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
  1005. *t2 = lua_check_text_or_string (L, 2);
  1006. if (t1 && t2) {
  1007. struct rspamd_lua_text *final;
  1008. final = lua_new_text (L, NULL, t1->len + t2->len, TRUE);
  1009. memcpy ((void *)final->start, t1->start, t1->len);
  1010. memcpy ((void *)(final->start + t1->len), t2->start, t2->len);
  1011. }
  1012. return 1;
  1013. }
  1014. static gint
  1015. lua_text_wipe (lua_State *L)
  1016. {
  1017. LUA_TRACE_POINT;
  1018. struct rspamd_lua_text *t = lua_check_text (L, 1);
  1019. if (t != NULL) {
  1020. if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  1021. rspamd_explicit_memzero ((guchar *)t->start, t->len);
  1022. }
  1023. else {
  1024. return luaL_error (L, "cannot wipe not owned text");
  1025. }
  1026. }
  1027. else {
  1028. return luaL_error (L, "invalid arguments");
  1029. }
  1030. return 0;
  1031. }
  1032. static gint
  1033. lua_text_base32 (lua_State *L)
  1034. {
  1035. LUA_TRACE_POINT;
  1036. struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
  1037. enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;
  1038. if (t != NULL) {
  1039. if (lua_type (L, 2) == LUA_TSTRING) {
  1040. btype = rspamd_base32_decode_type_from_str (lua_tostring (L, 2));
  1041. if (btype == RSPAMD_BASE32_INVALID) {
  1042. return luaL_error (L, "invalid b32 type: %s", lua_tostring (L, 2));
  1043. }
  1044. }
  1045. out = lua_new_text (L, NULL, t->len * 8 / 5 + 2, TRUE);
  1046. out->len = rspamd_encode_base32_buf (t->start, t->len, (gchar *)out->start,
  1047. out->len, btype);
  1048. }
  1049. else {
  1050. return luaL_error (L, "invalid arguments");
  1051. }
  1052. return 1;
  1053. }
  1054. static gint
  1055. lua_text_base64 (lua_State *L)
  1056. {
  1057. LUA_TRACE_POINT;
  1058. struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
  1059. gsize line_len = 0;
  1060. gboolean fold = FALSE;
  1061. if (t != NULL) {
  1062. if (lua_type (L, 2) == LUA_TNUMBER) {
  1063. line_len = lua_tointeger (L, 2);
  1064. if (line_len <= 8) {
  1065. return luaL_error (L, "too small line length (at least 8 is required)");
  1066. }
  1067. }
  1068. enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;
  1069. if (lua_type (L, 3) == LUA_TSTRING) {
  1070. const gchar *how_str = lua_tostring (L, 3);
  1071. if (g_ascii_strcasecmp (how_str, "cr") == 0) {
  1072. how = RSPAMD_TASK_NEWLINES_CR;
  1073. }
  1074. else if (g_ascii_strcasecmp (how_str, "lf") == 0) {
  1075. how = RSPAMD_TASK_NEWLINES_LF;
  1076. }
  1077. else if (g_ascii_strcasecmp (how_str, "crlf") != 0) {
  1078. return luaL_error (L, "invalid newline style: %s", how_str);
  1079. }
  1080. }
  1081. if (lua_type (L, 4) == LUA_TBOOLEAN) {
  1082. fold = lua_toboolean (L, 4);
  1083. }
  1084. gsize sz_len;
  1085. out = lua_newuserdata (L, sizeof (*t));
  1086. out->flags = RSPAMD_TEXT_FLAG_OWN;
  1087. out->start = rspamd_encode_base64_common (t->start, t->len,
  1088. line_len, &sz_len, fold, how);
  1089. out->len = sz_len;
  1090. rspamd_lua_setclass (L, "rspamd{text}", -1);
  1091. }
  1092. else {
  1093. return luaL_error (L, "invalid arguments");
  1094. }
  1095. return 1;
  1096. }
  1097. static gint
  1098. lua_text_hex (lua_State *L)
  1099. {
  1100. LUA_TRACE_POINT;
  1101. struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
  1102. if (t != NULL) {
  1103. out = lua_new_text (L, NULL, t->len * 2, TRUE);
  1104. out->len = rspamd_encode_hex_buf (t->start, t->len, (gchar *)out->start,
  1105. out->len);
  1106. }
  1107. else {
  1108. return luaL_error (L, "invalid arguments");
  1109. }
  1110. return 1;
  1111. }
  1112. #define BITOP(a,b,op) \
  1113. ((a)[(gsize)(b)/(8*sizeof *(a))] op (gsize)1<<((gsize)(b)%(8*sizeof *(a))))
  1114. static gint
  1115. lua_text_exclude_chars (lua_State *L)
  1116. {
  1117. LUA_TRACE_POINT;
  1118. struct rspamd_lua_text *t = lua_check_text (L, 1);
  1119. gssize patlen;
  1120. const gchar *pat = lua_tolstring (L, 2, &patlen), *p, *end;
  1121. gchar *dest, *d;
  1122. gsize byteset[32 / sizeof(gsize)]; /* Bitset for ascii */
  1123. gboolean copy = TRUE;
  1124. guint *plen;
  1125. if (t != NULL && pat && patlen > 0) {
  1126. if (lua_isboolean (L, 3)) {
  1127. copy = lua_toboolean (L, 3);
  1128. }
  1129. else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  1130. copy = FALSE;
  1131. }
  1132. if (!copy) {
  1133. dest = (gchar *)t->start;
  1134. plen = &t->len;
  1135. lua_pushvalue (L, 1); /* Push text as a result */
  1136. }
  1137. else {
  1138. /* We need to copy read only text */
  1139. struct rspamd_lua_text *nt;
  1140. dest = g_malloc (t->len);
  1141. nt = lua_newuserdata (L, sizeof (*nt));
  1142. rspamd_lua_setclass (L, "rspamd{text}", -1);
  1143. nt->len = t->len;
  1144. nt->flags = RSPAMD_TEXT_FLAG_OWN;
  1145. memcpy (dest, t->start, t->len);
  1146. nt->start = dest;
  1147. plen = &nt->len;
  1148. }
  1149. /* Fill pattern bitset */
  1150. memset (byteset, 0, sizeof byteset);
  1151. while (patlen > 0) {
  1152. if (*pat == '%') {
  1153. pat ++;
  1154. patlen --;
  1155. if (patlen > 0) {
  1156. /*
  1157. * This stuff assumes little endian, but GSIZE_FROM_LE should
  1158. * deal with proper conversion
  1159. */
  1160. switch (*pat) {
  1161. case '%':
  1162. BITOP (byteset, *(guchar *) pat, |=);
  1163. break;
  1164. case 's':
  1165. /* "\r\n\t\f " */
  1166. byteset[0] |= GSIZE_FROM_LE (0x100003600);
  1167. break;
  1168. case 'n':
  1169. /* newlines: "\r\n" */
  1170. byteset[0] |= GSIZE_FROM_LE (0x2400);
  1171. break;
  1172. case '8':
  1173. /* 8 bit characters */
  1174. byteset[2] |= GSIZE_FROM_LE (0xffffffffffffffffLLU);
  1175. byteset[3] |= GSIZE_FROM_LE (0xffffffffffffffffLLU);
  1176. break;
  1177. case 'c':
  1178. /* Non printable (control) characters */
  1179. byteset[0] |= GSIZE_FROM_LE (0xffffffff);
  1180. /* Del character */
  1181. byteset[1] |= GSIZE_FROM_LE (0x8000000000000000);
  1182. break;
  1183. }
  1184. }
  1185. else {
  1186. /* Last '%' */
  1187. BITOP (byteset, (guchar)'%', |=);
  1188. }
  1189. }
  1190. else {
  1191. BITOP (byteset, *(guchar *)pat, |=);
  1192. }
  1193. pat ++;
  1194. patlen --;
  1195. }
  1196. for (; patlen > 0 && BITOP (byteset, *(guchar *)pat, |=); pat++, patlen --);
  1197. p = t->start;
  1198. end = t->start + t->len;
  1199. d = dest;
  1200. while (p < end) {
  1201. if (!BITOP (byteset, *(guchar *)p, &)) {
  1202. *d++ = *p;
  1203. }
  1204. p ++;
  1205. }
  1206. *(plen) = d - dest;
  1207. }
  1208. else {
  1209. return luaL_error (L, "invalid arguments");
  1210. }
  1211. return 1;
  1212. }
  1213. static gint
  1214. lua_text_oneline (lua_State *L)
  1215. {
  1216. LUA_TRACE_POINT;
  1217. struct rspamd_lua_text *t = lua_check_text (L, 1);
  1218. const gchar *p, *end;
  1219. gchar *dest, *d;
  1220. gsize byteset[32 / sizeof(gsize)]; /* Bitset for ascii */
  1221. gboolean copy = TRUE, seen_8bit = FALSE;
  1222. guint *plen;
  1223. if (t != NULL) {
  1224. if (lua_isboolean (L, 2)) {
  1225. copy = lua_toboolean (L, 2);
  1226. }
  1227. else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  1228. copy = FALSE;
  1229. }
  1230. if (!copy) {
  1231. dest = (gchar *)t->start;
  1232. plen = &t->len;
  1233. lua_pushvalue (L, 1); /* Push text as a result */
  1234. }
  1235. else {
  1236. /* We need to copy read only text */
  1237. struct rspamd_lua_text *nt;
  1238. dest = g_malloc (t->len);
  1239. nt = lua_newuserdata (L, sizeof (*nt));
  1240. rspamd_lua_setclass (L, "rspamd{text}", -1);
  1241. nt->len = t->len;
  1242. nt->flags = RSPAMD_TEXT_FLAG_OWN;
  1243. memcpy (dest, t->start, t->len);
  1244. nt->start = dest;
  1245. plen = &nt->len;
  1246. }
  1247. /* Fill pattern bitset */
  1248. memset (byteset, 0, sizeof byteset);
  1249. /* All spaces */
  1250. byteset[0] |= GSIZE_FROM_LE (0x100003600);
  1251. /* Control characters */
  1252. byteset[0] |= GSIZE_FROM_LE (0xffffffff);
  1253. /* Del character */
  1254. byteset[1] |= GSIZE_FROM_LE (0x8000000000000000);
  1255. /* 8 bit characters */
  1256. byteset[2] |= GSIZE_FROM_LE (0xffffffffffffffffLLU);
  1257. byteset[3] |= GSIZE_FROM_LE (0xffffffffffffffffLLU);
  1258. p = t->start;
  1259. end = t->start + t->len;
  1260. d = dest;
  1261. while (p < end) {
  1262. if (!BITOP (byteset, *(guchar *)p, &)) {
  1263. *d++ = *p;
  1264. }
  1265. else {
  1266. if ((*(guchar *)p) & 0x80) {
  1267. seen_8bit = TRUE;
  1268. *d++ = *p;
  1269. }
  1270. else {
  1271. if (*p == ' ') {
  1272. if (d != dest) {
  1273. *d++ = *p++;
  1274. }
  1275. while (p < end && g_ascii_isspace (*p)) {
  1276. p ++;
  1277. }
  1278. continue; /* To avoid p++ */
  1279. }
  1280. else if (*p == '\r' || *p == '\n') {
  1281. if (d != dest) {
  1282. *d++ = ' ';
  1283. p ++;
  1284. }
  1285. while (p < end && g_ascii_isspace (*p)) {
  1286. p ++;
  1287. }
  1288. continue; /* To avoid p++ */
  1289. }
  1290. }
  1291. }
  1292. p ++;
  1293. }
  1294. while (d > dest && g_ascii_isspace (*(d - 1))) {
  1295. d --;
  1296. }
  1297. if (seen_8bit) {
  1298. if (rspamd_fast_utf8_validate (dest, d - dest) != 0) {
  1299. /* Need to make it valid :( */
  1300. UChar32 uc;
  1301. goffset err_offset;
  1302. gsize remain = d - dest;
  1303. gchar *nd = dest;
  1304. while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (nd, remain)) > 0) {
  1305. gint i = 0;
  1306. err_offset --; /* As it returns it 1 indexed */
  1307. nd += err_offset;
  1308. remain -= err_offset;
  1309. /* Each invalid character of input requires 3 bytes of output (+2 bytes) */
  1310. while (i < remain) {
  1311. gint old_pos = i;
  1312. U8_NEXT (nd, i, remain, uc);
  1313. if (uc < 0) {
  1314. nd[old_pos] = '?';
  1315. }
  1316. else {
  1317. break;
  1318. }
  1319. }
  1320. nd += i;
  1321. remain -= i;
  1322. }
  1323. }
  1324. }
  1325. *(plen) = d - dest;
  1326. }
  1327. else {
  1328. return luaL_error (L, "invalid arguments");
  1329. }
  1330. return 1;
  1331. }
  1332. static gint
  1333. lua_text_lower (lua_State *L)
  1334. {
  1335. LUA_TRACE_POINT;
  1336. struct rspamd_lua_text *t = lua_check_text (L, 1), *nt;
  1337. gboolean is_utf8 = FALSE, is_inplace = FALSE;
  1338. if (t != NULL) {
  1339. if (lua_isboolean (L, 2)) {
  1340. is_utf8 = lua_toboolean (L, 2);
  1341. }
  1342. if (lua_isboolean (L, 3)) {
  1343. is_inplace = lua_toboolean (L, 3);
  1344. }
  1345. if (is_inplace) {
  1346. nt = t;
  1347. lua_pushvalue (L, 1);
  1348. }
  1349. else {
  1350. nt = lua_new_text (L, t->start, t->len, TRUE);
  1351. }
  1352. if (!is_utf8) {
  1353. rspamd_str_lc ((gchar *) nt->start, nt->len);
  1354. }
  1355. else {
  1356. rspamd_str_lc_utf8 ((gchar *) nt->start, nt->len);
  1357. }
  1358. }
  1359. else {
  1360. return luaL_error (L, "invalid arguments");
  1361. }
  1362. return 1;
  1363. }
  1364. /* Used to distinguish lua text metatable */
  1365. static const guint rspamd_lua_text_cookie = 0x2b21ef6fU;
  1366. static gint
  1367. lua_load_text (lua_State * L)
  1368. {
  1369. lua_newtable (L);
  1370. lua_pushstring (L, "cookie");
  1371. lua_pushnumber (L, rspamd_lua_text_cookie);
  1372. lua_settable (L, -3);
  1373. luaL_register (L, NULL, textlib_f);
  1374. return 1;
  1375. }
  1376. void
  1377. luaopen_text (lua_State *L)
  1378. {
  1379. rspamd_lua_new_class (L, "rspamd{text}", textlib_m);
  1380. lua_pushstring (L, "cookie");
  1381. lua_pushnumber (L, rspamd_lua_text_cookie);
  1382. lua_settable (L, -3);
  1383. lua_pop (L, 1);
  1384. rspamd_lua_add_preload (L, "rspamd_text", lua_load_text);
  1385. }