You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_text.c 38KB


  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "libcryptobox/cryptobox.h"
  18. #include "contrib/fastutf8/fastutf8.h"
  19. #include "unix-std.h"
  20. /***
  21. * @module rspamd_text
  22. * This module provides access to opaque text structures used widely to prevent
  23. * copying between Lua and C for various concerns: performance, security etc...
  24. *
  25. * You can convert rspamd_text into string but it will copy data.
  26. */
  27. /***
  28. * @function rspamd_text.fromstring(str)
  29. * Creates rspamd_text from Lua string (copied to the text)
  30. * @param {string} str string to use
  31. * @return {rspamd_text} resulting text
  32. */
  33. LUA_FUNCTION_DEF(text, fromstring);
  34. /***
  35. * @function rspamd_text.null()
  36. * Creates rspamd_text with NULL pointer for testing purposes
  37. * (this function takes no arguments)
  38. * @return {rspamd_text} resulting text
  39. */
  40. LUA_FUNCTION_DEF(text, null);
  41. /***
  42. * @function rspamd_text.randombytes(nbytes)
  43. * Creates rspamd_text with random bytes inside (raw bytes)
  44. * @param {number} nbytes number of random bytes generated
  45. * @return {rspamd_text} random bytes text
  46. */
  47. LUA_FUNCTION_DEF(text, randombytes);
  48. /***
  49. * @function rspamd_text.fromtable(tbl[, delim])
  50. * Same as `table.concat` but generates rspamd_text instead of the Lua string
  51. * @param {table} tbl table to use
  52. * @param {string} delim optional delimiter
  53. * @return {rspamd_text} resulting text
  54. */
  55. LUA_FUNCTION_DEF(text, fromtable);
  56. /***
  57. * @method rspamd_text:byte(pos[, pos2])
  58. * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified
  59. * @param {integer} pos index
  60. * @param {integer} pos2 index
  61. * @return {integer} byte at the position `pos` or varargs of bytes
  62. */
  63. LUA_FUNCTION_DEF(text, byte);
  64. /***
  65. * @method rspamd_text:len()
  66. * Returns length of a string
  67. * @return {number} length of string in **bytes**
  68. */
  69. LUA_FUNCTION_DEF(text, len);
  70. /***
  71. * @method rspamd_text:str()
  72. * Converts text to string by copying its content
  73. * @return {string} copy of text as Lua string
  74. */
  75. LUA_FUNCTION_DEF(text, str);
  76. /***
  77. * @method rspamd_text:ptr()
  78. * Converts text to lightuserdata
  79. * @return {lightuserdata} pointer value of rspamd_text
  80. */
  81. LUA_FUNCTION_DEF(text, ptr);
  82. /***
  83. * @method rspamd_text:save_in_file(fname[, mode])
  84. * Saves text in file
  85. * @return {boolean} true if save has been completed
  86. */
  87. LUA_FUNCTION_DEF(text, save_in_file);
  88. /***
  89. * @method rspamd_text:span(start[, len])
  90. * Returns a span for lua_text starting at pos [start] (1 indexed) and with
  91. * length `len` (or to the end of the text)
  92. * @param {integer} start start index
  93. * @param {integer} len length of span
  94. * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
  95. */
  96. LUA_FUNCTION_DEF(text, span);
  97. /***
  98. * @method rspamd_text:sub(start[, end])
  99. * Returns a substring for lua_text similar to string.sub from Lua
  100. * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
  101. */
  102. LUA_FUNCTION_DEF(text, sub);
  103. /***
  104. * @method rspamd_text:lines([stringify])
  105. * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
  106. * @param {boolean} stringify stringify lines
  107. * @return {iterator} iterator triplet
  108. */
  109. LUA_FUNCTION_DEF(text, lines);
  110. /***
  111. * @method rspamd_text:split(regexp, [stringify])
  112. * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
  113. * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
  114. * @param {boolean} stringify stringify lines
  115. * @return {iterator} iterator triplet
  116. */
  117. LUA_FUNCTION_DEF(text, split);
  118. /***
  119. * @method rspamd_text:at(pos)
  120. * Returns a byte at the position `pos`
  121. * @param {integer} pos index
  122. * @return {integer} byte at the position `pos` or nil if pos out of bound
  123. */
  124. LUA_FUNCTION_DEF(text, at);
  125. /***
  126. * @method rspamd_text:memchr(chr, [reverse])
  127. * Returns the first or the last position of the character `chr` in the text or
  128. * -1 in case if a character has not been found. Indexes start from `1`
  129. * @param {string/number} chr character or a character code to find
  130. * @param {boolean} reverse last character if `true`
  131. * @return {integer} position of the character or `-1`
  132. */
  133. LUA_FUNCTION_DEF(text, memchr);
  134. /***
  135. * @method rspamd_text:bytes()
  136. * Converts text to an array of bytes
  137. * @return {table|integer} bytes in the array (as unsigned char)
  138. */
  139. LUA_FUNCTION_DEF(text, bytes);
  140. /***
  141. * @method rspamd_text:lower([is_utf, [inplace]])
  142. * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
  143. * @param {boolean} is_utf apply utf8 lowercase
  144. * @param {boolean} inplace lowercase the original text
  145. * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
  146. */
  147. LUA_FUNCTION_DEF(text, lower);
  148. LUA_FUNCTION_DEF(text, take_ownership);
  149. /***
  150. * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
  151. * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
  152. * where all chars from `set_to_exclude` are removed
  153. * Patterns supported:
  154. *
  155. * - %s - all space characters
  156. * - %n - all newline characters
  157. * - %c - all control characters (it includes 8bit characters and spaces)
  158. * - %8 - all 8 bit characters
  159. * - %% - just a percent character
  160. *
  161. * @param {string} set_to_exclude characters to exclude
  162. * @param {boolean} always_copy always copy the source text
  163. * @return {rspamd_text} modified or copied text
  164. */
  165. LUA_FUNCTION_DEF(text, exclude_chars);
  166. /***
  167. * @method rspamd_text:oneline([always_copy])
  168. * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
  169. * where the following transformations are made:
  170. * - All spaces sequences are replaced with a single space
  171. * - All newlines sequences are replaced with a single space
  172. * - Trailing and leading spaces are removed
  173. * - Control characters are excluded
  174. * - UTF8 sequences are normalised
  175. *
  176. * @param {boolean} always_copy always copy the source text
  177. * @return {rspamd_text} modified or copied text
  178. */
  179. LUA_FUNCTION_DEF(text, oneline);
  180. /***
  181. * @method rspamd_text:base32([b32type])
  182. * Returns a text encoded in base32 (new rspamd_text is allocated)
  183. *
  184. * @param {string} b32type base32 type (default, bleach, rfc)
  185. * @return {rspamd_text} new text encoded in base32
  186. */
  187. LUA_FUNCTION_DEF(text, base32);
  188. /***
  189. * @method rspamd_text:base64([line_length, [nline, [fold]]])
  190. * Returns a text encoded in base64 (new rspamd_text is allocated)
  191. *
  192. * @param {number} line_length return text split with newlines up to this attribute
  193. * @param {string} nline newline type: `cr`, `lf`, `crlf`
  194. * @param {boolean} fold use folding when splitting into lines (false by default)
  195. * @return {rspamd_text} new text encoded in base64
  196. */
  197. LUA_FUNCTION_DEF(text, base64);
  198. /***
  199. * @method rspamd_text:hex()
  200. * Returns a text encoded in hex (new rspamd_text is allocated)
  201. *
  202. * @return {rspamd_text} new text encoded in hex
  203. */
  204. LUA_FUNCTION_DEF(text, hex);
  205. /***
  206. * @method rspamd_text:find(pattern [, init])
  207. * Looks for the first match of pattern in the string s.
  208. * If it finds a match, then find returns the indices of s where this occurrence
  209. * starts and ends; otherwise, it returns nil. A third,
  210. * optional numerical argument init specifies where to start the search;
  211. * its default value is 1 and can be negative.
  212. * This method currently supports merely a plain search, no patterns.
  213. *
  214. * @param {string} pattern pattern to find
  215. * @param {number} init specifies where to start the search (1 default)
  216. * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil
  217. */
  218. LUA_FUNCTION_DEF(text, find);
  219. LUA_FUNCTION_DEF(text, gc);
  220. LUA_FUNCTION_DEF(text, eq);
  221. LUA_FUNCTION_DEF(text, lt);
  222. LUA_FUNCTION_DEF(text, concat);
  223. LUA_FUNCTION_DEF(text, strtoul);
  /*
   * Table of name -> C handler pairs for the functions that do not take a
   * text as `self` (fromstring, fromtable, null, randombytes).
   * `from_string` and `from_table` are alternative spellings bound to the
   * same handlers as `fromstring` and `fromtable`.
   * The {NULL, NULL} entry terminates the table.
   * NOTE(review): the registration call that consumes this table is not
   * visible in this part of the file.
   */
  224. static const struct luaL_reg textlib_f[] = {
  225. LUA_INTERFACE_DEF(text, fromstring),
  226. {"from_string", lua_text_fromstring},
  227. LUA_INTERFACE_DEF(text, fromtable),
  228. {"from_table", lua_text_fromtable},
  229. LUA_INTERFACE_DEF(text, null),
  230. LUA_INTERFACE_DEF(text, randombytes),
  231. {NULL, NULL}};
  /*
   * Table of name -> C handler pairs for the methods that take a text as
   * their first argument, followed by the metamethod entries (`__len`,
   * `__tostring`, `__gc`, `__eq`, `__lt`, `__concat`).
   * The {NULL, NULL} entry terminates the table.
   * NOTE(review): the registration call that consumes this table is not
   * visible in this part of the file.
   */
  232. static const struct luaL_reg textlib_m[] = {
  233. LUA_INTERFACE_DEF(text, len),
  234. LUA_INTERFACE_DEF(text, str),
  235. LUA_INTERFACE_DEF(text, ptr),
  236. LUA_INTERFACE_DEF(text, take_ownership),
  237. LUA_INTERFACE_DEF(text, save_in_file),
  238. LUA_INTERFACE_DEF(text, span),
  239. LUA_INTERFACE_DEF(text, sub),
  240. LUA_INTERFACE_DEF(text, lines),
  241. LUA_INTERFACE_DEF(text, split),
  242. LUA_INTERFACE_DEF(text, at),
  243. LUA_INTERFACE_DEF(text, memchr),
  244. LUA_INTERFACE_DEF(text, byte),
  245. LUA_INTERFACE_DEF(text, bytes),
  246. LUA_INTERFACE_DEF(text, lower),
  247. LUA_INTERFACE_DEF(text, exclude_chars),
  248. LUA_INTERFACE_DEF(text, oneline),
  249. LUA_INTERFACE_DEF(text, base32),
  250. LUA_INTERFACE_DEF(text, base64),
  251. LUA_INTERFACE_DEF(text, hex),
  252. LUA_INTERFACE_DEF(text, find),
  253. LUA_INTERFACE_DEF(text, strtoul),
  /* `write` is bound to the same handler as `save_in_file` */
  254. {"write", lua_text_save_in_file},
  /* `__len` and `__tostring` reuse the `len` and `str` handlers */
  255. {"__len", lua_text_len},
  256. {"__tostring", lua_text_str},
  257. {"__gc", lua_text_gc},
  258. {"__eq", lua_text_eq},
  259. {"__lt", lua_text_lt},
  260. {"__concat", lua_text_concat},
  261. {NULL, NULL}};
  262. struct rspamd_lua_text *
  263. lua_check_text(lua_State *L, int pos)
  264. {
  265. void *ud = rspamd_lua_check_udata(L, pos, rspamd_text_classname);
  266. luaL_argcheck(L, ud != NULL, pos, "'text' expected");
  267. return ud ? (struct rspamd_lua_text *) ud : NULL;
  268. }
  269. struct rspamd_lua_text *
  270. lua_check_text_or_string(lua_State *L, int pos)
  271. {
  272. int pos_type = lua_type(L, pos);
  273. if (pos_type == LUA_TUSERDATA) {
  274. void *ud = rspamd_lua_check_udata(L, pos, rspamd_text_classname);
  275. luaL_argcheck(L, ud != NULL, pos, "'text' expected");
  276. return ud ? (struct rspamd_lua_text *) ud : NULL;
  277. }
  278. else if (pos_type == LUA_TSTRING) {
  279. /*
  280. * Fake static lua_text, we allow to use this function multiple times
  281. * by having a small array of static structures.
  282. */
  283. static unsigned cur_txt_idx = 0;
  284. static struct rspamd_lua_text fake_text[4];
  285. gsize len;
  286. int sel_idx;
  287. sel_idx = cur_txt_idx++ % G_N_ELEMENTS(fake_text);
  288. fake_text[sel_idx].start = lua_tolstring(L, pos, &len);
  289. if (len >= G_MAXUINT) {
  290. return NULL;
  291. }
  292. fake_text[sel_idx].len = len;
  293. fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE;
  294. return &fake_text[sel_idx];
  295. }
  296. return NULL;
  297. }
  298. struct rspamd_lua_text *
  299. lua_new_text(lua_State *L, const char *start, gsize len, gboolean own)
  300. {
  301. struct rspamd_lua_text *t;
  302. t = lua_newuserdata(L, sizeof(*t));
  303. t->flags = 0;
  304. if (own) {
  305. char *storage;
  306. if (len > 0) {
  307. storage = g_malloc(len);
  308. if (start != NULL) {
  309. memcpy(storage, start, len);
  310. }
  311. t->start = storage;
  312. t->flags = RSPAMD_TEXT_FLAG_OWN;
  313. }
  314. else {
  315. t->start = "";
  316. }
  317. }
  318. else {
  319. t->start = start;
  320. }
  321. t->len = len;
  322. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  323. return t;
  324. }
  325. struct rspamd_lua_text *
  326. lua_new_text_task(lua_State *L, struct rspamd_task *task,
  327. const char *start, gsize len, gboolean own)
  328. {
  329. struct rspamd_lua_text *t;
  330. t = lua_newuserdata(L, sizeof(*t));
  331. t->flags = 0;
  332. if (own) {
  333. char *storage;
  334. if (len > 0) {
  335. storage = rspamd_mempool_alloc(task->task_pool, len);
  336. if (start != NULL) {
  337. memcpy(storage, start, len);
  338. }
  339. t->start = storage;
  340. }
  341. else {
  342. t->start = "";
  343. }
  344. }
  345. else {
  346. t->start = start;
  347. }
  348. t->len = len;
  349. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  350. return t;
  351. }
  352. bool lua_is_text_binary(struct rspamd_lua_text *t)
  353. {
  354. if (t == NULL || t->len == 0) {
  355. return false;
  356. }
  357. if (rspamd_str_has_8bit(t->start, t->len)) {
  358. if (rspamd_fast_utf8_validate(t->start, t->len) == 0) {
  359. return false;
  360. }
  361. return true;
  362. }
  363. return false;
  364. }
  365. static int
  366. lua_text_fromstring(lua_State *L)
  367. {
  368. LUA_TRACE_POINT;
  369. const char *str;
  370. gsize l = 0;
  371. gboolean transparent = FALSE;
  372. str = luaL_checklstring(L, 1, &l);
  373. if (str) {
  374. if (lua_isboolean(L, 2)) {
  375. transparent = lua_toboolean(L, 2);
  376. }
  377. lua_new_text(L, str, l, !transparent);
  378. }
  379. else {
  380. return luaL_error(L, "invalid arguments");
  381. }
  382. return 1;
  383. }
  384. static int
  385. lua_text_null(lua_State *L)
  386. {
  387. LUA_TRACE_POINT;
  388. lua_new_text(L, NULL, 0, false);
  389. return 1;
  390. }
  391. static int
  392. lua_text_randombytes(lua_State *L)
  393. {
  394. LUA_TRACE_POINT;
  395. unsigned int nbytes = luaL_checkinteger(L, 1);
  396. struct rspamd_lua_text *out;
  397. out = lua_new_text(L, NULL, nbytes, TRUE);
  398. randombytes_buf((char *) out->start, nbytes);
  399. out->len = nbytes;
  400. return 1;
  401. }
  402. #define MAX_REC 10
  403. static void
  404. lua_text_tbl_length(lua_State *L, gsize dlen, gsize *dest, unsigned int rec)
  405. {
  406. gsize tblen, stlen;
  407. struct rspamd_lua_text *elt;
  408. if (rec > MAX_REC) {
  409. luaL_error(L, "lua_text_tbl_length: recursion limit exceeded");
  410. return;
  411. }
  412. tblen = rspamd_lua_table_size(L, -1);
  413. for (gsize i = 0; i < tblen; i++) {
  414. lua_rawgeti(L, -1, i + 1);
  415. if (lua_type(L, -1) == LUA_TSTRING) {
  416. #if LUA_VERSION_NUM >= 502
  417. stlen = lua_rawlen(L, -1);
  418. #else
  419. stlen = lua_objlen(L, -1);
  420. #endif
  421. (*dest) += stlen;
  422. }
  423. else if (lua_type(L, -1) == LUA_TUSERDATA) {
  424. elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);
  425. if (elt) {
  426. (*dest) += elt->len;
  427. }
  428. }
  429. else if (lua_type(L, -1) == LUA_TTABLE) {
  430. lua_text_tbl_length(L, dlen, dest, rec + 1);
  431. }
  432. if (i != tblen - 1) {
  433. (*dest) += dlen;
  434. }
  435. lua_pop(L, 1);
  436. }
  437. }
  438. static void
  439. lua_text_tbl_append(lua_State *L,
  440. const char *delim,
  441. gsize dlen,
  442. char **dest,
  443. unsigned int rec)
  444. {
  445. const char *st;
  446. gsize tblen, stlen;
  447. struct rspamd_lua_text *elt;
  448. if (rec > MAX_REC) {
  449. luaL_error(L, "lua_text_tbl_length: recursion limit exceeded");
  450. return;
  451. }
  452. tblen = rspamd_lua_table_size(L, -1);
  453. for (unsigned int i = 0; i < tblen; i++) {
  454. lua_rawgeti(L, -1, i + 1);
  455. if (lua_type(L, -1) == LUA_TSTRING) {
  456. st = lua_tolstring(L, -1, &stlen);
  457. memcpy((*dest), st, stlen);
  458. (*dest) += stlen;
  459. }
  460. else if (lua_type(L, -1) == LUA_TUSERDATA) {
  461. elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);
  462. if (elt) {
  463. memcpy((*dest), elt->start, elt->len);
  464. (*dest) += elt->len;
  465. }
  466. }
  467. else if (lua_type(L, -1) == LUA_TTABLE) {
  468. lua_text_tbl_append(L, delim, dlen, dest, rec + 1);
  469. }
  470. if (dlen && i != tblen - 1) {
  471. memcpy((*dest), delim, dlen);
  472. (*dest) += dlen;
  473. }
  474. lua_pop(L, 1);
  475. }
  476. }
  477. static int
  478. lua_text_fromtable(lua_State *L)
  479. {
  480. LUA_TRACE_POINT;
  481. const char *delim = "";
  482. struct rspamd_lua_text *t;
  483. gsize textlen = 0, dlen, oldtop = lua_gettop(L);
  484. char *dest;
  485. if (!lua_istable(L, 1)) {
  486. return luaL_error(L, "invalid arguments");
  487. }
  488. if (lua_type(L, 2) == LUA_TSTRING) {
  489. delim = lua_tolstring(L, 2, &dlen);
  490. }
  491. else {
  492. dlen = 0;
  493. }
  494. /* Calculate length needed */
  495. lua_pushvalue(L, 1);
  496. lua_text_tbl_length(L, dlen, &textlen, 0);
  497. lua_pop(L, 1);
  498. /* Allocate new text */
  499. t = lua_newuserdata(L, sizeof(*t));
  500. dest = g_malloc(textlen);
  501. t->start = dest;
  502. t->len = textlen;
  503. t->flags = RSPAMD_TEXT_FLAG_OWN;
  504. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  505. lua_pushvalue(L, 1);
  506. lua_text_tbl_append(L, delim, dlen, &dest, 0);
  507. lua_pop(L, 1); /* Table arg */
  508. int newtop = lua_gettop(L);
  509. g_assert(newtop == oldtop + 1);
  510. return 1;
  511. }
  512. static int
  513. lua_text_len(lua_State *L)
  514. {
  515. LUA_TRACE_POINT;
  516. struct rspamd_lua_text *t = lua_check_text(L, 1);
  517. gsize l = 0;
  518. if (t != NULL) {
  519. l = t->len;
  520. }
  521. else {
  522. return luaL_error(L, "invalid arguments");
  523. }
  524. lua_pushinteger(L, l);
  525. return 1;
  526. }
  527. static int
  528. lua_text_str(lua_State *L)
  529. {
  530. LUA_TRACE_POINT;
  531. struct rspamd_lua_text *t = lua_check_text(L, 1);
  532. if (t != NULL) {
  533. lua_pushlstring(L, t->start, t->len);
  534. }
  535. else {
  536. return luaL_error(L, "invalid arguments");
  537. }
  538. return 1;
  539. }
  540. static int
  541. lua_text_ptr(lua_State *L)
  542. {
  543. LUA_TRACE_POINT;
  544. struct rspamd_lua_text *t = lua_check_text(L, 1);
  545. if (t != NULL) {
  546. lua_pushlightuserdata(L, (gpointer) t->start);
  547. }
  548. else {
  549. return luaL_error(L, "invalid arguments");
  550. }
  551. return 1;
  552. }
  553. static int
  554. lua_text_take_ownership(lua_State *L)
  555. {
  556. LUA_TRACE_POINT;
  557. struct rspamd_lua_text *t = lua_check_text(L, 1);
  558. char *dest;
  559. if (t != NULL) {
  560. if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  561. /* We already own it */
  562. lua_pushboolean(L, true);
  563. }
  564. else {
  565. dest = g_malloc(t->len);
  566. memcpy(dest, t->start, t->len);
  567. t->start = dest;
  568. t->flags |= RSPAMD_TEXT_FLAG_OWN;
  569. lua_pushboolean(L, true);
  570. }
  571. }
  572. else {
  573. return luaL_error(L, "invalid arguments");
  574. }
  575. return 1;
  576. }
  577. static int
  578. lua_text_span(lua_State *L)
  579. {
  580. LUA_TRACE_POINT;
  581. struct rspamd_lua_text *t = lua_check_text(L, 1);
  582. int64_t start = lua_tointeger(L, 2), len = -1;
  583. if (t && start >= 1 && start <= t->len) {
  584. if (lua_isnumber(L, 3)) {
  585. len = lua_tonumber(L, 3);
  586. }
  587. if (len == -1) {
  588. len = t->len - (start - 1);
  589. }
  590. if (len < 0 || (len > (t->len - (start - 1)))) {
  591. return luaL_error(L, "invalid length");
  592. }
  593. lua_new_text(L, t->start + (start - 1), len, FALSE);
  594. }
  595. else {
  596. if (!t) {
  597. return luaL_error(L, "invalid arguments, text required");
  598. }
  599. else {
  600. return luaL_error(L, "invalid arguments: start offset %d "
  601. "is larger than text len %d",
  602. (int) start, (int) t->len);
  603. }
  604. }
  605. return 1;
  606. }
  607. /* Helpers to behave exactly as Lua does */
  /*
   * Translate a Lua-style start position into a 1-based index:
   * positive values are returned unchanged (even if larger than `len`),
   * zero and negative values reaching before the beginning map to 1,
   * other negative values count from the end (-1 is the last byte).
   */
  static inline gsize
  relative_pos_start(int pos, gsize len)
  {
      if (pos > 0) {
          return pos;
      }

      if (pos == 0 || pos < -((int) len)) {
          return 1;
      }

      /* Negative position inside the text */
      return len + ((gsize) pos) + 1;
  }
  /*
   * Translate a Lua-style end position into a 1-based inclusive index:
   * values beyond the text are clamped to `len`, non-negative values are
   * returned unchanged, negative values count from the end (-1 is the last
   * byte) and those reaching before the beginning map to 0.
   */
  static inline gsize
  relative_pos_end(int pos, gsize len)
  {
      const int ilen = (int) len;

      if (pos > ilen) {
          return len;
      }

      if (pos >= 0) {
          return (size_t) pos;
      }

      if (pos < -ilen) {
          return 0;
      }

      return len + ((gsize) pos) + 1;
  }
  637. static int
  638. lua_text_sub(lua_State *L)
  639. {
  640. LUA_TRACE_POINT;
  641. struct rspamd_lua_text *t = lua_check_text(L, 1);
  642. if (t) {
  643. size_t start = relative_pos_start(luaL_checkinteger(L, 2),
  644. t->len);
  645. size_t end = relative_pos_end(luaL_optinteger(L, 3, -1),
  646. t->len);
  647. if (start <= end) {
  648. lua_new_text(L, t->start + (start - 1),
  649. (end - start) + 1, FALSE);
  650. }
  651. else {
  652. lua_new_text(L, "", 0, TRUE);
  653. }
  654. }
  655. else {
  656. return luaL_error(L, "invalid arguments");
  657. }
  658. return 1;
  659. }
  660. static int64_t
  661. rspamd_lua_text_push_line(lua_State *L,
  662. struct rspamd_lua_text *t,
  663. int64_t start_offset,
  664. const char *sep_pos,
  665. gboolean stringify)
  666. {
  667. const char *start;
  668. gsize len;
  669. int64_t ret;
  670. start = t->start + start_offset;
  671. len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
  672. ret = start_offset + len;
  673. /* Trim line */
  674. while (len > 0) {
  675. if (start[len - 1] == '\r' || start[len - 1] == '\n') {
  676. len--;
  677. }
  678. else {
  679. break;
  680. }
  681. }
  682. if (stringify) {
  683. lua_pushlstring(L, start, len);
  684. }
  685. else {
  686. struct rspamd_lua_text *ntext;
  687. ntext = lua_newuserdata(L, sizeof(*ntext));
  688. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  689. ntext->start = start;
  690. ntext->len = len;
  691. ntext->flags = 0; /* Not own as it must be owned by a top object */
  692. }
  693. return ret;
  694. }
  695. static int
  696. rspamd_lua_text_readline(lua_State *L)
  697. {
  698. struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1));
  699. gboolean stringify = lua_toboolean(L, lua_upvalueindex(2));
  700. int64_t pos = lua_tointeger(L, lua_upvalueindex(3));
  701. if (pos < 0) {
  702. return luaL_error(L, "invalid pos: %d", (int) pos);
  703. }
  704. if (pos >= t->len) {
  705. /* We are done */
  706. return 0;
  707. }
  708. const char *sep_pos;
  709. /* We look just for `\n` ignoring `\r` as it is very rare nowadays */
  710. sep_pos = memchr(t->start + pos, '\n', t->len - pos);
  711. if (sep_pos == NULL) {
  712. /* Either last `\n` or `\r` separated text */
  713. sep_pos = memchr(t->start + pos, '\r', t->len - pos);
  714. }
  715. pos = rspamd_lua_text_push_line(L, t, pos, sep_pos, stringify);
  716. /* Skip separators */
  717. while (pos < t->len) {
  718. if (t->start[pos] == '\n' || t->start[pos] == '\r') {
  719. pos++;
  720. }
  721. else {
  722. break;
  723. }
  724. }
  725. /* Update pos */
  726. lua_pushinteger(L, pos);
  727. lua_replace(L, lua_upvalueindex(3));
  728. return 1;
  729. }
  730. static int
  731. lua_text_lines(lua_State *L)
  732. {
  733. LUA_TRACE_POINT;
  734. struct rspamd_lua_text *t = lua_check_text(L, 1);
  735. gboolean stringify = FALSE;
  736. if (t) {
  737. if (lua_isboolean(L, 2)) {
  738. stringify = lua_toboolean(L, 2);
  739. }
  740. lua_pushvalue(L, 1);
  741. lua_pushboolean(L, stringify);
  742. lua_pushinteger(L, 0); /* Current pos */
  743. lua_pushcclosure(L, rspamd_lua_text_readline, 3);
  744. }
  745. else {
  746. return luaL_error(L, "invalid arguments");
  747. }
  748. return 1;
  749. }
  750. static int
  751. rspamd_lua_text_regexp_split(lua_State *L)
  752. {
  753. struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1)),
  754. *new_t;
  755. struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
  756. lua_touserdata(L, lua_upvalueindex(2));
  757. gboolean stringify = lua_toboolean(L, lua_upvalueindex(3));
  758. int64_t pos = lua_tointeger(L, lua_upvalueindex(4));
  759. gboolean matched;
  760. if (pos < 0) {
  761. return luaL_error(L, "invalid pos: %d", (int) pos);
  762. }
  763. if (pos >= t->len) {
  764. /* We are done */
  765. return 0;
  766. }
  767. const char *start, *end, *old_start;
  768. end = t->start + pos;
  769. for (;;) {
  770. old_start = end;
  771. matched = rspamd_regexp_search(re->re, t->start, t->len, &start, &end, FALSE,
  772. NULL);
  773. if (matched) {
  774. if (start - old_start > 0) {
  775. if (stringify) {
  776. lua_pushlstring(L, old_start, start - old_start);
  777. }
  778. else {
  779. new_t = lua_newuserdata(L, sizeof(*t));
  780. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  781. new_t->start = old_start;
  782. new_t->len = start - old_start;
  783. new_t->flags = 0;
  784. }
  785. break;
  786. }
  787. else {
  788. if (start == end) {
  789. matched = FALSE;
  790. break;
  791. }
  792. /*
  793. * All match separators (e.g. starting separator,
  794. * we need to skip it). Continue iterations.
  795. */
  796. }
  797. }
  798. else {
  799. /* No match, stop */
  800. break;
  801. }
  802. }
  803. if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
  804. /* No more matches, but we might need to push the last element */
  805. if (end == NULL) {
  806. end = t->start;
  807. }
  808. /* No separators, need to push the whole remaining part */
  809. if (stringify) {
  810. lua_pushlstring(L, end, (t->start + t->len) - end);
  811. }
  812. else {
  813. new_t = lua_newuserdata(L, sizeof(*t));
  814. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  815. new_t->start = end;
  816. new_t->len = (t->start + t->len) - end;
  817. new_t->flags = 0;
  818. }
  819. pos = t->len;
  820. }
  821. else {
  822. pos = end - t->start;
  823. }
  824. /* Update pos */
  825. lua_pushinteger(L, pos);
  826. lua_replace(L, lua_upvalueindex(4));
  827. return 1;
  828. }
  829. static int
  830. lua_text_split(lua_State *L)
  831. {
  832. LUA_TRACE_POINT;
  833. struct rspamd_lua_text *t = lua_check_text(L, 1);
  834. struct rspamd_lua_regexp *re;
  835. gboolean stringify = FALSE, own_re = FALSE;
  836. if (t == NULL) {
  837. return luaL_error(L, "invalid arguments");
  838. }
  839. if (lua_type(L, 2) == LUA_TUSERDATA) {
  840. re = lua_check_regexp(L, 2);
  841. }
  842. else {
  843. rspamd_regexp_t *c_re;
  844. GError *err = NULL;
  845. c_re = rspamd_regexp_new(lua_tostring(L, 2), NULL, &err);
  846. if (c_re == NULL) {
  847. int ret = luaL_error(L, "cannot parse regexp: %s, error: %s",
  848. lua_tostring(L, 2),
  849. err == NULL ? "undefined" : err->message);
  850. if (err) {
  851. g_error_free(err);
  852. }
  853. return ret;
  854. }
  855. re = g_malloc0(sizeof(struct rspamd_lua_regexp));
  856. re->re = c_re;
  857. re->re_pattern = g_strdup(lua_tostring(L, 2));
  858. re->module = rspamd_lua_get_module_name(L);
  859. own_re = TRUE;
  860. }
  861. if (re) {
  862. if (lua_isboolean(L, 3)) {
  863. stringify = lua_toboolean(L, 3);
  864. }
  865. /* Upvalues */
  866. lua_pushvalue(L, 1); /* text */
  867. if (own_re) {
  868. struct rspamd_lua_regexp **pre;
  869. pre = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
  870. rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
  871. *pre = re;
  872. }
  873. else {
  874. lua_pushvalue(L, 2); /* regexp */
  875. }
  876. lua_pushboolean(L, stringify);
  877. lua_pushinteger(L, 0); /* Current pos */
  878. lua_pushcclosure(L, rspamd_lua_text_regexp_split, 4);
  879. }
  880. else {
  881. return luaL_error(L, "invalid arguments");
  882. }
  883. return 1;
  884. }
  885. static int
  886. lua_text_at(lua_State *L)
  887. {
  888. return lua_text_byte(L);
  889. }
  890. static int
  891. lua_text_byte(lua_State *L)
  892. {
  893. LUA_TRACE_POINT;
  894. struct rspamd_lua_text *t = lua_check_text(L, 1);
  895. if (!t) {
  896. return luaL_error(L, "invalid arguments");
  897. }
  898. gsize start = relative_pos_start(luaL_optinteger(L, 2, 1), t->len);
  899. gsize end = relative_pos_end(luaL_optinteger(L, 3, start), t->len);
  900. start--;
  901. if (start >= end) {
  902. return 0;
  903. }
  904. for (gsize i = start; i < end; i++) {
  905. lua_pushinteger(L, t->start[i]);
  906. }
  907. return end - start;
  908. }
  909. static int
  910. lua_text_memchr(lua_State *L)
  911. {
  912. LUA_TRACE_POINT;
  913. struct rspamd_lua_text *t = lua_check_text(L, 1);
  914. int c;
  915. bool reverse = false;
  916. if (lua_isnumber(L, 2)) {
  917. c = lua_tonumber(L, 2);
  918. }
  919. else {
  920. gsize l;
  921. const char *str = lua_tolstring(L, 2, &l);
  922. if (str) {
  923. c = str[0];
  924. if (l != 1) {
  925. return luaL_error(L, "need exactly one character to search");
  926. }
  927. }
  928. else {
  929. return luaL_error(L, "invalid arguments");
  930. }
  931. }
  932. if (t) {
  933. void *f;
  934. if (lua_isboolean(L, 3)) {
  935. reverse = lua_toboolean(L, 3);
  936. }
  937. if (reverse) {
  938. f = rspamd_memrchr(t->start, c, t->len);
  939. }
  940. else {
  941. f = memchr(t->start, c, t->len);
  942. }
  943. if (f) {
  944. lua_pushinteger(L, ((const char *) f) - t->start + 1);
  945. }
  946. else {
  947. lua_pushinteger(L, -1);
  948. }
  949. }
  950. else {
  951. return luaL_error(L, "invalid arguments");
  952. }
  953. return 1;
  954. }
  955. static int
  956. lua_text_bytes(lua_State *L)
  957. {
  958. LUA_TRACE_POINT;
  959. struct rspamd_lua_text *t = lua_check_text(L, 1);
  960. if (t) {
  961. lua_createtable(L, t->len, 0);
  962. for (gsize i = 0; i < t->len; i++) {
  963. lua_pushinteger(L, (unsigned char) t->start[i]);
  964. lua_rawseti(L, -2, i + 1);
  965. }
  966. }
  967. else {
  968. return luaL_error(L, "invalid arguments");
  969. }
  970. return 1;
  971. }
  972. static int
  973. lua_text_save_in_file(lua_State *L)
  974. {
  975. LUA_TRACE_POINT;
  976. struct rspamd_lua_text *t = lua_check_text(L, 1);
  977. const char *fname = NULL;
  978. unsigned int mode = 00644;
  979. int fd = -1;
  980. gboolean need_close = FALSE;
  981. if (t != NULL) {
  982. if (lua_type(L, 2) == LUA_TSTRING) {
  983. fname = luaL_checkstring(L, 2);
  984. if (lua_type(L, 3) == LUA_TNUMBER) {
  985. mode = lua_tointeger(L, 3);
  986. }
  987. }
  988. else if (lua_type(L, 2) == LUA_TNUMBER) {
  989. /* Created fd */
  990. fd = lua_tointeger(L, 2);
  991. }
  992. if (fd == -1) {
  993. if (fname) {
  994. fd = rspamd_file_xopen(fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);
  995. if (fd == -1) {
  996. lua_pushboolean(L, false);
  997. lua_pushstring(L, strerror(errno));
  998. return 2;
  999. }
  1000. need_close = TRUE;
  1001. }
  1002. else {
  1003. fd = STDOUT_FILENO;
  1004. }
  1005. }
  1006. if (write(fd, t->start, t->len) == -1) {
  1007. if (fd != STDOUT_FILENO) {
  1008. close(fd);
  1009. }
  1010. lua_pushboolean(L, false);
  1011. lua_pushstring(L, strerror(errno));
  1012. return 2;
  1013. }
  1014. if (need_close) {
  1015. close(fd);
  1016. }
  1017. lua_pushboolean(L, true);
  1018. }
  1019. else {
  1020. return luaL_error(L, "invalid arguments");
  1021. }
  1022. return 1;
  1023. }
  1024. static int
  1025. lua_text_gc(lua_State *L)
  1026. {
  1027. LUA_TRACE_POINT;
  1028. struct rspamd_lua_text *t = lua_check_text(L, 1);
  1029. if (t != NULL) {
  1030. g_assert(!(t->flags & RSPAMD_TEXT_FLAG_FAKE));
  1031. if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  1032. if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
  1033. rspamd_explicit_memzero((unsigned char *) t->start, t->len);
  1034. }
  1035. if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
  1036. munmap((gpointer) t->start, t->len);
  1037. }
  1038. else {
  1039. if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
  1040. free((gpointer) t->start);
  1041. }
  1042. else {
  1043. g_free((gpointer) t->start);
  1044. }
  1045. }
  1046. }
  1047. }
  1048. return 0;
  1049. }
  1050. static int
  1051. lua_text_eq(lua_State *L)
  1052. {
  1053. LUA_TRACE_POINT;
  1054. struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
  1055. *t2 = lua_check_text_or_string(L, 2);
  1056. if (t1->len == t2->len) {
  1057. lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) == 0);
  1058. }
  1059. else {
  1060. lua_pushboolean(L, false);
  1061. }
  1062. return 1;
  1063. }
  1064. static int
  1065. lua_text_lt(lua_State *L)
  1066. {
  1067. LUA_TRACE_POINT;
  1068. struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
  1069. *t2 = lua_check_text_or_string(L, 2);
  1070. if (t1 && t2) {
  1071. if (t1->len == t2->len) {
  1072. lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) < 0);
  1073. }
  1074. else {
  1075. lua_pushboolean(L, t1->len < t2->len);
  1076. }
  1077. }
  1078. return 1;
  1079. }
  1080. static int
  1081. lua_text_concat(lua_State *L)
  1082. {
  1083. LUA_TRACE_POINT;
  1084. struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
  1085. *t2 = lua_check_text_or_string(L, 2);
  1086. if (t1 && t2) {
  1087. struct rspamd_lua_text *final;
  1088. final = lua_new_text(L, NULL, t1->len + t2->len, TRUE);
  1089. memcpy((void *) final->start, t1->start, t1->len);
  1090. memcpy((void *) (final->start + t1->len), t2->start, t2->len);
  1091. }
  1092. return 1;
  1093. }
  1094. static int
  1095. lua_text_wipe(lua_State *L)
  1096. {
  1097. LUA_TRACE_POINT;
  1098. struct rspamd_lua_text *t = lua_check_text(L, 1);
  1099. if (t != NULL) {
  1100. if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  1101. rspamd_explicit_memzero((unsigned char *) t->start, t->len);
  1102. }
  1103. else {
  1104. return luaL_error(L, "cannot wipe not owned text");
  1105. }
  1106. }
  1107. else {
  1108. return luaL_error(L, "invalid arguments");
  1109. }
  1110. return 0;
  1111. }
  1112. static int
  1113. lua_text_base32(lua_State *L)
  1114. {
  1115. LUA_TRACE_POINT;
  1116. struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
  1117. enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;
  1118. if (t != NULL) {
  1119. if (lua_type(L, 2) == LUA_TSTRING) {
  1120. btype = rspamd_base32_decode_type_from_str(lua_tostring(L, 2));
  1121. if (btype == RSPAMD_BASE32_INVALID) {
  1122. return luaL_error(L, "invalid b32 type: %s", lua_tostring(L, 2));
  1123. }
  1124. }
  1125. out = lua_new_text(L, NULL, t->len * 8 / 5 + 2, TRUE);
  1126. out->len = rspamd_encode_base32_buf(t->start, t->len, (char *) out->start,
  1127. out->len, btype);
  1128. }
  1129. else {
  1130. return luaL_error(L, "invalid arguments");
  1131. }
  1132. return 1;
  1133. }
  1134. static int
  1135. lua_text_base64(lua_State *L)
  1136. {
  1137. LUA_TRACE_POINT;
  1138. struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
  1139. gsize line_len = 0;
  1140. gboolean fold = FALSE;
  1141. if (t != NULL) {
  1142. if (lua_type(L, 2) == LUA_TNUMBER) {
  1143. line_len = lua_tointeger(L, 2);
  1144. if (line_len <= 8) {
  1145. return luaL_error(L, "too small line length (at least 8 is required)");
  1146. }
  1147. }
  1148. enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;
  1149. if (lua_type(L, 3) == LUA_TSTRING) {
  1150. const char *how_str = lua_tostring(L, 3);
  1151. if (g_ascii_strcasecmp(how_str, "cr") == 0) {
  1152. how = RSPAMD_TASK_NEWLINES_CR;
  1153. }
  1154. else if (g_ascii_strcasecmp(how_str, "lf") == 0) {
  1155. how = RSPAMD_TASK_NEWLINES_LF;
  1156. }
  1157. else if (g_ascii_strcasecmp(how_str, "crlf") != 0) {
  1158. return luaL_error(L, "invalid newline style: %s", how_str);
  1159. }
  1160. }
  1161. if (lua_type(L, 4) == LUA_TBOOLEAN) {
  1162. fold = lua_toboolean(L, 4);
  1163. }
  1164. gsize sz_len;
  1165. out = lua_newuserdata(L, sizeof(*t));
  1166. out->flags = RSPAMD_TEXT_FLAG_OWN;
  1167. out->start = rspamd_encode_base64_common(t->start, t->len,
  1168. line_len, &sz_len, fold, how);
  1169. out->len = sz_len;
  1170. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  1171. }
  1172. else {
  1173. return luaL_error(L, "invalid arguments");
  1174. }
  1175. return 1;
  1176. }
  1177. static int
  1178. lua_text_hex(lua_State *L)
  1179. {
  1180. LUA_TRACE_POINT;
  1181. struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
  1182. if (t != NULL) {
  1183. out = lua_new_text(L, NULL, t->len * 2, TRUE);
  1184. out->len = rspamd_encode_hex_buf(t->start, t->len, (char *) out->start,
  1185. out->len);
  1186. }
  1187. else {
  1188. return luaL_error(L, "invalid arguments");
  1189. }
  1190. return 1;
  1191. }
  1192. static int
  1193. lua_text_find(lua_State *L)
  1194. {
  1195. LUA_TRACE_POINT;
  1196. struct rspamd_lua_text *t = lua_check_text(L, 1);
  1197. gsize patlen, init = 1;
  1198. const char *pat = luaL_checklstring(L, 2, &patlen);
  1199. if (t != NULL && pat != NULL) {
  1200. if (lua_isnumber(L, 3)) {
  1201. init = relative_pos_start(lua_tointeger(L, 3), t->len);
  1202. }
  1203. init--;
  1204. if (init > t->len) {
  1205. return luaL_error(L, "invalid arguments to find: init too large");
  1206. }
  1207. goffset pos = rspamd_substring_search(t->start + init,
  1208. t->len - init,
  1209. pat, patlen);
  1210. if (pos == -1) {
  1211. lua_pushnil(L);
  1212. return 1;
  1213. }
  1214. lua_pushinteger(L, pos + 1);
  1215. lua_pushinteger(L, pos + patlen);
  1216. }
  1217. else {
  1218. return luaL_error(L, "invalid arguments");
  1219. }
  1220. return 2;
  1221. }
  1222. #define BITOP(a, b, op) \
  1223. ((a)[(uint64_t) (b) / (8u * sizeof *(a))] op(uint64_t) 1 << ((uint64_t) (b) % (8u * sizeof *(a))))
  1224. static int
  1225. lua_text_exclude_chars(lua_State *L)
  1226. {
  1227. LUA_TRACE_POINT;
  1228. struct rspamd_lua_text *t = lua_check_text(L, 1);
  1229. gssize patlen;
  1230. const char *pat = lua_tolstring(L, 2, &patlen), *p, *end;
  1231. char *dest, *d;
  1232. uint64_t byteset[32 / sizeof(uint64_t)]; /* Bitset for ascii */
  1233. gboolean copy = TRUE;
  1234. unsigned int *plen;
  1235. if (t != NULL && pat && patlen > 0) {
  1236. if (lua_isboolean(L, 3)) {
  1237. copy = lua_toboolean(L, 3);
  1238. }
  1239. else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  1240. copy = FALSE;
  1241. }
  1242. if (!copy) {
  1243. dest = (char *) t->start;
  1244. plen = &t->len;
  1245. lua_pushvalue(L, 1); /* Push text as a result */
  1246. }
  1247. else {
  1248. /* We need to copy read only text */
  1249. struct rspamd_lua_text *nt;
  1250. dest = g_malloc(t->len);
  1251. nt = lua_newuserdata(L, sizeof(*nt));
  1252. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  1253. nt->len = t->len;
  1254. nt->flags = RSPAMD_TEXT_FLAG_OWN;
  1255. memcpy(dest, t->start, t->len);
  1256. nt->start = dest;
  1257. plen = &nt->len;
  1258. }
  1259. /* Fill pattern bitset */
  1260. memset(byteset, 0, sizeof byteset);
  1261. while (patlen > 0) {
  1262. if (*pat == '%') {
  1263. pat++;
  1264. patlen--;
  1265. if (patlen > 0) {
  1266. /*
  1267. * This stuff assumes little endian, but GUINT64_FROM_LE should
  1268. * deal with proper conversion
  1269. */
  1270. switch (*pat) {
  1271. case '%':
  1272. BITOP(byteset, *(unsigned char *) pat, |=);
  1273. break;
  1274. case 's':
  1275. /* "\r\n\t\f " */
  1276. byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
  1277. break;
  1278. case 'n':
  1279. /* newlines: "\r\n" */
  1280. byteset[0] |= GUINT64_FROM_LE(0x2400LLU);
  1281. break;
  1282. case '8':
  1283. /* 8 bit characters */
  1284. byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
  1285. byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
  1286. break;
  1287. case 'c':
  1288. /* Non printable (control) characters */
  1289. byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU);
  1290. /* Del character */
  1291. byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU);
  1292. break;
  1293. }
  1294. }
  1295. else {
  1296. /* Last '%' */
  1297. BITOP(byteset, (unsigned char) '%', |=);
  1298. }
  1299. }
  1300. else {
  1301. BITOP(byteset, *(unsigned char *) pat, |=);
  1302. }
  1303. pat++;
  1304. patlen--;
  1305. }
  1306. for (; patlen > 0 && BITOP(byteset, *(unsigned char *) pat, |=); pat++, patlen--)
  1307. ;
  1308. p = t->start;
  1309. end = t->start + t->len;
  1310. d = dest;
  1311. while (p < end) {
  1312. if (!BITOP(byteset, *(unsigned char *) p, &)) {
  1313. *d++ = *p;
  1314. }
  1315. p++;
  1316. }
  1317. *(plen) = d - dest;
  1318. }
  1319. else {
  1320. return luaL_error(L, "invalid arguments");
  1321. }
  1322. return 1;
  1323. }
  1324. static int
  1325. lua_text_oneline(lua_State *L)
  1326. {
  1327. LUA_TRACE_POINT;
  1328. struct rspamd_lua_text *t = lua_check_text(L, 1);
  1329. const char *p, *end;
  1330. char *dest, *d;
  1331. uint64_t byteset[32 / sizeof(uint64_t)]; /* Bitset for ascii */
  1332. gboolean copy = TRUE, seen_8bit = FALSE;
  1333. unsigned int *plen;
  1334. if (t != NULL) {
  1335. if (lua_isboolean(L, 2)) {
  1336. copy = lua_toboolean(L, 2);
  1337. }
  1338. else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
  1339. copy = FALSE;
  1340. }
  1341. if (!copy) {
  1342. dest = (char *) t->start;
  1343. plen = &t->len;
  1344. lua_pushvalue(L, 1); /* Push text as a result */
  1345. }
  1346. else {
  1347. /* We need to copy read only text */
  1348. struct rspamd_lua_text *nt;
  1349. dest = g_malloc(t->len);
  1350. nt = lua_newuserdata(L, sizeof(*nt));
  1351. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  1352. nt->len = t->len;
  1353. nt->flags = RSPAMD_TEXT_FLAG_OWN;
  1354. memcpy(dest, t->start, t->len);
  1355. nt->start = dest;
  1356. plen = &nt->len;
  1357. }
  1358. /* Fill pattern bitset */
  1359. memset(byteset, 0, sizeof byteset);
  1360. /* All spaces */
  1361. byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
  1362. /* Control characters */
  1363. byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU);
  1364. /* Del character */
  1365. byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU);
  1366. /* 8 bit characters */
  1367. byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
  1368. byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
  1369. p = t->start;
  1370. end = t->start + t->len;
  1371. d = dest;
  1372. while (p < end) {
  1373. if (!BITOP(byteset, *(unsigned char *) p, &)) {
  1374. *d++ = *p;
  1375. }
  1376. else {
  1377. if ((*(unsigned char *) p) & 0x80) {
  1378. seen_8bit = TRUE;
  1379. *d++ = *p;
  1380. }
  1381. else {
  1382. if (*p == ' ') {
  1383. if (d != dest) {
  1384. *d++ = *p++;
  1385. }
  1386. while (p < end && g_ascii_isspace(*p)) {
  1387. p++;
  1388. }
  1389. continue; /* To avoid p++ */
  1390. }
  1391. else if (*p == '\r' || *p == '\n') {
  1392. if (d != dest) {
  1393. *d++ = ' ';
  1394. p++;
  1395. }
  1396. while (p < end && g_ascii_isspace(*p)) {
  1397. p++;
  1398. }
  1399. continue; /* To avoid p++ */
  1400. }
  1401. }
  1402. }
  1403. p++;
  1404. }
  1405. while (d > dest && g_ascii_isspace(*(d - 1))) {
  1406. d--;
  1407. }
  1408. if (seen_8bit) {
  1409. if (rspamd_fast_utf8_validate(dest, d - dest) != 0) {
  1410. /* Need to make it valid :( */
  1411. UChar32 uc;
  1412. goffset err_offset;
  1413. gsize remain = d - dest;
  1414. char *nd = dest;
  1415. while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(nd, remain)) > 0) {
  1416. int i = 0;
  1417. err_offset--; /* As it returns it 1 indexed */
  1418. nd += err_offset;
  1419. remain -= err_offset;
  1420. /* Each invalid character of input requires 3 bytes of output (+2 bytes) */
  1421. while (i < remain) {
  1422. int old_pos = i;
  1423. U8_NEXT(nd, i, remain, uc);
  1424. if (uc < 0) {
  1425. nd[old_pos] = '?';
  1426. }
  1427. else {
  1428. break;
  1429. }
  1430. }
  1431. nd += i;
  1432. remain -= i;
  1433. }
  1434. }
  1435. }
  1436. *(plen) = d - dest;
  1437. }
  1438. else {
  1439. return luaL_error(L, "invalid arguments");
  1440. }
  1441. return 1;
  1442. }
  1443. static int
  1444. lua_text_lower(lua_State *L)
  1445. {
  1446. LUA_TRACE_POINT;
  1447. struct rspamd_lua_text *t = lua_check_text(L, 1), *nt;
  1448. gboolean is_utf8 = FALSE, is_inplace = FALSE;
  1449. if (t != NULL) {
  1450. if (lua_isboolean(L, 2)) {
  1451. is_utf8 = lua_toboolean(L, 2);
  1452. }
  1453. if (lua_isboolean(L, 3)) {
  1454. is_inplace = lua_toboolean(L, 3);
  1455. }
  1456. if (is_inplace) {
  1457. nt = t;
  1458. lua_pushvalue(L, 1);
  1459. }
  1460. else {
  1461. nt = lua_new_text(L, t->start, t->len, TRUE);
  1462. }
  1463. if (!is_utf8) {
  1464. rspamd_str_lc((char *) nt->start, nt->len);
  1465. }
  1466. else {
  1467. rspamd_str_lc_utf8((char *) nt->start, nt->len);
  1468. }
  1469. }
  1470. else {
  1471. return luaL_error(L, "invalid arguments");
  1472. }
  1473. return 1;
  1474. }
  1475. static int
  1476. lua_text_strtoul(lua_State *L)
  1477. {
  1478. LUA_TRACE_POINT;
  1479. struct rspamd_lua_text *t = lua_check_text(L, 1);
  1480. if (t) {
  1481. unsigned long ll;
  1482. if (rspamd_strtoul(t->start, t->len, &ll)) {
  1483. lua_pushinteger(L, ll);
  1484. }
  1485. else {
  1486. lua_pushnil(L);
  1487. }
  1488. }
  1489. else {
  1490. return luaL_error(L, "invalid arguments");
  1491. }
  1492. return 1;
  1493. }
  1494. /* Used to distinguish lua text metatable */
  1495. static const unsigned int rspamd_lua_text_cookie = 0x2b21ef6fU;
  1496. static int
  1497. lua_load_text(lua_State *L)
  1498. {
  1499. lua_newtable(L);
  1500. lua_pushstring(L, "cookie");
  1501. lua_pushnumber(L, rspamd_lua_text_cookie);
  1502. lua_settable(L, -3);
  1503. luaL_register(L, NULL, textlib_f);
  1504. return 1;
  1505. }
  1506. void luaopen_text(lua_State *L)
  1507. {
  1508. rspamd_lua_new_class(L, rspamd_text_classname, textlib_m);
  1509. lua_pushstring(L, "cookie");
  1510. lua_pushnumber(L, rspamd_lua_text_cookie);
  1511. lua_settable(L, -3);
  1512. lua_pop(L, 1);
  1513. rspamd_lua_add_preload(L, "rspamd_text", lua_load_text);
  1514. }