You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_regexp.c 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
/***
 * @module rspamd_regexp
 * Rspamd regexp is a utility module that handles rspamd Perl-compatible
 * regular expressions
 * @example
 * local rspamd_regexp = require "rspamd_regexp"
 *
 * local re = rspamd_regexp.create_cached('/^\\s*some_string\\s*$/i')
 * re:match('some_string')
 * local re = rspamd_regexp.create_cached('/\\s+/i')
 * re:split('word word word') -- returns {'word', 'word', 'word'}
 */
  29. LUA_FUNCTION_DEF(regexp, create);
  30. LUA_FUNCTION_DEF(regexp, import_glob);
  31. LUA_FUNCTION_DEF(regexp, import_plain);
  32. LUA_FUNCTION_DEF(regexp, create_cached);
  33. LUA_FUNCTION_DEF(regexp, get_cached);
  34. LUA_FUNCTION_DEF(regexp, get_pattern);
  35. LUA_FUNCTION_DEF(regexp, set_limit);
  36. LUA_FUNCTION_DEF(regexp, set_max_hits);
  37. LUA_FUNCTION_DEF(regexp, get_max_hits);
  38. LUA_FUNCTION_DEF(regexp, search);
  39. LUA_FUNCTION_DEF(regexp, match);
  40. LUA_FUNCTION_DEF(regexp, matchn);
  41. LUA_FUNCTION_DEF(regexp, split);
  42. LUA_FUNCTION_DEF(regexp, destroy);
  43. LUA_FUNCTION_DEF(regexp, gc);
  44. static const struct luaL_reg regexplib_m[] = {
  45. LUA_INTERFACE_DEF(regexp, get_pattern),
  46. LUA_INTERFACE_DEF(regexp, set_limit),
  47. LUA_INTERFACE_DEF(regexp, set_max_hits),
  48. LUA_INTERFACE_DEF(regexp, get_max_hits),
  49. LUA_INTERFACE_DEF(regexp, match),
  50. LUA_INTERFACE_DEF(regexp, matchn),
  51. LUA_INTERFACE_DEF(regexp, search),
  52. LUA_INTERFACE_DEF(regexp, split),
  53. LUA_INTERFACE_DEF(regexp, destroy),
  54. {"__tostring", lua_regexp_get_pattern},
  55. {"__gc", lua_regexp_gc},
  56. {NULL, NULL}};
  57. static const struct luaL_reg regexplib_f[] = {
  58. LUA_INTERFACE_DEF(regexp, create),
  59. LUA_INTERFACE_DEF(regexp, import_glob),
  60. LUA_INTERFACE_DEF(regexp, import_plain),
  61. LUA_INTERFACE_DEF(regexp, get_cached),
  62. LUA_INTERFACE_DEF(regexp, create_cached),
  63. {NULL, NULL}};
  64. #define LUA_REGEXP_FLAG_DESTROYED (1 << 0)
  65. #define IS_DESTROYED(re) ((re)->re_flags & LUA_REGEXP_FLAG_DESTROYED)
  66. rspamd_mempool_t *regexp_static_pool = NULL;
  67. struct rspamd_lua_regexp *
  68. lua_check_regexp(lua_State *L, int pos)
  69. {
  70. void *ud = rspamd_lua_check_udata(L, pos, rspamd_regexp_classname);
  71. luaL_argcheck(L, ud != NULL, pos, "'regexp' expected");
  72. return ud ? *((struct rspamd_lua_regexp **) ud) : NULL;
  73. }
  74. /***
  75. * @function rspamd_regexp.create(pattern[, flags])
  76. * Creates new rspamd_regexp
  77. * @param {string} pattern pattern to build regexp. If this pattern is enclosed in `//` then it is possible to specify flags after it
  78. * @param {string} flags optional flags to create regular expression
  79. * @return {regexp} regexp argument that is *not* automatically destroyed
  80. * @example
  81. * local regexp = require "rspamd_regexp"
  82. *
  83. * local re = regexp.create('/^test.*[0-9]\\s*$/i')
  84. */
  85. static int
  86. lua_regexp_create(lua_State *L)
  87. {
  88. LUA_TRACE_POINT;
  89. rspamd_regexp_t *re;
  90. struct rspamd_lua_regexp *new, **pnew;
  91. const char *string, *flags_str = NULL;
  92. GError *err = NULL;
  93. string = luaL_checkstring(L, 1);
  94. if (lua_gettop(L) == 2) {
  95. flags_str = luaL_checkstring(L, 2);
  96. }
  97. if (string) {
  98. re = rspamd_regexp_new(string, flags_str, &err);
  99. if (re == NULL) {
  100. lua_pushnil(L);
  101. msg_info("cannot parse regexp: %s, error: %s",
  102. string,
  103. err == NULL ? "undefined" : err->message);
  104. g_error_free(err);
  105. }
  106. else {
  107. new = g_malloc0(sizeof(struct rspamd_lua_regexp));
  108. new->re = re;
  109. new->re_pattern = g_strdup(string);
  110. new->module = rspamd_lua_get_module_name(L);
  111. pnew = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
  112. rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
  113. *pnew = new;
  114. }
  115. }
  116. else {
  117. return luaL_error(L, "invalid arguments");
  118. }
  119. return 1;
  120. }
  121. /***
  122. * @function rspamd_regexp.import_glob(glob_pattern[, flags])
  123. * Creates new rspamd_regexp from glob
  124. * @param {string} pattern pattern to build regexp.
  125. * @param {string} flags optional flags to create regular expression
  126. * @return {regexp} regexp argument that is *not* automatically destroyed
  127. * @example
  128. * local regexp = require "rspamd_regexp"
  129. *
  130. * local re = regexp.import_glob('ab*', 'i')
  131. */
  132. static int
  133. lua_regexp_import_glob(lua_State *L)
  134. {
  135. LUA_TRACE_POINT;
  136. rspamd_regexp_t *re;
  137. struct rspamd_lua_regexp *new, **pnew;
  138. const char *string, *flags_str = NULL;
  139. char *escaped;
  140. gsize pat_len;
  141. GError *err = NULL;
  142. string = luaL_checklstring(L, 1, &pat_len);
  143. if (lua_gettop(L) == 2) {
  144. flags_str = luaL_checkstring(L, 2);
  145. }
  146. if (string) {
  147. escaped = rspamd_str_regexp_escape(string, pat_len, NULL,
  148. RSPAMD_REGEXP_ESCAPE_GLOB | RSPAMD_REGEXP_ESCAPE_UTF);
  149. re = rspamd_regexp_new(escaped, flags_str, &err);
  150. if (re == NULL) {
  151. lua_pushnil(L);
  152. msg_info("cannot parse regexp: %s, error: %s",
  153. string,
  154. err == NULL ? "undefined" : err->message);
  155. g_error_free(err);
  156. g_free(escaped);
  157. }
  158. else {
  159. new = g_malloc0(sizeof(struct rspamd_lua_regexp));
  160. new->re = re;
  161. new->re_pattern = escaped;
  162. new->module = rspamd_lua_get_module_name(L);
  163. pnew = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
  164. rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
  165. *pnew = new;
  166. }
  167. }
  168. else {
  169. return luaL_error(L, "invalid arguments");
  170. }
  171. return 1;
  172. }
  173. /***
  174. * @function rspamd_regexp.import_plain(plain_string[, flags])
  175. * Creates new rspamd_regexp from plain string (escaping specials)
  176. * @param {string} pattern pattern to build regexp.
  177. * @param {string} flags optional flags to create regular expression
  178. * @return {regexp} regexp argument that is *not* automatically destroyed
  179. * @example
  180. * local regexp = require "rspamd_regexp"
  181. *
  182. * local re = regexp.import_plain('exact_string_with*', 'i')
  183. */
  184. static int
  185. lua_regexp_import_plain(lua_State *L)
  186. {
  187. LUA_TRACE_POINT;
  188. rspamd_regexp_t *re;
  189. struct rspamd_lua_regexp *new, **pnew;
  190. const char *string, *flags_str = NULL;
  191. char *escaped;
  192. gsize pat_len;
  193. GError *err = NULL;
  194. string = luaL_checklstring(L, 1, &pat_len);
  195. if (lua_gettop(L) == 2) {
  196. flags_str = luaL_checkstring(L, 2);
  197. }
  198. if (string) {
  199. escaped = rspamd_str_regexp_escape(string, pat_len, NULL,
  200. RSPAMD_REGEXP_ESCAPE_ASCII);
  201. re = rspamd_regexp_new(escaped, flags_str, &err);
  202. if (re == NULL) {
  203. lua_pushnil(L);
  204. msg_info("cannot parse regexp: %s, error: %s",
  205. string,
  206. err == NULL ? "undefined" : err->message);
  207. g_error_free(err);
  208. g_free(escaped);
  209. }
  210. else {
  211. new = g_malloc0(sizeof(struct rspamd_lua_regexp));
  212. new->re = re;
  213. new->re_pattern = escaped;
  214. new->module = rspamd_lua_get_module_name(L);
  215. pnew = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
  216. rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
  217. *pnew = new;
  218. }
  219. }
  220. else {
  221. return luaL_error(L, "invalid arguments");
  222. }
  223. return 1;
  224. }
  225. /***
  226. * @function rspamd_regexp.get_cached(pattern)
  227. * This function gets cached and pre-compiled regexp created by either `create`
  228. * or `create_cached` methods. If no cached regexp is found then `nil` is returned.
  229. *
  230. * @param {string} pattern regexp pattern
  231. * @return {regexp} cached regexp structure or `nil`
  232. */
  233. static int
  234. lua_regexp_get_cached(lua_State *L)
  235. {
  236. LUA_TRACE_POINT;
  237. rspamd_regexp_t *re;
  238. struct rspamd_lua_regexp *new, **pnew;
  239. const char *string, *flags_str = NULL;
  240. string = luaL_checkstring(L, 1);
  241. if (lua_gettop(L) == 2) {
  242. flags_str = luaL_checkstring(L, 2);
  243. }
  244. if (string) {
  245. re = rspamd_regexp_cache_query(NULL, string, flags_str);
  246. if (re) {
  247. new = g_malloc0(sizeof(struct rspamd_lua_regexp));
  248. new->re = rspamd_regexp_ref(re);
  249. new->re_pattern = g_strdup(string);
  250. new->module = rspamd_lua_get_module_name(L);
  251. pnew = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
  252. rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
  253. *pnew = new;
  254. }
  255. else {
  256. lua_pushnil(L);
  257. }
  258. }
  259. else {
  260. return luaL_error(L, "invalid arguments");
  261. }
  262. return 1;
  263. }
  264. /***
  265. * @function rspamd_regexp.create_cached(pattern[, flags])
  266. * This function is similar to `create` but it tries to search for regexp in the
  267. * cache first.
  268. * @param {string} pattern pattern to build regexp. If this pattern is enclosed in `//` then it is possible to specify flags after it
  269. * @param {string} flags optional flags to create regular expression
  270. * @return {regexp} regexp argument that is *not* automatically destroyed
  271. * @example
  272. * local regexp = require "rspamd_regexp"
  273. *
  274. * local re = regexp.create_cached('/^test.*[0-9]\\s*$/i')
  275. * ...
  276. * -- This doesn't create new regexp object
  277. * local other_re = regexp.create_cached('/^test.*[0-9]\\s*$/i')
  278. */
  279. static int
  280. lua_regexp_create_cached(lua_State *L)
  281. {
  282. LUA_TRACE_POINT;
  283. rspamd_regexp_t *re;
  284. struct rspamd_lua_regexp *new, **pnew;
  285. const char *string, *flags_str = NULL;
  286. GError *err = NULL;
  287. string = luaL_checkstring(L, 1);
  288. if (lua_gettop(L) == 2) {
  289. flags_str = luaL_checkstring(L, 2);
  290. }
  291. if (string) {
  292. re = rspamd_regexp_cache_query(NULL, string, flags_str);
  293. if (re) {
  294. new = g_malloc0(sizeof(struct rspamd_lua_regexp));
  295. new->re = rspamd_regexp_ref(re);
  296. new->re_pattern = g_strdup(string);
  297. new->module = rspamd_lua_get_module_name(L);
  298. pnew = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
  299. rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
  300. *pnew = new;
  301. }
  302. else {
  303. re = rspamd_regexp_cache_create(NULL, string, flags_str, &err);
  304. if (re == NULL) {
  305. lua_pushnil(L);
  306. msg_info("cannot parse regexp: %s, error: %s",
  307. string,
  308. err == NULL ? "undefined" : err->message);
  309. g_error_free(err);
  310. }
  311. else {
  312. new = g_malloc0(sizeof(struct rspamd_lua_regexp));
  313. new->re = rspamd_regexp_ref(re);
  314. new->re_pattern = g_strdup(string);
  315. new->module = rspamd_lua_get_module_name(L);
  316. pnew = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
  317. rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
  318. *pnew = new;
  319. }
  320. }
  321. }
  322. else {
  323. return luaL_error(L, "invalid arguments");
  324. }
  325. return 1;
  326. }
  327. /***
  328. * @method re:get_pattern()
  329. * Get a pattern for specified regexp object
  330. * @return {string} pattern line
  331. */
  332. static int
  333. lua_regexp_get_pattern(lua_State *L)
  334. {
  335. LUA_TRACE_POINT;
  336. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  337. if (re && re->re && !IS_DESTROYED(re)) {
  338. lua_pushstring(L, rspamd_regexp_get_pattern(re->re));
  339. }
  340. else {
  341. lua_pushnil(L);
  342. }
  343. return 1;
  344. }
  345. /***
  346. * @method re:set_limit(lim)
  347. * Set maximum size of text length to be matched with this regexp (if `lim` is
  348. * less or equal to zero then all texts are checked)
  349. * @param {number} lim limit in bytes
  350. */
  351. static int
  352. lua_regexp_set_limit(lua_State *L)
  353. {
  354. LUA_TRACE_POINT;
  355. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  356. int64_t lim;
  357. lim = lua_tointeger(L, 2);
  358. if (re && re->re && !IS_DESTROYED(re)) {
  359. if (lim > 0) {
  360. rspamd_regexp_set_match_limit(re->re, lim);
  361. }
  362. else {
  363. rspamd_regexp_set_match_limit(re->re, 0);
  364. }
  365. }
  366. return 0;
  367. }
  368. /***
  369. * @method re:set_max_hits(lim)
  370. * Set maximum number of hits returned by a regexp
  371. * @param {number} lim limit in hits count
  372. * @return {number} old number of max hits
  373. */
  374. static int
  375. lua_regexp_set_max_hits(lua_State *L)
  376. {
  377. LUA_TRACE_POINT;
  378. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  379. unsigned int lim;
  380. lim = luaL_checkinteger(L, 2);
  381. if (re && re->re && !IS_DESTROYED(re)) {
  382. lua_pushinteger(L, rspamd_regexp_set_maxhits(re->re, lim));
  383. }
  384. else {
  385. lua_pushnil(L);
  386. }
  387. return 1;
  388. }
  389. /***
  390. * @method re:get_max_hits(lim)
  391. * Get maximum number of hits returned by a regexp
  392. * @return {number} number of max hits
  393. */
  394. static int
  395. lua_regexp_get_max_hits(lua_State *L)
  396. {
  397. LUA_TRACE_POINT;
  398. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  399. if (re && re->re && !IS_DESTROYED(re)) {
  400. lua_pushinteger(L, rspamd_regexp_get_maxhits(re->re));
  401. }
  402. else {
  403. lua_pushinteger(L, 1);
  404. }
  405. return 1;
  406. }
  407. /***
  408. * @method re:search(line[, raw[, capture]])
  409. * Search line in regular expression object. If line matches then this
  410. * function returns the table of captured strings. Otherwise, nil is returned.
  411. * If `raw` is specified, then input is treated as raw data not encoded in `utf-8`.
  412. * If `capture` is true, then this function saves all captures to the table of
  413. * values, so the first element is the whole matched string and the
  414. * subsequent elements are ordered captures defined within pattern.
  415. *
  416. * @param {string} line match the specified line against regexp object
  417. * @param {bool} match raw regexp instead of utf8 one
  418. * @param {bool} capture perform subpatterns capturing
  419. * @return {table or nil} table of strings or tables (if `capture` is true) or nil if not matched
  420. * @example
  421. * local re = regexp.create_cached('/^\s*([0-9]+)\s*$/')
  422. * -- returns nil
  423. * local m1 = re:search('blah')
  424. * local m2 = re:search(' 190 ')
  425. * -- prints ' 190 '
  426. * print(m2[1])
  427. *
  428. * local m3 = re:search(' 100500 ')
  429. * -- prints ' 100500 '
  430. * print(m3[1][1])
  431. * -- prints '100500' capture
  432. * print(m3[1][2])
  433. */
  434. static int
  435. lua_regexp_search(lua_State *L)
  436. {
  437. LUA_TRACE_POINT;
  438. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  439. const char *data = NULL;
  440. struct rspamd_lua_text *t;
  441. const char *start = NULL, *end = NULL;
  442. int i;
  443. gsize len = 0, capn;
  444. gboolean matched = FALSE, capture = FALSE, raw = FALSE;
  445. GArray *captures = NULL;
  446. struct rspamd_re_capture *cap;
  447. if (re && !IS_DESTROYED(re)) {
  448. if (lua_type(L, 2) == LUA_TSTRING) {
  449. data = luaL_checklstring(L, 2, &len);
  450. }
  451. else if (lua_type(L, 2) == LUA_TUSERDATA) {
  452. t = lua_check_text(L, 2);
  453. if (t != NULL) {
  454. data = t->start;
  455. len = t->len;
  456. }
  457. }
  458. if (lua_gettop(L) >= 3) {
  459. raw = lua_toboolean(L, 3);
  460. }
  461. if (data && len > 0) {
  462. if (lua_gettop(L) >= 4 && lua_toboolean(L, 4)) {
  463. capture = TRUE;
  464. captures = g_array_new(FALSE, TRUE,
  465. sizeof(struct rspamd_re_capture));
  466. }
  467. lua_newtable(L);
  468. i = 0;
  469. while (rspamd_regexp_search(re->re, data, len, &start, &end, raw,
  470. captures)) {
  471. if (capture) {
  472. lua_createtable(L, captures->len, 0);
  473. for (capn = 0; capn < captures->len; capn++) {
  474. cap = &g_array_index(captures, struct rspamd_re_capture,
  475. capn);
  476. lua_pushlstring(L, cap->p, cap->len);
  477. lua_rawseti(L, -2, capn + 1);
  478. }
  479. lua_rawseti(L, -2, ++i);
  480. }
  481. else {
  482. lua_pushlstring(L, start, end - start);
  483. lua_rawseti(L, -2, ++i);
  484. }
  485. matched = TRUE;
  486. if (start >= end) {
  487. /* We found all matches, so no more hits are possible (protect from empty patterns) */
  488. break;
  489. }
  490. }
  491. if (!matched) {
  492. lua_pop(L, 1);
  493. lua_pushnil(L);
  494. }
  495. if (capture) {
  496. g_array_free(captures, TRUE);
  497. }
  498. }
  499. else {
  500. lua_pushnil(L);
  501. }
  502. }
  503. else {
  504. return luaL_error(L, "invalid arguments");
  505. }
  506. return 1;
  507. }
  508. /***
  509. * @method re:match(line[, raw_match])
  510. * Matches line against the regular expression and return true if line matches
  511. * (partially or completely)
  512. *
  513. * @param {string} line match the specified line against regexp object
  514. * @param {bool} match raw regexp instead of utf8 one
  515. * @return {bool} true if `line` matches
  516. */
  517. static int
  518. lua_regexp_match(lua_State *L)
  519. {
  520. LUA_TRACE_POINT;
  521. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  522. struct rspamd_lua_text *t;
  523. const char *data = NULL;
  524. gsize len = 0;
  525. gboolean raw = FALSE;
  526. if (re && !IS_DESTROYED(re)) {
  527. if (lua_type(L, 2) == LUA_TSTRING) {
  528. data = luaL_checklstring(L, 2, &len);
  529. }
  530. else if (lua_type(L, 2) == LUA_TUSERDATA) {
  531. t = lua_check_text(L, 2);
  532. if (t != NULL) {
  533. data = t->start;
  534. len = t->len;
  535. }
  536. }
  537. if (lua_gettop(L) == 3) {
  538. raw = lua_toboolean(L, 3);
  539. }
  540. if (data && len > 0) {
  541. if (rspamd_regexp_search(re->re, data, len, NULL, NULL, raw, NULL)) {
  542. lua_pushboolean(L, TRUE);
  543. }
  544. else {
  545. lua_pushboolean(L, FALSE);
  546. }
  547. }
  548. else {
  549. lua_pushboolean(L, FALSE);
  550. }
  551. }
  552. else {
  553. return luaL_error(L, "invalid arguments");
  554. }
  555. return 1;
  556. }
  557. /***
  558. * @method re:matchn(line, max_matches, [, raw_match])
  559. * Matches line against the regular expression and return number of matches if line matches
  560. * (partially or completely). This process stop when `max_matches` is reached.
  561. * If `max_matches` is zero, then only a single match is counted which is equal to
  562. * @see re:match If `max_matches` is negative, then all matches are considered.
  563. *
  564. * @param {string} line match the specified line against regexp object
  565. * @param {number} max_matches maximum number of matches
  566. * @param {bool} match raw regexp instead of utf8 one
  567. * @return {number} number of matches found in the `line` argument
  568. */
  569. static int
  570. lua_regexp_matchn(lua_State *L)
  571. {
  572. LUA_TRACE_POINT;
  573. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  574. struct rspamd_lua_text *t;
  575. const char *data = NULL, *start = NULL, *end = NULL;
  576. int max_matches, matches;
  577. gsize len = 0;
  578. gboolean raw = FALSE;
  579. if (re && !IS_DESTROYED(re)) {
  580. if (lua_type(L, 2) == LUA_TSTRING) {
  581. data = luaL_checklstring(L, 2, &len);
  582. }
  583. else if (lua_type(L, 2) == LUA_TUSERDATA) {
  584. t = lua_check_text(L, 2);
  585. if (t != NULL) {
  586. data = t->start;
  587. len = t->len;
  588. }
  589. }
  590. max_matches = lua_tointeger(L, 3);
  591. matches = 0;
  592. if (lua_gettop(L) == 4) {
  593. raw = lua_toboolean(L, 4);
  594. }
  595. if (data && len > 0) {
  596. for (;;) {
  597. if (rspamd_regexp_search(re->re, data, len, &start, &end, raw,
  598. NULL)) {
  599. matches++;
  600. }
  601. else {
  602. break;
  603. }
  604. if (max_matches >= 0 && matches >= max_matches) {
  605. break;
  606. }
  607. }
  608. }
  609. lua_pushinteger(L, matches);
  610. }
  611. else {
  612. return luaL_error(L, "invalid arguments");
  613. }
  614. return 1;
  615. }
  616. /***
  617. * @method re:split(line)
  618. * Split line using the specified regular expression.
  619. * Breaks the string on the pattern, and returns an array of the tokens.
  620. * If the pattern contains capturing parentheses, then the text for each
  621. * of the substrings will also be returned. If the pattern does not match
  622. * anywhere in the string, then the whole string is returned as the first
  623. * token.
  624. * @param {string/text} line line to split
  625. * @return {table} table of split line portions (if text was the input, then text is used for return parts)
  626. */
  627. static int
  628. lua_regexp_split(lua_State *L)
  629. {
  630. LUA_TRACE_POINT;
  631. struct rspamd_lua_regexp *re = lua_check_regexp(L, 1);
  632. const char *data = NULL;
  633. struct rspamd_lua_text *t;
  634. gboolean matched = FALSE, is_text = FALSE;
  635. gsize len = 0;
  636. const char *start = NULL, *end = NULL, *old_start;
  637. int i;
  638. if (re && !IS_DESTROYED(re)) {
  639. if (lua_type(L, 2) == LUA_TSTRING) {
  640. data = luaL_checklstring(L, 2, &len);
  641. }
  642. else if (lua_type(L, 2) == LUA_TUSERDATA) {
  643. t = lua_check_text(L, 2);
  644. if (t == NULL) {
  645. lua_error(L);
  646. return 0;
  647. }
  648. data = t->start;
  649. len = t->len;
  650. is_text = TRUE;
  651. }
  652. if (data && len > 0) {
  653. lua_newtable(L);
  654. i = 0;
  655. old_start = data;
  656. while (rspamd_regexp_search(re->re, data, len, &start, &end, FALSE,
  657. NULL)) {
  658. if (start - old_start > 0) {
  659. if (!is_text) {
  660. lua_pushlstring(L, old_start, start - old_start);
  661. }
  662. else {
  663. t = lua_newuserdata(L, sizeof(*t));
  664. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  665. t->start = old_start;
  666. t->len = start - old_start;
  667. t->flags = 0;
  668. }
  669. lua_rawseti(L, -2, ++i);
  670. matched = TRUE;
  671. }
  672. else if (start >= end) {
  673. break;
  674. }
  675. old_start = end;
  676. }
  677. if (len > 0 && (end == NULL || end < data + len)) {
  678. if (end == NULL) {
  679. end = data;
  680. }
  681. if (!is_text) {
  682. lua_pushlstring(L, end, (data + len) - end);
  683. }
  684. else {
  685. t = lua_newuserdata(L, sizeof(*t));
  686. rspamd_lua_setclass(L, rspamd_text_classname, -1);
  687. t->start = end;
  688. t->len = (data + len) - end;
  689. t->flags = 0;
  690. }
  691. lua_rawseti(L, -2, ++i);
  692. matched = TRUE;
  693. }
  694. if (!matched) {
  695. lua_pop(L, 1);
  696. lua_pushnil(L);
  697. }
  698. return 1;
  699. }
  700. }
  701. else {
  702. return luaL_error(L, "invalid arguments");
  703. }
  704. lua_pushnil(L);
  705. return 1;
  706. }
  707. /***
  708. * @method re:destroy()
  709. * Destroy regexp from caches if needed (the pointer is removed by garbage collector)
  710. */
  711. static int
  712. lua_regexp_destroy(lua_State *L)
  713. {
  714. LUA_TRACE_POINT;
  715. struct rspamd_lua_regexp *to_del = lua_check_regexp(L, 1);
  716. if (to_del) {
  717. rspamd_regexp_cache_remove(NULL, to_del->re);
  718. rspamd_regexp_unref(to_del->re);
  719. to_del->re = NULL;
  720. to_del->re_flags |= LUA_REGEXP_FLAG_DESTROYED;
  721. }
  722. return 0;
  723. }
  724. static int
  725. lua_regexp_gc(lua_State *L)
  726. {
  727. LUA_TRACE_POINT;
  728. struct rspamd_lua_regexp *to_del = lua_check_regexp(L, 1);
  729. if (to_del) {
  730. if (!IS_DESTROYED(to_del)) {
  731. rspamd_regexp_unref(to_del->re);
  732. }
  733. g_free(to_del->re_pattern);
  734. g_free(to_del->module);
  735. g_free(to_del);
  736. }
  737. return 0;
  738. }
  739. static int
  740. lua_load_regexp(lua_State *L)
  741. {
  742. lua_newtable(L);
  743. luaL_register(L, NULL, regexplib_f);
  744. return 1;
  745. }
  746. void luaopen_regexp(lua_State *L)
  747. {
  748. if (!regexp_static_pool) {
  749. regexp_static_pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
  750. "regexp_lua_pool", 0);
  751. }
  752. rspamd_lua_new_class(L, rspamd_regexp_classname, regexplib_m);
  753. lua_pop(L, 1);
  754. rspamd_lua_add_preload(L, "rspamd_regexp", lua_load_regexp);
  755. }
  756. RSPAMD_DESTRUCTOR(lua_re_static_pool_dtor)
  757. {
  758. if (regexp_static_pool) {
  759. rspamd_mempool_delete(regexp_static_pool);
  760. }
  761. }