You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_regexp.c 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. /***
  18. * @module rspamd_regexp
  19. * Rspamd regexp is a utility module that handles rspamd Perl-compatible
  20. * regular expressions
  21. * @example
  22. * local rspamd_regexp = require "rspamd_regexp"
  23. *
  24. * local re = rspamd_regexp.create_cached('/^\\s*some_string\\s*$/i')
  25. * re:match('some_string')
  26. * local re = rspamd_regexp.create_cached('/\\s+/i')
  27. * re:split('word word word') -- returns ['word', 'word', 'word']
  28. */
  29. LUA_FUNCTION_DEF (regexp, create);
  30. LUA_FUNCTION_DEF (regexp, import_glob);
  31. LUA_FUNCTION_DEF (regexp, import_plain);
  32. LUA_FUNCTION_DEF (regexp, create_cached);
  33. LUA_FUNCTION_DEF (regexp, get_cached);
  34. LUA_FUNCTION_DEF (regexp, get_pattern);
  35. LUA_FUNCTION_DEF (regexp, set_limit);
  36. LUA_FUNCTION_DEF (regexp, set_max_hits);
  37. LUA_FUNCTION_DEF (regexp, get_max_hits);
  38. LUA_FUNCTION_DEF (regexp, search);
  39. LUA_FUNCTION_DEF (regexp, match);
  40. LUA_FUNCTION_DEF (regexp, matchn);
  41. LUA_FUNCTION_DEF (regexp, split);
  42. LUA_FUNCTION_DEF (regexp, destroy);
  43. LUA_FUNCTION_DEF (regexp, gc);
  44. static const struct luaL_reg regexplib_m[] = {
  45. LUA_INTERFACE_DEF (regexp, get_pattern),
  46. LUA_INTERFACE_DEF (regexp, set_limit),
  47. LUA_INTERFACE_DEF (regexp, set_max_hits),
  48. LUA_INTERFACE_DEF (regexp, get_max_hits),
  49. LUA_INTERFACE_DEF (regexp, match),
  50. LUA_INTERFACE_DEF (regexp, matchn),
  51. LUA_INTERFACE_DEF (regexp, search),
  52. LUA_INTERFACE_DEF (regexp, split),
  53. LUA_INTERFACE_DEF (regexp, destroy),
  54. {"__tostring", lua_regexp_get_pattern},
  55. {"__gc", lua_regexp_gc},
  56. {NULL, NULL}
  57. };
  58. static const struct luaL_reg regexplib_f[] = {
  59. LUA_INTERFACE_DEF (regexp, create),
  60. LUA_INTERFACE_DEF (regexp, import_glob),
  61. LUA_INTERFACE_DEF (regexp, import_plain),
  62. LUA_INTERFACE_DEF (regexp, get_cached),
  63. LUA_INTERFACE_DEF (regexp, create_cached),
  64. {NULL, NULL}
  65. };
  66. #define LUA_REGEXP_FLAG_DESTROYED (1 << 0)
  67. #define IS_DESTROYED(re) ((re)->re_flags & LUA_REGEXP_FLAG_DESTROYED)
  68. rspamd_mempool_t *regexp_static_pool = NULL;
  69. struct rspamd_lua_regexp *
  70. lua_check_regexp (lua_State * L, gint pos)
  71. {
  72. void *ud = rspamd_lua_check_udata (L, pos, "rspamd{regexp}");
  73. luaL_argcheck (L, ud != NULL, pos, "'regexp' expected");
  74. return ud ? *((struct rspamd_lua_regexp **)ud) : NULL;
  75. }
  76. /***
  77. * @function rspamd_regexp.create(pattern[, flags])
  78. * Creates new rspamd_regexp
  79. * @param {string} pattern pattern to build regexp. If this pattern is enclosed in `//` then it is possible to specify flags after it
  80. * @param {string} flags optional flags to create regular expression
  81. * @return {regexp} regexp argument that is *not* automatically destroyed
  82. * @example
  83. * local regexp = require "rspamd_regexp"
  84. *
  85. * local re = regexp.create('/^test.*[0-9]\\s*$/i')
  86. */
  87. static int
  88. lua_regexp_create (lua_State *L)
  89. {
  90. LUA_TRACE_POINT;
  91. rspamd_regexp_t *re;
  92. struct rspamd_lua_regexp *new, **pnew;
  93. const gchar *string, *flags_str = NULL;
  94. GError *err = NULL;
  95. string = luaL_checkstring (L, 1);
  96. if (lua_gettop (L) == 2) {
  97. flags_str = luaL_checkstring (L, 2);
  98. }
  99. if (string) {
  100. re = rspamd_regexp_new (string, flags_str, &err);
  101. if (re == NULL) {
  102. lua_pushnil (L);
  103. msg_info ("cannot parse regexp: %s, error: %s",
  104. string,
  105. err == NULL ? "undefined" : err->message);
  106. g_error_free (err);
  107. } else {
  108. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  109. new->re = re;
  110. new->re_pattern = g_strdup (string);
  111. new->module = rspamd_lua_get_module_name (L);
  112. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  113. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  114. *pnew = new;
  115. }
  116. }
  117. else {
  118. return luaL_error (L, "invalid arguments");
  119. }
  120. return 1;
  121. }
  122. /***
  123. * @function rspamd_regexp.import_glob(glob_pattern[, flags])
  124. * Creates new rspamd_regexp from glob
  125. * @param {string} pattern pattern to build regexp.
  126. * @param {string} flags optional flags to create regular expression
  127. * @return {regexp} regexp argument that is *not* automatically destroyed
  128. * @example
  129. * local regexp = require "rspamd_regexp"
  130. *
  131. * local re = regexp.import_glob('ab*', 'i')
  132. */
  133. static int
  134. lua_regexp_import_glob (lua_State *L)
  135. {
  136. LUA_TRACE_POINT;
  137. rspamd_regexp_t *re;
  138. struct rspamd_lua_regexp *new, **pnew;
  139. const gchar *string, *flags_str = NULL;
  140. gchar *escaped;
  141. gsize pat_len;
  142. GError *err = NULL;
  143. string = luaL_checklstring (L, 1, &pat_len);
  144. if (lua_gettop (L) == 2) {
  145. flags_str = luaL_checkstring (L, 2);
  146. }
  147. if (string) {
  148. escaped = rspamd_str_regexp_escape (string, pat_len, NULL,
  149. RSPAMD_REGEXP_ESCAPE_GLOB|RSPAMD_REGEXP_ESCAPE_UTF);
  150. re = rspamd_regexp_new (escaped, flags_str, &err);
  151. if (re == NULL) {
  152. lua_pushnil (L);
  153. msg_info ("cannot parse regexp: %s, error: %s",
  154. string,
  155. err == NULL ? "undefined" : err->message);
  156. g_error_free (err);
  157. g_free (escaped);
  158. }
  159. else {
  160. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  161. new->re = re;
  162. new->re_pattern = escaped;
  163. new->module = rspamd_lua_get_module_name (L);
  164. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  165. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  166. *pnew = new;
  167. }
  168. }
  169. else {
  170. return luaL_error (L, "invalid arguments");
  171. }
  172. return 1;
  173. }
  174. /***
  175. * @function rspamd_regexp.import_plain(plain_string[, flags])
  176. * Creates new rspamd_regexp from plain string (escaping specials)
  177. * @param {string} pattern pattern to build regexp.
  178. * @param {string} flags optional flags to create regular expression
  179. * @return {regexp} regexp argument that is *not* automatically destroyed
  180. * @example
  181. * local regexp = require "rspamd_regexp"
  182. *
  183. * local re = regexp.import_plain('exact_string_with*', 'i')
  184. */
  185. static int
  186. lua_regexp_import_plain (lua_State *L)
  187. {
  188. LUA_TRACE_POINT;
  189. rspamd_regexp_t *re;
  190. struct rspamd_lua_regexp *new, **pnew;
  191. const gchar *string, *flags_str = NULL;
  192. gchar *escaped;
  193. gsize pat_len;
  194. GError *err = NULL;
  195. string = luaL_checklstring (L, 1, &pat_len);
  196. if (lua_gettop (L) == 2) {
  197. flags_str = luaL_checkstring (L, 2);
  198. }
  199. if (string) {
  200. escaped = rspamd_str_regexp_escape (string, pat_len, NULL,
  201. RSPAMD_REGEXP_ESCAPE_ASCII);
  202. re = rspamd_regexp_new (escaped, flags_str, &err);
  203. if (re == NULL) {
  204. lua_pushnil (L);
  205. msg_info ("cannot parse regexp: %s, error: %s",
  206. string,
  207. err == NULL ? "undefined" : err->message);
  208. g_error_free (err);
  209. g_free (escaped);
  210. }
  211. else {
  212. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  213. new->re = re;
  214. new->re_pattern = escaped;
  215. new->module = rspamd_lua_get_module_name (L);
  216. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  217. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  218. *pnew = new;
  219. }
  220. }
  221. else {
  222. return luaL_error (L, "invalid arguments");
  223. }
  224. return 1;
  225. }
  226. /***
  227. * @function rspamd_regexp.get_cached(pattern)
  228. * This function gets cached and pre-compiled regexp created by either `create`
  229. * or `create_cached` methods. If no cached regexp is found then `nil` is returned.
  230. *
  231. * @param {string} pattern regexp pattern
  232. * @return {regexp} cached regexp structure or `nil`
  233. */
  234. static int
  235. lua_regexp_get_cached (lua_State *L)
  236. {
  237. LUA_TRACE_POINT;
  238. rspamd_regexp_t *re;
  239. struct rspamd_lua_regexp *new, **pnew;
  240. const gchar *string, *flags_str = NULL;
  241. string = luaL_checkstring (L, 1);
  242. if (lua_gettop (L) == 2) {
  243. flags_str = luaL_checkstring (L, 2);
  244. }
  245. if (string) {
  246. re = rspamd_regexp_cache_query (NULL, string, flags_str);
  247. if (re) {
  248. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  249. new->re = rspamd_regexp_ref (re);
  250. new->re_pattern = g_strdup (string);
  251. new->module = rspamd_lua_get_module_name (L);
  252. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  253. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  254. *pnew = new;
  255. }
  256. else {
  257. lua_pushnil (L);
  258. }
  259. }
  260. else {
  261. return luaL_error (L, "invalid arguments");
  262. }
  263. return 1;
  264. }
  265. /***
  266. * @function rspamd_regexp.create_cached(pattern[, flags])
  267. * This function is similar to `create` but it tries to search for regexp in the
  268. * cache first.
  269. * @param {string} pattern pattern to build regexp. If this pattern is enclosed in `//` then it is possible to specify flags after it
  270. * @param {string} flags optional flags to create regular expression
  271. * @return {regexp} regexp argument that is *not* automatically destroyed
  272. * @example
  273. * local regexp = require "rspamd_regexp"
  274. *
  275. * local re = regexp.create_cached('/^test.*[0-9]\\s*$/i')
  276. * ...
  277. * -- This doesn't create new regexp object
  278. * local other_re = regexp.create_cached('/^test.*[0-9]\\s*$/i')
  279. */
  280. static int
  281. lua_regexp_create_cached (lua_State *L)
  282. {
  283. LUA_TRACE_POINT;
  284. rspamd_regexp_t *re;
  285. struct rspamd_lua_regexp *new, **pnew;
  286. const gchar *string, *flags_str = NULL;
  287. GError *err = NULL;
  288. string = luaL_checkstring (L, 1);
  289. if (lua_gettop (L) == 2) {
  290. flags_str = luaL_checkstring (L, 2);
  291. }
  292. if (string) {
  293. re = rspamd_regexp_cache_query (NULL, string, flags_str);
  294. if (re) {
  295. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  296. new->re = rspamd_regexp_ref (re);
  297. new->re_pattern = g_strdup (string);
  298. new->module = rspamd_lua_get_module_name (L);
  299. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  300. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  301. *pnew = new;
  302. }
  303. else {
  304. re = rspamd_regexp_cache_create (NULL, string, flags_str, &err);
  305. if (re == NULL) {
  306. lua_pushnil (L);
  307. msg_info ("cannot parse regexp: %s, error: %s",
  308. string,
  309. err == NULL ? "undefined" : err->message);
  310. g_error_free (err);
  311. } else {
  312. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  313. new->re = rspamd_regexp_ref (re);
  314. new->re_pattern = g_strdup (string);
  315. new->module = rspamd_lua_get_module_name (L);
  316. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  317. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  318. *pnew = new;
  319. }
  320. }
  321. }
  322. else {
  323. return luaL_error (L, "invalid arguments");
  324. }
  325. return 1;
  326. }
  327. /***
  328. * @method re:get_pattern()
  329. * Get a pattern for specified regexp object
  330. * @return {string} pattern line
  331. */
  332. static int
  333. lua_regexp_get_pattern (lua_State *L)
  334. {
  335. LUA_TRACE_POINT;
  336. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  337. if (re && re->re && !IS_DESTROYED (re)) {
  338. lua_pushstring (L, rspamd_regexp_get_pattern (re->re));
  339. }
  340. else {
  341. lua_pushnil (L);
  342. }
  343. return 1;
  344. }
  345. /***
  346. * @method re:set_limit(lim)
  347. * Set maximum size of text length to be matched with this regexp (if `lim` is
  348. * less or equal to zero then all texts are checked)
  349. * @param {number} lim limit in bytes
  350. */
  351. static int
  352. lua_regexp_set_limit (lua_State *L)
  353. {
  354. LUA_TRACE_POINT;
  355. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  356. gint64 lim;
  357. lim = lua_tointeger (L, 2);
  358. if (re && re->re && !IS_DESTROYED (re)) {
  359. if (lim > 0) {
  360. re->match_limit = lim;
  361. }
  362. else {
  363. re->match_limit = 0;
  364. }
  365. }
  366. return 0;
  367. }
  368. /***
  369. * @method re:set_max_hits(lim)
  370. * Set maximum number of hits returned by a regexp
  371. * @param {number} lim limit in hits count
  372. * @return {number} old number of max hits
  373. */
  374. static int
  375. lua_regexp_set_max_hits (lua_State *L)
  376. {
  377. LUA_TRACE_POINT;
  378. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  379. guint lim;
  380. lim = luaL_checkinteger (L, 2);
  381. if (re && re->re && !IS_DESTROYED (re)) {
  382. lua_pushinteger (L, rspamd_regexp_set_maxhits (re->re, lim));
  383. }
  384. else {
  385. lua_pushnil (L);
  386. }
  387. return 1;
  388. }
  389. /***
  390. * @method re:get_max_hits(lim)
  391. * Get maximum number of hits returned by a regexp
  392. * @return {number} number of max hits
  393. */
  394. static int
  395. lua_regexp_get_max_hits (lua_State *L)
  396. {
  397. LUA_TRACE_POINT;
  398. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  399. if (re && re->re && !IS_DESTROYED (re)) {
  400. lua_pushinteger (L, rspamd_regexp_get_maxhits (re->re));
  401. }
  402. else {
  403. lua_pushinteger (L, 1);
  404. }
  405. return 1;
  406. }
  407. /***
  408. * @method re:search(line[, raw[, capture]])
  409. * Search line in regular expression object. If line matches then this
  410. * function returns the table of captured strings. Otherwise, nil is returned.
  411. * If `raw` is specified, then input is treated as raw data not encoded in `utf-8`.
  412. * If `capture` is true, then this function saves all captures to the table of
  413. * values, so the first element is the whole matched string and the
  414. * subsequent elements are ordered captures defined within pattern.
  415. *
  416. * @param {string} line match the specified line against regexp object
  417. * @param {bool} match raw regexp instead of utf8 one
  418. * @param {bool} capture perform subpatterns capturing
  419. * @return {table or nil} table of strings or tables (if `capture` is true) or nil if not matched
  420. * @example
  421. * local re = regexp.create_cached('/^\s*([0-9]+)\s*$/')
  422. * -- returns nil
  423. * local m1 = re:search('blah')
  424. * local m2 = re:search(' 190 ')
  425. * -- prints ' 190 '
  426. * print(m2[1])
  427. *
  428. * local m3 = re:search(' 100500 ')
  429. * -- prints ' 100500 '
  430. * print(m3[1][1])
  431. * -- prints '100500' capture
  432. * print(m3[1][2])
  433. */
  434. static int
  435. lua_regexp_search (lua_State *L)
  436. {
  437. LUA_TRACE_POINT;
  438. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  439. const gchar *data = NULL;
  440. struct rspamd_lua_text *t;
  441. const gchar *start = NULL, *end = NULL;
  442. gint i;
  443. gsize len = 0, capn;
  444. gboolean matched = FALSE, capture = FALSE, raw = FALSE;
  445. GArray *captures = NULL;
  446. struct rspamd_re_capture *cap;
  447. if (re && !IS_DESTROYED (re)) {
  448. if (lua_type (L, 2) == LUA_TSTRING) {
  449. data = luaL_checklstring (L, 2, &len);
  450. }
  451. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  452. t = lua_check_text (L, 2);
  453. if (t != NULL) {
  454. data = t->start;
  455. len = t->len;
  456. }
  457. }
  458. if (lua_gettop (L) >= 3) {
  459. raw = lua_toboolean (L, 3);
  460. }
  461. if (data && len > 0) {
  462. if (lua_gettop (L) >= 4 && lua_toboolean (L, 4)) {
  463. capture = TRUE;
  464. captures = g_array_new (FALSE, TRUE,
  465. sizeof (struct rspamd_re_capture));
  466. }
  467. lua_newtable (L);
  468. i = 0;
  469. if (re->match_limit > 0) {
  470. len = MIN (len, re->match_limit);
  471. }
  472. while (rspamd_regexp_search (re->re, data, len, &start, &end, raw,
  473. captures)) {
  474. if (capture) {
  475. lua_createtable (L, captures->len, 0);
  476. for (capn = 0; capn < captures->len; capn ++) {
  477. cap = &g_array_index (captures, struct rspamd_re_capture,
  478. capn);
  479. lua_pushlstring (L, cap->p, cap->len);
  480. lua_rawseti (L, -2, capn + 1);
  481. }
  482. lua_rawseti (L, -2, ++i);
  483. }
  484. else {
  485. lua_pushlstring (L, start, end - start);
  486. lua_rawseti (L, -2, ++i);
  487. }
  488. matched = TRUE;
  489. }
  490. if (!matched) {
  491. lua_pop (L, 1);
  492. lua_pushnil (L);
  493. }
  494. if (capture) {
  495. g_array_free (captures, TRUE);
  496. }
  497. }
  498. else {
  499. lua_pushnil (L);
  500. }
  501. }
  502. else {
  503. return luaL_error (L, "invalid arguments");
  504. }
  505. return 1;
  506. }
  507. /***
  508. * @method re:match(line[, raw_match])
  509. * Matches line against the regular expression and return true if line matches
  510. * (partially or completely)
  511. *
  512. * @param {string} line match the specified line against regexp object
  513. * @param {bool} match raw regexp instead of utf8 one
  514. * @return {bool} true if `line` matches
  515. */
  516. static int
  517. lua_regexp_match (lua_State *L)
  518. {
  519. LUA_TRACE_POINT;
  520. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  521. struct rspamd_lua_text *t;
  522. const gchar *data = NULL;
  523. gsize len = 0;
  524. gboolean raw = FALSE;
  525. if (re && !IS_DESTROYED (re)) {
  526. if (lua_type (L, 2) == LUA_TSTRING) {
  527. data = luaL_checklstring (L, 2, &len);
  528. }
  529. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  530. t = lua_check_text (L, 2);
  531. if (t != NULL) {
  532. data = t->start;
  533. len = t->len;
  534. }
  535. }
  536. if (lua_gettop (L) == 3) {
  537. raw = lua_toboolean (L, 3);
  538. }
  539. if (data && len > 0) {
  540. if (re->match_limit > 0) {
  541. len = MIN (len, re->match_limit);
  542. }
  543. if (rspamd_regexp_search (re->re, data, len, NULL, NULL, raw, NULL)) {
  544. lua_pushboolean (L, TRUE);
  545. }
  546. else {
  547. lua_pushboolean (L, FALSE);
  548. }
  549. }
  550. else {
  551. lua_pushboolean (L, FALSE);
  552. }
  553. }
  554. else {
  555. return luaL_error (L, "invalid arguments");
  556. }
  557. return 1;
  558. }
  559. /***
  560. * @method re:matchn(line, max_matches, [, raw_match])
  561. * Matches line against the regular expression and return number of matches if line matches
  562. * (partially or completely). This process stop when `max_matches` is reached.
  563. * If `max_matches` is zero, then only a single match is counted which is equal to
  564. * @see re:match If `max_matches` is negative, then all matches are considered.
  565. *
  566. * @param {string} line match the specified line against regexp object
  567. * @param {number} max_matches maximum number of matches
  568. * @param {bool} match raw regexp instead of utf8 one
  569. * @return {number} number of matches found in the `line` argument
  570. */
  571. static int
  572. lua_regexp_matchn (lua_State *L)
  573. {
  574. LUA_TRACE_POINT;
  575. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  576. struct rspamd_lua_text *t;
  577. const gchar *data = NULL, *start = NULL, *end = NULL;
  578. gint max_matches, matches;
  579. gsize len = 0;
  580. gboolean raw = FALSE;
  581. if (re && !IS_DESTROYED (re)) {
  582. if (lua_type (L, 2) == LUA_TSTRING) {
  583. data = luaL_checklstring (L, 2, &len);
  584. }
  585. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  586. t = lua_check_text (L, 2);
  587. if (t != NULL) {
  588. data = t->start;
  589. len = t->len;
  590. }
  591. }
  592. max_matches = lua_tointeger (L, 3);
  593. matches = 0;
  594. if (lua_gettop (L) == 4) {
  595. raw = lua_toboolean (L, 4);
  596. }
  597. if (data && len > 0) {
  598. if (re->match_limit > 0) {
  599. len = MIN (len, re->match_limit);
  600. }
  601. for (;;) {
  602. if (rspamd_regexp_search (re->re, data, len, &start, &end, raw,
  603. NULL)) {
  604. matches ++;
  605. }
  606. else {
  607. break;
  608. }
  609. if (max_matches >= 0 && matches >= max_matches) {
  610. break;
  611. }
  612. }
  613. }
  614. lua_pushinteger (L, matches);
  615. }
  616. else {
  617. return luaL_error (L, "invalid arguments");
  618. }
  619. return 1;
  620. }
  621. /***
  622. * @method re:split(line)
  623. * Split line using the specified regular expression.
  624. * Breaks the string on the pattern, and returns an array of the tokens.
  625. * If the pattern contains capturing parentheses, then the text for each
  626. * of the substrings will also be returned. If the pattern does not match
  627. * anywhere in the string, then the whole string is returned as the first
  628. * token.
  629. * @param {string/text} line line to split
  630. * @return {table} table of split line portions (if text was the input, then text is used for return parts)
  631. */
  632. static int
  633. lua_regexp_split (lua_State *L)
  634. {
  635. LUA_TRACE_POINT;
  636. struct rspamd_lua_regexp *re = lua_check_regexp (L, 1);
  637. const gchar *data = NULL;
  638. struct rspamd_lua_text *t;
  639. gboolean matched = FALSE, is_text = FALSE;
  640. gsize len = 0;
  641. const gchar *start = NULL, *end = NULL, *old_start;
  642. gint i;
  643. if (re && !IS_DESTROYED (re)) {
  644. if (lua_type (L, 2) == LUA_TSTRING) {
  645. data = luaL_checklstring (L, 2, &len);
  646. }
  647. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  648. t = lua_check_text (L, 2);
  649. if (t == NULL) {
  650. lua_error (L);
  651. return 0;
  652. }
  653. data = t->start;
  654. len = t->len;
  655. is_text = TRUE;
  656. }
  657. if (re->match_limit > 0) {
  658. len = MIN (len, re->match_limit);
  659. }
  660. if (data && len > 0) {
  661. lua_newtable (L);
  662. i = 0;
  663. old_start = data;
  664. while (rspamd_regexp_search (re->re, data, len, &start, &end, FALSE,
  665. NULL)) {
  666. if (start - old_start > 0) {
  667. if (!is_text) {
  668. lua_pushlstring (L, old_start, start - old_start);
  669. }
  670. else {
  671. t = lua_newuserdata (L, sizeof (*t));
  672. rspamd_lua_setclass (L, "rspamd{text}", -1);
  673. t->start = old_start;
  674. t->len = start - old_start;
  675. t->flags = 0;
  676. }
  677. lua_rawseti (L, -2, ++i);
  678. matched = TRUE;
  679. }
  680. else if (start == end) {
  681. break;
  682. }
  683. old_start = end;
  684. }
  685. if (len > 0 && (end == NULL || end < data + len)) {
  686. if (end == NULL) {
  687. end = data;
  688. }
  689. if (!is_text) {
  690. lua_pushlstring (L, end, (data + len) - end);
  691. }
  692. else {
  693. t = lua_newuserdata (L, sizeof (*t));
  694. rspamd_lua_setclass (L, "rspamd{text}", -1);
  695. t->start = end;
  696. t->len = (data + len) - end;
  697. t->flags = 0;
  698. }
  699. lua_rawseti (L, -2, ++i);
  700. matched = TRUE;
  701. }
  702. if (!matched) {
  703. lua_pop (L, 1);
  704. lua_pushnil (L);
  705. }
  706. return 1;
  707. }
  708. }
  709. else {
  710. return luaL_error (L, "invalid arguments");
  711. }
  712. lua_pushnil (L);
  713. return 1;
  714. }
  715. /***
  716. * @method re:destroy()
  717. * Destroy regexp from caches if needed (the pointer is removed by garbadge collector)
  718. */
  719. static gint
  720. lua_regexp_destroy (lua_State *L)
  721. {
  722. LUA_TRACE_POINT;
  723. struct rspamd_lua_regexp *to_del = lua_check_regexp (L, 1);
  724. if (to_del) {
  725. rspamd_regexp_cache_remove (NULL, to_del->re);
  726. rspamd_regexp_unref (to_del->re);
  727. to_del->re = NULL;
  728. to_del->re_flags |= LUA_REGEXP_FLAG_DESTROYED;
  729. }
  730. return 0;
  731. }
  732. static gint
  733. lua_regexp_gc (lua_State *L)
  734. {
  735. LUA_TRACE_POINT;
  736. struct rspamd_lua_regexp *to_del = lua_check_regexp (L, 1);
  737. if (to_del) {
  738. if (!IS_DESTROYED (to_del)) {
  739. rspamd_regexp_unref (to_del->re);
  740. }
  741. g_free (to_del->re_pattern);
  742. g_free (to_del->module);
  743. g_free (to_del);
  744. }
  745. return 0;
  746. }
  747. static gint
  748. lua_load_regexp (lua_State * L)
  749. {
  750. lua_newtable (L);
  751. luaL_register (L, NULL, regexplib_f);
  752. return 1;
  753. }
  754. void
  755. luaopen_regexp (lua_State * L)
  756. {
  757. if (!regexp_static_pool) {
  758. regexp_static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  759. "regexp_lua_pool", 0);
  760. }
  761. rspamd_lua_new_class (L, "rspamd{regexp}", regexplib_m);
  762. lua_pop (L, 1);
  763. rspamd_lua_add_preload (L, "rspamd_regexp", lua_load_regexp);
  764. }
  765. RSPAMD_DESTRUCTOR (lua_re_static_pool_dtor) {
  766. if (regexp_static_pool) {
  767. rspamd_mempool_delete (regexp_static_pool);
  768. }
  769. }