You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_regexp.c 22KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  /***
   * @module rspamd_regexp
   * Rspamd regexp is a utility module that handles Rspamd Perl-compatible
   * regular expressions.
   * @example
   * local rspamd_regexp = require "rspamd_regexp"
   *
   * local re = rspamd_regexp.create_cached('/^\\s*some_string\\s*$/i')
   * re:match('some_string')
   * local re = rspamd_regexp.create_cached('/\\s+/i')
   * re:split('word word word') -- returns ['word', 'word', 'word']
   */
  /*
   * Declarations of every Lua-visible handler implemented in this file.
   * NOTE(review): LUA_FUNCTION_DEF is defined outside this file; it presumably
   * expands to a forward declaration of `lua_regexp_<name>` - confirm in
   * lua_common.h.
   */

  /* Module-level functions (registered in regexplib_f) */
  LUA_FUNCTION_DEF (regexp, create);
  LUA_FUNCTION_DEF (regexp, import_glob);
  LUA_FUNCTION_DEF (regexp, import_plain);
  LUA_FUNCTION_DEF (regexp, create_cached);
  LUA_FUNCTION_DEF (regexp, get_cached);

  /* Object methods (registered in regexplib_m) */
  LUA_FUNCTION_DEF (regexp, get_pattern);
  LUA_FUNCTION_DEF (regexp, set_limit);
  LUA_FUNCTION_DEF (regexp, set_max_hits);
  LUA_FUNCTION_DEF (regexp, get_max_hits);
  LUA_FUNCTION_DEF (regexp, search);
  LUA_FUNCTION_DEF (regexp, match);
  LUA_FUNCTION_DEF (regexp, matchn);
  LUA_FUNCTION_DEF (regexp, split);
  LUA_FUNCTION_DEF (regexp, destroy);

  /* Finalizer, bound to the `__gc` metamethod */
  LUA_FUNCTION_DEF (regexp, gc);
  44. static const struct luaL_reg regexplib_m[] = {
  45. LUA_INTERFACE_DEF (regexp, get_pattern),
  46. LUA_INTERFACE_DEF (regexp, set_limit),
  47. LUA_INTERFACE_DEF (regexp, set_max_hits),
  48. LUA_INTERFACE_DEF (regexp, get_max_hits),
  49. LUA_INTERFACE_DEF (regexp, match),
  50. LUA_INTERFACE_DEF (regexp, matchn),
  51. LUA_INTERFACE_DEF (regexp, search),
  52. LUA_INTERFACE_DEF (regexp, split),
  53. LUA_INTERFACE_DEF (regexp, destroy),
  54. {"__tostring", lua_regexp_get_pattern},
  55. {"__gc", lua_regexp_gc},
  56. {NULL, NULL}
  57. };
  58. static const struct luaL_reg regexplib_f[] = {
  59. LUA_INTERFACE_DEF (regexp, create),
  60. LUA_INTERFACE_DEF (regexp, import_glob),
  61. LUA_INTERFACE_DEF (regexp, import_plain),
  62. LUA_INTERFACE_DEF (regexp, get_cached),
  63. LUA_INTERFACE_DEF (regexp, create_cached),
  64. {NULL, NULL}
  65. };
  66. #define LUA_REGEXP_FLAG_DESTROYED (1 << 0)
  67. #define IS_DESTROYED(re) ((re)->re_flags & LUA_REGEXP_FLAG_DESTROYED)
  68. rspamd_mempool_t *regexp_static_pool = NULL;
  69. static struct rspamd_lua_regexp *
  70. lua_check_regexp (lua_State * L)
  71. {
  72. void *ud = rspamd_lua_check_udata (L, 1, "rspamd{regexp}");
  73. luaL_argcheck (L, ud != NULL, 1, "'regexp' expected");
  74. return ud ? *((struct rspamd_lua_regexp **)ud) : NULL;
  75. }
  76. static gchar *
  77. rspamd_lua_get_module_name (lua_State *L)
  78. {
  79. lua_Debug d;
  80. gchar *p;
  81. gchar func_buf[128];
  82. if (lua_getstack (L, 1, &d) == 1) {
  83. (void) lua_getinfo (L, "Sl", &d);
  84. if ((p = strrchr (d.short_src, '/')) == NULL) {
  85. p = d.short_src;
  86. }
  87. else {
  88. p++;
  89. }
  90. if (strlen (p) > 20) {
  91. rspamd_snprintf (func_buf, sizeof (func_buf), "%10s...]:%d", p,
  92. d.currentline);
  93. }
  94. else {
  95. rspamd_snprintf (func_buf, sizeof (func_buf), "%s:%d", p,
  96. d.currentline);
  97. }
  98. return g_strdup (func_buf);
  99. }
  100. return NULL;
  101. }
  102. /***
  103. * @function rspamd_regexp.create(pattern[, flags])
  104. * Creates new rspamd_regexp
  105. * @param {string} pattern pattern to build regexp. If this pattern is enclosed in `//` then it is possible to specify flags after it
  106. * @param {string} flags optional flags to create regular expression
  107. * @return {regexp} regexp argument that is *not* automatically destroyed
  108. * @example
  109. * local regexp = require "rspamd_regexp"
  110. *
  111. * local re = regexp.create('/^test.*[0-9]\\s*$/i')
  112. */
  113. static int
  114. lua_regexp_create (lua_State *L)
  115. {
  116. LUA_TRACE_POINT;
  117. rspamd_regexp_t *re;
  118. struct rspamd_lua_regexp *new, **pnew;
  119. const gchar *string, *flags_str = NULL;
  120. GError *err = NULL;
  121. string = luaL_checkstring (L, 1);
  122. if (lua_gettop (L) == 2) {
  123. flags_str = luaL_checkstring (L, 2);
  124. }
  125. if (string) {
  126. re = rspamd_regexp_new (string, flags_str, &err);
  127. if (re == NULL) {
  128. lua_pushnil (L);
  129. msg_info ("cannot parse regexp: %s, error: %s",
  130. string,
  131. err == NULL ? "undefined" : err->message);
  132. g_error_free (err);
  133. } else {
  134. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  135. new->re = re;
  136. new->re_pattern = g_strdup (string);
  137. new->module = rspamd_lua_get_module_name (L);
  138. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  139. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  140. *pnew = new;
  141. }
  142. }
  143. else {
  144. return luaL_error (L, "invalid arguments");
  145. }
  146. return 1;
  147. }
  148. /***
  149. * @function rspamd_regexp.import_glob(glob_pattern[, flags])
  150. * Creates new rspamd_regexp from glob
  151. * @param {string} pattern pattern to build regexp.
  152. * @param {string} flags optional flags to create regular expression
  153. * @return {regexp} regexp argument that is *not* automatically destroyed
  154. * @example
  155. * local regexp = require "rspamd_regexp"
  156. *
  157. * local re = regexp.import_glob('ab*', 'i')
  158. */
  159. static int
  160. lua_regexp_import_glob (lua_State *L)
  161. {
  162. LUA_TRACE_POINT;
  163. rspamd_regexp_t *re;
  164. struct rspamd_lua_regexp *new, **pnew;
  165. const gchar *string, *flags_str = NULL;
  166. gchar *escaped;
  167. gsize pat_len;
  168. GError *err = NULL;
  169. string = luaL_checklstring (L, 1, &pat_len);
  170. if (lua_gettop (L) == 2) {
  171. flags_str = luaL_checkstring (L, 2);
  172. }
  173. if (string) {
  174. escaped = rspamd_str_regexp_escape (string, pat_len, NULL,
  175. RSPAMD_REGEXP_ESCAPE_GLOB|RSPAMD_REGEXP_ESCAPE_UTF);
  176. re = rspamd_regexp_new (escaped, flags_str, &err);
  177. if (re == NULL) {
  178. lua_pushnil (L);
  179. msg_info ("cannot parse regexp: %s, error: %s",
  180. string,
  181. err == NULL ? "undefined" : err->message);
  182. g_error_free (err);
  183. g_free (escaped);
  184. }
  185. else {
  186. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  187. new->re = re;
  188. new->re_pattern = escaped;
  189. new->module = rspamd_lua_get_module_name (L);
  190. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  191. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  192. *pnew = new;
  193. }
  194. }
  195. else {
  196. return luaL_error (L, "invalid arguments");
  197. }
  198. return 1;
  199. }
  200. /***
  201. * @function rspamd_regexp.import_plain(plain_string[, flags])
  202. * Creates new rspamd_regexp from plain string (escaping specials)
  203. * @param {string} pattern pattern to build regexp.
  204. * @param {string} flags optional flags to create regular expression
  205. * @return {regexp} regexp argument that is *not* automatically destroyed
  206. * @example
  207. * local regexp = require "rspamd_regexp"
  208. *
  209. * local re = regexp.import_plain('exact_string_with*', 'i')
  210. */
  211. static int
  212. lua_regexp_import_plain (lua_State *L)
  213. {
  214. LUA_TRACE_POINT;
  215. rspamd_regexp_t *re;
  216. struct rspamd_lua_regexp *new, **pnew;
  217. const gchar *string, *flags_str = NULL;
  218. gchar *escaped;
  219. gsize pat_len;
  220. GError *err = NULL;
  221. string = luaL_checklstring (L, 1, &pat_len);
  222. if (lua_gettop (L) == 2) {
  223. flags_str = luaL_checkstring (L, 2);
  224. }
  225. if (string) {
  226. escaped = rspamd_str_regexp_escape (string, pat_len, NULL,
  227. RSPAMD_REGEXP_ESCAPE_ASCII);
  228. re = rspamd_regexp_new (escaped, flags_str, &err);
  229. if (re == NULL) {
  230. lua_pushnil (L);
  231. msg_info ("cannot parse regexp: %s, error: %s",
  232. string,
  233. err == NULL ? "undefined" : err->message);
  234. g_error_free (err);
  235. g_free (escaped);
  236. }
  237. else {
  238. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  239. new->re = re;
  240. new->re_pattern = escaped;
  241. new->module = rspamd_lua_get_module_name (L);
  242. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  243. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  244. *pnew = new;
  245. }
  246. }
  247. else {
  248. return luaL_error (L, "invalid arguments");
  249. }
  250. return 1;
  251. }
  252. /***
  253. * @function rspamd_regexp.get_cached(pattern)
  254. * This function gets cached and pre-compiled regexp created by either `create`
  255. * or `create_cached` methods. If no cached regexp is found then `nil` is returned.
  256. *
  257. * @param {string} pattern regexp pattern
  258. * @return {regexp} cached regexp structure or `nil`
  259. */
  260. static int
  261. lua_regexp_get_cached (lua_State *L)
  262. {
  263. LUA_TRACE_POINT;
  264. rspamd_regexp_t *re;
  265. struct rspamd_lua_regexp *new, **pnew;
  266. const gchar *string, *flags_str = NULL;
  267. string = luaL_checkstring (L, 1);
  268. if (lua_gettop (L) == 2) {
  269. flags_str = luaL_checkstring (L, 2);
  270. }
  271. if (string) {
  272. re = rspamd_regexp_cache_query (NULL, string, flags_str);
  273. if (re) {
  274. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  275. new->re = rspamd_regexp_ref (re);
  276. new->re_pattern = g_strdup (string);
  277. new->module = rspamd_lua_get_module_name (L);
  278. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  279. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  280. *pnew = new;
  281. }
  282. else {
  283. lua_pushnil (L);
  284. }
  285. }
  286. else {
  287. return luaL_error (L, "invalid arguments");
  288. }
  289. return 1;
  290. }
  291. /***
  292. * @function rspamd_regexp.create_cached(pattern[, flags])
  293. * This function is similar to `create` but it tries to search for regexp in the
  294. * cache first.
  295. * @param {string} pattern pattern to build regexp. If this pattern is enclosed in `//` then it is possible to specify flags after it
  296. * @param {string} flags optional flags to create regular expression
  297. * @return {regexp} regexp argument that is *not* automatically destroyed
  298. * @example
  299. * local regexp = require "rspamd_regexp"
  300. *
  301. * local re = regexp.create_cached('/^test.*[0-9]\\s*$/i')
  302. * ...
  303. * -- This doesn't create new regexp object
  304. * local other_re = regexp.create_cached('/^test.*[0-9]\\s*$/i')
  305. */
  306. static int
  307. lua_regexp_create_cached (lua_State *L)
  308. {
  309. LUA_TRACE_POINT;
  310. rspamd_regexp_t *re;
  311. struct rspamd_lua_regexp *new, **pnew;
  312. const gchar *string, *flags_str = NULL;
  313. GError *err = NULL;
  314. string = luaL_checkstring (L, 1);
  315. if (lua_gettop (L) == 2) {
  316. flags_str = luaL_checkstring (L, 2);
  317. }
  318. if (string) {
  319. re = rspamd_regexp_cache_query (NULL, string, flags_str);
  320. if (re) {
  321. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  322. new->re = rspamd_regexp_ref (re);
  323. new->re_pattern = g_strdup (string);
  324. new->module = rspamd_lua_get_module_name (L);
  325. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  326. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  327. *pnew = new;
  328. }
  329. else {
  330. re = rspamd_regexp_cache_create (NULL, string, flags_str, &err);
  331. if (re == NULL) {
  332. lua_pushnil (L);
  333. msg_info ("cannot parse regexp: %s, error: %s",
  334. string,
  335. err == NULL ? "undefined" : err->message);
  336. g_error_free (err);
  337. } else {
  338. new = g_malloc0 (sizeof (struct rspamd_lua_regexp));
  339. new->re = rspamd_regexp_ref (re);
  340. new->re_pattern = g_strdup (string);
  341. new->module = rspamd_lua_get_module_name (L);
  342. pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
  343. rspamd_lua_setclass (L, "rspamd{regexp}", -1);
  344. *pnew = new;
  345. }
  346. }
  347. }
  348. else {
  349. return luaL_error (L, "invalid arguments");
  350. }
  351. return 1;
  352. }
  353. /***
  354. * @method re:get_pattern()
  355. * Get a pattern for specified regexp object
  356. * @return {string} pattern line
  357. */
  358. static int
  359. lua_regexp_get_pattern (lua_State *L)
  360. {
  361. LUA_TRACE_POINT;
  362. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  363. if (re && re->re && !IS_DESTROYED (re)) {
  364. lua_pushstring (L, rspamd_regexp_get_pattern (re->re));
  365. }
  366. else {
  367. lua_pushnil (L);
  368. }
  369. return 1;
  370. }
  371. /***
  372. * @method re:set_limit(lim)
  373. * Set maximum size of text length to be matched with this regexp (if `lim` is
  374. * less or equal to zero then all texts are checked)
  375. * @param {number} lim limit in bytes
  376. */
  377. static int
  378. lua_regexp_set_limit (lua_State *L)
  379. {
  380. LUA_TRACE_POINT;
  381. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  382. gint64 lim;
  383. lim = luaL_checknumber (L, 2);
  384. if (re && re->re && !IS_DESTROYED (re)) {
  385. if (lim > 0) {
  386. re->match_limit = lim;
  387. }
  388. else {
  389. re->match_limit = 0;
  390. }
  391. }
  392. return 0;
  393. }
  394. /***
  395. * @method re:set_max_hits(lim)
  396. * Set maximum number of hits returned by a regexp
  397. * @param {number} lim limit in hits count
  398. * @return {number} old number of max hits
  399. */
  400. static int
  401. lua_regexp_set_max_hits (lua_State *L)
  402. {
  403. LUA_TRACE_POINT;
  404. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  405. guint lim;
  406. lim = luaL_checkinteger (L, 2);
  407. if (re && re->re && !IS_DESTROYED (re)) {
  408. lua_pushinteger (L, rspamd_regexp_set_maxhits (re->re, lim));
  409. }
  410. else {
  411. lua_pushnil (L);
  412. }
  413. return 1;
  414. }
  415. /***
  416. * @method re:get_max_hits(lim)
  417. * Get maximum number of hits returned by a regexp
  418. * @return {number} number of max hits
  419. */
  420. static int
  421. lua_regexp_get_max_hits (lua_State *L)
  422. {
  423. LUA_TRACE_POINT;
  424. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  425. if (re && re->re && !IS_DESTROYED (re)) {
  426. lua_pushinteger (L, rspamd_regexp_get_maxhits (re->re));
  427. }
  428. else {
  429. lua_pushinteger (L, 1);
  430. }
  431. return 1;
  432. }
  433. /***
  434. * @method re:search(line[, raw[, capture]])
  435. * Search line in regular expression object. If line matches then this
  436. * function returns the table of captured strings. Otherwise, nil is returned.
  437. * If `raw` is specified, then input is treated as raw data not encoded in `utf-8`.
  438. * If `capture` is true, then this function saves all captures to the table of
  439. * values, so the first element is the whole matched string and the
  440. * subsequent elements are ordered captures defined within pattern.
  441. *
  442. * @param {string} line match the specified line against regexp object
  443. * @param {bool} match raw regexp instead of utf8 one
  444. * @param {bool} capture perform subpatterns capturing
  445. * @return {table or nil} table of strings or tables (if `capture` is true) or nil if not matched
  446. * @example
  447. * local re = regexp.create_cached('/^\s*([0-9]+)\s*$/')
  448. * -- returns nil
  449. * local m1 = re:search('blah')
  450. * local m2 = re:search(' 190 ')
  451. * -- prints ' 190 '
  452. * print(m2[1])
  453. *
  454. * local m3 = re:search(' 100500 ')
  455. * -- prints ' 100500 '
  456. * print(m3[1][1])
  457. * -- prints '100500' capture
  458. * print(m3[1][2])
  459. */
  460. static int
  461. lua_regexp_search (lua_State *L)
  462. {
  463. LUA_TRACE_POINT;
  464. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  465. const gchar *data = NULL;
  466. struct rspamd_lua_text *t;
  467. const gchar *start = NULL, *end = NULL;
  468. gint i;
  469. gsize len, capn;
  470. gboolean matched = FALSE, capture = FALSE, raw = FALSE;
  471. GArray *captures = NULL;
  472. struct rspamd_re_capture *cap;
  473. if (re && !IS_DESTROYED (re)) {
  474. if (lua_type (L, 2) == LUA_TSTRING) {
  475. data = luaL_checklstring (L, 2, &len);
  476. }
  477. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  478. t = lua_check_text (L, 2);
  479. if (t != NULL) {
  480. data = t->start;
  481. len = t->len;
  482. }
  483. }
  484. if (lua_gettop (L) >= 3) {
  485. raw = lua_toboolean (L, 3);
  486. }
  487. if (data) {
  488. if (lua_gettop (L) >= 4) {
  489. capture = TRUE;
  490. captures = g_array_new (FALSE, TRUE,
  491. sizeof (struct rspamd_re_capture));
  492. }
  493. lua_newtable (L);
  494. i = 0;
  495. if (re->match_limit > 0) {
  496. len = MIN (len, re->match_limit);
  497. }
  498. while (rspamd_regexp_search (re->re, data, len, &start, &end, raw,
  499. captures)) {
  500. if (capture) {
  501. lua_createtable (L, captures->len, 0);
  502. for (capn = 0; capn < captures->len; capn ++) {
  503. cap = &g_array_index (captures, struct rspamd_re_capture,
  504. capn);
  505. lua_pushlstring (L, cap->p, cap->len);
  506. lua_rawseti (L, -2, capn + 1);
  507. }
  508. lua_rawseti (L, -2, ++i);
  509. }
  510. else {
  511. lua_pushlstring (L, start, end - start);
  512. lua_rawseti (L, -2, ++i);
  513. }
  514. matched = TRUE;
  515. }
  516. if (!matched) {
  517. lua_pop (L, 1);
  518. lua_pushnil (L);
  519. }
  520. if (capture) {
  521. g_array_free (captures, TRUE);
  522. }
  523. return 1;
  524. }
  525. }
  526. lua_pushnil (L);
  527. return 1;
  528. }
  529. /***
  530. * @method re:match(line[, raw_match])
  531. * Matches line against the regular expression and return true if line matches
  532. * (partially or completely)
  533. *
  534. * @param {string} line match the specified line against regexp object
  535. * @param {bool} match raw regexp instead of utf8 one
  536. * @return {bool} true if `line` matches
  537. */
  538. static int
  539. lua_regexp_match (lua_State *L)
  540. {
  541. LUA_TRACE_POINT;
  542. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  543. struct rspamd_lua_text *t;
  544. const gchar *data = NULL;
  545. gsize len = 0;
  546. gboolean raw = FALSE;
  547. if (re && !IS_DESTROYED (re)) {
  548. if (lua_type (L, 2) == LUA_TSTRING) {
  549. data = luaL_checklstring (L, 2, &len);
  550. }
  551. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  552. t = lua_check_text (L, 2);
  553. if (t != NULL) {
  554. data = t->start;
  555. len = t->len;
  556. }
  557. }
  558. if (lua_gettop (L) == 3) {
  559. raw = lua_toboolean (L, 3);
  560. }
  561. if (data) {
  562. if (re->match_limit > 0) {
  563. len = MIN (len, re->match_limit);
  564. }
  565. if (rspamd_regexp_search (re->re, data, len, NULL, NULL, raw, NULL)) {
  566. lua_pushboolean (L, TRUE);
  567. }
  568. else {
  569. lua_pushboolean (L, FALSE);
  570. }
  571. return 1;
  572. }
  573. }
  574. lua_pushnil (L);
  575. return 1;
  576. }
  577. /***
  578. * @method re:matchn(line, max_matches, [, raw_match])
  579. * Matches line against the regular expression and return number of matches if line matches
  580. * (partially or completely). This process stop when `max_matches` is reached.
  581. * If `max_matches` is zero, then only a single match is counted which is equal to
  582. * @see re:match If `max_matches` is negative, then all matches are considered.
  583. *
  584. * @param {string} line match the specified line against regexp object
  585. * @param {number} max_matches maximum number of matches
  586. * @param {bool} match raw regexp instead of utf8 one
  587. * @return {number} number of matches found in the `line` argument
  588. */
  589. static int
  590. lua_regexp_matchn (lua_State *L)
  591. {
  592. LUA_TRACE_POINT;
  593. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  594. struct rspamd_lua_text *t;
  595. const gchar *data = NULL, *start = NULL, *end = NULL;
  596. gint max_matches, matches;
  597. gsize len = 0;
  598. gboolean raw = FALSE;
  599. if (re && !IS_DESTROYED (re)) {
  600. if (lua_type (L, 2) == LUA_TSTRING) {
  601. data = luaL_checklstring (L, 2, &len);
  602. }
  603. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  604. t = lua_check_text (L, 2);
  605. if (t != NULL) {
  606. data = t->start;
  607. len = t->len;
  608. }
  609. }
  610. max_matches = lua_tonumber (L, 3);
  611. if (lua_gettop (L) == 4) {
  612. raw = lua_toboolean (L, 4);
  613. }
  614. if (data) {
  615. matches = 0;
  616. if (re->match_limit > 0) {
  617. len = MIN (len, re->match_limit);
  618. }
  619. for (;;) {
  620. if (rspamd_regexp_search (re->re, data, len, &start, &end, raw,
  621. NULL)) {
  622. matches ++;
  623. }
  624. else {
  625. break;
  626. }
  627. if (max_matches >= 0 && matches >= max_matches) {
  628. break;
  629. }
  630. }
  631. lua_pushinteger (L, matches);
  632. return 1;
  633. }
  634. }
  635. lua_pushnil (L);
  636. return 1;
  637. }
  638. /***
  639. * @method re:split(line)
  640. * Split line using the specified regular expression.
  641. * Breaks the string on the pattern, and returns an array of the tokens.
  642. * If the pattern contains capturing parentheses, then the text for each
  643. * of the substrings will also be returned. If the pattern does not match
  644. * anywhere in the string, then the whole string is returned as the first
  645. * token.
  646. * @param {string/text} line line to split
  647. * @return {table} table of split line portions (if text was the input, then text is used for return parts)
  648. */
  649. static int
  650. lua_regexp_split (lua_State *L)
  651. {
  652. LUA_TRACE_POINT;
  653. struct rspamd_lua_regexp *re = lua_check_regexp (L);
  654. const gchar *data = NULL;
  655. struct rspamd_lua_text *t;
  656. gboolean matched = FALSE, is_text = FALSE;
  657. gsize len = 0;
  658. const gchar *start = NULL, *end = NULL, *old_start;
  659. gint i;
  660. if (re && !IS_DESTROYED (re)) {
  661. if (lua_type (L, 2) == LUA_TSTRING) {
  662. data = luaL_checklstring (L, 2, &len);
  663. }
  664. else if (lua_type (L, 2) == LUA_TUSERDATA) {
  665. t = lua_check_text (L, 2);
  666. if (t == NULL) {
  667. lua_error (L);
  668. return 0;
  669. }
  670. data = t->start;
  671. len = t->len;
  672. is_text = TRUE;
  673. }
  674. if (re->match_limit > 0) {
  675. len = MIN (len, re->match_limit);
  676. }
  677. if (data) {
  678. lua_newtable (L);
  679. i = 0;
  680. old_start = data;
  681. while (rspamd_regexp_search (re->re, data, len, &start, &end, FALSE,
  682. NULL)) {
  683. if (start - old_start > 0) {
  684. if (!is_text) {
  685. lua_pushlstring (L, old_start, start - old_start);
  686. }
  687. else {
  688. t = lua_newuserdata (L, sizeof (*t));
  689. rspamd_lua_setclass (L, "rspamd{text}", -1);
  690. t->start = old_start;
  691. t->len = start - old_start;
  692. t->flags = 0;
  693. }
  694. lua_rawseti (L, -2, ++i);
  695. matched = TRUE;
  696. }
  697. else if (start == end) {
  698. break;
  699. }
  700. old_start = end;
  701. }
  702. if (len > 0 && (end == NULL || end < data + len)) {
  703. if (end == NULL) {
  704. end = data;
  705. }
  706. if (!is_text) {
  707. lua_pushlstring (L, end, (data + len) - end);
  708. }
  709. else {
  710. t = lua_newuserdata (L, sizeof (*t));
  711. rspamd_lua_setclass (L, "rspamd{text}", -1);
  712. t->start = end;
  713. t->len = (data + len) - end;
  714. t->flags = 0;
  715. }
  716. lua_rawseti (L, -2, ++i);
  717. matched = TRUE;
  718. }
  719. if (!matched) {
  720. lua_pop (L, 1);
  721. lua_pushnil (L);
  722. }
  723. return 1;
  724. }
  725. }
  726. lua_pushnil (L);
  727. return 1;
  728. }
  729. /***
  730. * @method re:destroy()
  731. * Destroy regexp from caches if needed (the pointer is removed by garbadge collector)
  732. */
  733. static gint
  734. lua_regexp_destroy (lua_State *L)
  735. {
  736. LUA_TRACE_POINT;
  737. struct rspamd_lua_regexp *to_del = lua_check_regexp (L);
  738. if (to_del) {
  739. rspamd_regexp_cache_remove (NULL, to_del->re);
  740. rspamd_regexp_unref (to_del->re);
  741. to_del->re = NULL;
  742. to_del->re_flags |= LUA_REGEXP_FLAG_DESTROYED;
  743. }
  744. return 0;
  745. }
  746. static gint
  747. lua_regexp_gc (lua_State *L)
  748. {
  749. LUA_TRACE_POINT;
  750. struct rspamd_lua_regexp *to_del = lua_check_regexp (L);
  751. if (to_del) {
  752. if (!IS_DESTROYED (to_del)) {
  753. rspamd_regexp_unref (to_del->re);
  754. }
  755. g_free (to_del->re_pattern);
  756. g_free (to_del->module);
  757. g_free (to_del);
  758. }
  759. return 0;
  760. }
  761. static gint
  762. lua_load_regexp (lua_State * L)
  763. {
  764. lua_newtable (L);
  765. luaL_register (L, NULL, regexplib_f);
  766. return 1;
  767. }
  768. void
  769. luaopen_regexp (lua_State * L)
  770. {
  771. luaL_newmetatable (L, "rspamd{regexp}");
  772. lua_pushstring (L, "__index");
  773. lua_pushvalue (L, -2);
  774. lua_settable (L, -3);
  775. lua_pushstring (L, "class");
  776. lua_pushstring (L, "rspamd{regexp}");
  777. lua_rawset (L, -3);
  778. luaL_register (L, NULL, regexplib_m);
  779. rspamd_lua_add_preload (L, "rspamd_regexp", lua_load_regexp);
  780. if (regexp_static_pool == NULL) {
  781. regexp_static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  782. "regexp_lua_pool");
  783. }
  784. lua_settop (L, 0);
  785. }