Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

lua_trie.c 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "message.h"
  18. #include "libutil/multipattern.h"
  19. /***
  20. * @module rspamd_trie
  21. * Rspamd trie module provides the data structure suitable for searching of many
  22. * patterns in arbitrary texts (or binary chunks). The algorithmic complexity of
  23. * this algorithm is at most O(n + m + z), where `n` is the length of text, `m` is a length of pattern and `z` is a number of patterns in the text.
  24. *
  25. * Here is a typical example of trie usage:
  26. * @example
  27. local rspamd_trie = require "rspamd_trie"
  28. local patterns = {'aab', 'ab', 'bcd\0ef'}
  29. local trie = rspamd_trie.create(patterns)
  30. local function trie_callback(number, pos)
  31. print('Matched pattern number ' .. tostring(number) .. ' at pos: ' .. tostring(pos))
  32. end
  33. trie:match('some big text', trie_callback)
  34. */
/* Suffix trie */
/*
 * Declarations of the Lua-visible handlers implemented later in this file
 * (presumably LUA_FUNCTION_DEF expands to a prototype for lua_trie_<name> --
 * the macro itself is defined outside this file).
 */
LUA_FUNCTION_DEF (trie, create);
LUA_FUNCTION_DEF (trie, match);
LUA_FUNCTION_DEF (trie, search_mime);
LUA_FUNCTION_DEF (trie, search_rawmsg);
LUA_FUNCTION_DEF (trie, search_rawbody);
LUA_FUNCTION_DEF (trie, destroy);
/*
 * Methods and metamethods of a trie object; luaopen_trie registers this table
 * on the `rspamd{trie}` metatable.
 */
static const struct luaL_reg trielib_m[] = {
LUA_INTERFACE_DEF (trie, match),
LUA_INTERFACE_DEF (trie, search_mime),
LUA_INTERFACE_DEF (trie, search_rawmsg),
LUA_INTERFACE_DEF (trie, search_rawbody),
{"__tostring", rspamd_lua_class_tostring},
/* Garbage collection releases the underlying multipattern */
{"__gc", lua_trie_destroy},
/* Sentinel terminating the registration list */
{NULL, NULL}
};
/*
 * Module-level functions; lua_load_trie registers this table into the table
 * it returns for the `rspamd_trie` module.
 */
static const struct luaL_reg trielib_f[] = {
LUA_INTERFACE_DEF (trie, create),
/* Sentinel terminating the registration list */
{NULL, NULL}
};
  55. static struct rspamd_multipattern *
  56. lua_check_trie (lua_State * L, gint idx)
  57. {
  58. void *ud = rspamd_lua_check_udata (L, 1, "rspamd{trie}");
  59. luaL_argcheck (L, ud != NULL, 1, "'trie' expected");
  60. return ud ? *((struct rspamd_multipattern **)ud) : NULL;
  61. }
  62. static gint
  63. lua_trie_destroy (lua_State *L)
  64. {
  65. struct rspamd_multipattern *trie = lua_check_trie (L, 1);
  66. if (trie) {
  67. rspamd_multipattern_destroy (trie);
  68. }
  69. return 0;
  70. }
  71. /***
  72. * function trie.create(patterns)
  73. * Creates new trie data structure
  74. * @param {table} array of string patterns
  75. * @return {trie} new trie object
  76. */
  77. static gint
  78. lua_trie_create (lua_State *L)
  79. {
  80. struct rspamd_multipattern *trie, **ptrie;
  81. gint npat = 0, flags = RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_GLOB;
  82. GError *err = NULL;
  83. if (!lua_istable (L, 1)) {
  84. msg_err ("lua trie expects array of patterns for now");
  85. lua_pushnil (L);
  86. }
  87. else {
  88. lua_pushvalue (L, 1);
  89. lua_pushnil (L);
  90. while (lua_next (L, -2) != 0) {
  91. if (lua_isstring (L, -1)) {
  92. npat ++;
  93. }
  94. lua_pop (L, 1);
  95. }
  96. trie = rspamd_multipattern_create_sized (npat, flags);
  97. lua_pushnil (L);
  98. while (lua_next (L, -2) != 0) {
  99. if (lua_isstring (L, -1)) {
  100. const gchar *pat;
  101. gsize patlen;
  102. pat = lua_tolstring (L, -1, &patlen);
  103. rspamd_multipattern_add_pattern_len (trie, pat, patlen, flags);
  104. }
  105. lua_pop (L, 1);
  106. }
  107. lua_pop (L, 1); /* table */
  108. if (!rspamd_multipattern_compile (trie, &err)) {
  109. msg_err ("cannot compile multipattern: %e", err);
  110. g_error_free (err);
  111. rspamd_multipattern_destroy (trie);
  112. lua_pushnil (L);
  113. }
  114. else {
  115. ptrie = lua_newuserdata (L, sizeof (void *));
  116. rspamd_lua_setclass (L, "rspamd{trie}", -1);
  117. *ptrie = trie;
  118. }
  119. }
  120. return 1;
  121. }
  122. static gint
  123. lua_trie_callback (struct rspamd_multipattern *mp,
  124. guint strnum,
  125. gint match_start,
  126. gint textpos,
  127. const gchar *text,
  128. gsize len,
  129. void *context)
  130. {
  131. lua_State *L = context;
  132. gint ret;
  133. /* Function */
  134. lua_pushvalue (L, 3);
  135. lua_pushinteger (L, strnum + 1);
  136. lua_pushinteger (L, textpos);
  137. if (lua_pcall (L, 2, 1, 0) != 0) {
  138. msg_info ("call to trie callback has failed: %s",
  139. lua_tostring (L, -1));
  140. lua_pop (L, 1);
  141. return 1;
  142. }
  143. ret = lua_tonumber (L, -1);
  144. lua_pop (L, 1);
  145. return ret;
  146. }
  147. /*
  148. * We assume that callback argument is at pos 3 and icase is in position 4
  149. */
  150. static gint
  151. lua_trie_search_str (lua_State *L, struct rspamd_multipattern *trie,
  152. const gchar *str, gsize len)
  153. {
  154. gint ret;
  155. guint nfound = 0;
  156. if ((ret = rspamd_multipattern_lookup (trie, str, len,
  157. lua_trie_callback, L, &nfound)) == 0) {
  158. return nfound;
  159. }
  160. return ret;
  161. }
  162. /***
  163. * @method trie:match(input, cb[, caseless])
  164. * Search for patterns in `input` invoking `cb` optionally ignoring case
  165. * @param {table or string} input one or several (if `input` is an array) strings of input text
  166. * @param {function} cb callback called on each pattern match in form `function (idx, pos)` where `idx` is a numeric index of pattern (starting from 1) and `pos` is a numeric offset where the pattern ends
  167. * @param {boolean} caseless if `true` then match ignores symbols case (ASCII only)
  168. * @return {boolean} `true` if any pattern has been found (`cb` might be called multiple times however)
  169. */
  170. static gint
  171. lua_trie_match (lua_State *L)
  172. {
  173. LUA_TRACE_POINT;
  174. struct rspamd_multipattern *trie = lua_check_trie (L, 1);
  175. const gchar *text;
  176. gsize len;
  177. gboolean found = FALSE;
  178. if (trie) {
  179. if (lua_type (L, 2) == LUA_TTABLE) {
  180. lua_pushvalue (L, 2);
  181. lua_pushnil (L);
  182. while (lua_next (L, -2) != 0) {
  183. if (lua_isstring (L, -1)) {
  184. text = lua_tolstring (L, -1, &len);
  185. if (lua_trie_search_str (L, trie, text, len)) {
  186. found = TRUE;
  187. }
  188. }
  189. lua_pop (L, 1);
  190. }
  191. lua_pop (L, 1); /* table */
  192. }
  193. else if (lua_type (L, 2) == LUA_TSTRING) {
  194. text = lua_tolstring (L, 2, &len);
  195. if (lua_trie_search_str (L, trie, text, len)) {
  196. found = TRUE;
  197. }
  198. }
  199. }
  200. lua_pushboolean (L, found);
  201. return 1;
  202. }
  203. /***
  204. * @method trie:search_mime(task, cb[, caseless])
  205. * This is a helper mehthod to search pattern within text parts of a message in rspamd task
  206. * @param {task} task object
  207. * @param {function} cb callback called on each pattern match @see trie:match
  208. * @param {boolean} caseless if `true` then match ignores symbols case (ASCII only)
  209. * @return {boolean} `true` if any pattern has been found (`cb` might be called multiple times however)
  210. */
  211. static gint
  212. lua_trie_search_mime (lua_State *L)
  213. {
  214. LUA_TRACE_POINT;
  215. struct rspamd_multipattern *trie = lua_check_trie (L, 1);
  216. struct rspamd_task *task = lua_check_task (L, 2);
  217. struct rspamd_mime_text_part *part;
  218. const gchar *text;
  219. gsize len, i;
  220. gboolean found = FALSE;
  221. if (trie && task) {
  222. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
  223. if (!IS_PART_EMPTY (part) && part->utf_content != NULL) {
  224. text = part->utf_content->data;
  225. len = part->utf_content->len;
  226. if (lua_trie_search_str (L, trie, text, len) != 0) {
  227. found = TRUE;
  228. }
  229. }
  230. }
  231. }
  232. lua_pushboolean (L, found);
  233. return 1;
  234. }
  235. /***
  236. * @method trie:search_rawmsg(task, cb[, caseless])
  237. * This is a helper mehthod to search pattern within the whole undecoded content of rspamd task
  238. * @param {task} task object
  239. * @param {function} cb callback called on each pattern match @see trie:match
  240. * @param {boolean} caseless if `true` then match ignores symbols case (ASCII only)
  241. * @return {boolean} `true` if any pattern has been found (`cb` might be called multiple times however)
  242. */
  243. static gint
  244. lua_trie_search_rawmsg (lua_State *L)
  245. {
  246. LUA_TRACE_POINT;
  247. struct rspamd_multipattern *trie = lua_check_trie (L, 1);
  248. struct rspamd_task *task = lua_check_task (L, 2);
  249. const gchar *text;
  250. gsize len;
  251. gboolean found = FALSE;
  252. if (trie && task) {
  253. text = task->msg.begin;
  254. len = task->msg.len;
  255. if (lua_trie_search_str (L, trie, text, len) != 0) {
  256. found = TRUE;
  257. }
  258. }
  259. lua_pushboolean (L, found);
  260. return 1;
  261. }
  262. /***
  263. * @method trie:search_rawbody(task, cb[, caseless])
  264. * This is a helper mehthod to search pattern within the whole undecoded content of task's body (not including headers)
  265. * @param {task} task object
  266. * @param {function} cb callback called on each pattern match @see trie:match
  267. * @param {boolean} caseless if `true` then match ignores symbols case (ASCII only)
  268. * @return {boolean} `true` if any pattern has been found (`cb` might be called multiple times however)
  269. */
  270. static gint
  271. lua_trie_search_rawbody (lua_State *L)
  272. {
  273. LUA_TRACE_POINT;
  274. struct rspamd_multipattern *trie = lua_check_trie (L, 1);
  275. struct rspamd_task *task = lua_check_task (L, 2);
  276. const gchar *text;
  277. gsize len;
  278. gboolean found = FALSE;
  279. if (trie && task) {
  280. if (MESSAGE_FIELD (task, raw_headers_content).len > 0) {
  281. text = task->msg.begin + MESSAGE_FIELD (task, raw_headers_content).len;
  282. len = task->msg.len - MESSAGE_FIELD (task, raw_headers_content).len;
  283. }
  284. else {
  285. /* Treat as raw message */
  286. text = task->msg.begin;
  287. len = task->msg.len;
  288. }
  289. if (lua_trie_search_str (L, trie, text, len) != 0) {
  290. found = TRUE;
  291. }
  292. }
  293. lua_pushboolean (L, found);
  294. return 1;
  295. }
  296. static gint
  297. lua_load_trie (lua_State *L)
  298. {
  299. lua_newtable (L);
  300. luaL_register (L, NULL, trielib_f);
  301. return 1;
  302. }
  303. void
  304. luaopen_trie (lua_State * L)
  305. {
  306. luaL_newmetatable (L, "rspamd{trie}");
  307. lua_pushstring (L, "__index");
  308. lua_pushvalue (L, -2);
  309. lua_settable (L, -3);
  310. lua_pushstring (L, "class");
  311. lua_pushstring (L, "rspamd{trie}");
  312. lua_rawset (L, -3);
  313. luaL_register (L, NULL, trielib_m);
  314. rspamd_lua_add_preload (L, "rspamd_trie", lua_load_trie);
  315. lua_pop (L, 1); /* remove metatable from stack */
  316. }