You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

regexp.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /***MODULE:regexp
  17. * rspamd module that implements different regexp rules
  18. */
  19. #include "config.h"
  20. #include "libmime/message.h"
  21. #include "expression.h"
  22. #include "mime_expressions.h"
  23. #include "libserver/maps/map.h"
  24. #include "lua/lua_common.h"
  25. static const guint64 rspamd_regexp_cb_magic = 0xca9d9649fc3e2659ULL;
  26. struct regexp_module_item {
  27. guint64 magic;
  28. struct rspamd_expression *expr;
  29. const gchar *symbol;
  30. struct ucl_lua_funcdata *lua_function;
  31. };
  32. struct regexp_ctx {
  33. struct module_ctx ctx;
  34. gsize max_size;
  35. };
  36. static void process_regexp_item (struct rspamd_task *task,
  37. struct rspamd_symcache_item *item,
  38. void *user_data);
  39. /* Initialization */
  40. gint regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
  41. gint regexp_module_config (struct rspamd_config *cfg);
  42. gint regexp_module_reconfig (struct rspamd_config *cfg);
  43. module_t regexp_module = {
  44. "regexp",
  45. regexp_module_init,
  46. regexp_module_config,
  47. regexp_module_reconfig,
  48. NULL,
  49. RSPAMD_MODULE_VER,
  50. (guint)-1,
  51. };
  52. static inline struct regexp_ctx *
  53. regexp_get_context (struct rspamd_config *cfg)
  54. {
  55. return (struct regexp_ctx *)g_ptr_array_index (cfg->c_modules,
  56. regexp_module.ctx_offset);
  57. }
  58. /* Process regexp expression */
  59. static gboolean
  60. read_regexp_expression (rspamd_mempool_t * pool,
  61. struct regexp_module_item *chain,
  62. const gchar *symbol,
  63. const gchar *line,
  64. struct rspamd_mime_expr_ud *ud)
  65. {
  66. struct rspamd_expression *e = NULL;
  67. GError *err = NULL;
  68. if (!rspamd_parse_expression (line, 0, &mime_expr_subr, ud, pool, &err,
  69. &e)) {
  70. msg_warn_pool ("%s = \"%s\" is invalid regexp expression: %e", symbol,
  71. line,
  72. err);
  73. g_error_free (err);
  74. return FALSE;
  75. }
  76. g_assert (e != NULL);
  77. chain->expr = e;
  78. return TRUE;
  79. }
  80. /* Init function */
  81. gint
  82. regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  83. {
  84. struct regexp_ctx *regexp_module_ctx;
  85. regexp_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
  86. sizeof (*regexp_module_ctx));
  87. *ctx = (struct module_ctx *)regexp_module_ctx;
  88. rspamd_rcl_add_doc_by_path (cfg,
  89. NULL,
  90. "Regular expressions rules plugin",
  91. "regexp",
  92. UCL_OBJECT,
  93. NULL,
  94. 0,
  95. NULL,
  96. 0);
  97. rspamd_rcl_add_doc_by_path (cfg,
  98. "regexp",
  99. "Maximum size of data chunk scanned with any regexp (further data is truncated)",
  100. "max_size",
  101. UCL_INT,
  102. NULL,
  103. 0,
  104. NULL,
  105. 0);
  106. return 0;
  107. }
  108. gint
  109. regexp_module_config (struct rspamd_config *cfg)
  110. {
  111. struct regexp_ctx *regexp_module_ctx = regexp_get_context (cfg);
  112. struct regexp_module_item *cur_item = NULL;
  113. const ucl_object_t *sec, *value, *elt;
  114. ucl_object_iter_t it = NULL;
  115. gint res = TRUE, nre = 0, nlua = 0, nshots = cfg->default_max_shots;
  116. if (!rspamd_config_is_module_enabled (cfg, "regexp")) {
  117. return TRUE;
  118. }
  119. sec = ucl_object_lookup (cfg->rcl_obj, "regexp");
  120. if (sec == NULL) {
  121. msg_err_config ("regexp module enabled, but no rules are defined");
  122. return TRUE;
  123. }
  124. regexp_module_ctx->max_size = 0;
  125. while ((value = ucl_object_iterate (sec, &it, true)) != NULL) {
  126. if (g_ascii_strncasecmp (ucl_object_key (value), "max_size",
  127. sizeof ("max_size") - 1) == 0) {
  128. regexp_module_ctx->max_size = ucl_obj_toint (value);
  129. rspamd_re_cache_set_limit (cfg->re_cache, regexp_module_ctx->max_size);
  130. }
  131. else if (g_ascii_strncasecmp (ucl_object_key (value), "max_threads",
  132. sizeof ("max_threads") - 1) == 0) {
  133. msg_warn_config ("regexp module is now single threaded, max_threads is ignored");
  134. }
  135. else if (value->type == UCL_STRING) {
  136. struct rspamd_mime_expr_ud ud;
  137. cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
  138. sizeof (struct regexp_module_item));
  139. cur_item->symbol = ucl_object_key (value);
  140. cur_item->magic = rspamd_regexp_cb_magic;
  141. ud.conf_obj = NULL;
  142. ud.cfg = cfg;
  143. if (!read_regexp_expression (cfg->cfg_pool,
  144. cur_item, ucl_object_key (value),
  145. ucl_obj_tostring (value), &ud)) {
  146. res = FALSE;
  147. }
  148. else {
  149. rspamd_symcache_add_symbol (cfg->cache,
  150. cur_item->symbol,
  151. 0,
  152. process_regexp_item,
  153. cur_item,
  154. SYMBOL_TYPE_NORMAL, -1);
  155. nre ++;
  156. }
  157. }
  158. else if (value->type == UCL_USERDATA) {
  159. /* Just a lua function */
  160. cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
  161. sizeof (struct regexp_module_item));
  162. cur_item->magic = rspamd_regexp_cb_magic;
  163. cur_item->symbol = ucl_object_key (value);
  164. cur_item->lua_function = ucl_object_toclosure (value);
  165. rspamd_symcache_add_symbol (cfg->cache,
  166. cur_item->symbol,
  167. 0,
  168. process_regexp_item,
  169. cur_item,
  170. SYMBOL_TYPE_NORMAL, -1);
  171. nlua ++;
  172. }
  173. else if (value->type == UCL_OBJECT) {
  174. const gchar *description = NULL, *group = NULL;
  175. gdouble score = 0.0;
  176. guint flags = 0, priority = 0;
  177. gboolean is_lua = FALSE, valid_expression = TRUE;
  178. struct rspamd_mime_expr_ud ud;
  179. /* We have some lua table, extract its arguments */
  180. elt = ucl_object_lookup (value, "callback");
  181. if (elt == NULL || elt->type != UCL_USERDATA) {
  182. /* Try plain regexp expression */
  183. elt = ucl_object_lookup_any (value, "regexp", "re", NULL);
  184. if (elt != NULL && ucl_object_type (elt) == UCL_STRING) {
  185. cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
  186. sizeof (struct regexp_module_item));
  187. cur_item->symbol = ucl_object_key (value);
  188. cur_item->magic = rspamd_regexp_cb_magic;
  189. ud.cfg = cfg;
  190. ud.conf_obj = value;
  191. if (!read_regexp_expression (cfg->cfg_pool,
  192. cur_item, ucl_object_key (value),
  193. ucl_obj_tostring (elt), &ud)) {
  194. res = FALSE;
  195. }
  196. else {
  197. valid_expression = TRUE;
  198. nre ++;
  199. }
  200. }
  201. else {
  202. msg_err_config (
  203. "no callback/expression defined for regexp symbol: "
  204. "%s", ucl_object_key (value));
  205. }
  206. }
  207. else {
  208. is_lua = TRUE;
  209. nlua ++;
  210. cur_item = rspamd_mempool_alloc0 (
  211. cfg->cfg_pool,
  212. sizeof (struct regexp_module_item));
  213. cur_item->magic = rspamd_regexp_cb_magic;
  214. cur_item->symbol = ucl_object_key (value);
  215. cur_item->lua_function = ucl_object_toclosure (value);
  216. }
  217. if (cur_item && (is_lua || valid_expression)) {
  218. flags = SYMBOL_TYPE_NORMAL;
  219. elt = ucl_object_lookup (value, "mime_only");
  220. if (elt) {
  221. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  222. msg_err_config (
  223. "mime_only attribute is not boolean for symbol: '%s'",
  224. cur_item->symbol);
  225. res = FALSE;
  226. }
  227. else {
  228. if (ucl_object_toboolean (elt)) {
  229. flags |= SYMBOL_TYPE_MIME_ONLY;
  230. }
  231. }
  232. }
  233. rspamd_symcache_add_symbol (cfg->cache,
  234. cur_item->symbol,
  235. 0,
  236. process_regexp_item,
  237. cur_item,
  238. flags, -1);
  239. /* Reset flags */
  240. flags = 0;
  241. elt = ucl_object_lookup (value, "condition");
  242. if (elt != NULL && ucl_object_type (elt) == UCL_USERDATA) {
  243. struct ucl_lua_funcdata *conddata;
  244. g_assert (cur_item->symbol != NULL);
  245. conddata = ucl_object_toclosure (elt);
  246. rspamd_symcache_add_condition_delayed (cfg->cache,
  247. cur_item->symbol,
  248. conddata->L, conddata->idx);
  249. }
  250. elt = ucl_object_lookup (value, "description");
  251. if (elt) {
  252. description = ucl_object_tostring (elt);
  253. }
  254. elt = ucl_object_lookup (value, "group");
  255. if (elt) {
  256. group = ucl_object_tostring (elt);
  257. }
  258. elt = ucl_object_lookup (value, "score");
  259. if (elt) {
  260. if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
  261. msg_err_config (
  262. "score attribute is not numeric for symbol: '%s'",
  263. cur_item->symbol);
  264. res = FALSE;
  265. }
  266. else {
  267. score = ucl_object_todouble (elt);
  268. }
  269. }
  270. elt = ucl_object_lookup (value, "one_shot");
  271. if (elt) {
  272. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  273. msg_err_config (
  274. "one_shot attribute is not boolean for symbol: '%s'",
  275. cur_item->symbol);
  276. res = FALSE;
  277. }
  278. else {
  279. if (ucl_object_toboolean (elt)) {
  280. nshots = 1;
  281. }
  282. }
  283. }
  284. if ((elt = ucl_object_lookup (value, "any_shot")) != NULL) {
  285. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  286. msg_err_config (
  287. "any_shot attribute is not boolean for symbol: '%s'",
  288. cur_item->symbol);
  289. res = FALSE;
  290. }
  291. else {
  292. if (ucl_object_toboolean (elt)) {
  293. nshots = -1;
  294. }
  295. }
  296. }
  297. if ((elt = ucl_object_lookup (value, "nshots")) != NULL) {
  298. if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
  299. msg_err_config (
  300. "nshots attribute is not numeric for symbol: '%s'",
  301. cur_item->symbol);
  302. res = FALSE;
  303. }
  304. else {
  305. nshots = ucl_object_toint (elt);
  306. }
  307. }
  308. elt = ucl_object_lookup (value, "one_param");
  309. if (elt) {
  310. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  311. msg_err_config (
  312. "one_param attribute is not boolean for symbol: '%s'",
  313. cur_item->symbol);
  314. res = FALSE;
  315. }
  316. else {
  317. if (ucl_object_toboolean (elt)) {
  318. flags |= RSPAMD_SYMBOL_FLAG_ONEPARAM;
  319. }
  320. }
  321. }
  322. elt = ucl_object_lookup (value, "priority");
  323. if (elt) {
  324. if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
  325. msg_err_config (
  326. "priority attribute is not numeric for symbol: '%s'",
  327. cur_item->symbol);
  328. res = FALSE;
  329. }
  330. else {
  331. priority = ucl_object_toint (elt);
  332. }
  333. }
  334. else {
  335. priority = 0;
  336. }
  337. rspamd_config_add_symbol (cfg, cur_item->symbol,
  338. score, description, group, flags, priority, nshots);
  339. elt = ucl_object_lookup (value, "groups");
  340. if (elt) {
  341. ucl_object_iter_t gr_it;
  342. const ucl_object_t *cur_gr;
  343. gr_it = ucl_object_iterate_new (elt);
  344. while ((cur_gr = ucl_object_iterate_safe (gr_it, true)) != NULL) {
  345. rspamd_config_add_symbol_group (cfg, cur_item->symbol,
  346. ucl_object_tostring (cur_gr));
  347. }
  348. ucl_object_iterate_free (gr_it);
  349. }
  350. }
  351. }
  352. else {
  353. msg_warn_config ("unknown type of attribute %s for regexp module",
  354. ucl_object_key (value));
  355. }
  356. }
  357. msg_info_config ("init internal regexp module, %d regexp rules and %d "
  358. "lua rules are loaded", nre, nlua);
  359. return res;
  360. }
  361. gint
  362. regexp_module_reconfig (struct rspamd_config *cfg)
  363. {
  364. return regexp_module_config (cfg);
  365. }
  366. static gboolean
  367. rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data,
  368. struct rspamd_task *task,
  369. GArray *args, gdouble *res,
  370. const gchar *symbol)
  371. {
  372. lua_State *L = lua_data->L;
  373. struct rspamd_task **ptask;
  374. struct expression_argument *arg;
  375. gint pop = 0, i, nargs = 0;
  376. lua_rawgeti (L, LUA_REGISTRYINDEX, lua_data->idx);
  377. /* Now we got function in top of stack */
  378. ptask = lua_newuserdata (L, sizeof(struct rspamd_task *));
  379. rspamd_lua_setclass (L, "rspamd{task}", -1);
  380. *ptask = task;
  381. /* Now push all arguments */
  382. if (args) {
  383. for (i = 0; i < (gint)args->len; i ++) {
  384. arg = &g_array_index (args, struct expression_argument, i);
  385. if (arg) {
  386. switch (arg->type) {
  387. case EXPRESSION_ARGUMENT_NORMAL:
  388. lua_pushstring (L, (const gchar *) arg->data);
  389. break;
  390. case EXPRESSION_ARGUMENT_BOOL:
  391. lua_pushboolean (L, (gboolean) GPOINTER_TO_SIZE(arg->data));
  392. break;
  393. default:
  394. msg_err_task ("%s: cannot pass custom params to lua function",
  395. symbol);
  396. return FALSE;
  397. }
  398. }
  399. }
  400. nargs = args->len;
  401. }
  402. if (lua_pcall (L, nargs + 1, 1, 0) != 0) {
  403. msg_info_task ("%s: call to lua function failed: %s", symbol,
  404. lua_tostring (L, -1));
  405. lua_pop (L, 1);
  406. return FALSE;
  407. }
  408. pop++;
  409. if (lua_type (L, -1) == LUA_TNUMBER) {
  410. *res = lua_tonumber (L, -1);
  411. }
  412. else if (lua_type (L, -1) == LUA_TBOOLEAN) {
  413. *res = lua_toboolean (L, -1);
  414. }
  415. else {
  416. msg_info_task ("%s: lua function must return a boolean", symbol);
  417. *res = FALSE;
  418. }
  419. lua_pop (L, pop);
  420. return TRUE;
  421. }
  422. static void
  423. process_regexp_item (struct rspamd_task *task,
  424. struct rspamd_symcache_item *symcache_item,
  425. void *user_data)
  426. {
  427. struct regexp_module_item *item = user_data;
  428. gdouble res = FALSE;
  429. /* Non-threaded version */
  430. if (item->lua_function) {
  431. /* Just call function */
  432. res = FALSE;
  433. if (!rspamd_lua_call_expression_func (item->lua_function, task, NULL,
  434. &res, item->symbol)) {
  435. msg_err_task ("error occurred when checking symbol %s",
  436. item->symbol);
  437. }
  438. }
  439. else {
  440. /* Process expression */
  441. if (item->expr) {
  442. res = rspamd_process_expression (item->expr, 0, task);
  443. }
  444. else {
  445. msg_warn_task ("FIXME: %s symbol is broken with new expressions",
  446. item->symbol);
  447. }
  448. }
  449. if (res != 0) {
  450. rspamd_task_insert_result (task, item->symbol, res, NULL);
  451. }
  452. rspamd_symcache_finalize_item (task, symcache_item);
  453. }