You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

regexp.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /***MODULE:regexp
  17. * rspamd module that implements different regexp rules
  18. */
  19. #include "config.h"
  20. #include "libmime/message.h"
  21. #include "expression.h"
  22. #include "mime_expressions.h"
  23. #include "libserver/maps/map.h"
  24. #include "lua/lua_common.h"
  25. static const guint64 rspamd_regexp_cb_magic = 0xca9d9649fc3e2659ULL;
  26. struct regexp_module_item {
  27. guint64 magic;
  28. struct rspamd_expression *expr;
  29. const gchar *symbol;
  30. struct ucl_lua_funcdata *lua_function;
  31. };
  32. struct regexp_ctx {
  33. struct module_ctx ctx;
  34. gsize max_size;
  35. };
  36. static void process_regexp_item (struct rspamd_task *task,
  37. struct rspamd_symcache_dynamic_item *item,
  38. void *user_data);
  39. /* Initialization */
  40. gint regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
  41. gint regexp_module_config (struct rspamd_config *cfg, bool validate);
  42. gint regexp_module_reconfig (struct rspamd_config *cfg);
  43. module_t regexp_module = {
  44. "regexp",
  45. regexp_module_init,
  46. regexp_module_config,
  47. regexp_module_reconfig,
  48. NULL,
  49. RSPAMD_MODULE_VER,
  50. (guint)-1,
  51. };
  52. static inline struct regexp_ctx *
  53. regexp_get_context (struct rspamd_config *cfg)
  54. {
  55. return (struct regexp_ctx *)g_ptr_array_index (cfg->c_modules,
  56. regexp_module.ctx_offset);
  57. }
  58. /* Process regexp expression */
  59. static gboolean
  60. read_regexp_expression (rspamd_mempool_t * pool,
  61. struct regexp_module_item *chain,
  62. const gchar *symbol,
  63. const gchar *line,
  64. struct rspamd_mime_expr_ud *ud)
  65. {
  66. struct rspamd_expression *e = NULL;
  67. GError *err = NULL;
  68. if (!rspamd_parse_expression (line, 0, &mime_expr_subr, ud, pool, &err,
  69. &e)) {
  70. msg_warn_pool ("%s = \"%s\" is invalid regexp expression: %e", symbol,
  71. line,
  72. err);
  73. g_error_free (err);
  74. return FALSE;
  75. }
  76. g_assert (e != NULL);
  77. chain->expr = e;
  78. return TRUE;
  79. }
  80. /* Init function */
  81. gint
  82. regexp_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  83. {
  84. struct regexp_ctx *regexp_module_ctx;
  85. regexp_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
  86. sizeof (*regexp_module_ctx));
  87. *ctx = (struct module_ctx *)regexp_module_ctx;
  88. rspamd_rcl_add_doc_by_path (cfg,
  89. NULL,
  90. "Regular expressions rules plugin",
  91. "regexp",
  92. UCL_OBJECT,
  93. NULL,
  94. 0,
  95. NULL,
  96. 0);
  97. rspamd_rcl_add_doc_by_path (cfg,
  98. "regexp",
  99. "Maximum size of data chunk scanned with any regexp (further data is truncated)",
  100. "max_size",
  101. UCL_INT,
  102. NULL,
  103. 0,
  104. NULL,
  105. 0);
  106. return 0;
  107. }
  108. gint
  109. regexp_module_config (struct rspamd_config *cfg, bool validate)
  110. {
  111. struct regexp_ctx *regexp_module_ctx = regexp_get_context (cfg);
  112. struct regexp_module_item *cur_item = NULL;
  113. const ucl_object_t *sec, *value, *elt;
  114. ucl_object_iter_t it = NULL;
  115. gint res = TRUE, nre = 0, nlua = 0, nshots = cfg->default_max_shots;
  116. if (!rspamd_config_is_module_enabled (cfg, "regexp")) {
  117. return TRUE;
  118. }
  119. sec = ucl_object_lookup (cfg->rcl_obj, "regexp");
  120. if (sec == NULL) {
  121. msg_err_config ("regexp module enabled, but no rules are defined");
  122. return TRUE;
  123. }
  124. regexp_module_ctx->max_size = 0;
  125. while ((value = ucl_object_iterate (sec, &it, true)) != NULL) {
  126. if (g_ascii_strncasecmp (ucl_object_key (value), "max_size",
  127. sizeof ("max_size") - 1) == 0) {
  128. regexp_module_ctx->max_size = ucl_obj_toint (value);
  129. rspamd_re_cache_set_limit (cfg->re_cache, regexp_module_ctx->max_size);
  130. }
  131. else if (g_ascii_strncasecmp (ucl_object_key (value), "max_threads",
  132. sizeof ("max_threads") - 1) == 0) {
  133. msg_warn_config ("regexp module is now single threaded, max_threads is ignored");
  134. }
  135. else if (value->type == UCL_STRING) {
  136. struct rspamd_mime_expr_ud ud;
  137. cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
  138. sizeof (struct regexp_module_item));
  139. cur_item->symbol = ucl_object_key (value);
  140. cur_item->magic = rspamd_regexp_cb_magic;
  141. ud.conf_obj = NULL;
  142. ud.cfg = cfg;
  143. if (!read_regexp_expression (cfg->cfg_pool,
  144. cur_item, ucl_object_key (value),
  145. ucl_obj_tostring (value), &ud)) {
  146. if (validate) {
  147. return FALSE;
  148. }
  149. }
  150. else {
  151. rspamd_symcache_add_symbol (cfg->cache,
  152. cur_item->symbol,
  153. 0,
  154. process_regexp_item,
  155. cur_item,
  156. SYMBOL_TYPE_NORMAL, -1);
  157. nre ++;
  158. }
  159. }
  160. else if (value->type == UCL_USERDATA) {
  161. /* Just a lua function */
  162. cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
  163. sizeof (struct regexp_module_item));
  164. cur_item->magic = rspamd_regexp_cb_magic;
  165. cur_item->symbol = ucl_object_key (value);
  166. cur_item->lua_function = ucl_object_toclosure (value);
  167. rspamd_symcache_add_symbol (cfg->cache,
  168. cur_item->symbol,
  169. 0,
  170. process_regexp_item,
  171. cur_item,
  172. SYMBOL_TYPE_NORMAL, -1);
  173. nlua ++;
  174. }
  175. else if (value->type == UCL_OBJECT) {
  176. const gchar *description = NULL, *group = NULL;
  177. gdouble score = 0.0;
  178. guint flags = 0, priority = 0;
  179. gboolean is_lua = FALSE, valid_expression = TRUE;
  180. struct rspamd_mime_expr_ud ud;
  181. /* We have some lua table, extract its arguments */
  182. elt = ucl_object_lookup (value, "callback");
  183. if (elt == NULL || elt->type != UCL_USERDATA) {
  184. /* Try plain regexp expression */
  185. elt = ucl_object_lookup_any (value, "regexp", "re", NULL);
  186. if (elt != NULL && ucl_object_type (elt) == UCL_STRING) {
  187. cur_item = rspamd_mempool_alloc0 (cfg->cfg_pool,
  188. sizeof (struct regexp_module_item));
  189. cur_item->symbol = ucl_object_key (value);
  190. cur_item->magic = rspamd_regexp_cb_magic;
  191. ud.cfg = cfg;
  192. ud.conf_obj = value;
  193. if (!read_regexp_expression (cfg->cfg_pool,
  194. cur_item, ucl_object_key (value),
  195. ucl_obj_tostring (elt), &ud)) {
  196. if (validate) {
  197. return FALSE;
  198. }
  199. }
  200. else {
  201. valid_expression = TRUE;
  202. nre ++;
  203. }
  204. }
  205. else {
  206. msg_err_config (
  207. "no callback/expression defined for regexp symbol: "
  208. "%s", ucl_object_key (value));
  209. }
  210. }
  211. else {
  212. is_lua = TRUE;
  213. nlua ++;
  214. cur_item = rspamd_mempool_alloc0 (
  215. cfg->cfg_pool,
  216. sizeof (struct regexp_module_item));
  217. cur_item->magic = rspamd_regexp_cb_magic;
  218. cur_item->symbol = ucl_object_key (value);
  219. cur_item->lua_function = ucl_object_toclosure (value);
  220. }
  221. if (cur_item && (is_lua || valid_expression)) {
  222. flags = SYMBOL_TYPE_NORMAL;
  223. elt = ucl_object_lookup (value, "mime_only");
  224. if (elt) {
  225. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  226. msg_err_config (
  227. "mime_only attribute is not boolean for symbol: '%s'",
  228. cur_item->symbol);
  229. if (validate) {
  230. return FALSE;
  231. }
  232. }
  233. else {
  234. if (ucl_object_toboolean (elt)) {
  235. flags |= SYMBOL_TYPE_MIME_ONLY;
  236. }
  237. }
  238. }
  239. rspamd_symcache_add_symbol (cfg->cache,
  240. cur_item->symbol,
  241. 0,
  242. process_regexp_item,
  243. cur_item,
  244. flags, -1);
  245. /* Reset flags */
  246. flags = 0;
  247. elt = ucl_object_lookup (value, "condition");
  248. if (elt != NULL && ucl_object_type (elt) == UCL_USERDATA) {
  249. struct ucl_lua_funcdata *conddata;
  250. g_assert (cur_item->symbol != NULL);
  251. conddata = ucl_object_toclosure (elt);
  252. rspamd_symcache_add_condition_delayed (cfg->cache,
  253. cur_item->symbol,
  254. conddata->L, conddata->idx);
  255. }
  256. elt = ucl_object_lookup (value, "description");
  257. if (elt) {
  258. description = ucl_object_tostring (elt);
  259. }
  260. elt = ucl_object_lookup (value, "group");
  261. if (elt) {
  262. group = ucl_object_tostring (elt);
  263. }
  264. elt = ucl_object_lookup (value, "score");
  265. if (elt) {
  266. if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
  267. msg_err_config (
  268. "score attribute is not numeric for symbol: '%s'",
  269. cur_item->symbol);
  270. if (validate) {
  271. return FALSE;
  272. }
  273. }
  274. else {
  275. score = ucl_object_todouble (elt);
  276. }
  277. }
  278. elt = ucl_object_lookup (value, "one_shot");
  279. if (elt) {
  280. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  281. msg_err_config (
  282. "one_shot attribute is not boolean for symbol: '%s'",
  283. cur_item->symbol);
  284. if (validate) {
  285. return FALSE;
  286. }
  287. }
  288. else {
  289. if (ucl_object_toboolean (elt)) {
  290. nshots = 1;
  291. }
  292. }
  293. }
  294. if ((elt = ucl_object_lookup (value, "any_shot")) != NULL) {
  295. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  296. msg_err_config (
  297. "any_shot attribute is not boolean for symbol: '%s'",
  298. cur_item->symbol);
  299. if (validate) {
  300. return FALSE;
  301. }
  302. }
  303. else {
  304. if (ucl_object_toboolean (elt)) {
  305. nshots = -1;
  306. }
  307. }
  308. }
  309. if ((elt = ucl_object_lookup (value, "nshots")) != NULL) {
  310. if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
  311. msg_err_config (
  312. "nshots attribute is not numeric for symbol: '%s'",
  313. cur_item->symbol);
  314. if (validate) {
  315. return FALSE;
  316. }
  317. }
  318. else {
  319. nshots = ucl_object_toint (elt);
  320. }
  321. }
  322. elt = ucl_object_lookup (value, "one_param");
  323. if (elt) {
  324. if (ucl_object_type (elt) != UCL_BOOLEAN) {
  325. msg_err_config (
  326. "one_param attribute is not boolean for symbol: '%s'",
  327. cur_item->symbol);
  328. if (validate) {
  329. return FALSE;
  330. }
  331. }
  332. else {
  333. if (ucl_object_toboolean (elt)) {
  334. flags |= RSPAMD_SYMBOL_FLAG_ONEPARAM;
  335. }
  336. }
  337. }
  338. elt = ucl_object_lookup (value, "priority");
  339. if (elt) {
  340. if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) {
  341. msg_err_config (
  342. "priority attribute is not numeric for symbol: '%s'",
  343. cur_item->symbol);
  344. if (validate) {
  345. return FALSE;
  346. }
  347. }
  348. else {
  349. priority = ucl_object_toint (elt);
  350. }
  351. }
  352. else {
  353. priority = 0;
  354. }
  355. rspamd_config_add_symbol (cfg, cur_item->symbol,
  356. score, description, group, flags, priority, nshots);
  357. elt = ucl_object_lookup (value, "groups");
  358. if (elt) {
  359. ucl_object_iter_t gr_it;
  360. const ucl_object_t *cur_gr;
  361. gr_it = ucl_object_iterate_new (elt);
  362. while ((cur_gr = ucl_object_iterate_safe (gr_it, true)) != NULL) {
  363. rspamd_config_add_symbol_group (cfg, cur_item->symbol,
  364. ucl_object_tostring (cur_gr));
  365. }
  366. ucl_object_iterate_free (gr_it);
  367. }
  368. }
  369. }
  370. else {
  371. msg_warn_config ("unknown type of attribute %s for regexp module",
  372. ucl_object_key (value));
  373. }
  374. }
  375. if (res) {
  376. msg_info_config ("init internal regexp module, %d regexp rules and %d "
  377. "lua rules are loaded", nre, nlua);
  378. }
  379. else {
  380. msg_err_config ("fatal regexp module error");
  381. }
  382. return res;
  383. }
  384. gint
  385. regexp_module_reconfig (struct rspamd_config *cfg)
  386. {
  387. return regexp_module_config (cfg, false);
  388. }
  389. static gboolean
  390. rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data,
  391. struct rspamd_task *task,
  392. GArray *args, gdouble *res,
  393. const gchar *symbol)
  394. {
  395. lua_State *L = lua_data->L;
  396. struct rspamd_task **ptask;
  397. struct expression_argument *arg;
  398. gint pop = 0, i, nargs = 0;
  399. lua_rawgeti (L, LUA_REGISTRYINDEX, lua_data->idx);
  400. /* Now we got function in top of stack */
  401. ptask = lua_newuserdata (L, sizeof(struct rspamd_task *));
  402. rspamd_lua_setclass (L, "rspamd{task}", -1);
  403. *ptask = task;
  404. /* Now push all arguments */
  405. if (args) {
  406. for (i = 0; i < (gint)args->len; i ++) {
  407. arg = &g_array_index (args, struct expression_argument, i);
  408. if (arg) {
  409. switch (arg->type) {
  410. case EXPRESSION_ARGUMENT_NORMAL:
  411. lua_pushstring (L, (const gchar *) arg->data);
  412. break;
  413. case EXPRESSION_ARGUMENT_BOOL:
  414. lua_pushboolean (L, (gboolean) GPOINTER_TO_SIZE(arg->data));
  415. break;
  416. default:
  417. msg_err_task ("%s: cannot pass custom params to lua function",
  418. symbol);
  419. return FALSE;
  420. }
  421. }
  422. }
  423. nargs = args->len;
  424. }
  425. if (lua_pcall (L, nargs + 1, 1, 0) != 0) {
  426. msg_info_task ("%s: call to lua function failed: %s", symbol,
  427. lua_tostring (L, -1));
  428. lua_pop (L, 1);
  429. return FALSE;
  430. }
  431. pop++;
  432. if (lua_type (L, -1) == LUA_TNUMBER) {
  433. *res = lua_tonumber (L, -1);
  434. }
  435. else if (lua_type (L, -1) == LUA_TBOOLEAN) {
  436. *res = lua_toboolean (L, -1);
  437. }
  438. else {
  439. msg_info_task ("%s: lua function must return a boolean", symbol);
  440. *res = FALSE;
  441. }
  442. lua_pop (L, pop);
  443. return TRUE;
  444. }
  445. static void
  446. process_regexp_item (struct rspamd_task *task,
  447. struct rspamd_symcache_dynamic_item *symcache_item,
  448. void *user_data)
  449. {
  450. struct regexp_module_item *item = user_data;
  451. gdouble res = FALSE;
  452. /* Non-threaded version */
  453. if (item->lua_function) {
  454. /* Just call function */
  455. res = FALSE;
  456. if (!rspamd_lua_call_expression_func (item->lua_function, task, NULL,
  457. &res, item->symbol)) {
  458. msg_err_task ("error occurred when checking symbol %s",
  459. item->symbol);
  460. }
  461. }
  462. else {
  463. /* Process expression */
  464. if (item->expr) {
  465. res = rspamd_process_expression (item->expr, 0, task);
  466. }
  467. else {
  468. msg_warn_task ("FIXME: %s symbol is broken with new expressions",
  469. item->symbol);
  470. }
  471. }
  472. if (res != 0) {
  473. rspamd_task_insert_result (task, item->symbol, res, NULL);
  474. }
  475. rspamd_symcache_finalize_item (task, symcache_item);
  476. }