Du kannst nicht mehr als 25 Themen auswählen. Themen müssen mit entweder einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

regexp.c 44KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573
  1. /*
  2. * Copyright (c) 2009, Rambler media
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
  17. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  18. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  19. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  20. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  21. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. */
  24. /***MODULE:regexp
  25. * rspamd module that implements different regexp rules
  26. */
  27. #include "config.h"
  28. #include "main.h"
  29. #include "message.h"
  30. #include "cfg_file.h"
  31. #include "map.h"
  32. #include "util.h"
  33. #include "expressions.h"
  34. #include "view.h"
  35. #include "lua/lua_common.h"
  36. #include "json/jansson.h"
  37. #include "cfg_xml.h"
  38. #define DEFAULT_STATFILE_PREFIX "./"
  39. struct regexp_module_item {
  40. struct expression *expr;
  41. gchar *symbol;
  42. guint32 avg_time;
  43. gpointer lua_function;
  44. };
  45. struct autolearn_data {
  46. gchar *statfile_name;
  47. gchar *symbol;
  48. float weight;
  49. };
  50. struct regexp_ctx {
  51. gint (*filter) (struct worker_task * task);
  52. GHashTable *autolearn_symbols;
  53. gchar *statfile_prefix;
  54. memory_pool_t *regexp_pool;
  55. memory_pool_t *dynamic_pool;
  56. gsize max_size;
  57. GThreadPool *workers;
  58. };
  59. struct regexp_json_buf {
  60. gchar *buf;
  61. gchar *pos;
  62. size_t buflen;
  63. struct config_file *cfg;
  64. };
  65. /* Lua regexp module for checking rspamd regexps */
  66. LUA_FUNCTION_DEF (regexp, match);
  67. static const struct luaL_reg regexplib_m[] = {
  68. LUA_INTERFACE_DEF (regexp, match),
  69. {"__tostring", lua_class_tostring},
  70. {NULL, NULL}
  71. };
  72. static struct regexp_ctx *regexp_module_ctx = NULL;
  73. static GMutex *workers_mtx = NULL;
  74. static gint regexp_common_filter (struct worker_task *task);
  75. static void process_regexp_item_threaded (gpointer data, gpointer user_data);
  76. static gboolean rspamd_regexp_match_number (struct worker_task *task, GList * args, void *unused);
  77. static gboolean rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused);
  78. static gboolean rspamd_check_smtp_data (struct worker_task *task, GList * args, void *unused);
  79. static gboolean rspamd_regexp_occurs_number (struct worker_task *task, GList * args, void *unused);
  80. static void process_regexp_item (struct worker_task *task, void *user_data);
  81. /* Initialization */
  82. gint regexp_module_init (struct config_file *cfg, struct module_ctx **ctx);
  83. gint regexp_module_config (struct config_file *cfg);
  84. gint regexp_module_reconfig (struct config_file *cfg);
  85. module_t regexp_module = {
  86. "regexp",
  87. regexp_module_init,
  88. regexp_module_config,
  89. regexp_module_reconfig
  90. };
  91. static gint
  92. luaopen_regexp (lua_State * L)
  93. {
  94. luaL_openlib (L, "rspamd_regexp", regexplib_m, 0);
  95. return 1;
  96. }
  97. static void
  98. regexp_dynamic_insert_result (struct worker_task *task, void *user_data)
  99. {
  100. gchar *symbol = user_data;
  101. insert_result (task, symbol, 1, NULL);
  102. }
  103. /*
  104. * Utility functions for matching exact number of regexps
  105. */
  106. typedef gboolean (*int_compare_func) (gint a, gint b);
  107. static gboolean
  108. op_equal (gint a, gint b)
  109. {
  110. return a == b;
  111. }
  112. static gboolean
  113. op_more (gint a, gint b)
  114. {
  115. return a > b;
  116. }
  117. static gboolean
  118. op_less (gint a, gint b)
  119. {
  120. return a < b;
  121. }
  122. static gboolean
  123. op_more_equal (gint a, gint b)
  124. {
  125. return a >= b;
  126. }
  127. static gboolean
  128. op_less_equal (gint a, gint b)
  129. {
  130. return a <= b;
  131. }
  132. /*
  133. * Process ip and mask of dynamic regexp
  134. */
  135. static gboolean
  136. parse_regexp_ipmask (const gchar *begin, struct dynamic_map_item *addr)
  137. {
  138. const gchar *pos;
  139. gchar ip_buf[sizeof ("255.255.255.255")], mask_buf[3], *p;
  140. gint state = 0, dots = 0;
  141. bzero (ip_buf, sizeof (ip_buf));
  142. bzero (mask_buf, sizeof (mask_buf));
  143. pos = begin;
  144. p = ip_buf;
  145. if (*pos == '!') {
  146. addr->negative = TRUE;
  147. pos ++;
  148. }
  149. else {
  150. addr->negative = FALSE;
  151. }
  152. while (*pos) {
  153. switch (state) {
  154. case 0:
  155. state = 1;
  156. p = ip_buf;
  157. dots = 0;
  158. break;
  159. case 1:
  160. /* Begin parse ip */
  161. if (p - ip_buf >= (gint)sizeof (ip_buf) || dots > 3) {
  162. return FALSE;
  163. }
  164. if (g_ascii_isdigit (*pos)) {
  165. *p ++ = *pos ++;
  166. }
  167. else if (*pos == '.') {
  168. *p ++ = *pos ++;
  169. dots ++;
  170. }
  171. else if (*pos == '/') {
  172. pos ++;
  173. p = mask_buf;
  174. state = 2;
  175. }
  176. else {
  177. /* Invalid character */
  178. return FALSE;
  179. }
  180. break;
  181. case 2:
  182. /* Parse mask */
  183. if (p - mask_buf > 2) {
  184. return FALSE;
  185. }
  186. if (g_ascii_isdigit (*pos)) {
  187. *p ++ = *pos ++;
  188. }
  189. else {
  190. return FALSE;
  191. }
  192. break;
  193. }
  194. }
  195. if (!inet_aton (ip_buf, &addr->addr)) {
  196. return FALSE;
  197. }
  198. if (state == 2) {
  199. /* Also parse mask */
  200. addr->mask = (mask_buf[0] - '0') * 10 + mask_buf[1] - '0';
  201. if (addr->mask > 32) {
  202. msg_info ("bad ipmask value: '%s'", begin);
  203. return FALSE;
  204. }
  205. }
  206. else {
  207. addr->mask = 32;
  208. }
  209. return TRUE;
  210. }
  211. /* Process regexp expression */
  212. static gboolean
  213. read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, gchar *symbol, gchar *line, gboolean raw_mode)
  214. {
  215. struct expression *e, *cur;
  216. e = parse_expression (pool, line);
  217. if (e == NULL) {
  218. msg_warn ("%s = \"%s\" is invalid regexp expression", symbol, line);
  219. return FALSE;
  220. }
  221. chain->expr = e;
  222. cur = e;
  223. while (cur) {
  224. if (cur->type == EXPR_REGEXP) {
  225. cur->content.operand = parse_regexp (pool, cur->content.operand, raw_mode);
  226. if (cur->content.operand == NULL) {
  227. msg_warn ("cannot parse regexp, skip expression %s = \"%s\"", symbol, line);
  228. return FALSE;
  229. }
  230. cur->type = EXPR_REGEXP_PARSED;
  231. }
  232. cur = cur->next;
  233. }
  234. return TRUE;
  235. }
  236. /* Callbacks for reading json dynamic rules */
  237. gchar *
  238. json_regexp_read_cb (memory_pool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
  239. {
  240. struct regexp_json_buf *jb;
  241. gint free, off;
  242. if (data->cur_data == NULL) {
  243. jb = g_malloc (sizeof (struct regexp_json_buf));
  244. jb->cfg = ((struct regexp_json_buf *)data->prev_data)->cfg;
  245. jb->buf = NULL;
  246. jb->pos = NULL;
  247. data->cur_data = jb;
  248. }
  249. else {
  250. jb = data->cur_data;
  251. }
  252. if (jb->buf == NULL) {
  253. /* Allocate memory for buffer */
  254. jb->buflen = len * 2;
  255. jb->buf = g_malloc (jb->buflen);
  256. jb->pos = jb->buf;
  257. }
  258. off = jb->pos - jb->buf;
  259. free = jb->buflen - off;
  260. if (free < len) {
  261. jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2);
  262. jb->buf = g_realloc (jb->buf, jb->buflen);
  263. jb->pos = jb->buf + off;
  264. }
  265. memcpy (jb->pos, chunk, len);
  266. jb->pos += len;
  267. /* Say not to copy any part of this buffer */
  268. return NULL;
  269. }
  270. void
  271. json_regexp_fin_cb (memory_pool_t * pool, struct map_cb_data *data)
  272. {
  273. struct regexp_json_buf *jb;
  274. guint nelts, i, j;
  275. json_t *js, *cur_elt, *cur_nm, *it_val;
  276. json_error_t je;
  277. gchar *cur_rule, *cur_symbol;
  278. double score;
  279. struct regexp_module_item *cur_item;
  280. GList *cur_networks = NULL;
  281. struct dynamic_map_item *cur_nitem;
  282. memory_pool_t *new_pool;
  283. if (data->prev_data) {
  284. jb = data->prev_data;
  285. /* Clean prev data */
  286. if (jb->buf) {
  287. g_free (jb->buf);
  288. }
  289. g_free (jb);
  290. }
  291. /* Now parse json */
  292. if (data->cur_data) {
  293. jb = data->cur_data;
  294. }
  295. else {
  296. msg_err ("no data read");
  297. return;
  298. }
  299. if (jb->buf == NULL) {
  300. msg_err ("no data read");
  301. return;
  302. }
  303. /* NULL terminate current buf */
  304. *jb->pos = '\0';
  305. js = json_loads (jb->buf, &je);
  306. if (!js) {
  307. msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
  308. return;
  309. }
  310. if (!json_is_array (js)) {
  311. json_decref (js);
  312. msg_err ("loaded json is not an array");
  313. return;
  314. }
  315. new_pool = memory_pool_new (memory_pool_get_size ());
  316. remove_dynamic_rules (jb->cfg->cache);
  317. if (regexp_module_ctx->dynamic_pool != NULL) {
  318. memory_pool_delete (regexp_module_ctx->dynamic_pool);
  319. }
  320. regexp_module_ctx->dynamic_pool = new_pool;
  321. nelts = json_array_size (js);
  322. for (i = 0; i < nelts; i++) {
  323. cur_networks = NULL;
  324. cur_rule = NULL;
  325. cur_elt = json_array_get (js, i);
  326. if (!cur_elt || !json_is_object (cur_elt)) {
  327. msg_err ("loaded json is not an object");
  328. continue;
  329. }
  330. /* Factor param */
  331. cur_nm = json_object_get (cur_elt, "factor");
  332. if (cur_nm == NULL || !json_is_number (cur_nm)) {
  333. msg_err ("factor is not a number or not exists, but is required");
  334. continue;
  335. }
  336. score = json_number_value (cur_nm);
  337. /* Symbol param */
  338. cur_nm = json_object_get (cur_elt, "symbol");
  339. if (cur_nm == NULL || !json_is_string (cur_nm)) {
  340. msg_err ("symbol is not a string or not exists, but is required");
  341. continue;
  342. }
  343. cur_symbol = memory_pool_strdup (new_pool, json_string_value (cur_nm));
  344. /* Enabled flag */
  345. cur_nm = json_object_get (cur_elt, "enabled");
  346. if (cur_nm != NULL && json_is_boolean (cur_nm)) {
  347. if (json_is_false (cur_nm)) {
  348. msg_info ("rule %s is disabled in json", cur_symbol);
  349. continue;
  350. }
  351. }
  352. /* Now check other settings */
  353. /* Rule */
  354. cur_nm = json_object_get (cur_elt, "rule");
  355. if (cur_nm != NULL && json_is_string (cur_nm)) {
  356. cur_rule = memory_pool_strdup (new_pool, json_string_value (cur_nm));
  357. }
  358. /* Networks array */
  359. cur_nm = json_object_get (cur_elt, "networks");
  360. if (cur_nm != NULL && json_is_array (cur_nm)) {
  361. for (j = 0; j < json_array_size (cur_nm); j++) {
  362. it_val = json_array_get (cur_nm, i);
  363. if (it_val && json_is_string (it_val)) {
  364. cur_nitem = memory_pool_alloc (new_pool, sizeof (struct dynamic_map_item));
  365. if (parse_regexp_ipmask (json_string_value (it_val), cur_nitem)) {
  366. cur_networks = g_list_prepend (cur_networks, cur_nitem);
  367. }
  368. }
  369. }
  370. }
  371. if (cur_rule) {
  372. /* Dynamic rule has rule option */
  373. cur_item = memory_pool_alloc0 (new_pool, sizeof (struct regexp_module_item));
  374. cur_item->symbol = cur_symbol;
  375. if (read_regexp_expression (new_pool, cur_item, cur_symbol, cur_rule, jb->cfg->raw_mode)) {
  376. register_dynamic_symbol (new_pool, &jb->cfg->cache, cur_symbol, score, process_regexp_item, cur_item, cur_networks);
  377. }
  378. else {
  379. msg_warn ("cannot parse dynamic rule");
  380. }
  381. }
  382. else {
  383. /* Just rule that is allways true (for whitelisting for example) */
  384. register_dynamic_symbol (new_pool, &jb->cfg->cache, cur_symbol, score, regexp_dynamic_insert_result, cur_symbol, cur_networks);
  385. }
  386. if (cur_networks) {
  387. g_list_free (cur_networks);
  388. }
  389. }
  390. json_decref (js);
  391. }
  392. /* Init function */
  393. gint
  394. regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
  395. {
  396. regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx));
  397. regexp_module_ctx->filter = regexp_common_filter;
  398. regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
  399. regexp_module_ctx->dynamic_pool = NULL;
  400. regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
  401. regexp_module_ctx->workers = NULL;
  402. *ctx = (struct module_ctx *)regexp_module_ctx;
  403. register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
  404. register_expression_function ("regexp_occurs_number", rspamd_regexp_occurs_number, NULL);
  405. register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
  406. register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
  407. (void)luaopen_regexp (cfg->lua_state);
  408. register_module_opt ("regexp", "dynamic_rules", MODULE_OPT_TYPE_STRING);
  409. register_module_opt ("regexp", "max_size", MODULE_OPT_TYPE_SIZE);
  410. register_module_opt ("regexp", "max_threads", MODULE_OPT_TYPE_SIZE);
  411. register_module_opt ("regexp", "/^\\S+$/", MODULE_OPT_TYPE_STRING);
  412. return 0;
  413. }
  414. /*
  415. * Parse string in format:
  416. * SYMBOL:statfile:weight
  417. */
  418. void
  419. parse_autolearn_param (const gchar *param, const gchar *value, struct config_file *cfg)
  420. {
  421. struct autolearn_data *d;
  422. gchar *p;
  423. p = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
  424. d = memory_pool_alloc (regexp_module_ctx->regexp_pool, sizeof (struct autolearn_data));
  425. d->symbol = strsep (&p, ":");
  426. if (d->symbol) {
  427. d->statfile_name = strsep (&p, ":");
  428. if (d->statfile_name) {
  429. if (p != NULL && *p != '\0') {
  430. d->weight = strtod (p, NULL);
  431. g_hash_table_insert (regexp_module_ctx->autolearn_symbols, d->symbol, d);
  432. }
  433. }
  434. else {
  435. msg_warn ("cannot extract statfile name from %s", p);
  436. }
  437. }
  438. else {
  439. msg_warn ("cannot extract symbol name from %s", p);
  440. }
  441. }
  442. gint
  443. regexp_module_config (struct config_file *cfg)
  444. {
  445. GList *cur_opt = NULL;
  446. struct module_opt *cur;
  447. struct regexp_module_item *cur_item;
  448. gchar *value;
  449. gint res = TRUE;
  450. struct regexp_json_buf *jb, **pjb;
  451. gsize thr;
  452. GError *err = NULL;
  453. if ((value = get_module_opt (cfg, "regexp", "statfile_prefix")) != NULL) {
  454. regexp_module_ctx->statfile_prefix = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
  455. }
  456. else {
  457. regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
  458. }
  459. if ((value = get_module_opt (cfg, "regexp", "max_size")) != NULL) {
  460. regexp_module_ctx->max_size = parse_limit (value, -1);
  461. }
  462. else {
  463. regexp_module_ctx->max_size = 0;
  464. }
  465. if ((value = get_module_opt (cfg, "regexp", "max_threads")) != NULL) {
  466. if (g_thread_supported ()) {
  467. thr = parse_limit (value, -1);
  468. if (thr > 1) {
  469. #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
  470. g_thread_init (NULL);
  471. workers_mtx = g_mutex_new ();
  472. #else
  473. workers_mtx = memory_pool_alloc (regexp_module_ctx->regexp_pool, sizeof (GMutex));
  474. g_mutex_init (workers_mtx);
  475. #endif
  476. regexp_module_ctx->workers = g_thread_pool_new (process_regexp_item_threaded, regexp_module_ctx, thr, TRUE, &err);
  477. if (err != NULL) {
  478. msg_err ("thread pool creation failed: %s", err->message);
  479. regexp_module_ctx->workers = NULL;
  480. }
  481. }
  482. }
  483. }
  484. else {
  485. regexp_module_ctx->workers = NULL;
  486. }
  487. if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) {
  488. jb = g_malloc (sizeof (struct regexp_json_buf));
  489. pjb = g_malloc (sizeof (struct regexp_json_buf *));
  490. jb->buf = NULL;
  491. jb->cfg = cfg;
  492. *pjb = jb;
  493. if (!add_map (value, json_regexp_read_cb, json_regexp_fin_cb, (void **)pjb)) {
  494. msg_err ("cannot add map %s", value);
  495. }
  496. }
  497. cur_opt = g_hash_table_lookup (cfg->modules_opts, "regexp");
  498. while (cur_opt) {
  499. cur = cur_opt->data;
  500. /* Skip several options that are not regexp */
  501. if (g_ascii_strncasecmp (cur->param, "autolearn", sizeof ("autolearn") - 1) == 0) {
  502. parse_autolearn_param (cur->param, cur->value, cfg);
  503. cur_opt = g_list_next (cur_opt);
  504. continue;
  505. }
  506. else if (g_ascii_strncasecmp (cur->param, "dynamic_rules", sizeof ("dynamic_rules") - 1) == 0) {
  507. cur_opt = g_list_next (cur_opt);
  508. continue;
  509. }
  510. else if (g_ascii_strncasecmp (cur->param, "max_size", sizeof ("max_size") - 1) == 0) {
  511. cur_opt = g_list_next (cur_opt);
  512. continue;
  513. }
  514. else if (g_ascii_strncasecmp (cur->param, "max_threads", sizeof ("max_threads") - 1) == 0) {
  515. cur_opt = g_list_next (cur_opt);
  516. continue;
  517. }
  518. /* Handle regexps */
  519. cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item));
  520. cur_item->symbol = cur->param;
  521. if (cur->is_lua && cur->lua_type == LUA_VAR_STRING) {
  522. if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg->raw_mode)) {
  523. res = FALSE;
  524. }
  525. }
  526. else if (cur->is_lua && cur->lua_type == LUA_VAR_FUNCTION) {
  527. cur_item->lua_function = cur->actual_data;
  528. }
  529. else if (! cur->is_lua) {
  530. if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg->raw_mode)) {
  531. res = FALSE;
  532. }
  533. }
  534. else {
  535. msg_err ("unknown variable type for %s", cur->param);
  536. res = FALSE;
  537. }
  538. if ( !res) {
  539. /* Stop on errors */
  540. break;
  541. }
  542. register_symbol (&cfg->cache, cur->param, 1, process_regexp_item, cur_item);
  543. cur_opt = g_list_next (cur_opt);
  544. }
  545. return res;
  546. }
  547. gint
  548. regexp_module_reconfig (struct config_file *cfg)
  549. {
  550. memory_pool_delete (regexp_module_ctx->regexp_pool);
  551. regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
  552. return regexp_module_config (cfg);
  553. }
  554. struct url_regexp_param {
  555. struct worker_task *task;
  556. GRegex *regexp;
  557. struct rspamd_regexp *re;
  558. gboolean found;
  559. };
  560. static gboolean
  561. tree_url_callback (gpointer key, gpointer value, void *data)
  562. {
  563. struct url_regexp_param *param = data;
  564. struct uri *url = value;
  565. GError *err = NULL;
  566. if (g_regex_match_full (param->regexp, struri (url), -1, 0, 0, NULL, &err) == TRUE) {
  567. if (G_UNLIKELY (param->re->is_test)) {
  568. msg_info ("process test regexp %s for url %s returned TRUE", struri (url));
  569. }
  570. task_cache_add (param->task, param->re, 1);
  571. param->found = TRUE;
  572. return TRUE;
  573. }
  574. else if (G_UNLIKELY (param->re->is_test)) {
  575. msg_info ("process test regexp %s for url %s returned FALSE", struri (url));
  576. }
  577. if (err != NULL) {
  578. msg_info ("error occured while processing regexp \"%s\": %s", param->re->regexp_text, err->message);
  579. }
  580. return FALSE;
  581. }
  582. static gsize
  583. process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar *additional,
  584. gint limit, int_compare_func f)
  585. {
  586. guint8 *ct;
  587. gsize clen;
  588. gint r, passed = 0, start, end, old;
  589. gboolean matched = FALSE;
  590. GList *cur, *headerlist;
  591. GRegex *regexp;
  592. GMatchInfo *info;
  593. GError *err = NULL;
  594. struct url_regexp_param callback_param = {
  595. .task = task,
  596. .regexp = re->regexp,
  597. .re = re,
  598. .found = FALSE
  599. };
  600. struct mime_text_part *part;
  601. struct raw_header *rh;
  602. if (re == NULL) {
  603. msg_info ("invalid regexp passed");
  604. return 0;
  605. }
  606. if ((r = task_cache_check (task, re)) != -1) {
  607. debug_task ("regexp /%s/ is found in cache, result: %d", re->regexp_text, r);
  608. return r == 1;
  609. }
  610. if (additional != NULL) {
  611. /* We have additional parameter defined, so ignore type of regexp expression and use it for parsing */
  612. if (G_UNLIKELY (re->is_test)) {
  613. msg_info ("process test regexp %s with test %s", re->regexp_text, additional);
  614. }
  615. if (g_regex_match_full (re->regexp, additional, strlen (additional), 0, 0, NULL, NULL) == TRUE) {
  616. if (G_UNLIKELY (re->is_test)) {
  617. msg_info ("result of regexp %s is true", re->regexp_text);
  618. }
  619. task_cache_add (task, re, 1);
  620. return 1;
  621. }
  622. else {
  623. task_cache_add (task, re, 0);
  624. return 0;
  625. }
  626. }
  627. switch (re->type) {
  628. case REGEXP_NONE:
  629. msg_warn ("bad error detected: %s has invalid regexp type", re->regexp_text);
  630. return 0;
  631. case REGEXP_HEADER:
  632. /* Check header's name */
  633. if (re->header == NULL) {
  634. msg_info ("header regexp without header name: '%s'", re->regexp_text);
  635. task_cache_add (task, re, 0);
  636. return 0;
  637. }
  638. debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text);
  639. /* Get list of specified headers */
  640. headerlist = message_get_header (task->task_pool, task->message, re->header, re->is_strong);
  641. if (headerlist == NULL) {
  642. /* Header is not found */
  643. if (G_UNLIKELY (re->is_test)) {
  644. msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header);
  645. }
  646. task_cache_add (task, re, 0);
  647. return 0;
  648. }
  649. else {
  650. memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, headerlist);
  651. /* Check whether we have regexp for it */
  652. if (re->regexp == NULL) {
  653. debug_task ("regexp contains only header and it is found %s", re->header);
  654. task_cache_add (task, re, 1);
  655. return 1;
  656. }
  657. /* Iterate throught headers */
  658. cur = headerlist;
  659. while (cur) {
  660. debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
  661. /* Try to match regexp */
  662. if (!re->is_raw) {
  663. /* Validate input */
  664. if (!cur->data || !g_utf8_validate (cur->data, -1, NULL)) {
  665. cur = g_list_next (cur);
  666. continue;
  667. }
  668. }
  669. if (cur->data && g_regex_match_full (re->regexp, cur->data, -1, 0, 0, NULL, &err) == TRUE) {
  670. if (G_UNLIKELY (re->is_test)) {
  671. msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
  672. }
  673. if (f != NULL && limit > 1) {
  674. /* If we have limit count, increase passed count and compare with limit */
  675. if (f (++passed, limit)) {
  676. task_cache_add (task, re, 1);
  677. return 1;
  678. }
  679. }
  680. else {
  681. task_cache_add (task, re, 1);
  682. return 1;
  683. }
  684. }
  685. else if (G_UNLIKELY (re->is_test)) {
  686. msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const gchar *)cur->data);
  687. }
  688. if (err != NULL) {
  689. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  690. }
  691. cur = g_list_next (cur);
  692. }
  693. task_cache_add (task, re, 0);
  694. return 0;
  695. }
  696. break;
  697. case REGEXP_MIME:
  698. debug_task ("checking mime regexp: %s", re->regexp_text);
  699. /* Iterate throught text parts */
  700. cur = g_list_first (task->text_parts);
  701. while (cur) {
  702. part = (struct mime_text_part *)cur->data;
  703. /* Skip empty parts */
  704. if (part->is_empty) {
  705. cur = g_list_next (cur);
  706. continue;
  707. }
  708. /* Skip too large parts */
  709. if (regexp_module_ctx->max_size != 0 && part->content->len > regexp_module_ctx->max_size) {
  710. msg_info ("<%s> skip part of size %Hud", task->message_id, part->content->len);
  711. cur = g_list_next (cur);
  712. continue;
  713. }
  714. /* Check raw flags */
  715. if (part->is_raw) {
  716. regexp = re->raw_regexp;
  717. }
  718. else {
  719. /* This time there is no need to validate anything as conversion succeed only for valid characters */
  720. regexp = re->regexp;
  721. }
  722. /* Select data for regexp */
  723. if (re->is_raw) {
  724. ct = part->orig->data;
  725. clen = part->orig->len;
  726. }
  727. else {
  728. ct = part->content->data;
  729. clen = part->content->len;
  730. }
  731. /* If we have limit, apply regexp so much times as we can */
  732. if (f != NULL && limit > 1) {
  733. end = 0;
  734. while ((matched = g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0, &info, &err)) == TRUE) {
  735. if (G_UNLIKELY (re->is_test)) {
  736. msg_info ("process test regexp %s for mime part of length %d returned TRUE",
  737. re->regexp_text,
  738. (gint)clen,
  739. end);
  740. }
  741. if (f (++passed, limit)) {
  742. task_cache_add (task, re, 1);
  743. return 1;
  744. }
  745. else {
  746. /* Match not found, skip further cycles */
  747. old = end;
  748. if (!g_match_info_fetch_pos (info, 0, &start, &end) || end <= 0) {
  749. break;
  750. }
  751. end += old;
  752. }
  753. g_match_info_free (info);
  754. }
  755. g_match_info_free (info);
  756. }
  757. else {
  758. if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL, &err) == TRUE) {
  759. if (G_UNLIKELY (re->is_test)) {
  760. msg_info ("process test regexp %s for mime part of length %d returned TRUE", re->regexp_text,
  761. (gint)clen);
  762. }
  763. task_cache_add (task, re, 1);
  764. return 1;
  765. }
  766. }
  767. if (!matched && G_UNLIKELY (re->is_test)) {
  768. msg_info ("process test regexp %s for mime part of length %d returned FALSE", re->regexp_text,
  769. (gint)clen);
  770. }
  771. if (err != NULL) {
  772. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  773. }
  774. cur = g_list_next (cur);
  775. }
  776. task_cache_add (task, re, 0);
  777. return 0;
  778. case REGEXP_MESSAGE:
  779. debug_task ("checking message regexp: %s", re->regexp_text);
  780. regexp = re->raw_regexp;
  781. ct = task->msg->begin;
  782. clen = task->msg->len;
  783. if (regexp_module_ctx->max_size != 0 && clen > regexp_module_ctx->max_size) {
  784. msg_info ("<%s> skip message of size %Hz", task->message_id, clen);
  785. return 0;
  786. }
  787. /* If we have limit, apply regexp so much times as we can */
  788. if (f != NULL && limit > 1) {
  789. end = 0;
  790. while ((matched = g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0, &info, &err)) == TRUE) {
  791. if (G_UNLIKELY (re->is_test)) {
  792. msg_info ("process test regexp %s for mime part of length %d returned TRUE", re->regexp_text,
  793. (gint)clen);
  794. }
  795. if (f (++passed, limit)) {
  796. task_cache_add (task, re, 1);
  797. return 1;
  798. }
  799. else {
  800. /* Match not found, skip further cycles */
  801. old = end;
  802. if (!g_match_info_fetch_pos (info, 0, &start, &end) || end <= 0) {
  803. break;
  804. }
  805. old += end;
  806. }
  807. g_match_info_free (info);
  808. }
  809. g_match_info_free (info);
  810. }
  811. else {
  812. if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL, &err) == TRUE) {
  813. if (G_UNLIKELY (re->is_test)) {
  814. msg_info ("process test regexp %s for message part of length %d returned TRUE", re->regexp_text,
  815. (gint)clen);
  816. }
  817. task_cache_add (task, re, 1);
  818. return 1;
  819. }
  820. }
  821. if (!matched && G_UNLIKELY (re->is_test)) {
  822. msg_info ("process test regexp %s for message part of length %d returned FALSE", re->regexp_text,
  823. (gint)clen);
  824. }
  825. if (err != NULL) {
  826. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  827. }
  828. task_cache_add (task, re, 0);
  829. return 0;
  830. case REGEXP_URL:
  831. debug_task ("checking url regexp: %s", re->regexp_text);
  832. if (f != NULL && limit > 1) {
  833. /*XXX: add support of it */
  834. msg_warn ("numbered matches are not supported for url regexp");
  835. }
  836. regexp = re->regexp;
  837. callback_param.task = task;
  838. callback_param.regexp = regexp;
  839. callback_param.re = re;
  840. callback_param.found = FALSE;
  841. if (task->urls) {
  842. g_tree_foreach (task->urls, tree_url_callback, &callback_param);
  843. }
  844. if (task->emails && callback_param.found == FALSE) {
  845. g_tree_foreach (task->emails, tree_url_callback, &callback_param);
  846. }
  847. if (callback_param.found == FALSE) {
  848. task_cache_add (task, re, 0);
  849. }
  850. return 0;
  851. case REGEXP_RAW_HEADER:
  852. debug_task ("checking for raw header: %s with regexp: %s", re->header, re->regexp_text);
  853. /* Check header's name */
  854. if (re->header == NULL) {
  855. msg_info ("header regexp without header name: '%s'", re->regexp_text);
  856. task_cache_add (task, re, 0);
  857. return 0;
  858. }
  859. debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text);
  860. /* Get list of specified headers */
  861. headerlist = message_get_raw_header (task, re->header, re->is_strong);
  862. if (headerlist == NULL) {
  863. /* Header is not found */
  864. if (G_UNLIKELY (re->is_test)) {
  865. msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header);
  866. }
  867. task_cache_add (task, re, 0);
  868. return 0;
  869. }
  870. else {
  871. /* Check whether we have regexp for it */
  872. if (re->regexp == NULL) {
  873. debug_task ("regexp contains only header and it is found %s", re->header);
  874. task_cache_add (task, re, 1);
  875. return 1;
  876. }
  877. /* Iterate throught headers */
  878. cur = headerlist;
  879. while (cur) {
  880. debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
  881. rh = cur->data;
  882. /* Try to match regexp */
  883. if (!re->is_raw) {
  884. /* Validate input */
  885. if (!rh->value || !g_utf8_validate (rh->value, -1, NULL)) {
  886. cur = g_list_next (cur);
  887. continue;
  888. }
  889. }
  890. if (rh->value && g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) {
  891. if (G_UNLIKELY (re->is_test)) {
  892. msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
  893. }
  894. if (f != NULL && limit > 1) {
  895. /* If we have limit count, increase passed count and compare with limit */
  896. if (f (++passed, limit)) {
  897. task_cache_add (task, re, 1);
  898. return 1;
  899. }
  900. }
  901. else {
  902. task_cache_add (task, re, 1);
  903. return 1;
  904. }
  905. }
  906. else if (G_UNLIKELY (re->is_test)) {
  907. msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const gchar *)cur->data);
  908. }
  909. if (err != NULL) {
  910. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  911. }
  912. cur = g_list_next (cur);
  913. }
  914. task_cache_add (task, re, 0);
  915. return 0;
  916. }
  917. break;
  918. default:
  919. msg_warn ("bad error detected: %p is not a valid regexp object", re);
  920. }
  921. /* Not reached */
  922. return 0;
  923. }
  924. static gboolean
  925. maybe_call_lua_function (const gchar *name, struct worker_task *task)
  926. {
  927. lua_State *L = task->cfg->lua_state;
  928. struct worker_task **ptask;
  929. gboolean res;
  930. lua_getglobal (L, name);
  931. if (lua_isfunction (L, -1)) {
  932. ptask = lua_newuserdata (L, sizeof (struct worker_task *));
  933. lua_setclass (L, "rspamd{task}", -1);
  934. *ptask = task;
  935. /* Call function */
  936. if (lua_pcall (L, 1, 1, 0) != 0) {
  937. msg_info ("call to %s failed: %s", (gchar *)name, lua_tostring (L, -1));
  938. return FALSE;
  939. }
  940. res = lua_toboolean (L, -1);
  941. lua_pop (L, 1);
  942. return res;
  943. }
  944. else {
  945. lua_pop (L, 1);
  946. }
  947. return FALSE;
  948. }
  949. static gboolean
  950. optimize_regexp_expression (struct expression **e, GQueue * stack, gboolean res)
  951. {
  952. struct expression *it = (*e)->next;
  953. gboolean ret = FALSE, is_nearest = TRUE;
  954. gint skip_level = 0;
  955. /* Skip nearest logical operators from optimization */
  956. if (!it || (it->type == EXPR_OPERATION && it->content.operation != '!')) {
  957. g_queue_push_head (stack, GSIZE_TO_POINTER (res));
  958. return ret;
  959. }
  960. while (it) {
  961. /* Find first operation for this iterator */
  962. if (it->type == EXPR_OPERATION) {
  963. /* If this operation is just ! just inverse res and check for further operators */
  964. if (it->content.operation == '!') {
  965. if (is_nearest) {
  966. msg_debug ("found '!' operator, inversing result");
  967. res = !res;
  968. *e = it;
  969. }
  970. it = it->next;
  971. continue;
  972. }
  973. else {
  974. skip_level--;
  975. }
  976. /* Check whether we found corresponding operator for this operand */
  977. if (skip_level <= 0) {
  978. if (it->content.operation == '|' && res == TRUE) {
  979. msg_debug ("found '|' and previous expression is true");
  980. *e = it;
  981. ret = TRUE;
  982. }
  983. else if (it->content.operation == '&' && res == FALSE) {
  984. msg_debug ("found '&' and previous expression is false");
  985. *e = it;
  986. ret = TRUE;
  987. }
  988. break;
  989. }
  990. }
  991. else {
  992. is_nearest = FALSE;
  993. skip_level++;
  994. }
  995. it = it->next;
  996. }
  997. g_queue_push_head (stack, GSIZE_TO_POINTER (res));
  998. return ret;
  999. }
  1000. static gboolean
  1001. process_regexp_expression (struct expression *expr, gchar *symbol, struct worker_task *task, const gchar *additional)
  1002. {
  1003. GQueue *stack;
  1004. gsize cur, op1, op2;
  1005. struct expression *it = expr;
  1006. struct rspamd_regexp *re;
  1007. gboolean try_optimize = TRUE;
  1008. stack = g_queue_new ();
  1009. while (it) {
  1010. if (it->type == EXPR_REGEXP_PARSED) {
  1011. /* Find corresponding symbol */
  1012. cur = process_regexp ((struct rspamd_regexp *)it->content.operand, task, additional, 0, NULL);
  1013. debug_task ("regexp %s found", cur ? "is" : "is not");
  1014. if (try_optimize) {
  1015. try_optimize = optimize_regexp_expression (&it, stack, cur);
  1016. }
  1017. else {
  1018. g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
  1019. }
  1020. }
  1021. else if (it->type == EXPR_FUNCTION) {
  1022. cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task);
  1023. debug_task ("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false");
  1024. if (try_optimize) {
  1025. try_optimize = optimize_regexp_expression (&it, stack, cur);
  1026. }
  1027. else {
  1028. g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
  1029. }
  1030. }
  1031. else if (it->type == EXPR_STR) {
  1032. /* This may be lua function, try to call it */
  1033. cur = maybe_call_lua_function ((const gchar*)it->content.operand, task);
  1034. debug_task ("function %s returned %s", (const gchar *)it->content.operand, cur ? "true" : "false");
  1035. if (try_optimize) {
  1036. try_optimize = optimize_regexp_expression (&it, stack, cur);
  1037. }
  1038. else {
  1039. g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
  1040. }
  1041. }
  1042. else if (it->type == EXPR_REGEXP) {
  1043. /* Compile regexp if it is not parsed */
  1044. if (it->content.operand == NULL) {
  1045. it = it->next;
  1046. continue;
  1047. }
  1048. re = parse_regexp (task->cfg->cfg_pool, it->content.operand, task->cfg->raw_mode);
  1049. if (re == NULL) {
  1050. msg_warn ("cannot parse regexp, skip expression");
  1051. g_queue_free (stack);
  1052. return FALSE;
  1053. }
  1054. it->content.operand = re;
  1055. it->type = EXPR_REGEXP_PARSED;
  1056. /* Continue with this regexp once again */
  1057. continue;
  1058. }
  1059. else if (it->type == EXPR_OPERATION) {
  1060. if (g_queue_is_empty (stack)) {
  1061. /* Queue has no operands for operation, exiting */
  1062. msg_warn ("regexp expression seems to be invalid: empty stack while reading operation");
  1063. g_queue_free (stack);
  1064. return FALSE;
  1065. }
  1066. debug_task ("got operation %c", it->content.operation);
  1067. switch (it->content.operation) {
  1068. case '!':
  1069. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1070. op1 = !op1;
  1071. try_optimize = optimize_regexp_expression (&it, stack, op1);
  1072. break;
  1073. case '&':
  1074. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1075. op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1076. try_optimize = optimize_regexp_expression (&it, stack, op1 && op2);
  1077. break;
  1078. case '|':
  1079. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1080. op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1081. try_optimize = optimize_regexp_expression (&it, stack, op1 || op2);
  1082. break;
  1083. default:
  1084. it = it->next;
  1085. continue;
  1086. }
  1087. }
  1088. if (it) {
  1089. it = it->next;
  1090. }
  1091. }
  1092. if (!g_queue_is_empty (stack)) {
  1093. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1094. if (op1) {
  1095. g_queue_free (stack);
  1096. return TRUE;
  1097. }
  1098. }
  1099. else {
  1100. msg_warn ("regexp expression seems to be invalid: empty stack at the end of expression, symbol %s", symbol);
  1101. }
  1102. g_queue_free (stack);
  1103. return FALSE;
  1104. }
  1105. struct regexp_threaded_ud {
  1106. struct regexp_module_item *item;
  1107. struct worker_task *task;
  1108. };
  1109. static void
  1110. process_regexp_item_threaded (gpointer data, gpointer user_data)
  1111. {
  1112. struct regexp_threaded_ud *ud = user_data;
  1113. gboolean res = FALSE;
  1114. if (ud->item->lua_function) {
  1115. /* Just call function */
  1116. if (lua_call_expression_func ("regexp", ud->item->lua_function, ud->task, NULL, &res) && res) {
  1117. g_mutex_lock (workers_mtx);
  1118. insert_result (ud->task, ud->item->symbol, 1, NULL);
  1119. g_mutex_unlock (workers_mtx);
  1120. }
  1121. }
  1122. else {
  1123. /* Process expression */
  1124. if (process_regexp_expression (ud->item->expr, ud->item->symbol, ud->task, NULL)) {
  1125. g_mutex_lock (workers_mtx);
  1126. insert_result (ud->task, ud->item->symbol, 1, NULL);
  1127. g_mutex_unlock (workers_mtx);
  1128. }
  1129. }
  1130. }
  1131. static void
  1132. process_regexp_item (struct worker_task *task, void *user_data)
  1133. {
  1134. struct regexp_module_item *item = user_data;
  1135. gboolean res = FALSE;
  1136. struct regexp_threaded_ud *thr_ud;
  1137. if (regexp_module_ctx->workers) {
  1138. thr_ud = memory_pool_alloc (task->task_pool, sizeof (struct regexp_threaded_ud));
  1139. thr_ud->item = item;
  1140. thr_ud->task = task;
  1141. g_thread_pool_push (regexp_module_ctx->workers, thr_ud, NULL);
  1142. }
  1143. else {
  1144. /* Non-threaded version */
  1145. if (item->lua_function) {
  1146. /* Just call function */
  1147. if (lua_call_expression_func ("regexp", item->lua_function, task, NULL, &res) && res) {
  1148. insert_result (task, item->symbol, 1, NULL);
  1149. }
  1150. }
  1151. else {
  1152. /* Process expression */
  1153. if (process_regexp_expression (item->expr, item->symbol, task, NULL)) {
  1154. insert_result (task, item->symbol, 1, NULL);
  1155. }
  1156. }
  1157. }
  1158. }
  1159. static gint
  1160. regexp_common_filter (struct worker_task *task)
  1161. {
  1162. /* XXX: remove this shit too */
  1163. return 0;
  1164. }
  1165. static gboolean
  1166. rspamd_regexp_match_number (struct worker_task *task, GList * args, void *unused)
  1167. {
  1168. gint param_count, res = 0;
  1169. struct expression_argument *arg;
  1170. GList *cur;
  1171. if (args == NULL) {
  1172. msg_warn ("no parameters to function");
  1173. return FALSE;
  1174. }
  1175. arg = get_function_arg (args->data, task, TRUE);
  1176. param_count = strtoul (arg->data, NULL, 10);
  1177. cur = args->next;
  1178. while (cur) {
  1179. arg = get_function_arg (cur->data, task, FALSE);
  1180. if (arg && arg->type == EXPRESSION_ARGUMENT_BOOL) {
  1181. if ((gboolean) GPOINTER_TO_SIZE (arg->data)) {
  1182. res++;
  1183. }
  1184. }
  1185. else {
  1186. if (process_regexp_expression (cur->data, "regexp_match_number", task, NULL)) {
  1187. res++;
  1188. }
  1189. if (res >= param_count) {
  1190. return TRUE;
  1191. }
  1192. }
  1193. cur = g_list_next (cur);
  1194. }
  1195. return res >= param_count;
  1196. }
  1197. static gboolean
  1198. rspamd_regexp_occurs_number (struct worker_task *task, GList * args, void *unused)
  1199. {
  1200. gint limit;
  1201. struct expression_argument *arg;
  1202. struct rspamd_regexp *re;
  1203. gchar *param, *err_str, op;
  1204. int_compare_func f = NULL;
  1205. if (args == NULL || args->next == NULL) {
  1206. msg_warn ("wrong number of parameters to function, must be 2");
  1207. return FALSE;
  1208. }
  1209. arg = get_function_arg (args->data, task, TRUE);
  1210. if ((re = re_cache_check (arg->data, task->cfg->cfg_pool)) == NULL) {
  1211. re = parse_regexp (task->cfg->cfg_pool, arg->data, task->cfg->raw_mode);
  1212. if (!re) {
  1213. msg_err ("cannot parse given regexp: %s", (gchar *)arg->data);
  1214. return FALSE;
  1215. }
  1216. }
  1217. arg = get_function_arg (args->next->data, task, TRUE);
  1218. param = arg->data;
  1219. op = *param;
  1220. if (g_ascii_isdigit (op)) {
  1221. op = '=';
  1222. }
  1223. else {
  1224. param ++;
  1225. }
  1226. switch (op) {
  1227. case '>':
  1228. if (*param == '=') {
  1229. f = op_more_equal;
  1230. param ++;
  1231. }
  1232. else {
  1233. f = op_more;
  1234. }
  1235. break;
  1236. case '<':
  1237. if (*param == '=') {
  1238. f = op_less_equal;
  1239. param ++;
  1240. }
  1241. else {
  1242. f = op_less;
  1243. }
  1244. break;
  1245. case '=':
  1246. f = op_equal;
  1247. break;
  1248. default:
  1249. msg_err ("wrong operation character: %c, assumed '=', '>', '<', '>=', '<=' or empty op", op);
  1250. return FALSE;
  1251. }
  1252. limit = strtoul (param, &err_str, 10);
  1253. if (*err_str != 0) {
  1254. msg_err ("wrong numeric: %s at position: %s", param, err_str);
  1255. return FALSE;
  1256. }
  1257. return process_regexp (re, task, NULL, limit, f);
  1258. }
  1259. static gboolean
  1260. rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
  1261. {
  1262. struct expression_argument *arg;
  1263. if (args == NULL || task == NULL) {
  1264. return FALSE;
  1265. }
  1266. arg = get_function_arg (args->data, task, TRUE);
  1267. if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
  1268. msg_warn ("invalid argument to function is passed");
  1269. return FALSE;
  1270. }
  1271. return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
  1272. }
  1273. static gboolean
  1274. match_smtp_data (struct worker_task *task, const gchar *re_text, const gchar *what)
  1275. {
  1276. struct rspamd_regexp *re;
  1277. gint r;
  1278. if (*re_text == '/') {
  1279. /* This is a regexp */
  1280. if ((re = re_cache_check (re_text, task->cfg->cfg_pool)) == NULL) {
  1281. re = parse_regexp (task->cfg->cfg_pool, (gchar *)re_text, task->cfg->raw_mode);
  1282. if (re == NULL) {
  1283. msg_warn ("cannot compile regexp for function");
  1284. return FALSE;
  1285. }
  1286. re_cache_add ((gchar *)re_text, re, task->cfg->cfg_pool);
  1287. }
  1288. if ((r = task_cache_check (task, re)) == -1) {
  1289. if (g_regex_match (re->regexp, what, 0, NULL) == TRUE) {
  1290. task_cache_add (task, re, 1);
  1291. return TRUE;
  1292. }
  1293. task_cache_add (task, re, 0);
  1294. }
  1295. else {
  1296. return r == 1;
  1297. }
  1298. }
  1299. else if (g_ascii_strcasecmp (re_text, what) == 0) {
  1300. return TRUE;
  1301. }
  1302. return FALSE;
  1303. }
  1304. static gboolean
  1305. rspamd_check_smtp_data (struct worker_task *task, GList * args, void *unused)
  1306. {
  1307. struct expression_argument *arg;
  1308. GList *cur, *rcpt_list = NULL;
  1309. gchar *type, *what = NULL;
  1310. if (args == NULL) {
  1311. msg_warn ("no parameters to function");
  1312. return FALSE;
  1313. }
  1314. arg = get_function_arg (args->data, task, TRUE);
  1315. if (!arg || !arg->data) {
  1316. msg_warn ("no parameters to function");
  1317. return FALSE;
  1318. }
  1319. else {
  1320. type = arg->data;
  1321. switch (*type) {
  1322. case 'f':
  1323. case 'F':
  1324. if (g_ascii_strcasecmp (type, "from") == 0) {
  1325. what = task->from;
  1326. }
  1327. else {
  1328. msg_warn ("bad argument to function: %s", type);
  1329. return FALSE;
  1330. }
  1331. break;
  1332. case 'h':
  1333. case 'H':
  1334. if (g_ascii_strcasecmp (type, "helo") == 0) {
  1335. what = task->helo;
  1336. }
  1337. else {
  1338. msg_warn ("bad argument to function: %s", type);
  1339. return FALSE;
  1340. }
  1341. break;
  1342. case 'u':
  1343. case 'U':
  1344. if (g_ascii_strcasecmp (type, "user") == 0) {
  1345. what = task->user;
  1346. }
  1347. else {
  1348. msg_warn ("bad argument to function: %s", type);
  1349. return FALSE;
  1350. }
  1351. break;
  1352. case 's':
  1353. case 'S':
  1354. if (g_ascii_strcasecmp (type, "subject") == 0) {
  1355. what = task->subject;
  1356. }
  1357. else {
  1358. msg_warn ("bad argument to function: %s", type);
  1359. return FALSE;
  1360. }
  1361. break;
  1362. case 'r':
  1363. case 'R':
  1364. if (g_ascii_strcasecmp (type, "rcpt") == 0) {
  1365. rcpt_list = task->rcpt;
  1366. }
  1367. else {
  1368. msg_warn ("bad argument to function: %s", type);
  1369. return FALSE;
  1370. }
  1371. break;
  1372. default:
  1373. msg_warn ("bad argument to function: %s", type);
  1374. return FALSE;
  1375. }
  1376. }
  1377. if (what == NULL && rcpt_list == NULL) {
  1378. /* Not enough data so regexp would NOT be found anyway */
  1379. return FALSE;
  1380. }
  1381. /* We would process only one more argument, others are ignored */
  1382. cur = args->next;
  1383. if (cur) {
  1384. arg = get_function_arg (cur->data, task, FALSE);
  1385. if (arg && arg->type == EXPRESSION_ARGUMENT_NORMAL) {
  1386. if (what != NULL) {
  1387. return match_smtp_data (task, arg->data, what);
  1388. }
  1389. else {
  1390. while (rcpt_list) {
  1391. if (match_smtp_data (task, arg->data, rcpt_list->data)) {
  1392. return TRUE;
  1393. }
  1394. rcpt_list = g_list_next (rcpt_list);
  1395. }
  1396. }
  1397. }
  1398. else if (arg != NULL) {
  1399. if (what != NULL) {
  1400. if (process_regexp_expression (arg->data, "regexp_check_smtp_data", task, what)) {
  1401. return TRUE;
  1402. }
  1403. }
  1404. else {
  1405. while (rcpt_list) {
  1406. if (process_regexp_expression (arg->data, "regexp_check_smtp_data", task, rcpt_list->data)) {
  1407. return TRUE;
  1408. }
  1409. rcpt_list = g_list_next (rcpt_list);
  1410. }
  1411. }
  1412. }
  1413. }
  1414. return FALSE;
  1415. }
  1416. /* Lua part */
  1417. static gint
  1418. lua_regexp_match (lua_State *L)
  1419. {
  1420. void *ud = luaL_checkudata (L, 1, "rspamd{task}");
  1421. struct worker_task *task;
  1422. const gchar *re_text;
  1423. struct rspamd_regexp *re;
  1424. gint r = 0;
  1425. luaL_argcheck (L, ud != NULL, 1, "'task' expected");
  1426. task = ud ? *((struct worker_task **)ud) : NULL;
  1427. re_text = luaL_checkstring (L, 2);
  1428. /* This is a regexp */
  1429. if (task != NULL) {
  1430. if ((re = re_cache_check (re_text, task->cfg->cfg_pool)) == NULL) {
  1431. re = parse_regexp (task->cfg->cfg_pool, (gchar *)re_text, task->cfg->raw_mode);
  1432. if (re == NULL) {
  1433. msg_warn ("cannot compile regexp for function");
  1434. return FALSE;
  1435. }
  1436. re_cache_add ((gchar *)re_text, re, task->cfg->cfg_pool);
  1437. }
  1438. r = process_regexp (re, task, NULL, 0, NULL);
  1439. }
  1440. lua_pushboolean (L, r == 1);
  1441. return 1;
  1442. }