You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

regexp.c 41KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503
  1. /*
  2. * Copyright (c) 2009, Rambler media
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
  17. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  18. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  19. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  20. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  21. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. */
  24. /***MODULE:regexp
  25. * rspamd module that implements different regexp rules
  26. */
  27. #include "config.h"
  28. #include "main.h"
  29. #include "message.h"
  30. #include "cfg_file.h"
  31. #include "map.h"
  32. #include "util.h"
  33. #include "expressions.h"
  34. #include "view.h"
  35. #include "lua/lua_common.h"
  36. #include "json/jansson.h"
  37. #include "cfg_xml.h"
  /* Value used for regexp_ctx.statfile_prefix when the "statfile_prefix" module option is not set */
  38. #define DEFAULT_STATFILE_PREFIX "./"
  /*
   * One rule of the regexp module. An instance is passed as user data to
   * process_regexp_item when the rule's symbol is registered.
   */
  39. struct regexp_module_item {
  /* Parsed expression chain, filled by read_regexp_expression */
  40. struct expression *expr;
  /* Name of the symbol this rule belongs to */
  41. gchar *symbol;
  /* NOTE(review): not referenced in the visible part of this file; presumably a timing statistic - confirm */
  42. guint32 avg_time;
  /* Set from the option's actual_data when the rule is configured as a Lua function */
  43. gpointer lua_function;
  44. };
  /*
   * Autolearn setting parsed by parse_autolearn_param from a
   * "SYMBOL:statfile:weight" string.
   */
  45. struct autolearn_data {
  /* Second field of the parsed string */
  46. gchar *statfile_name;
  /* First field of the parsed string; also the key in regexp_ctx.autolearn_symbols */
  47. gchar *symbol;
  /* Third field of the parsed string, converted with strtod */
  48. float weight;
  49. };
  /* Global state of the regexp module (single instance: regexp_module_ctx) */
  50. struct regexp_ctx {
  /* Set to regexp_common_filter in regexp_module_init */
  51. gint (*filter) (struct worker_task * task);
  /* Symbol name -> struct autolearn_data, filled by parse_autolearn_param */
  52. GHashTable *autolearn_symbols;
  /* "statfile_prefix" option or DEFAULT_STATFILE_PREFIX */
  53. gchar *statfile_prefix;
  /* Pool for statically configured rules; recreated by regexp_module_reconfig */
  54. memory_pool_t *regexp_pool;
  /* Pool for rules loaded from the dynamic JSON map; NULL until the first load, replaced on every load */
  55. memory_pool_t *dynamic_pool;
  /* "max_size" option: larger text parts and messages are skipped by process_regexp; 0 disables the check */
  56. gsize max_size;
  57. };
  /* Growable buffer that accumulates the text of the dynamic rules JSON map */
  58. struct regexp_json_buf {
  /* Start of the allocated buffer (NULL until the first chunk arrives) */
  59. gchar *buf;
  /* Position where the next chunk is appended */
  60. gchar *pos;
  /* Allocated size of buf in bytes */
  61. size_t buflen;
  /* Configuration whose symbols cache receives the loaded rules */
  62. struct config_file *cfg;
  63. };
  64. /* Lua regexp module for checking rspamd regexps */
  65. LUA_FUNCTION_DEF (regexp, match);
  /* Method table passed to luaL_openlib by luaopen_regexp; terminated by a {NULL, NULL} entry */
  66. static const struct luaL_reg regexplib_m[] = {
  67. LUA_INTERFACE_DEF (regexp, match),
  68. {"__tostring", lua_class_tostring},
  69. {NULL, NULL}
  70. };
  /* Module context; allocated in regexp_module_init */
  71. static struct regexp_ctx *regexp_module_ctx = NULL;
  /* Stored in regexp_ctx.filter by regexp_module_init */
  72. static gint regexp_common_filter (struct worker_task *task);
  /* Expression functions registered by name in regexp_module_init */
  73. static gboolean rspamd_regexp_match_number (struct worker_task *task, GList * args, void *unused);
  74. static gboolean rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused);
  75. static gboolean rspamd_check_smtp_data (struct worker_task *task, GList * args, void *unused);
  76. static gboolean rspamd_regexp_occurs_number (struct worker_task *task, GList * args, void *unused);
  /* Symbol callback registered for every rule; user_data is a struct regexp_module_item */
  77. static void process_regexp_item (struct worker_task *task, void *user_data);
  78. /* Initialization */
  79. gint regexp_module_init (struct config_file *cfg, struct module_ctx **ctx);
  80. gint regexp_module_config (struct config_file *cfg);
  81. gint regexp_module_reconfig (struct config_file *cfg);
  /*
   * Module descriptor: the module name followed by the init, config and
   * reconfig entry points declared above.
   * NOTE(review): presumably consumed by the module loader - confirm against module_t.
   */
  82. module_t regexp_module = {
  83. "regexp",
  84. regexp_module_init,
  85. regexp_module_config,
  86. regexp_module_reconfig
  87. };
  88. static gint
  89. luaopen_regexp (lua_State * L)
  90. {
  91. luaL_openlib (L, "rspamd_regexp", regexplib_m, 0);
  92. return 1;
  93. }
  94. static void
  95. regexp_dynamic_insert_result (struct worker_task *task, void *user_data)
  96. {
  97. gchar *symbol = user_data;
  98. insert_result (task, symbol, 1, NULL);
  99. }
  100. /*
  101. * Utility functions for matching exact number of regexps
  102. */
  /* Comparison predicate for two integers; process_regexp calls it as f (passed, limit) */
  103. typedef gboolean (*int_compare_func) (gint a, gint b);
  104. static gboolean
  105. op_equal (gint a, gint b)
  106. {
  107. return a == b;
  108. }
  109. static gboolean
  110. op_more (gint a, gint b)
  111. {
  112. return a > b;
  113. }
  114. static gboolean
  115. op_less (gint a, gint b)
  116. {
  117. return a < b;
  118. }
  119. static gboolean
  120. op_more_equal (gint a, gint b)
  121. {
  122. return a >= b;
  123. }
  124. static gboolean
  125. op_less_equal (gint a, gint b)
  126. {
  127. return a <= b;
  128. }
  129. /*
  130. * Process ip and mask of dynamic regexp
  131. */
  132. static gboolean
  133. parse_regexp_ipmask (const gchar *begin, struct dynamic_map_item *addr)
  134. {
  135. const gchar *pos;
  136. gchar ip_buf[sizeof ("255.255.255.255")], mask_buf[3], *p;
  137. gint state = 0, dots = 0;
  138. bzero (ip_buf, sizeof (ip_buf));
  139. bzero (mask_buf, sizeof (mask_buf));
  140. pos = begin;
  141. p = ip_buf;
  142. if (*pos == '!') {
  143. addr->negative = TRUE;
  144. pos ++;
  145. }
  146. else {
  147. addr->negative = FALSE;
  148. }
  149. while (*pos) {
  150. switch (state) {
  151. case 0:
  152. state = 1;
  153. p = ip_buf;
  154. dots = 0;
  155. break;
  156. case 1:
  157. /* Begin parse ip */
  158. if (p - ip_buf >= (gint)sizeof (ip_buf) || dots > 3) {
  159. return FALSE;
  160. }
  161. if (g_ascii_isdigit (*pos)) {
  162. *p ++ = *pos ++;
  163. }
  164. else if (*pos == '.') {
  165. *p ++ = *pos ++;
  166. dots ++;
  167. }
  168. else if (*pos == '/') {
  169. pos ++;
  170. p = mask_buf;
  171. state = 2;
  172. }
  173. else {
  174. /* Invalid character */
  175. return FALSE;
  176. }
  177. break;
  178. case 2:
  179. /* Parse mask */
  180. if (p - mask_buf > 2) {
  181. return FALSE;
  182. }
  183. if (g_ascii_isdigit (*pos)) {
  184. *p ++ = *pos ++;
  185. }
  186. else {
  187. return FALSE;
  188. }
  189. break;
  190. }
  191. }
  192. if (!inet_aton (ip_buf, &addr->addr)) {
  193. return FALSE;
  194. }
  195. if (state == 2) {
  196. /* Also parse mask */
  197. addr->mask = (mask_buf[0] - '0') * 10 + mask_buf[1] - '0';
  198. if (addr->mask > 32) {
  199. msg_info ("bad ipmask value: '%s'", begin);
  200. return FALSE;
  201. }
  202. }
  203. else {
  204. addr->mask = 32;
  205. }
  206. return TRUE;
  207. }
  208. /* Process regexp expression */
  209. static gboolean
  210. read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, gchar *symbol, gchar *line, gboolean raw_mode)
  211. {
  212. struct expression *e, *cur;
  213. e = parse_expression (pool, line);
  214. if (e == NULL) {
  215. msg_warn ("%s = \"%s\" is invalid regexp expression", symbol, line);
  216. return FALSE;
  217. }
  218. chain->expr = e;
  219. cur = e;
  220. while (cur) {
  221. if (cur->type == EXPR_REGEXP) {
  222. cur->content.operand = parse_regexp (pool, cur->content.operand, raw_mode);
  223. if (cur->content.operand == NULL) {
  224. msg_warn ("cannot parse regexp, skip expression %s = \"%s\"", symbol, line);
  225. return FALSE;
  226. }
  227. cur->type = EXPR_REGEXP_PARSED;
  228. }
  229. cur = cur->next;
  230. }
  231. return TRUE;
  232. }
  233. /* Callbacks for reading json dynamic rules */
  234. gchar *
  235. json_regexp_read_cb (memory_pool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
  236. {
  237. struct regexp_json_buf *jb;
  238. gint free, off;
  239. if (data->cur_data == NULL) {
  240. jb = g_malloc (sizeof (struct regexp_json_buf));
  241. jb->cfg = ((struct regexp_json_buf *)data->prev_data)->cfg;
  242. jb->buf = NULL;
  243. jb->pos = NULL;
  244. data->cur_data = jb;
  245. }
  246. else {
  247. jb = data->cur_data;
  248. }
  249. if (jb->buf == NULL) {
  250. /* Allocate memory for buffer */
  251. jb->buflen = len * 2;
  252. jb->buf = g_malloc (jb->buflen);
  253. jb->pos = jb->buf;
  254. }
  255. off = jb->pos - jb->buf;
  256. free = jb->buflen - off;
  257. if (free < len) {
  258. jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2);
  259. jb->buf = g_realloc (jb->buf, jb->buflen);
  260. jb->pos = jb->buf + off;
  261. }
  262. memcpy (jb->pos, chunk, len);
  263. jb->pos += len;
  264. /* Say not to copy any part of this buffer */
  265. return NULL;
  266. }
  267. void
  268. json_regexp_fin_cb (memory_pool_t * pool, struct map_cb_data *data)
  269. {
  270. struct regexp_json_buf *jb;
  271. guint nelts, i, j;
  272. json_t *js, *cur_elt, *cur_nm, *it_val;
  273. json_error_t je;
  274. gchar *cur_rule, *cur_symbol;
  275. double score;
  276. struct regexp_module_item *cur_item;
  277. GList *cur_networks = NULL;
  278. struct dynamic_map_item *cur_nitem;
  279. memory_pool_t *new_pool;
  280. if (data->prev_data) {
  281. jb = data->prev_data;
  282. /* Clean prev data */
  283. if (jb->buf) {
  284. g_free (jb->buf);
  285. }
  286. g_free (jb);
  287. }
  288. /* Now parse json */
  289. if (data->cur_data) {
  290. jb = data->cur_data;
  291. }
  292. else {
  293. msg_err ("no data read");
  294. return;
  295. }
  296. if (jb->buf == NULL) {
  297. msg_err ("no data read");
  298. return;
  299. }
  300. /* NULL terminate current buf */
  301. *jb->pos = '\0';
  302. js = json_loads (jb->buf, &je);
  303. if (!js) {
  304. msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
  305. return;
  306. }
  307. if (!json_is_array (js)) {
  308. json_decref (js);
  309. msg_err ("loaded json is not an array");
  310. return;
  311. }
  312. new_pool = memory_pool_new (memory_pool_get_size ());
  313. remove_dynamic_rules (jb->cfg->cache);
  314. if (regexp_module_ctx->dynamic_pool != NULL) {
  315. memory_pool_delete (regexp_module_ctx->dynamic_pool);
  316. }
  317. regexp_module_ctx->dynamic_pool = new_pool;
  318. nelts = json_array_size (js);
  319. for (i = 0; i < nelts; i++) {
  320. cur_networks = NULL;
  321. cur_rule = NULL;
  322. cur_elt = json_array_get (js, i);
  323. if (!cur_elt || !json_is_object (cur_elt)) {
  324. msg_err ("loaded json is not an object");
  325. continue;
  326. }
  327. /* Factor param */
  328. cur_nm = json_object_get (cur_elt, "factor");
  329. if (cur_nm == NULL || !json_is_number (cur_nm)) {
  330. msg_err ("factor is not a number or not exists, but is required");
  331. continue;
  332. }
  333. score = json_number_value (cur_nm);
  334. /* Symbol param */
  335. cur_nm = json_object_get (cur_elt, "symbol");
  336. if (cur_nm == NULL || !json_is_string (cur_nm)) {
  337. msg_err ("symbol is not a string or not exists, but is required");
  338. continue;
  339. }
  340. cur_symbol = memory_pool_strdup (new_pool, json_string_value (cur_nm));
  341. /* Enabled flag */
  342. cur_nm = json_object_get (cur_elt, "enabled");
  343. if (cur_nm != NULL && json_is_boolean (cur_nm)) {
  344. if (json_is_false (cur_nm)) {
  345. msg_info ("rule %s is disabled in json", cur_symbol);
  346. continue;
  347. }
  348. }
  349. /* Now check other settings */
  350. /* Rule */
  351. cur_nm = json_object_get (cur_elt, "rule");
  352. if (cur_nm != NULL && json_is_string (cur_nm)) {
  353. cur_rule = memory_pool_strdup (new_pool, json_string_value (cur_nm));
  354. }
  355. /* Networks array */
  356. cur_nm = json_object_get (cur_elt, "networks");
  357. if (cur_nm != NULL && json_is_array (cur_nm)) {
  358. for (j = 0; j < json_array_size (cur_nm); j++) {
  359. it_val = json_array_get (cur_nm, i);
  360. if (it_val && json_is_string (it_val)) {
  361. cur_nitem = memory_pool_alloc (new_pool, sizeof (struct dynamic_map_item));
  362. if (parse_regexp_ipmask (json_string_value (it_val), cur_nitem)) {
  363. cur_networks = g_list_prepend (cur_networks, cur_nitem);
  364. }
  365. }
  366. }
  367. }
  368. if (cur_rule) {
  369. /* Dynamic rule has rule option */
  370. cur_item = memory_pool_alloc0 (new_pool, sizeof (struct regexp_module_item));
  371. cur_item->symbol = cur_symbol;
  372. if (read_regexp_expression (new_pool, cur_item, cur_symbol, cur_rule, jb->cfg->raw_mode)) {
  373. register_dynamic_symbol (new_pool, &jb->cfg->cache, cur_symbol, score, process_regexp_item, cur_item, cur_networks);
  374. }
  375. else {
  376. msg_warn ("cannot parse dynamic rule");
  377. }
  378. }
  379. else {
  380. /* Just rule that is allways true (for whitelisting for example) */
  381. register_dynamic_symbol (new_pool, &jb->cfg->cache, cur_symbol, score, regexp_dynamic_insert_result, cur_symbol, cur_networks);
  382. }
  383. if (cur_networks) {
  384. g_list_free (cur_networks);
  385. }
  386. }
  387. json_decref (js);
  388. }
  389. /* Init function */
  390. gint
  391. regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
  392. {
  393. regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx));
  394. regexp_module_ctx->filter = regexp_common_filter;
  395. regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
  396. regexp_module_ctx->dynamic_pool = NULL;
  397. regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
  398. *ctx = (struct module_ctx *)regexp_module_ctx;
  399. register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
  400. register_expression_function ("regexp_occurs_number", rspamd_regexp_occurs_number, NULL);
  401. register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
  402. register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
  403. (void)luaopen_regexp (cfg->lua_state);
  404. register_module_opt ("regexp", "dynamic_rules", MODULE_OPT_TYPE_STRING);
  405. register_module_opt ("regexp", "max_size", MODULE_OPT_TYPE_SIZE);
  406. register_module_opt ("regexp", "/^\\S+$/", MODULE_OPT_TYPE_STRING);
  407. return 0;
  408. }
  409. /*
  410. * Parse string in format:
  411. * SYMBOL:statfile:weight
  412. */
  413. void
  414. parse_autolearn_param (const gchar *param, const gchar *value, struct config_file *cfg)
  415. {
  416. struct autolearn_data *d;
  417. gchar *p;
  418. p = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
  419. d = memory_pool_alloc (regexp_module_ctx->regexp_pool, sizeof (struct autolearn_data));
  420. d->symbol = strsep (&p, ":");
  421. if (d->symbol) {
  422. d->statfile_name = strsep (&p, ":");
  423. if (d->statfile_name) {
  424. if (p != NULL && *p != '\0') {
  425. d->weight = strtod (p, NULL);
  426. g_hash_table_insert (regexp_module_ctx->autolearn_symbols, d->symbol, d);
  427. }
  428. }
  429. else {
  430. msg_warn ("cannot extract statfile name from %s", p);
  431. }
  432. }
  433. else {
  434. msg_warn ("cannot extract symbol name from %s", p);
  435. }
  436. }
  437. gint
  438. regexp_module_config (struct config_file *cfg)
  439. {
  440. GList *cur_opt = NULL;
  441. struct module_opt *cur;
  442. struct regexp_module_item *cur_item;
  443. gchar *value;
  444. gint res = TRUE;
  445. struct regexp_json_buf *jb, **pjb;
  446. if ((value = get_module_opt (cfg, "regexp", "statfile_prefix")) != NULL) {
  447. regexp_module_ctx->statfile_prefix = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
  448. }
  449. else {
  450. regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
  451. }
  452. if ((value = get_module_opt (cfg, "regexp", "max_size")) != NULL) {
  453. regexp_module_ctx->max_size = parse_limit (value, -1);
  454. }
  455. else {
  456. regexp_module_ctx->max_size = 0;
  457. }
  458. if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) {
  459. jb = g_malloc (sizeof (struct regexp_json_buf));
  460. pjb = g_malloc (sizeof (struct regexp_json_buf *));
  461. jb->buf = NULL;
  462. jb->cfg = cfg;
  463. *pjb = jb;
  464. if (!add_map (value, json_regexp_read_cb, json_regexp_fin_cb, (void **)pjb)) {
  465. msg_err ("cannot add map %s", value);
  466. }
  467. }
  468. cur_opt = g_hash_table_lookup (cfg->modules_opts, "regexp");
  469. while (cur_opt) {
  470. cur = cur_opt->data;
  471. if (strcmp (cur->param, "metric") == 0 || strcmp (cur->param, "statfile_prefix") == 0) {
  472. cur_opt = g_list_next (cur_opt);
  473. continue;
  474. }
  475. else if (g_ascii_strncasecmp (cur->param, "autolearn", sizeof ("autolearn") - 1) == 0) {
  476. parse_autolearn_param (cur->param, cur->value, cfg);
  477. cur_opt = g_list_next (cur_opt);
  478. continue;
  479. }
  480. else if (g_ascii_strncasecmp (cur->param, "dynamic_rules", sizeof ("dynamic_rules") - 1) == 0) {
  481. cur_opt = g_list_next (cur_opt);
  482. continue;
  483. }
  484. else if (g_ascii_strncasecmp (cur->param, "max_size", sizeof ("max_size") - 1) == 0) {
  485. cur_opt = g_list_next (cur_opt);
  486. continue;
  487. }
  488. cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item));
  489. cur_item->symbol = cur->param;
  490. if (cur->is_lua && cur->lua_type == LUA_VAR_STRING) {
  491. if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg->raw_mode)) {
  492. res = FALSE;
  493. }
  494. }
  495. else if (cur->is_lua && cur->lua_type == LUA_VAR_FUNCTION) {
  496. cur_item->lua_function = cur->actual_data;
  497. }
  498. else if (! cur->is_lua) {
  499. if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg->raw_mode)) {
  500. res = FALSE;
  501. }
  502. }
  503. else {
  504. msg_err ("unknown variable type for %s", cur->param);
  505. res = FALSE;
  506. }
  507. if ( !res) {
  508. /* Stop on errors */
  509. break;
  510. }
  511. register_symbol (&cfg->cache, cur->param, 1, process_regexp_item, cur_item);
  512. cur_opt = g_list_next (cur_opt);
  513. }
  514. return res;
  515. }
  516. gint
  517. regexp_module_reconfig (struct config_file *cfg)
  518. {
  519. memory_pool_delete (regexp_module_ctx->regexp_pool);
  520. regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
  521. return regexp_module_config (cfg);
  522. }
  /* State shared between process_regexp and tree_url_callback while walking a tree of urls */
  523. struct url_regexp_param {
  /* Task whose regexp cache is updated on a match */
  524. struct worker_task *task;
  /* Compiled regexp applied to every url */
  525. GRegex *regexp;
  /* Rule the regexp belongs to; used as the cache key and for diagnostics */
  526. struct rspamd_regexp *re;
  /* Set to TRUE by the callback once some url matched */
  527. gboolean found;
  528. };
  529. static gboolean
  530. tree_url_callback (gpointer key, gpointer value, void *data)
  531. {
  532. struct url_regexp_param *param = data;
  533. struct uri *url = value;
  534. GError *err = NULL;
  535. if (g_regex_match_full (param->regexp, struri (url), -1, 0, 0, NULL, &err) == TRUE) {
  536. if (G_UNLIKELY (param->re->is_test)) {
  537. msg_info ("process test regexp %s for url %s returned TRUE", struri (url));
  538. }
  539. task_cache_add (param->task, param->re, 1);
  540. param->found = TRUE;
  541. return TRUE;
  542. }
  543. else if (G_UNLIKELY (param->re->is_test)) {
  544. msg_info ("process test regexp %s for url %s returned FALSE", struri (url));
  545. }
  546. if (err != NULL) {
  547. msg_info ("error occured while processing regexp \"%s\": %s", param->re->regexp_text, err->message);
  548. }
  549. return FALSE;
  550. }
  551. static gsize
  552. process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar *additional,
  553. gint limit, int_compare_func f)
  554. {
  555. guint8 *ct;
  556. gsize clen;
  557. gint r, passed = 0, start, end, old;
  558. gboolean matched = FALSE;
  559. GList *cur, *headerlist;
  560. GRegex *regexp;
  561. GMatchInfo *info;
  562. GError *err = NULL;
  563. struct url_regexp_param callback_param = {
  564. .task = task,
  565. .regexp = re->regexp,
  566. .re = re,
  567. .found = FALSE
  568. };
  569. struct mime_text_part *part;
  570. struct raw_header *rh;
  571. if (re == NULL) {
  572. msg_info ("invalid regexp passed");
  573. return 0;
  574. }
  575. if ((r = task_cache_check (task, re)) != -1) {
  576. debug_task ("regexp /%s/ is found in cache, result: %d", re->regexp_text, r);
  577. return r == 1;
  578. }
  579. if (additional != NULL) {
  580. /* We have additional parameter defined, so ignore type of regexp expression and use it for parsing */
  581. if (G_UNLIKELY (re->is_test)) {
  582. msg_info ("process test regexp %s with test %s", re->regexp_text, additional);
  583. }
  584. if (g_regex_match_full (re->regexp, additional, strlen (additional), 0, 0, NULL, NULL) == TRUE) {
  585. if (G_UNLIKELY (re->is_test)) {
  586. msg_info ("result of regexp %s is true", re->regexp_text);
  587. }
  588. task_cache_add (task, re, 1);
  589. return 1;
  590. }
  591. else {
  592. task_cache_add (task, re, 0);
  593. return 0;
  594. }
  595. }
  596. switch (re->type) {
  597. case REGEXP_NONE:
  598. msg_warn ("bad error detected: %s has invalid regexp type", re->regexp_text);
  599. return 0;
  600. case REGEXP_HEADER:
  601. /* Check header's name */
  602. if (re->header == NULL) {
  603. msg_info ("header regexp without header name: '%s'", re->regexp_text);
  604. task_cache_add (task, re, 0);
  605. return 0;
  606. }
  607. debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text);
  608. /* Get list of specified headers */
  609. headerlist = message_get_header (task->task_pool, task->message, re->header, re->is_strong);
  610. if (headerlist == NULL) {
  611. /* Header is not found */
  612. if (G_UNLIKELY (re->is_test)) {
  613. msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header);
  614. }
  615. task_cache_add (task, re, 0);
  616. return 0;
  617. }
  618. else {
  619. memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, headerlist);
  620. /* Check whether we have regexp for it */
  621. if (re->regexp == NULL) {
  622. debug_task ("regexp contains only header and it is found %s", re->header);
  623. task_cache_add (task, re, 1);
  624. return 1;
  625. }
  626. /* Iterate throught headers */
  627. cur = headerlist;
  628. while (cur) {
  629. debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
  630. /* Try to match regexp */
  631. if (!re->is_raw) {
  632. /* Validate input */
  633. if (!cur->data || !g_utf8_validate (cur->data, -1, NULL)) {
  634. cur = g_list_next (cur);
  635. continue;
  636. }
  637. }
  638. if (cur->data && g_regex_match_full (re->regexp, cur->data, -1, 0, 0, NULL, &err) == TRUE) {
  639. if (G_UNLIKELY (re->is_test)) {
  640. msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
  641. }
  642. if (f != NULL && limit > 1) {
  643. /* If we have limit count, increase passed count and compare with limit */
  644. if (f (++passed, limit)) {
  645. task_cache_add (task, re, 1);
  646. return 1;
  647. }
  648. }
  649. else {
  650. task_cache_add (task, re, 1);
  651. return 1;
  652. }
  653. }
  654. else if (G_UNLIKELY (re->is_test)) {
  655. msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const gchar *)cur->data);
  656. }
  657. if (err != NULL) {
  658. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  659. }
  660. cur = g_list_next (cur);
  661. }
  662. task_cache_add (task, re, 0);
  663. return 0;
  664. }
  665. break;
  666. case REGEXP_MIME:
  667. debug_task ("checking mime regexp: %s", re->regexp_text);
  668. /* Iterate throught text parts */
  669. cur = g_list_first (task->text_parts);
  670. while (cur) {
  671. part = (struct mime_text_part *)cur->data;
  672. /* Skip empty parts */
  673. if (part->is_empty) {
  674. cur = g_list_next (cur);
  675. continue;
  676. }
  677. /* Skip too large parts */
  678. if (regexp_module_ctx->max_size != 0 && part->content->len > regexp_module_ctx->max_size) {
  679. msg_info ("<%s> skip part of size %Hud", task->message_id, part->content->len);
  680. cur = g_list_next (cur);
  681. continue;
  682. }
  683. /* Check raw flags */
  684. if (part->is_raw) {
  685. regexp = re->raw_regexp;
  686. }
  687. else {
  688. /* This time there is no need to validate anything as conversion succeed only for valid characters */
  689. regexp = re->regexp;
  690. }
  691. /* Select data for regexp */
  692. if (re->is_raw) {
  693. ct = part->orig->data;
  694. clen = part->orig->len;
  695. }
  696. else {
  697. ct = part->content->data;
  698. clen = part->content->len;
  699. }
  700. /* If we have limit, apply regexp so much times as we can */
  701. if (f != NULL && limit > 1) {
  702. end = 0;
  703. while ((matched = g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0, &info, &err)) == TRUE) {
  704. if (G_UNLIKELY (re->is_test)) {
  705. msg_info ("process test regexp %s for mime part of length %d returned TRUE",
  706. re->regexp_text,
  707. (gint)clen,
  708. end);
  709. }
  710. if (f (++passed, limit)) {
  711. task_cache_add (task, re, 1);
  712. return 1;
  713. }
  714. else {
  715. /* Match not found, skip further cycles */
  716. old = end;
  717. if (!g_match_info_fetch_pos (info, 0, &start, &end) || end <= 0) {
  718. break;
  719. }
  720. end += old;
  721. }
  722. g_match_info_free (info);
  723. }
  724. g_match_info_free (info);
  725. }
  726. else {
  727. if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL, &err) == TRUE) {
  728. if (G_UNLIKELY (re->is_test)) {
  729. msg_info ("process test regexp %s for mime part of length %d returned TRUE", re->regexp_text,
  730. (gint)clen);
  731. }
  732. task_cache_add (task, re, 1);
  733. return 1;
  734. }
  735. }
  736. if (!matched && G_UNLIKELY (re->is_test)) {
  737. msg_info ("process test regexp %s for mime part of length %d returned FALSE", re->regexp_text,
  738. (gint)clen);
  739. }
  740. if (err != NULL) {
  741. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  742. }
  743. cur = g_list_next (cur);
  744. }
  745. task_cache_add (task, re, 0);
  746. return 0;
  747. case REGEXP_MESSAGE:
  748. debug_task ("checking message regexp: %s", re->regexp_text);
  749. regexp = re->raw_regexp;
  750. ct = task->msg->begin;
  751. clen = task->msg->len;
  752. if (regexp_module_ctx->max_size != 0 && clen > regexp_module_ctx->max_size) {
  753. msg_info ("<%s> skip message of size %Hz", task->message_id, clen);
  754. return 0;
  755. }
  756. /* If we have limit, apply regexp so much times as we can */
  757. if (f != NULL && limit > 1) {
  758. end = 0;
  759. while ((matched = g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0, &info, &err)) == TRUE) {
  760. if (G_UNLIKELY (re->is_test)) {
  761. msg_info ("process test regexp %s for mime part of length %d returned TRUE", re->regexp_text,
  762. (gint)clen);
  763. }
  764. if (f (++passed, limit)) {
  765. task_cache_add (task, re, 1);
  766. return 1;
  767. }
  768. else {
  769. /* Match not found, skip further cycles */
  770. old = end;
  771. if (!g_match_info_fetch_pos (info, 0, &start, &end) || end <= 0) {
  772. break;
  773. }
  774. old += end;
  775. }
  776. g_match_info_free (info);
  777. }
  778. g_match_info_free (info);
  779. }
  780. else {
  781. if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL, &err) == TRUE) {
  782. if (G_UNLIKELY (re->is_test)) {
  783. msg_info ("process test regexp %s for message part of length %d returned TRUE", re->regexp_text,
  784. (gint)clen);
  785. }
  786. task_cache_add (task, re, 1);
  787. return 1;
  788. }
  789. }
  790. if (!matched && G_UNLIKELY (re->is_test)) {
  791. msg_info ("process test regexp %s for message part of length %d returned FALSE", re->regexp_text,
  792. (gint)clen);
  793. }
  794. if (err != NULL) {
  795. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  796. }
  797. task_cache_add (task, re, 0);
  798. return 0;
  799. case REGEXP_URL:
  800. debug_task ("checking url regexp: %s", re->regexp_text);
  801. if (f != NULL && limit > 1) {
  802. /*XXX: add support of it */
  803. msg_warn ("numbered matches are not supported for url regexp");
  804. }
  805. regexp = re->regexp;
  806. callback_param.task = task;
  807. callback_param.regexp = regexp;
  808. callback_param.re = re;
  809. callback_param.found = FALSE;
  810. if (task->urls) {
  811. g_tree_foreach (task->urls, tree_url_callback, &callback_param);
  812. }
  813. if (task->emails && callback_param.found == FALSE) {
  814. g_tree_foreach (task->emails, tree_url_callback, &callback_param);
  815. }
  816. if (callback_param.found == FALSE) {
  817. task_cache_add (task, re, 0);
  818. }
  819. return 0;
  820. case REGEXP_RAW_HEADER:
  821. debug_task ("checking for raw header: %s with regexp: %s", re->header, re->regexp_text);
  822. /* Check header's name */
  823. if (re->header == NULL) {
  824. msg_info ("header regexp without header name: '%s'", re->regexp_text);
  825. task_cache_add (task, re, 0);
  826. return 0;
  827. }
  828. debug_task ("checking header regexp: %s = %s", re->header, re->regexp_text);
  829. /* Get list of specified headers */
  830. headerlist = message_get_raw_header (task, re->header, re->is_strong);
  831. if (headerlist == NULL) {
  832. /* Header is not found */
  833. if (G_UNLIKELY (re->is_test)) {
  834. msg_info ("process test regexp %s for header %s returned FALSE: no header found", re->regexp_text, re->header);
  835. }
  836. task_cache_add (task, re, 0);
  837. return 0;
  838. }
  839. else {
  840. /* Check whether we have regexp for it */
  841. if (re->regexp == NULL) {
  842. debug_task ("regexp contains only header and it is found %s", re->header);
  843. task_cache_add (task, re, 1);
  844. return 1;
  845. }
  846. /* Iterate throught headers */
  847. cur = headerlist;
  848. while (cur) {
  849. debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
  850. rh = cur->data;
  851. /* Try to match regexp */
  852. if (!re->is_raw) {
  853. /* Validate input */
  854. if (!rh->value || !g_utf8_validate (rh->value, -1, NULL)) {
  855. cur = g_list_next (cur);
  856. continue;
  857. }
  858. }
  859. if (rh->value && g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) {
  860. if (G_UNLIKELY (re->is_test)) {
  861. msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
  862. }
  863. if (f != NULL && limit > 1) {
  864. /* If we have limit count, increase passed count and compare with limit */
  865. if (f (++passed, limit)) {
  866. task_cache_add (task, re, 1);
  867. return 1;
  868. }
  869. }
  870. else {
  871. task_cache_add (task, re, 1);
  872. return 1;
  873. }
  874. }
  875. else if (G_UNLIKELY (re->is_test)) {
  876. msg_info ("process test regexp %s for header %s with value '%s' returned FALSE", re->regexp_text, re->header, (const gchar *)cur->data);
  877. }
  878. if (err != NULL) {
  879. msg_info ("error occured while processing regexp \"%s\": %s", re->regexp_text, err->message);
  880. }
  881. cur = g_list_next (cur);
  882. }
  883. task_cache_add (task, re, 0);
  884. return 0;
  885. }
  886. break;
  887. default:
  888. msg_warn ("bad error detected: %p is not a valid regexp object", re);
  889. }
  890. /* Not reached */
  891. return 0;
  892. }
  893. static gboolean
  894. maybe_call_lua_function (const gchar *name, struct worker_task *task)
  895. {
  896. lua_State *L = task->cfg->lua_state;
  897. struct worker_task **ptask;
  898. gboolean res;
  899. lua_getglobal (L, name);
  900. if (lua_isfunction (L, -1)) {
  901. ptask = lua_newuserdata (L, sizeof (struct worker_task *));
  902. lua_setclass (L, "rspamd{task}", -1);
  903. *ptask = task;
  904. /* Call function */
  905. if (lua_pcall (L, 1, 1, 0) != 0) {
  906. msg_info ("call to %s failed: %s", (gchar *)name, lua_tostring (L, -1));
  907. return FALSE;
  908. }
  909. res = lua_toboolean (L, -1);
  910. lua_pop (L, 1);
  911. return res;
  912. }
  913. else {
  914. lua_pop (L, 1);
  915. }
  916. return FALSE;
  917. }
  918. static gboolean
  919. optimize_regexp_expression (struct expression **e, GQueue * stack, gboolean res)
  920. {
  921. struct expression *it = (*e)->next;
  922. gboolean ret = FALSE, is_nearest = TRUE;
  923. gint skip_level = 0;
  924. /* Skip nearest logical operators from optimization */
  925. if (!it || (it->type == EXPR_OPERATION && it->content.operation != '!')) {
  926. g_queue_push_head (stack, GSIZE_TO_POINTER (res));
  927. return ret;
  928. }
  929. while (it) {
  930. /* Find first operation for this iterator */
  931. if (it->type == EXPR_OPERATION) {
  932. /* If this operation is just ! just inverse res and check for further operators */
  933. if (it->content.operation == '!') {
  934. if (is_nearest) {
  935. msg_debug ("found '!' operator, inversing result");
  936. res = !res;
  937. *e = it;
  938. }
  939. it = it->next;
  940. continue;
  941. }
  942. else {
  943. skip_level--;
  944. }
  945. /* Check whether we found corresponding operator for this operand */
  946. if (skip_level <= 0) {
  947. if (it->content.operation == '|' && res == TRUE) {
  948. msg_debug ("found '|' and previous expression is true");
  949. *e = it;
  950. ret = TRUE;
  951. }
  952. else if (it->content.operation == '&' && res == FALSE) {
  953. msg_debug ("found '&' and previous expression is false");
  954. *e = it;
  955. ret = TRUE;
  956. }
  957. break;
  958. }
  959. }
  960. else {
  961. is_nearest = FALSE;
  962. skip_level++;
  963. }
  964. it = it->next;
  965. }
  966. g_queue_push_head (stack, GSIZE_TO_POINTER (res));
  967. return ret;
  968. }
  969. static gboolean
  970. process_regexp_expression (struct expression *expr, gchar *symbol, struct worker_task *task, const gchar *additional)
  971. {
  972. GQueue *stack;
  973. gsize cur, op1, op2;
  974. struct expression *it = expr;
  975. struct rspamd_regexp *re;
  976. gboolean try_optimize = TRUE;
  977. stack = g_queue_new ();
  978. while (it) {
  979. if (it->type == EXPR_REGEXP_PARSED) {
  980. /* Find corresponding symbol */
  981. cur = process_regexp ((struct rspamd_regexp *)it->content.operand, task, additional, 0, NULL);
  982. debug_task ("regexp %s found", cur ? "is" : "is not");
  983. if (try_optimize) {
  984. try_optimize = optimize_regexp_expression (&it, stack, cur);
  985. }
  986. else {
  987. g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
  988. }
  989. }
  990. else if (it->type == EXPR_FUNCTION) {
  991. cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task);
  992. debug_task ("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false");
  993. if (try_optimize) {
  994. try_optimize = optimize_regexp_expression (&it, stack, cur);
  995. }
  996. else {
  997. g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
  998. }
  999. }
  1000. else if (it->type == EXPR_STR) {
  1001. /* This may be lua function, try to call it */
  1002. cur = maybe_call_lua_function ((const gchar*)it->content.operand, task);
  1003. debug_task ("function %s returned %s", (const gchar *)it->content.operand, cur ? "true" : "false");
  1004. if (try_optimize) {
  1005. try_optimize = optimize_regexp_expression (&it, stack, cur);
  1006. }
  1007. else {
  1008. g_queue_push_head (stack, GSIZE_TO_POINTER (cur));
  1009. }
  1010. }
  1011. else if (it->type == EXPR_REGEXP) {
  1012. /* Compile regexp if it is not parsed */
  1013. if (it->content.operand == NULL) {
  1014. it = it->next;
  1015. continue;
  1016. }
  1017. re = parse_regexp (task->cfg->cfg_pool, it->content.operand, task->cfg->raw_mode);
  1018. if (re == NULL) {
  1019. msg_warn ("cannot parse regexp, skip expression");
  1020. g_queue_free (stack);
  1021. return FALSE;
  1022. }
  1023. it->content.operand = re;
  1024. it->type = EXPR_REGEXP_PARSED;
  1025. /* Continue with this regexp once again */
  1026. continue;
  1027. }
  1028. else if (it->type == EXPR_OPERATION) {
  1029. if (g_queue_is_empty (stack)) {
  1030. /* Queue has no operands for operation, exiting */
  1031. msg_warn ("regexp expression seems to be invalid: empty stack while reading operation");
  1032. g_queue_free (stack);
  1033. return FALSE;
  1034. }
  1035. debug_task ("got operation %c", it->content.operation);
  1036. switch (it->content.operation) {
  1037. case '!':
  1038. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1039. op1 = !op1;
  1040. try_optimize = optimize_regexp_expression (&it, stack, op1);
  1041. break;
  1042. case '&':
  1043. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1044. op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1045. try_optimize = optimize_regexp_expression (&it, stack, op1 && op2);
  1046. break;
  1047. case '|':
  1048. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1049. op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1050. try_optimize = optimize_regexp_expression (&it, stack, op1 || op2);
  1051. break;
  1052. default:
  1053. it = it->next;
  1054. continue;
  1055. }
  1056. }
  1057. if (it) {
  1058. it = it->next;
  1059. }
  1060. }
  1061. if (!g_queue_is_empty (stack)) {
  1062. op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack));
  1063. if (op1) {
  1064. g_queue_free (stack);
  1065. return TRUE;
  1066. }
  1067. }
  1068. else {
  1069. msg_warn ("regexp expression seems to be invalid: empty stack at the end of expression, symbol %s", symbol);
  1070. }
  1071. g_queue_free (stack);
  1072. return FALSE;
  1073. }
  1074. static void
  1075. process_regexp_item (struct worker_task *task, void *user_data)
  1076. {
  1077. struct regexp_module_item *item = user_data;
  1078. gboolean res = FALSE;
  1079. if (item->lua_function) {
  1080. /* Just call function */
  1081. if (lua_call_expression_func ("regexp", item->lua_function, task, NULL, &res) && res) {
  1082. insert_result (task, item->symbol, 1, NULL);
  1083. }
  1084. }
  1085. else {
  1086. /* Process expression */
  1087. if (process_regexp_expression (item->expr, item->symbol, task, NULL)) {
  1088. insert_result (task, item->symbol, 1, NULL);
  1089. }
  1090. }
  1091. }
  1092. static gint
  1093. regexp_common_filter (struct worker_task *task)
  1094. {
  1095. /* XXX: remove this shit too */
  1096. return 0;
  1097. }
  1098. static gboolean
  1099. rspamd_regexp_match_number (struct worker_task *task, GList * args, void *unused)
  1100. {
  1101. gint param_count, res = 0;
  1102. struct expression_argument *arg;
  1103. GList *cur;
  1104. if (args == NULL) {
  1105. msg_warn ("no parameters to function");
  1106. return FALSE;
  1107. }
  1108. arg = get_function_arg (args->data, task, TRUE);
  1109. param_count = strtoul (arg->data, NULL, 10);
  1110. cur = args->next;
  1111. while (cur) {
  1112. arg = get_function_arg (cur->data, task, FALSE);
  1113. if (arg && arg->type == EXPRESSION_ARGUMENT_BOOL) {
  1114. if ((gboolean) GPOINTER_TO_SIZE (arg->data)) {
  1115. res++;
  1116. }
  1117. }
  1118. else {
  1119. if (process_regexp_expression (cur->data, "regexp_match_number", task, NULL)) {
  1120. res++;
  1121. }
  1122. if (res >= param_count) {
  1123. return TRUE;
  1124. }
  1125. }
  1126. cur = g_list_next (cur);
  1127. }
  1128. return res >= param_count;
  1129. }
  1130. static gboolean
  1131. rspamd_regexp_occurs_number (struct worker_task *task, GList * args, void *unused)
  1132. {
  1133. gint limit;
  1134. struct expression_argument *arg;
  1135. struct rspamd_regexp *re;
  1136. gchar *param, *err_str, op;
  1137. int_compare_func f = NULL;
  1138. if (args == NULL || args->next == NULL) {
  1139. msg_warn ("wrong number of parameters to function, must be 2");
  1140. return FALSE;
  1141. }
  1142. arg = get_function_arg (args->data, task, TRUE);
  1143. if ((re = re_cache_check (arg->data, task->cfg->cfg_pool)) == NULL) {
  1144. re = parse_regexp (task->cfg->cfg_pool, arg->data, task->cfg->raw_mode);
  1145. if (!re) {
  1146. msg_err ("cannot parse given regexp: %s", (gchar *)arg->data);
  1147. return FALSE;
  1148. }
  1149. }
  1150. arg = get_function_arg (args->next->data, task, TRUE);
  1151. param = arg->data;
  1152. op = *param;
  1153. if (g_ascii_isdigit (op)) {
  1154. op = '=';
  1155. }
  1156. else {
  1157. param ++;
  1158. }
  1159. switch (op) {
  1160. case '>':
  1161. if (*param == '=') {
  1162. f = op_more_equal;
  1163. param ++;
  1164. }
  1165. else {
  1166. f = op_more;
  1167. }
  1168. break;
  1169. case '<':
  1170. if (*param == '=') {
  1171. f = op_less_equal;
  1172. param ++;
  1173. }
  1174. else {
  1175. f = op_less;
  1176. }
  1177. break;
  1178. case '=':
  1179. f = op_equal;
  1180. break;
  1181. default:
  1182. msg_err ("wrong operation character: %c, assumed '=', '>', '<', '>=', '<=' or empty op", op);
  1183. return FALSE;
  1184. }
  1185. limit = strtoul (param, &err_str, 10);
  1186. if (*err_str != 0) {
  1187. msg_err ("wrong numeric: %s at position: %s", param, err_str);
  1188. return FALSE;
  1189. }
  1190. return process_regexp (re, task, NULL, limit, f);
  1191. }
  1192. static gboolean
  1193. rspamd_raw_header_exists (struct worker_task *task, GList * args, void *unused)
  1194. {
  1195. struct expression_argument *arg;
  1196. if (args == NULL || task == NULL) {
  1197. return FALSE;
  1198. }
  1199. arg = get_function_arg (args->data, task, TRUE);
  1200. if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) {
  1201. msg_warn ("invalid argument to function is passed");
  1202. return FALSE;
  1203. }
  1204. return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
  1205. }
  1206. static gboolean
  1207. match_smtp_data (struct worker_task *task, const gchar *re_text, const gchar *what)
  1208. {
  1209. struct rspamd_regexp *re;
  1210. gint r;
  1211. if (*re_text == '/') {
  1212. /* This is a regexp */
  1213. if ((re = re_cache_check (re_text, task->cfg->cfg_pool)) == NULL) {
  1214. re = parse_regexp (task->cfg->cfg_pool, (gchar *)re_text, task->cfg->raw_mode);
  1215. if (re == NULL) {
  1216. msg_warn ("cannot compile regexp for function");
  1217. return FALSE;
  1218. }
  1219. re_cache_add ((gchar *)re_text, re, task->cfg->cfg_pool);
  1220. }
  1221. if ((r = task_cache_check (task, re)) == -1) {
  1222. if (g_regex_match (re->regexp, what, 0, NULL) == TRUE) {
  1223. task_cache_add (task, re, 1);
  1224. return TRUE;
  1225. }
  1226. task_cache_add (task, re, 0);
  1227. }
  1228. else {
  1229. return r == 1;
  1230. }
  1231. }
  1232. else if (g_ascii_strcasecmp (re_text, what) == 0) {
  1233. return TRUE;
  1234. }
  1235. return FALSE;
  1236. }
  1237. static gboolean
  1238. rspamd_check_smtp_data (struct worker_task *task, GList * args, void *unused)
  1239. {
  1240. struct expression_argument *arg;
  1241. GList *cur, *rcpt_list = NULL;
  1242. gchar *type, *what = NULL;
  1243. if (args == NULL) {
  1244. msg_warn ("no parameters to function");
  1245. return FALSE;
  1246. }
  1247. arg = get_function_arg (args->data, task, TRUE);
  1248. if (!arg || !arg->data) {
  1249. msg_warn ("no parameters to function");
  1250. return FALSE;
  1251. }
  1252. else {
  1253. type = arg->data;
  1254. switch (*type) {
  1255. case 'f':
  1256. case 'F':
  1257. if (g_ascii_strcasecmp (type, "from") == 0) {
  1258. what = task->from;
  1259. }
  1260. else {
  1261. msg_warn ("bad argument to function: %s", type);
  1262. return FALSE;
  1263. }
  1264. break;
  1265. case 'h':
  1266. case 'H':
  1267. if (g_ascii_strcasecmp (type, "helo") == 0) {
  1268. what = task->helo;
  1269. }
  1270. else {
  1271. msg_warn ("bad argument to function: %s", type);
  1272. return FALSE;
  1273. }
  1274. break;
  1275. case 'u':
  1276. case 'U':
  1277. if (g_ascii_strcasecmp (type, "user") == 0) {
  1278. what = task->user;
  1279. }
  1280. else {
  1281. msg_warn ("bad argument to function: %s", type);
  1282. return FALSE;
  1283. }
  1284. break;
  1285. case 's':
  1286. case 'S':
  1287. if (g_ascii_strcasecmp (type, "subject") == 0) {
  1288. what = task->subject;
  1289. }
  1290. else {
  1291. msg_warn ("bad argument to function: %s", type);
  1292. return FALSE;
  1293. }
  1294. break;
  1295. case 'r':
  1296. case 'R':
  1297. if (g_ascii_strcasecmp (type, "rcpt") == 0) {
  1298. rcpt_list = task->rcpt;
  1299. }
  1300. else {
  1301. msg_warn ("bad argument to function: %s", type);
  1302. return FALSE;
  1303. }
  1304. break;
  1305. default:
  1306. msg_warn ("bad argument to function: %s", type);
  1307. return FALSE;
  1308. }
  1309. }
  1310. if (what == NULL && rcpt_list == NULL) {
  1311. /* Not enough data so regexp would NOT be found anyway */
  1312. return FALSE;
  1313. }
  1314. /* We would process only one more argument, others are ignored */
  1315. cur = args->next;
  1316. if (cur) {
  1317. arg = get_function_arg (cur->data, task, FALSE);
  1318. if (arg && arg->type == EXPRESSION_ARGUMENT_NORMAL) {
  1319. if (what != NULL) {
  1320. return match_smtp_data (task, arg->data, what);
  1321. }
  1322. else {
  1323. while (rcpt_list) {
  1324. if (match_smtp_data (task, arg->data, rcpt_list->data)) {
  1325. return TRUE;
  1326. }
  1327. rcpt_list = g_list_next (rcpt_list);
  1328. }
  1329. }
  1330. }
  1331. else if (arg != NULL) {
  1332. if (what != NULL) {
  1333. if (process_regexp_expression (arg->data, "regexp_check_smtp_data", task, what)) {
  1334. return TRUE;
  1335. }
  1336. }
  1337. else {
  1338. while (rcpt_list) {
  1339. if (process_regexp_expression (arg->data, "regexp_check_smtp_data", task, rcpt_list->data)) {
  1340. return TRUE;
  1341. }
  1342. rcpt_list = g_list_next (rcpt_list);
  1343. }
  1344. }
  1345. }
  1346. }
  1347. return FALSE;
  1348. }
  1349. /* Lua part */
  1350. static gint
  1351. lua_regexp_match (lua_State *L)
  1352. {
  1353. void *ud = luaL_checkudata (L, 1, "rspamd{task}");
  1354. struct worker_task *task;
  1355. const gchar *re_text;
  1356. struct rspamd_regexp *re;
  1357. gint r = 0;
  1358. luaL_argcheck (L, ud != NULL, 1, "'task' expected");
  1359. task = ud ? *((struct worker_task **)ud) : NULL;
  1360. re_text = luaL_checkstring (L, 2);
  1361. /* This is a regexp */
  1362. if (task != NULL) {
  1363. if ((re = re_cache_check (re_text, task->cfg->cfg_pool)) == NULL) {
  1364. re = parse_regexp (task->cfg->cfg_pool, (gchar *)re_text, task->cfg->raw_mode);
  1365. if (re == NULL) {
  1366. msg_warn ("cannot compile regexp for function");
  1367. return FALSE;
  1368. }
  1369. re_cache_add ((gchar *)re_text, re, task->cfg->cfg_pool);
  1370. }
  1371. r = process_regexp (re, task, NULL, 0, NULL);
  1372. }
  1373. lua_pushboolean (L, r == 1);
  1374. return 1;
  1375. }