You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

expression.c 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "expression.h"
  18. #include "printf.h"
  19. #include "regexp.h"
  20. #include "util.h"
  21. #include "utlist.h"
  22. #include "ottery.h"
  23. #include "libserver/logger.h"
  24. #include "libcryptobox/cryptobox.h"
  25. #include <math.h>
  /*
   * Bit flags for expression processing.
   * NOTE(review): presumably stored in the `flags` members declared below;
   * they are not referenced in the part of the file visible here.
   */
  #define RSPAMD_EXPR_FLAG_NEGATE    (1 << 0)
  #define RSPAMD_EXPR_FLAG_PROCESSED (1 << 1)

  /*
   * Constants for the re-sort schedule: the parser initialises `next_resort`
   * as ottery_rand_range (MAX_RESORT_EVALS) + MIN_RESORT_EVALS.
   */
  #define MIN_RESORT_EVALS 50
  #define MAX_RESORT_EVALS 150
  /*
   * Discriminator for struct rspamd_expression_elt: tells which member of
   * the element's `p` union is meaningful.
   */
  enum rspamd_expression_elt_type {
      ELT_OP = 0,    /* operator, described by `p.op` */
      ELT_ATOM = 1,  /* atom produced by the subr parse callback, `p.atom` */
      ELT_LIMIT = 2  /* numeric literal, `p.lim` */
  };
  /*
   * Properties of an operator, combined as a bitmask by
   * rspamd_expr_op_flags(): one arity bit (unary/binary/nary) plus one
   * category bit (arithmetic/logical/comparison).
   */
  enum rspamd_expression_op_flag {
      /* Arity */
      RSPAMD_EXPRESSION_UNARY = 0x01u,
      RSPAMD_EXPRESSION_BINARY = 0x02u,
      RSPAMD_EXPRESSION_NARY = 0x04u,
      /* Category */
      RSPAMD_EXPRESSION_ARITHMETIC = 0x08u,
      RSPAMD_EXPRESSION_LOGICAL = 0x10u,
      RSPAMD_EXPRESSION_COMPARISON = 0x20u,
  };
  43. struct rspamd_expression_operation {
  44. enum rspamd_expression_op op;
  45. guint logical_priority;
  46. guint op_flags;
  47. };
  48. struct rspamd_expression_elt {
  49. enum rspamd_expression_elt_type type;
  50. union {
  51. rspamd_expression_atom_t *atom;
  52. struct rspamd_expression_operation op;
  53. gdouble lim;
  54. } p;
  55. gint flags;
  56. gint priority;
  57. gdouble value;
  58. };
  59. struct rspamd_expression {
  60. const struct rspamd_atom_subr *subr;
  61. GArray *expressions;
  62. GPtrArray *expression_stack;
  63. GNode *ast;
  64. gchar *log_id;
  65. guint next_resort;
  66. guint evals;
  67. };
  68. struct rspamd_expr_process_data {
  69. gpointer *ud;
  70. gint flags;
  71. /* != NULL if trace is collected */
  72. GPtrArray *trace;
  73. rspamd_expression_process_cb process_closure;
  74. };
  75. #define msg_debug_expression(...) rspamd_conditional_debug_fast (NULL, NULL, \
  76. rspamd_expression_log_id, "expression", e->log_id, \
  77. RSPAMD_LOG_FUNC, \
  78. __VA_ARGS__)
  79. #ifdef DEBUG_EXPRESSIONS
  80. #define msg_debug_expression_verbose(...) rspamd_conditional_debug_fast (NULL, NULL, \
  81. rspamd_expression_log_id, "expression", e->log_id, \
  82. RSPAMD_LOG_FUNC, \
  83. __VA_ARGS__)
  84. #else
  85. #define msg_debug_expression_verbose(...) do {} while(0)
  86. #endif
  87. INIT_LOG_MODULE(expression)
  88. static GQuark
  89. rspamd_expr_quark (void)
  90. {
  91. return g_quark_from_static_string ("rspamd-expression");
  92. }
  93. static const gchar * RSPAMD_CONST_FUNCTION
  94. rspamd_expr_op_to_str (enum rspamd_expression_op op);
  95. static const gchar *
  96. rspamd_expr_op_to_str (enum rspamd_expression_op op)
  97. {
  98. const gchar *op_str = NULL;
  99. switch (op) {
  100. case OP_AND:
  101. op_str = "&";
  102. break;
  103. case OP_OR:
  104. op_str = "|";
  105. break;
  106. case OP_MULT:
  107. op_str = "*";
  108. break;
  109. case OP_PLUS:
  110. op_str = "+";
  111. break;
  112. case OP_MINUS:
  113. op_str = "-";
  114. break;
  115. case OP_DIVIDE:
  116. op_str = "/";
  117. break;
  118. case OP_NOT:
  119. op_str = "!";
  120. break;
  121. case OP_GE:
  122. op_str = ">=";
  123. break;
  124. case OP_GT:
  125. op_str = ">";
  126. break;
  127. case OP_LE:
  128. op_str = "<=";
  129. break;
  130. case OP_LT:
  131. op_str = "<";
  132. break;
  133. case OP_EQ:
  134. op_str = "==";
  135. break;
  136. case OP_NE:
  137. op_str = "!=";
  138. break;
  139. case OP_OBRACE:
  140. op_str = "(";
  141. break;
  142. case OP_CBRACE:
  143. op_str = ")";
  144. break;
  145. default:
  146. op_str = "???";
  147. break;
  148. }
  149. return op_str;
  150. }
  /* Address of the last element of GArray `arr` holding `elt_type` items; `arr` must be non-empty */
  #define G_ARRAY_LAST(arr, elt_type) (&g_array_index((arr), elt_type, (arr)->len - 1))
  152. static void
  153. rspamd_expr_stack_elt_push (GPtrArray *stack,
  154. gpointer elt)
  155. {
  156. g_ptr_array_add (stack, elt);
  157. }
  158. static gpointer
  159. rspamd_expr_stack_elt_pop (GPtrArray *stack)
  160. {
  161. gpointer e;
  162. gint idx;
  163. if (stack->len == 0) {
  164. return NULL;
  165. }
  166. idx = stack->len - 1;
  167. e = g_ptr_array_index (stack, idx);
  168. g_ptr_array_remove_index_fast (stack, idx);
  169. return e;
  170. }
  171. static void
  172. rspamd_expr_stack_push (struct rspamd_expression *expr,
  173. gpointer elt)
  174. {
  175. rspamd_expr_stack_elt_push (expr->expression_stack, elt);
  176. }
  177. static gpointer
  178. rspamd_expr_stack_pop (struct rspamd_expression *expr)
  179. {
  180. return rspamd_expr_stack_elt_pop (expr->expression_stack);
  181. }
  182. static gpointer
  183. rspamd_expr_stack_peek (struct rspamd_expression *expr)
  184. {
  185. gpointer e;
  186. gint idx;
  187. GPtrArray *stack = expr->expression_stack;
  188. if (stack->len == 0) {
  189. return NULL;
  190. }
  191. idx = stack->len - 1;
  192. e = g_ptr_array_index (stack, idx);
  193. return e;
  194. }
  195. /*
  196. * Return operation priority
  197. */
  198. static gint RSPAMD_CONST_FUNCTION
  199. rspamd_expr_logic_priority (enum rspamd_expression_op op);
  200. static gint
  201. rspamd_expr_logic_priority (enum rspamd_expression_op op)
  202. {
  203. gint ret = 0;
  204. switch (op) {
  205. case OP_NOT:
  206. ret = 7;
  207. break;
  208. case OP_MULT:
  209. case OP_DIVIDE:
  210. ret = 6;
  211. break;
  212. case OP_PLUS:
  213. case OP_MINUS:
  214. ret = 5;
  215. break;
  216. case OP_GE:
  217. case OP_GT:
  218. case OP_LE:
  219. case OP_LT:
  220. case OP_EQ:
  221. case OP_NE:
  222. ret = 4;
  223. break;
  224. case OP_AND:
  225. ret = 3;
  226. break;
  227. case OP_OR:
  228. ret = 2;
  229. break;
  230. case OP_OBRACE:
  231. case OP_CBRACE:
  232. ret = 1;
  233. break;
  234. case OP_INVALID:
  235. ret = -1;
  236. break;
  237. }
  238. return ret;
  239. }
  240. static guint RSPAMD_CONST_FUNCTION
  241. rspamd_expr_op_flags (enum rspamd_expression_op op);
  242. static guint
  243. rspamd_expr_op_flags (enum rspamd_expression_op op)
  244. {
  245. guint ret = 0;
  246. switch (op) {
  247. case OP_NOT:
  248. ret |= RSPAMD_EXPRESSION_UNARY|RSPAMD_EXPRESSION_LOGICAL;
  249. break;
  250. case OP_MULT:
  251. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
  252. break;
  253. case OP_DIVIDE:
  254. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
  255. break;
  256. case OP_PLUS:
  257. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
  258. break;
  259. case OP_MINUS:
  260. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
  261. break;
  262. case OP_GE:
  263. case OP_GT:
  264. case OP_LE:
  265. case OP_LT:
  266. case OP_EQ:
  267. case OP_NE:
  268. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_COMPARISON;
  269. break;
  270. case OP_AND:
  271. case OP_OR:
  272. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
  273. break;
  274. case OP_OBRACE:
  275. case OP_CBRACE:
  276. case OP_INVALID:
  277. break;
  278. }
  279. return ret;
  280. }
  281. /*
  282. * Return FALSE if symbol is not operation symbol (operand)
  283. * Return TRUE if symbol is operation symbol
  284. */
  285. static gboolean RSPAMD_CONST_FUNCTION
  286. rspamd_expr_is_operation_symbol (gchar a);
  287. static gboolean
  288. rspamd_expr_is_operation_symbol (gchar a)
  289. {
  290. switch (a) {
  291. case '!':
  292. case '&':
  293. case '|':
  294. case '(':
  295. case ')':
  296. case '>':
  297. case '<':
  298. case '+':
  299. case '*':
  300. case '-':
  301. case '/':
  302. case '=':
  303. return TRUE;
  304. }
  305. return FALSE;
  306. }
  307. static gboolean
  308. rspamd_expr_is_operation (struct rspamd_expression *e,
  309. const gchar *p, const gchar *end, rspamd_regexp_t *num_re)
  310. {
  311. if (rspamd_expr_is_operation_symbol (*p)) {
  312. if (p + 1 < end) {
  313. gchar t = *(p + 1);
  314. if (t == ':') {
  315. /* Special case, treat it as an atom */
  316. }
  317. else if (*p == '/') {
  318. /* Lookahead for division operation to distinguish from regexp */
  319. const gchar *track = p + 1;
  320. /* Skip spaces */
  321. while (track < end && g_ascii_isspace (*track)) {
  322. track++;
  323. }
  324. /* Check for a number */
  325. if (rspamd_regexp_search (num_re,
  326. track,
  327. end - track,
  328. NULL,
  329. NULL,
  330. FALSE,
  331. NULL)) {
  332. msg_debug_expression_verbose("found divide operation");
  333. return TRUE;
  334. }
  335. msg_debug_expression_verbose("false divide operation");
  336. /* Fallback to PARSE_ATOM state */
  337. }
  338. else if (*p == '-') {
  339. /* - is used in composites, so we need to distinguish - from
  340. * 1) unary minus of a limit!
  341. * 2) -BLAH in composites
  342. * Decision is simple: require a space after binary `-` op
  343. */
  344. if (g_ascii_isspace (t)) {
  345. return TRUE;
  346. }
  347. /* Fallback to PARSE_ATOM state */
  348. msg_debug_expression_verbose("false minus operation");
  349. }
  350. else {
  351. /* Generic operation */
  352. return TRUE;
  353. }
  354. }
  355. else {
  356. /* Last op */
  357. return TRUE;
  358. }
  359. }
  360. return FALSE;
  361. }
  362. /* Return character representation of operation */
  363. static enum rspamd_expression_op
  364. rspamd_expr_str_to_op (const gchar *a, const gchar *end, const gchar **next)
  365. {
  366. enum rspamd_expression_op op = OP_INVALID;
  367. g_assert (a < end);
  368. switch (*a) {
  369. case '!':
  370. case '&':
  371. case '|':
  372. case '+':
  373. case '*':
  374. case '/':
  375. case '-':
  376. case '(':
  377. case ')':
  378. case '=': {
  379. if (a < end - 1) {
  380. if ((a[0] == '&' && a[1] == '&') ||
  381. (a[0] == '|' && a[1] == '|') ||
  382. (a[0] == '!' && a[1] == '=') ||
  383. (a[0] == '=' && a[1] == '=')) {
  384. *next = a + 2;
  385. }
  386. else {
  387. *next = a + 1;
  388. }
  389. }
  390. else {
  391. *next = end;
  392. }
  393. /* XXX: not especially effective */
  394. switch (*a) {
  395. case '!':
  396. if (a < end - 1 && a[1] == '=') {
  397. op = OP_NE;
  398. }
  399. else {
  400. op = OP_NOT;
  401. }
  402. break;
  403. case '&':
  404. op = OP_AND;
  405. break;
  406. case '*':
  407. op = OP_MULT;
  408. break;
  409. case '|':
  410. op = OP_OR;
  411. break;
  412. case '+':
  413. op = OP_PLUS;
  414. break;
  415. case '/':
  416. op = OP_DIVIDE;
  417. break;
  418. case '-':
  419. op = OP_MINUS;
  420. break;
  421. case '=':
  422. op = OP_EQ;
  423. break;
  424. case ')':
  425. op = OP_CBRACE;
  426. break;
  427. case '(':
  428. op = OP_OBRACE;
  429. break;
  430. default:
  431. op = OP_INVALID;
  432. break;
  433. }
  434. break;
  435. }
  436. case 'O':
  437. case 'o':
  438. if ((gulong)(end - a) >= sizeof ("or") &&
  439. g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
  440. *next = a + sizeof ("or") - 1;
  441. op = OP_OR;
  442. }
  443. break;
  444. case 'A':
  445. case 'a':
  446. if ((gulong)(end - a) >= sizeof ("and") &&
  447. g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
  448. *next = a + sizeof ("and") - 1;
  449. op = OP_AND;
  450. }
  451. break;
  452. case 'N':
  453. case 'n':
  454. if ((gulong)(end - a) >= sizeof ("not") &&
  455. g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
  456. *next = a + sizeof ("not") - 1;
  457. op = OP_NOT;
  458. }
  459. break;
  460. case '>':
  461. if (a < end - 1 && a[1] == '=') {
  462. *next = a + 2;
  463. op = OP_GE;
  464. }
  465. else {
  466. *next = a + 1;
  467. op = OP_GT;
  468. }
  469. break;
  470. case '<':
  471. if (a < end - 1 && a[1] == '=') {
  472. *next = a + 2;
  473. op = OP_LE;
  474. }
  475. else {
  476. *next = a + 1;
  477. op = OP_LT;
  478. }
  479. break;
  480. default:
  481. op = OP_INVALID;
  482. break;
  483. }
  484. return op;
  485. }
  486. static void
  487. rspamd_expression_destroy (struct rspamd_expression *expr)
  488. {
  489. guint i;
  490. struct rspamd_expression_elt *elt;
  491. if (expr != NULL) {
  492. if (expr->subr->destroy) {
  493. /* Free atoms */
  494. for (i = 0; i < expr->expressions->len; i ++) {
  495. elt = &g_array_index (expr->expressions,
  496. struct rspamd_expression_elt, i);
  497. if (elt->type == ELT_ATOM) {
  498. expr->subr->destroy (elt->p.atom);
  499. }
  500. }
  501. }
  502. if (expr->expressions) {
  503. g_array_free (expr->expressions, TRUE);
  504. }
  505. if (expr->expression_stack) {
  506. g_ptr_array_free (expr->expression_stack, TRUE);
  507. }
  508. if (expr->ast) {
  509. g_node_destroy (expr->ast);
  510. }
  511. if (expr->log_id) {
  512. g_free (expr->log_id);
  513. }
  514. g_free (expr);
  515. }
  516. }
  517. static gboolean
  518. rspamd_ast_add_node (struct rspamd_expression *e,
  519. GPtrArray *operands,
  520. struct rspamd_expression_elt *op,
  521. GError **err)
  522. {
  523. GNode *res, *a1, *a2, *test;
  524. g_assert (op->type == ELT_OP);
  525. if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  526. /* Unary operator */
  527. struct rspamd_expression_elt *test_elt;
  528. res = g_node_new (op);
  529. a1 = rspamd_expr_stack_elt_pop (operands);
  530. if (a1 == NULL) {
  531. g_set_error (err, rspamd_expr_quark(), EINVAL, "no operand to "
  532. "unary '%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  533. g_node_destroy (res);
  534. return FALSE;
  535. }
  536. g_node_append (res, a1);
  537. test_elt = a1->data;
  538. if (test_elt->type == ELT_ATOM) {
  539. test_elt->p.atom->parent = res;
  540. msg_debug_expression ("added unary op %s to AST; operand: %*s",
  541. rspamd_expr_op_to_str (op->p.op.op),
  542. (int)test_elt->p.atom->len, test_elt->p.atom->str);
  543. }
  544. else {
  545. msg_debug_expression ("added unary op %s to AST; operand type: %d",
  546. rspamd_expr_op_to_str (op->p.op.op),
  547. test_elt->type);
  548. }
  549. }
  550. else {
  551. struct rspamd_expression_elt *e1, *e2;
  552. /* For binary/nary operators we might want to examine chains */
  553. a2 = rspamd_expr_stack_elt_pop (operands);
  554. a1 = rspamd_expr_stack_elt_pop (operands);
  555. if (a2 == NULL) {
  556. g_set_error (err, rspamd_expr_quark(), EINVAL, "no left operand to "
  557. "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  558. return FALSE;
  559. }
  560. if (a1 == NULL) {
  561. g_set_error (err, rspamd_expr_quark(), EINVAL, "no right operand to "
  562. "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  563. return FALSE;
  564. }
  565. /* Nary stuff */
  566. if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  567. /*
  568. * We convert a set of ops like X + Y + Z to a nary tree like
  569. * X Y Z +
  570. * for the longest possible prefix of atoms/limits
  571. */
  572. /* First try with a1 */
  573. test = a1;
  574. e1 = test->data;
  575. if (e1->type == ELT_OP && e1->p.op.op == op->p.op.op) {
  576. /* Add children */
  577. g_node_append (test, a2);
  578. rspamd_expr_stack_elt_push (operands, a1);
  579. msg_debug_expression ("added nary op %s to AST merged with the first operand",
  580. rspamd_expr_op_to_str (op->p.op.op));
  581. return TRUE;
  582. }
  583. /* Now test a2 */
  584. test = a2;
  585. e2 = test->data;
  586. if (e2->type == ELT_OP && e2->p.op.op == op->p.op.op) {
  587. /* Add children */
  588. g_node_prepend (test, a1);
  589. rspamd_expr_stack_elt_push (operands, a2);
  590. msg_debug_expression ("added nary op %s to AST merged with the second operand",
  591. rspamd_expr_op_to_str (op->p.op.op));
  592. return TRUE;
  593. }
  594. }
  595. /* No optimizations possible, so create a new level */
  596. res = g_node_new (op);
  597. g_node_append (res, a1);
  598. g_node_append (res, a2);
  599. e1 = a1->data;
  600. e2 = a2->data;
  601. if (e1->type == ELT_ATOM) {
  602. e1->p.atom->parent = res;
  603. }
  604. if (e2->type == ELT_ATOM) {
  605. e2->p.atom->parent = res;
  606. }
  607. if (e1->type == ELT_ATOM && e2->type == ELT_ATOM) {
  608. msg_debug_expression ("added binary op %s to AST; operands: (%*s; %*s)",
  609. rspamd_expr_op_to_str (op->p.op.op),
  610. (int) e1->p.atom->len, e1->p.atom->str,
  611. (int) e2->p.atom->len, e2->p.atom->str);
  612. }
  613. else {
  614. msg_debug_expression ("added binary op %s to AST; operands (types): (%d; %d)",
  615. rspamd_expr_op_to_str (op->p.op.op),
  616. e1->type,
  617. e2->type);
  618. }
  619. }
  620. /* Push back resulting node to the stack */
  621. rspamd_expr_stack_elt_push (operands, res);
  622. return TRUE;
  623. }
  624. static gboolean
  625. rspamd_ast_priority_traverse (GNode *node, gpointer d)
  626. {
  627. struct rspamd_expression_elt *elt = node->data, *cur_elt;
  628. struct rspamd_expression *expr = d;
  629. gint cnt = 0;
  630. GNode *cur;
  631. if (node->children) {
  632. cur = node->children;
  633. while (cur) {
  634. cur_elt = cur->data;
  635. cnt += cur_elt->priority;
  636. cur = cur->next;
  637. }
  638. elt->priority = cnt;
  639. }
  640. else {
  641. /* It is atom or limit */
  642. g_assert (elt->type != ELT_OP);
  643. if (elt->type == ELT_LIMIT) {
  644. /* Always push limit first */
  645. elt->priority = 0;
  646. }
  647. else {
  648. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY;
  649. if (expr->subr->priority != NULL) {
  650. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY -
  651. expr->subr->priority (elt->p.atom);
  652. }
  653. elt->p.atom->hits = 0;
  654. }
  655. }
  656. return FALSE;
  657. }
  658. #define ATOM_PRIORITY(a) ((a)->p.atom->hits / ((a)->p.atom->exec_time.mean > 0 ? \
  659. (a)->p.atom->exec_time.mean * 10000000 : 1.0))
  660. static gint
  661. rspamd_ast_priority_cmp (GNode *a, GNode *b)
  662. {
  663. struct rspamd_expression_elt *ea = a->data, *eb = b->data;
  664. gdouble w1, w2;
  665. if (ea->type == ELT_LIMIT) {
  666. return 1;
  667. }
  668. else if (eb->type == ELT_LIMIT) {
  669. return -1;
  670. }
  671. /* Special logic for atoms */
  672. if (ea->type == ELT_ATOM && eb->type == ELT_ATOM &&
  673. ea->priority == eb->priority) {
  674. w1 = ATOM_PRIORITY (ea);
  675. w2 = ATOM_PRIORITY (eb);
  676. ea->p.atom->hits = 0;
  677. return w1 - w2;
  678. }
  679. else {
  680. return ea->priority - eb->priority;
  681. }
  682. }
  683. static gboolean
  684. rspamd_ast_resort_traverse (GNode *node, gpointer unused)
  685. {
  686. GNode *children, *last;
  687. struct rspamd_expression_elt *elt;
  688. elt = (struct rspamd_expression_elt *)node->data;
  689. /*
  690. * We sort merely logical operations, everything else is dangerous
  691. */
  692. if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) {
  693. if (node->children) {
  694. children = node->children;
  695. last = g_node_last_sibling (children);
  696. /* Needed for utlist compatibility */
  697. children->prev = last;
  698. DL_SORT (node->children, rspamd_ast_priority_cmp);
  699. /* Restore GLIB compatibility */
  700. children = node->children;
  701. children->prev = NULL;
  702. }
  703. }
  704. return FALSE;
  705. }
  706. static struct rspamd_expression_elt *
  707. rspamd_expr_dup_elt (rspamd_mempool_t *pool, struct rspamd_expression_elt *elt)
  708. {
  709. struct rspamd_expression_elt *n;
  710. n = rspamd_mempool_alloc (pool, sizeof (*n));
  711. memcpy (n, elt, sizeof (*n));
  712. return n;
  713. }
  714. gboolean
  715. rspamd_parse_expression (const gchar *line, gsize len,
  716. const struct rspamd_atom_subr *subr, gpointer subr_data,
  717. rspamd_mempool_t *pool, GError **err,
  718. struct rspamd_expression **target)
  719. {
  720. struct rspamd_expression *e;
  721. struct rspamd_expression_elt elt;
  722. rspamd_expression_atom_t *atom;
  723. rspamd_regexp_t *num_re;
  724. enum rspamd_expression_op op, op_stack;
  725. const gchar *p, *c, *end;
  726. GPtrArray *operand_stack;
  727. GNode *tmp;
  728. enum {
  729. PARSE_ATOM = 0,
  730. PARSE_OP,
  731. PARSE_LIM,
  732. SKIP_SPACES
  733. } state = PARSE_ATOM;
  734. g_assert (line != NULL);
  735. g_assert (subr != NULL && subr->parse != NULL);
  736. if (len == 0) {
  737. len = strlen (line);
  738. }
  739. memset (&elt, 0, sizeof (elt));
  740. num_re = rspamd_regexp_cache_create (NULL,
  741. "/^(?:[+-]?([0-9]*[.])?[0-9]+)(?:\\s+|[)]|$)/", NULL, NULL);
  742. p = line;
  743. c = line;
  744. end = line + len;
  745. e = g_malloc0 (sizeof (*e));
  746. e->expressions = g_array_new (FALSE, FALSE,
  747. sizeof (struct rspamd_expression_elt));
  748. operand_stack = g_ptr_array_sized_new (32);
  749. e->ast = NULL;
  750. e->expression_stack = g_ptr_array_sized_new (32);
  751. e->subr = subr;
  752. e->evals = 0;
  753. e->next_resort = ottery_rand_range (MAX_RESORT_EVALS) + MIN_RESORT_EVALS;
  754. e->log_id = g_malloc0 (RSPAMD_LOG_ID_LEN + 1);
  755. guint64 h = rspamd_cryptobox_fast_hash (line, len, 0xdeadbabe);
  756. rspamd_snprintf (e->log_id, RSPAMD_LOG_ID_LEN + 1, "%xL", h);
  757. msg_debug_expression ("start to parse expression '%*s'", (int)len, line);
  758. /* Shunting-yard algorithm */
  759. while (p < end) {
  760. switch (state) {
  761. case PARSE_ATOM:
  762. if (g_ascii_isspace (*p)) {
  763. state = SKIP_SPACES;
  764. continue;
  765. }
  766. else if (rspamd_expr_is_operation (e, p, end, num_re)) {
  767. /* Lookahead */
  768. state = PARSE_OP;
  769. continue;
  770. }
  771. /*
  772. * First of all, we check some pre-conditions:
  773. * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op
  774. * 2) if we have full numeric string, then we check for
  775. * the following expression:
  776. * ^\d+\s*[><]$
  777. * and check the operation on stack
  778. */
  779. if ((gulong)(end - p) > sizeof ("and ") &&
  780. (g_ascii_strncasecmp (p, "and ", sizeof ("and ") - 1) == 0 ||
  781. g_ascii_strncasecmp (p, "not ", sizeof ("not ") - 1) == 0 )) {
  782. state = PARSE_OP;
  783. }
  784. else if ((gulong)(end - p) > sizeof ("or ") &&
  785. g_ascii_strncasecmp (p, "or ", sizeof ("or ") - 1) == 0) {
  786. state = PARSE_OP;
  787. }
  788. else {
  789. /*
  790. * If we have any comparison or arithmetic operator in the stack, then try
  791. * to parse limit
  792. */
  793. op = GPOINTER_TO_INT (rspamd_expr_stack_peek (e));
  794. if (op == OP_MULT || op == OP_MINUS || op == OP_DIVIDE ||
  795. op == OP_PLUS || (op >= OP_LT && op <= OP_NE)) {
  796. if (rspamd_regexp_search (num_re,
  797. p,
  798. end - p,
  799. NULL,
  800. NULL,
  801. FALSE,
  802. NULL)) {
  803. c = p;
  804. state = PARSE_LIM;
  805. continue;
  806. }
  807. /* Fallback to atom parsing */
  808. }
  809. /* Try to parse atom */
  810. atom = subr->parse (p, end - p, pool, subr_data, err);
  811. if (atom == NULL || atom->len == 0) {
  812. /* We couldn't parse the atom, so go out */
  813. if (err != NULL && *err == NULL) {
  814. g_set_error (err,
  815. rspamd_expr_quark (),
  816. 500,
  817. "Cannot parse atom: callback function failed"
  818. " to parse '%.*s'",
  819. (int) (end - p),
  820. p);
  821. }
  822. goto error_label;
  823. }
  824. if (atom->str == NULL) {
  825. atom->str = p;
  826. }
  827. p = p + atom->len;
  828. /* Push to output */
  829. elt.type = ELT_ATOM;
  830. elt.p.atom = atom;
  831. g_array_append_val (e->expressions, elt);
  832. rspamd_expr_stack_elt_push (operand_stack,
  833. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  834. msg_debug_expression ("found atom: %*s; pushed onto operand stack (%d size)",
  835. (int)atom->len, atom->str, operand_stack->len);
  836. }
  837. break;
  838. case PARSE_LIM:
  839. if ((g_ascii_isdigit (*p) || *p == '-' || *p == '.')
  840. && p < end - 1) {
  841. p ++;
  842. }
  843. else {
  844. if (p == end - 1 && g_ascii_isdigit (*p)) {
  845. p ++;
  846. }
  847. if (p - c > 0) {
  848. elt.type = ELT_LIMIT;
  849. elt.p.lim = strtod (c, NULL);
  850. g_array_append_val (e->expressions, elt);
  851. rspamd_expr_stack_elt_push (operand_stack,
  852. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  853. msg_debug_expression ("found limit: %.1f; pushed onto operand stack (%d size)",
  854. elt.p.lim, operand_stack->len);
  855. c = p;
  856. state = SKIP_SPACES;
  857. }
  858. else {
  859. g_set_error (err, rspamd_expr_quark(), 400, "Empty number");
  860. goto error_label;
  861. }
  862. }
  863. break;
  864. case PARSE_OP:
  865. op = rspamd_expr_str_to_op (p, end, &p);
  866. if (op == OP_INVALID) {
  867. g_set_error (err, rspamd_expr_quark(), 500, "Bad operator %c",
  868. *p);
  869. goto error_label;
  870. }
  871. else if (op == OP_OBRACE) {
  872. /*
  873. * If the token is a left parenthesis, then push it onto
  874. * the stack.
  875. */
  876. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  877. msg_debug_expression ("found obrace, pushed to operators stack (%d size)",
  878. e->expression_stack->len);
  879. }
  880. else if (op == OP_CBRACE) {
  881. /*
  882. * Until the token at the top of the stack is a left
  883. * parenthesis, pop operators off the stack onto the
  884. * output queue.
  885. *
  886. * Pop the left parenthesis from the stack,
  887. * but not onto the output queue.
  888. *
  889. * If the stack runs out without finding a left parenthesis,
  890. * then there are mismatched parentheses.
  891. */
  892. msg_debug_expression ("found cbrace, rewind operators stack (%d size)",
  893. e->expression_stack->len);
  894. do {
  895. op = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  896. if (op == OP_INVALID) {
  897. g_set_error (err, rspamd_expr_quark(), 600,
  898. "Braces mismatch");
  899. goto error_label;
  900. }
  901. guint op_priority = rspamd_expr_logic_priority (op);
  902. msg_debug_expression ("found op: %s; priority = %d",
  903. rspamd_expr_op_to_str (op), op_priority);
  904. if (op != OP_OBRACE) {
  905. elt.type = ELT_OP;
  906. elt.p.op.op = op;
  907. elt.p.op.op_flags = rspamd_expr_op_flags (op);
  908. elt.p.op.logical_priority = op_priority;
  909. g_array_append_val (e->expressions, elt);
  910. if (!rspamd_ast_add_node (e, operand_stack,
  911. rspamd_expr_dup_elt (pool, &elt), err)) {
  912. goto error_label;
  913. }
  914. }
  915. } while (op != OP_OBRACE);
  916. }
  917. else {
  918. /*
  919. * While there is an operator token, o2, at the top of
  920. * the operator stack, and either:
  921. *
  922. * - o1 is left-associative and its precedence is less than
  923. * or equal to that of o2, or
  924. * - o1 is right associative, and has precedence less than
  925. * that of o2,
  926. *
  927. * then pop o2 off the operator stack, onto the output queue;
  928. *
  929. * push o1 onto the operator stack.
  930. */
  931. for (;;) {
  932. op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  933. if (op_stack == OP_INVALID) {
  934. /* Stack is empty */
  935. msg_debug_expression ("no operations in operators stack");
  936. break;
  937. }
  938. /* We ignore associativity for now */
  939. guint op_priority = rspamd_expr_logic_priority (op),
  940. stack_op_priority = rspamd_expr_logic_priority (op_stack);
  941. msg_debug_expression ("operators stack %d; operands stack: %d; "
  942. "process operation '%s'(%d); pop operation '%s'(%d)",
  943. e->expression_stack->len,
  944. operand_stack->len,
  945. rspamd_expr_op_to_str (op), op_priority,
  946. rspamd_expr_op_to_str (op_stack), stack_op_priority);
  947. if (op_stack != OP_OBRACE &&
  948. op_priority < stack_op_priority) {
  949. elt.type = ELT_OP;
  950. elt.p.op.op = op_stack;
  951. elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
  952. elt.p.op.logical_priority = op_priority;
  953. g_array_append_val (e->expressions, elt);
  954. if (!rspamd_ast_add_node (e, operand_stack,
  955. rspamd_expr_dup_elt (pool, &elt), err)) {
  956. goto error_label;
  957. }
  958. }
  959. else {
  960. /* Push op_stack back */
  961. msg_debug_expression ("operators stack %d; operands stack: %d; "
  962. "process operation '%s'(%d); push back to stack '%s'(%d)",
  963. e->expression_stack->len,
  964. operand_stack->len,
  965. rspamd_expr_op_to_str (op), op_priority,
  966. rspamd_expr_op_to_str (op_stack), stack_op_priority);
  967. rspamd_expr_stack_push (e, GINT_TO_POINTER (op_stack));
  968. break;
  969. }
  970. }
  971. /* Push new operator itself */
  972. msg_debug_expression ("operators stack %d; operands stack: %d; "
  973. "process operation '%s'; push to stack",
  974. e->expression_stack->len,
  975. operand_stack->len,
  976. rspamd_expr_op_to_str (op));
  977. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  978. }
  979. state = SKIP_SPACES;
  980. break;
  981. case SKIP_SPACES:
  982. if (g_ascii_isspace (*p)) {
  983. p ++;
  984. }
  985. if (rspamd_expr_is_operation (e, p, end, num_re)) {
  986. /* Lookahead */
  987. state = PARSE_OP;
  988. }
  989. else {
  990. state = PARSE_ATOM;
  991. }
  992. break;
  993. }
  994. }
  995. /* Now we process the stack and push operators to the output */
  996. while ((op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e)))
  997. != OP_INVALID) {
  998. msg_debug_expression ("operators stack %d; operands stack: %d; "
  999. "rewind stack; op: %s",
  1000. e->expression_stack->len,
  1001. operand_stack->len,
  1002. rspamd_expr_op_to_str (op_stack));
  1003. if (op_stack != OP_OBRACE) {
  1004. elt.type = ELT_OP;
  1005. elt.p.op.op = op_stack;
  1006. elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
  1007. elt.p.op.logical_priority = rspamd_expr_logic_priority (op_stack);
  1008. g_array_append_val (e->expressions, elt);
  1009. if (!rspamd_ast_add_node (e, operand_stack,
  1010. rspamd_expr_dup_elt (pool, &elt), err)) {
  1011. goto error_label;
  1012. }
  1013. }
  1014. else {
  1015. g_set_error (err, rspamd_expr_quark(), 600,
  1016. "Braces mismatch");
  1017. goto error_label;
  1018. }
  1019. }
  1020. if (operand_stack->len != 1) {
  1021. g_set_error (err, rspamd_expr_quark(), 601,
  1022. "Operators mismatch: %d elts in stack", operand_stack->len);
  1023. goto error_label;
  1024. }
  1025. e->ast = rspamd_expr_stack_elt_pop (operand_stack);
  1026. g_ptr_array_free (operand_stack, TRUE);
  1027. /* Set priorities for branches */
  1028. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1029. rspamd_ast_priority_traverse, e);
  1030. /* Now set less expensive branches to be evaluated first */
  1031. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  1032. rspamd_ast_resort_traverse, NULL);
  1033. if (target) {
  1034. *target = e;
  1035. rspamd_mempool_add_destructor (pool,
  1036. (rspamd_mempool_destruct_t)rspamd_expression_destroy, e);
  1037. }
  1038. else {
  1039. rspamd_expression_destroy (e);
  1040. }
  1041. return TRUE;
  1042. error_label:
  1043. if (err && *err) {
  1044. msg_debug_expression ("fatal expression parse error: %e", *err);
  1045. }
  1046. while ((tmp = rspamd_expr_stack_elt_pop (operand_stack)) != NULL) {
  1047. g_node_destroy (tmp);
  1048. }
  1049. g_ptr_array_free (operand_stack, TRUE);
  1050. rspamd_expression_destroy (e);
  1051. return FALSE;
  1052. }
  1053. /*
  1054. * Node optimizer function: skip nodes that are not relevant
  1055. */
  1056. static gboolean
  1057. rspamd_ast_node_done (struct rspamd_expression_elt *elt, gdouble acc)
  1058. {
  1059. gboolean ret = FALSE;
  1060. g_assert (elt->type == ELT_OP);
  1061. switch (elt->p.op.op) {
  1062. case OP_NOT:
  1063. ret = TRUE;
  1064. break;
  1065. case OP_AND:
  1066. ret = acc == 0;
  1067. break;
  1068. case OP_OR:
  1069. ret = acc != 0;
  1070. break;
  1071. default:
  1072. break;
  1073. }
  1074. return ret;
  1075. }
  1076. static gdouble
  1077. rspamd_ast_do_unary_op (struct rspamd_expression_elt *elt, gdouble operand)
  1078. {
  1079. gdouble ret;
  1080. g_assert (elt->type == ELT_OP);
  1081. switch (elt->p.op.op) {
  1082. case OP_NOT:
  1083. ret = fabs (operand) > DBL_EPSILON ? 0.0 : 1.0;
  1084. break;
  1085. default:
  1086. g_assert_not_reached ();
  1087. }
  1088. return ret;
  1089. }
  1090. static gdouble
  1091. rspamd_ast_do_binary_op (struct rspamd_expression_elt *elt, gdouble op1, gdouble op2)
  1092. {
  1093. gdouble ret;
  1094. g_assert (elt->type == ELT_OP);
  1095. switch (elt->p.op.op) {
  1096. case OP_MINUS:
  1097. ret = op1 - op2;
  1098. break;
  1099. case OP_DIVIDE:
  1100. ret = op1 / op2;
  1101. break;
  1102. case OP_GE:
  1103. ret = op1 >= op2;
  1104. break;
  1105. case OP_GT:
  1106. ret = op1 > op2;
  1107. break;
  1108. case OP_LE:
  1109. ret = op1 <= op2;
  1110. break;
  1111. case OP_LT:
  1112. ret = op1 < op2;
  1113. break;
  1114. case OP_EQ:
  1115. ret = op1 == op2;
  1116. break;
  1117. case OP_NE:
  1118. ret = op1 != op2;
  1119. break;
  1120. case OP_NOT:
  1121. case OP_PLUS:
  1122. case OP_MULT:
  1123. case OP_AND:
  1124. case OP_OR:
  1125. default:
  1126. g_assert_not_reached();
  1127. break;
  1128. }
  1129. return ret;
  1130. }
  1131. static gdouble
  1132. rspamd_ast_do_nary_op (struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
  1133. {
  1134. gdouble ret;
  1135. g_assert (elt->type == ELT_OP);
  1136. if (isnan (acc)) {
  1137. return val;
  1138. }
  1139. switch (elt->p.op.op) {
  1140. case OP_PLUS:
  1141. ret = acc + val;
  1142. break;
  1143. case OP_MULT:
  1144. ret = acc * val;
  1145. break;
  1146. case OP_AND:
  1147. ret = (fabs(acc) > DBL_EPSILON) && (fabs(val) > DBL_EPSILON);
  1148. break;
  1149. case OP_OR:
  1150. ret = (fabs(acc) > DBL_EPSILON) || (fabs(val) > DBL_EPSILON);
  1151. break;
  1152. default:
  1153. case OP_NOT:
  1154. case OP_MINUS:
  1155. case OP_DIVIDE:
  1156. case OP_GE:
  1157. case OP_GT:
  1158. case OP_LE:
  1159. case OP_LT:
  1160. case OP_EQ:
  1161. case OP_NE:
  1162. g_assert_not_reached();
  1163. break;
  1164. }
  1165. return ret;
  1166. }
  1167. static gdouble
  1168. rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
  1169. struct rspamd_expr_process_data *process_data)
  1170. {
  1171. struct rspamd_expression_elt *elt;
  1172. GNode *cld;
  1173. gdouble acc = NAN;
  1174. float t1, t2;
  1175. gdouble val;
  1176. gboolean calc_ticks = FALSE;
  1177. const gchar *op_name = NULL;
  1178. elt = node->data;
  1179. switch (elt->type) {
  1180. case ELT_ATOM:
  1181. if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) {
  1182. /*
  1183. * Check once per 256 evaluations approx
  1184. */
  1185. calc_ticks = (rspamd_random_uint64_fast() & 0xff) == 0xff;
  1186. if (calc_ticks) {
  1187. t1 = rspamd_get_ticks (TRUE);
  1188. }
  1189. elt->value = process_data->process_closure (process_data->ud, elt->p.atom);
  1190. if (fabs (elt->value) > DBL_EPSILON) {
  1191. elt->p.atom->hits ++;
  1192. if (process_data->trace) {
  1193. g_ptr_array_add (process_data->trace, elt->p.atom);
  1194. }
  1195. }
  1196. if (calc_ticks) {
  1197. t2 = rspamd_get_ticks (TRUE);
  1198. rspamd_set_counter_ema(&elt->p.atom->exec_time, (t2 - t1), 0.5f);
  1199. }
  1200. elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED;
  1201. }
  1202. acc = elt->value;
  1203. msg_debug_expression_verbose ("atom: elt=%s; acc=%.1f", elt->p.atom->str, acc);
  1204. break;
  1205. case ELT_LIMIT:
  1206. acc = elt->p.lim;
  1207. msg_debug_expression_verbose ("limit: lim=%.1f; acc=%.1f;", elt->p.lim, acc);
  1208. break;
  1209. case ELT_OP:
  1210. g_assert (node->children != NULL);
  1211. op_name = rspamd_expr_op_to_str (elt->p.op.op);
  1212. if (elt->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  1213. msg_debug_expression_verbose ("proceed nary operation %s", op_name);
  1214. /* Proceed all ops in chain */
  1215. DL_FOREACH (node->children, cld) {
  1216. val = rspamd_ast_process_node (e, cld, process_data);
  1217. msg_debug_expression_verbose ("before op: op=%s; acc=%.1f; val = %.2f", op_name,
  1218. acc, val);
  1219. acc = rspamd_ast_do_nary_op (elt, val, acc);
  1220. msg_debug_expression_verbose ("after op: op=%s; acc=%.1f; val = %.2f", op_name,
  1221. acc, val);
  1222. /* Check if we need to process further */
  1223. if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
  1224. if (rspamd_ast_node_done (elt, acc)) {
  1225. msg_debug_expression_verbose ("optimizer: done");
  1226. return acc;
  1227. }
  1228. }
  1229. }
  1230. }
  1231. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_BINARY) {
  1232. GNode *c1 = node->children, *c2;
  1233. c2 = c1->next;
  1234. g_assert (c2->next == NULL);
  1235. gdouble val1, val2;
  1236. msg_debug_expression_verbose ("proceed binary operation %s",
  1237. op_name);
  1238. val1 = rspamd_ast_process_node (e, c1, process_data);
  1239. val2 = rspamd_ast_process_node (e, c2, process_data);
  1240. msg_debug_expression_verbose ("before op: op=%s; op1 = %.1f, op2 = %.1f",
  1241. op_name, val1, val2);
  1242. acc = rspamd_ast_do_binary_op (elt, val1, val2);
  1243. msg_debug_expression_verbose ("after op: op=%s; res=%.1f",
  1244. op_name, acc);
  1245. }
  1246. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  1247. GNode *c1 = node->children;
  1248. g_assert (c1->next == NULL);
  1249. msg_debug_expression_verbose ("proceed unary operation %s",
  1250. op_name);
  1251. val = rspamd_ast_process_node (e, c1, process_data);
  1252. msg_debug_expression_verbose ("before op: op=%s; op1 = %.1f",
  1253. op_name, val);
  1254. acc = rspamd_ast_do_unary_op (elt, val);
  1255. msg_debug_expression_verbose ("after op: op=%s; res=%.1f",
  1256. op_name, acc);
  1257. }
  1258. break;
  1259. }
  1260. return acc;
  1261. }
  1262. static gboolean
  1263. rspamd_ast_cleanup_traverse (GNode *n, gpointer d)
  1264. {
  1265. struct rspamd_expression_elt *elt = n->data;
  1266. elt->value = 0;
  1267. elt->flags = 0;
  1268. return FALSE;
  1269. }
  1270. gdouble
  1271. rspamd_process_expression_closure (struct rspamd_expression *expr,
  1272. rspamd_expression_process_cb cb,
  1273. gint flags,
  1274. gpointer runtime_ud,
  1275. GPtrArray **track)
  1276. {
  1277. struct rspamd_expr_process_data pd;
  1278. gdouble ret = 0;
  1279. g_assert (expr != NULL);
  1280. /* Ensure that stack is empty at this point */
  1281. g_assert (expr->expression_stack->len == 0);
  1282. expr->evals ++;
  1283. memset (&pd, 0, sizeof (pd));
  1284. pd.process_closure = cb;
  1285. pd.flags = flags;
  1286. pd.ud = runtime_ud;
  1287. if (track) {
  1288. pd.trace = g_ptr_array_sized_new (32);
  1289. *track = pd.trace;
  1290. }
  1291. ret = rspamd_ast_process_node (expr, expr->ast, &pd);
  1292. /* Cleanup */
  1293. g_node_traverse (expr->ast, G_IN_ORDER, G_TRAVERSE_ALL, -1,
  1294. rspamd_ast_cleanup_traverse, NULL);
  1295. /* Check if we need to resort */
  1296. if (expr->evals % expr->next_resort == 0) {
  1297. expr->next_resort = ottery_rand_range (MAX_RESORT_EVALS) +
  1298. MIN_RESORT_EVALS;
  1299. /* Set priorities for branches */
  1300. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1301. rspamd_ast_priority_traverse, expr);
  1302. /* Now set less expensive branches to be evaluated first */
  1303. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  1304. rspamd_ast_resort_traverse, NULL);
  1305. }
  1306. return ret;
  1307. }
  1308. gdouble
  1309. rspamd_process_expression_track (struct rspamd_expression *expr,
  1310. gint flags,
  1311. gpointer runtime_ud,
  1312. GPtrArray **track)
  1313. {
  1314. return rspamd_process_expression_closure (expr,
  1315. expr->subr->process, flags, runtime_ud, track);
  1316. }
  1317. gdouble
  1318. rspamd_process_expression (struct rspamd_expression *expr,
  1319. gint flags,
  1320. gpointer runtime_ud)
  1321. {
  1322. return rspamd_process_expression_closure (expr,
  1323. expr->subr->process, flags, runtime_ud, NULL);
  1324. }
  1325. static gboolean
  1326. rspamd_ast_string_traverse (GNode *n, gpointer d)
  1327. {
  1328. GString *res = d;
  1329. gint cnt;
  1330. GNode *cur;
  1331. struct rspamd_expression_elt *elt = n->data;
  1332. const char *op_str = NULL;
  1333. if (elt->type == ELT_ATOM) {
  1334. rspamd_printf_gstring (res, "(%*s)",
  1335. (int)elt->p.atom->len, elt->p.atom->str);
  1336. }
  1337. else if (elt->type == ELT_LIMIT) {
  1338. if (elt->p.lim == (double)(gint64)elt->p.lim) {
  1339. rspamd_printf_gstring (res, "%L", (gint64)elt->p.lim);
  1340. }
  1341. else {
  1342. rspamd_printf_gstring (res, "%f", elt->p.lim);
  1343. }
  1344. }
  1345. else {
  1346. op_str = rspamd_expr_op_to_str (elt->p.op.op);
  1347. g_string_append (res, op_str);
  1348. if (n->children) {
  1349. LL_COUNT(n->children, cur, cnt);
  1350. if (cnt > 2) {
  1351. /* Print n-ary of the operator */
  1352. g_string_append_printf (res, "(%d)", cnt);
  1353. }
  1354. }
  1355. }
  1356. g_string_append_c (res, ' ');
  1357. return FALSE;
  1358. }
  1359. GString *
  1360. rspamd_expression_tostring (struct rspamd_expression *expr)
  1361. {
  1362. GString *res;
  1363. g_assert (expr != NULL);
  1364. res = g_string_new (NULL);
  1365. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1366. rspamd_ast_string_traverse, res);
  1367. /* Last space */
  1368. if (res->len > 0) {
  1369. g_string_erase (res, res->len - 1, 1);
  1370. }
  1371. return res;
  1372. }
  1373. struct atom_foreach_cbdata {
  1374. rspamd_expression_atom_foreach_cb cb;
  1375. gpointer cbdata;
  1376. };
  1377. static gboolean
  1378. rspamd_ast_atom_traverse (GNode *n, gpointer d)
  1379. {
  1380. struct atom_foreach_cbdata *data = d;
  1381. struct rspamd_expression_elt *elt = n->data;
  1382. rspamd_ftok_t tok;
  1383. if (elt->type == ELT_ATOM) {
  1384. tok.begin = elt->p.atom->str;
  1385. tok.len = elt->p.atom->len;
  1386. data->cb (&tok, data->cbdata);
  1387. }
  1388. return FALSE;
  1389. }
  1390. void
  1391. rspamd_expression_atom_foreach (struct rspamd_expression *expr,
  1392. rspamd_expression_atom_foreach_cb cb, gpointer cbdata)
  1393. {
  1394. struct atom_foreach_cbdata data;
  1395. g_assert (expr != NULL);
  1396. data.cb = cb;
  1397. data.cbdata = cbdata;
  1398. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1399. rspamd_ast_atom_traverse, &data);
  1400. }
  1401. gboolean
  1402. rspamd_expression_node_is_op (GNode *node, enum rspamd_expression_op op)
  1403. {
  1404. struct rspamd_expression_elt *elt;
  1405. g_assert (node != NULL);
  1406. elt = node->data;
  1407. if (elt->type == ELT_OP && elt->p.op.op == op) {
  1408. return TRUE;
  1409. }
  1410. return FALSE;
  1411. }