You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

expression.c 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "expression.h"
  18. #include "printf.h"
  19. #include "regexp.h"
  20. #include "util.h"
  21. #include "utlist.h"
  22. #include "ottery.h"
  23. #include "libserver/logger.h"
  24. #include "libcryptobox/cryptobox.h"
  25. #include <math.h>
  /*
   * Element flag bits.
   * NOTE(review): presumably stored in rspamd_expression_elt.flags - confirm.
   */
  #define RSPAMD_EXPR_FLAG_NEGATE (1 << 0)
  #define RSPAMD_EXPR_FLAG_PROCESSED (1 << 1)

  /*
   * Resort scheduling: a new expression gets
   * next_resort = ottery_rand_range(MAX_RESORT_EVALS) + MIN_RESORT_EVALS
   */
  #define MIN_RESORT_EVALS 50
  #define MAX_RESORT_EVALS 150
  /* Discriminator for the payload union of struct rspamd_expression_elt */
  enum rspamd_expression_elt_type {
      ELT_OP = 0, /* operator: payload is p.op */
      ELT_ATOM,   /* atom: payload is p.atom */
      ELT_LIMIT   /* numeric literal: payload is p.lim */
  };
  /* Bit flags describing an operator, as returned by rspamd_expr_op_flags() */
  enum rspamd_expression_op_flag {
      /* Arity */
      RSPAMD_EXPRESSION_UNARY = 1u << 0u,
      RSPAMD_EXPRESSION_BINARY = 1u << 1u,
      RSPAMD_EXPRESSION_NARY = 1u << 2u,
      /* Operator class */
      RSPAMD_EXPRESSION_ARITHMETIC = 1u << 3u,
      RSPAMD_EXPRESSION_LOGICAL = 1u << 4u,
      RSPAMD_EXPRESSION_COMPARISON = 1u << 5u,
  };
  43. struct rspamd_expression_operation {
  44. enum rspamd_expression_op op;
  45. guint logical_priority;
  46. guint op_flags;
  47. };
  48. struct rspamd_expression_elt {
  49. enum rspamd_expression_elt_type type;
  50. union {
  51. rspamd_expression_atom_t *atom;
  52. struct rspamd_expression_operation op;
  53. gdouble lim;
  54. } p;
  55. gint flags;
  56. gint priority;
  57. gdouble value;
  58. };
  59. struct rspamd_expression {
  60. const struct rspamd_atom_subr *subr;
  61. GArray *expressions;
  62. GPtrArray *expression_stack;
  63. GNode *ast;
  64. gchar *log_id;
  65. guint next_resort;
  66. guint evals;
  67. };
  68. struct rspamd_expr_process_data {
  69. gpointer *ud;
  70. gint flags;
  71. /* != NULL if trace is collected */
  72. GPtrArray *trace;
  73. rspamd_expression_process_cb process_closure;
  74. };
  75. #define msg_debug_expression(...) rspamd_conditional_debug_fast(NULL, NULL, \
  76. rspamd_expression_log_id, "expression", e->log_id, \
  77. RSPAMD_LOG_FUNC, \
  78. __VA_ARGS__)
  79. #ifdef DEBUG_EXPRESSIONS
  80. #define msg_debug_expression_verbose(...) rspamd_conditional_debug_fast(NULL, NULL, \
  81. rspamd_expression_log_id, "expression", e->log_id, \
  82. RSPAMD_LOG_FUNC, \
  83. __VA_ARGS__)
  84. #else
  85. #define msg_debug_expression_verbose(...) \
  86. do { \
  87. } while (0)
  88. #endif
  89. INIT_LOG_MODULE(expression)
  90. static GQuark
  91. rspamd_expr_quark(void)
  92. {
  93. return g_quark_from_static_string("rspamd-expression");
  94. }
  95. static const gchar *RSPAMD_CONST_FUNCTION
  96. rspamd_expr_op_to_str(enum rspamd_expression_op op);
  97. static const gchar *
  98. rspamd_expr_op_to_str(enum rspamd_expression_op op)
  99. {
  100. const gchar *op_str = NULL;
  101. switch (op) {
  102. case OP_AND:
  103. op_str = "&";
  104. break;
  105. case OP_OR:
  106. op_str = "|";
  107. break;
  108. case OP_MULT:
  109. op_str = "*";
  110. break;
  111. case OP_PLUS:
  112. op_str = "+";
  113. break;
  114. case OP_MINUS:
  115. op_str = "-";
  116. break;
  117. case OP_DIVIDE:
  118. op_str = "/";
  119. break;
  120. case OP_NOT:
  121. op_str = "!";
  122. break;
  123. case OP_GE:
  124. op_str = ">=";
  125. break;
  126. case OP_GT:
  127. op_str = ">";
  128. break;
  129. case OP_LE:
  130. op_str = "<=";
  131. break;
  132. case OP_LT:
  133. op_str = "<";
  134. break;
  135. case OP_EQ:
  136. op_str = "==";
  137. break;
  138. case OP_NE:
  139. op_str = "!=";
  140. break;
  141. case OP_OBRACE:
  142. op_str = "(";
  143. break;
  144. case OP_CBRACE:
  145. op_str = ")";
  146. break;
  147. default:
  148. op_str = "???";
  149. break;
  150. }
  151. return op_str;
  152. }
  /* Pointer to the last element of a GArray; the array must not be empty */
  #define G_ARRAY_LAST(arr, elt_type) (&g_array_index((arr), elt_type, (arr)->len - 1))
  154. static void
  155. rspamd_expr_stack_elt_push(GPtrArray *stack,
  156. gpointer elt)
  157. {
  158. g_ptr_array_add(stack, elt);
  159. }
  160. static gpointer
  161. rspamd_expr_stack_elt_pop(GPtrArray *stack)
  162. {
  163. gpointer e;
  164. gint idx;
  165. if (stack->len == 0) {
  166. return NULL;
  167. }
  168. idx = stack->len - 1;
  169. e = g_ptr_array_index(stack, idx);
  170. g_ptr_array_remove_index_fast(stack, idx);
  171. return e;
  172. }
  173. static void
  174. rspamd_expr_stack_push(struct rspamd_expression *expr,
  175. gpointer elt)
  176. {
  177. rspamd_expr_stack_elt_push(expr->expression_stack, elt);
  178. }
  179. static gpointer
  180. rspamd_expr_stack_pop(struct rspamd_expression *expr)
  181. {
  182. return rspamd_expr_stack_elt_pop(expr->expression_stack);
  183. }
  184. static gpointer
  185. rspamd_expr_stack_peek(struct rspamd_expression *expr)
  186. {
  187. gpointer e;
  188. gint idx;
  189. GPtrArray *stack = expr->expression_stack;
  190. if (stack->len == 0) {
  191. return NULL;
  192. }
  193. idx = stack->len - 1;
  194. e = g_ptr_array_index(stack, idx);
  195. return e;
  196. }
  197. /*
  198. * Return operation priority
  199. */
  200. static gint RSPAMD_CONST_FUNCTION
  201. rspamd_expr_logic_priority(enum rspamd_expression_op op);
  202. static gint
  203. rspamd_expr_logic_priority(enum rspamd_expression_op op)
  204. {
  205. gint ret = 0;
  206. switch (op) {
  207. case OP_NOT:
  208. ret = 7;
  209. break;
  210. case OP_MULT:
  211. case OP_DIVIDE:
  212. ret = 6;
  213. break;
  214. case OP_PLUS:
  215. case OP_MINUS:
  216. ret = 5;
  217. break;
  218. case OP_GE:
  219. case OP_GT:
  220. case OP_LE:
  221. case OP_LT:
  222. case OP_EQ:
  223. case OP_NE:
  224. ret = 4;
  225. break;
  226. case OP_AND:
  227. ret = 3;
  228. break;
  229. case OP_OR:
  230. ret = 2;
  231. break;
  232. case OP_OBRACE:
  233. case OP_CBRACE:
  234. ret = 1;
  235. break;
  236. case OP_INVALID:
  237. ret = -1;
  238. break;
  239. }
  240. return ret;
  241. }
  242. static guint RSPAMD_CONST_FUNCTION
  243. rspamd_expr_op_flags(enum rspamd_expression_op op);
  244. static guint
  245. rspamd_expr_op_flags(enum rspamd_expression_op op)
  246. {
  247. guint ret = 0;
  248. switch (op) {
  249. case OP_NOT:
  250. ret |= RSPAMD_EXPRESSION_UNARY | RSPAMD_EXPRESSION_LOGICAL;
  251. break;
  252. case OP_MULT:
  253. ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_ARITHMETIC;
  254. break;
  255. case OP_DIVIDE:
  256. ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_ARITHMETIC;
  257. break;
  258. case OP_PLUS:
  259. ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_ARITHMETIC;
  260. break;
  261. case OP_MINUS:
  262. ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_ARITHMETIC;
  263. break;
  264. case OP_GE:
  265. case OP_GT:
  266. case OP_LE:
  267. case OP_LT:
  268. case OP_EQ:
  269. case OP_NE:
  270. ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_COMPARISON;
  271. break;
  272. case OP_AND:
  273. case OP_OR:
  274. ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_LOGICAL;
  275. break;
  276. case OP_OBRACE:
  277. case OP_CBRACE:
  278. case OP_INVALID:
  279. break;
  280. }
  281. return ret;
  282. }
  283. /*
  284. * Return FALSE if symbol is not operation symbol (operand)
  285. * Return TRUE if symbol is operation symbol
  286. */
  287. static gboolean RSPAMD_CONST_FUNCTION
  288. rspamd_expr_is_operation_symbol(gchar a);
  289. static gboolean
  290. rspamd_expr_is_operation_symbol(gchar a)
  291. {
  292. switch (a) {
  293. case '!':
  294. case '&':
  295. case '|':
  296. case '(':
  297. case ')':
  298. case '>':
  299. case '<':
  300. case '+':
  301. case '*':
  302. case '-':
  303. case '/':
  304. case '=':
  305. return TRUE;
  306. }
  307. return FALSE;
  308. }
  309. static gboolean
  310. rspamd_expr_is_operation(struct rspamd_expression *e,
  311. const gchar *p, const gchar *end, rspamd_regexp_t *num_re)
  312. {
  313. if (rspamd_expr_is_operation_symbol(*p)) {
  314. if (p + 1 < end) {
  315. gchar t = *(p + 1);
  316. if (t == ':') {
  317. /* Special case, treat it as an atom */
  318. }
  319. else if (*p == '/') {
  320. /* Lookahead for division operation to distinguish from regexp */
  321. const gchar *track = p + 1;
  322. /* Skip spaces */
  323. while (track < end && g_ascii_isspace(*track)) {
  324. track++;
  325. }
  326. /* Check for a number */
  327. if (rspamd_regexp_search(num_re,
  328. track,
  329. end - track,
  330. NULL,
  331. NULL,
  332. FALSE,
  333. NULL)) {
  334. msg_debug_expression_verbose("found divide operation");
  335. return TRUE;
  336. }
  337. msg_debug_expression_verbose("false divide operation");
  338. /* Fallback to PARSE_ATOM state */
  339. }
  340. else if (*p == '-') {
  341. /* - is used in composites, so we need to distinguish - from
  342. * 1) unary minus of a limit!
  343. * 2) -BLAH in composites
  344. * Decision is simple: require a space after binary `-` op
  345. */
  346. if (g_ascii_isspace(t)) {
  347. return TRUE;
  348. }
  349. /* Fallback to PARSE_ATOM state */
  350. msg_debug_expression_verbose("false minus operation");
  351. }
  352. else {
  353. /* Generic operation */
  354. return TRUE;
  355. }
  356. }
  357. else {
  358. /* Last op */
  359. return TRUE;
  360. }
  361. }
  362. return FALSE;
  363. }
  364. /* Return character representation of operation */
  365. static enum rspamd_expression_op
  366. rspamd_expr_str_to_op(const gchar *a, const gchar *end, const gchar **next)
  367. {
  368. enum rspamd_expression_op op = OP_INVALID;
  369. g_assert(a < end);
  370. switch (*a) {
  371. case '!':
  372. case '&':
  373. case '|':
  374. case '+':
  375. case '*':
  376. case '/':
  377. case '-':
  378. case '(':
  379. case ')':
  380. case '=': {
  381. if (a < end - 1) {
  382. if ((a[0] == '&' && a[1] == '&') ||
  383. (a[0] == '|' && a[1] == '|') ||
  384. (a[0] == '!' && a[1] == '=') ||
  385. (a[0] == '=' && a[1] == '=')) {
  386. *next = a + 2;
  387. }
  388. else {
  389. *next = a + 1;
  390. }
  391. }
  392. else {
  393. *next = end;
  394. }
  395. /* XXX: not especially effective */
  396. switch (*a) {
  397. case '!':
  398. if (a < end - 1 && a[1] == '=') {
  399. op = OP_NE;
  400. }
  401. else {
  402. op = OP_NOT;
  403. }
  404. break;
  405. case '&':
  406. op = OP_AND;
  407. break;
  408. case '*':
  409. op = OP_MULT;
  410. break;
  411. case '|':
  412. op = OP_OR;
  413. break;
  414. case '+':
  415. op = OP_PLUS;
  416. break;
  417. case '/':
  418. op = OP_DIVIDE;
  419. break;
  420. case '-':
  421. op = OP_MINUS;
  422. break;
  423. case '=':
  424. op = OP_EQ;
  425. break;
  426. case ')':
  427. op = OP_CBRACE;
  428. break;
  429. case '(':
  430. op = OP_OBRACE;
  431. break;
  432. default:
  433. op = OP_INVALID;
  434. break;
  435. }
  436. break;
  437. }
  438. case 'O':
  439. case 'o':
  440. if ((gulong) (end - a) >= sizeof("or") &&
  441. g_ascii_strncasecmp(a, "or", sizeof("or") - 1) == 0) {
  442. *next = a + sizeof("or") - 1;
  443. op = OP_OR;
  444. }
  445. break;
  446. case 'A':
  447. case 'a':
  448. if ((gulong) (end - a) >= sizeof("and") &&
  449. g_ascii_strncasecmp(a, "and", sizeof("and") - 1) == 0) {
  450. *next = a + sizeof("and") - 1;
  451. op = OP_AND;
  452. }
  453. break;
  454. case 'N':
  455. case 'n':
  456. if ((gulong) (end - a) >= sizeof("not") &&
  457. g_ascii_strncasecmp(a, "not", sizeof("not") - 1) == 0) {
  458. *next = a + sizeof("not") - 1;
  459. op = OP_NOT;
  460. }
  461. break;
  462. case '>':
  463. if (a < end - 1 && a[1] == '=') {
  464. *next = a + 2;
  465. op = OP_GE;
  466. }
  467. else {
  468. *next = a + 1;
  469. op = OP_GT;
  470. }
  471. break;
  472. case '<':
  473. if (a < end - 1 && a[1] == '=') {
  474. *next = a + 2;
  475. op = OP_LE;
  476. }
  477. else {
  478. *next = a + 1;
  479. op = OP_LT;
  480. }
  481. break;
  482. default:
  483. op = OP_INVALID;
  484. break;
  485. }
  486. return op;
  487. }
  488. static void
  489. rspamd_expression_destroy(struct rspamd_expression *expr)
  490. {
  491. guint i;
  492. struct rspamd_expression_elt *elt;
  493. if (expr != NULL) {
  494. if (expr->subr->destroy) {
  495. /* Free atoms */
  496. for (i = 0; i < expr->expressions->len; i++) {
  497. elt = &g_array_index(expr->expressions,
  498. struct rspamd_expression_elt, i);
  499. if (elt->type == ELT_ATOM) {
  500. expr->subr->destroy(elt->p.atom);
  501. }
  502. }
  503. }
  504. if (expr->expressions) {
  505. g_array_free(expr->expressions, TRUE);
  506. }
  507. if (expr->expression_stack) {
  508. g_ptr_array_free(expr->expression_stack, TRUE);
  509. }
  510. if (expr->ast) {
  511. g_node_destroy(expr->ast);
  512. }
  513. if (expr->log_id) {
  514. g_free(expr->log_id);
  515. }
  516. g_free(expr);
  517. }
  518. }
  519. static gboolean
  520. rspamd_ast_add_node(struct rspamd_expression *e,
  521. GPtrArray *operands,
  522. struct rspamd_expression_elt *op,
  523. GError **err)
  524. {
  525. GNode *res, *a1, *a2, *test;
  526. g_assert(op->type == ELT_OP);
  527. if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  528. /* Unary operator */
  529. struct rspamd_expression_elt *test_elt;
  530. res = g_node_new(op);
  531. a1 = rspamd_expr_stack_elt_pop(operands);
  532. if (a1 == NULL) {
  533. g_set_error(err, rspamd_expr_quark(), EINVAL, "no operand to "
  534. "unary '%s' operation",
  535. rspamd_expr_op_to_str(op->p.op.op));
  536. g_node_destroy(res);
  537. return FALSE;
  538. }
  539. g_node_append(res, a1);
  540. test_elt = a1->data;
  541. if (test_elt->type == ELT_ATOM) {
  542. test_elt->p.atom->parent = res;
  543. msg_debug_expression("added unary op %s to AST; operand: %*s",
  544. rspamd_expr_op_to_str(op->p.op.op),
  545. (int) test_elt->p.atom->len, test_elt->p.atom->str);
  546. }
  547. else {
  548. msg_debug_expression("added unary op %s to AST; operand type: %d",
  549. rspamd_expr_op_to_str(op->p.op.op),
  550. test_elt->type);
  551. }
  552. }
  553. else {
  554. struct rspamd_expression_elt *e1, *e2;
  555. /* For binary/nary operators we might want to examine chains */
  556. a2 = rspamd_expr_stack_elt_pop(operands);
  557. a1 = rspamd_expr_stack_elt_pop(operands);
  558. if (a2 == NULL) {
  559. g_set_error(err, rspamd_expr_quark(), EINVAL, "no left operand to "
  560. "'%s' operation",
  561. rspamd_expr_op_to_str(op->p.op.op));
  562. return FALSE;
  563. }
  564. if (a1 == NULL) {
  565. g_set_error(err, rspamd_expr_quark(), EINVAL, "no right operand to "
  566. "'%s' operation",
  567. rspamd_expr_op_to_str(op->p.op.op));
  568. return FALSE;
  569. }
  570. /* Nary stuff */
  571. if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  572. /*
  573. * We convert a set of ops like X + Y + Z to a nary tree like
  574. * X Y Z +
  575. * for the longest possible prefix of atoms/limits
  576. */
  577. /* First try with a1 */
  578. test = a1;
  579. e1 = test->data;
  580. if (e1->type == ELT_OP && e1->p.op.op == op->p.op.op) {
  581. /* Add children */
  582. g_node_append(test, a2);
  583. rspamd_expr_stack_elt_push(operands, a1);
  584. msg_debug_expression("added nary op %s to AST merged with the first operand",
  585. rspamd_expr_op_to_str(op->p.op.op));
  586. return TRUE;
  587. }
  588. /* Now test a2 */
  589. test = a2;
  590. e2 = test->data;
  591. if (e2->type == ELT_OP && e2->p.op.op == op->p.op.op) {
  592. /* Add children */
  593. g_node_prepend(test, a1);
  594. rspamd_expr_stack_elt_push(operands, a2);
  595. msg_debug_expression("added nary op %s to AST merged with the second operand",
  596. rspamd_expr_op_to_str(op->p.op.op));
  597. return TRUE;
  598. }
  599. }
  600. /* No optimizations possible, so create a new level */
  601. res = g_node_new(op);
  602. g_node_append(res, a1);
  603. g_node_append(res, a2);
  604. e1 = a1->data;
  605. e2 = a2->data;
  606. if (e1->type == ELT_ATOM) {
  607. e1->p.atom->parent = res;
  608. }
  609. if (e2->type == ELT_ATOM) {
  610. e2->p.atom->parent = res;
  611. }
  612. if (e1->type == ELT_ATOM && e2->type == ELT_ATOM) {
  613. msg_debug_expression("added binary op %s to AST; operands: (%*s; %*s)",
  614. rspamd_expr_op_to_str(op->p.op.op),
  615. (int) e1->p.atom->len, e1->p.atom->str,
  616. (int) e2->p.atom->len, e2->p.atom->str);
  617. }
  618. else {
  619. msg_debug_expression("added binary op %s to AST; operands (types): (%d; %d)",
  620. rspamd_expr_op_to_str(op->p.op.op),
  621. e1->type,
  622. e2->type);
  623. }
  624. }
  625. /* Push back resulting node to the stack */
  626. rspamd_expr_stack_elt_push(operands, res);
  627. return TRUE;
  628. }
  629. static gboolean
  630. rspamd_ast_priority_traverse(GNode *node, gpointer d)
  631. {
  632. struct rspamd_expression_elt *elt = node->data, *cur_elt;
  633. struct rspamd_expression *expr = d;
  634. gint cnt = 0;
  635. GNode *cur;
  636. if (node->children) {
  637. cur = node->children;
  638. while (cur) {
  639. cur_elt = cur->data;
  640. cnt += cur_elt->priority;
  641. cur = cur->next;
  642. }
  643. elt->priority = cnt;
  644. }
  645. else {
  646. /* It is atom or limit */
  647. g_assert(elt->type != ELT_OP);
  648. if (elt->type == ELT_LIMIT) {
  649. /* Always push limit first */
  650. elt->priority = 0;
  651. }
  652. else {
  653. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY;
  654. if (expr->subr->priority != NULL) {
  655. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY -
  656. expr->subr->priority(elt->p.atom);
  657. }
  658. elt->p.atom->hits = 0;
  659. }
  660. }
  661. return FALSE;
  662. }
  663. #define ATOM_PRIORITY(a) ((a)->p.atom->hits / ((a)->p.atom->exec_time.mean > 0 ? (a)->p.atom->exec_time.mean * 10000000 : 1.0))
  664. static gint
  665. rspamd_ast_priority_cmp(GNode *a, GNode *b)
  666. {
  667. struct rspamd_expression_elt *ea = a->data, *eb = b->data;
  668. gdouble w1, w2;
  669. if (ea->type == ELT_LIMIT) {
  670. return 1;
  671. }
  672. else if (eb->type == ELT_LIMIT) {
  673. return -1;
  674. }
  675. /* Special logic for atoms */
  676. if (ea->type == ELT_ATOM && eb->type == ELT_ATOM &&
  677. ea->priority == eb->priority) {
  678. w1 = ATOM_PRIORITY(ea);
  679. w2 = ATOM_PRIORITY(eb);
  680. ea->p.atom->hits = 0;
  681. return w1 - w2;
  682. }
  683. else {
  684. return ea->priority - eb->priority;
  685. }
  686. }
  687. static gboolean
  688. rspamd_ast_resort_traverse(GNode *node, gpointer unused)
  689. {
  690. GNode *children, *last;
  691. struct rspamd_expression_elt *elt;
  692. elt = (struct rspamd_expression_elt *) node->data;
  693. /*
  694. * We sort merely logical operations, everything else is dangerous
  695. */
  696. if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) {
  697. if (node->children) {
  698. children = node->children;
  699. last = g_node_last_sibling(children);
  700. /* Needed for utlist compatibility */
  701. children->prev = last;
  702. DL_SORT(node->children, rspamd_ast_priority_cmp);
  703. /* Restore GLIB compatibility */
  704. children = node->children;
  705. children->prev = NULL;
  706. }
  707. }
  708. return FALSE;
  709. }
  710. static struct rspamd_expression_elt *
  711. rspamd_expr_dup_elt(rspamd_mempool_t *pool, struct rspamd_expression_elt *elt)
  712. {
  713. struct rspamd_expression_elt *n;
  714. n = rspamd_mempool_alloc(pool, sizeof(*n));
  715. memcpy(n, elt, sizeof(*n));
  716. return n;
  717. }
  718. gboolean
  719. rspamd_parse_expression(const gchar *line, gsize len,
  720. const struct rspamd_atom_subr *subr, gpointer subr_data,
  721. rspamd_mempool_t *pool, GError **err,
  722. struct rspamd_expression **target)
  723. {
  724. struct rspamd_expression *e;
  725. struct rspamd_expression_elt elt;
  726. rspamd_expression_atom_t *atom;
  727. rspamd_regexp_t *num_re;
  728. enum rspamd_expression_op op, op_stack;
  729. const gchar *p, *c, *end;
  730. GPtrArray *operand_stack;
  731. GNode *tmp;
  732. enum {
  733. PARSE_ATOM = 0,
  734. PARSE_OP,
  735. PARSE_LIM,
  736. SKIP_SPACES
  737. } state = PARSE_ATOM;
  738. g_assert(line != NULL);
  739. g_assert(subr != NULL && subr->parse != NULL);
  740. if (len == 0) {
  741. len = strlen(line);
  742. }
  743. memset(&elt, 0, sizeof(elt));
  744. num_re = rspamd_regexp_cache_create(NULL,
  745. "/^(?:[+-]?([0-9]*[.])?[0-9]+)(?:\\s+|[)]|$)/", NULL, NULL);
  746. p = line;
  747. c = line;
  748. end = line + len;
  749. e = g_malloc0(sizeof(*e));
  750. e->expressions = g_array_new(FALSE, FALSE,
  751. sizeof(struct rspamd_expression_elt));
  752. operand_stack = g_ptr_array_sized_new(32);
  753. e->ast = NULL;
  754. e->expression_stack = g_ptr_array_sized_new(32);
  755. e->subr = subr;
  756. e->evals = 0;
  757. e->next_resort = ottery_rand_range(MAX_RESORT_EVALS) + MIN_RESORT_EVALS;
  758. e->log_id = g_malloc0(RSPAMD_LOG_ID_LEN + 1);
  759. uint64_t h = rspamd_cryptobox_fast_hash(line, len, 0xdeadbabe);
  760. rspamd_snprintf(e->log_id, RSPAMD_LOG_ID_LEN + 1, "%xL", h);
  761. msg_debug_expression("start to parse expression '%*s'", (int) len, line);
  762. /* Shunting-yard algorithm */
  763. while (p < end) {
  764. switch (state) {
  765. case PARSE_ATOM:
  766. if (g_ascii_isspace(*p)) {
  767. state = SKIP_SPACES;
  768. continue;
  769. }
  770. else if (rspamd_expr_is_operation(e, p, end, num_re)) {
  771. /* Lookahead */
  772. state = PARSE_OP;
  773. continue;
  774. }
  775. /*
  776. * First of all, we check some pre-conditions:
  777. * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op
  778. * 2) if we have full numeric string, then we check for
  779. * the following expression:
  780. * ^\d+\s*[><]$
  781. * and check the operation on stack
  782. */
  783. if ((gulong) (end - p) > sizeof("and ") &&
  784. (g_ascii_strncasecmp(p, "and ", sizeof("and ") - 1) == 0 ||
  785. g_ascii_strncasecmp(p, "not ", sizeof("not ") - 1) == 0)) {
  786. state = PARSE_OP;
  787. }
  788. else if ((gulong) (end - p) > sizeof("or ") &&
  789. g_ascii_strncasecmp(p, "or ", sizeof("or ") - 1) == 0) {
  790. state = PARSE_OP;
  791. }
  792. else {
  793. /*
  794. * If we have any comparison or arithmetic operator in the stack, then try
  795. * to parse limit
  796. */
  797. op = GPOINTER_TO_INT(rspamd_expr_stack_peek(e));
  798. if (op == OP_MULT || op == OP_MINUS || op == OP_DIVIDE ||
  799. op == OP_PLUS || (op >= OP_LT && op <= OP_NE)) {
  800. if (rspamd_regexp_search(num_re,
  801. p,
  802. end - p,
  803. NULL,
  804. NULL,
  805. FALSE,
  806. NULL)) {
  807. c = p;
  808. state = PARSE_LIM;
  809. continue;
  810. }
  811. /* Fallback to atom parsing */
  812. }
  813. /* Try to parse atom */
  814. atom = subr->parse(p, end - p, pool, subr_data, err);
  815. if (atom == NULL || atom->len == 0) {
  816. /* We couldn't parse the atom, so go out */
  817. if (err != NULL && *err == NULL) {
  818. g_set_error(err,
  819. rspamd_expr_quark(),
  820. 500,
  821. "Cannot parse atom: callback function failed"
  822. " to parse '%.*s'",
  823. (int) (end - p),
  824. p);
  825. }
  826. goto error_label;
  827. }
  828. if (atom->str == NULL) {
  829. atom->str = p;
  830. }
  831. p = p + atom->len;
  832. /* Push to output */
  833. elt.type = ELT_ATOM;
  834. elt.p.atom = atom;
  835. g_array_append_val(e->expressions, elt);
  836. rspamd_expr_stack_elt_push(operand_stack,
  837. g_node_new(rspamd_expr_dup_elt(pool, &elt)));
  838. msg_debug_expression("found atom: %*s; pushed onto operand stack (%d size)",
  839. (int) atom->len, atom->str, operand_stack->len);
  840. }
  841. break;
  842. case PARSE_LIM:
  843. if ((g_ascii_isdigit(*p) || *p == '-' || *p == '.') && p < end - 1) {
  844. p++;
  845. }
  846. else {
  847. if (p == end - 1 && g_ascii_isdigit(*p)) {
  848. p++;
  849. }
  850. if (p - c > 0) {
  851. elt.type = ELT_LIMIT;
  852. elt.p.lim = strtod(c, NULL);
  853. g_array_append_val(e->expressions, elt);
  854. rspamd_expr_stack_elt_push(operand_stack,
  855. g_node_new(rspamd_expr_dup_elt(pool, &elt)));
  856. msg_debug_expression("found limit: %.1f; pushed onto operand stack (%d size)",
  857. elt.p.lim, operand_stack->len);
  858. c = p;
  859. state = SKIP_SPACES;
  860. }
  861. else {
  862. g_set_error(err, rspamd_expr_quark(), 400, "Empty number");
  863. goto error_label;
  864. }
  865. }
  866. break;
  867. case PARSE_OP:
  868. op = rspamd_expr_str_to_op(p, end, &p);
  869. if (op == OP_INVALID) {
  870. g_set_error(err, rspamd_expr_quark(), 500, "Bad operator %c",
  871. *p);
  872. goto error_label;
  873. }
  874. else if (op == OP_OBRACE) {
  875. /*
  876. * If the token is a left parenthesis, then push it onto
  877. * the stack.
  878. */
  879. rspamd_expr_stack_push(e, GINT_TO_POINTER(op));
  880. msg_debug_expression("found obrace, pushed to operators stack (%d size)",
  881. e->expression_stack->len);
  882. }
  883. else if (op == OP_CBRACE) {
  884. /*
  885. * Until the token at the top of the stack is a left
  886. * parenthesis, pop operators off the stack onto the
  887. * output queue.
  888. *
  889. * Pop the left parenthesis from the stack,
  890. * but not onto the output queue.
  891. *
  892. * If the stack runs out without finding a left parenthesis,
  893. * then there are mismatched parentheses.
  894. */
  895. msg_debug_expression("found cbrace, rewind operators stack (%d size)",
  896. e->expression_stack->len);
  897. do {
  898. op = GPOINTER_TO_INT(rspamd_expr_stack_pop(e));
  899. if (op == OP_INVALID) {
  900. g_set_error(err, rspamd_expr_quark(), 600,
  901. "Braces mismatch");
  902. goto error_label;
  903. }
  904. guint op_priority = rspamd_expr_logic_priority(op);
  905. msg_debug_expression("found op: %s; priority = %d",
  906. rspamd_expr_op_to_str(op), op_priority);
  907. if (op != OP_OBRACE) {
  908. elt.type = ELT_OP;
  909. elt.p.op.op = op;
  910. elt.p.op.op_flags = rspamd_expr_op_flags(op);
  911. elt.p.op.logical_priority = op_priority;
  912. g_array_append_val(e->expressions, elt);
  913. if (!rspamd_ast_add_node(e, operand_stack,
  914. rspamd_expr_dup_elt(pool, &elt), err)) {
  915. goto error_label;
  916. }
  917. }
  918. } while (op != OP_OBRACE);
  919. }
  920. else {
  921. /*
  922. * While there is an operator token, o2, at the top of
  923. * the operator stack, and either:
  924. *
  925. * - o1 is left-associative and its precedence is less than
  926. * or equal to that of o2, or
  927. * - o1 is right associative, and has precedence less than
  928. * that of o2,
  929. *
  930. * then pop o2 off the operator stack, onto the output queue;
  931. *
  932. * push o1 onto the operator stack.
  933. */
  934. for (;;) {
  935. op_stack = GPOINTER_TO_INT(rspamd_expr_stack_pop(e));
  936. if (op_stack == OP_INVALID) {
  937. /* Stack is empty */
  938. msg_debug_expression("no operations in operators stack");
  939. break;
  940. }
  941. /* We ignore associativity for now */
  942. guint op_priority = rspamd_expr_logic_priority(op),
  943. stack_op_priority = rspamd_expr_logic_priority(op_stack);
  944. msg_debug_expression("operators stack %d; operands stack: %d; "
  945. "process operation '%s'(%d); pop operation '%s'(%d)",
  946. e->expression_stack->len,
  947. operand_stack->len,
  948. rspamd_expr_op_to_str(op), op_priority,
  949. rspamd_expr_op_to_str(op_stack), stack_op_priority);
  950. if (op_stack != OP_OBRACE &&
  951. op_priority < stack_op_priority) {
  952. elt.type = ELT_OP;
  953. elt.p.op.op = op_stack;
  954. elt.p.op.op_flags = rspamd_expr_op_flags(op_stack);
  955. elt.p.op.logical_priority = op_priority;
  956. g_array_append_val(e->expressions, elt);
  957. if (!rspamd_ast_add_node(e, operand_stack,
  958. rspamd_expr_dup_elt(pool, &elt), err)) {
  959. goto error_label;
  960. }
  961. }
  962. else {
  963. /* Push op_stack back */
  964. msg_debug_expression("operators stack %d; operands stack: %d; "
  965. "process operation '%s'(%d); push back to stack '%s'(%d)",
  966. e->expression_stack->len,
  967. operand_stack->len,
  968. rspamd_expr_op_to_str(op), op_priority,
  969. rspamd_expr_op_to_str(op_stack), stack_op_priority);
  970. rspamd_expr_stack_push(e, GINT_TO_POINTER(op_stack));
  971. break;
  972. }
  973. }
  974. /* Push new operator itself */
  975. msg_debug_expression("operators stack %d; operands stack: %d; "
  976. "process operation '%s'; push to stack",
  977. e->expression_stack->len,
  978. operand_stack->len,
  979. rspamd_expr_op_to_str(op));
  980. rspamd_expr_stack_push(e, GINT_TO_POINTER(op));
  981. }
  982. state = SKIP_SPACES;
  983. break;
  984. case SKIP_SPACES:
  985. if (g_ascii_isspace(*p)) {
  986. p++;
  987. }
  988. if (rspamd_expr_is_operation(e, p, end, num_re)) {
  989. /* Lookahead */
  990. state = PARSE_OP;
  991. }
  992. else {
  993. state = PARSE_ATOM;
  994. }
  995. break;
  996. }
  997. }
  998. /* Now we process the stack and push operators to the output */
  999. while ((op_stack = GPOINTER_TO_INT(rspamd_expr_stack_pop(e))) != OP_INVALID) {
  1000. msg_debug_expression("operators stack %d; operands stack: %d; "
  1001. "rewind stack; op: %s",
  1002. e->expression_stack->len,
  1003. operand_stack->len,
  1004. rspamd_expr_op_to_str(op_stack));
  1005. if (op_stack != OP_OBRACE) {
  1006. elt.type = ELT_OP;
  1007. elt.p.op.op = op_stack;
  1008. elt.p.op.op_flags = rspamd_expr_op_flags(op_stack);
  1009. elt.p.op.logical_priority = rspamd_expr_logic_priority(op_stack);
  1010. g_array_append_val(e->expressions, elt);
  1011. if (!rspamd_ast_add_node(e, operand_stack,
  1012. rspamd_expr_dup_elt(pool, &elt), err)) {
  1013. goto error_label;
  1014. }
  1015. }
  1016. else {
  1017. g_set_error(err, rspamd_expr_quark(), 600,
  1018. "Braces mismatch");
  1019. goto error_label;
  1020. }
  1021. }
  1022. if (operand_stack->len != 1) {
  1023. g_set_error(err, rspamd_expr_quark(), 601,
  1024. "Operators mismatch: %d elts in stack", operand_stack->len);
  1025. goto error_label;
  1026. }
  1027. e->ast = rspamd_expr_stack_elt_pop(operand_stack);
  1028. g_ptr_array_free(operand_stack, TRUE);
  1029. /* Set priorities for branches */
  1030. g_node_traverse(e->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1031. rspamd_ast_priority_traverse, e);
  1032. /* Now set less expensive branches to be evaluated first */
  1033. g_node_traverse(e->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  1034. rspamd_ast_resort_traverse, NULL);
  1035. if (target) {
  1036. *target = e;
  1037. rspamd_mempool_add_destructor(pool,
  1038. (rspamd_mempool_destruct_t) rspamd_expression_destroy, e);
  1039. }
  1040. else {
  1041. rspamd_expression_destroy(e);
  1042. }
  1043. return TRUE;
  1044. error_label:
  1045. if (err && *err) {
  1046. msg_debug_expression("fatal expression parse error: %e", *err);
  1047. }
  1048. while ((tmp = rspamd_expr_stack_elt_pop(operand_stack)) != NULL) {
  1049. g_node_destroy(tmp);
  1050. }
  1051. g_ptr_array_free(operand_stack, TRUE);
  1052. rspamd_expression_destroy(e);
  1053. return FALSE;
  1054. }
  1055. /*
  1056. * Node optimizer function: skip nodes that are not relevant
  1057. */
  1058. static gboolean
  1059. rspamd_ast_node_done(struct rspamd_expression_elt *elt, gdouble acc)
  1060. {
  1061. gboolean ret = FALSE;
  1062. g_assert(elt->type == ELT_OP);
  1063. switch (elt->p.op.op) {
  1064. case OP_NOT:
  1065. ret = TRUE;
  1066. break;
  1067. case OP_AND:
  1068. ret = acc == 0;
  1069. break;
  1070. case OP_OR:
  1071. ret = acc != 0;
  1072. break;
  1073. default:
  1074. break;
  1075. }
  1076. return ret;
  1077. }
  1078. static gdouble
  1079. rspamd_ast_do_unary_op(struct rspamd_expression_elt *elt, gdouble operand)
  1080. {
  1081. gdouble ret;
  1082. g_assert(elt->type == ELT_OP);
  1083. switch (elt->p.op.op) {
  1084. case OP_NOT:
  1085. ret = fabs(operand) > DBL_EPSILON ? 0.0 : 1.0;
  1086. break;
  1087. default:
  1088. g_assert_not_reached();
  1089. }
  1090. return ret;
  1091. }
  1092. static gdouble
  1093. rspamd_ast_do_binary_op(struct rspamd_expression_elt *elt, gdouble op1, gdouble op2)
  1094. {
  1095. gdouble ret;
  1096. g_assert(elt->type == ELT_OP);
  1097. switch (elt->p.op.op) {
  1098. case OP_MINUS:
  1099. ret = op1 - op2;
  1100. break;
  1101. case OP_DIVIDE:
  1102. ret = op1 / op2;
  1103. break;
  1104. case OP_GE:
  1105. ret = op1 >= op2;
  1106. break;
  1107. case OP_GT:
  1108. ret = op1 > op2;
  1109. break;
  1110. case OP_LE:
  1111. ret = op1 <= op2;
  1112. break;
  1113. case OP_LT:
  1114. ret = op1 < op2;
  1115. break;
  1116. case OP_EQ:
  1117. ret = op1 == op2;
  1118. break;
  1119. case OP_NE:
  1120. ret = op1 != op2;
  1121. break;
  1122. case OP_NOT:
  1123. case OP_PLUS:
  1124. case OP_MULT:
  1125. case OP_AND:
  1126. case OP_OR:
  1127. default:
  1128. g_assert_not_reached();
  1129. break;
  1130. }
  1131. return ret;
  1132. }
  1133. static gdouble
  1134. rspamd_ast_do_nary_op(struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
  1135. {
  1136. gdouble ret;
  1137. g_assert(elt->type == ELT_OP);
  1138. if (isnan(acc)) {
  1139. return val;
  1140. }
  1141. switch (elt->p.op.op) {
  1142. case OP_PLUS:
  1143. ret = acc + val;
  1144. break;
  1145. case OP_MULT:
  1146. ret = acc * val;
  1147. break;
  1148. case OP_AND:
  1149. ret = (fabs(acc) > DBL_EPSILON) && (fabs(val) > DBL_EPSILON);
  1150. break;
  1151. case OP_OR:
  1152. ret = (fabs(acc) > DBL_EPSILON) || (fabs(val) > DBL_EPSILON);
  1153. break;
  1154. default:
  1155. case OP_NOT:
  1156. case OP_MINUS:
  1157. case OP_DIVIDE:
  1158. case OP_GE:
  1159. case OP_GT:
  1160. case OP_LE:
  1161. case OP_LT:
  1162. case OP_EQ:
  1163. case OP_NE:
  1164. g_assert_not_reached();
  1165. break;
  1166. }
  1167. return ret;
  1168. }
  1169. static gdouble
  1170. rspamd_ast_process_node(struct rspamd_expression *e, GNode *node,
  1171. struct rspamd_expr_process_data *process_data)
  1172. {
  1173. struct rspamd_expression_elt *elt;
  1174. GNode *cld;
  1175. gdouble acc = NAN;
  1176. float t1, t2;
  1177. gdouble val;
  1178. gboolean calc_ticks = FALSE;
  1179. __attribute__((unused)) const gchar *op_name = NULL;
  1180. elt = node->data;
  1181. switch (elt->type) {
  1182. case ELT_ATOM:
  1183. if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) {
  1184. /*
  1185. * Check once per 256 evaluations approx
  1186. */
  1187. calc_ticks = (rspamd_random_uint64_fast() & 0xff) == 0xff;
  1188. if (calc_ticks) {
  1189. t1 = rspamd_get_ticks(TRUE);
  1190. }
  1191. elt->value = process_data->process_closure(process_data->ud, elt->p.atom);
  1192. if (fabs(elt->value) > DBL_EPSILON) {
  1193. elt->p.atom->hits++;
  1194. if (process_data->trace) {
  1195. g_ptr_array_add(process_data->trace, elt->p.atom);
  1196. }
  1197. }
  1198. if (calc_ticks) {
  1199. t2 = rspamd_get_ticks(TRUE);
  1200. rspamd_set_counter_ema(&elt->p.atom->exec_time, (t2 - t1), 0.5f);
  1201. }
  1202. elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED;
  1203. }
  1204. acc = elt->value;
  1205. msg_debug_expression_verbose("atom: elt=%s; acc=%.1f", elt->p.atom->str, acc);
  1206. break;
  1207. case ELT_LIMIT:
  1208. acc = elt->p.lim;
  1209. msg_debug_expression_verbose("limit: lim=%.1f; acc=%.1f;", elt->p.lim, acc);
  1210. break;
  1211. case ELT_OP:
  1212. g_assert(node->children != NULL);
  1213. #ifdef DEBUG_EXPRESSIONS
  1214. op_name = rspamd_expr_op_to_str(elt->p.op.op);
  1215. #endif
  1216. if (elt->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  1217. msg_debug_expression_verbose("proceed nary operation %s", op_name);
  1218. /* Proceed all ops in chain */
  1219. DL_FOREACH(node->children, cld)
  1220. {
  1221. val = rspamd_ast_process_node(e, cld, process_data);
  1222. msg_debug_expression_verbose("before op: op=%s; acc=%.1f; val = %.2f", op_name,
  1223. acc, val);
  1224. acc = rspamd_ast_do_nary_op(elt, val, acc);
  1225. msg_debug_expression_verbose("after op: op=%s; acc=%.1f; val = %.2f", op_name,
  1226. acc, val);
  1227. /* Check if we need to process further */
  1228. if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
  1229. if (rspamd_ast_node_done(elt, acc)) {
  1230. msg_debug_expression_verbose("optimizer: done");
  1231. return acc;
  1232. }
  1233. }
  1234. }
  1235. }
  1236. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_BINARY) {
  1237. GNode *c1 = node->children, *c2;
  1238. c2 = c1->next;
  1239. g_assert(c2->next == NULL);
  1240. gdouble val1, val2;
  1241. msg_debug_expression_verbose("proceed binary operation %s",
  1242. op_name);
  1243. val1 = rspamd_ast_process_node(e, c1, process_data);
  1244. val2 = rspamd_ast_process_node(e, c2, process_data);
  1245. msg_debug_expression_verbose("before op: op=%s; op1 = %.1f, op2 = %.1f",
  1246. op_name, val1, val2);
  1247. acc = rspamd_ast_do_binary_op(elt, val1, val2);
  1248. msg_debug_expression_verbose("after op: op=%s; res=%.1f",
  1249. op_name, acc);
  1250. }
  1251. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  1252. GNode *c1 = node->children;
  1253. g_assert(c1->next == NULL);
  1254. msg_debug_expression_verbose("proceed unary operation %s",
  1255. op_name);
  1256. val = rspamd_ast_process_node(e, c1, process_data);
  1257. msg_debug_expression_verbose("before op: op=%s; op1 = %.1f",
  1258. op_name, val);
  1259. acc = rspamd_ast_do_unary_op(elt, val);
  1260. msg_debug_expression_verbose("after op: op=%s; res=%.1f",
  1261. op_name, acc);
  1262. }
  1263. break;
  1264. }
  1265. return acc;
  1266. }
  1267. static gboolean
  1268. rspamd_ast_cleanup_traverse(GNode *n, gpointer d)
  1269. {
  1270. struct rspamd_expression_elt *elt = n->data;
  1271. elt->value = 0;
  1272. elt->flags = 0;
  1273. return FALSE;
  1274. }
  1275. gdouble
  1276. rspamd_process_expression_closure(struct rspamd_expression *expr,
  1277. rspamd_expression_process_cb cb,
  1278. gint flags,
  1279. gpointer runtime_ud,
  1280. GPtrArray **track)
  1281. {
  1282. struct rspamd_expr_process_data pd;
  1283. gdouble ret = 0;
  1284. g_assert(expr != NULL);
  1285. /* Ensure that stack is empty at this point */
  1286. g_assert(expr->expression_stack->len == 0);
  1287. expr->evals++;
  1288. memset(&pd, 0, sizeof(pd));
  1289. pd.process_closure = cb;
  1290. pd.flags = flags;
  1291. pd.ud = runtime_ud;
  1292. if (track) {
  1293. pd.trace = g_ptr_array_sized_new(32);
  1294. *track = pd.trace;
  1295. }
  1296. ret = rspamd_ast_process_node(expr, expr->ast, &pd);
  1297. /* Cleanup */
  1298. g_node_traverse(expr->ast, G_IN_ORDER, G_TRAVERSE_ALL, -1,
  1299. rspamd_ast_cleanup_traverse, NULL);
  1300. /* Check if we need to resort */
  1301. if (expr->evals % expr->next_resort == 0) {
  1302. expr->next_resort = ottery_rand_range(MAX_RESORT_EVALS) +
  1303. MIN_RESORT_EVALS;
  1304. /* Set priorities for branches */
  1305. g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1306. rspamd_ast_priority_traverse, expr);
  1307. /* Now set less expensive branches to be evaluated first */
  1308. g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  1309. rspamd_ast_resort_traverse, NULL);
  1310. }
  1311. return ret;
  1312. }
  1313. gdouble
  1314. rspamd_process_expression_track(struct rspamd_expression *expr,
  1315. gint flags,
  1316. gpointer runtime_ud,
  1317. GPtrArray **track)
  1318. {
  1319. return rspamd_process_expression_closure(expr,
  1320. expr->subr->process, flags, runtime_ud, track);
  1321. }
  1322. gdouble
  1323. rspamd_process_expression(struct rspamd_expression *expr,
  1324. gint flags,
  1325. gpointer runtime_ud)
  1326. {
  1327. return rspamd_process_expression_closure(expr,
  1328. expr->subr->process, flags, runtime_ud, NULL);
  1329. }
  1330. static gboolean
  1331. rspamd_ast_string_traverse(GNode *n, gpointer d)
  1332. {
  1333. GString *res = d;
  1334. gint cnt;
  1335. GNode *cur;
  1336. struct rspamd_expression_elt *elt = n->data;
  1337. const char *op_str = NULL;
  1338. if (elt->type == ELT_ATOM) {
  1339. rspamd_printf_gstring(res, "(%*s)",
  1340. (int) elt->p.atom->len, elt->p.atom->str);
  1341. }
  1342. else if (elt->type == ELT_LIMIT) {
  1343. if (elt->p.lim == (double) (int64_t) elt->p.lim) {
  1344. rspamd_printf_gstring(res, "%L", (int64_t) elt->p.lim);
  1345. }
  1346. else {
  1347. rspamd_printf_gstring(res, "%f", elt->p.lim);
  1348. }
  1349. }
  1350. else {
  1351. op_str = rspamd_expr_op_to_str(elt->p.op.op);
  1352. g_string_append(res, op_str);
  1353. if (n->children) {
  1354. LL_COUNT(n->children, cur, cnt);
  1355. if (cnt > 2) {
  1356. /* Print n-ary of the operator */
  1357. g_string_append_printf(res, "(%d)", cnt);
  1358. }
  1359. }
  1360. }
  1361. g_string_append_c(res, ' ');
  1362. return FALSE;
  1363. }
  1364. GString *
  1365. rspamd_expression_tostring(struct rspamd_expression *expr)
  1366. {
  1367. GString *res;
  1368. g_assert(expr != NULL);
  1369. res = g_string_new(NULL);
  1370. g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1371. rspamd_ast_string_traverse, res);
  1372. /* Last space */
  1373. if (res->len > 0) {
  1374. g_string_erase(res, res->len - 1, 1);
  1375. }
  1376. return res;
  1377. }
  /*
   * Context passed through g_node_traverse() to rspamd_ast_atom_traverse()
   * by rspamd_expression_atom_foreach().
   */
  struct atom_foreach_cbdata {
      /* Called once for every atom node found in the AST */
      rspamd_expression_atom_foreach_cb cb;
      /* Opaque pointer passed to `cb` as its second argument */
      gpointer cbdata;
  };
  1382. static gboolean
  1383. rspamd_ast_atom_traverse(GNode *n, gpointer d)
  1384. {
  1385. struct atom_foreach_cbdata *data = d;
  1386. struct rspamd_expression_elt *elt = n->data;
  1387. rspamd_ftok_t tok;
  1388. if (elt->type == ELT_ATOM) {
  1389. tok.begin = elt->p.atom->str;
  1390. tok.len = elt->p.atom->len;
  1391. data->cb(&tok, data->cbdata);
  1392. }
  1393. return FALSE;
  1394. }
  1395. void rspamd_expression_atom_foreach(struct rspamd_expression *expr,
  1396. rspamd_expression_atom_foreach_cb cb, gpointer cbdata)
  1397. {
  1398. struct atom_foreach_cbdata data;
  1399. g_assert(expr != NULL);
  1400. data.cb = cb;
  1401. data.cbdata = cbdata;
  1402. g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1403. rspamd_ast_atom_traverse, &data);
  1404. }
  1405. gboolean
  1406. rspamd_expression_node_is_op(GNode *node, enum rspamd_expression_op op)
  1407. {
  1408. struct rspamd_expression_elt *elt;
  1409. g_assert(node != NULL);
  1410. elt = node->data;
  1411. if (elt->type == ELT_OP && elt->p.op.op == op) {
  1412. return TRUE;
  1413. }
  1414. return FALSE;
  1415. }