You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

expression.c 35KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "expression.h"
  18. #include "printf.h"
  19. #include "regexp.h"
  20. #include "util.h"
  21. #include "utlist.h"
  22. #include "ottery.h"
  23. #include "libserver/logger.h"
  24. #include "libcryptobox/cryptobox.h"
  25. #include <math.h>
  26. #define RSPAMD_EXPR_FLAG_NEGATE (1 << 0)
  27. #define RSPAMD_EXPR_FLAG_PROCESSED (1 << 1)
  28. #define MIN_RESORT_EVALS 50
  29. #define MAX_RESORT_EVALS 150
  30. #define DOUBLE_EPSILON 1e-9
  31. enum rspamd_expression_elt_type {
  32. ELT_OP = 0,
  33. ELT_ATOM,
  34. ELT_LIMIT
  35. };
  36. enum rspamd_expression_op_flag {
  37. RSPAMD_EXPRESSION_UNARY = 1u << 0u,
  38. RSPAMD_EXPRESSION_BINARY = 1u << 1u,
  39. RSPAMD_EXPRESSION_NARY = 1u << 2u,
  40. RSPAMD_EXPRESSION_ARITHMETIC = 1u << 3u,
  41. RSPAMD_EXPRESSION_LOGICAL = 1u << 4u,
  42. RSPAMD_EXPRESSION_COMPARISON = 1u << 5u,
  43. };
  44. struct rspamd_expression_operation {
  45. enum rspamd_expression_op op;
  46. guint logical_priority;
  47. guint op_flags;
  48. };
  49. struct rspamd_expression_elt {
  50. enum rspamd_expression_elt_type type;
  51. union {
  52. rspamd_expression_atom_t *atom;
  53. struct rspamd_expression_operation op;
  54. gdouble lim;
  55. } p;
  56. gint flags;
  57. gint priority;
  58. gdouble value;
  59. };
  60. struct rspamd_expression {
  61. const struct rspamd_atom_subr *subr;
  62. GArray *expressions;
  63. GPtrArray *expression_stack;
  64. GNode *ast;
  65. gchar *log_id;
  66. guint next_resort;
  67. guint evals;
  68. };
  69. struct rspamd_expr_process_data {
  70. gpointer *ud;
  71. gint flags;
  72. /* != NULL if trace is collected */
  73. GPtrArray *trace;
  74. rspamd_expression_process_cb process_closure;
  75. };
  76. #define msg_debug_expression(...) rspamd_conditional_debug_fast (NULL, NULL, \
  77. rspamd_expression_log_id, "expression", e->log_id, \
  78. G_STRFUNC, \
  79. __VA_ARGS__)
  80. INIT_LOG_MODULE(expression)
  81. static GQuark
  82. rspamd_expr_quark (void)
  83. {
  84. return g_quark_from_static_string ("rspamd-expression");
  85. }
  86. static const gchar * RSPAMD_CONST_FUNCTION
  87. rspamd_expr_op_to_str (enum rspamd_expression_op op);
  88. static const gchar *
  89. rspamd_expr_op_to_str (enum rspamd_expression_op op)
  90. {
  91. const gchar *op_str = NULL;
  92. switch (op) {
  93. case OP_AND:
  94. op_str = "&";
  95. break;
  96. case OP_OR:
  97. op_str = "|";
  98. break;
  99. case OP_MULT:
  100. op_str = "*";
  101. break;
  102. case OP_PLUS:
  103. op_str = "+";
  104. break;
  105. case OP_MINUS:
  106. op_str = "-";
  107. break;
  108. case OP_DIVIDE:
  109. op_str = "/";
  110. break;
  111. case OP_NOT:
  112. op_str = "!";
  113. break;
  114. case OP_GE:
  115. op_str = ">=";
  116. break;
  117. case OP_GT:
  118. op_str = ">";
  119. break;
  120. case OP_LE:
  121. op_str = "<=";
  122. break;
  123. case OP_LT:
  124. op_str = "<";
  125. break;
  126. case OP_OBRACE:
  127. op_str = "(";
  128. break;
  129. case OP_CBRACE:
  130. op_str = ")";
  131. break;
  132. default:
  133. op_str = "???";
  134. break;
  135. }
  136. return op_str;
  137. }
  138. #define G_ARRAY_LAST(ar, type) (&g_array_index((ar), type, (ar)->len - 1))
  139. static void
  140. rspamd_expr_stack_elt_push (GPtrArray *stack,
  141. gpointer elt)
  142. {
  143. g_ptr_array_add (stack, elt);
  144. }
  145. static gpointer
  146. rspamd_expr_stack_elt_pop (GPtrArray *stack)
  147. {
  148. gpointer e;
  149. gint idx;
  150. if (stack->len == 0) {
  151. return NULL;
  152. }
  153. idx = stack->len - 1;
  154. e = g_ptr_array_index (stack, idx);
  155. g_ptr_array_remove_index_fast (stack, idx);
  156. return e;
  157. }
  158. static void
  159. rspamd_expr_stack_push (struct rspamd_expression *expr,
  160. gpointer elt)
  161. {
  162. rspamd_expr_stack_elt_push (expr->expression_stack, elt);
  163. }
  164. static gpointer
  165. rspamd_expr_stack_pop (struct rspamd_expression *expr)
  166. {
  167. return rspamd_expr_stack_elt_pop (expr->expression_stack);
  168. }
  169. static gpointer
  170. rspamd_expr_stack_peek (struct rspamd_expression *expr)
  171. {
  172. gpointer e;
  173. gint idx;
  174. GPtrArray *stack = expr->expression_stack;
  175. if (stack->len == 0) {
  176. return NULL;
  177. }
  178. idx = stack->len - 1;
  179. e = g_ptr_array_index (stack, idx);
  180. return e;
  181. }
  182. /*
  183. * Return operation priority
  184. */
  185. static gint RSPAMD_CONST_FUNCTION
  186. rspamd_expr_logic_priority (enum rspamd_expression_op op);
  187. static gint
  188. rspamd_expr_logic_priority (enum rspamd_expression_op op)
  189. {
  190. gint ret = 0;
  191. switch (op) {
  192. case OP_NOT:
  193. ret = 7;
  194. break;
  195. case OP_MULT:
  196. case OP_DIVIDE:
  197. ret = 6;
  198. break;
  199. case OP_PLUS:
  200. case OP_MINUS:
  201. ret = 5;
  202. break;
  203. case OP_GE:
  204. case OP_GT:
  205. case OP_LE:
  206. case OP_LT:
  207. ret = 4;
  208. break;
  209. case OP_AND:
  210. ret = 3;
  211. break;
  212. case OP_OR:
  213. ret = 2;
  214. break;
  215. case OP_OBRACE:
  216. case OP_CBRACE:
  217. ret = 1;
  218. break;
  219. case OP_INVALID:
  220. ret = -1;
  221. break;
  222. }
  223. return ret;
  224. }
  225. static guint RSPAMD_CONST_FUNCTION
  226. rspamd_expr_op_flags (enum rspamd_expression_op op);
  227. static guint
  228. rspamd_expr_op_flags (enum rspamd_expression_op op)
  229. {
  230. guint ret = 0;
  231. switch (op) {
  232. case OP_NOT:
  233. ret |= RSPAMD_EXPRESSION_UNARY|RSPAMD_EXPRESSION_LOGICAL;
  234. break;
  235. case OP_MULT:
  236. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
  237. break;
  238. case OP_DIVIDE:
  239. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
  240. break;
  241. case OP_PLUS:
  242. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
  243. break;
  244. case OP_MINUS:
  245. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
  246. break;
  247. case OP_GE:
  248. case OP_GT:
  249. case OP_LE:
  250. case OP_LT:
  251. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_COMPARISON;
  252. break;
  253. case OP_AND:
  254. case OP_OR:
  255. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
  256. break;
  257. case OP_OBRACE:
  258. case OP_CBRACE:
  259. case OP_INVALID:
  260. break;
  261. }
  262. return ret;
  263. }
  264. /*
  265. * Return FALSE if symbol is not operation symbol (operand)
  266. * Return TRUE if symbol is operation symbol
  267. */
  268. static gboolean RSPAMD_CONST_FUNCTION
  269. rspamd_expr_is_operation_symbol (gchar a);
  270. static gboolean
  271. rspamd_expr_is_operation_symbol (gchar a)
  272. {
  273. switch (a) {
  274. case '!':
  275. case '&':
  276. case '|':
  277. case '(':
  278. case ')':
  279. case '>':
  280. case '<':
  281. case '+':
  282. case '*':
  283. case '-':
  284. case '/':
  285. return TRUE;
  286. }
  287. return FALSE;
  288. }
  289. static gboolean
  290. rspamd_expr_is_operation (struct rspamd_expression *e,
  291. const gchar *p, const gchar *end, rspamd_regexp_t *num_re)
  292. {
  293. if (rspamd_expr_is_operation_symbol (*p)) {
  294. if (p + 1 < end) {
  295. gchar t = *(p + 1);
  296. if (t == ':') {
  297. /* Special case, treat it as an atom */
  298. }
  299. else if (*p == '/') {
  300. /* Lookahead for division operation to distinguish from regexp */
  301. const gchar *track = p + 1;
  302. /* Skip spaces */
  303. while (track < end && g_ascii_isspace (*track)) {
  304. track++;
  305. }
  306. /* Check for a number */
  307. if (rspamd_regexp_search (num_re,
  308. track,
  309. end - track,
  310. NULL,
  311. NULL,
  312. FALSE,
  313. NULL)) {
  314. msg_debug_expression ("found divide operation");
  315. return TRUE;
  316. }
  317. msg_debug_expression ("false divide operation");
  318. /* Fallback to PARSE_ATOM state */
  319. }
  320. else if (*p == '-') {
  321. /* - is used in composites, so we need to distinguish - from
  322. * 1) unary minus of a limit!
  323. * 2) -BLAH in composites
  324. * Decision is simple: require a space after binary `-` op
  325. */
  326. if (g_ascii_isspace (t)) {
  327. return TRUE;
  328. }
  329. /* Fallback to PARSE_ATOM state */
  330. msg_debug_expression ("false minus operation");
  331. }
  332. else {
  333. /* Generic operation */
  334. return TRUE;
  335. }
  336. }
  337. else {
  338. /* Last op */
  339. return TRUE;
  340. }
  341. }
  342. return FALSE;
  343. }
  344. /* Return character representation of operation */
  345. static enum rspamd_expression_op
  346. rspamd_expr_str_to_op (const gchar *a, const gchar *end, const gchar **next)
  347. {
  348. enum rspamd_expression_op op = OP_INVALID;
  349. g_assert (a < end);
  350. switch (*a) {
  351. case '!':
  352. case '&':
  353. case '|':
  354. case '+':
  355. case '*':
  356. case '/':
  357. case '-':
  358. case '(':
  359. case ')': {
  360. if (a < end - 1) {
  361. if ((a[0] == '&' && a[1] == '&') ||
  362. (a[0] == '|' && a[1] == '|')) {
  363. *next = a + 2;
  364. }
  365. else {
  366. *next = a + 1;
  367. }
  368. }
  369. else {
  370. *next = end;
  371. }
  372. /* XXX: not especially effective */
  373. switch (*a) {
  374. case '!':
  375. op = OP_NOT;
  376. break;
  377. case '&':
  378. op = OP_AND;
  379. break;
  380. case '*':
  381. op = OP_MULT;
  382. break;
  383. case '|':
  384. op = OP_OR;
  385. break;
  386. case '+':
  387. op = OP_PLUS;
  388. break;
  389. case '/':
  390. op = OP_DIVIDE;
  391. break;
  392. case '-':
  393. op = OP_MINUS;
  394. break;
  395. case ')':
  396. op = OP_CBRACE;
  397. break;
  398. case '(':
  399. op = OP_OBRACE;
  400. break;
  401. default:
  402. op = OP_INVALID;
  403. break;
  404. }
  405. break;
  406. }
  407. case 'O':
  408. case 'o':
  409. if ((gulong)(end - a) >= sizeof ("or") &&
  410. g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
  411. *next = a + sizeof ("or") - 1;
  412. op = OP_OR;
  413. }
  414. break;
  415. case 'A':
  416. case 'a':
  417. if ((gulong)(end - a) >= sizeof ("and") &&
  418. g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
  419. *next = a + sizeof ("and") - 1;
  420. op = OP_AND;
  421. }
  422. break;
  423. case 'N':
  424. case 'n':
  425. if ((gulong)(end - a) >= sizeof ("not") &&
  426. g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
  427. *next = a + sizeof ("not") - 1;
  428. op = OP_NOT;
  429. }
  430. break;
  431. case '>':
  432. if (a < end - 1 && a[1] == '=') {
  433. *next = a + 2;
  434. op = OP_GE;
  435. }
  436. else {
  437. *next = a + 1;
  438. op = OP_GT;
  439. }
  440. break;
  441. case '<':
  442. if (a < end - 1 && a[1] == '=') {
  443. *next = a + 2;
  444. op = OP_LE;
  445. }
  446. else {
  447. *next = a + 1;
  448. op = OP_LT;
  449. }
  450. break;
  451. default:
  452. op = OP_INVALID;
  453. break;
  454. }
  455. return op;
  456. }
  457. static void
  458. rspamd_expression_destroy (struct rspamd_expression *expr)
  459. {
  460. guint i;
  461. struct rspamd_expression_elt *elt;
  462. if (expr != NULL) {
  463. if (expr->subr->destroy) {
  464. /* Free atoms */
  465. for (i = 0; i < expr->expressions->len; i ++) {
  466. elt = &g_array_index (expr->expressions,
  467. struct rspamd_expression_elt, i);
  468. if (elt->type == ELT_ATOM) {
  469. expr->subr->destroy (elt->p.atom);
  470. }
  471. }
  472. }
  473. if (expr->expressions) {
  474. g_array_free (expr->expressions, TRUE);
  475. }
  476. if (expr->expression_stack) {
  477. g_ptr_array_free (expr->expression_stack, TRUE);
  478. }
  479. if (expr->ast) {
  480. g_node_destroy (expr->ast);
  481. }
  482. if (expr->log_id) {
  483. g_free (expr->log_id);
  484. }
  485. g_free (expr);
  486. }
  487. }
  488. static gboolean
  489. rspamd_ast_add_node (struct rspamd_expression *e,
  490. GPtrArray *operands,
  491. struct rspamd_expression_elt *op,
  492. GError **err)
  493. {
  494. GNode *res, *a1, *a2, *test;
  495. g_assert (op->type == ELT_OP);
  496. if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  497. /* Unary operator */
  498. struct rspamd_expression_elt *test_elt;
  499. res = g_node_new (op);
  500. a1 = rspamd_expr_stack_elt_pop (operands);
  501. if (a1 == NULL) {
  502. g_set_error (err, rspamd_expr_quark(), EINVAL, "no operand to "
  503. "unary '%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  504. g_node_destroy (res);
  505. return FALSE;
  506. }
  507. g_node_append (res, a1);
  508. test_elt = a1->data;
  509. if (test_elt->type == ELT_ATOM) {
  510. test_elt->p.atom->parent = res;
  511. msg_debug_expression ("added unary op %s to AST; operand: %*s",
  512. rspamd_expr_op_to_str (op->p.op.op),
  513. (int)test_elt->p.atom->len, test_elt->p.atom->str);
  514. }
  515. else {
  516. msg_debug_expression ("added unary op %s to AST; operand type: %d",
  517. rspamd_expr_op_to_str (op->p.op.op),
  518. test_elt->type);
  519. }
  520. }
  521. else {
  522. struct rspamd_expression_elt *e1, *e2;
  523. /* For binary/nary operators we might want to examine chains */
  524. a2 = rspamd_expr_stack_elt_pop (operands);
  525. a1 = rspamd_expr_stack_elt_pop (operands);
  526. if (a2 == NULL) {
  527. g_set_error (err, rspamd_expr_quark(), EINVAL, "no left operand to "
  528. "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  529. return FALSE;
  530. }
  531. if (a1 == NULL) {
  532. g_set_error (err, rspamd_expr_quark(), EINVAL, "no right operand to "
  533. "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  534. return FALSE;
  535. }
  536. /* Nary stuff */
  537. if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  538. /*
  539. * We convert a set of ops like X + Y + Z to a nary tree like
  540. * X Y Z +
  541. * for the longest possible prefix of atoms/limits
  542. */
  543. /* First try with a1 */
  544. test = a1;
  545. e1 = test->data;
  546. if (e1->type == ELT_OP && e1->p.op.op == op->p.op.op) {
  547. /* Add children */
  548. g_node_append (test, a2);
  549. rspamd_expr_stack_elt_push (operands, a1);
  550. msg_debug_expression ("added nary op %s to AST merged with the first operand",
  551. rspamd_expr_op_to_str (op->p.op.op));
  552. return TRUE;
  553. }
  554. /* Now test a2 */
  555. test = a2;
  556. e2 = test->data;
  557. if (e2->type == ELT_OP && e2->p.op.op == op->p.op.op) {
  558. /* Add children */
  559. g_node_prepend (test, a1);
  560. rspamd_expr_stack_elt_push (operands, a2);
  561. msg_debug_expression ("added nary op %s to AST merged with the second operand",
  562. rspamd_expr_op_to_str (op->p.op.op));
  563. return TRUE;
  564. }
  565. }
  566. /* No optimizations possible, so create a new level */
  567. res = g_node_new (op);
  568. g_node_append (res, a1);
  569. g_node_append (res, a2);
  570. e1 = a1->data;
  571. e2 = a2->data;
  572. if (e1->type == ELT_ATOM) {
  573. e1->p.atom->parent = res;
  574. }
  575. if (e2->type == ELT_ATOM) {
  576. e2->p.atom->parent = res;
  577. }
  578. if (e1->type == ELT_ATOM && e2->type == ELT_ATOM) {
  579. msg_debug_expression ("added binary op %s to AST; operands: (%*s; %*s)",
  580. rspamd_expr_op_to_str (op->p.op.op),
  581. (int) e1->p.atom->len, e1->p.atom->str,
  582. (int) e2->p.atom->len, e2->p.atom->str);
  583. }
  584. else {
  585. msg_debug_expression ("added binary op %s to AST; operands (types): (%d; %d)",
  586. rspamd_expr_op_to_str (op->p.op.op),
  587. e1->type,
  588. e2->type);
  589. }
  590. }
  591. /* Push back resulting node to the stack */
  592. rspamd_expr_stack_elt_push (operands, res);
  593. return TRUE;
  594. }
  595. static gboolean
  596. rspamd_ast_priority_traverse (GNode *node, gpointer d)
  597. {
  598. struct rspamd_expression_elt *elt = node->data, *cur_elt;
  599. struct rspamd_expression *expr = d;
  600. gint cnt = 0;
  601. GNode *cur;
  602. if (node->children) {
  603. cur = node->children;
  604. while (cur) {
  605. cur_elt = cur->data;
  606. cnt += cur_elt->priority;
  607. cur = cur->next;
  608. }
  609. elt->priority = cnt;
  610. }
  611. else {
  612. /* It is atom or limit */
  613. g_assert (elt->type != ELT_OP);
  614. if (elt->type == ELT_LIMIT) {
  615. /* Always push limit first */
  616. elt->priority = 0;
  617. }
  618. else {
  619. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY;
  620. if (expr->subr->priority != NULL) {
  621. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY -
  622. expr->subr->priority (elt->p.atom);
  623. }
  624. elt->p.atom->hits = 0;
  625. elt->p.atom->avg_ticks = 0.0;
  626. }
  627. }
  628. return FALSE;
  629. }
  630. #define ATOM_PRIORITY(a) ((a)->p.atom->hits / ((a)->p.atom->avg_ticks > 0 ? \
  631. (a)->p.atom->avg_ticks * 10000000 : 1.0))
  632. static gint
  633. rspamd_ast_priority_cmp (GNode *a, GNode *b)
  634. {
  635. struct rspamd_expression_elt *ea = a->data, *eb = b->data;
  636. gdouble w1, w2;
  637. if (ea->type == ELT_LIMIT) {
  638. return 1;
  639. }
  640. else if (eb->type == ELT_LIMIT) {
  641. return -1;
  642. }
  643. /* Special logic for atoms */
  644. if (ea->type == ELT_ATOM && eb->type == ELT_ATOM &&
  645. ea->priority == eb->priority) {
  646. w1 = ATOM_PRIORITY (ea);
  647. w2 = ATOM_PRIORITY (eb);
  648. ea->p.atom->hits = 0;
  649. ea->p.atom->avg_ticks = 0.0;
  650. return w1 - w2;
  651. }
  652. else {
  653. return ea->priority - eb->priority;
  654. }
  655. }
  656. static gboolean
  657. rspamd_ast_resort_traverse (GNode *node, gpointer unused)
  658. {
  659. GNode *children, *last;
  660. struct rspamd_expression_elt *elt;
  661. elt = (struct rspamd_expression_elt *)node->data;
  662. /*
  663. * We sort merely logical operations, everything else is dangerous
  664. */
  665. if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) {
  666. if (node->children) {
  667. children = node->children;
  668. last = g_node_last_sibling (children);
  669. /* Needed for utlist compatibility */
  670. children->prev = last;
  671. DL_SORT (node->children, rspamd_ast_priority_cmp);
  672. /* Restore GLIB compatibility */
  673. children = node->children;
  674. children->prev = NULL;
  675. }
  676. }
  677. return FALSE;
  678. }
  679. static struct rspamd_expression_elt *
  680. rspamd_expr_dup_elt (rspamd_mempool_t *pool, struct rspamd_expression_elt *elt)
  681. {
  682. struct rspamd_expression_elt *n;
  683. n = rspamd_mempool_alloc (pool, sizeof (*n));
  684. memcpy (n, elt, sizeof (*n));
  685. return n;
  686. }
  687. gboolean
  688. rspamd_parse_expression (const gchar *line, gsize len,
  689. const struct rspamd_atom_subr *subr, gpointer subr_data,
  690. rspamd_mempool_t *pool, GError **err,
  691. struct rspamd_expression **target)
  692. {
  693. struct rspamd_expression *e;
  694. struct rspamd_expression_elt elt;
  695. rspamd_expression_atom_t *atom;
  696. rspamd_regexp_t *num_re;
  697. enum rspamd_expression_op op, op_stack;
  698. const gchar *p, *c, *end;
  699. GPtrArray *operand_stack;
  700. GNode *tmp;
  701. enum {
  702. PARSE_ATOM = 0,
  703. PARSE_OP,
  704. PARSE_LIM,
  705. SKIP_SPACES
  706. } state = PARSE_ATOM;
  707. g_assert (line != NULL);
  708. g_assert (subr != NULL && subr->parse != NULL);
  709. if (len == 0) {
  710. len = strlen (line);
  711. }
  712. memset (&elt, 0, sizeof (elt));
  713. num_re = rspamd_regexp_cache_create (NULL,
  714. "/^(?:[+-]?([0-9]*[.])?[0-9]+)(?:\\s+|[)]|$)/", NULL, NULL);
  715. p = line;
  716. c = line;
  717. end = line + len;
  718. e = g_malloc0 (sizeof (*e));
  719. e->expressions = g_array_new (FALSE, FALSE,
  720. sizeof (struct rspamd_expression_elt));
  721. operand_stack = g_ptr_array_sized_new (32);
  722. e->ast = NULL;
  723. e->expression_stack = g_ptr_array_sized_new (32);
  724. e->subr = subr;
  725. e->evals = 0;
  726. e->next_resort = ottery_rand_range (MAX_RESORT_EVALS) + MIN_RESORT_EVALS;
  727. e->log_id = g_malloc0 (RSPAMD_LOG_ID_LEN + 1);
  728. guint64 h = rspamd_cryptobox_fast_hash (line, len, 0xdeadbabe);
  729. rspamd_snprintf (e->log_id, RSPAMD_LOG_ID_LEN + 1, "%xL", h);
  730. msg_debug_expression ("start to parse expression '%*s'", (int)len, line);
  731. /* Shunting-yard algorithm */
  732. while (p < end) {
  733. switch (state) {
  734. case PARSE_ATOM:
  735. if (g_ascii_isspace (*p)) {
  736. state = SKIP_SPACES;
  737. continue;
  738. }
  739. else if (rspamd_expr_is_operation (e, p, end, num_re)) {
  740. /* Lookahead */
  741. state = PARSE_OP;
  742. continue;
  743. }
  744. /*
  745. * First of all, we check some pre-conditions:
  746. * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op
  747. * 2) if we have full numeric string, then we check for
  748. * the following expression:
  749. * ^\d+\s*[><]$
  750. * and check the operation on stack
  751. */
  752. if ((gulong)(end - p) > sizeof ("and ") &&
  753. (g_ascii_strncasecmp (p, "and ", sizeof ("and ") - 1) == 0 ||
  754. g_ascii_strncasecmp (p, "not ", sizeof ("not ") - 1) == 0 )) {
  755. state = PARSE_OP;
  756. }
  757. else if ((gulong)(end - p) > sizeof ("or ") &&
  758. g_ascii_strncasecmp (p, "or ", sizeof ("or ") - 1) == 0) {
  759. state = PARSE_OP;
  760. }
  761. else {
  762. /*
  763. * If we have any comparison or arithmetic operator in the stack, then try
  764. * to parse limit
  765. */
  766. op = GPOINTER_TO_INT (rspamd_expr_stack_peek (e));
  767. if (op == OP_MULT || op == OP_MINUS || op == OP_DIVIDE ||
  768. op == OP_PLUS || (op >= OP_LT && op <= OP_GE)) {
  769. if (rspamd_regexp_search (num_re,
  770. p,
  771. end - p,
  772. NULL,
  773. NULL,
  774. FALSE,
  775. NULL)) {
  776. c = p;
  777. state = PARSE_LIM;
  778. continue;
  779. }
  780. /* Fallback to atom parsing */
  781. }
  782. /* Try to parse atom */
  783. atom = subr->parse (p, end - p, pool, subr_data, err);
  784. if (atom == NULL || atom->len == 0) {
  785. /* We couldn't parse the atom, so go out */
  786. if (err != NULL && *err == NULL) {
  787. g_set_error (err,
  788. rspamd_expr_quark (),
  789. 500,
  790. "Cannot parse atom: callback function failed"
  791. " to parse '%.*s'",
  792. (int) (end - p),
  793. p);
  794. }
  795. goto error_label;
  796. }
  797. if (atom->str == NULL) {
  798. atom->str = p;
  799. }
  800. p = p + atom->len;
  801. /* Push to output */
  802. elt.type = ELT_ATOM;
  803. elt.p.atom = atom;
  804. g_array_append_val (e->expressions, elt);
  805. rspamd_expr_stack_elt_push (operand_stack,
  806. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  807. msg_debug_expression ("found atom: %*s; pushed onto operand stack (%d size)",
  808. (int)atom->len, atom->str, operand_stack->len);
  809. }
  810. break;
  811. case PARSE_LIM:
  812. if ((g_ascii_isdigit (*p) || *p == '-' || *p == '.')
  813. && p < end - 1) {
  814. p ++;
  815. }
  816. else {
  817. if (p == end - 1 && g_ascii_isdigit (*p)) {
  818. p ++;
  819. }
  820. if (p - c > 0) {
  821. elt.type = ELT_LIMIT;
  822. elt.p.lim = strtod (c, NULL);
  823. g_array_append_val (e->expressions, elt);
  824. rspamd_expr_stack_elt_push (operand_stack,
  825. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  826. msg_debug_expression ("found limit: %.1f; pushed onto operand stack (%d size)",
  827. elt.p.lim, operand_stack->len);
  828. c = p;
  829. state = SKIP_SPACES;
  830. }
  831. else {
  832. g_set_error (err, rspamd_expr_quark(), 400, "Empty number");
  833. goto error_label;
  834. }
  835. }
  836. break;
  837. case PARSE_OP:
  838. op = rspamd_expr_str_to_op (p, end, &p);
  839. if (op == OP_INVALID) {
  840. g_set_error (err, rspamd_expr_quark(), 500, "Bad operator %c",
  841. *p);
  842. goto error_label;
  843. }
  844. else if (op == OP_OBRACE) {
  845. /*
  846. * If the token is a left parenthesis, then push it onto
  847. * the stack.
  848. */
  849. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  850. msg_debug_expression ("found obrace, pushed to operators stack (%d size)",
  851. e->expression_stack->len);
  852. }
  853. else if (op == OP_CBRACE) {
  854. /*
  855. * Until the token at the top of the stack is a left
  856. * parenthesis, pop operators off the stack onto the
  857. * output queue.
  858. *
  859. * Pop the left parenthesis from the stack,
  860. * but not onto the output queue.
  861. *
  862. * If the stack runs out without finding a left parenthesis,
  863. * then there are mismatched parentheses.
  864. */
  865. msg_debug_expression ("found cbrace, rewind operators stack (%d size)",
  866. e->expression_stack->len);
  867. do {
  868. op = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  869. if (op == OP_INVALID) {
  870. g_set_error (err, rspamd_expr_quark(), 600,
  871. "Braces mismatch");
  872. goto error_label;
  873. }
  874. guint op_priority = rspamd_expr_logic_priority (op);
  875. msg_debug_expression ("found op: %s; priority = %d",
  876. rspamd_expr_op_to_str (op), op_priority);
  877. if (op != OP_OBRACE) {
  878. elt.type = ELT_OP;
  879. elt.p.op.op = op;
  880. elt.p.op.op_flags = rspamd_expr_op_flags (op);
  881. elt.p.op.logical_priority = op_priority;
  882. g_array_append_val (e->expressions, elt);
  883. if (!rspamd_ast_add_node (e, operand_stack,
  884. rspamd_expr_dup_elt (pool, &elt), err)) {
  885. goto error_label;
  886. }
  887. }
  888. } while (op != OP_OBRACE);
  889. }
  890. else {
  891. /*
  892. * While there is an operator token, o2, at the top of
  893. * the operator stack, and either:
  894. *
  895. * - o1 is left-associative and its precedence is less than
  896. * or equal to that of o2, or
  897. * - o1 is right associative, and has precedence less than
  898. * that of o2,
  899. *
  900. * then pop o2 off the operator stack, onto the output queue;
  901. *
  902. * push o1 onto the operator stack.
  903. */
  904. for (;;) {
  905. op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  906. if (op_stack == OP_INVALID) {
  907. /* Stack is empty */
  908. msg_debug_expression ("no operations in operators stack");
  909. break;
  910. }
  911. /* We ignore associativity for now */
  912. guint op_priority = rspamd_expr_logic_priority (op),
  913. stack_op_priority = rspamd_expr_logic_priority (op_stack);
  914. msg_debug_expression ("operators stack %d; operands stack: %d; "
  915. "process operation '%s'(%d); pop operation '%s'(%d)",
  916. e->expression_stack->len,
  917. operand_stack->len,
  918. rspamd_expr_op_to_str (op), op_priority,
  919. rspamd_expr_op_to_str (op_stack), stack_op_priority);
  920. if (op_stack != OP_OBRACE &&
  921. op_priority < stack_op_priority) {
  922. elt.type = ELT_OP;
  923. elt.p.op.op = op_stack;
  924. elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
  925. elt.p.op.logical_priority = op_priority;
  926. g_array_append_val (e->expressions, elt);
  927. if (!rspamd_ast_add_node (e, operand_stack,
  928. rspamd_expr_dup_elt (pool, &elt), err)) {
  929. goto error_label;
  930. }
  931. }
  932. else {
  933. /* Push op_stack back */
  934. msg_debug_expression ("operators stack %d; operands stack: %d; "
  935. "process operation '%s'(%d); push back to stack '%s'(%d)",
  936. e->expression_stack->len,
  937. operand_stack->len,
  938. rspamd_expr_op_to_str (op), op_priority,
  939. rspamd_expr_op_to_str (op_stack), stack_op_priority);
  940. rspamd_expr_stack_push (e, GINT_TO_POINTER (op_stack));
  941. break;
  942. }
  943. }
  944. /* Push new operator itself */
  945. msg_debug_expression ("operators stack %d; operands stack: %d; "
  946. "process operation '%s'; push to stack",
  947. e->expression_stack->len,
  948. operand_stack->len,
  949. rspamd_expr_op_to_str (op));
  950. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  951. }
  952. state = SKIP_SPACES;
  953. break;
  954. case SKIP_SPACES:
  955. if (g_ascii_isspace (*p)) {
  956. p ++;
  957. }
  958. if (rspamd_expr_is_operation (e, p, end, num_re)) {
  959. /* Lookahead */
  960. state = PARSE_OP;
  961. }
  962. else {
  963. state = PARSE_ATOM;
  964. }
  965. break;
  966. }
  967. }
  968. /* Now we process the stack and push operators to the output */
  969. while ((op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e)))
  970. != OP_INVALID) {
  971. msg_debug_expression ("operators stack %d; operands stack: %d; "
  972. "rewind stack; op: %s",
  973. e->expression_stack->len,
  974. operand_stack->len,
  975. rspamd_expr_op_to_str (op));
  976. if (op_stack != OP_OBRACE) {
  977. elt.type = ELT_OP;
  978. elt.p.op.op = op_stack;
  979. elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
  980. elt.p.op.logical_priority = rspamd_expr_logic_priority (op_stack);
  981. g_array_append_val (e->expressions, elt);
  982. if (!rspamd_ast_add_node (e, operand_stack,
  983. rspamd_expr_dup_elt (pool, &elt), err)) {
  984. goto error_label;
  985. }
  986. }
  987. else {
  988. g_set_error (err, rspamd_expr_quark(), 600,
  989. "Braces mismatch");
  990. goto error_label;
  991. }
  992. }
  993. if (operand_stack->len != 1) {
  994. g_set_error (err, rspamd_expr_quark(), 601,
  995. "Operators mismatch: %d elts in stack", operand_stack->len);
  996. goto error_label;
  997. }
  998. e->ast = rspamd_expr_stack_elt_pop (operand_stack);
  999. g_ptr_array_free (operand_stack, TRUE);
  1000. /* Set priorities for branches */
  1001. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1002. rspamd_ast_priority_traverse, e);
  1003. /* Now set less expensive branches to be evaluated first */
  1004. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  1005. rspamd_ast_resort_traverse, NULL);
  1006. if (target) {
  1007. *target = e;
  1008. rspamd_mempool_add_destructor (pool,
  1009. (rspamd_mempool_destruct_t)rspamd_expression_destroy, e);
  1010. }
  1011. else {
  1012. rspamd_expression_destroy (e);
  1013. }
  1014. return TRUE;
  1015. error_label:
  1016. msg_debug_expression ("fatal error: %e", *err);
  1017. while ((tmp = rspamd_expr_stack_elt_pop (operand_stack)) != NULL) {
  1018. g_node_destroy (tmp);
  1019. }
  1020. g_ptr_array_free (operand_stack, TRUE);
  1021. rspamd_expression_destroy (e);
  1022. return FALSE;
  1023. }
  1024. /*
  1025. * Node optimizer function: skip nodes that are not relevant
  1026. */
  1027. static gboolean
  1028. rspamd_ast_node_done (struct rspamd_expression_elt *elt, gdouble acc)
  1029. {
  1030. gboolean ret = FALSE;
  1031. g_assert (elt->type == ELT_OP);
  1032. switch (elt->p.op.op) {
  1033. case OP_NOT:
  1034. ret = TRUE;
  1035. break;
  1036. case OP_AND:
  1037. ret = acc == 0;
  1038. break;
  1039. case OP_OR:
  1040. ret = acc != 0;
  1041. break;
  1042. default:
  1043. break;
  1044. }
  1045. return ret;
  1046. }
  1047. static gdouble
  1048. rspamd_ast_do_unary_op (struct rspamd_expression_elt *elt, gdouble operand)
  1049. {
  1050. gdouble ret;
  1051. g_assert (elt->type == ELT_OP);
  1052. switch (elt->p.op.op) {
  1053. case OP_NOT:
  1054. ret = fabs (operand) > DOUBLE_EPSILON ? 0.0 : 1.0;
  1055. break;
  1056. default:
  1057. g_assert_not_reached ();
  1058. }
  1059. return ret;
  1060. }
  1061. static gdouble
  1062. rspamd_ast_do_binary_op (struct rspamd_expression_elt *elt, gdouble op1, gdouble op2)
  1063. {
  1064. gdouble ret;
  1065. g_assert (elt->type == ELT_OP);
  1066. switch (elt->p.op.op) {
  1067. case OP_MINUS:
  1068. ret = op1 - op2;
  1069. break;
  1070. case OP_DIVIDE:
  1071. ret = op1 / op2;
  1072. break;
  1073. case OP_GE:
  1074. ret = op1 >= op2;
  1075. break;
  1076. case OP_GT:
  1077. ret = op1 > op2;
  1078. break;
  1079. case OP_LE:
  1080. ret = op1 <= op2;
  1081. break;
  1082. case OP_LT:
  1083. ret = op1 < op2;
  1084. break;
  1085. case OP_NOT:
  1086. case OP_PLUS:
  1087. case OP_MULT:
  1088. case OP_AND:
  1089. case OP_OR:
  1090. default:
  1091. g_assert_not_reached();
  1092. break;
  1093. }
  1094. return ret;
  1095. }
  1096. static gdouble
  1097. rspamd_ast_do_nary_op (struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
  1098. {
  1099. gdouble ret;
  1100. g_assert (elt->type == ELT_OP);
  1101. if (isnan (acc)) {
  1102. return val;
  1103. }
  1104. switch (elt->p.op.op) {
  1105. case OP_PLUS:
  1106. ret = acc + val;
  1107. break;
  1108. case OP_MULT:
  1109. ret = acc * val;
  1110. break;
  1111. case OP_AND:
  1112. ret = (acc * val);
  1113. break;
  1114. case OP_OR:
  1115. ret = (acc + val);
  1116. break;
  1117. default:
  1118. case OP_NOT:
  1119. case OP_MINUS:
  1120. case OP_DIVIDE:
  1121. case OP_GE:
  1122. case OP_GT:
  1123. case OP_LE:
  1124. case OP_LT:
  1125. g_assert_not_reached();
  1126. break;
  1127. }
  1128. return ret;
  1129. }
  1130. static gdouble
  1131. rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
  1132. struct rspamd_expr_process_data *process_data)
  1133. {
  1134. struct rspamd_expression_elt *elt;
  1135. GNode *cld;
  1136. gdouble acc = NAN;
  1137. gdouble t1, t2, val;
  1138. gboolean calc_ticks = FALSE;
  1139. const gchar *op_name = NULL;
  1140. elt = node->data;
  1141. switch (elt->type) {
  1142. case ELT_ATOM:
  1143. if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) {
  1144. /*
  1145. * Sometimes get ticks for this expression. 'Sometimes' here means
  1146. * that we get lowest 5 bits of the counter `evals` and 5 bits
  1147. * of some shifted address to provide some sort of jittering for
  1148. * ticks evaluation
  1149. */
  1150. if ((e->evals & 0x1F) == (GPOINTER_TO_UINT (node) >> 4 & 0x1F)) {
  1151. calc_ticks = TRUE;
  1152. t1 = rspamd_get_ticks (TRUE);
  1153. }
  1154. elt->value = process_data->process_closure (process_data->ud, elt->p.atom);
  1155. if (fabs (elt->value) > 1e-9) {
  1156. elt->p.atom->hits ++;
  1157. if (process_data->trace) {
  1158. g_ptr_array_add (process_data->trace, elt->p.atom);
  1159. }
  1160. }
  1161. if (calc_ticks) {
  1162. t2 = rspamd_get_ticks (TRUE);
  1163. elt->p.atom->avg_ticks += ((t2 - t1) - elt->p.atom->avg_ticks) /
  1164. (e->evals);
  1165. }
  1166. elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED;
  1167. }
  1168. acc = elt->value;
  1169. msg_debug_expression ("atom: elt=%s; acc=%.1f", elt->p.atom->str, acc);
  1170. break;
  1171. case ELT_LIMIT:
  1172. acc = elt->p.lim;
  1173. msg_debug_expression ("limit: lim=%.1f; acc=%.1f;", elt->p.lim, acc);
  1174. break;
  1175. case ELT_OP:
  1176. g_assert (node->children != NULL);
  1177. op_name = rspamd_expr_op_to_str (elt->p.op.op);
  1178. if (elt->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  1179. msg_debug_expression ("proceed nary operation %s", op_name);
  1180. /* Proceed all ops in chain */
  1181. DL_FOREACH (node->children, cld) {
  1182. val = rspamd_ast_process_node (e, cld, process_data);
  1183. msg_debug_expression ("before op: op=%s; acc=%.1f; val = %.2f", op_name,
  1184. acc, val);
  1185. acc = rspamd_ast_do_nary_op (elt, val, acc);
  1186. msg_debug_expression ("after op: op=%s; acc=%.1f; val = %.2f", op_name,
  1187. acc, val);
  1188. /* Check if we need to process further */
  1189. if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
  1190. if (rspamd_ast_node_done (elt, acc)) {
  1191. msg_debug_expression ("optimizer: done");
  1192. return acc;
  1193. }
  1194. }
  1195. }
  1196. }
  1197. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_BINARY) {
  1198. GNode *c1 = node->children, *c2;
  1199. c2 = c1->next;
  1200. g_assert (c2->next == NULL);
  1201. gdouble val1, val2;
  1202. msg_debug_expression ("proceed binary operation %s",
  1203. op_name);
  1204. val1 = rspamd_ast_process_node (e, c1, process_data);
  1205. val2 = rspamd_ast_process_node (e, c2, process_data);
  1206. msg_debug_expression ("before op: op=%s; op1 = %.1f, op2 = %.1f",
  1207. op_name, val1, val2);
  1208. acc = rspamd_ast_do_binary_op (elt, val1, val2);
  1209. msg_debug_expression ("after op: op=%s; res=%.1f",
  1210. op_name, acc);
  1211. }
  1212. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  1213. GNode *c1 = node->children;
  1214. g_assert (c1->next == NULL);
  1215. msg_debug_expression ("proceed unary operation %s",
  1216. op_name);
  1217. val = rspamd_ast_process_node (e, c1, process_data);
  1218. msg_debug_expression ("before op: op=%s; op1 = %.1f",
  1219. op_name, val);
  1220. acc = rspamd_ast_do_unary_op (elt, val);
  1221. msg_debug_expression ("after op: op=%s; res=%.1f",
  1222. op_name, acc);
  1223. }
  1224. break;
  1225. }
  1226. return acc;
  1227. }
  1228. static gboolean
  1229. rspamd_ast_cleanup_traverse (GNode *n, gpointer d)
  1230. {
  1231. struct rspamd_expression_elt *elt = n->data;
  1232. elt->value = 0;
  1233. elt->flags = 0;
  1234. return FALSE;
  1235. }
  1236. gdouble
  1237. rspamd_process_expression_closure (struct rspamd_expression *expr,
  1238. rspamd_expression_process_cb cb,
  1239. gint flags,
  1240. gpointer runtime_ud,
  1241. GPtrArray **track)
  1242. {
  1243. struct rspamd_expr_process_data pd;
  1244. gdouble ret = 0;
  1245. g_assert (expr != NULL);
  1246. /* Ensure that stack is empty at this point */
  1247. g_assert (expr->expression_stack->len == 0);
  1248. expr->evals ++;
  1249. memset (&pd, 0, sizeof (pd));
  1250. pd.process_closure = cb;
  1251. pd.flags = flags;
  1252. pd.ud = runtime_ud;
  1253. if (track) {
  1254. pd.trace = g_ptr_array_sized_new (32);
  1255. *track = pd.trace;
  1256. }
  1257. ret = rspamd_ast_process_node (expr, expr->ast, &pd);
  1258. /* Cleanup */
  1259. g_node_traverse (expr->ast, G_IN_ORDER, G_TRAVERSE_ALL, -1,
  1260. rspamd_ast_cleanup_traverse, NULL);
  1261. /* Check if we need to resort */
  1262. if (expr->evals % expr->next_resort == 0) {
  1263. expr->next_resort = ottery_rand_range (MAX_RESORT_EVALS) +
  1264. MIN_RESORT_EVALS;
  1265. /* Set priorities for branches */
  1266. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1267. rspamd_ast_priority_traverse, expr);
  1268. /* Now set less expensive branches to be evaluated first */
  1269. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  1270. rspamd_ast_resort_traverse, NULL);
  1271. }
  1272. return ret;
  1273. }
  1274. gdouble
  1275. rspamd_process_expression_track (struct rspamd_expression *expr,
  1276. gint flags,
  1277. gpointer runtime_ud,
  1278. GPtrArray **track)
  1279. {
  1280. return rspamd_process_expression_closure (expr,
  1281. expr->subr->process, flags, runtime_ud, track);
  1282. }
  1283. gdouble
  1284. rspamd_process_expression (struct rspamd_expression *expr,
  1285. gint flags,
  1286. gpointer runtime_ud)
  1287. {
  1288. return rspamd_process_expression_closure (expr,
  1289. expr->subr->process, flags, runtime_ud, NULL);
  1290. }
  1291. static gboolean
  1292. rspamd_ast_string_traverse (GNode *n, gpointer d)
  1293. {
  1294. GString *res = d;
  1295. gint cnt;
  1296. GNode *cur;
  1297. struct rspamd_expression_elt *elt = n->data;
  1298. const char *op_str = NULL;
  1299. if (elt->type == ELT_ATOM) {
  1300. rspamd_printf_gstring (res, "(%*s)",
  1301. (int)elt->p.atom->len, elt->p.atom->str);
  1302. }
  1303. else if (elt->type == ELT_LIMIT) {
  1304. if (elt->p.lim == (double)(gint64)elt->p.lim) {
  1305. rspamd_printf_gstring (res, "%L", (gint64)elt->p.lim);
  1306. }
  1307. else {
  1308. rspamd_printf_gstring (res, "%f", elt->p.lim);
  1309. }
  1310. }
  1311. else {
  1312. op_str = rspamd_expr_op_to_str (elt->p.op.op);
  1313. g_string_append (res, op_str);
  1314. if (n->children) {
  1315. LL_COUNT(n->children, cur, cnt);
  1316. if (cnt > 2) {
  1317. /* Print n-ary of the operator */
  1318. g_string_append_printf (res, "(%d)", cnt);
  1319. }
  1320. }
  1321. }
  1322. g_string_append_c (res, ' ');
  1323. return FALSE;
  1324. }
  1325. GString *
  1326. rspamd_expression_tostring (struct rspamd_expression *expr)
  1327. {
  1328. GString *res;
  1329. g_assert (expr != NULL);
  1330. res = g_string_new (NULL);
  1331. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1332. rspamd_ast_string_traverse, res);
  1333. /* Last space */
  1334. if (res->len > 0) {
  1335. g_string_erase (res, res->len - 1, 1);
  1336. }
  1337. return res;
  1338. }
  /*
   * Context handed to the atom traversal callback: bundles the user supplied
   * callback together with its opaque argument.
   */
  struct atom_foreach_cbdata {
      rspamd_expression_atom_foreach_cb cb; /* called for every atom element */
      gpointer cbdata; /* passed unchanged as the second argument of cb */
  };
  1343. static gboolean
  1344. rspamd_ast_atom_traverse (GNode *n, gpointer d)
  1345. {
  1346. struct atom_foreach_cbdata *data = d;
  1347. struct rspamd_expression_elt *elt = n->data;
  1348. rspamd_ftok_t tok;
  1349. if (elt->type == ELT_ATOM) {
  1350. tok.begin = elt->p.atom->str;
  1351. tok.len = elt->p.atom->len;
  1352. data->cb (&tok, data->cbdata);
  1353. }
  1354. return FALSE;
  1355. }
  1356. void
  1357. rspamd_expression_atom_foreach (struct rspamd_expression *expr,
  1358. rspamd_expression_atom_foreach_cb cb, gpointer cbdata)
  1359. {
  1360. struct atom_foreach_cbdata data;
  1361. g_assert (expr != NULL);
  1362. data.cb = cb;
  1363. data.cbdata = cbdata;
  1364. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1365. rspamd_ast_atom_traverse, &data);
  1366. }
  1367. gboolean
  1368. rspamd_expression_node_is_op (GNode *node, enum rspamd_expression_op op)
  1369. {
  1370. struct rspamd_expression_elt *elt;
  1371. g_assert (node != NULL);
  1372. elt = node->data;
  1373. if (elt->type == ELT_OP && elt->p.op.op == op) {
  1374. return TRUE;
  1375. }
  1376. return FALSE;
  1377. }