You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

expression.c 25KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "expression.h"
  18. #include "printf.h"
  19. #include "regexp.h"
  20. #include "util.h"
  21. #include "utlist.h"
  22. #include "ottery.h"
  23. #include <math.h>
  /* Bits kept in the `flags` field of an expression element */
  #define RSPAMD_EXPR_FLAG_NEGATE (1 << 0)
  /* Set once an atom has been evaluated during the current run */
  #define RSPAMD_EXPR_FLAG_PROCESSED (1 << 1)

  /*
   * The number of evaluations between two re-sorts of the AST is
   * MIN_RESORT_EVALS plus a random offset bounded by MAX_RESORT_EVALS.
   */
  #define MIN_RESORT_EVALS 50
  #define MAX_RESORT_EVALS 150

  /* Magnitude below which a double is treated as zero */
  #define DOUBLE_EPSILON 1e-9
  /* Kind of payload carried by an expression element */
  enum rspamd_expression_elt_type {
      ELT_OP = 0,    /* operator */
      ELT_ATOM = 1,  /* atom handled by the subr callbacks */
      ELT_LIMIT = 2  /* numeric limit */
  };
  34. struct rspamd_expression_elt {
  35. enum rspamd_expression_elt_type type;
  36. union {
  37. rspamd_expression_atom_t *atom;
  38. enum rspamd_expression_op op;
  39. gdouble lim;
  40. } p;
  41. gint flags;
  42. gint priority;
  43. gdouble value;
  44. };
  45. struct rspamd_expression {
  46. const struct rspamd_atom_subr *subr;
  47. GArray *expressions;
  48. GPtrArray *expression_stack;
  49. GNode *ast;
  50. guint next_resort;
  51. guint evals;
  52. };
  53. static GQuark
  54. rspamd_expr_quark (void)
  55. {
  56. return g_quark_from_static_string ("rspamd-expression");
  57. }
  58. static const gchar *
  59. rspamd_expr_op_to_str (enum rspamd_expression_op op)
  60. {
  61. const gchar *op_str = NULL;
  62. switch (op) {
  63. case OP_AND:
  64. op_str = "&";
  65. break;
  66. case OP_OR:
  67. op_str = "|";
  68. break;
  69. case OP_MULT:
  70. op_str = "*";
  71. break;
  72. case OP_PLUS:
  73. op_str = "+";
  74. break;
  75. case OP_NOT:
  76. op_str = "!";
  77. break;
  78. case OP_GE:
  79. op_str = ">=";
  80. break;
  81. case OP_GT:
  82. op_str = ">";
  83. break;
  84. case OP_LE:
  85. op_str = "<=";
  86. break;
  87. case OP_LT:
  88. op_str = "<";
  89. break;
  90. default:
  91. op_str = "???";
  92. break;
  93. }
  94. return op_str;
  95. }
  /*
   * Address of the last element of GArray `ar` whose items have type `type`.
   * The index is computed as `len - 1`, so `ar` must not be empty.
   */
  #define G_ARRAY_LAST(ar, type) (&g_array_index((ar), type, (ar)->len - 1))
  97. static void
  98. rspamd_expr_stack_elt_push (GPtrArray *stack,
  99. gpointer elt)
  100. {
  101. g_ptr_array_add (stack, elt);
  102. }
  103. static gpointer
  104. rspamd_expr_stack_elt_pop (GPtrArray *stack)
  105. {
  106. gpointer e;
  107. gint idx;
  108. if (stack->len == 0) {
  109. return NULL;
  110. }
  111. idx = stack->len - 1;
  112. e = g_ptr_array_index (stack, idx);
  113. g_ptr_array_remove_index_fast (stack, idx);
  114. return e;
  115. }
  116. static void
  117. rspamd_expr_stack_push (struct rspamd_expression *expr,
  118. gpointer elt)
  119. {
  120. rspamd_expr_stack_elt_push (expr->expression_stack, elt);
  121. }
  122. static gpointer
  123. rspamd_expr_stack_pop (struct rspamd_expression *expr)
  124. {
  125. return rspamd_expr_stack_elt_pop (expr->expression_stack);
  126. }
  127. static gpointer
  128. rspamd_expr_stack_peek (struct rspamd_expression *expr)
  129. {
  130. gpointer e;
  131. gint idx;
  132. GPtrArray *stack = expr->expression_stack;
  133. if (stack->len == 0) {
  134. return NULL;
  135. }
  136. idx = stack->len - 1;
  137. e = g_ptr_array_index (stack, idx);
  138. return e;
  139. }
  140. /*
  141. * Return operation priority
  142. */
  143. static gint
  144. rspamd_expr_logic_priority (enum rspamd_expression_op op)
  145. {
  146. gint ret = 0;
  147. switch (op) {
  148. case OP_NOT:
  149. ret = 6;
  150. break;
  151. case OP_PLUS:
  152. ret = 5;
  153. break;
  154. case OP_GE:
  155. case OP_GT:
  156. case OP_LE:
  157. case OP_LT:
  158. ret = 4;
  159. break;
  160. case OP_MULT:
  161. case OP_AND:
  162. ret = 3;
  163. break;
  164. case OP_OR:
  165. ret = 2;
  166. break;
  167. case OP_OBRACE:
  168. case OP_CBRACE:
  169. ret = 1;
  170. break;
  171. case OP_INVALID:
  172. ret = -1;
  173. break;
  174. }
  175. return ret;
  176. }
  177. /*
  178. * Return FALSE if symbol is not operation symbol (operand)
  179. * Return TRUE if symbol is operation symbol
  180. */
  181. static gboolean
  182. rspamd_expr_is_operation_symbol (gchar a)
  183. {
  184. switch (a) {
  185. case '!':
  186. case '&':
  187. case '|':
  188. case '(':
  189. case ')':
  190. case '>':
  191. case '<':
  192. case '+':
  193. case '*':
  194. return TRUE;
  195. }
  196. return FALSE;
  197. }
  198. /* Return character representation of operation */
  199. static enum rspamd_expression_op
  200. rspamd_expr_str_to_op (const gchar *a, const gchar *end, const gchar **next)
  201. {
  202. enum rspamd_expression_op op = OP_INVALID;
  203. g_assert (a < end);
  204. switch (*a) {
  205. case '!':
  206. case '&':
  207. case '|':
  208. case '+':
  209. case '*':
  210. case '(':
  211. case ')': {
  212. if (a < end - 1) {
  213. if ((a[0] == '&' && a[1] == '&') ||
  214. (a[0] == '|' && a[1] == '|')) {
  215. *next = a + 2;
  216. }
  217. else {
  218. *next = a + 1;
  219. }
  220. }
  221. else {
  222. *next = end;
  223. }
  224. /* XXX: not especially effective */
  225. switch (*a) {
  226. case '!':
  227. op = OP_NOT;
  228. break;
  229. case '&':
  230. op = OP_AND;
  231. break;
  232. case '*':
  233. op = OP_MULT;
  234. break;
  235. case '|':
  236. op = OP_OR;
  237. break;
  238. case '+':
  239. op = OP_PLUS;
  240. break;
  241. case ')':
  242. op = OP_CBRACE;
  243. break;
  244. case '(':
  245. op = OP_OBRACE;
  246. break;
  247. default:
  248. op = OP_INVALID;
  249. break;
  250. }
  251. break;
  252. }
  253. case 'O':
  254. case 'o':
  255. if ((gulong)(end - a) >= sizeof ("or") &&
  256. g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
  257. *next = a + sizeof ("or") - 1;
  258. op = OP_OR;
  259. }
  260. break;
  261. case 'A':
  262. case 'a':
  263. if ((gulong)(end - a) >= sizeof ("and") &&
  264. g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
  265. *next = a + sizeof ("and") - 1;
  266. op = OP_AND;
  267. }
  268. break;
  269. case 'N':
  270. case 'n':
  271. if ((gulong)(end - a) >= sizeof ("not") &&
  272. g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
  273. *next = a + sizeof ("not") - 1;
  274. op = OP_NOT;
  275. }
  276. break;
  277. case '>':
  278. if (a < end - 1 && a[1] == '=') {
  279. *next = a + 2;
  280. op = OP_GE;
  281. }
  282. else {
  283. *next = a + 1;
  284. op = OP_GT;
  285. }
  286. break;
  287. case '<':
  288. if (a < end - 1 && a[1] == '=') {
  289. *next = a + 2;
  290. op = OP_LE;
  291. }
  292. else {
  293. *next = a + 1;
  294. op = OP_LT;
  295. }
  296. break;
  297. default:
  298. op = OP_INVALID;
  299. break;
  300. }
  301. return op;
  302. }
  303. static void
  304. rspamd_expression_destroy (struct rspamd_expression *expr)
  305. {
  306. guint i;
  307. struct rspamd_expression_elt *elt;
  308. if (expr != NULL) {
  309. if (expr->subr->destroy) {
  310. /* Free atoms */
  311. for (i = 0; i < expr->expressions->len; i ++) {
  312. elt = &g_array_index (expr->expressions,
  313. struct rspamd_expression_elt, i);
  314. if (elt->type == ELT_ATOM) {
  315. expr->subr->destroy (elt->p.atom);
  316. }
  317. }
  318. }
  319. if (expr->expressions) {
  320. g_array_free (expr->expressions, TRUE);
  321. }
  322. if (expr->expression_stack) {
  323. g_ptr_array_free (expr->expression_stack, TRUE);
  324. }
  325. if (expr->ast) {
  326. g_node_destroy (expr->ast);
  327. }
  328. g_free (expr);
  329. }
  330. }
  331. static gboolean
  332. rspamd_ast_add_node (GPtrArray *operands, struct rspamd_expression_elt *op,
  333. GError **err)
  334. {
  335. GNode *res, *a1, *a2, *test;
  336. struct rspamd_expression_elt *test_elt;
  337. g_assert (op->type == ELT_OP);
  338. if (op->p.op == OP_NOT) {
  339. /* Unary operator */
  340. res = g_node_new (op);
  341. a1 = rspamd_expr_stack_elt_pop (operands);
  342. if (a1 == NULL) {
  343. g_set_error (err, rspamd_expr_quark(), EINVAL, "no operand to "
  344. "unary '%s' operation", rspamd_expr_op_to_str (op->p.op));
  345. g_node_destroy (res);
  346. return FALSE;
  347. }
  348. g_node_append (res, a1);
  349. test_elt = a1->data;
  350. if (test_elt->type == ELT_ATOM) {
  351. test_elt->p.atom->parent = res;
  352. }
  353. }
  354. else {
  355. /* For binary operators we might want to examine chains */
  356. a2 = rspamd_expr_stack_elt_pop (operands);
  357. a1 = rspamd_expr_stack_elt_pop (operands);
  358. if (a2 == NULL) {
  359. g_set_error (err, rspamd_expr_quark(), EINVAL, "no left operand to "
  360. "'%s' operation", rspamd_expr_op_to_str (op->p.op));
  361. return FALSE;
  362. }
  363. if (a1 == NULL) {
  364. g_set_error (err, rspamd_expr_quark(), EINVAL, "no right operand to "
  365. "'%s' operation", rspamd_expr_op_to_str (op->p.op));
  366. return FALSE;
  367. }
  368. /* First try with a1 */
  369. test = a1;
  370. test_elt = test->data;
  371. if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
  372. /* Add children */
  373. g_node_append (test, a2);
  374. rspamd_expr_stack_elt_push (operands, a1);
  375. return TRUE;
  376. }
  377. /* Now test a2 */
  378. test = a2;
  379. test_elt = test->data;
  380. if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
  381. /* Add children */
  382. g_node_prepend (test, a1);
  383. rspamd_expr_stack_elt_push (operands, a2);
  384. return TRUE;
  385. }
  386. /* No optimizations possible, so create new level */
  387. res = g_node_new (op);
  388. g_node_append (res, a1);
  389. g_node_append (res, a2);
  390. test_elt = a1->data;
  391. if (test_elt->type == ELT_ATOM) {
  392. test_elt->p.atom->parent = res;
  393. }
  394. test_elt = a2->data;
  395. if (test_elt->type == ELT_ATOM) {
  396. test_elt->p.atom->parent = res;
  397. }
  398. }
  399. /* Push back resulting node to the stack */
  400. rspamd_expr_stack_elt_push (operands, res);
  401. return TRUE;
  402. }
  403. static gboolean
  404. rspamd_ast_priority_traverse (GNode *node, gpointer d)
  405. {
  406. struct rspamd_expression_elt *elt = node->data, *cur_elt;
  407. struct rspamd_expression *expr = d;
  408. gint cnt = 0;
  409. GNode *cur;
  410. if (node->children) {
  411. cur = node->children;
  412. while (cur) {
  413. cur_elt = cur->data;
  414. cnt += cur_elt->priority;
  415. cur = cur->next;
  416. }
  417. elt->priority = cnt;
  418. }
  419. else {
  420. /* It is atom or limit */
  421. g_assert (elt->type != ELT_OP);
  422. if (elt->type == ELT_LIMIT) {
  423. /* Always push limit first */
  424. elt->priority = 0;
  425. }
  426. else {
  427. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY;
  428. if (expr->subr->priority != NULL) {
  429. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY -
  430. expr->subr->priority (elt->p.atom);
  431. }
  432. elt->p.atom->hits = 0;
  433. elt->p.atom->avg_ticks = 0.0;
  434. }
  435. }
  436. return FALSE;
  437. }
  438. #define ATOM_PRIORITY(a) ((a)->p.atom->hits / ((a)->p.atom->avg_ticks > 0 ? \
  439. (a)->p.atom->avg_ticks * 10000000 : 1.0))
  440. static gint
  441. rspamd_ast_priority_cmp (GNode *a, GNode *b)
  442. {
  443. struct rspamd_expression_elt *ea = a->data, *eb = b->data;
  444. gdouble w1, w2;
  445. if (ea->type == ELT_LIMIT) {
  446. return -1;
  447. }
  448. else if (eb->type == ELT_LIMIT) {
  449. return 1;
  450. }
  451. /* Special logic for atoms */
  452. if (ea->type == ELT_ATOM && eb->type == ELT_ATOM &&
  453. ea->priority == eb->priority) {
  454. w1 = ATOM_PRIORITY (ea);
  455. w2 = ATOM_PRIORITY (eb);
  456. ea->p.atom->hits = 0;
  457. ea->p.atom->avg_ticks = 0.0;
  458. return w1 - w2;
  459. }
  460. else {
  461. return ea->priority - eb->priority;
  462. }
  463. }
  464. static gboolean
  465. rspamd_ast_resort_traverse (GNode *node, gpointer unused)
  466. {
  467. GNode *children, *last;
  468. if (node->children) {
  469. children = node->children;
  470. last = g_node_last_sibling (children);
  471. /* Needed for utlist compatibility */
  472. children->prev = last;
  473. DL_SORT (node->children, rspamd_ast_priority_cmp);
  474. /* Restore GLIB compatibility */
  475. children = node->children;
  476. children->prev = NULL;
  477. }
  478. return FALSE;
  479. }
  480. static struct rspamd_expression_elt *
  481. rspamd_expr_dup_elt (rspamd_mempool_t *pool, struct rspamd_expression_elt *elt)
  482. {
  483. struct rspamd_expression_elt *n;
  484. n = rspamd_mempool_alloc (pool, sizeof (*n));
  485. memcpy (n, elt, sizeof (*n));
  486. return n;
  487. }
  488. gboolean
  489. rspamd_parse_expression (const gchar *line, gsize len,
  490. const struct rspamd_atom_subr *subr, gpointer subr_data,
  491. rspamd_mempool_t *pool, GError **err,
  492. struct rspamd_expression **target)
  493. {
  494. struct rspamd_expression *e;
  495. struct rspamd_expression_elt elt;
  496. rspamd_expression_atom_t *atom;
  497. rspamd_regexp_t *num_re;
  498. enum rspamd_expression_op op, op_stack;
  499. const gchar *p, *c, *end;
  500. GPtrArray *operand_stack;
  501. GNode *tmp;
  502. enum {
  503. PARSE_ATOM = 0,
  504. PARSE_OP,
  505. PARSE_LIM,
  506. SKIP_SPACES
  507. } state = PARSE_ATOM;
  508. g_assert (line != NULL);
  509. g_assert (subr != NULL && subr->parse != NULL);
  510. if (len == 0) {
  511. len = strlen (line);
  512. }
  513. memset (&elt, 0, sizeof (elt));
  514. num_re = rspamd_regexp_cache_create (NULL,
  515. "/^(?:[+-]?([0-9]*[.])?[0-9]+)(?:\\s+|[)]|$)/", NULL, NULL);
  516. p = line;
  517. c = line;
  518. end = line + len;
  519. e = g_malloc0 (sizeof (*e));
  520. e->expressions = g_array_new (FALSE, FALSE,
  521. sizeof (struct rspamd_expression_elt));
  522. operand_stack = g_ptr_array_sized_new (32);
  523. e->ast = NULL;
  524. e->expression_stack = g_ptr_array_sized_new (32);
  525. e->subr = subr;
  526. e->evals = 0;
  527. e->next_resort = ottery_rand_range (MAX_RESORT_EVALS) + MIN_RESORT_EVALS;
  528. /* Shunting-yard algorithm */
  529. while (p < end) {
  530. switch (state) {
  531. case PARSE_ATOM:
  532. if (g_ascii_isspace (*p)) {
  533. state = SKIP_SPACES;
  534. continue;
  535. }
  536. else if (rspamd_expr_is_operation_symbol (*p)) {
  537. if (p + 1 < end) {
  538. gchar t = *(p + 1);
  539. if (t != ':') {
  540. state = PARSE_OP;
  541. continue;
  542. }
  543. }
  544. else {
  545. state = PARSE_OP;
  546. continue;
  547. }
  548. }
  549. /*
  550. * First of all, we check some pre-conditions:
  551. * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op
  552. * 2) if we have full numeric string, then we check for
  553. * the following expression:
  554. * ^\d+\s*[><]$
  555. */
  556. if ((gulong)(end - p) > sizeof ("and ") &&
  557. (g_ascii_strncasecmp (p, "and ", sizeof ("and ") - 1) == 0 ||
  558. g_ascii_strncasecmp (p, "not ", sizeof ("not ") - 1) == 0 )) {
  559. state = PARSE_OP;
  560. }
  561. else if ((gulong)(end - p) > sizeof ("or ") &&
  562. g_ascii_strncasecmp (p, "or ", sizeof ("or ") - 1) == 0) {
  563. state = PARSE_OP;
  564. }
  565. else {
  566. /*
  567. * If we have any comparison operator in the stack, then try
  568. * to parse limit
  569. */
  570. op = GPOINTER_TO_INT (rspamd_expr_stack_peek (e));
  571. if (op >= OP_LT && op <= OP_GE) {
  572. if (rspamd_regexp_search (num_re,
  573. p,
  574. end - p,
  575. NULL,
  576. NULL,
  577. FALSE,
  578. NULL)) {
  579. c = p;
  580. state = PARSE_LIM;
  581. continue;
  582. }
  583. }
  584. /* Try to parse atom */
  585. atom = subr->parse (p, end - p, pool, subr_data, err);
  586. if (atom == NULL || atom->len == 0) {
  587. /* We couldn't parse the atom, so go out */
  588. if (err != NULL && *err == NULL) {
  589. g_set_error (err,
  590. rspamd_expr_quark (),
  591. 500,
  592. "Cannot parse atom: callback function failed"
  593. " to parse '%.*s'",
  594. (int) (end - p),
  595. p);
  596. }
  597. goto err;
  598. }
  599. if (atom->str == NULL) {
  600. atom->str = p;
  601. }
  602. p = p + atom->len;
  603. /* Push to output */
  604. elt.type = ELT_ATOM;
  605. elt.p.atom = atom;
  606. g_array_append_val (e->expressions, elt);
  607. rspamd_expr_stack_elt_push (operand_stack,
  608. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  609. }
  610. break;
  611. case PARSE_LIM:
  612. if ((g_ascii_isdigit (*p) || *p == '-' || *p == '.')
  613. && p < end - 1) {
  614. p ++;
  615. }
  616. else {
  617. if (p == end - 1 && g_ascii_isdigit (*p)) {
  618. p ++;
  619. }
  620. if (p - c > 0) {
  621. elt.type = ELT_LIMIT;
  622. elt.p.lim = strtod (c, NULL);
  623. g_array_append_val (e->expressions, elt);
  624. rspamd_expr_stack_elt_push (operand_stack,
  625. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  626. c = p;
  627. state = SKIP_SPACES;
  628. }
  629. else {
  630. g_set_error (err, rspamd_expr_quark(), 400, "Empty number");
  631. goto err;
  632. }
  633. }
  634. break;
  635. case PARSE_OP:
  636. op = rspamd_expr_str_to_op (p, end, &p);
  637. if (op == OP_INVALID) {
  638. g_set_error (err, rspamd_expr_quark(), 500, "Bad operator %c",
  639. *p);
  640. goto err;
  641. }
  642. else if (op == OP_OBRACE) {
  643. /*
  644. * If the token is a left parenthesis, then push it onto
  645. * the stack.
  646. */
  647. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  648. }
  649. else if (op == OP_CBRACE) {
  650. /*
  651. * Until the token at the top of the stack is a left
  652. * parenthesis, pop operators off the stack onto the
  653. * output queue.
  654. *
  655. * Pop the left parenthesis from the stack,
  656. * but not onto the output queue.
  657. *
  658. * If the stack runs out without finding a left parenthesis,
  659. * then there are mismatched parentheses.
  660. */
  661. do {
  662. op = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  663. if (op == OP_INVALID) {
  664. g_set_error (err, rspamd_expr_quark(), 600,
  665. "Braces mismatch");
  666. goto err;
  667. }
  668. if (op != OP_OBRACE) {
  669. elt.type = ELT_OP;
  670. elt.p.op = op;
  671. g_array_append_val (e->expressions, elt);
  672. if (!rspamd_ast_add_node (operand_stack,
  673. rspamd_expr_dup_elt (pool, &elt), err)) {
  674. goto err;
  675. }
  676. }
  677. } while (op != OP_OBRACE);
  678. }
  679. else {
  680. /*
  681. * While there is an operator token, o2, at the top of
  682. * the operator stack, and either:
  683. *
  684. * - o1 is left-associative and its precedence is less than
  685. * or equal to that of o2, or
  686. * - o1 is right associative, and has precedence less than
  687. * that of o2,
  688. *
  689. * then pop o2 off the operator stack, onto the output queue;
  690. *
  691. * push o1 onto the operator stack.
  692. */
  693. for (;;) {
  694. op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  695. if (op_stack == OP_INVALID) {
  696. /* Stack is empty */
  697. break;
  698. }
  699. /* We ignore associativity for now */
  700. if (op_stack != OP_OBRACE &&
  701. rspamd_expr_logic_priority (op) <
  702. rspamd_expr_logic_priority (op_stack)) {
  703. elt.type = ELT_OP;
  704. elt.p.op = op_stack;
  705. g_array_append_val (e->expressions, elt);
  706. if (!rspamd_ast_add_node (operand_stack,
  707. rspamd_expr_dup_elt (pool, &elt), err)) {
  708. goto err;
  709. }
  710. }
  711. else {
  712. /* Push op_stack back */
  713. rspamd_expr_stack_push (e, GINT_TO_POINTER (op_stack));
  714. break;
  715. }
  716. }
  717. /* Push new operator itself */
  718. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  719. }
  720. state = SKIP_SPACES;
  721. break;
  722. case SKIP_SPACES:
  723. if (g_ascii_isspace (*p)) {
  724. p ++;
  725. }
  726. else if (rspamd_expr_is_operation_symbol (*p)) {
  727. state = PARSE_OP;
  728. }
  729. else {
  730. state = PARSE_ATOM;
  731. }
  732. break;
  733. }
  734. }
  735. /* Now we process the stack and push operators to the output */
  736. while ((op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e)))
  737. != OP_INVALID) {
  738. if (op_stack != OP_OBRACE) {
  739. elt.type = ELT_OP;
  740. elt.p.op = op_stack;
  741. g_array_append_val (e->expressions, elt);
  742. if (!rspamd_ast_add_node (operand_stack,
  743. rspamd_expr_dup_elt (pool, &elt), err)) {
  744. goto err;
  745. }
  746. }
  747. else {
  748. g_set_error (err, rspamd_expr_quark(), 600,
  749. "Braces mismatch");
  750. goto err;
  751. }
  752. }
  753. if (operand_stack->len != 1) {
  754. g_set_error (err, rspamd_expr_quark(), 601,
  755. "Operators mismatch");
  756. goto err;
  757. }
  758. e->ast = rspamd_expr_stack_elt_pop (operand_stack);
  759. g_ptr_array_free (operand_stack, TRUE);
  760. /* Set priorities for branches */
  761. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  762. rspamd_ast_priority_traverse, e);
  763. /* Now set less expensive branches to be evaluated first */
  764. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  765. rspamd_ast_resort_traverse, NULL);
  766. if (target) {
  767. *target = e;
  768. rspamd_mempool_add_destructor (pool,
  769. (rspamd_mempool_destruct_t)rspamd_expression_destroy, e);
  770. }
  771. else {
  772. rspamd_expression_destroy (e);
  773. }
  774. return TRUE;
  775. err:
  776. while ((tmp = rspamd_expr_stack_elt_pop (operand_stack)) != NULL) {
  777. g_node_destroy (tmp);
  778. }
  779. g_ptr_array_free (operand_stack, TRUE);
  780. rspamd_expression_destroy (e);
  781. return FALSE;
  782. }
  783. static gboolean
  784. rspamd_ast_node_done (struct rspamd_expression_elt *elt,
  785. struct rspamd_expression_elt *parelt, gdouble acc, gdouble lim)
  786. {
  787. gboolean ret = FALSE;
  788. g_assert (elt->type == ELT_OP);
  789. switch (elt->p.op) {
  790. case OP_NOT:
  791. ret = TRUE;
  792. break;
  793. case OP_PLUS:
  794. if (parelt && lim > 0) {
  795. g_assert (parelt->type == ELT_OP);
  796. switch (parelt->p.op) {
  797. case OP_GE:
  798. ret = acc >= lim;
  799. break;
  800. case OP_GT:
  801. ret = acc > lim;
  802. break;
  803. case OP_LE:
  804. ret = acc <= lim;
  805. break;
  806. case OP_LT:
  807. ret = acc < lim;
  808. break;
  809. default:
  810. ret = FALSE;
  811. break;
  812. }
  813. }
  814. break;
  815. case OP_GE:
  816. ret = acc >= lim;
  817. break;
  818. case OP_GT:
  819. ret = acc > lim;
  820. break;
  821. case OP_LE:
  822. ret = acc <= lim;
  823. break;
  824. case OP_LT:
  825. ret = acc < lim;
  826. break;
  827. case OP_MULT:
  828. case OP_AND:
  829. ret = !acc;
  830. break;
  831. case OP_OR:
  832. ret = !!acc;
  833. break;
  834. default:
  835. g_assert (0);
  836. break;
  837. }
  838. return ret;
  839. }
  840. static gdouble
  841. rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val,
  842. gdouble acc, gdouble lim, gboolean first_elt)
  843. {
  844. gdouble ret = val;
  845. g_assert (elt->type == ELT_OP);
  846. switch (elt->p.op) {
  847. case OP_NOT:
  848. ret = fabs (val) > DOUBLE_EPSILON ? 0.0 : 1.0;
  849. break;
  850. case OP_PLUS:
  851. ret = acc + val;
  852. break;
  853. case OP_GE:
  854. ret = first_elt ? (val >= lim) : (acc >= lim);
  855. break;
  856. case OP_GT:
  857. ret = first_elt ? (val > lim) : (acc > lim);
  858. break;
  859. case OP_LE:
  860. ret = first_elt ? (val <= lim) : (acc <= lim);
  861. break;
  862. case OP_LT:
  863. ret = first_elt ? (val < lim) : (acc < lim);
  864. break;
  865. case OP_MULT:
  866. case OP_AND:
  867. ret = first_elt ? (val) : (acc * val);
  868. break;
  869. case OP_OR:
  870. ret = first_elt ? (val) : (acc + val);
  871. break;
  872. default:
  873. g_assert (0);
  874. break;
  875. }
  876. return ret;
  877. }
  878. static gdouble
  879. rspamd_ast_process_node (struct rspamd_expression *expr, GNode *node,
  880. struct rspamd_expr_process_data *process_data)
  881. {
  882. struct rspamd_expression_elt *elt, *celt, *parelt = NULL;
  883. GNode *cld;
  884. gdouble acc = NAN, lim = 0;
  885. gdouble t1, t2, val;
  886. gboolean calc_ticks = FALSE;
  887. elt = node->data;
  888. switch (elt->type) {
  889. case ELT_ATOM:
  890. if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) {
  891. /*
  892. * Sometimes get ticks for this expression. 'Sometimes' here means
  893. * that we get lowest 5 bits of the counter `evals` and 5 bits
  894. * of some shifted address to provide some sort of jittering for
  895. * ticks evaluation
  896. */
  897. if ((expr->evals & 0x1F) == (GPOINTER_TO_UINT (node) >> 4 & 0x1F)) {
  898. calc_ticks = TRUE;
  899. t1 = rspamd_get_ticks (TRUE);
  900. }
  901. elt->value = expr->subr->process (process_data, elt->p.atom);
  902. if (fabs (elt->value) > 1e-9) {
  903. elt->p.atom->hits ++;
  904. if (process_data->trace) {
  905. g_ptr_array_add (process_data->trace, elt->p.atom);
  906. }
  907. }
  908. if (calc_ticks) {
  909. t2 = rspamd_get_ticks (TRUE);
  910. elt->p.atom->avg_ticks += ((t2 - t1) - elt->p.atom->avg_ticks) /
  911. (expr->evals);
  912. }
  913. elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED;
  914. }
  915. acc = elt->value;
  916. break;
  917. case ELT_LIMIT:
  918. acc = elt->p.lim;
  919. break;
  920. case ELT_OP:
  921. g_assert (node->children != NULL);
  922. /* Try to find limit at the parent node */
  923. if (node->parent) {
  924. parelt = node->parent->data;
  925. celt = node->parent->children->data;
  926. if (celt->type == ELT_LIMIT) {
  927. lim = celt->p.lim;
  928. }
  929. }
  930. DL_FOREACH (node->children, cld) {
  931. celt = cld->data;
  932. /* Save limit if we've found it */
  933. if (celt->type == ELT_LIMIT) {
  934. lim = celt->p.lim;
  935. continue;
  936. }
  937. val = rspamd_ast_process_node (expr, cld, process_data);
  938. if (isnan (acc)) {
  939. acc = rspamd_ast_do_op (elt, val, 0, lim, TRUE);
  940. }
  941. else {
  942. acc = rspamd_ast_do_op (elt, val, acc, lim, FALSE);
  943. }
  944. if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
  945. if (rspamd_ast_node_done (elt, parelt, acc, lim)) {
  946. return acc;
  947. }
  948. }
  949. }
  950. break;
  951. }
  952. return acc;
  953. }
  954. static gboolean
  955. rspamd_ast_cleanup_traverse (GNode *n, gpointer d)
  956. {
  957. struct rspamd_expression_elt *elt = n->data;
  958. elt->value = 0;
  959. elt->flags = 0;
  960. return FALSE;
  961. }
  962. gdouble
  963. rspamd_process_expression_track (struct rspamd_expression *expr, struct rspamd_expr_process_data *process_data)
  964. {
  965. gdouble ret = 0;
  966. g_assert (expr != NULL);
  967. /* Ensure that stack is empty at this point */
  968. g_assert (expr->expression_stack->len == 0);
  969. expr->evals ++;
  970. ret = rspamd_ast_process_node (expr, expr->ast, process_data);
  971. /* Cleanup */
  972. g_node_traverse (expr->ast, G_IN_ORDER, G_TRAVERSE_ALL, -1,
  973. rspamd_ast_cleanup_traverse, NULL);
  974. /* Check if we need to resort */
  975. if (expr->evals % expr->next_resort == 0) {
  976. expr->next_resort = ottery_rand_range (MAX_RESORT_EVALS) +
  977. MIN_RESORT_EVALS;
  978. /* Set priorities for branches */
  979. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  980. rspamd_ast_priority_traverse, expr);
  981. /* Now set less expensive branches to be evaluated first */
  982. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  983. rspamd_ast_resort_traverse, NULL);
  984. }
  985. return ret;
  986. }
  987. gdouble
  988. rspamd_process_expression (struct rspamd_expression *expr, struct rspamd_expr_process_data *process_data)
  989. {
  990. return rspamd_process_expression_track (expr, process_data);
  991. }
  992. static gboolean
  993. rspamd_ast_string_traverse (GNode *n, gpointer d)
  994. {
  995. GString *res = d;
  996. gint cnt;
  997. GNode *cur;
  998. struct rspamd_expression_elt *elt = n->data;
  999. const char *op_str = NULL;
  1000. if (elt->type == ELT_ATOM) {
  1001. rspamd_printf_gstring (res, "(%*s)",
  1002. (int)elt->p.atom->len, elt->p.atom->str);
  1003. }
  1004. else if (elt->type == ELT_LIMIT) {
  1005. if (elt->p.lim == (double)(gint64)elt->p.lim) {
  1006. rspamd_printf_gstring (res, "%L", (gint64)elt->p.lim);
  1007. }
  1008. else {
  1009. rspamd_printf_gstring (res, "%f", elt->p.lim);
  1010. }
  1011. }
  1012. else {
  1013. op_str = rspamd_expr_op_to_str (elt->p.op);
  1014. g_string_append (res, op_str);
  1015. if (n->children) {
  1016. LL_COUNT(n->children, cur, cnt);
  1017. if (cnt > 2) {
  1018. /* Print n-ary of the operator */
  1019. g_string_append_printf (res, "(%d)", cnt);
  1020. }
  1021. }
  1022. }
  1023. g_string_append_c (res, ' ');
  1024. return FALSE;
  1025. }
  1026. GString *
  1027. rspamd_expression_tostring (struct rspamd_expression *expr)
  1028. {
  1029. GString *res;
  1030. g_assert (expr != NULL);
  1031. res = g_string_new (NULL);
  1032. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1033. rspamd_ast_string_traverse, res);
  1034. /* Last space */
  1035. if (res->len > 0) {
  1036. g_string_erase (res, res->len - 1, 1);
  1037. }
  1038. return res;
  1039. }
  1040. struct atom_foreach_cbdata {
  1041. rspamd_expression_atom_foreach_cb cb;
  1042. gpointer cbdata;
  1043. };
  1044. static gboolean
  1045. rspamd_ast_atom_traverse (GNode *n, gpointer d)
  1046. {
  1047. struct atom_foreach_cbdata *data = d;
  1048. struct rspamd_expression_elt *elt = n->data;
  1049. rspamd_ftok_t tok;
  1050. if (elt->type == ELT_ATOM) {
  1051. tok.begin = elt->p.atom->str;
  1052. tok.len = elt->p.atom->len;
  1053. data->cb (&tok, data->cbdata);
  1054. }
  1055. return FALSE;
  1056. }
  1057. void
  1058. rspamd_expression_atom_foreach (struct rspamd_expression *expr,
  1059. rspamd_expression_atom_foreach_cb cb, gpointer cbdata)
  1060. {
  1061. struct atom_foreach_cbdata data;
  1062. g_assert (expr != NULL);
  1063. data.cb = cb;
  1064. data.cbdata = cbdata;
  1065. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1066. rspamd_ast_atom_traverse, &data);
  1067. }
  1068. gboolean
  1069. rspamd_expression_node_is_op (GNode *node, enum rspamd_expression_op op)
  1070. {
  1071. struct rspamd_expression_elt *elt;
  1072. g_assert (node != NULL);
  1073. elt = node->data;
  1074. if (elt->type == ELT_OP && elt->p.op == op) {
  1075. return TRUE;
  1076. }
  1077. return FALSE;
  1078. }