You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

expression.c 35KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "expression.h"
  18. #include "printf.h"
  19. #include "regexp.h"
  20. #include "util.h"
  21. #include "utlist.h"
  22. #include "ottery.h"
  23. #include "libserver/logger.h"
  24. #include "libcryptobox/cryptobox.h"
  25. #include <math.h>
  /* Bit flags for expression elements (bit 0 and bit 1 respectively). */
  #define RSPAMD_EXPR_FLAG_NEGATE (1 << 0)
  #define RSPAMD_EXPR_FLAG_PROCESSED (1 << 1)

  /*
   * Resort tuning: when an expression is parsed, `next_resort` is seeded as
   * ottery_rand_range (MAX_RESORT_EVALS) + MIN_RESORT_EVALS.
   */
  #define MIN_RESORT_EVALS 50
  #define MAX_RESORT_EVALS 150

  /* Tolerance constant; presumably for floating point comparisons (TODO confirm). */
  #define DOUBLE_EPSILON 1e-9
  /* Discriminator for `struct rspamd_expression_elt`: selects the live union member. */
  enum rspamd_expression_elt_type {
      ELT_OP = 0,    /* operator; payload is `p.op` */
      ELT_ATOM = 1,  /* atom produced by the atom parser; payload is `p.atom` */
      ELT_LIMIT = 2  /* numeric literal; payload is `p.lim` */
  };
  /*
   * Bitmask describing an operator: its arity (unary/binary/nary) combined
   * with its class (arithmetic/logical/comparison).
   * See rspamd_expr_op_flags () for the per-operator assignment.
   */
  enum rspamd_expression_op_flag {
      RSPAMD_EXPRESSION_UNARY = 0x01u,
      RSPAMD_EXPRESSION_BINARY = 0x02u,
      RSPAMD_EXPRESSION_NARY = 0x04u,
      RSPAMD_EXPRESSION_ARITHMETIC = 0x08u,
      RSPAMD_EXPRESSION_LOGICAL = 0x10u,
      RSPAMD_EXPRESSION_COMPARISON = 0x20u,
  };
  44. struct rspamd_expression_operation {
  45. enum rspamd_expression_op op;
  46. guint logical_priority;
  47. guint op_flags;
  48. };
  49. struct rspamd_expression_elt {
  50. enum rspamd_expression_elt_type type;
  51. union {
  52. rspamd_expression_atom_t *atom;
  53. struct rspamd_expression_operation op;
  54. gdouble lim;
  55. } p;
  56. gint flags;
  57. gint priority;
  58. gdouble value;
  59. };
  60. struct rspamd_expression {
  61. const struct rspamd_atom_subr *subr;
  62. GArray *expressions;
  63. GPtrArray *expression_stack;
  64. GNode *ast;
  65. gchar *log_id;
  66. guint next_resort;
  67. guint evals;
  68. };
  69. struct rspamd_expr_process_data {
  70. gpointer *ud;
  71. gint flags;
  72. /* != NULL if trace is collected */
  73. GPtrArray *trace;
  74. rspamd_expression_process_cb process_closure;
  75. };
  76. #define msg_debug_expression(...) rspamd_conditional_debug_fast (NULL, NULL, \
  77. rspamd_expression_log_id, "expression", e->log_id, \
  78. G_STRFUNC, \
  79. __VA_ARGS__)
  80. INIT_LOG_MODULE(expression)
  81. static GQuark
  82. rspamd_expr_quark (void)
  83. {
  84. return g_quark_from_static_string ("rspamd-expression");
  85. }
  86. static const gchar * RSPAMD_CONST_FUNCTION
  87. rspamd_expr_op_to_str (enum rspamd_expression_op op);
  88. static const gchar *
  89. rspamd_expr_op_to_str (enum rspamd_expression_op op)
  90. {
  91. const gchar *op_str = NULL;
  92. switch (op) {
  93. case OP_AND:
  94. op_str = "&";
  95. break;
  96. case OP_OR:
  97. op_str = "|";
  98. break;
  99. case OP_MULT:
  100. op_str = "*";
  101. break;
  102. case OP_PLUS:
  103. op_str = "+";
  104. break;
  105. case OP_MINUS:
  106. op_str = "-";
  107. break;
  108. case OP_DIVIDE:
  109. op_str = "/";
  110. break;
  111. case OP_NOT:
  112. op_str = "!";
  113. break;
  114. case OP_GE:
  115. op_str = ">=";
  116. break;
  117. case OP_GT:
  118. op_str = ">";
  119. break;
  120. case OP_LE:
  121. op_str = "<=";
  122. break;
  123. case OP_LT:
  124. op_str = "<";
  125. break;
  126. case OP_OBRACE:
  127. op_str = "(";
  128. break;
  129. case OP_CBRACE:
  130. op_str = ")";
  131. break;
  132. default:
  133. op_str = "???";
  134. break;
  135. }
  136. return op_str;
  137. }
  /*
   * Pointer to the last element of GArray `ar` holding elements of `type`.
   * The array must not be empty: with len == 0 the index `len - 1` wraps around.
   */
  #define G_ARRAY_LAST(ar, type) (&g_array_index((ar), type, (ar)->len - 1))
  139. static void
  140. rspamd_expr_stack_elt_push (GPtrArray *stack,
  141. gpointer elt)
  142. {
  143. g_ptr_array_add (stack, elt);
  144. }
  145. static gpointer
  146. rspamd_expr_stack_elt_pop (GPtrArray *stack)
  147. {
  148. gpointer e;
  149. gint idx;
  150. if (stack->len == 0) {
  151. return NULL;
  152. }
  153. idx = stack->len - 1;
  154. e = g_ptr_array_index (stack, idx);
  155. g_ptr_array_remove_index_fast (stack, idx);
  156. return e;
  157. }
  158. static void
  159. rspamd_expr_stack_push (struct rspamd_expression *expr,
  160. gpointer elt)
  161. {
  162. rspamd_expr_stack_elt_push (expr->expression_stack, elt);
  163. }
  164. static gpointer
  165. rspamd_expr_stack_pop (struct rspamd_expression *expr)
  166. {
  167. return rspamd_expr_stack_elt_pop (expr->expression_stack);
  168. }
  169. static gpointer
  170. rspamd_expr_stack_peek (struct rspamd_expression *expr)
  171. {
  172. gpointer e;
  173. gint idx;
  174. GPtrArray *stack = expr->expression_stack;
  175. if (stack->len == 0) {
  176. return NULL;
  177. }
  178. idx = stack->len - 1;
  179. e = g_ptr_array_index (stack, idx);
  180. return e;
  181. }
  182. /*
  183. * Return operation priority
  184. */
  185. static gint RSPAMD_CONST_FUNCTION
  186. rspamd_expr_logic_priority (enum rspamd_expression_op op);
  187. static gint
  188. rspamd_expr_logic_priority (enum rspamd_expression_op op)
  189. {
  190. gint ret = 0;
  191. switch (op) {
  192. case OP_NOT:
  193. ret = 7;
  194. break;
  195. case OP_MULT:
  196. case OP_DIVIDE:
  197. ret = 6;
  198. break;
  199. case OP_PLUS:
  200. case OP_MINUS:
  201. ret = 5;
  202. break;
  203. case OP_GE:
  204. case OP_GT:
  205. case OP_LE:
  206. case OP_LT:
  207. ret = 4;
  208. break;
  209. case OP_AND:
  210. ret = 3;
  211. break;
  212. case OP_OR:
  213. ret = 2;
  214. break;
  215. case OP_OBRACE:
  216. case OP_CBRACE:
  217. ret = 1;
  218. break;
  219. case OP_INVALID:
  220. ret = -1;
  221. break;
  222. }
  223. return ret;
  224. }
  225. static guint RSPAMD_CONST_FUNCTION
  226. rspamd_expr_op_flags (enum rspamd_expression_op op);
  227. static guint
  228. rspamd_expr_op_flags (enum rspamd_expression_op op)
  229. {
  230. guint ret = 0;
  231. switch (op) {
  232. case OP_NOT:
  233. ret |= RSPAMD_EXPRESSION_UNARY|RSPAMD_EXPRESSION_LOGICAL;
  234. break;
  235. case OP_MULT:
  236. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
  237. break;
  238. case OP_DIVIDE:
  239. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
  240. break;
  241. case OP_PLUS:
  242. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
  243. break;
  244. case OP_MINUS:
  245. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
  246. break;
  247. case OP_GE:
  248. case OP_GT:
  249. case OP_LE:
  250. case OP_LT:
  251. ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_COMPARISON;
  252. break;
  253. case OP_AND:
  254. case OP_OR:
  255. ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
  256. break;
  257. case OP_OBRACE:
  258. case OP_CBRACE:
  259. case OP_INVALID:
  260. break;
  261. }
  262. return ret;
  263. }
  264. /*
  265. * Return FALSE if symbol is not operation symbol (operand)
  266. * Return TRUE if symbol is operation symbol
  267. */
  268. static gboolean RSPAMD_CONST_FUNCTION
  269. rspamd_expr_is_operation_symbol (gchar a);
  270. static gboolean
  271. rspamd_expr_is_operation_symbol (gchar a)
  272. {
  273. switch (a) {
  274. case '!':
  275. case '&':
  276. case '|':
  277. case '(':
  278. case ')':
  279. case '>':
  280. case '<':
  281. case '+':
  282. case '*':
  283. case '-':
  284. case '/':
  285. return TRUE;
  286. }
  287. return FALSE;
  288. }
  289. /* Return character representation of operation */
  290. static enum rspamd_expression_op
  291. rspamd_expr_str_to_op (const gchar *a, const gchar *end, const gchar **next)
  292. {
  293. enum rspamd_expression_op op = OP_INVALID;
  294. g_assert (a < end);
  295. switch (*a) {
  296. case '!':
  297. case '&':
  298. case '|':
  299. case '+':
  300. case '*':
  301. case '/':
  302. case '-':
  303. case '(':
  304. case ')': {
  305. if (a < end - 1) {
  306. if ((a[0] == '&' && a[1] == '&') ||
  307. (a[0] == '|' && a[1] == '|')) {
  308. *next = a + 2;
  309. }
  310. else {
  311. *next = a + 1;
  312. }
  313. }
  314. else {
  315. *next = end;
  316. }
  317. /* XXX: not especially effective */
  318. switch (*a) {
  319. case '!':
  320. op = OP_NOT;
  321. break;
  322. case '&':
  323. op = OP_AND;
  324. break;
  325. case '*':
  326. op = OP_MULT;
  327. break;
  328. case '|':
  329. op = OP_OR;
  330. break;
  331. case '+':
  332. op = OP_PLUS;
  333. break;
  334. case '/':
  335. op = OP_DIVIDE;
  336. break;
  337. case '-':
  338. op = OP_MINUS;
  339. break;
  340. case ')':
  341. op = OP_CBRACE;
  342. break;
  343. case '(':
  344. op = OP_OBRACE;
  345. break;
  346. default:
  347. op = OP_INVALID;
  348. break;
  349. }
  350. break;
  351. }
  352. case 'O':
  353. case 'o':
  354. if ((gulong)(end - a) >= sizeof ("or") &&
  355. g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
  356. *next = a + sizeof ("or") - 1;
  357. op = OP_OR;
  358. }
  359. break;
  360. case 'A':
  361. case 'a':
  362. if ((gulong)(end - a) >= sizeof ("and") &&
  363. g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
  364. *next = a + sizeof ("and") - 1;
  365. op = OP_AND;
  366. }
  367. break;
  368. case 'N':
  369. case 'n':
  370. if ((gulong)(end - a) >= sizeof ("not") &&
  371. g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
  372. *next = a + sizeof ("not") - 1;
  373. op = OP_NOT;
  374. }
  375. break;
  376. case '>':
  377. if (a < end - 1 && a[1] == '=') {
  378. *next = a + 2;
  379. op = OP_GE;
  380. }
  381. else {
  382. *next = a + 1;
  383. op = OP_GT;
  384. }
  385. break;
  386. case '<':
  387. if (a < end - 1 && a[1] == '=') {
  388. *next = a + 2;
  389. op = OP_LE;
  390. }
  391. else {
  392. *next = a + 1;
  393. op = OP_LT;
  394. }
  395. break;
  396. default:
  397. op = OP_INVALID;
  398. break;
  399. }
  400. return op;
  401. }
  402. static void
  403. rspamd_expression_destroy (struct rspamd_expression *expr)
  404. {
  405. guint i;
  406. struct rspamd_expression_elt *elt;
  407. if (expr != NULL) {
  408. if (expr->subr->destroy) {
  409. /* Free atoms */
  410. for (i = 0; i < expr->expressions->len; i ++) {
  411. elt = &g_array_index (expr->expressions,
  412. struct rspamd_expression_elt, i);
  413. if (elt->type == ELT_ATOM) {
  414. expr->subr->destroy (elt->p.atom);
  415. }
  416. }
  417. }
  418. if (expr->expressions) {
  419. g_array_free (expr->expressions, TRUE);
  420. }
  421. if (expr->expression_stack) {
  422. g_ptr_array_free (expr->expression_stack, TRUE);
  423. }
  424. if (expr->ast) {
  425. g_node_destroy (expr->ast);
  426. }
  427. if (expr->log_id) {
  428. g_free (expr->log_id);
  429. }
  430. g_free (expr);
  431. }
  432. }
  433. static gboolean
  434. rspamd_ast_add_node (struct rspamd_expression *e,
  435. GPtrArray *operands,
  436. struct rspamd_expression_elt *op,
  437. GError **err)
  438. {
  439. GNode *res, *a1, *a2, *test;
  440. g_assert (op->type == ELT_OP);
  441. if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  442. /* Unary operator */
  443. struct rspamd_expression_elt *test_elt;
  444. res = g_node_new (op);
  445. a1 = rspamd_expr_stack_elt_pop (operands);
  446. if (a1 == NULL) {
  447. g_set_error (err, rspamd_expr_quark(), EINVAL, "no operand to "
  448. "unary '%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  449. g_node_destroy (res);
  450. return FALSE;
  451. }
  452. g_node_append (res, a1);
  453. test_elt = a1->data;
  454. if (test_elt->type == ELT_ATOM) {
  455. test_elt->p.atom->parent = res;
  456. msg_debug_expression ("added unary op %s to AST; operand: %*s",
  457. rspamd_expr_op_to_str (op->p.op.op),
  458. (int)test_elt->p.atom->len, test_elt->p.atom->str);
  459. }
  460. else {
  461. msg_debug_expression ("added unary op %s to AST; operand type: %d",
  462. rspamd_expr_op_to_str (op->p.op.op),
  463. test_elt->type);
  464. }
  465. }
  466. else {
  467. struct rspamd_expression_elt *e1, *e2;
  468. /* For binary/nary operators we might want to examine chains */
  469. a2 = rspamd_expr_stack_elt_pop (operands);
  470. a1 = rspamd_expr_stack_elt_pop (operands);
  471. if (a2 == NULL) {
  472. g_set_error (err, rspamd_expr_quark(), EINVAL, "no left operand to "
  473. "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  474. return FALSE;
  475. }
  476. if (a1 == NULL) {
  477. g_set_error (err, rspamd_expr_quark(), EINVAL, "no right operand to "
  478. "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
  479. return FALSE;
  480. }
  481. /* Nary stuff */
  482. if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  483. /*
  484. * We convert a set of ops like X + Y + Z to a nary tree like
  485. * X Y Z +
  486. * for the longest possible prefix of atoms/limits
  487. */
  488. /* First try with a1 */
  489. test = a1;
  490. e1 = test->data;
  491. if (e1->type == ELT_OP && e1->p.op.op == op->p.op.op) {
  492. /* Add children */
  493. g_node_append (test, a2);
  494. rspamd_expr_stack_elt_push (operands, a1);
  495. msg_debug_expression ("added nary op %s to AST merged with the first operand",
  496. rspamd_expr_op_to_str (op->p.op.op));
  497. return TRUE;
  498. }
  499. /* Now test a2 */
  500. test = a2;
  501. e2 = test->data;
  502. if (e2->type == ELT_OP && e2->p.op.op == op->p.op.op) {
  503. /* Add children */
  504. g_node_prepend (test, a1);
  505. rspamd_expr_stack_elt_push (operands, a2);
  506. msg_debug_expression ("added nary op %s to AST merged with the second operand",
  507. rspamd_expr_op_to_str (op->p.op.op));
  508. return TRUE;
  509. }
  510. }
  511. /* No optimizations possible, so create a new level */
  512. res = g_node_new (op);
  513. g_node_append (res, a1);
  514. g_node_append (res, a2);
  515. e1 = a1->data;
  516. e2 = a2->data;
  517. if (e1->type == ELT_ATOM) {
  518. e1->p.atom->parent = res;
  519. }
  520. if (e2->type == ELT_ATOM) {
  521. e2->p.atom->parent = res;
  522. }
  523. if (e1->type == ELT_ATOM && e2->type == ELT_ATOM) {
  524. msg_debug_expression ("added binary op %s to AST; operands: (%*s; %*s)",
  525. rspamd_expr_op_to_str (op->p.op.op),
  526. (int) e1->p.atom->len, e1->p.atom->str,
  527. (int) e2->p.atom->len, e2->p.atom->str);
  528. }
  529. else {
  530. msg_debug_expression ("added binary op %s to AST; operands (types): (%d; %d)",
  531. rspamd_expr_op_to_str (op->p.op.op),
  532. e1->type,
  533. e2->type);
  534. }
  535. }
  536. /* Push back resulting node to the stack */
  537. rspamd_expr_stack_elt_push (operands, res);
  538. return TRUE;
  539. }
  540. static gboolean
  541. rspamd_ast_priority_traverse (GNode *node, gpointer d)
  542. {
  543. struct rspamd_expression_elt *elt = node->data, *cur_elt;
  544. struct rspamd_expression *expr = d;
  545. gint cnt = 0;
  546. GNode *cur;
  547. if (node->children) {
  548. cur = node->children;
  549. while (cur) {
  550. cur_elt = cur->data;
  551. cnt += cur_elt->priority;
  552. cur = cur->next;
  553. }
  554. elt->priority = cnt;
  555. }
  556. else {
  557. /* It is atom or limit */
  558. g_assert (elt->type != ELT_OP);
  559. if (elt->type == ELT_LIMIT) {
  560. /* Always push limit first */
  561. elt->priority = 0;
  562. }
  563. else {
  564. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY;
  565. if (expr->subr->priority != NULL) {
  566. elt->priority = RSPAMD_EXPRESSION_MAX_PRIORITY -
  567. expr->subr->priority (elt->p.atom);
  568. }
  569. elt->p.atom->hits = 0;
  570. elt->p.atom->avg_ticks = 0.0;
  571. }
  572. }
  573. return FALSE;
  574. }
  575. #define ATOM_PRIORITY(a) ((a)->p.atom->hits / ((a)->p.atom->avg_ticks > 0 ? \
  576. (a)->p.atom->avg_ticks * 10000000 : 1.0))
  577. static gint
  578. rspamd_ast_priority_cmp (GNode *a, GNode *b)
  579. {
  580. struct rspamd_expression_elt *ea = a->data, *eb = b->data;
  581. gdouble w1, w2;
  582. if (ea->type == ELT_LIMIT) {
  583. return 1;
  584. }
  585. else if (eb->type == ELT_LIMIT) {
  586. return -1;
  587. }
  588. /* Special logic for atoms */
  589. if (ea->type == ELT_ATOM && eb->type == ELT_ATOM &&
  590. ea->priority == eb->priority) {
  591. w1 = ATOM_PRIORITY (ea);
  592. w2 = ATOM_PRIORITY (eb);
  593. ea->p.atom->hits = 0;
  594. ea->p.atom->avg_ticks = 0.0;
  595. return w1 - w2;
  596. }
  597. else {
  598. return ea->priority - eb->priority;
  599. }
  600. }
  601. static gboolean
  602. rspamd_ast_resort_traverse (GNode *node, gpointer unused)
  603. {
  604. GNode *children, *last;
  605. struct rspamd_expression_elt *elt;
  606. elt = (struct rspamd_expression_elt *)node->data;
  607. /*
  608. * We sort merely logical operations, everything else is dangerous
  609. */
  610. if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) {
  611. if (node->children) {
  612. children = node->children;
  613. last = g_node_last_sibling (children);
  614. /* Needed for utlist compatibility */
  615. children->prev = last;
  616. DL_SORT (node->children, rspamd_ast_priority_cmp);
  617. /* Restore GLIB compatibility */
  618. children = node->children;
  619. children->prev = NULL;
  620. }
  621. }
  622. return FALSE;
  623. }
  624. static struct rspamd_expression_elt *
  625. rspamd_expr_dup_elt (rspamd_mempool_t *pool, struct rspamd_expression_elt *elt)
  626. {
  627. struct rspamd_expression_elt *n;
  628. n = rspamd_mempool_alloc (pool, sizeof (*n));
  629. memcpy (n, elt, sizeof (*n));
  630. return n;
  631. }
  632. gboolean
  633. rspamd_parse_expression (const gchar *line, gsize len,
  634. const struct rspamd_atom_subr *subr, gpointer subr_data,
  635. rspamd_mempool_t *pool, GError **err,
  636. struct rspamd_expression **target)
  637. {
  638. struct rspamd_expression *e;
  639. struct rspamd_expression_elt elt;
  640. rspamd_expression_atom_t *atom;
  641. rspamd_regexp_t *num_re;
  642. enum rspamd_expression_op op, op_stack;
  643. const gchar *p, *c, *end;
  644. GPtrArray *operand_stack;
  645. GNode *tmp;
  646. enum {
  647. PARSE_ATOM = 0,
  648. PARSE_OP,
  649. PARSE_LIM,
  650. SKIP_SPACES
  651. } state = PARSE_ATOM;
  652. g_assert (line != NULL);
  653. g_assert (subr != NULL && subr->parse != NULL);
  654. if (len == 0) {
  655. len = strlen (line);
  656. }
  657. memset (&elt, 0, sizeof (elt));
  658. num_re = rspamd_regexp_cache_create (NULL,
  659. "/^(?:[+-]?([0-9]*[.])?[0-9]+)(?:\\s+|[)]|$)/", NULL, NULL);
  660. p = line;
  661. c = line;
  662. end = line + len;
  663. e = g_malloc0 (sizeof (*e));
  664. e->expressions = g_array_new (FALSE, FALSE,
  665. sizeof (struct rspamd_expression_elt));
  666. operand_stack = g_ptr_array_sized_new (32);
  667. e->ast = NULL;
  668. e->expression_stack = g_ptr_array_sized_new (32);
  669. e->subr = subr;
  670. e->evals = 0;
  671. e->next_resort = ottery_rand_range (MAX_RESORT_EVALS) + MIN_RESORT_EVALS;
  672. e->log_id = g_malloc0 (RSPAMD_LOG_ID_LEN + 1);
  673. guint64 h = rspamd_cryptobox_fast_hash (line, len, 0xdeadbabe);
  674. rspamd_snprintf (e->log_id, RSPAMD_LOG_ID_LEN + 1, "%xL", h);
  675. msg_debug_expression ("start to parse expression '%*s'", (int)len, line);
  676. /* Shunting-yard algorithm */
  677. while (p < end) {
  678. switch (state) {
  679. case PARSE_ATOM:
  680. if (g_ascii_isspace (*p)) {
  681. state = SKIP_SPACES;
  682. continue;
  683. }
  684. else if (rspamd_expr_is_operation_symbol (*p)) {
  685. /* Lookahead */
  686. if (p + 1 < end) {
  687. gchar t = *(p + 1);
  688. if (t == ':') {
  689. /* Special case, treat it as an atom */
  690. }
  691. else if (*p == '/') {
  692. /* Lookahead for division operation to distinguish from regexp */
  693. const gchar *track = p + 1;
  694. /* Skip spaces */
  695. while (track < end && g_ascii_isspace (*track)) {
  696. track++;
  697. }
  698. /* Check for a number */
  699. if (rspamd_regexp_search (num_re,
  700. track,
  701. end - track,
  702. NULL,
  703. NULL,
  704. FALSE,
  705. NULL)) {
  706. state = PARSE_OP;
  707. msg_debug_expression ("found divide operation");
  708. continue;
  709. }
  710. msg_debug_expression ("false divide operation");
  711. /* Fallback to PARSE_ATOM state */
  712. }
  713. else if (*p == '-') {
  714. /* - is used in composites, so we need to distinguish - from
  715. * 1) unary minus of a limit!
  716. * 2) -BLAH in composites
  717. * Decision is simple: require a space after binary `-` op
  718. */
  719. if (g_ascii_isspace (t)) {
  720. state = PARSE_OP;
  721. continue;
  722. }
  723. /* Fallback to PARSE_ATOM state */
  724. msg_debug_expression ("false minus operation");
  725. }
  726. else {
  727. /* Generic operation */
  728. state = PARSE_OP;
  729. continue;
  730. }
  731. }
  732. else {
  733. state = PARSE_OP;
  734. continue;
  735. }
  736. }
  737. /*
  738. * First of all, we check some pre-conditions:
  739. * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op
  740. * 2) if we have full numeric string, then we check for
  741. * the following expression:
  742. * ^\d+\s*[><]$
  743. * and check the operation on stack
  744. */
  745. if ((gulong)(end - p) > sizeof ("and ") &&
  746. (g_ascii_strncasecmp (p, "and ", sizeof ("and ") - 1) == 0 ||
  747. g_ascii_strncasecmp (p, "not ", sizeof ("not ") - 1) == 0 )) {
  748. state = PARSE_OP;
  749. }
  750. else if ((gulong)(end - p) > sizeof ("or ") &&
  751. g_ascii_strncasecmp (p, "or ", sizeof ("or ") - 1) == 0) {
  752. state = PARSE_OP;
  753. }
  754. else {
  755. /*
  756. * If we have any comparison or arithmetic operator in the stack, then try
  757. * to parse limit
  758. */
  759. op = GPOINTER_TO_INT (rspamd_expr_stack_peek (e));
  760. if (op == OP_MULT || op == OP_MINUS || op == OP_DIVIDE ||
  761. op == OP_PLUS || (op >= OP_LT && op <= OP_GE)) {
  762. if (rspamd_regexp_search (num_re,
  763. p,
  764. end - p,
  765. NULL,
  766. NULL,
  767. FALSE,
  768. NULL)) {
  769. c = p;
  770. state = PARSE_LIM;
  771. continue;
  772. }
  773. /* Fallback to atom parsing */
  774. }
  775. /* Try to parse atom */
  776. atom = subr->parse (p, end - p, pool, subr_data, err);
  777. if (atom == NULL || atom->len == 0) {
  778. /* We couldn't parse the atom, so go out */
  779. if (err != NULL && *err == NULL) {
  780. g_set_error (err,
  781. rspamd_expr_quark (),
  782. 500,
  783. "Cannot parse atom: callback function failed"
  784. " to parse '%.*s'",
  785. (int) (end - p),
  786. p);
  787. }
  788. goto error_label;
  789. }
  790. if (atom->str == NULL) {
  791. atom->str = p;
  792. }
  793. p = p + atom->len;
  794. /* Push to output */
  795. elt.type = ELT_ATOM;
  796. elt.p.atom = atom;
  797. g_array_append_val (e->expressions, elt);
  798. rspamd_expr_stack_elt_push (operand_stack,
  799. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  800. msg_debug_expression ("found atom: %*s; pushed onto operand stack (%d size)",
  801. (int)atom->len, atom->str, operand_stack->len);
  802. }
  803. break;
  804. case PARSE_LIM:
  805. if ((g_ascii_isdigit (*p) || *p == '-' || *p == '.')
  806. && p < end - 1) {
  807. p ++;
  808. }
  809. else {
  810. if (p == end - 1 && g_ascii_isdigit (*p)) {
  811. p ++;
  812. }
  813. if (p - c > 0) {
  814. elt.type = ELT_LIMIT;
  815. elt.p.lim = strtod (c, NULL);
  816. g_array_append_val (e->expressions, elt);
  817. rspamd_expr_stack_elt_push (operand_stack,
  818. g_node_new (rspamd_expr_dup_elt (pool, &elt)));
  819. msg_debug_expression ("found limit: %.1f; pushed onto operand stack (%d size)",
  820. elt.p.lim, operand_stack->len);
  821. c = p;
  822. state = SKIP_SPACES;
  823. }
  824. else {
  825. g_set_error (err, rspamd_expr_quark(), 400, "Empty number");
  826. goto error_label;
  827. }
  828. }
  829. break;
  830. case PARSE_OP:
  831. op = rspamd_expr_str_to_op (p, end, &p);
  832. if (op == OP_INVALID) {
  833. g_set_error (err, rspamd_expr_quark(), 500, "Bad operator %c",
  834. *p);
  835. goto error_label;
  836. }
  837. else if (op == OP_OBRACE) {
  838. /*
  839. * If the token is a left parenthesis, then push it onto
  840. * the stack.
  841. */
  842. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  843. msg_debug_expression ("found obrace, pushed to operators stack (%d size)",
  844. e->expression_stack->len);
  845. }
  846. else if (op == OP_CBRACE) {
  847. /*
  848. * Until the token at the top of the stack is a left
  849. * parenthesis, pop operators off the stack onto the
  850. * output queue.
  851. *
  852. * Pop the left parenthesis from the stack,
  853. * but not onto the output queue.
  854. *
  855. * If the stack runs out without finding a left parenthesis,
  856. * then there are mismatched parentheses.
  857. */
  858. msg_debug_expression ("found cbrace, rewind operators stack (%d size)",
  859. e->expression_stack->len);
  860. do {
  861. op = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  862. if (op == OP_INVALID) {
  863. g_set_error (err, rspamd_expr_quark(), 600,
  864. "Braces mismatch");
  865. goto error_label;
  866. }
  867. guint op_priority = rspamd_expr_logic_priority (op);
  868. msg_debug_expression ("found op: %s; priority = %d",
  869. rspamd_expr_op_to_str (op), op_priority);
  870. if (op != OP_OBRACE) {
  871. elt.type = ELT_OP;
  872. elt.p.op.op = op;
  873. elt.p.op.op_flags = rspamd_expr_op_flags (op);
  874. elt.p.op.logical_priority = op_priority;
  875. g_array_append_val (e->expressions, elt);
  876. if (!rspamd_ast_add_node (e, operand_stack,
  877. rspamd_expr_dup_elt (pool, &elt), err)) {
  878. goto error_label;
  879. }
  880. }
  881. } while (op != OP_OBRACE);
  882. }
  883. else {
  884. /*
  885. * While there is an operator token, o2, at the top of
  886. * the operator stack, and either:
  887. *
  888. * - o1 is left-associative and its precedence is less than
  889. * or equal to that of o2, or
  890. * - o1 is right associative, and has precedence less than
  891. * that of o2,
  892. *
  893. * then pop o2 off the operator stack, onto the output queue;
  894. *
  895. * push o1 onto the operator stack.
  896. */
  897. for (;;) {
  898. op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
  899. if (op_stack == OP_INVALID) {
  900. /* Stack is empty */
  901. msg_debug_expression ("no operations in operators stack");
  902. break;
  903. }
  904. /* We ignore associativity for now */
  905. guint op_priority = rspamd_expr_logic_priority (op),
  906. stack_op_priority = rspamd_expr_logic_priority (op_stack);
  907. msg_debug_expression ("operators stack %d; operands stack: %d; "
  908. "process operation '%s'(%d); pop operation '%s'(%d)",
  909. e->expression_stack->len,
  910. operand_stack->len,
  911. rspamd_expr_op_to_str (op), op_priority,
  912. rspamd_expr_op_to_str (op_stack), stack_op_priority);
  913. if (op_stack != OP_OBRACE &&
  914. op_priority < stack_op_priority) {
  915. elt.type = ELT_OP;
  916. elt.p.op.op = op_stack;
  917. elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
  918. elt.p.op.logical_priority = op_priority;
  919. g_array_append_val (e->expressions, elt);
  920. if (!rspamd_ast_add_node (e, operand_stack,
  921. rspamd_expr_dup_elt (pool, &elt), err)) {
  922. goto error_label;
  923. }
  924. }
  925. else {
  926. /* Push op_stack back */
  927. msg_debug_expression ("operators stack %d; operands stack: %d; "
  928. "process operation '%s'(%d); push back to stack '%s'(%d)",
  929. e->expression_stack->len,
  930. operand_stack->len,
  931. rspamd_expr_op_to_str (op), op_priority,
  932. rspamd_expr_op_to_str (op_stack), stack_op_priority);
  933. rspamd_expr_stack_push (e, GINT_TO_POINTER (op_stack));
  934. break;
  935. }
  936. }
  937. /* Push new operator itself */
  938. msg_debug_expression ("operators stack %d; operands stack: %d; "
  939. "process operation '%s'; push to stack",
  940. e->expression_stack->len,
  941. operand_stack->len,
  942. rspamd_expr_op_to_str (op));
  943. rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
  944. }
  945. state = SKIP_SPACES;
  946. break;
  947. case SKIP_SPACES:
  948. if (g_ascii_isspace (*p)) {
  949. p ++;
  950. }
  951. else if (rspamd_expr_is_operation_symbol (*p)) {
  952. state = PARSE_OP;
  953. }
  954. else {
  955. state = PARSE_ATOM;
  956. }
  957. break;
  958. }
  959. }
  960. /* Now we process the stack and push operators to the output */
  961. while ((op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e)))
  962. != OP_INVALID) {
  963. msg_debug_expression ("operators stack %d; operands stack: %d; "
  964. "rewind stack; op: %s",
  965. e->expression_stack->len,
  966. operand_stack->len,
  967. rspamd_expr_op_to_str (op));
  968. if (op_stack != OP_OBRACE) {
  969. elt.type = ELT_OP;
  970. elt.p.op.op = op_stack;
  971. elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
  972. elt.p.op.logical_priority = rspamd_expr_logic_priority (op_stack);
  973. g_array_append_val (e->expressions, elt);
  974. if (!rspamd_ast_add_node (e, operand_stack,
  975. rspamd_expr_dup_elt (pool, &elt), err)) {
  976. goto error_label;
  977. }
  978. }
  979. else {
  980. g_set_error (err, rspamd_expr_quark(), 600,
  981. "Braces mismatch");
  982. goto error_label;
  983. }
  984. }
  985. if (operand_stack->len != 1) {
  986. g_set_error (err, rspamd_expr_quark(), 601,
  987. "Operators mismatch: %d elts in stack", operand_stack->len);
  988. goto error_label;
  989. }
  990. e->ast = rspamd_expr_stack_elt_pop (operand_stack);
  991. g_ptr_array_free (operand_stack, TRUE);
  992. /* Set priorities for branches */
  993. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  994. rspamd_ast_priority_traverse, e);
  995. /* Now set less expensive branches to be evaluated first */
  996. g_node_traverse (e->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  997. rspamd_ast_resort_traverse, NULL);
  998. if (target) {
  999. *target = e;
  1000. rspamd_mempool_add_destructor (pool,
  1001. (rspamd_mempool_destruct_t)rspamd_expression_destroy, e);
  1002. }
  1003. else {
  1004. rspamd_expression_destroy (e);
  1005. }
  1006. return TRUE;
  1007. error_label:
  1008. msg_debug_expression ("fatal error: %e", *err);
  1009. while ((tmp = rspamd_expr_stack_elt_pop (operand_stack)) != NULL) {
  1010. g_node_destroy (tmp);
  1011. }
  1012. g_ptr_array_free (operand_stack, TRUE);
  1013. rspamd_expression_destroy (e);
  1014. return FALSE;
  1015. }
  1016. /*
  1017. * Node optimizer function: skip nodes that are not relevant
  1018. */
  1019. static gboolean
  1020. rspamd_ast_node_done (struct rspamd_expression_elt *elt, gdouble acc)
  1021. {
  1022. gboolean ret = FALSE;
  1023. g_assert (elt->type == ELT_OP);
  1024. switch (elt->p.op.op) {
  1025. case OP_NOT:
  1026. ret = TRUE;
  1027. break;
  1028. case OP_AND:
  1029. ret = acc == 0;
  1030. break;
  1031. case OP_OR:
  1032. ret = acc != 0;
  1033. break;
  1034. default:
  1035. break;
  1036. }
  1037. return ret;
  1038. }
  1039. static gdouble
  1040. rspamd_ast_do_unary_op (struct rspamd_expression_elt *elt, gdouble operand)
  1041. {
  1042. gdouble ret;
  1043. g_assert (elt->type == ELT_OP);
  1044. switch (elt->p.op.op) {
  1045. case OP_NOT:
  1046. ret = fabs (operand) > DOUBLE_EPSILON ? 0.0 : 1.0;
  1047. break;
  1048. default:
  1049. g_assert_not_reached ();
  1050. }
  1051. return ret;
  1052. }
  1053. static gdouble
  1054. rspamd_ast_do_binary_op (struct rspamd_expression_elt *elt, gdouble op1, gdouble op2)
  1055. {
  1056. gdouble ret;
  1057. g_assert (elt->type == ELT_OP);
  1058. switch (elt->p.op.op) {
  1059. case OP_MINUS:
  1060. ret = op1 - op2;
  1061. break;
  1062. case OP_DIVIDE:
  1063. ret = op1 / op2;
  1064. break;
  1065. case OP_GE:
  1066. ret = op1 >= op2;
  1067. break;
  1068. case OP_GT:
  1069. ret = op1 > op2;
  1070. break;
  1071. case OP_LE:
  1072. ret = op1 <= op2;
  1073. break;
  1074. case OP_LT:
  1075. ret = op1 < op2;
  1076. break;
  1077. case OP_NOT:
  1078. case OP_PLUS:
  1079. case OP_MULT:
  1080. case OP_AND:
  1081. case OP_OR:
  1082. default:
  1083. g_assert_not_reached();
  1084. break;
  1085. }
  1086. return ret;
  1087. }
  1088. static gdouble
  1089. rspamd_ast_do_nary_op (struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
  1090. {
  1091. gdouble ret;
  1092. g_assert (elt->type == ELT_OP);
  1093. if (isnan (acc)) {
  1094. return val;
  1095. }
  1096. switch (elt->p.op.op) {
  1097. case OP_PLUS:
  1098. ret = acc + val;
  1099. break;
  1100. case OP_MULT:
  1101. ret = acc * val;
  1102. break;
  1103. case OP_AND:
  1104. ret = (acc * val);
  1105. break;
  1106. case OP_OR:
  1107. ret = (acc + val);
  1108. break;
  1109. default:
  1110. case OP_NOT:
  1111. case OP_MINUS:
  1112. case OP_DIVIDE:
  1113. case OP_GE:
  1114. case OP_GT:
  1115. case OP_LE:
  1116. case OP_LT:
  1117. g_assert_not_reached();
  1118. break;
  1119. }
  1120. return ret;
  1121. }
  1122. static gdouble
  1123. rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
  1124. struct rspamd_expr_process_data *process_data)
  1125. {
  1126. struct rspamd_expression_elt *elt;
  1127. GNode *cld;
  1128. gdouble acc = NAN;
  1129. gdouble t1, t2, val;
  1130. gboolean calc_ticks = FALSE;
  1131. const gchar *op_name = NULL;
  1132. elt = node->data;
  1133. switch (elt->type) {
  1134. case ELT_ATOM:
  1135. if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) {
  1136. /*
  1137. * Sometimes get ticks for this expression. 'Sometimes' here means
  1138. * that we get lowest 5 bits of the counter `evals` and 5 bits
  1139. * of some shifted address to provide some sort of jittering for
  1140. * ticks evaluation
  1141. */
  1142. if ((e->evals & 0x1F) == (GPOINTER_TO_UINT (node) >> 4 & 0x1F)) {
  1143. calc_ticks = TRUE;
  1144. t1 = rspamd_get_ticks (TRUE);
  1145. }
  1146. elt->value = process_data->process_closure (process_data->ud, elt->p.atom);
  1147. if (fabs (elt->value) > 1e-9) {
  1148. elt->p.atom->hits ++;
  1149. if (process_data->trace) {
  1150. g_ptr_array_add (process_data->trace, elt->p.atom);
  1151. }
  1152. }
  1153. if (calc_ticks) {
  1154. t2 = rspamd_get_ticks (TRUE);
  1155. elt->p.atom->avg_ticks += ((t2 - t1) - elt->p.atom->avg_ticks) /
  1156. (e->evals);
  1157. }
  1158. elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED;
  1159. }
  1160. acc = elt->value;
  1161. msg_debug_expression ("atom: elt=%s; acc=%.1f", elt->p.atom->str, acc);
  1162. break;
  1163. case ELT_LIMIT:
  1164. acc = elt->p.lim;
  1165. msg_debug_expression ("limit: lim=%.1f; acc=%.1f;", elt->p.lim, acc);
  1166. break;
  1167. case ELT_OP:
  1168. g_assert (node->children != NULL);
  1169. op_name = rspamd_expr_op_to_str (elt->p.op.op);
  1170. if (elt->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
  1171. msg_debug_expression ("proceed nary operation %s", op_name);
  1172. /* Proceed all ops in chain */
  1173. DL_FOREACH (node->children, cld) {
  1174. val = rspamd_ast_process_node (e, cld, process_data);
  1175. msg_debug_expression ("before op: op=%s; acc=%.1f; val = %.2f", op_name,
  1176. acc, val);
  1177. acc = rspamd_ast_do_nary_op (elt, val, acc);
  1178. msg_debug_expression ("after op: op=%s; acc=%.1f; val = %.2f", op_name,
  1179. acc, val);
  1180. /* Check if we need to process further */
  1181. if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
  1182. if (rspamd_ast_node_done (elt, acc)) {
  1183. msg_debug_expression ("optimizer: done");
  1184. return acc;
  1185. }
  1186. }
  1187. }
  1188. }
  1189. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_BINARY) {
  1190. GNode *c1 = node->children, *c2;
  1191. c2 = c1->next;
  1192. g_assert (c2->next == NULL);
  1193. gdouble val1, val2;
  1194. msg_debug_expression ("proceed binary operation %s",
  1195. op_name);
  1196. val1 = rspamd_ast_process_node (e, c1, process_data);
  1197. val2 = rspamd_ast_process_node (e, c2, process_data);
  1198. msg_debug_expression ("before op: op=%s; op1 = %.1f, op2 = %.1f",
  1199. op_name, val1, val2);
  1200. acc = rspamd_ast_do_binary_op (elt, val1, val2);
  1201. msg_debug_expression ("after op: op=%s; res=%.1f",
  1202. op_name, acc);
  1203. }
  1204. else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
  1205. GNode *c1 = node->children;
  1206. g_assert (c1->next == NULL);
  1207. msg_debug_expression ("proceed unary operation %s",
  1208. op_name);
  1209. val = rspamd_ast_process_node (e, c1, process_data);
  1210. msg_debug_expression ("before op: op=%s; op1 = %.1f",
  1211. op_name, val);
  1212. acc = rspamd_ast_do_unary_op (elt, val);
  1213. msg_debug_expression ("after op: op=%s; res=%.1f",
  1214. op_name, acc);
  1215. }
  1216. break;
  1217. }
  1218. return acc;
  1219. }
  1220. static gboolean
  1221. rspamd_ast_cleanup_traverse (GNode *n, gpointer d)
  1222. {
  1223. struct rspamd_expression_elt *elt = n->data;
  1224. elt->value = 0;
  1225. elt->flags = 0;
  1226. return FALSE;
  1227. }
  1228. gdouble
  1229. rspamd_process_expression_closure (struct rspamd_expression *expr,
  1230. rspamd_expression_process_cb cb,
  1231. gint flags,
  1232. gpointer runtime_ud,
  1233. GPtrArray **track)
  1234. {
  1235. struct rspamd_expr_process_data pd;
  1236. gdouble ret = 0;
  1237. g_assert (expr != NULL);
  1238. /* Ensure that stack is empty at this point */
  1239. g_assert (expr->expression_stack->len == 0);
  1240. expr->evals ++;
  1241. memset (&pd, 0, sizeof (pd));
  1242. pd.process_closure = cb;
  1243. pd.flags = flags;
  1244. pd.ud = runtime_ud;
  1245. if (track) {
  1246. pd.trace = g_ptr_array_sized_new (32);
  1247. *track = pd.trace;
  1248. }
  1249. ret = rspamd_ast_process_node (expr, expr->ast, &pd);
  1250. /* Cleanup */
  1251. g_node_traverse (expr->ast, G_IN_ORDER, G_TRAVERSE_ALL, -1,
  1252. rspamd_ast_cleanup_traverse, NULL);
  1253. /* Check if we need to resort */
  1254. if (expr->evals % expr->next_resort == 0) {
  1255. expr->next_resort = ottery_rand_range (MAX_RESORT_EVALS) +
  1256. MIN_RESORT_EVALS;
  1257. /* Set priorities for branches */
  1258. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1259. rspamd_ast_priority_traverse, expr);
  1260. /* Now set less expensive branches to be evaluated first */
  1261. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
  1262. rspamd_ast_resort_traverse, NULL);
  1263. }
  1264. return ret;
  1265. }
  1266. gdouble
  1267. rspamd_process_expression_track (struct rspamd_expression *expr,
  1268. gint flags,
  1269. gpointer runtime_ud,
  1270. GPtrArray **track)
  1271. {
  1272. return rspamd_process_expression_closure (expr,
  1273. expr->subr->process, flags, runtime_ud, track);
  1274. }
  1275. gdouble
  1276. rspamd_process_expression (struct rspamd_expression *expr,
  1277. gint flags,
  1278. gpointer runtime_ud)
  1279. {
  1280. return rspamd_process_expression_closure (expr,
  1281. expr->subr->process, flags, runtime_ud, NULL);
  1282. }
  1283. static gboolean
  1284. rspamd_ast_string_traverse (GNode *n, gpointer d)
  1285. {
  1286. GString *res = d;
  1287. gint cnt;
  1288. GNode *cur;
  1289. struct rspamd_expression_elt *elt = n->data;
  1290. const char *op_str = NULL;
  1291. if (elt->type == ELT_ATOM) {
  1292. rspamd_printf_gstring (res, "(%*s)",
  1293. (int)elt->p.atom->len, elt->p.atom->str);
  1294. }
  1295. else if (elt->type == ELT_LIMIT) {
  1296. if (elt->p.lim == (double)(gint64)elt->p.lim) {
  1297. rspamd_printf_gstring (res, "%L", (gint64)elt->p.lim);
  1298. }
  1299. else {
  1300. rspamd_printf_gstring (res, "%f", elt->p.lim);
  1301. }
  1302. }
  1303. else {
  1304. op_str = rspamd_expr_op_to_str (elt->p.op.op);
  1305. g_string_append (res, op_str);
  1306. if (n->children) {
  1307. LL_COUNT(n->children, cur, cnt);
  1308. if (cnt > 2) {
  1309. /* Print n-ary of the operator */
  1310. g_string_append_printf (res, "(%d)", cnt);
  1311. }
  1312. }
  1313. }
  1314. g_string_append_c (res, ' ');
  1315. return FALSE;
  1316. }
  1317. GString *
  1318. rspamd_expression_tostring (struct rspamd_expression *expr)
  1319. {
  1320. GString *res;
  1321. g_assert (expr != NULL);
  1322. res = g_string_new (NULL);
  1323. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1324. rspamd_ast_string_traverse, res);
  1325. /* Last space */
  1326. if (res->len > 0) {
  1327. g_string_erase (res, res->len - 1, 1);
  1328. }
  1329. return res;
  1330. }
  1331. struct atom_foreach_cbdata {
  1332. rspamd_expression_atom_foreach_cb cb;
  1333. gpointer cbdata;
  1334. };
  1335. static gboolean
  1336. rspamd_ast_atom_traverse (GNode *n, gpointer d)
  1337. {
  1338. struct atom_foreach_cbdata *data = d;
  1339. struct rspamd_expression_elt *elt = n->data;
  1340. rspamd_ftok_t tok;
  1341. if (elt->type == ELT_ATOM) {
  1342. tok.begin = elt->p.atom->str;
  1343. tok.len = elt->p.atom->len;
  1344. data->cb (&tok, data->cbdata);
  1345. }
  1346. return FALSE;
  1347. }
  1348. void
  1349. rspamd_expression_atom_foreach (struct rspamd_expression *expr,
  1350. rspamd_expression_atom_foreach_cb cb, gpointer cbdata)
  1351. {
  1352. struct atom_foreach_cbdata data;
  1353. g_assert (expr != NULL);
  1354. data.cb = cb;
  1355. data.cbdata = cbdata;
  1356. g_node_traverse (expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
  1357. rspamd_ast_atom_traverse, &data);
  1358. }
  1359. gboolean
  1360. rspamd_expression_node_is_op (GNode *node, enum rspamd_expression_op op)
  1361. {
  1362. struct rspamd_expression_elt *elt;
  1363. g_assert (node != NULL);
  1364. elt = node->data;
  1365. if (elt->type == ELT_OP && elt->p.op.op == op) {
  1366. return TRUE;
  1367. }
  1368. return FALSE;
  1369. }