You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rspamd_symcache.c 93KB


  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "util.h"
  18. #include "rspamd.h"
  19. #include "message.h"
  20. #include "rspamd_symcache.h"
  21. #include "cfg_file.h"
  22. #include "lua/lua_common.h"
  23. #include "unix-std.h"
  24. #include "contrib/t1ha/t1ha.h"
  25. #include "libserver/worker_util.h"
  26. #include "khash.h"
  27. #include "utlist.h"
  28. #include <math.h>
  29. #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
  30. # include <stdalign.h>
  31. #endif
  32. #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
  33. cache->static_pool->tag.tagname, cache->cfg->checksum, \
  34. G_STRFUNC, \
  35. __VA_ARGS__)
  36. #define msg_warn_cache(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
  37. cache->static_pool->tag.tagname, cache->cfg->checksum, \
  38. G_STRFUNC, \
  39. __VA_ARGS__)
  40. #define msg_info_cache(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
  41. cache->static_pool->tag.tagname, cache->cfg->checksum, \
  42. G_STRFUNC, \
  43. __VA_ARGS__)
  44. #define msg_debug_cache(...) rspamd_conditional_debug_fast (NULL, NULL, \
  45. rspamd_symcache_log_id, "symcache", cache->cfg->checksum, \
  46. G_STRFUNC, \
  47. __VA_ARGS__)
  48. #define msg_debug_cache_task(...) rspamd_conditional_debug_fast (NULL, NULL, \
  49. rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
  50. G_STRFUNC, \
  51. __VA_ARGS__)
  52. INIT_LOG_MODULE(symcache)
  /*
   * Accessors for the per-task `started` / `finished` flags of a dynamic item.
   * The `checkpoint` argument is accepted but not used by any of the macros.
   */
  #define CHECK_START_BIT(checkpoint, dyn_item)  ((dyn_item)->started)
  #define SET_START_BIT(checkpoint, dyn_item)    ((dyn_item)->started = 1)
  #define CLR_START_BIT(checkpoint, dyn_item)    ((dyn_item)->started = 0)

  #define CHECK_FINISH_BIT(checkpoint, dyn_item) ((dyn_item)->finished)
  #define SET_FINISH_BIT(checkpoint, dyn_item)   ((dyn_item)->finished = 1)
  #define CLR_FINISH_BIT(checkpoint, dyn_item)   ((dyn_item)->finished = 0)
  65. static const guchar rspamd_symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0 };
  66. struct rspamd_symcache_header {
  67. guchar magic[8];
  68. guint nitems;
  69. guchar checksum[64];
  70. guchar unused[128];
  71. };
  72. struct symcache_order {
  73. GPtrArray *d;
  74. guint id;
  75. ref_entry_t ref;
  76. };
  77. /*
  78. * This structure is optimised to store ids list:
  79. * - If the first element is -1 then use dynamic part, else use static part
  80. */
  81. struct rspamd_symcache_id_list {
  82. union {
  83. guint32 st[4];
  84. struct {
  85. guint32 e; /* First element */
  86. guint16 len;
  87. guint16 allocated;
  88. guint *n;
  89. } dyn;
  90. };
  91. };
  92. struct rspamd_symcache_condition {
  93. gint cb;
  94. struct rspamd_symcache_condition *prev, *next;
  95. };
  96. struct rspamd_symcache_item {
  97. /* This block is likely shared */
  98. struct rspamd_symcache_item_stat *st;
  99. guint64 last_count;
  100. struct rspamd_counter_data *cd;
  101. gchar *symbol;
  102. const gchar *type_descr;
  103. gint type;
  104. /* Callback data */
  105. union {
  106. struct {
  107. symbol_func_t func;
  108. gpointer user_data;
  109. struct rspamd_symcache_condition *conditions;
  110. } normal;
  111. struct {
  112. gint parent;
  113. struct rspamd_symcache_item *parent_item;
  114. } virtual;
  115. } specific;
  116. /* Condition of execution */
  117. gboolean enabled;
  118. /* Used for async stuff checks */
  119. gboolean is_filter;
  120. gboolean is_virtual;
  121. /* Priority */
  122. gint priority;
  123. /* Topological order */
  124. guint order;
  125. gint id;
  126. gint frequency_peaks;
  127. /* Settings ids */
  128. struct rspamd_symcache_id_list allowed_ids;
  129. /* Allows execution but not symbols insertion */
  130. struct rspamd_symcache_id_list exec_only_ids;
  131. struct rspamd_symcache_id_list forbidden_ids;
  132. /* Dependencies */
  133. GPtrArray *deps;
  134. GPtrArray *rdeps;
  135. /* Container */
  136. GPtrArray *container;
  137. };
  138. struct rspamd_symcache {
  139. /* Hash table for fast access */
  140. GHashTable *items_by_symbol;
  141. GPtrArray *items_by_id;
  142. struct symcache_order *items_by_order;
  143. GPtrArray *connfilters;
  144. GPtrArray *prefilters;
  145. GPtrArray *filters;
  146. GPtrArray *postfilters;
  147. GPtrArray *composites;
  148. GPtrArray *idempotent;
  149. GPtrArray *virtual;
  150. GList *delayed_deps;
  151. GList *delayed_conditions;
  152. rspamd_mempool_t *static_pool;
  153. guint64 cksum;
  154. gdouble total_weight;
  155. guint used_items;
  156. guint stats_symbols_count;
  157. guint64 total_hits;
  158. guint id;
  159. struct rspamd_config *cfg;
  160. gdouble reload_time;
  161. gdouble last_profile;
  162. gint peak_cb;
  163. };
  164. struct rspamd_symcache_dynamic_item {
  165. guint16 start_msec; /* Relative to task time */
  166. unsigned started:1;
  167. unsigned finished:1;
  168. /* unsigned pad:14; */
  169. guint32 async_events;
  170. };
  171. struct cache_dependency {
  172. struct rspamd_symcache_item *item; /* Real dependency */
  173. gchar *sym; /* Symbolic dep name */
  174. gint id; /* Real from */
  175. gint vid; /* Virtual from */
  176. };
  177. struct delayed_cache_dependency {
  178. gchar *from;
  179. gchar *to;
  180. };
  181. struct delayed_cache_condition {
  182. gchar *sym;
  183. gint cbref;
  184. lua_State *L;
  185. };
  186. struct cache_savepoint {
  187. guint version;
  188. guint items_inflight;
  189. gboolean profile;
  190. gboolean has_slow;
  191. gdouble profile_start;
  192. struct rspamd_scan_result *rs;
  193. gdouble lim;
  194. struct rspamd_symcache_item *cur_item;
  195. struct symcache_order *order;
  196. struct rspamd_symcache_dynamic_item dynamic_items[];
  197. };
  198. struct rspamd_cache_refresh_cbdata {
  199. gdouble last_resort;
  200. ev_timer resort_ev;
  201. struct rspamd_symcache *cache;
  202. struct rspamd_worker *w;
  203. struct ev_loop *event_loop;
  204. };
  /* At least once per minute */
  #define PROFILE_MAX_TIME (60.0)
  /* For messages larger than 2Mb enable profiling */
  #define PROFILE_MESSAGE_SIZE_THRESHOLD (1024 * 1024 * 2)
  /* Enable profile at least once per this amount of messages processed */
  #define PROFILE_PROBABILITY (0.01)

  /* Lower bounds applied by SCORE_FUN to weight, frequency and time */
  #define TIME_ALPHA (1.0)
  #define WEIGHT_ALPHA (0.1)
  #define FREQ_ALPHA (0.01)

  /*
   * Heuristic score of a symbol: weight * frequency / time.
   *
   * Non-positive weight and frequency are replaced by WEIGHT_ALPHA and
   * FREQ_ALPHA, and time is clamped from below by TIME_ALPHA.
   * Every argument is parenthesised, so expressions can be passed safely;
   * note that each argument is still evaluated twice.
   */
  #define SCORE_FUN(w, f, t) (((w) > 0 ? (w) : WEIGHT_ALPHA) \
      * ((f) > 0 ? (f) : FREQ_ALPHA) \
      / ((t) > TIME_ALPHA ? (t) : TIME_ALPHA))
  218. static gboolean rspamd_symcache_check_symbol (struct rspamd_task *task,
  219. struct rspamd_symcache *cache,
  220. struct rspamd_symcache_item *item,
  221. struct cache_savepoint *checkpoint);
  222. static gboolean rspamd_symcache_check_deps (struct rspamd_task *task,
  223. struct rspamd_symcache *cache,
  224. struct rspamd_symcache_item *item,
  225. struct cache_savepoint *checkpoint,
  226. guint recursion,
  227. gboolean check_only);
  228. static void rspamd_symcache_disable_symbol_checkpoint (struct rspamd_task *task,
  229. struct rspamd_symcache *cache, const gchar *symbol);
  230. static void rspamd_symcache_enable_symbol_checkpoint (struct rspamd_task *task,
  231. struct rspamd_symcache *cache, const gchar *symbol);
  232. static void
  233. rspamd_symcache_order_dtor (gpointer p)
  234. {
  235. struct symcache_order *ord = p;
  236. g_ptr_array_free (ord->d, TRUE);
  237. g_free (ord);
  238. }
  239. static void
  240. rspamd_symcache_order_unref (gpointer p)
  241. {
  242. struct symcache_order *ord = p;
  243. REF_RELEASE (ord);
  244. }
  245. static gint
  246. rspamd_id_cmp (const void * a, const void * b)
  247. {
  248. return (*(guint32*)a - *(guint32*)b);
  249. }
  250. static struct symcache_order *
  251. rspamd_symcache_order_new (struct rspamd_symcache *cache,
  252. gsize nelts)
  253. {
  254. struct symcache_order *ord;
  255. ord = g_malloc0 (sizeof (*ord));
  256. ord->d = g_ptr_array_sized_new (nelts);
  257. ord->id = cache->id;
  258. REF_INIT_RETAIN (ord, rspamd_symcache_order_dtor);
  259. return ord;
  260. }
  261. static inline struct rspamd_symcache_dynamic_item*
  262. rspamd_symcache_get_dynamic (struct cache_savepoint *checkpoint,
  263. struct rspamd_symcache_item *item)
  264. {
  265. return &checkpoint->dynamic_items[item->id];
  266. }
  267. static inline struct rspamd_symcache_item *
  268. rspamd_symcache_find_filter (struct rspamd_symcache *cache,
  269. const gchar *name,
  270. bool resolve_parent)
  271. {
  272. struct rspamd_symcache_item *item;
  273. g_assert (cache != NULL);
  274. if (name == NULL) {
  275. return NULL;
  276. }
  277. item = g_hash_table_lookup (cache->items_by_symbol, name);
  278. if (item != NULL) {
  279. if (resolve_parent && item->is_virtual && !(item->type & SYMBOL_TYPE_GHOST)) {
  280. item =item->specific.virtual.parent_item;
  281. }
  282. return item;
  283. }
  284. return NULL;
  285. }
  286. const gchar *
  287. rspamd_symcache_get_parent (struct rspamd_symcache *cache,
  288. const gchar *symbol)
  289. {
  290. struct rspamd_symcache_item *item, *parent;
  291. g_assert (cache != NULL);
  292. if (symbol == NULL) {
  293. return NULL;
  294. }
  295. item = g_hash_table_lookup (cache->items_by_symbol, symbol);
  296. if (item != NULL) {
  297. if (item->is_virtual && !(item->type & SYMBOL_TYPE_GHOST)) {
  298. parent = item->specific.virtual.parent_item;
  299. if (!parent) {
  300. item->specific.virtual.parent_item = g_ptr_array_index (cache->items_by_id,
  301. item->specific.virtual.parent);
  302. parent = item->specific.virtual.parent_item;
  303. }
  304. item = parent;
  305. }
  306. return item->symbol;
  307. }
  308. return NULL;
  309. }
  310. static gint
  311. postfilters_cmp (const void *p1, const void *p2, gpointer ud)
  312. {
  313. const struct rspamd_symcache_item *i1 = *(struct rspamd_symcache_item **)p1,
  314. *i2 = *(struct rspamd_symcache_item **)p2;
  315. double w1, w2;
  316. w1 = i1->priority;
  317. w2 = i2->priority;
  318. if (w1 > w2) {
  319. return 1;
  320. }
  321. else if (w1 < w2) {
  322. return -1;
  323. }
  324. return 0;
  325. }
  326. static gint
  327. prefilters_cmp (const void *p1, const void *p2, gpointer ud)
  328. {
  329. const struct rspamd_symcache_item *i1 = *(struct rspamd_symcache_item **)p1,
  330. *i2 = *(struct rspamd_symcache_item **)p2;
  331. double w1, w2;
  332. w1 = i1->priority;
  333. w2 = i2->priority;
  334. if (w1 < w2) {
  335. return 1;
  336. }
  337. else if (w1 > w2) {
  338. return -1;
  339. }
  340. return 0;
  341. }
  /*
   * Topological sort marks are kept in the two highest bits of item->order:
   * bit 31 is the permanent mark, bit 30 is the temporary one.
   * TSORT_UNMASK yields the order value with both marks cleared.
   */
  #define TSORT_PERM_BIT (1u << 31)
  #define TSORT_TEMP_BIT (1u << 30)

  #define TSORT_MARK_PERM(it) ((it)->order |= TSORT_PERM_BIT)
  #define TSORT_MARK_TEMP(it) ((it)->order |= TSORT_TEMP_BIT)
  #define TSORT_IS_MARKED_PERM(it) ((it)->order & TSORT_PERM_BIT)
  #define TSORT_IS_MARKED_TEMP(it) ((it)->order & TSORT_TEMP_BIT)
  #define TSORT_UNMASK(it) ((it)->order & ~(TSORT_PERM_BIT | TSORT_TEMP_BIT))
  347. static gint
  348. cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
  349. {
  350. const struct rspamd_symcache_item *i1 = *(struct rspamd_symcache_item **)p1,
  351. *i2 = *(struct rspamd_symcache_item **)p2;
  352. struct rspamd_symcache *cache = ud;
  353. double w1, w2;
  354. double weight1, weight2;
  355. double f1 = 0, f2 = 0, t1, t2, avg_freq, avg_weight;
  356. guint o1 = TSORT_UNMASK (i1), o2 = TSORT_UNMASK (i2);
  357. if (o1 == o2) {
  358. /* Heurstic */
  359. if (i1->priority == i2->priority) {
  360. avg_freq = ((gdouble) cache->total_hits / cache->used_items);
  361. avg_weight = (cache->total_weight / cache->used_items);
  362. f1 = (double) i1->st->total_hits / avg_freq;
  363. f2 = (double) i2->st->total_hits / avg_freq;
  364. weight1 = fabs (i1->st->weight) / avg_weight;
  365. weight2 = fabs (i2->st->weight) / avg_weight;
  366. t1 = i1->st->avg_time;
  367. t2 = i2->st->avg_time;
  368. w1 = SCORE_FUN (weight1, f1, t1);
  369. w2 = SCORE_FUN (weight2, f2, t2);
  370. } else {
  371. /* Strict sorting */
  372. w1 = abs (i1->priority);
  373. w2 = abs (i2->priority);
  374. }
  375. }
  376. else {
  377. w1 = o1;
  378. w2 = o2;
  379. }
  380. if (w2 > w1) {
  381. return 1;
  382. }
  383. else if (w2 < w1) {
  384. return -1;
  385. }
  386. return 0;
  387. }
  388. static void
  389. rspamd_symcache_tsort_visit (struct rspamd_symcache *cache,
  390. struct rspamd_symcache_item *it,
  391. guint cur_order)
  392. {
  393. struct cache_dependency *dep;
  394. guint i;
  395. if (TSORT_IS_MARKED_PERM (it)) {
  396. if (cur_order > TSORT_UNMASK (it)) {
  397. /* Need to recalculate the whole chain */
  398. it->order = cur_order; /* That also removes all masking */
  399. }
  400. else {
  401. /* We are fine, stop DFS */
  402. return;
  403. }
  404. }
  405. else if (TSORT_IS_MARKED_TEMP (it)) {
  406. msg_err_cache ("cyclic dependencies found when checking '%s'!",
  407. it->symbol);
  408. return;
  409. }
  410. TSORT_MARK_TEMP (it);
  411. msg_debug_cache ("visiting node: %s (%d)", it->symbol, cur_order);
  412. PTR_ARRAY_FOREACH (it->deps, i, dep) {
  413. msg_debug_cache ("visiting dep: %s (%d)", dep->item->symbol, cur_order + 1);
  414. rspamd_symcache_tsort_visit (cache, dep->item, cur_order + 1);
  415. }
  416. it->order = cur_order;
  417. TSORT_MARK_PERM (it);
  418. }
  419. static void
  420. rspamd_symcache_resort (struct rspamd_symcache *cache)
  421. {
  422. struct symcache_order *ord;
  423. guint i;
  424. guint64 total_hits = 0;
  425. struct rspamd_symcache_item *it;
  426. ord = rspamd_symcache_order_new (cache, cache->filters->len);
  427. for (i = 0; i < cache->filters->len; i ++) {
  428. it = g_ptr_array_index (cache->filters, i);
  429. total_hits += it->st->total_hits;
  430. it->order = 0;
  431. g_ptr_array_add (ord->d, it);
  432. }
  433. /* Topological sort, intended to be O(N) but my implementation
  434. * is not linear (semi-linear usually) as I want to make it as
  435. * simple as possible.
  436. * On each stage it does DFS for unseen nodes. In theory, that
  437. * can be more complicated than linear - O(N^2) for specially
  438. * crafted data. But I don't care.
  439. */
  440. PTR_ARRAY_FOREACH (ord->d, i, it) {
  441. if (it->order == 0) {
  442. rspamd_symcache_tsort_visit (cache, it, 1);
  443. }
  444. }
  445. /*
  446. * Now we have all sorted and can do some heuristical sort, keeping
  447. * topological order invariant
  448. */
  449. g_ptr_array_sort_with_data (ord->d, cache_logic_cmp, cache);
  450. cache->total_hits = total_hits;
  451. if (cache->items_by_order) {
  452. REF_RELEASE (cache->items_by_order);
  453. }
  454. cache->items_by_order = ord;
  455. }
  456. static void
  457. rspamd_symcache_propagate_dep (struct rspamd_symcache *cache,
  458. struct rspamd_symcache_item *it,
  459. struct rspamd_symcache_item *dit)
  460. {
  461. const guint *ids;
  462. guint nids = 0;
  463. msg_debug_cache ("check id propagation for dependency %s from %s",
  464. it->symbol, dit->symbol);
  465. ids = rspamd_symcache_get_allowed_settings_ids (cache, dit->symbol, &nids);
  466. /* TODO: merge? */
  467. if (nids > 0) {
  468. msg_info_cache ("propagate allowed ids from %s to %s",
  469. dit->symbol, it->symbol);
  470. rspamd_symcache_set_allowed_settings_ids (cache, it->symbol, ids,
  471. nids);
  472. }
  473. ids = rspamd_symcache_get_forbidden_settings_ids (cache, dit->symbol, &nids);
  474. if (nids > 0) {
  475. msg_info_cache ("propagate forbidden ids from %s to %s",
  476. dit->symbol, it->symbol);
  477. rspamd_symcache_set_forbidden_settings_ids (cache, it->symbol, ids,
  478. nids);
  479. }
  480. }
  481. static void
  482. rspamd_symcache_process_dep (struct rspamd_symcache *cache,
  483. struct rspamd_symcache_item *it,
  484. struct cache_dependency *dep)
  485. {
  486. struct rspamd_symcache_item *dit = NULL, *vdit = NULL;
  487. struct cache_dependency *rdep;
  488. if (dep->id >= 0) {
  489. msg_debug_cache ("process real dependency %s on %s", it->symbol, dep->sym);
  490. dit = rspamd_symcache_find_filter (cache, dep->sym, true);
  491. }
  492. if (dep->vid >= 0) {
  493. /* Case of the virtual symbol that depends on another (maybe virtual) symbol */
  494. vdit = rspamd_symcache_find_filter (cache, dep->sym, false);
  495. if (!vdit) {
  496. msg_err_cache ("cannot add dependency from %s on %s: no dependency symbol registered",
  497. dep->sym, dit->symbol);
  498. }
  499. else {
  500. msg_debug_cache ("process virtual dependency %s(%d) on %s(%d)", it->symbol,
  501. dep->vid, vdit->symbol, vdit->id);
  502. }
  503. }
  504. else {
  505. vdit = dit;
  506. }
  507. if (dit != NULL) {
  508. if (!dit->is_filter) {
  509. /*
  510. * Check sanity:
  511. * - filters -> prefilter dependency is OK and always satisfied
  512. * - postfilter -> (filter, prefilter) dep is ok
  513. * - idempotent -> (any) dep is OK
  514. *
  515. * Otherwise, emit error
  516. * However, even if everything is fine this dep is useless ¯\_(ツ)_/¯
  517. */
  518. gboolean ok_dep = FALSE;
  519. if (it->is_filter) {
  520. if (dit->is_filter) {
  521. ok_dep = TRUE;
  522. }
  523. else if (dit->type & SYMBOL_TYPE_PREFILTER) {
  524. ok_dep = TRUE;
  525. }
  526. }
  527. else if (it->type & SYMBOL_TYPE_POSTFILTER) {
  528. if (dit->type & SYMBOL_TYPE_PREFILTER) {
  529. ok_dep = TRUE;
  530. }
  531. }
  532. else if (it->type & SYMBOL_TYPE_IDEMPOTENT) {
  533. if (dit->type & (SYMBOL_TYPE_PREFILTER|SYMBOL_TYPE_POSTFILTER)) {
  534. ok_dep = TRUE;
  535. }
  536. }
  537. else if (it->type & SYMBOL_TYPE_PREFILTER) {
  538. if (it->priority < dit->priority) {
  539. /* Also OK */
  540. ok_dep = TRUE;
  541. }
  542. }
  543. if (!ok_dep) {
  544. msg_err_cache ("cannot add dependency from %s on %s: invalid symbol types",
  545. dep->sym, dit->symbol);
  546. return;
  547. }
  548. }
  549. else {
  550. if (dit->id == it->id) {
  551. msg_err_cache ("cannot add dependency on self: %s -> %s "
  552. "(resolved to %s)",
  553. it->symbol, dep->sym, dit->symbol);
  554. } else {
  555. rdep = rspamd_mempool_alloc (cache->static_pool,
  556. sizeof (*rdep));
  557. rdep->sym = dep->sym;
  558. rdep->item = it;
  559. rdep->id = it->id;
  560. g_assert (dit->rdeps != NULL);
  561. g_ptr_array_add (dit->rdeps, rdep);
  562. dep->item = dit;
  563. dep->id = dit->id;
  564. msg_debug_cache ("add dependency from %d on %d", it->id,
  565. dit->id);
  566. }
  567. }
  568. }
  569. else if (dep->id >= 0) {
  570. msg_err_cache ("cannot find dependency on symbol %s for symbol %s",
  571. dep->sym, it->symbol);
  572. return;
  573. }
  574. if (vdit) {
  575. /* Use virtual symbol to propagate deps */
  576. rspamd_symcache_propagate_dep (cache, it, vdit);
  577. }
  578. }
  579. /* Sort items in logical order */
  580. static void
  581. rspamd_symcache_post_init (struct rspamd_symcache *cache)
  582. {
  583. struct rspamd_symcache_item *it, *vit;
  584. struct cache_dependency *dep;
  585. struct delayed_cache_dependency *ddep;
  586. struct delayed_cache_condition *dcond;
  587. GList *cur;
  588. gint i, j;
  589. cur = cache->delayed_deps;
  590. while (cur) {
  591. ddep = cur->data;
  592. vit = rspamd_symcache_find_filter (cache, ddep->from, false);
  593. it = rspamd_symcache_find_filter (cache, ddep->from, true);
  594. if (it == NULL) {
  595. msg_err_cache ("cannot register delayed dependency between %s and %s: "
  596. "%s is missing", ddep->from, ddep->to, ddep->from);
  597. }
  598. else {
  599. msg_debug_cache ("delayed between %s(%d:%d) -> %s", ddep->from,
  600. it->id, vit->id, ddep->to);
  601. rspamd_symcache_add_dependency (cache, it->id, ddep->to, vit != it ?
  602. vit->id : -1);
  603. }
  604. cur = g_list_next (cur);
  605. }
  606. cur = cache->delayed_conditions;
  607. while (cur) {
  608. dcond = cur->data;
  609. it = rspamd_symcache_find_filter (cache, dcond->sym, true);
  610. if (it == NULL) {
  611. msg_err_cache (
  612. "cannot register delayed condition for %s",
  613. dcond->sym);
  614. luaL_unref (dcond->L, LUA_REGISTRYINDEX, dcond->cbref);
  615. }
  616. else {
  617. struct rspamd_symcache_condition *ncond = rspamd_mempool_alloc0 (cache->static_pool,
  618. sizeof (*ncond));
  619. ncond->cb = dcond->cbref;
  620. DL_APPEND (it->specific.normal.conditions, ncond);
  621. }
  622. cur = g_list_next (cur);
  623. }
  624. PTR_ARRAY_FOREACH (cache->items_by_id, i, it) {
  625. PTR_ARRAY_FOREACH (it->deps, j, dep) {
  626. rspamd_symcache_process_dep (cache, it, dep);
  627. }
  628. if (it->deps) {
  629. /* Reversed loop to make removal safe */
  630. for (j = it->deps->len - 1; j >= 0; j--) {
  631. dep = g_ptr_array_index (it->deps, j);
  632. if (dep->item == NULL) {
  633. /* Remove useless dep */
  634. g_ptr_array_remove_index (it->deps, j);
  635. }
  636. }
  637. }
  638. }
  639. /* Special case for virtual symbols */
  640. PTR_ARRAY_FOREACH (cache->virtual, i, it) {
  641. PTR_ARRAY_FOREACH (it->deps, j, dep) {
  642. rspamd_symcache_process_dep (cache, it, dep);
  643. }
  644. }
  645. g_ptr_array_sort_with_data (cache->connfilters, prefilters_cmp, cache);
  646. g_ptr_array_sort_with_data (cache->prefilters, prefilters_cmp, cache);
  647. g_ptr_array_sort_with_data (cache->postfilters, postfilters_cmp, cache);
  648. g_ptr_array_sort_with_data (cache->idempotent, postfilters_cmp, cache);
  649. rspamd_symcache_resort (cache);
  650. }
  651. static gboolean
  652. rspamd_symcache_load_items (struct rspamd_symcache *cache, const gchar *name)
  653. {
  654. struct rspamd_symcache_header *hdr;
  655. struct stat st;
  656. struct ucl_parser *parser;
  657. ucl_object_t *top;
  658. const ucl_object_t *cur, *elt;
  659. ucl_object_iter_t it;
  660. struct rspamd_symcache_item *item, *parent;
  661. const guchar *p;
  662. gint fd;
  663. gpointer map;
  664. fd = open (name, O_RDONLY);
  665. if (fd == -1) {
  666. msg_info_cache ("cannot open file %s, error %d, %s", name,
  667. errno, strerror (errno));
  668. return FALSE;
  669. }
  670. rspamd_file_lock (fd, FALSE);
  671. if (fstat (fd, &st) == -1) {
  672. rspamd_file_unlock (fd, FALSE);
  673. close (fd);
  674. msg_info_cache ("cannot stat file %s, error %d, %s", name,
  675. errno, strerror (errno));
  676. return FALSE;
  677. }
  678. if (st.st_size < (gint)sizeof (*hdr)) {
  679. rspamd_file_unlock (fd, FALSE);
  680. close (fd);
  681. errno = EINVAL;
  682. msg_info_cache ("cannot use file %s, error %d, %s", name,
  683. errno, strerror (errno));
  684. return FALSE;
  685. }
  686. map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  687. if (map == MAP_FAILED) {
  688. rspamd_file_unlock (fd, FALSE);
  689. close (fd);
  690. msg_info_cache ("cannot mmap file %s, error %d, %s", name,
  691. errno, strerror (errno));
  692. return FALSE;
  693. }
  694. hdr = map;
  695. if (memcmp (hdr->magic, rspamd_symcache_magic,
  696. sizeof (rspamd_symcache_magic)) != 0) {
  697. msg_info_cache ("cannot use file %s, bad magic", name);
  698. munmap (map, st.st_size);
  699. rspamd_file_unlock (fd, FALSE);
  700. close (fd);
  701. return FALSE;
  702. }
  703. parser = ucl_parser_new (0);
  704. p = (const guchar *)(hdr + 1);
  705. if (!ucl_parser_add_chunk (parser, p, st.st_size - sizeof (*hdr))) {
  706. msg_info_cache ("cannot use file %s, cannot parse: %s", name,
  707. ucl_parser_get_error (parser));
  708. munmap (map, st.st_size);
  709. ucl_parser_free (parser);
  710. rspamd_file_unlock (fd, FALSE);
  711. close (fd);
  712. return FALSE;
  713. }
  714. top = ucl_parser_get_object (parser);
  715. munmap (map, st.st_size);
  716. rspamd_file_unlock (fd, FALSE);
  717. close (fd);
  718. ucl_parser_free (parser);
  719. if (top == NULL || ucl_object_type (top) != UCL_OBJECT) {
  720. msg_info_cache ("cannot use file %s, bad object", name);
  721. ucl_object_unref (top);
  722. return FALSE;
  723. }
  724. it = ucl_object_iterate_new (top);
  725. while ((cur = ucl_object_iterate_safe (it, true))) {
  726. item = g_hash_table_lookup (cache->items_by_symbol, ucl_object_key (cur));
  727. if (item) {
  728. /* Copy saved info */
  729. /*
  730. * XXX: don't save or load weight, it should be obtained from the
  731. * metric
  732. */
  733. #if 0
  734. elt = ucl_object_lookup (cur, "weight");
  735. if (elt) {
  736. w = ucl_object_todouble (elt);
  737. if (w != 0) {
  738. item->weight = w;
  739. }
  740. }
  741. #endif
  742. elt = ucl_object_lookup (cur, "time");
  743. if (elt) {
  744. item->st->avg_time = ucl_object_todouble (elt);
  745. }
  746. elt = ucl_object_lookup (cur, "count");
  747. if (elt) {
  748. item->st->total_hits = ucl_object_toint (elt);
  749. item->last_count = item->st->total_hits;
  750. }
  751. elt = ucl_object_lookup (cur, "frequency");
  752. if (elt && ucl_object_type (elt) == UCL_OBJECT) {
  753. const ucl_object_t *freq_elt;
  754. freq_elt = ucl_object_lookup (elt, "avg");
  755. if (freq_elt) {
  756. item->st->avg_frequency = ucl_object_todouble (freq_elt);
  757. }
  758. freq_elt = ucl_object_lookup (elt, "stddev");
  759. if (freq_elt) {
  760. item->st->stddev_frequency = ucl_object_todouble (freq_elt);
  761. }
  762. }
  763. if (item->is_virtual && !(item->type & SYMBOL_TYPE_GHOST)) {
  764. g_assert (item->specific.virtual.parent < (gint)cache->items_by_id->len);
  765. parent = g_ptr_array_index (cache->items_by_id,
  766. item->specific.virtual.parent);
  767. item->specific.virtual.parent_item = parent;
  768. if (parent->st->weight < item->st->weight) {
  769. parent->st->weight = item->st->weight;
  770. }
  771. /*
  772. * We maintain avg_time for virtual symbols equal to the
  773. * parent item avg_time
  774. */
  775. item->st->avg_time = parent->st->avg_time;
  776. }
  777. cache->total_weight += fabs (item->st->weight);
  778. cache->total_hits += item->st->total_hits;
  779. }
  780. }
  781. ucl_object_iterate_free (it);
  782. ucl_object_unref (top);
  783. return TRUE;
  784. }
  785. #define ROUND_DOUBLE(x) (floor((x) * 100.0) / 100.0)
  786. static gboolean
  787. rspamd_symcache_save_items (struct rspamd_symcache *cache, const gchar *name)
  788. {
  789. struct rspamd_symcache_header hdr;
  790. ucl_object_t *top, *elt, *freq;
  791. GHashTableIter it;
  792. struct rspamd_symcache_item *item;
  793. struct ucl_emitter_functions *efunc;
  794. gpointer k, v;
  795. gint fd;
  796. FILE *fp;
  797. bool ret;
  798. gchar path[PATH_MAX];
  799. rspamd_snprintf (path, sizeof (path), "%s.new", name);
  800. for (;;) {
  801. fd = open (path, O_CREAT | O_WRONLY | O_EXCL, 00644);
  802. if (fd == -1) {
  803. if (errno == EEXIST) {
  804. /* Some other process is already writing data, give up silently */
  805. return TRUE;
  806. }
  807. msg_err_cache ("cannot open file %s, error %d, %s", path,
  808. errno, strerror (errno));
  809. return FALSE;
  810. }
  811. break;
  812. }
  813. rspamd_file_lock (fd, FALSE);
  814. fp = fdopen (fd, "w");
  815. memset (&hdr, 0, sizeof (hdr));
  816. memcpy (hdr.magic, rspamd_symcache_magic,
  817. sizeof (rspamd_symcache_magic));
  818. if (fwrite (&hdr, sizeof (hdr), 1, fp) == -1) {
  819. msg_err_cache ("cannot write to file %s, error %d, %s", path,
  820. errno, strerror (errno));
  821. rspamd_file_unlock (fd, FALSE);
  822. fclose (fp);
  823. return FALSE;
  824. }
  825. top = ucl_object_typed_new (UCL_OBJECT);
  826. g_hash_table_iter_init (&it, cache->items_by_symbol);
  827. while (g_hash_table_iter_next (&it, &k, &v)) {
  828. item = v;
  829. elt = ucl_object_typed_new (UCL_OBJECT);
  830. ucl_object_insert_key (elt,
  831. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  832. "weight", 0, false);
  833. ucl_object_insert_key (elt,
  834. ucl_object_fromdouble (ROUND_DOUBLE (item->st->time_counter.mean)),
  835. "time", 0, false);
  836. ucl_object_insert_key (elt, ucl_object_fromint (item->st->total_hits),
  837. "count", 0, false);
  838. freq = ucl_object_typed_new (UCL_OBJECT);
  839. ucl_object_insert_key (freq,
  840. ucl_object_fromdouble (ROUND_DOUBLE (item->st->frequency_counter.mean)),
  841. "avg", 0, false);
  842. ucl_object_insert_key (freq,
  843. ucl_object_fromdouble (ROUND_DOUBLE (item->st->frequency_counter.stddev)),
  844. "stddev", 0, false);
  845. ucl_object_insert_key (elt, freq, "frequency", 0, false);
  846. ucl_object_insert_key (top, elt, k, 0, false);
  847. }
  848. efunc = ucl_object_emit_file_funcs (fp);
  849. ret = ucl_object_emit_full (top, UCL_EMIT_JSON_COMPACT, efunc, NULL);
  850. ucl_object_emit_funcs_free (efunc);
  851. ucl_object_unref (top);
  852. rspamd_file_unlock (fd, FALSE);
  853. fclose (fp);
  854. if (rename (path, name) == -1) {
  855. msg_err_cache ("cannot rename %s -> %s, error %d, %s", path, name,
  856. errno, strerror (errno));
  857. (void)unlink (path);
  858. ret = FALSE;
  859. }
  860. return ret;
  861. }
  862. #undef ROUND_DOUBLE
  863. gint
  864. rspamd_symcache_add_symbol (struct rspamd_symcache *cache,
  865. const gchar *name,
  866. gint priority,
  867. symbol_func_t func,
  868. gpointer user_data,
  869. enum rspamd_symbol_type type,
  870. gint parent)
  871. {
  872. struct rspamd_symcache_item *item = NULL;
  873. const gchar *type_str = "normal";
  874. g_assert (cache != NULL);
  875. if (name == NULL && !(type & SYMBOL_TYPE_CALLBACK)) {
  876. msg_warn_cache ("no name for non-callback symbol!");
  877. }
  878. else if ((type & SYMBOL_TYPE_VIRTUAL & (~SYMBOL_TYPE_GHOST)) && parent == -1) {
  879. msg_warn_cache ("no parent symbol is associated with virtual symbol %s",
  880. name);
  881. }
  882. if (name != NULL && !(type & SYMBOL_TYPE_CALLBACK)) {
  883. struct rspamd_symcache_item *existing;
  884. if (strcspn (name, " \t\n\r") != strlen (name)) {
  885. msg_warn_cache ("bogus characters in symbol name: \"%s\"",
  886. name);
  887. }
  888. if ((existing = g_hash_table_lookup (cache->items_by_symbol, name)) != NULL) {
  889. if (existing->type & SYMBOL_TYPE_GHOST) {
  890. /*
  891. * Complicated part:
  892. * - we need to remove the existing ghost symbol
  893. * - we need to cleanup containers:
  894. * - symbols hash
  895. * - specific array
  896. * - items_by_it
  897. * - decrement used_items
  898. */
  899. msg_info_cache ("duplicate ghost symbol %s is removed", name);
  900. if (existing->container) {
  901. g_ptr_array_remove (existing->container, existing);
  902. }
  903. g_ptr_array_remove (cache->items_by_id, existing->container);
  904. cache->used_items --;
  905. g_hash_table_remove (cache->items_by_symbol, name);
  906. /*
  907. * Here can be memory leak, but we assume that ghost symbols
  908. * are also virtual
  909. */
  910. }
  911. else {
  912. msg_err_cache ("skip duplicate symbol registration for %s", name);
  913. return -1;
  914. }
  915. }
  916. }
  917. if (type & (SYMBOL_TYPE_CLASSIFIER|SYMBOL_TYPE_CALLBACK|
  918. SYMBOL_TYPE_PREFILTER|SYMBOL_TYPE_POSTFILTER|
  919. SYMBOL_TYPE_IDEMPOTENT|SYMBOL_TYPE_GHOST)) {
  920. type |= SYMBOL_TYPE_NOSTAT;
  921. }
  922. item = rspamd_mempool_alloc0 (cache->static_pool,
  923. sizeof (struct rspamd_symcache_item));
  924. item->st = rspamd_mempool_alloc0_shared (cache->static_pool,
  925. sizeof (*item->st));
  926. item->enabled = TRUE;
  927. /*
  928. * We do not share cd to skip locking, instead we'll just calculate it on
  929. * save or accumulate
  930. */
  931. item->cd = rspamd_mempool_alloc0 (cache->static_pool,
  932. sizeof (struct rspamd_counter_data));
  933. item->priority = priority;
  934. item->type = type;
  935. if ((type & SYMBOL_TYPE_FINE) && item->priority == 0) {
  936. /* Make priority for negative weighted symbols */
  937. item->priority = 1;
  938. }
  939. if (func) {
  940. /* Non-virtual symbol */
  941. g_assert (parent == -1);
  942. if (item->type & SYMBOL_TYPE_PREFILTER) {
  943. type_str = "prefilter";
  944. g_ptr_array_add (cache->prefilters, item);
  945. item->container = cache->prefilters;
  946. }
  947. else if (item->type & SYMBOL_TYPE_IDEMPOTENT) {
  948. type_str = "idempotent";
  949. g_ptr_array_add (cache->idempotent, item);
  950. item->container = cache->idempotent;
  951. }
  952. else if (item->type & SYMBOL_TYPE_POSTFILTER) {
  953. type_str = "postfilter";
  954. g_ptr_array_add (cache->postfilters, item);
  955. item->container = cache->postfilters;
  956. }
  957. else if (item->type & SYMBOL_TYPE_CONNFILTER) {
  958. type_str = "connfilter";
  959. g_ptr_array_add (cache->connfilters, item);
  960. item->container = cache->connfilters;
  961. }
  962. else {
  963. item->is_filter = TRUE;
  964. g_ptr_array_add (cache->filters, item);
  965. item->container = cache->filters;
  966. }
  967. item->id = cache->items_by_id->len;
  968. g_ptr_array_add (cache->items_by_id, item);
  969. item->specific.normal.func = func;
  970. item->specific.normal.user_data = user_data;
  971. item->specific.normal.conditions = NULL;
  972. }
  973. else {
  974. /*
  975. * Three possibilities here when no function is specified:
  976. * - virtual symbol (beware of ghosts!)
  977. * - classifier symbol
  978. * - composite symbol
  979. */
  980. if (item->type & SYMBOL_TYPE_COMPOSITE) {
  981. item->specific.normal.conditions = NULL;
  982. item->specific.normal.user_data = user_data;
  983. g_assert (user_data != NULL);
  984. g_ptr_array_add (cache->composites, item);
  985. item->id = cache->items_by_id->len;
  986. g_ptr_array_add (cache->items_by_id, item);
  987. item->container = cache->composites;
  988. type_str = "composite";
  989. }
  990. else if (item->type & SYMBOL_TYPE_CLASSIFIER) {
  991. /* Treat it as normal symbol to allow enable/disable */
  992. item->id = cache->items_by_id->len;
  993. g_ptr_array_add (cache->items_by_id, item);
  994. item->is_filter = TRUE;
  995. item->specific.normal.func = NULL;
  996. item->specific.normal.user_data = NULL;
  997. item->specific.normal.conditions = NULL;
  998. type_str = "classifier";
  999. }
  1000. else {
  1001. item->is_virtual = TRUE;
  1002. item->specific.virtual.parent = parent;
  1003. item->specific.virtual.parent_item =
  1004. g_ptr_array_index (cache->items_by_id, parent);
  1005. item->id = cache->virtual->len;
  1006. g_ptr_array_add (cache->virtual, item);
  1007. item->container = cache->virtual;
  1008. /* Not added to items_by_id, handled by parent */
  1009. type_str = "virtual";
  1010. }
  1011. }
  1012. cache->used_items ++;
  1013. cache->id ++;
  1014. if (!(item->type &
  1015. (SYMBOL_TYPE_IDEMPOTENT|SYMBOL_TYPE_NOSTAT|SYMBOL_TYPE_CLASSIFIER))) {
  1016. if (name != NULL) {
  1017. cache->cksum = t1ha (name, strlen (name),
  1018. cache->cksum);
  1019. } else {
  1020. cache->cksum = t1ha (&item->id, sizeof (item->id),
  1021. cache->cksum);
  1022. }
  1023. cache->stats_symbols_count ++;
  1024. }
  1025. if (name != NULL) {
  1026. item->symbol = rspamd_mempool_strdup (cache->static_pool, name);
  1027. msg_debug_cache ("used items: %d, added symbol: %s, %d; symbol type: %s",
  1028. cache->used_items, name, item->id, type_str);
  1029. } else {
  1030. g_assert (func != NULL);
  1031. msg_debug_cache ("used items: %d, added unnamed symbol: %d; symbol type: %s",
  1032. cache->used_items, item->id, type_str);
  1033. }
  1034. item->deps = g_ptr_array_new ();
  1035. item->rdeps = g_ptr_array_new ();
  1036. item->type_descr = type_str;
  1037. rspamd_mempool_add_destructor (cache->static_pool,
  1038. rspamd_ptr_array_free_hard, item->deps);
  1039. rspamd_mempool_add_destructor (cache->static_pool,
  1040. rspamd_ptr_array_free_hard, item->rdeps);
  1041. if (name != NULL) {
  1042. g_hash_table_insert (cache->items_by_symbol, item->symbol, item);
  1043. }
  1044. return item->id;
  1045. }
  1046. void
  1047. rspamd_symcache_set_peak_callback (struct rspamd_symcache *cache,
  1048. gint cbref)
  1049. {
  1050. g_assert (cache != NULL);
  1051. if (cache->peak_cb != -1) {
  1052. luaL_unref (cache->cfg->lua_state, LUA_REGISTRYINDEX,
  1053. cache->peak_cb);
  1054. }
  1055. cache->peak_cb = cbref;
  1056. msg_info_cache ("registered peak callback");
  1057. }
  1058. gboolean
  1059. rspamd_symcache_add_condition_delayed (struct rspamd_symcache *cache,
  1060. const gchar *sym, lua_State *L, gint cbref)
  1061. {
  1062. struct delayed_cache_condition *ncond;
  1063. g_assert (cache != NULL);
  1064. g_assert (sym != NULL);
  1065. ncond = g_malloc0 (sizeof (*ncond));
  1066. ncond->sym = g_strdup (sym);
  1067. ncond->cbref = cbref;
  1068. ncond->L = L;
  1069. cache->id ++;
  1070. cache->delayed_conditions = g_list_prepend (cache->delayed_conditions, ncond);
  1071. return TRUE;
  1072. }
  1073. void
  1074. rspamd_symcache_save (struct rspamd_symcache *cache)
  1075. {
  1076. if (cache != NULL) {
  1077. if (cache->cfg->cache_filename) {
  1078. /* Try to sync values to the disk */
  1079. if (!rspamd_symcache_save_items (cache,
  1080. cache->cfg->cache_filename)) {
  1081. msg_err_cache ("cannot save cache data to %s: %s",
  1082. cache->cfg->cache_filename, strerror (errno));
  1083. }
  1084. }
  1085. }
  1086. }
  1087. void
  1088. rspamd_symcache_destroy (struct rspamd_symcache *cache)
  1089. {
  1090. GList *cur;
  1091. struct delayed_cache_dependency *ddep;
  1092. struct delayed_cache_condition *dcond;
  1093. if (cache != NULL) {
  1094. if (cache->delayed_deps) {
  1095. cur = cache->delayed_deps;
  1096. while (cur) {
  1097. ddep = cur->data;
  1098. g_free (ddep->from);
  1099. g_free (ddep->to);
  1100. g_free (ddep);
  1101. cur = g_list_next (cur);
  1102. }
  1103. g_list_free (cache->delayed_deps);
  1104. }
  1105. if (cache->delayed_conditions) {
  1106. cur = cache->delayed_conditions;
  1107. while (cur) {
  1108. dcond = cur->data;
  1109. g_free (dcond->sym);
  1110. g_free (dcond);
  1111. cur = g_list_next (cur);
  1112. }
  1113. g_list_free (cache->delayed_conditions);
  1114. }
  1115. g_hash_table_destroy (cache->items_by_symbol);
  1116. g_ptr_array_free (cache->items_by_id, TRUE);
  1117. rspamd_mempool_delete (cache->static_pool);
  1118. g_ptr_array_free (cache->connfilters, TRUE);
  1119. g_ptr_array_free (cache->prefilters, TRUE);
  1120. g_ptr_array_free (cache->filters, TRUE);
  1121. g_ptr_array_free (cache->postfilters, TRUE);
  1122. g_ptr_array_free (cache->idempotent, TRUE);
  1123. g_ptr_array_free (cache->composites, TRUE);
  1124. g_ptr_array_free (cache->virtual, TRUE);
  1125. REF_RELEASE (cache->items_by_order);
  1126. if (cache->peak_cb != -1) {
  1127. luaL_unref (cache->cfg->lua_state, LUA_REGISTRYINDEX, cache->peak_cb);
  1128. }
  1129. g_free (cache);
  1130. }
  1131. }
  1132. struct rspamd_symcache*
  1133. rspamd_symcache_new (struct rspamd_config *cfg)
  1134. {
  1135. struct rspamd_symcache *cache;
  1136. cache = g_malloc0 (sizeof (struct rspamd_symcache));
  1137. cache->static_pool =
  1138. rspamd_mempool_new (rspamd_mempool_suggest_size (), "symcache", 0);
  1139. cache->items_by_symbol = g_hash_table_new (rspamd_str_hash,
  1140. rspamd_str_equal);
  1141. cache->items_by_id = g_ptr_array_new ();
  1142. cache->connfilters = g_ptr_array_new ();
  1143. cache->prefilters = g_ptr_array_new ();
  1144. cache->filters = g_ptr_array_new ();
  1145. cache->postfilters = g_ptr_array_new ();
  1146. cache->idempotent = g_ptr_array_new ();
  1147. cache->composites = g_ptr_array_new ();
  1148. cache->virtual = g_ptr_array_new ();
  1149. cache->reload_time = cfg->cache_reload_time;
  1150. cache->total_hits = 1;
  1151. cache->total_weight = 1.0;
  1152. cache->cfg = cfg;
  1153. cache->cksum = 0xdeadbabe;
  1154. cache->peak_cb = -1;
  1155. cache->id = (guint)rspamd_random_uint64_fast ();
  1156. return cache;
  1157. }
  1158. gboolean
  1159. rspamd_symcache_init (struct rspamd_symcache *cache)
  1160. {
  1161. gboolean res;
  1162. g_assert (cache != NULL);
  1163. cache->reload_time = cache->cfg->cache_reload_time;
  1164. /* Just in-memory cache */
  1165. if (cache->cfg->cache_filename == NULL) {
  1166. rspamd_symcache_post_init (cache);
  1167. return TRUE;
  1168. }
  1169. /* Copy saved cache entries */
  1170. res = rspamd_symcache_load_items (cache, cache->cfg->cache_filename);
  1171. rspamd_symcache_post_init (cache);
  1172. return res;
  1173. }
  1174. static void
  1175. rspamd_symcache_validate_cb (gpointer k, gpointer v, gpointer ud)
  1176. {
  1177. struct rspamd_symcache_item *item = v, *parent;
  1178. struct rspamd_config *cfg;
  1179. struct rspamd_symcache *cache = (struct rspamd_symcache *)ud;
  1180. struct rspamd_symbol *s;
  1181. gboolean skipped, ghost;
  1182. gint p1, p2;
  1183. ghost = item->st->weight == 0 ? TRUE : FALSE;
  1184. cfg = cache->cfg;
  1185. /* Check whether this item is skipped */
  1186. skipped = !ghost;
  1187. g_assert (cfg != NULL);
  1188. if ((item->type &
  1189. (SYMBOL_TYPE_NORMAL|SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_COMPOSITE|SYMBOL_TYPE_CLASSIFIER))
  1190. && g_hash_table_lookup (cfg->symbols, item->symbol) == NULL) {
  1191. if (cfg->unknown_weight != 0) {
  1192. skipped = FALSE;
  1193. item->st->weight = cfg->unknown_weight;
  1194. s = rspamd_mempool_alloc0 (cache->static_pool,
  1195. sizeof (*s));
  1196. s->name = item->symbol;
  1197. s->weight_ptr = &item->st->weight;
  1198. g_hash_table_insert (cfg->symbols, item->symbol, s);
  1199. msg_info_cache ("adding unknown symbol %s", item->symbol);
  1200. ghost = FALSE;
  1201. }
  1202. else {
  1203. skipped = TRUE;
  1204. }
  1205. }
  1206. else {
  1207. skipped = FALSE;
  1208. }
  1209. if (!ghost && skipped) {
  1210. if (!(item->type & SYMBOL_TYPE_SKIPPED)) {
  1211. item->type |= SYMBOL_TYPE_SKIPPED;
  1212. msg_warn_cache ("symbol %s has no score registered, skip its check",
  1213. item->symbol);
  1214. }
  1215. }
  1216. if (ghost) {
  1217. msg_debug_cache ("symbol %s is registered as ghost symbol, it won't be inserted "
  1218. "to any metric", item->symbol);
  1219. }
  1220. if (item->st->weight < 0 && item->priority == 0) {
  1221. item->priority ++;
  1222. }
  1223. if (item->is_virtual) {
  1224. if (!(item->type & SYMBOL_TYPE_GHOST)) {
  1225. g_assert (item->specific.virtual.parent != -1);
  1226. g_assert (item->specific.virtual.parent < (gint) cache->items_by_id->len);
  1227. parent = g_ptr_array_index (cache->items_by_id,
  1228. item->specific.virtual.parent);
  1229. item->specific.virtual.parent_item = parent;
  1230. if (fabs (parent->st->weight) < fabs (item->st->weight)) {
  1231. parent->st->weight = item->st->weight;
  1232. }
  1233. p1 = abs (item->priority);
  1234. p2 = abs (parent->priority);
  1235. if (p1 != p2) {
  1236. parent->priority = MAX (p1, p2);
  1237. item->priority = parent->priority;
  1238. }
  1239. }
  1240. }
  1241. cache->total_weight += fabs (item->st->weight);
  1242. }
  1243. static void
  1244. rspamd_symcache_metric_validate_cb (gpointer k, gpointer v, gpointer ud)
  1245. {
  1246. struct rspamd_symcache *cache = (struct rspamd_symcache *)ud;
  1247. const gchar *sym = k;
  1248. struct rspamd_symbol *s = (struct rspamd_symbol *)v;
  1249. gdouble weight;
  1250. struct rspamd_symcache_item *item;
  1251. weight = *s->weight_ptr;
  1252. item = g_hash_table_lookup (cache->items_by_symbol, sym);
  1253. if (item) {
  1254. item->st->weight = weight;
  1255. s->cache_item = item;
  1256. }
  1257. }
  1258. gboolean
  1259. rspamd_symcache_validate (struct rspamd_symcache *cache,
  1260. struct rspamd_config *cfg,
  1261. gboolean strict)
  1262. {
  1263. struct rspamd_symcache_item *item;
  1264. GHashTableIter it;
  1265. gpointer k, v;
  1266. struct rspamd_symbol *sym_def;
  1267. gboolean ignore_symbol = FALSE, ret = TRUE;
  1268. if (cache == NULL) {
  1269. msg_err ("empty cache is invalid");
  1270. return FALSE;
  1271. }
  1272. /* Now adjust symbol weights according to default metric */
  1273. g_hash_table_foreach (cfg->symbols,
  1274. rspamd_symcache_metric_validate_cb,
  1275. cache);
  1276. g_hash_table_foreach (cache->items_by_symbol,
  1277. rspamd_symcache_validate_cb,
  1278. cache);
  1279. /* Now check each metric item and find corresponding symbol in a cache */
  1280. g_hash_table_iter_init (&it, cfg->symbols);
  1281. while (g_hash_table_iter_next (&it, &k, &v)) {
  1282. ignore_symbol = FALSE;
  1283. sym_def = v;
  1284. if (sym_def && (sym_def->flags &
  1285. (RSPAMD_SYMBOL_FLAG_IGNORE_METRIC|RSPAMD_SYMBOL_FLAG_DISABLED))) {
  1286. ignore_symbol = TRUE;
  1287. }
  1288. if (!ignore_symbol) {
  1289. item = g_hash_table_lookup (cache->items_by_symbol, k);
  1290. if (item == NULL) {
  1291. msg_warn_cache (
  1292. "symbol '%s' has its score defined but there is no "
  1293. "corresponding rule registered",
  1294. k);
  1295. if (strict) {
  1296. ret = FALSE;
  1297. }
  1298. }
  1299. }
  1300. else if (sym_def->flags & RSPAMD_SYMBOL_FLAG_DISABLED) {
  1301. item = g_hash_table_lookup (cache->items_by_symbol, k);
  1302. if (item) {
  1303. item->enabled = FALSE;
  1304. }
  1305. }
  1306. }
  1307. return ret;
  1308. }
  1309. /* Return true if metric has score that is more than spam score for it */
  1310. static gboolean
  1311. rspamd_symcache_metric_limit (struct rspamd_task *task,
  1312. struct cache_savepoint *cp)
  1313. {
  1314. struct rspamd_scan_result *res;
  1315. double ms;
  1316. if (task->flags & RSPAMD_TASK_FLAG_PASS_ALL) {
  1317. return FALSE;
  1318. }
  1319. if (cp->lim == 0.0) {
  1320. res = task->result;
  1321. if (res) {
  1322. ms = rspamd_task_get_required_score (task, res);
  1323. if (!isnan (ms) && cp->lim < ms) {
  1324. cp->rs = res;
  1325. cp->lim = ms;
  1326. }
  1327. }
  1328. }
  1329. if (cp->rs) {
  1330. if (cp->rs->score > cp->lim) {
  1331. return TRUE;
  1332. }
  1333. }
  1334. else {
  1335. /* No reject score define, always check all rules */
  1336. cp->lim = -1;
  1337. }
  1338. return FALSE;
  1339. }
  1340. static inline gboolean
  1341. rspamd_symcache_check_id_list (const struct rspamd_symcache_id_list *ls, guint32 id)
  1342. {
  1343. guint i;
  1344. if (ls->dyn.e == -1) {
  1345. guint *res = bsearch (&id, ls->dyn.n, ls->dyn.len, sizeof (guint32),
  1346. rspamd_id_cmp);
  1347. if (res) {
  1348. return TRUE;
  1349. }
  1350. }
  1351. else {
  1352. for (i = 0; i < G_N_ELEMENTS (ls->st); i ++) {
  1353. if (ls->st[i] == id) {
  1354. return TRUE;
  1355. }
  1356. else if (ls->st[i] == 0) {
  1357. return FALSE;
  1358. }
  1359. }
  1360. }
  1361. return FALSE;
  1362. }
  1363. gboolean
  1364. rspamd_symcache_is_item_allowed (struct rspamd_task *task,
  1365. struct rspamd_symcache_item *item,
  1366. gboolean exec_only)
  1367. {
  1368. const gchar *what = "execution";
  1369. if (!exec_only) {
  1370. what = "symbol insertion";
  1371. }
  1372. /* Static checks */
  1373. if (!item->enabled ||
  1374. (RSPAMD_TASK_IS_EMPTY (task) && !(item->type & SYMBOL_TYPE_EMPTY)) ||
  1375. (item->type & SYMBOL_TYPE_MIME_ONLY && !RSPAMD_TASK_IS_MIME(task))) {
  1376. if (!item->enabled) {
  1377. msg_debug_cache_task ("skipping %s of %s as it is permanently disabled; symbol type=%s",
  1378. what, item->symbol, item->type_descr);
  1379. return FALSE;
  1380. }
  1381. else {
  1382. /*
  1383. * Exclude virtual symbols
  1384. */
  1385. if (exec_only) {
  1386. msg_debug_cache_task ("skipping check of %s as it cannot be "
  1387. "executed for this task type; symbol type=%s",
  1388. item->symbol, item->type_descr);
  1389. return FALSE;
  1390. }
  1391. }
  1392. }
  1393. /* Settings checks */
  1394. if (task->settings_elt != 0) {
  1395. guint32 id = task->settings_elt->id;
  1396. if (item->forbidden_ids.st[0] != 0 &&
  1397. rspamd_symcache_check_id_list (&item->forbidden_ids,
  1398. id)) {
  1399. msg_debug_cache_task ("deny %s of %s as it is forbidden for "
  1400. "settings id %ud; symbol type=%s",
  1401. what,
  1402. item->symbol,
  1403. id,
  1404. item->type_descr);
  1405. return FALSE;
  1406. }
  1407. if (!(item->type & SYMBOL_TYPE_EXPLICIT_DISABLE)) {
  1408. if (item->allowed_ids.st[0] == 0 ||
  1409. !rspamd_symcache_check_id_list (&item->allowed_ids,
  1410. id)) {
  1411. if (task->settings_elt->policy == RSPAMD_SETTINGS_POLICY_IMPLICIT_ALLOW) {
  1412. msg_debug_cache_task ("allow execution of %s settings id %ud "
  1413. "allows implicit execution of the symbols;"
  1414. "symbol type=%s",
  1415. item->symbol,
  1416. id,
  1417. item->type_descr);
  1418. return TRUE;
  1419. }
  1420. if (exec_only) {
  1421. /*
  1422. * Special case if any of our virtual children are enabled
  1423. */
  1424. if (rspamd_symcache_check_id_list (&item->exec_only_ids, id)) {
  1425. return TRUE;
  1426. }
  1427. }
  1428. msg_debug_cache_task ("deny %s of %s as it is not listed "
  1429. "as allowed for settings id %ud; symbol type=%s",
  1430. what,
  1431. item->symbol,
  1432. id,
  1433. item->type_descr);
  1434. return FALSE;
  1435. }
  1436. }
  1437. else {
  1438. msg_debug_cache_task ("allow %s of %s for "
  1439. "settings id %ud as it can be only disabled explicitly;"
  1440. " symbol type=%s",
  1441. what,
  1442. item->symbol,
  1443. id,
  1444. item->type_descr);
  1445. }
  1446. }
  1447. else if (item->type & SYMBOL_TYPE_EXPLICIT_ENABLE) {
  1448. msg_debug_cache_task ("deny %s of %s as it must be explicitly enabled; symbol type=%s",
  1449. what,
  1450. item->symbol,
  1451. item->type_descr);
  1452. return FALSE;
  1453. }
  1454. /* Allow all symbols with no settings id */
  1455. return TRUE;
  1456. }
  1457. static gboolean
  1458. rspamd_symcache_check_symbol (struct rspamd_task *task,
  1459. struct rspamd_symcache *cache,
  1460. struct rspamd_symcache_item *item,
  1461. struct cache_savepoint *checkpoint)
  1462. {
  1463. struct rspamd_task **ptask;
  1464. lua_State *L;
  1465. gboolean check = TRUE;
  1466. struct rspamd_symcache_dynamic_item *dyn_item =
  1467. rspamd_symcache_get_dynamic (checkpoint, item);
  1468. if (item->type & (SYMBOL_TYPE_CLASSIFIER|SYMBOL_TYPE_COMPOSITE)) {
  1469. /* Classifiers are special :( */
  1470. return TRUE;
  1471. }
  1472. if (rspamd_session_blocked (task->s)) {
  1473. /*
  1474. * We cannot add new events as session is either destroyed or
  1475. * being cleaned up.
  1476. */
  1477. return TRUE;
  1478. }
  1479. g_assert (!item->is_virtual);
  1480. g_assert (item->specific.normal.func != NULL);
  1481. if (CHECK_START_BIT (checkpoint, dyn_item)) {
  1482. /*
  1483. * This can actually happen when deps span over different layers
  1484. */
  1485. return CHECK_FINISH_BIT (checkpoint, dyn_item);
  1486. }
  1487. /* Check has been started */
  1488. SET_START_BIT (checkpoint, dyn_item);
  1489. if (!rspamd_symcache_is_item_allowed (task, item, TRUE)) {
  1490. check = FALSE;
  1491. }
  1492. else if (item->specific.normal.conditions) {
  1493. struct rspamd_symcache_condition *cur_cond;
  1494. DL_FOREACH (item->specific.normal.conditions, cur_cond) {
  1495. /* We also executes condition callback to check if we need this symbol */
  1496. L = task->cfg->lua_state;
  1497. lua_rawgeti (L, LUA_REGISTRYINDEX, cur_cond->cb);
  1498. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  1499. rspamd_lua_setclass (L, "rspamd{task}", -1);
  1500. *ptask = task;
  1501. if (lua_pcall (L, 1, 1, 0) != 0) {
  1502. msg_info_task ("call to condition for %s failed: %s",
  1503. item->symbol, lua_tostring (L, -1));
  1504. lua_pop (L, 1);
  1505. }
  1506. else {
  1507. check = lua_toboolean (L, -1);
  1508. lua_pop (L, 1);
  1509. }
  1510. if (!check) {
  1511. break;
  1512. }
  1513. }
  1514. if (!check) {
  1515. msg_debug_cache_task ("skipping check of %s as its start condition is false; "
  1516. "symbol type = %s",
  1517. item->symbol, item->type_descr);
  1518. }
  1519. }
  1520. if (check) {
  1521. msg_debug_cache_task ("execute %s, %d; symbol type = %s", item->symbol,
  1522. item->id, item->type_descr);
  1523. if (checkpoint->profile) {
  1524. ev_now_update_if_cheap (task->event_loop);
  1525. dyn_item->start_msec = (ev_now (task->event_loop) -
  1526. checkpoint->profile_start) * 1e3;
  1527. }
  1528. dyn_item->async_events = 0;
  1529. checkpoint->cur_item = item;
  1530. checkpoint->items_inflight ++;
  1531. /* Callback now must finalize itself */
  1532. item->specific.normal.func (task, item, item->specific.normal.user_data);
  1533. checkpoint->cur_item = NULL;
  1534. if (checkpoint->items_inflight == 0) {
  1535. return TRUE;
  1536. }
  1537. if (dyn_item->async_events == 0 && !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1538. msg_err_cache ("critical error: item %s has no async events pending, "
  1539. "but it is not finalised", item->symbol);
  1540. g_assert_not_reached ();
  1541. }
  1542. return FALSE;
  1543. }
  1544. else {
  1545. SET_FINISH_BIT (checkpoint, dyn_item);
  1546. }
  1547. return TRUE;
  1548. }
  1549. static gboolean
  1550. rspamd_symcache_check_deps (struct rspamd_task *task,
  1551. struct rspamd_symcache *cache,
  1552. struct rspamd_symcache_item *item,
  1553. struct cache_savepoint *checkpoint,
  1554. guint recursion,
  1555. gboolean check_only)
  1556. {
  1557. struct cache_dependency *dep;
  1558. guint i;
  1559. gboolean ret = TRUE;
  1560. static const guint max_recursion = 20;
  1561. struct rspamd_symcache_dynamic_item *dyn_item;
  1562. if (recursion > max_recursion) {
  1563. msg_err_task ("cyclic dependencies: maximum check level %ud exceed when "
  1564. "checking dependencies for %s", max_recursion, item->symbol);
  1565. return TRUE;
  1566. }
  1567. if (item->deps != NULL && item->deps->len > 0) {
  1568. for (i = 0; i < item->deps->len; i ++) {
  1569. dep = g_ptr_array_index (item->deps, i);
  1570. if (dep->item == NULL) {
  1571. /* Assume invalid deps as done */
  1572. msg_debug_cache_task ("symbol %d(%s) has invalid dependencies on %d(%s)",
  1573. item->id, item->symbol, dep->id, dep->sym);
  1574. continue;
  1575. }
  1576. dyn_item = rspamd_symcache_get_dynamic (checkpoint, dep->item);
  1577. if (!CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1578. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  1579. /* Not started */
  1580. if (!check_only) {
  1581. if (!rspamd_symcache_check_deps (task, cache,
  1582. dep->item,
  1583. checkpoint,
  1584. recursion + 1,
  1585. check_only)) {
  1586. ret = FALSE;
  1587. msg_debug_cache_task ("delayed dependency %d(%s) for "
  1588. "symbol %d(%s)",
  1589. dep->id, dep->sym, item->id, item->symbol);
  1590. }
  1591. else if (!rspamd_symcache_check_symbol (task, cache,
  1592. dep->item,
  1593. checkpoint)) {
  1594. /* Now started, but has events pending */
  1595. ret = FALSE;
  1596. msg_debug_cache_task ("started check of %d(%s) symbol "
  1597. "as dep for "
  1598. "%d(%s)",
  1599. dep->id, dep->sym, item->id, item->symbol);
  1600. }
  1601. else {
  1602. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) is "
  1603. "already processed",
  1604. dep->id, dep->sym, item->id, item->symbol);
  1605. }
  1606. }
  1607. else {
  1608. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) "
  1609. "cannot be started now",
  1610. dep->id, dep->sym,
  1611. item->id, item->symbol);
  1612. ret = FALSE;
  1613. }
  1614. }
  1615. else {
  1616. /* Started but not finished */
  1617. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) is "
  1618. "still executing",
  1619. dep->id, dep->sym,
  1620. item->id, item->symbol);
  1621. ret = FALSE;
  1622. }
  1623. }
  1624. else {
  1625. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) is already "
  1626. "checked",
  1627. dep->id, dep->sym,
  1628. item->id, item->symbol);
  1629. }
  1630. }
  1631. }
  1632. return ret;
  1633. }
  1634. static struct cache_savepoint *
  1635. rspamd_symcache_make_checkpoint (struct rspamd_task *task,
  1636. struct rspamd_symcache *cache)
  1637. {
  1638. struct cache_savepoint *checkpoint;
  1639. if (cache->items_by_order->id != cache->id) {
  1640. /*
  1641. * Cache has been modified, need to resort it
  1642. */
  1643. msg_info_cache ("symbols cache has been modified since last check:"
  1644. " old id: %ud, new id: %ud",
  1645. cache->items_by_order->id, cache->id);
  1646. rspamd_symcache_resort (cache);
  1647. }
  1648. checkpoint = rspamd_mempool_alloc0 (task->task_pool,
  1649. sizeof (*checkpoint) +
  1650. sizeof (struct rspamd_symcache_dynamic_item) * cache->items_by_id->len);
  1651. g_assert (cache->items_by_order != NULL);
  1652. checkpoint->version = cache->items_by_order->d->len;
  1653. checkpoint->order = cache->items_by_order;
  1654. REF_RETAIN (checkpoint->order);
  1655. rspamd_mempool_add_destructor (task->task_pool,
  1656. rspamd_symcache_order_unref, checkpoint->order);
  1657. /* Calculate profile probability */
  1658. ev_now_update_if_cheap (task->event_loop);
  1659. ev_tstamp now = ev_now (task->event_loop);
  1660. checkpoint->profile_start = now;
  1661. if ((cache->last_profile == 0.0 || now > cache->last_profile + PROFILE_MAX_TIME) ||
  1662. (task->msg.len >= PROFILE_MESSAGE_SIZE_THRESHOLD) ||
  1663. (rspamd_random_double_fast () >= (1 - PROFILE_PROBABILITY))) {
  1664. msg_debug_cache_task ("enable profiling of symbols for task");
  1665. checkpoint->profile = TRUE;
  1666. cache->last_profile = now;
  1667. }
  1668. task->checkpoint = checkpoint;
  1669. return checkpoint;
  1670. }
  1671. gboolean
  1672. rspamd_symcache_process_settings (struct rspamd_task *task,
  1673. struct rspamd_symcache *cache)
  1674. {
  1675. const ucl_object_t *wl, *cur, *disabled, *enabled;
  1676. struct rspamd_symbols_group *gr;
  1677. GHashTableIter gr_it;
  1678. ucl_object_iter_t it = NULL;
  1679. gboolean already_disabled = FALSE;
  1680. gpointer k, v;
  1681. wl = ucl_object_lookup (task->settings, "whitelist");
  1682. if (wl != NULL) {
  1683. msg_info_task ("task is whitelisted");
  1684. task->flags |= RSPAMD_TASK_FLAG_SKIP;
  1685. return TRUE;
  1686. }
  1687. enabled = ucl_object_lookup (task->settings, "symbols_enabled");
  1688. if (enabled) {
  1689. /* Disable all symbols but selected */
  1690. rspamd_symcache_disable_all_symbols (task, cache,
  1691. SYMBOL_TYPE_EXPLICIT_DISABLE);
  1692. already_disabled = TRUE;
  1693. it = NULL;
  1694. while ((cur = ucl_iterate_object (enabled, &it, true)) != NULL) {
  1695. rspamd_symcache_enable_symbol_checkpoint (task, cache,
  1696. ucl_object_tostring (cur));
  1697. }
  1698. }
  1699. /* Enable groups of symbols */
  1700. enabled = ucl_object_lookup (task->settings, "groups_enabled");
  1701. if (enabled) {
  1702. it = NULL;
  1703. if (!already_disabled) {
  1704. rspamd_symcache_disable_all_symbols (task, cache,
  1705. SYMBOL_TYPE_EXPLICIT_DISABLE);
  1706. }
  1707. while ((cur = ucl_iterate_object (enabled, &it, true)) != NULL) {
  1708. if (ucl_object_type (cur) == UCL_STRING) {
  1709. gr = g_hash_table_lookup (task->cfg->groups,
  1710. ucl_object_tostring (cur));
  1711. if (gr) {
  1712. g_hash_table_iter_init (&gr_it, gr->symbols);
  1713. while (g_hash_table_iter_next (&gr_it, &k, &v)) {
  1714. rspamd_symcache_enable_symbol_checkpoint (task, cache, k);
  1715. }
  1716. }
  1717. }
  1718. }
  1719. }
  1720. disabled = ucl_object_lookup (task->settings, "symbols_disabled");
  1721. if (disabled) {
  1722. it = NULL;
  1723. while ((cur = ucl_iterate_object (disabled, &it, true)) != NULL) {
  1724. rspamd_symcache_disable_symbol_checkpoint (task, cache,
  1725. ucl_object_tostring (cur));
  1726. }
  1727. }
  1728. /* Disable groups of symbols */
  1729. disabled = ucl_object_lookup (task->settings, "groups_disabled");
  1730. if (disabled) {
  1731. it = NULL;
  1732. while ((cur = ucl_iterate_object (disabled, &it, true)) != NULL) {
  1733. if (ucl_object_type (cur) == UCL_STRING) {
  1734. gr = g_hash_table_lookup (task->cfg->groups,
  1735. ucl_object_tostring (cur));
  1736. if (gr) {
  1737. g_hash_table_iter_init (&gr_it, gr->symbols);
  1738. while (g_hash_table_iter_next (&gr_it, &k, &v)) {
  1739. rspamd_symcache_disable_symbol_checkpoint (task, cache, k);
  1740. }
  1741. }
  1742. }
  1743. }
  1744. }
  1745. return FALSE;
  1746. }
  1747. gboolean
  1748. rspamd_symcache_process_symbols (struct rspamd_task *task,
  1749. struct rspamd_symcache *cache,
  1750. gint stage)
  1751. {
  1752. struct rspamd_symcache_item *item = NULL;
  1753. struct rspamd_symcache_dynamic_item *dyn_item;
  1754. struct cache_savepoint *checkpoint;
  1755. gint i;
  1756. gboolean all_done = TRUE;
  1757. gint saved_priority;
  1758. guint start_events_pending;
  1759. g_assert (cache != NULL);
  1760. if (task->checkpoint == NULL) {
  1761. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  1762. task->checkpoint = checkpoint;
  1763. }
  1764. else {
  1765. checkpoint = task->checkpoint;
  1766. }
  1767. msg_debug_cache_task ("symbols processing stage at pass: %d", stage);
  1768. start_events_pending = rspamd_session_events_pending (task->s);
  1769. switch (stage) {
  1770. case RSPAMD_TASK_STAGE_CONNFILTERS:
  1771. /* Check for connection filters */
  1772. saved_priority = G_MININT;
  1773. all_done = TRUE;
  1774. for (i = 0; i < (gint) cache->connfilters->len; i++) {
  1775. item = g_ptr_array_index (cache->connfilters, i);
  1776. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1777. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1778. return TRUE;
  1779. }
  1780. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1781. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1782. if (checkpoint->has_slow) {
  1783. /* Delay */
  1784. checkpoint->has_slow = FALSE;
  1785. return FALSE;
  1786. }
  1787. /* Check priorities */
  1788. if (saved_priority == G_MININT) {
  1789. saved_priority = item->priority;
  1790. }
  1791. else {
  1792. if (item->priority < saved_priority &&
  1793. rspamd_session_events_pending (task->s) > start_events_pending) {
  1794. /*
  1795. * Delay further checks as we have higher
  1796. * priority filters to be processed
  1797. */
  1798. return FALSE;
  1799. }
  1800. }
  1801. rspamd_symcache_check_symbol (task, cache, item,
  1802. checkpoint);
  1803. all_done = FALSE;
  1804. }
  1805. }
  1806. break;
  1807. case RSPAMD_TASK_STAGE_PRE_FILTERS:
  1808. /* Check for prefilters */
  1809. saved_priority = G_MININT;
  1810. all_done = TRUE;
  1811. for (i = 0; i < (gint) cache->prefilters->len; i++) {
  1812. item = g_ptr_array_index (cache->prefilters, i);
  1813. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1814. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1815. return TRUE;
  1816. }
  1817. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1818. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1819. /* Check priorities */
  1820. if (checkpoint->has_slow) {
  1821. /* Delay */
  1822. checkpoint->has_slow = FALSE;
  1823. return FALSE;
  1824. }
  1825. if (saved_priority == G_MININT) {
  1826. saved_priority = item->priority;
  1827. }
  1828. else {
  1829. if (item->priority < saved_priority &&
  1830. rspamd_session_events_pending (task->s) > start_events_pending) {
  1831. /*
  1832. * Delay further checks as we have higher
  1833. * priority filters to be processed
  1834. */
  1835. return FALSE;
  1836. }
  1837. }
  1838. rspamd_symcache_check_symbol (task, cache, item,
  1839. checkpoint);
  1840. all_done = FALSE;
  1841. }
  1842. }
  1843. break;
  1844. case RSPAMD_TASK_STAGE_FILTERS:
  1845. all_done = TRUE;
  1846. for (i = 0; i < (gint) checkpoint->version; i++) {
  1847. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1848. return TRUE;
  1849. }
  1850. item = g_ptr_array_index (checkpoint->order->d, i);
  1851. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1852. if (item->type & SYMBOL_TYPE_CLASSIFIER) {
  1853. continue;
  1854. }
  1855. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  1856. all_done = FALSE;
  1857. if (!rspamd_symcache_check_deps (task, cache, item,
  1858. checkpoint, 0, FALSE)) {
  1859. msg_debug_cache_task ("blocked execution of %d(%s) unless deps are "
  1860. "resolved",
  1861. item->id, item->symbol);
  1862. continue;
  1863. }
  1864. rspamd_symcache_check_symbol (task, cache, item,
  1865. checkpoint);
  1866. if (checkpoint->has_slow) {
  1867. /* Delay */
  1868. checkpoint->has_slow = FALSE;
  1869. return FALSE;
  1870. }
  1871. }
  1872. if (!(item->type & SYMBOL_TYPE_FINE)) {
  1873. if (rspamd_symcache_metric_limit (task, checkpoint)) {
  1874. msg_info_task ("task has already scored more than %.2f, so do "
  1875. "not "
  1876. "plan more checks",
  1877. checkpoint->rs->score);
  1878. all_done = TRUE;
  1879. break;
  1880. }
  1881. }
  1882. }
  1883. break;
  1884. case RSPAMD_TASK_STAGE_POST_FILTERS:
  1885. /* Check for postfilters */
  1886. saved_priority = G_MININT;
  1887. all_done = TRUE;
  1888. for (i = 0; i < (gint) cache->postfilters->len; i++) {
  1889. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1890. return TRUE;
  1891. }
  1892. item = g_ptr_array_index (cache->postfilters, i);
  1893. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1894. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1895. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1896. /* Check priorities */
  1897. all_done = FALSE;
  1898. if (checkpoint->has_slow) {
  1899. /* Delay */
  1900. checkpoint->has_slow = FALSE;
  1901. return FALSE;
  1902. }
  1903. if (saved_priority == G_MININT) {
  1904. saved_priority = item->priority;
  1905. }
  1906. else {
  1907. if (item->priority > saved_priority &&
  1908. rspamd_session_events_pending (task->s) > start_events_pending) {
  1909. /*
  1910. * Delay further checks as we have higher
  1911. * priority filters to be processed
  1912. */
  1913. return FALSE;
  1914. }
  1915. }
  1916. rspamd_symcache_check_symbol (task, cache, item,
  1917. checkpoint);
  1918. }
  1919. }
  1920. break;
  1921. case RSPAMD_TASK_STAGE_IDEMPOTENT:
  1922. /* Check for postfilters */
  1923. saved_priority = G_MININT;
  1924. for (i = 0; i < (gint) cache->idempotent->len; i++) {
  1925. item = g_ptr_array_index (cache->idempotent, i);
  1926. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1927. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1928. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1929. /* Check priorities */
  1930. if (checkpoint->has_slow) {
  1931. /* Delay */
  1932. checkpoint->has_slow = FALSE;
  1933. return FALSE;
  1934. }
  1935. if (saved_priority == G_MININT) {
  1936. saved_priority = item->priority;
  1937. }
  1938. else {
  1939. if (item->priority > saved_priority &&
  1940. rspamd_session_events_pending (task->s) > start_events_pending) {
  1941. /*
  1942. * Delay further checks as we have higher
  1943. * priority filters to be processed
  1944. */
  1945. return FALSE;
  1946. }
  1947. }
  1948. rspamd_symcache_check_symbol (task, cache, item,
  1949. checkpoint);
  1950. }
  1951. }
  1952. break;
  1953. default:
  1954. g_assert_not_reached ();
  1955. }
  1956. return all_done;
  1957. }
  1958. struct counters_cbdata {
  1959. ucl_object_t *top;
  1960. struct rspamd_symcache *cache;
  1961. };
  1962. #define ROUND_DOUBLE(x) (floor((x) * 100.0) / 100.0)
  1963. static void
  1964. rspamd_symcache_counters_cb (gpointer k, gpointer v, gpointer ud)
  1965. {
  1966. struct counters_cbdata *cbd = ud;
  1967. ucl_object_t *obj, *top;
  1968. struct rspamd_symcache_item *item = v, *parent;
  1969. const gchar *symbol = k;
  1970. top = cbd->top;
  1971. obj = ucl_object_typed_new (UCL_OBJECT);
  1972. ucl_object_insert_key (obj, ucl_object_fromstring (symbol ? symbol : "unknown"),
  1973. "symbol", 0, false);
  1974. if (item->is_virtual) {
  1975. if (!(item->type & SYMBOL_TYPE_GHOST)) {
  1976. parent = g_ptr_array_index (cbd->cache->items_by_id,
  1977. item->specific.virtual.parent);
  1978. ucl_object_insert_key (obj,
  1979. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  1980. "weight", 0, false);
  1981. ucl_object_insert_key (obj,
  1982. ucl_object_fromdouble (ROUND_DOUBLE (parent->st->avg_frequency)),
  1983. "frequency", 0, false);
  1984. ucl_object_insert_key (obj,
  1985. ucl_object_fromint (parent->st->total_hits),
  1986. "hits", 0, false);
  1987. ucl_object_insert_key (obj,
  1988. ucl_object_fromdouble (ROUND_DOUBLE (parent->st->avg_time)),
  1989. "time", 0, false);
  1990. }
  1991. else {
  1992. ucl_object_insert_key (obj,
  1993. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  1994. "weight", 0, false);
  1995. ucl_object_insert_key (obj,
  1996. ucl_object_fromdouble (0.0),
  1997. "frequency", 0, false);
  1998. ucl_object_insert_key (obj,
  1999. ucl_object_fromdouble (0.0),
  2000. "hits", 0, false);
  2001. ucl_object_insert_key (obj,
  2002. ucl_object_fromdouble (0.0),
  2003. "time", 0, false);
  2004. }
  2005. }
  2006. else {
  2007. ucl_object_insert_key (obj,
  2008. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  2009. "weight", 0, false);
  2010. ucl_object_insert_key (obj,
  2011. ucl_object_fromdouble (ROUND_DOUBLE (item->st->avg_frequency)),
  2012. "frequency", 0, false);
  2013. ucl_object_insert_key (obj,
  2014. ucl_object_fromint (item->st->total_hits),
  2015. "hits", 0, false);
  2016. ucl_object_insert_key (obj,
  2017. ucl_object_fromdouble (ROUND_DOUBLE (item->st->avg_time)),
  2018. "time", 0, false);
  2019. }
  2020. ucl_array_append (top, obj);
  2021. }
  2022. #undef ROUND_DOUBLE
  2023. ucl_object_t *
  2024. rspamd_symcache_counters (struct rspamd_symcache *cache)
  2025. {
  2026. ucl_object_t *top;
  2027. struct counters_cbdata cbd;
  2028. g_assert (cache != NULL);
  2029. top = ucl_object_typed_new (UCL_ARRAY);
  2030. cbd.top = top;
  2031. cbd.cache = cache;
  2032. g_hash_table_foreach (cache->items_by_symbol,
  2033. rspamd_symcache_counters_cb, &cbd);
  2034. return top;
  2035. }
  2036. static void
  2037. rspamd_symcache_call_peak_cb (struct ev_loop *ev_base,
  2038. struct rspamd_symcache *cache,
  2039. struct rspamd_symcache_item *item,
  2040. gdouble cur_value,
  2041. gdouble cur_err)
  2042. {
  2043. lua_State *L = cache->cfg->lua_state;
  2044. struct ev_loop **pbase;
  2045. lua_rawgeti (L, LUA_REGISTRYINDEX, cache->peak_cb);
  2046. pbase = lua_newuserdata (L, sizeof (*pbase));
  2047. *pbase = ev_base;
  2048. rspamd_lua_setclass (L, "rspamd{ev_base}", -1);
  2049. lua_pushstring (L, item->symbol);
  2050. lua_pushnumber (L, item->st->avg_frequency);
  2051. lua_pushnumber (L, sqrt (item->st->stddev_frequency));
  2052. lua_pushnumber (L, cur_value);
  2053. lua_pushnumber (L, cur_err);
  2054. if (lua_pcall (L, 6, 0, 0) != 0) {
  2055. msg_info_cache ("call to peak function for %s failed: %s",
  2056. item->symbol, lua_tostring (L, -1));
  2057. lua_pop (L, 1);
  2058. }
  2059. }
  2060. static void
  2061. rspamd_symcache_resort_cb (EV_P_ ev_timer *w, int revents)
  2062. {
  2063. gdouble tm;
  2064. struct rspamd_cache_refresh_cbdata *cbdata =
  2065. (struct rspamd_cache_refresh_cbdata *)w->data;
  2066. struct rspamd_symcache *cache;
  2067. struct rspamd_symcache_item *item;
  2068. guint i;
  2069. gdouble cur_ticks;
  2070. static const double decay_rate = 0.7;
  2071. cache = cbdata->cache;
  2072. /* Plan new event */
  2073. tm = rspamd_time_jitter (cache->reload_time, 0);
  2074. cur_ticks = rspamd_get_ticks (FALSE);
  2075. msg_debug_cache ("resort symbols cache, next reload in %.2f seconds", tm);
  2076. g_assert (cache != NULL);
  2077. cbdata->resort_ev.repeat = tm;
  2078. ev_timer_again (EV_A_ w);
  2079. if (rspamd_worker_is_primary_controller (cbdata->w)) {
  2080. /* Gather stats from shared execution times */
  2081. for (i = 0; i < cache->filters->len; i ++) {
  2082. item = g_ptr_array_index (cache->filters, i);
  2083. item->st->total_hits += item->st->hits;
  2084. g_atomic_int_set (&item->st->hits, 0);
  2085. if (item->last_count > 0 && cbdata->w->index == 0) {
  2086. /* Calculate frequency */
  2087. gdouble cur_err, cur_value;
  2088. cur_value = (item->st->total_hits - item->last_count) /
  2089. (cur_ticks - cbdata->last_resort);
  2090. rspamd_set_counter_ema (&item->st->frequency_counter,
  2091. cur_value, decay_rate);
  2092. item->st->avg_frequency = item->st->frequency_counter.mean;
  2093. item->st->stddev_frequency = item->st->frequency_counter.stddev;
  2094. if (cur_value > 0) {
  2095. msg_debug_cache ("frequency for %s is %.2f, avg: %.2f",
  2096. item->symbol, cur_value, item->st->avg_frequency);
  2097. }
  2098. cur_err = (item->st->avg_frequency - cur_value);
  2099. cur_err *= cur_err;
  2100. /*
  2101. * TODO: replace magic number
  2102. */
  2103. if (item->st->frequency_counter.number > 10 &&
  2104. cur_err > sqrt (item->st->stddev_frequency) * 3) {
  2105. item->frequency_peaks ++;
  2106. msg_debug_cache ("peak found for %s is %.2f, avg: %.2f, "
  2107. "stddev: %.2f, error: %.2f, peaks: %d",
  2108. item->symbol, cur_value,
  2109. item->st->avg_frequency,
  2110. item->st->stddev_frequency,
  2111. cur_err,
  2112. item->frequency_peaks);
  2113. if (cache->peak_cb != -1) {
  2114. rspamd_symcache_call_peak_cb (cbdata->event_loop,
  2115. cache, item,
  2116. cur_value, cur_err);
  2117. }
  2118. }
  2119. }
  2120. item->last_count = item->st->total_hits;
  2121. if (item->cd->number > 0) {
  2122. if (item->type & (SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_NORMAL)) {
  2123. item->st->avg_time = item->cd->mean;
  2124. rspamd_set_counter_ema (&item->st->time_counter,
  2125. item->st->avg_time, decay_rate);
  2126. item->st->avg_time = item->st->time_counter.mean;
  2127. memset (item->cd, 0, sizeof (*item->cd));
  2128. }
  2129. }
  2130. }
  2131. cbdata->last_resort = cur_ticks;
  2132. /* We don't do actual sorting due to topological guarantees */
  2133. }
  2134. }
  2135. static void
  2136. rspamd_symcache_refresh_dtor (void *d)
  2137. {
  2138. struct rspamd_cache_refresh_cbdata *cbdata =
  2139. (struct rspamd_cache_refresh_cbdata *)d;
  2140. ev_timer_stop (cbdata->event_loop, &cbdata->resort_ev);
  2141. }
  2142. void
  2143. rspamd_symcache_start_refresh (struct rspamd_symcache *cache,
  2144. struct ev_loop *ev_base, struct rspamd_worker *w)
  2145. {
  2146. gdouble tm;
  2147. struct rspamd_cache_refresh_cbdata *cbdata;
  2148. cbdata = rspamd_mempool_alloc0 (cache->static_pool, sizeof (*cbdata));
  2149. cbdata->last_resort = rspamd_get_ticks (TRUE);
  2150. cbdata->event_loop = ev_base;
  2151. cbdata->w = w;
  2152. cbdata->cache = cache;
  2153. tm = rspamd_time_jitter (cache->reload_time, 0);
  2154. msg_debug_cache ("next reload in %.2f seconds", tm);
  2155. g_assert (cache != NULL);
  2156. cbdata->resort_ev.data = cbdata;
  2157. ev_timer_init (&cbdata->resort_ev, rspamd_symcache_resort_cb,
  2158. tm, tm);
  2159. ev_timer_start (cbdata->event_loop, &cbdata->resort_ev);
  2160. rspamd_mempool_add_destructor (cache->static_pool,
  2161. rspamd_symcache_refresh_dtor, cbdata);
  2162. }
  2163. void
  2164. rspamd_symcache_inc_frequency (struct rspamd_symcache *cache,
  2165. struct rspamd_symcache_item *item)
  2166. {
  2167. if (item != NULL) {
  2168. g_atomic_int_inc (&item->st->hits);
  2169. }
  2170. }
  2171. void
  2172. rspamd_symcache_add_dependency (struct rspamd_symcache *cache,
  2173. gint id_from, const gchar *to,
  2174. gint virtual_id_from)
  2175. {
  2176. struct rspamd_symcache_item *source, *vsource;
  2177. struct cache_dependency *dep;
  2178. g_assert (id_from >= 0 && id_from < (gint)cache->items_by_id->len);
  2179. source = (struct rspamd_symcache_item *)g_ptr_array_index (cache->items_by_id, id_from);
  2180. dep = rspamd_mempool_alloc (cache->static_pool, sizeof (*dep));
  2181. dep->id = id_from;
  2182. dep->sym = rspamd_mempool_strdup (cache->static_pool, to);
  2183. /* Will be filled later */
  2184. dep->item = NULL;
  2185. dep->vid = -1;
  2186. g_ptr_array_add (source->deps, dep);
  2187. if (virtual_id_from >= 0) {
  2188. g_assert (virtual_id_from < (gint)cache->virtual->len);
  2189. /* We need that for settings id propagation */
  2190. vsource = (struct rspamd_symcache_item *)
  2191. g_ptr_array_index (cache->virtual, virtual_id_from);
  2192. dep = rspamd_mempool_alloc (cache->static_pool, sizeof (*dep));
  2193. dep->vid = virtual_id_from;
  2194. dep->id = -1;
  2195. dep->sym = rspamd_mempool_strdup (cache->static_pool, to);
  2196. /* Will be filled later */
  2197. dep->item = NULL;
  2198. g_ptr_array_add (vsource->deps, dep);
  2199. }
  2200. }
  2201. void
  2202. rspamd_symcache_add_delayed_dependency (struct rspamd_symcache *cache,
  2203. const gchar *from, const gchar *to)
  2204. {
  2205. struct delayed_cache_dependency *ddep;
  2206. g_assert (from != NULL);
  2207. g_assert (to != NULL);
  2208. ddep = g_malloc0 (sizeof (*ddep));
  2209. ddep->from = g_strdup (from);
  2210. ddep->to = g_strdup (to);
  2211. cache->delayed_deps = g_list_prepend (cache->delayed_deps, ddep);
  2212. }
  2213. gint
  2214. rspamd_symcache_find_symbol (struct rspamd_symcache *cache, const gchar *name)
  2215. {
  2216. struct rspamd_symcache_item *item;
  2217. g_assert (cache != NULL);
  2218. if (name == NULL) {
  2219. return -1;
  2220. }
  2221. item = g_hash_table_lookup (cache->items_by_symbol, name);
  2222. if (item != NULL) {
  2223. return item->id;
  2224. }
  2225. return -1;
  2226. }
  2227. gboolean
  2228. rspamd_symcache_stat_symbol (struct rspamd_symcache *cache,
  2229. const gchar *name,
  2230. gdouble *frequency,
  2231. gdouble *freq_stddev,
  2232. gdouble *tm,
  2233. guint *nhits)
  2234. {
  2235. struct rspamd_symcache_item *item;
  2236. g_assert (cache != NULL);
  2237. if (name == NULL) {
  2238. return FALSE;
  2239. }
  2240. item = g_hash_table_lookup (cache->items_by_symbol, name);
  2241. if (item != NULL) {
  2242. *frequency = item->st->avg_frequency;
  2243. *freq_stddev = sqrt (item->st->stddev_frequency);
  2244. *tm = item->st->time_counter.mean;
  2245. if (nhits) {
  2246. *nhits = item->st->hits;
  2247. }
  2248. return TRUE;
  2249. }
  2250. return FALSE;
  2251. }
  2252. const gchar *
  2253. rspamd_symcache_symbol_by_id (struct rspamd_symcache *cache,
  2254. gint id)
  2255. {
  2256. struct rspamd_symcache_item *item;
  2257. g_assert (cache != NULL);
  2258. if (id < 0 || id >= (gint)cache->items_by_id->len) {
  2259. return NULL;
  2260. }
  2261. item = g_ptr_array_index (cache->items_by_id, id);
  2262. return item->symbol;
  2263. }
  2264. guint
  2265. rspamd_symcache_stats_symbols_count (struct rspamd_symcache *cache)
  2266. {
  2267. g_assert (cache != NULL);
  2268. return cache->stats_symbols_count;
  2269. }
  2270. void
  2271. rspamd_symcache_disable_all_symbols (struct rspamd_task *task,
  2272. struct rspamd_symcache *cache,
  2273. guint skip_mask)
  2274. {
  2275. struct cache_savepoint *checkpoint;
  2276. guint i;
  2277. struct rspamd_symcache_item *item;
  2278. struct rspamd_symcache_dynamic_item *dyn_item;
  2279. if (task->checkpoint == NULL) {
  2280. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2281. task->checkpoint = checkpoint;
  2282. }
  2283. else {
  2284. checkpoint = task->checkpoint;
  2285. }
  2286. /* Enable for squeezed symbols */
  2287. PTR_ARRAY_FOREACH (cache->items_by_id, i, item) {
  2288. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2289. if (!(item->type & (skip_mask))) {
  2290. SET_FINISH_BIT (checkpoint, dyn_item);
  2291. SET_START_BIT (checkpoint, dyn_item);
  2292. }
  2293. }
  2294. }
  2295. static void
  2296. rspamd_symcache_disable_symbol_checkpoint (struct rspamd_task *task,
  2297. struct rspamd_symcache *cache, const gchar *symbol)
  2298. {
  2299. struct cache_savepoint *checkpoint;
  2300. struct rspamd_symcache_item *item;
  2301. struct rspamd_symcache_dynamic_item *dyn_item;
  2302. if (task->checkpoint == NULL) {
  2303. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2304. task->checkpoint = checkpoint;
  2305. }
  2306. else {
  2307. checkpoint = task->checkpoint;
  2308. }
  2309. item = rspamd_symcache_find_filter (cache, symbol, true);
  2310. if (item) {
  2311. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2312. SET_FINISH_BIT (checkpoint, dyn_item);
  2313. SET_START_BIT (checkpoint, dyn_item);
  2314. msg_debug_cache_task ("disable execution of %s", symbol);
  2315. }
  2316. else {
  2317. msg_info_task ("cannot disable %s: not found", symbol);
  2318. }
  2319. }
  2320. static void
  2321. rspamd_symcache_enable_symbol_checkpoint (struct rspamd_task *task,
  2322. struct rspamd_symcache *cache, const gchar *symbol)
  2323. {
  2324. struct cache_savepoint *checkpoint;
  2325. struct rspamd_symcache_item *item;
  2326. struct rspamd_symcache_dynamic_item *dyn_item;
  2327. if (task->checkpoint == NULL) {
  2328. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2329. task->checkpoint = checkpoint;
  2330. }
  2331. else {
  2332. checkpoint = task->checkpoint;
  2333. }
  2334. item = rspamd_symcache_find_filter (cache, symbol, true);
  2335. if (item) {
  2336. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2337. dyn_item->finished = 0;
  2338. dyn_item->started = 0;
  2339. msg_debug_cache_task ("enable execution of %s", symbol);
  2340. }
  2341. else {
  2342. msg_info_task ("cannot enable %s: not found", symbol);
  2343. }
  2344. }
  2345. struct rspamd_abstract_callback_data*
  2346. rspamd_symcache_get_cbdata (struct rspamd_symcache *cache,
  2347. const gchar *symbol)
  2348. {
  2349. struct rspamd_symcache_item *item;
  2350. g_assert (cache != NULL);
  2351. g_assert (symbol != NULL);
  2352. item = rspamd_symcache_find_filter (cache, symbol, true);
  2353. if (item) {
  2354. return item->specific.normal.user_data;
  2355. }
  2356. return NULL;
  2357. }
  2358. gboolean
  2359. rspamd_symcache_is_checked (struct rspamd_task *task,
  2360. struct rspamd_symcache *cache, const gchar *symbol)
  2361. {
  2362. struct cache_savepoint *checkpoint;
  2363. struct rspamd_symcache_item *item;
  2364. struct rspamd_symcache_dynamic_item *dyn_item;
  2365. g_assert (cache != NULL);
  2366. g_assert (symbol != NULL);
  2367. if (task->checkpoint == NULL) {
  2368. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2369. task->checkpoint = checkpoint;
  2370. }
  2371. else {
  2372. checkpoint = task->checkpoint;
  2373. }
  2374. item = rspamd_symcache_find_filter (cache, symbol, true);
  2375. if (item) {
  2376. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2377. return dyn_item->started;
  2378. }
  2379. return FALSE;
  2380. }
  2381. void
  2382. rspamd_symcache_disable_symbol_perm (struct rspamd_symcache *cache,
  2383. const gchar *symbol,
  2384. gboolean resolve_parent)
  2385. {
  2386. struct rspamd_symcache_item *item;
  2387. g_assert (cache != NULL);
  2388. g_assert (symbol != NULL);
  2389. item = rspamd_symcache_find_filter (cache, symbol, resolve_parent);
  2390. if (item) {
  2391. item->enabled = FALSE;
  2392. }
  2393. }
  2394. void
  2395. rspamd_symcache_enable_symbol_perm (struct rspamd_symcache *cache,
  2396. const gchar *symbol)
  2397. {
  2398. struct rspamd_symcache_item *item;
  2399. g_assert (cache != NULL);
  2400. g_assert (symbol != NULL);
  2401. item = rspamd_symcache_find_filter (cache, symbol, true);
  2402. if (item) {
  2403. item->enabled = TRUE;
  2404. }
  2405. }
  2406. guint64
  2407. rspamd_symcache_get_cksum (struct rspamd_symcache *cache)
  2408. {
  2409. g_assert (cache != NULL);
  2410. return cache->cksum;
  2411. }
  2412. gboolean
  2413. rspamd_symcache_is_symbol_enabled (struct rspamd_task *task,
  2414. struct rspamd_symcache *cache,
  2415. const gchar *symbol)
  2416. {
  2417. struct cache_savepoint *checkpoint;
  2418. struct rspamd_symcache_item *item;
  2419. struct rspamd_symcache_dynamic_item *dyn_item;
  2420. lua_State *L;
  2421. struct rspamd_task **ptask;
  2422. gboolean ret = TRUE;
  2423. g_assert (cache != NULL);
  2424. g_assert (symbol != NULL);
  2425. checkpoint = task->checkpoint;
  2426. if (checkpoint) {
  2427. item = rspamd_symcache_find_filter (cache, symbol, true);
  2428. if (item) {
  2429. if (!rspamd_symcache_is_item_allowed (task, item, TRUE)) {
  2430. ret = FALSE;
  2431. }
  2432. else {
  2433. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2434. if (CHECK_START_BIT (checkpoint, dyn_item)) {
  2435. ret = FALSE;
  2436. }
  2437. else {
  2438. if (item->specific.normal.conditions) {
  2439. struct rspamd_symcache_condition *cur_cond;
  2440. DL_FOREACH (item->specific.normal.conditions, cur_cond) {
  2441. /*
  2442. * We also executes condition callback to check
  2443. * if we need this symbol
  2444. */
  2445. L = task->cfg->lua_state;
  2446. lua_rawgeti (L, LUA_REGISTRYINDEX, cur_cond->cb);
  2447. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  2448. rspamd_lua_setclass (L, "rspamd{task}", -1);
  2449. *ptask = task;
  2450. if (lua_pcall (L, 1, 1, 0) != 0) {
  2451. msg_info_task ("call to condition for %s failed: %s",
  2452. item->symbol, lua_tostring (L, -1));
  2453. lua_pop (L, 1);
  2454. }
  2455. else {
  2456. ret = lua_toboolean (L, -1);
  2457. lua_pop (L, 1);
  2458. }
  2459. if (!ret) {
  2460. break;
  2461. }
  2462. }
  2463. }
  2464. }
  2465. }
  2466. }
  2467. }
  2468. return ret;
  2469. }
  2470. gboolean
  2471. rspamd_symcache_enable_symbol (struct rspamd_task *task,
  2472. struct rspamd_symcache *cache,
  2473. const gchar *symbol)
  2474. {
  2475. struct cache_savepoint *checkpoint;
  2476. struct rspamd_symcache_item *item;
  2477. struct rspamd_symcache_dynamic_item *dyn_item;
  2478. gboolean ret = FALSE;
  2479. g_assert (cache != NULL);
  2480. g_assert (symbol != NULL);
  2481. checkpoint = task->checkpoint;
  2482. if (checkpoint) {
  2483. item = rspamd_symcache_find_filter (cache, symbol, true);
  2484. if (item) {
  2485. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2486. if (!CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  2487. ret = TRUE;
  2488. CLR_START_BIT (checkpoint, dyn_item);
  2489. CLR_FINISH_BIT (checkpoint, dyn_item);
  2490. }
  2491. else {
  2492. msg_debug_task ("cannot enable symbol %s: already started", symbol);
  2493. }
  2494. }
  2495. }
  2496. return ret;
  2497. }
  2498. gboolean
  2499. rspamd_symcache_disable_symbol (struct rspamd_task *task,
  2500. struct rspamd_symcache *cache,
  2501. const gchar *symbol)
  2502. {
  2503. struct cache_savepoint *checkpoint;
  2504. struct rspamd_symcache_item *item;
  2505. struct rspamd_symcache_dynamic_item *dyn_item;
  2506. gboolean ret = FALSE;
  2507. g_assert (cache != NULL);
  2508. g_assert (symbol != NULL);
  2509. checkpoint = task->checkpoint;
  2510. if (checkpoint) {
  2511. item = rspamd_symcache_find_filter (cache, symbol, true);
  2512. if (item) {
  2513. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2514. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  2515. ret = TRUE;
  2516. SET_START_BIT (checkpoint, dyn_item);
  2517. SET_FINISH_BIT (checkpoint, dyn_item);
  2518. }
  2519. else {
  2520. if (!CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  2521. msg_warn_task ("cannot disable symbol %s: already started",
  2522. symbol);
  2523. }
  2524. }
  2525. }
  2526. }
  2527. return ret;
  2528. }
  2529. void
  2530. rspamd_symcache_foreach (struct rspamd_symcache *cache,
  2531. void (*func) (struct rspamd_symcache_item *, gpointer),
  2532. gpointer ud)
  2533. {
  2534. struct rspamd_symcache_item *item;
  2535. GHashTableIter it;
  2536. gpointer k, v;
  2537. g_hash_table_iter_init (&it, cache->items_by_symbol);
  2538. while (g_hash_table_iter_next (&it, &k, &v)) {
  2539. item = (struct rspamd_symcache_item *)v;
  2540. func (item, ud);
  2541. }
  2542. }
  2543. struct rspamd_symcache_item *
  2544. rspamd_symcache_get_cur_item (struct rspamd_task *task)
  2545. {
  2546. struct cache_savepoint *checkpoint = task->checkpoint;
  2547. if (checkpoint == NULL) {
  2548. return NULL;
  2549. }
  2550. return checkpoint->cur_item;
  2551. }
  2552. /**
  2553. * Replaces the current item being processed.
  2554. * Returns the current item being processed (if any)
  2555. * @param task
  2556. * @param item
  2557. * @return
  2558. */
  2559. struct rspamd_symcache_item *
  2560. rspamd_symcache_set_cur_item (struct rspamd_task *task,
  2561. struct rspamd_symcache_item *item)
  2562. {
  2563. struct cache_savepoint *checkpoint = task->checkpoint;
  2564. struct rspamd_symcache_item *ex;
  2565. ex = checkpoint->cur_item;
  2566. checkpoint->cur_item = item;
  2567. return ex;
  2568. }
  2569. struct rspamd_symcache_delayed_cbdata {
  2570. struct rspamd_symcache_item *item;
  2571. struct rspamd_task *task;
  2572. struct rspamd_async_event *event;
  2573. struct ev_timer tm;
  2574. };
  2575. static void
  2576. rspamd_symcache_delayed_item_fin (gpointer ud)
  2577. {
  2578. struct rspamd_symcache_delayed_cbdata *cbd =
  2579. (struct rspamd_symcache_delayed_cbdata *)ud;
  2580. struct rspamd_task *task;
  2581. struct cache_savepoint *checkpoint;
  2582. task = cbd->task;
  2583. checkpoint = task->checkpoint;
  2584. checkpoint->has_slow = FALSE;
  2585. ev_timer_stop (task->event_loop, &cbd->tm);
  2586. }
  2587. static void
  2588. rspamd_symcache_delayed_item_cb (EV_P_ ev_timer *w, int what)
  2589. {
  2590. struct rspamd_symcache_delayed_cbdata *cbd =
  2591. (struct rspamd_symcache_delayed_cbdata *)w->data;
  2592. struct rspamd_symcache_item *item;
  2593. struct rspamd_task *task;
  2594. struct cache_dependency *rdep;
  2595. struct cache_savepoint *checkpoint;
  2596. struct rspamd_symcache_dynamic_item *dyn_item;
  2597. guint i;
  2598. item = cbd->item;
  2599. task = cbd->task;
  2600. checkpoint = task->checkpoint;
  2601. cbd->event = NULL;
  2602. /* Timer will be stopped here */
  2603. rspamd_session_remove_event (task->s,
  2604. rspamd_symcache_delayed_item_fin, cbd);
  2605. /* Process all reverse dependencies */
  2606. PTR_ARRAY_FOREACH (item->rdeps, i, rdep) {
  2607. if (rdep->item) {
  2608. dyn_item = rspamd_symcache_get_dynamic (checkpoint, rdep->item);
  2609. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  2610. msg_debug_cache_task ("check item %d(%s) rdep of %s ",
  2611. rdep->item->id, rdep->item->symbol, item->symbol);
  2612. if (!rspamd_symcache_check_deps (task, task->cfg->cache,
  2613. rdep->item,
  2614. checkpoint, 0, FALSE)) {
  2615. msg_debug_cache_task ("blocked execution of %d(%s) rdep of %s "
  2616. "unless deps are resolved",
  2617. rdep->item->id, rdep->item->symbol, item->symbol);
  2618. }
  2619. else {
  2620. rspamd_symcache_check_symbol (task, task->cfg->cache,
  2621. rdep->item,
  2622. checkpoint);
  2623. }
  2624. }
  2625. }
  2626. }
  2627. }
  2628. static void
  2629. rspamd_delayed_timer_dtor (gpointer d)
  2630. {
  2631. struct rspamd_symcache_delayed_cbdata *cbd =
  2632. (struct rspamd_symcache_delayed_cbdata *)d;
  2633. if (cbd->event) {
  2634. /* Event has not been executed */
  2635. rspamd_session_remove_event (cbd->task->s,
  2636. rspamd_symcache_delayed_item_fin, cbd);
  2637. cbd->event = NULL;
  2638. }
  2639. }
  2640. /**
  2641. * Finalize the current async element potentially calling its deps
  2642. */
  2643. void
  2644. rspamd_symcache_finalize_item (struct rspamd_task *task,
  2645. struct rspamd_symcache_item *item)
  2646. {
  2647. struct cache_savepoint *checkpoint = task->checkpoint;
  2648. struct cache_dependency *rdep;
  2649. struct rspamd_symcache_dynamic_item *dyn_item;
  2650. gdouble diff;
  2651. guint i;
  2652. gboolean enable_slow_timer = FALSE;
  2653. const gdouble slow_diff_limit = 300;
  2654. /* Sanity checks */
  2655. g_assert (checkpoint->items_inflight > 0);
  2656. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2657. if (dyn_item->async_events > 0) {
  2658. /*
  2659. * XXX: Race condition
  2660. *
  2661. * It is possible that some async event is still in flight, but we
  2662. * already know its result, however, it is the responsibility of that
  2663. * event to decrease async events count and call this function
  2664. * one more time
  2665. */
  2666. msg_debug_cache_task ("postpone finalisation of %s(%d) as there are %d "
  2667. "async events pendning",
  2668. item->symbol, item->id, dyn_item->async_events);
  2669. return;
  2670. }
  2671. msg_debug_cache_task ("process finalize for item %s(%d)", item->symbol, item->id);
  2672. SET_FINISH_BIT (checkpoint, dyn_item);
  2673. checkpoint->items_inflight --;
  2674. checkpoint->cur_item = NULL;
  2675. if (checkpoint->profile) {
  2676. ev_now_update_if_cheap (task->event_loop);
  2677. diff = ((ev_now (task->event_loop) - checkpoint->profile_start) * 1e3 -
  2678. dyn_item->start_msec);
  2679. if (diff > slow_diff_limit) {
  2680. if (!checkpoint->has_slow) {
  2681. checkpoint->has_slow = TRUE;
  2682. enable_slow_timer = TRUE;
  2683. msg_info_task ("slow rule: %s(%d): %.2f ms; enable slow timer delay",
  2684. item->symbol, item->id,
  2685. diff);
  2686. }
  2687. else {
  2688. msg_info_task ("slow rule: %s(%d): %.2f ms",
  2689. item->symbol, item->id,
  2690. diff);
  2691. }
  2692. }
  2693. if (G_UNLIKELY (RSPAMD_TASK_IS_PROFILING (task))) {
  2694. rspamd_task_profile_set (task, item->symbol, diff);
  2695. }
  2696. if (rspamd_worker_is_scanner (task->worker)) {
  2697. rspamd_set_counter (item->cd, diff);
  2698. }
  2699. }
  2700. if (enable_slow_timer) {
  2701. struct rspamd_symcache_delayed_cbdata *cbd =
  2702. rspamd_mempool_alloc (task->task_pool,sizeof (*cbd));
  2703. /* Add timer to allow something else to be executed */
  2704. ev_timer *tm = &cbd->tm;
  2705. cbd->event = rspamd_session_add_event (task->s,
  2706. rspamd_symcache_delayed_item_fin, cbd,
  2707. "symcache");
  2708. /*
  2709. * If no event could be added, then we are already in the destruction
  2710. * phase. So the main issue is to deal with has slow here
  2711. */
  2712. if (cbd->event) {
  2713. ev_timer_init (tm, rspamd_symcache_delayed_item_cb, 0.1, 0.0);
  2714. ev_set_priority (tm, EV_MINPRI);
  2715. rspamd_mempool_add_destructor (task->task_pool,
  2716. rspamd_delayed_timer_dtor, cbd);
  2717. cbd->task = task;
  2718. cbd->item = item;
  2719. tm->data = cbd;
  2720. ev_timer_start (task->event_loop, tm);
  2721. }
  2722. else {
  2723. /* Just reset as no timer is added */
  2724. checkpoint->has_slow = FALSE;
  2725. }
  2726. return;
  2727. }
  2728. /* Process all reverse dependencies */
  2729. PTR_ARRAY_FOREACH (item->rdeps, i, rdep) {
  2730. if (rdep->item) {
  2731. dyn_item = rspamd_symcache_get_dynamic (checkpoint, rdep->item);
  2732. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  2733. msg_debug_cache_task ("check item %d(%s) rdep of %s ",
  2734. rdep->item->id, rdep->item->symbol, item->symbol);
  2735. if (!rspamd_symcache_check_deps (task, task->cfg->cache,
  2736. rdep->item,
  2737. checkpoint, 0, FALSE)) {
  2738. msg_debug_cache_task ("blocked execution of %d(%s) rdep of %s "
  2739. "unless deps are resolved",
  2740. rdep->item->id, rdep->item->symbol, item->symbol);
  2741. }
  2742. else {
  2743. rspamd_symcache_check_symbol (task, task->cfg->cache,
  2744. rdep->item,
  2745. checkpoint);
  2746. }
  2747. }
  2748. }
  2749. }
  2750. }
  2751. guint
  2752. rspamd_symcache_item_async_inc_full (struct rspamd_task *task,
  2753. struct rspamd_symcache_item *item,
  2754. const gchar *subsystem,
  2755. const gchar *loc)
  2756. {
  2757. struct rspamd_symcache_dynamic_item *dyn_item;
  2758. struct cache_savepoint *checkpoint = task->checkpoint;
  2759. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2760. msg_debug_cache_task ("increase async events counter for %s(%d) = %d + 1; "
  2761. "subsystem %s (%s)",
  2762. item->symbol, item->id, dyn_item->async_events, subsystem, loc);
  2763. return ++dyn_item->async_events;
  2764. }
  2765. guint
  2766. rspamd_symcache_item_async_dec_full (struct rspamd_task *task,
  2767. struct rspamd_symcache_item *item,
  2768. const gchar *subsystem,
  2769. const gchar *loc)
  2770. {
  2771. struct rspamd_symcache_dynamic_item *dyn_item;
  2772. struct cache_savepoint *checkpoint = task->checkpoint;
  2773. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2774. msg_debug_cache_task ("decrease async events counter for %s(%d) = %d - 1; "
  2775. "subsystem %s (%s)",
  2776. item->symbol, item->id, dyn_item->async_events, subsystem, loc);
  2777. g_assert (dyn_item->async_events > 0);
  2778. return --dyn_item->async_events;
  2779. }
  2780. gboolean
  2781. rspamd_symcache_item_async_dec_check_full (struct rspamd_task *task,
  2782. struct rspamd_symcache_item *item,
  2783. const gchar *subsystem,
  2784. const gchar *loc)
  2785. {
  2786. if (rspamd_symcache_item_async_dec_full (task, item, subsystem, loc) == 0) {
  2787. rspamd_symcache_finalize_item (task, item);
  2788. return TRUE;
  2789. }
  2790. return FALSE;
  2791. }
  2792. gboolean
  2793. rspamd_symcache_add_symbol_flags (struct rspamd_symcache *cache,
  2794. const gchar *symbol,
  2795. guint flags)
  2796. {
  2797. struct rspamd_symcache_item *item;
  2798. g_assert (cache != NULL);
  2799. g_assert (symbol != NULL);
  2800. item = rspamd_symcache_find_filter (cache, symbol, true);
  2801. if (item) {
  2802. item->type |= flags;
  2803. return TRUE;
  2804. }
  2805. return FALSE;
  2806. }
  2807. gboolean
  2808. rspamd_symcache_set_symbol_flags (struct rspamd_symcache *cache,
  2809. const gchar *symbol,
  2810. guint flags)
  2811. {
  2812. struct rspamd_symcache_item *item;
  2813. g_assert (cache != NULL);
  2814. g_assert (symbol != NULL);
  2815. item = rspamd_symcache_find_filter (cache, symbol, true);
  2816. if (item) {
  2817. item->type = flags;
  2818. return TRUE;
  2819. }
  2820. return FALSE;
  2821. }
  2822. guint
  2823. rspamd_symcache_get_symbol_flags (struct rspamd_symcache *cache,
  2824. const gchar *symbol)
  2825. {
  2826. struct rspamd_symcache_item *item;
  2827. g_assert (cache != NULL);
  2828. g_assert (symbol != NULL);
  2829. item = rspamd_symcache_find_filter (cache, symbol, true);
  2830. if (item) {
  2831. return item->type;
  2832. }
  2833. return 0;
  2834. }
  2835. void
  2836. rspamd_symcache_composites_foreach (struct rspamd_task *task,
  2837. struct rspamd_symcache *cache,
  2838. GHFunc func,
  2839. gpointer fd)
  2840. {
  2841. guint i;
  2842. struct rspamd_symcache_item *item;
  2843. struct rspamd_symcache_dynamic_item *dyn_item;
  2844. if (task->checkpoint == NULL) {
  2845. return;
  2846. }
  2847. PTR_ARRAY_FOREACH (cache->composites, i, item) {
  2848. dyn_item = rspamd_symcache_get_dynamic (task->checkpoint, item);
  2849. if (!CHECK_START_BIT (task->checkpoint, dyn_item)) {
  2850. /* Cannot do it due to 2 passes */
  2851. /* SET_START_BIT (task->checkpoint, dyn_item); */
  2852. func (item->symbol, item->specific.normal.user_data, fd);
  2853. SET_FINISH_BIT (task->checkpoint, dyn_item);
  2854. }
  2855. }
  2856. }
  2857. bool
  2858. rspamd_symcache_set_allowed_settings_ids (struct rspamd_symcache *cache,
  2859. const gchar *symbol,
  2860. const guint32 *ids,
  2861. guint nids)
  2862. {
  2863. struct rspamd_symcache_item *item;
  2864. item = rspamd_symcache_find_filter (cache, symbol, false);
  2865. if (item == NULL) {
  2866. return false;
  2867. }
  2868. if (nids <= G_N_ELEMENTS (item->allowed_ids.st)) {
  2869. /* Use static version */
  2870. memset (&item->allowed_ids, 0, sizeof (item->allowed_ids));
  2871. for (guint i = 0; i < nids; i++) {
  2872. item->allowed_ids.st[i] = ids[i];
  2873. }
  2874. }
  2875. else {
  2876. /* Need to use a separate list */
  2877. item->allowed_ids.dyn.e = -1; /* Flag */
  2878. item->allowed_ids.dyn.n = rspamd_mempool_alloc (cache->static_pool,
  2879. sizeof (guint32) * nids);
  2880. item->allowed_ids.dyn.len = nids;
  2881. item->allowed_ids.dyn.allocated = nids;
  2882. for (guint i = 0; i < nids; i++) {
  2883. item->allowed_ids.dyn.n[i] = ids[i];
  2884. }
  2885. /* Keep sorted */
  2886. qsort (item->allowed_ids.dyn.n, nids, sizeof (guint32), rspamd_id_cmp);
  2887. }
  2888. return true;
  2889. }
  2890. bool
  2891. rspamd_symcache_set_forbidden_settings_ids (struct rspamd_symcache *cache,
  2892. const gchar *symbol,
  2893. const guint32 *ids,
  2894. guint nids)
  2895. {
  2896. struct rspamd_symcache_item *item;
  2897. item = rspamd_symcache_find_filter (cache, symbol, false);
  2898. if (item == NULL) {
  2899. return false;
  2900. }
  2901. g_assert (nids < G_MAXUINT16);
  2902. if (nids <= G_N_ELEMENTS (item->forbidden_ids.st)) {
  2903. /* Use static version */
  2904. memset (&item->forbidden_ids, 0, sizeof (item->forbidden_ids));
  2905. for (guint i = 0; i < nids; i++) {
  2906. item->forbidden_ids.st[i] = ids[i];
  2907. }
  2908. }
  2909. else {
  2910. /* Need to use a separate list */
  2911. item->forbidden_ids.dyn.e = -1; /* Flag */
  2912. item->forbidden_ids.dyn.n = rspamd_mempool_alloc (cache->static_pool,
  2913. sizeof (guint32) * nids);
  2914. item->forbidden_ids.dyn.len = nids;
  2915. item->forbidden_ids.dyn.allocated = nids;
  2916. for (guint i = 0; i < nids; i++) {
  2917. item->forbidden_ids.dyn.n[i] = ids[i];
  2918. }
  2919. /* Keep sorted */
  2920. qsort (item->forbidden_ids.dyn.n, nids, sizeof (guint32), rspamd_id_cmp);
  2921. }
  2922. return true;
  2923. }
  2924. const guint32*
  2925. rspamd_symcache_get_allowed_settings_ids (struct rspamd_symcache *cache,
  2926. const gchar *symbol,
  2927. guint *nids)
  2928. {
  2929. struct rspamd_symcache_item *item;
  2930. guint cnt = 0;
  2931. item = rspamd_symcache_find_filter (cache, symbol, false);
  2932. if (item == NULL) {
  2933. return NULL;
  2934. }
  2935. if (item->allowed_ids.dyn.e == -1) {
  2936. /* Dynamic list */
  2937. *nids = item->allowed_ids.dyn.len;
  2938. return item->allowed_ids.dyn.n;
  2939. }
  2940. else {
  2941. while (item->allowed_ids.st[cnt] != 0 && cnt < G_N_ELEMENTS (item->allowed_ids.st)) {
  2942. cnt ++;
  2943. }
  2944. *nids = cnt;
  2945. return item->allowed_ids.st;
  2946. }
  2947. }
  2948. const guint32*
  2949. rspamd_symcache_get_forbidden_settings_ids (struct rspamd_symcache *cache,
  2950. const gchar *symbol,
  2951. guint *nids)
  2952. {
  2953. struct rspamd_symcache_item *item;
  2954. guint cnt = 0;
  2955. item = rspamd_symcache_find_filter (cache, symbol, false);
  2956. if (item == NULL) {
  2957. return NULL;
  2958. }
  2959. if (item->forbidden_ids.dyn.e == -1) {
  2960. /* Dynamic list */
  2961. *nids = item->allowed_ids.dyn.len;
  2962. return item->allowed_ids.dyn.n;
  2963. }
  2964. else {
  2965. while (item->forbidden_ids.st[cnt] != 0 && cnt < G_N_ELEMENTS (item->allowed_ids.st)) {
  2966. cnt ++;
  2967. }
  2968. *nids = cnt;
  2969. return item->forbidden_ids.st;
  2970. }
  2971. }
  2972. /* Insertion sort: usable for near-sorted ids list */
  2973. static inline void
  2974. rspamd_ids_insertion_sort (guint *a, guint n)
  2975. {
  2976. for (guint i = 1; i < n; i++) {
  2977. guint32 tmp = a[i];
  2978. guint j = i;
  2979. while (j > 0 && tmp < a[j - 1]) {
  2980. a[j] = a[j - 1];
  2981. j --;
  2982. }
  2983. a[j] = tmp;
  2984. }
  2985. }
  2986. static inline void
  2987. rspamd_symcache_add_id_to_list (rspamd_mempool_t *pool,
  2988. struct rspamd_symcache_id_list *ls,
  2989. guint32 id)
  2990. {
  2991. guint cnt = 0;
  2992. guint *new_array;
  2993. if (ls->st[0] == -1) {
  2994. /* Dynamic array */
  2995. if (ls->dyn.len < ls->dyn.allocated) {
  2996. /* Trivial, append + sort */
  2997. ls->dyn.n[ls->dyn.len++] = id;
  2998. }
  2999. else {
  3000. /* Reallocate */
  3001. g_assert (ls->dyn.allocated <= G_MAXINT16);
  3002. ls->dyn.allocated *= 2;
  3003. new_array = rspamd_mempool_alloc (pool,
  3004. ls->dyn.allocated * sizeof (guint32));
  3005. memcpy (new_array, ls->dyn.n, ls->dyn.len * sizeof (guint32));
  3006. ls->dyn.n = new_array;
  3007. ls->dyn.n[ls->dyn.len++] = id;
  3008. }
  3009. rspamd_ids_insertion_sort (ls->dyn.n, ls->dyn.len);
  3010. }
  3011. else {
  3012. /* Static part */
  3013. while (ls->st[cnt] != 0 && cnt < G_N_ELEMENTS (ls->st)) {
  3014. cnt ++;
  3015. }
  3016. if (cnt < G_N_ELEMENTS (ls->st)) {
  3017. ls->st[cnt] = id;
  3018. }
  3019. else {
  3020. /* Switch to dynamic */
  3021. new_array = rspamd_mempool_alloc (pool,
  3022. G_N_ELEMENTS (ls->st) * 2 * sizeof (guint32));
  3023. memcpy (new_array, ls->st, G_N_ELEMENTS (ls->st) * sizeof (guint32));
  3024. ls->dyn.n = new_array;
  3025. ls->dyn.e = -1;
  3026. ls->dyn.allocated = G_N_ELEMENTS (ls->st) * 2;
  3027. ls->dyn.len = G_N_ELEMENTS (ls->st);
  3028. /* Recursively jump to dynamic branch that will handle insertion + sorting */
  3029. rspamd_symcache_add_id_to_list (pool, ls, id);
  3030. }
  3031. }
  3032. }
  3033. void
  3034. rspamd_symcache_process_settings_elt (struct rspamd_symcache *cache,
  3035. struct rspamd_config_settings_elt *elt)
  3036. {
  3037. guint32 id = elt->id;
  3038. ucl_object_iter_t iter;
  3039. struct rspamd_symcache_item *item, *parent;
  3040. const ucl_object_t *cur;
  3041. if (elt->symbols_disabled) {
  3042. /* Process denied symbols */
  3043. iter = NULL;
  3044. while ((cur = ucl_object_iterate (elt->symbols_disabled, &iter, true)) != NULL) {
  3045. const gchar *sym = ucl_object_key (cur);
  3046. item = rspamd_symcache_find_filter (cache, sym, false);
  3047. if (item) {
  3048. if (item->is_virtual) {
  3049. /*
  3050. * Virtual symbols are special:
  3051. * we ignore them in symcache but prevent them from being
  3052. * inserted.
  3053. */
  3054. rspamd_symcache_add_id_to_list (cache->static_pool,
  3055. &item->forbidden_ids, id);
  3056. msg_debug_cache ("deny virtual symbol %s for settings %ud (%s); "
  3057. "parent can still be executed",
  3058. sym, id, elt->name);
  3059. }
  3060. else {
  3061. /* Normal symbol, disable it */
  3062. rspamd_symcache_add_id_to_list (cache->static_pool,
  3063. &item->forbidden_ids, id);
  3064. msg_debug_cache ("deny symbol %s for settings %ud (%s)",
  3065. sym, id, elt->name);
  3066. }
  3067. }
  3068. else {
  3069. msg_warn_cache ("cannot find a symbol to disable %s "
  3070. "when processing settings %ud (%s)",
  3071. sym, id, elt->name);
  3072. }
  3073. }
  3074. }
  3075. if (elt->symbols_enabled) {
  3076. iter = NULL;
  3077. while ((cur = ucl_object_iterate (elt->symbols_enabled, &iter, true)) != NULL) {
  3078. /* Here, we resolve parent and explicitly allow it */
  3079. const gchar *sym = ucl_object_key (cur);
  3080. item = rspamd_symcache_find_filter (cache, sym, false);
  3081. if (item) {
  3082. if (item->is_virtual) {
  3083. if (!(item->type & SYMBOL_TYPE_GHOST)) {
  3084. parent = rspamd_symcache_find_filter (cache, sym, true);
  3085. if (parent) {
  3086. if (elt->symbols_disabled &&
  3087. ucl_object_lookup (elt->symbols_disabled, parent->symbol)) {
  3088. msg_err_cache ("conflict in %s: cannot enable disabled symbol %s, "
  3089. "wanted to enable symbol %s",
  3090. elt->name, parent->symbol, sym);
  3091. continue;
  3092. }
  3093. rspamd_symcache_add_id_to_list (cache->static_pool,
  3094. &parent->exec_only_ids, id);
  3095. msg_debug_cache ("allow just execution of symbol %s for settings %ud (%s)",
  3096. parent->symbol, id, elt->name);
  3097. }
  3098. }
  3099. /* Ignore ghosts */
  3100. }
  3101. rspamd_symcache_add_id_to_list (cache->static_pool,
  3102. &item->allowed_ids, id);
  3103. msg_debug_cache ("allow execution of symbol %s for settings %ud (%s)",
  3104. sym, id, elt->name);
  3105. }
  3106. else {
  3107. msg_warn_cache ("cannot find a symbol to enable %s "
  3108. "when processing settings %ud (%s)",
  3109. sym, id, elt->name);
  3110. }
  3111. }
  3112. }
  3113. }
  3114. gint
  3115. rspamd_symcache_item_flags (struct rspamd_symcache_item *item)
  3116. {
  3117. if (item) {
  3118. return item->type;
  3119. }
  3120. return 0;
  3121. }
  3122. const gchar*
  3123. rspamd_symcache_item_name (struct rspamd_symcache_item *item)
  3124. {
  3125. return item ? item->symbol : NULL;
  3126. }
  3127. const struct rspamd_symcache_item_stat *
  3128. rspamd_symcache_item_stat (struct rspamd_symcache_item *item)
  3129. {
  3130. return item ? item->st : NULL;
  3131. }
  3132. gboolean
  3133. rspamd_symcache_item_is_enabled (struct rspamd_symcache_item *item)
  3134. {
  3135. if (item) {
  3136. if (!item->enabled) {
  3137. return FALSE;
  3138. }
  3139. if (item->is_virtual && item->specific.virtual.parent_item != NULL) {
  3140. return rspamd_symcache_item_is_enabled (item->specific.virtual.parent_item);
  3141. }
  3142. return TRUE;
  3143. }
  3144. return FALSE;
  3145. }
  3146. struct rspamd_symcache_item * rspamd_symcache_item_get_parent (
  3147. struct rspamd_symcache_item *item)
  3148. {
  3149. if (item && item->is_virtual && item->specific.virtual.parent_item != NULL) {
  3150. return item->specific.virtual.parent_item;
  3151. }
  3152. return NULL;
  3153. }
  3154. const GPtrArray*
  3155. rspamd_symcache_item_get_deps (struct rspamd_symcache_item *item)
  3156. {
  3157. struct rspamd_symcache_item *parent;
  3158. if (item) {
  3159. parent = rspamd_symcache_item_get_parent (item);
  3160. if (parent) {
  3161. item = parent;
  3162. }
  3163. return item->deps;
  3164. }
  3165. return NULL;
  3166. }
  3167. const GPtrArray*
  3168. rspamd_symcache_item_get_rdeps (struct rspamd_symcache_item *item)
  3169. {
  3170. struct rspamd_symcache_item *parent;
  3171. if (item) {
  3172. parent = rspamd_symcache_item_get_parent (item);
  3173. if (parent) {
  3174. item = parent;
  3175. }
  3176. return item->rdeps;
  3177. }
  3178. return NULL;
  3179. }
  3180. void
  3181. rspamd_symcache_enable_profile (struct rspamd_task *task)
  3182. {
  3183. struct cache_savepoint *checkpoint = task->checkpoint;
  3184. if (checkpoint && !checkpoint->profile) {
  3185. ev_now_update_if_cheap (task->event_loop);
  3186. ev_tstamp now = ev_now (task->event_loop);
  3187. checkpoint->profile_start = now;
  3188. msg_debug_cache_task ("enable profiling of symbols for task");
  3189. checkpoint->profile = TRUE;
  3190. }
  3191. }