You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

symcache_internal.hxx 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. /*
  2. * Copyright 2023 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /**
  17. * Internal C++ structures and classes for symcache
  18. */
  19. #ifndef RSPAMD_SYMCACHE_INTERNAL_HXX
  20. #define RSPAMD_SYMCACHE_INTERNAL_HXX
  21. #pragma once
  22. #include <cmath>
  23. #include <cstdlib>
  24. #include <cstdint>
  25. #include <utility>
  26. #include <vector>
  27. #include <string>
  28. #include <string_view>
  29. #include <memory>
  30. #include <variant>
  31. #include "rspamd_symcache.h"
  32. #include "contrib/libev/ev.h"
  33. #include "contrib/ankerl/unordered_dense.h"
  34. #include "contrib/expected/expected.hpp"
  35. #include "cfg_file.h"
  36. #include "symcache_id_list.hxx"
  /*
   * Logging shortcuts for the symbols cache. Every macro logs under the
   * "symcache" module name and forwards its variadic arguments unchanged.
   *
   * Naming scheme:
   *  - msg_err_cache / msg_warn_cache / msg_info_cache: messages passed to
   *    rspamd_default_log_function at G_LOG_LEVEL_CRITICAL, G_LOG_LEVEL_WARNING
   *    and G_LOG_LEVEL_INFO respectively
   *  - msg_debug_cache*: passed to rspamd_conditional_debug_fast together with
   *    ::rspamd::symcache::rspamd_symcache_log_id
   *  - *_task variants: tagged with task->task_pool->tag.uid, so a `task` must be
   *    visible at the expansion site; all other variants call log_tag(), which
   *    must be resolvable at the expansion site instead
   *  - *_lambda variants: report the identifier `log_func` (to be provided by the
   *    expansion site) as the function name instead of RSPAMD_LOG_FUNC
   */
  #define msg_err_cache(...) \
      rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, "symcache", log_tag(), \
                                  RSPAMD_LOG_FUNC, __VA_ARGS__)
  #define msg_err_cache_lambda(...) \
      rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, "symcache", log_tag(), \
                                  log_func, __VA_ARGS__)
  #define msg_err_cache_task(...) \
      rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, "symcache", task->task_pool->tag.uid, \
                                  RSPAMD_LOG_FUNC, __VA_ARGS__)
  #define msg_warn_cache(...) \
      rspamd_default_log_function(G_LOG_LEVEL_WARNING, "symcache", log_tag(), \
                                  RSPAMD_LOG_FUNC, __VA_ARGS__)
  #define msg_info_cache(...) \
      rspamd_default_log_function(G_LOG_LEVEL_INFO, "symcache", log_tag(), \
                                  RSPAMD_LOG_FUNC, __VA_ARGS__)

  /*
   * Debug variants. The first two arguments of rspamd_conditional_debug_fast are
   * always passed as null pointers here; nullptr is used rather than NULL since
   * this header is C++ only.
   */
  #define msg_debug_cache(...) \
      rspamd_conditional_debug_fast(nullptr, nullptr, \
                                    ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
                                    RSPAMD_LOG_FUNC, __VA_ARGS__)
  #define msg_debug_cache_lambda(...) \
      rspamd_conditional_debug_fast(nullptr, nullptr, \
                                    ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
                                    log_func, __VA_ARGS__)
  #define msg_debug_cache_task(...) \
      rspamd_conditional_debug_fast(nullptr, nullptr, \
                                    ::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
                                    RSPAMD_LOG_FUNC, __VA_ARGS__)
  #define msg_debug_cache_task_lambda(...) \
      rspamd_conditional_debug_fast(nullptr, nullptr, \
                                    ::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
                                    log_func, __VA_ARGS__)
  73. struct lua_State;
  74. namespace rspamd::symcache {
  /* Defined in symcache_impl.cxx */
  extern int rspamd_symcache_log_id;

  /*
   * Magic bytes for symcache_header::magic: 'r', 's', 'c', then the byte 2
   * (presumably a format version - confirm against the load/save code) and
   * zero padding. constexpr guarantees constant initialisation and allows use
   * in constant expressions.
   */
  static constexpr std::uint8_t symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0};

  /*
   * Fixed-size header record for the symbols cache.
   * NOTE(review): field order and sizes look like a persisted layout; do not
   * reorder without checking the code that reads and writes it.
   */
  struct symcache_header {
      std::uint8_t magic[8];     /* compared against symcache_magic (same size, see below) */
      unsigned int nitems;       /* items counter */
      std::uint8_t checksum[64]; /* checksum bytes */
      std::uint8_t unused[128];  /* reserved space */
  };

  /* The magic constant must fill the header field exactly */
  static_assert(sizeof(symcache_header::magic) == sizeof(symcache_magic),
                "symcache_magic must have the same size as symcache_header::magic");

  struct cache_item;
  /* Shared-ownership handle for a cache item */
  using cache_item_ptr = std::shared_ptr<cache_item>;
  86. /**
  87. * This structure is intended to keep the current ordering for all symbols
  88. * It is designed to be shared among all tasks and keep references to the real
  89. * symbols.
  90. * If some symbol has been added or removed to the symbol cache, it will not affect
  91. * the current order, and it will only be regenerated for the subsequent tasks.
  92. * This allows safe and no copy sharing and keeping track of all symbols in the
  93. * cache runtime.
  94. */
  95. struct order_generation {
  96. /* All items ordered */
  97. std::vector<cache_item_ptr> d;
  98. /* Mapping from symbol name to the position in the order array */
  99. ankerl::unordered_dense::map<std::string_view, unsigned int> by_symbol;
  100. /* Mapping from symbol id to the position in the order array */
  101. ankerl::unordered_dense::map<unsigned int, unsigned int> by_cache_id;
  102. /* It matches cache->generation_id; if not, a fresh ordering is required */
  103. unsigned int generation_id;
  104. explicit order_generation(std::size_t nelts, unsigned id)
  105. : generation_id(id)
  106. {
  107. d.reserve(nelts);
  108. by_symbol.reserve(nelts);
  109. by_cache_id.reserve(nelts);
  110. }
  111. auto size() const -> auto
  112. {
  113. return d.size();
  114. }
  115. };
  116. using order_generation_ptr = std::shared_ptr<order_generation>;
  /**
   * A dependency between two symbols referenced by name.
   * Both names are copied into owned strings, so the views passed to the
   * constructor do not need to outlive the object.
   */
  struct delayed_cache_dependency {
      std::string from;
      std::string to;

      delayed_cache_dependency(std::string_view _from, std::string_view _to)
          : from{_from},
            to{_to}
      {
      }
  };
  125. struct delayed_cache_condition {
  126. std::string sym;
  127. int cbref;
  128. lua_State *L;
  129. public:
  130. delayed_cache_condition(std::string_view sym, int cbref, lua_State *L)
  131. : sym(sym), cbref(cbref), L(L)
  132. {
  133. }
  134. };
  135. class delayed_symbol_elt {
  136. private:
  137. std::variant<std::string, rspamd_regexp_t *> content;
  138. public:
  139. /* Disable copy */
  140. delayed_symbol_elt() = delete;
  141. delayed_symbol_elt(const delayed_symbol_elt &) = delete;
  142. delayed_symbol_elt &operator=(const delayed_symbol_elt &) = delete;
  143. /* Enable move */
  144. delayed_symbol_elt(delayed_symbol_elt &&other) noexcept = default;
  145. delayed_symbol_elt &operator=(delayed_symbol_elt &&other) noexcept = default;
  146. explicit delayed_symbol_elt(std::string_view elt) noexcept
  147. {
  148. if (!elt.empty() && elt[0] == '/') {
  149. /* Possibly regexp */
  150. auto *re = rspamd_regexp_new_len(elt.data(), elt.size(), nullptr, nullptr);
  151. if (re != nullptr) {
  152. std::get<rspamd_regexp_t *>(content) = re;
  153. }
  154. else {
  155. std::get<std::string>(content) = elt;
  156. }
  157. }
  158. else {
  159. std::get<std::string>(content) = elt;
  160. }
  161. }
  162. ~delayed_symbol_elt()
  163. {
  164. if (std::holds_alternative<rspamd_regexp_t *>(content)) {
  165. rspamd_regexp_unref(std::get<rspamd_regexp_t *>(content));
  166. }
  167. }
  168. auto matches(std::string_view what) const -> bool
  169. {
  170. return std::visit([&](auto &elt) {
  171. using T = typeof(elt);
  172. if constexpr (std::is_same_v<T, rspamd_regexp_t *>) {
  173. if (rspamd_regexp_match(elt, what.data(), what.size(), false)) {
  174. return true;
  175. }
  176. }
  177. else if constexpr (std::is_same_v<T, std::string>) {
  178. return elt == what;
  179. }
  180. return false;
  181. },
  182. content);
  183. }
  184. auto to_string_view() const -> std::string_view
  185. {
  186. return std::visit([&](auto &elt) {
  187. using T = typeof(elt);
  188. if constexpr (std::is_same_v<T, rspamd_regexp_t *>) {
  189. return std::string_view{rspamd_regexp_get_pattern(elt)};
  190. }
  191. else if constexpr (std::is_same_v<T, std::string>) {
  192. return std::string_view{elt};
  193. }
  194. return std::string_view{};
  195. },
  196. content);
  197. }
  198. };
  199. struct delayed_symbol_elt_equal {
  200. using is_transparent = void;
  201. auto operator()(const delayed_symbol_elt &a, const delayed_symbol_elt &b) const
  202. {
  203. return a.to_string_view() == b.to_string_view();
  204. }
  205. auto operator()(const delayed_symbol_elt &a, const std::string_view &b) const
  206. {
  207. return a.to_string_view() == b;
  208. }
  209. auto operator()(const std::string_view &a, const delayed_symbol_elt &b) const
  210. {
  211. return a == b.to_string_view();
  212. }
  213. };
  214. struct delayed_symbol_elt_hash {
  215. using is_transparent = void;
  216. auto operator()(const delayed_symbol_elt &a) const
  217. {
  218. return ankerl::unordered_dense::hash<std::string_view>()(a.to_string_view());
  219. }
  220. auto operator()(const std::string_view &a) const
  221. {
  222. return ankerl::unordered_dense::hash<std::string_view>()(a);
  223. }
  224. };
  225. class symcache {
  226. private:
  227. using items_ptr_vec = std::vector<cache_item *>;
  228. /* Map indexed by symbol name: all symbols must have unique names, so this map holds ownership */
  229. ankerl::unordered_dense::map<std::string_view, cache_item *> items_by_symbol;
  230. ankerl::unordered_dense::map<int, cache_item_ptr> items_by_id;
  231. /* Items sorted into some order */
  232. order_generation_ptr items_by_order;
  233. unsigned int cur_order_gen;
  234. /* Specific vectors for execution/iteration */
  235. items_ptr_vec connfilters;
  236. items_ptr_vec prefilters;
  237. items_ptr_vec filters;
  238. items_ptr_vec postfilters;
  239. items_ptr_vec composites;
  240. items_ptr_vec idempotent;
  241. items_ptr_vec classifiers;
  242. items_ptr_vec virtual_symbols;
  243. /* These are stored within pointer to clean up after init */
  244. std::unique_ptr<std::vector<delayed_cache_dependency>> delayed_deps;
  245. std::unique_ptr<std::vector<delayed_cache_condition>> delayed_conditions;
  246. /* Delayed statically enabled or disabled symbols */
  247. using delayed_symbol_names = ankerl::unordered_dense::set<delayed_symbol_elt,
  248. delayed_symbol_elt_hash, delayed_symbol_elt_equal>;
  249. std::unique_ptr<delayed_symbol_names> disabled_symbols;
  250. std::unique_ptr<delayed_symbol_names> enabled_symbols;
  251. rspamd_mempool_t *static_pool;
  252. std::uint64_t cksum;
  253. double total_weight;
  254. std::size_t stats_symbols_count;
  255. private:
  256. std::uint64_t total_hits;
  257. struct rspamd_config *cfg;
  258. lua_State *L;
  259. double reload_time;
  260. double last_profile;
  261. private:
  262. int peak_cb;
  263. int cache_id;
  264. private:
  265. /* Internal methods */
  266. auto load_items() -> bool;
  267. auto resort() -> void;
  268. auto get_item_specific_vector(const cache_item &) -> items_ptr_vec &;
  269. /* Helper for g_hash_table_foreach */
  270. static auto metric_connect_cb(void *k, void *v, void *ud) -> void;
  271. public:
  272. explicit symcache(struct rspamd_config *cfg)
  273. : cfg(cfg)
  274. {
  275. /* XXX: do we need a special pool for symcache? I don't think so */
  276. static_pool = cfg->cfg_pool;
  277. reload_time = cfg->cache_reload_time;
  278. total_hits = 1;
  279. total_weight = 1.0;
  280. cksum = 0xdeadbabe;
  281. peak_cb = -1;
  282. cache_id = rspamd_random_uint64_fast();
  283. L = (lua_State *) cfg->lua_state;
  284. delayed_conditions = std::make_unique<std::vector<delayed_cache_condition>>();
  285. delayed_deps = std::make_unique<std::vector<delayed_cache_dependency>>();
  286. }
  287. virtual ~symcache();
  288. /**
  289. * Saves items on disk (if possible)
  290. * @return
  291. */
  292. auto save_items() const -> bool;
  293. /**
  294. * Get an item by ID
  295. * @param id
  296. * @param resolve_parent
  297. * @return
  298. */
  299. auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item *;
  300. auto get_item_by_id_mut(int id, bool resolve_parent) const -> cache_item *;
  301. /**
  302. * Get an item by it's name
  303. * @param name
  304. * @param resolve_parent
  305. * @return
  306. */
  307. auto get_item_by_name(std::string_view name, bool resolve_parent) const -> const cache_item *;
  308. /**
  309. * Get an item by it's name, mutable pointer
  310. * @param name
  311. * @param resolve_parent
  312. * @return
  313. */
  314. auto get_item_by_name_mut(std::string_view name, bool resolve_parent) const -> cache_item *;
  315. /**
  316. * Add a direct dependency
  317. * @param id_from
  318. * @param to
  319. * @param virtual_id_from
  320. * @return
  321. */
  322. auto add_dependency(int id_from, std::string_view to, int virtual_id_from) -> void;
  323. /**
  324. * Add a delayed dependency between symbols that will be resolved on the init stage
  325. * @param from
  326. * @param to
  327. */
  328. auto add_delayed_dependency(std::string_view from, std::string_view to) -> void
  329. {
  330. if (!delayed_deps) {
  331. delayed_deps = std::make_unique<std::vector<delayed_cache_dependency>>();
  332. }
  333. delayed_deps->emplace_back(from, to);
  334. }
  335. /**
  336. * Adds a symbol to the list of the disabled symbols
  337. * @param sym
  338. * @return
  339. */
  340. auto disable_symbol_delayed(std::string_view sym) -> bool
  341. {
  342. if (!disabled_symbols) {
  343. disabled_symbols = std::make_unique<delayed_symbol_names>();
  344. }
  345. if (!disabled_symbols->contains(sym)) {
  346. disabled_symbols->emplace(sym);
  347. return true;
  348. }
  349. return false;
  350. }
  351. /**
  352. * Adds a symbol to the list of the enabled symbols
  353. * @param sym
  354. * @return
  355. */
  356. auto enable_symbol_delayed(std::string_view sym) -> bool
  357. {
  358. if (!enabled_symbols) {
  359. enabled_symbols = std::make_unique<delayed_symbol_names>();
  360. }
  361. if (!enabled_symbols->contains(sym)) {
  362. enabled_symbols->emplace(sym);
  363. return true;
  364. }
  365. return false;
  366. }
  367. /**
  368. * Initialises the symbols cache, must be called after all symbols are added
  369. * and the config file is loaded
  370. */
  371. auto init() -> bool;
  372. /**
  373. * Log helper that returns cfg checksum
  374. * @return
  375. */
  376. auto log_tag() const -> const char *
  377. {
  378. return cfg->checksum;
  379. }
  380. /**
  381. * Helper to return a memory pool associated with the cache
  382. * @return
  383. */
  384. auto get_pool() const
  385. {
  386. return static_pool;
  387. }
  388. /**
  389. * A method to add a generic symbol with a callback to couple with C API
  390. * @param name name of the symbol, unlike C API it must be "" for callback only (compat) symbols, in this case an automatic name is generated
  391. * @param priority
  392. * @param func
  393. * @param user_data
  394. * @param flags_and_type mix of flags and type in a messy C enum
  395. * @return id of a new symbol or -1 in case of failure
  396. */
  397. auto add_symbol_with_callback(std::string_view name,
  398. int priority,
  399. symbol_func_t func,
  400. void *user_data,
  401. int flags_and_type) -> int;
  402. /**
  403. * A method to add a generic virtual symbol with no function associated
  404. * @param name must have some value, or a fatal error will strike you
  405. * @param parent_id if this param is -1 then this symbol is associated with nothing
  406. * @param flags_and_type mix of flags and type in a messy C enum
  407. * @return id of a new symbol or -1 in case of failure
  408. */
  409. auto add_virtual_symbol(std::string_view name, int parent_id,
  410. int flags_and_type) -> int;
  411. /**
  412. * Sets a lua callback to be called on peaks in execution time
  413. * @param cbref
  414. */
  415. auto set_peak_cb(int cbref) -> void;
  416. /**
  417. * Add a delayed condition for a symbol that might not be registered yet
  418. * @param sym
  419. * @param cbref
  420. */
  421. auto add_delayed_condition(std::string_view sym, int cbref) -> void;
  422. /**
  423. * Returns number of symbols that needs to be checked in statistical algorithm
  424. * @return
  425. */
  426. auto get_stats_symbols_count() const
  427. {
  428. return stats_symbols_count;
  429. }
  430. /**
  431. * Returns a checksum for the cache
  432. * @return
  433. */
  434. auto get_cksum() const
  435. {
  436. return cksum;
  437. }
  438. /**
  439. * Validate symbols in the cache
  440. * @param strict
  441. * @return
  442. */
  443. auto validate(bool strict) -> bool;
  444. /**
  445. * Returns counters for the cache
  446. * @return
  447. */
  448. auto counters() const -> ucl_object_t *;
  449. /**
  450. * Adjusts stats of the cache for the periodic counter
  451. */
  452. auto periodic_resort(struct ev_loop *ev_loop, double cur_time, double last_resort) -> void;
  453. /**
  454. * A simple helper to get the reload time
  455. * @return
  456. */
  457. auto get_reload_time() const
  458. {
  459. return reload_time;
  460. };
  461. /**
  462. * Iterate over all symbols using a specific functor
  463. * @tparam Functor
  464. * @param f
  465. */
  466. template<typename Functor>
  467. auto symbols_foreach(Functor f) -> void
  468. {
  469. for (const auto &sym_it: items_by_symbol) {
  470. f(sym_it.second);
  471. }
  472. }
  473. /**
  474. * Iterate over all composites using a specific functor
  475. * @tparam Functor
  476. * @param f
  477. */
  478. template<typename Functor>
  479. auto composites_foreach(Functor f) -> void
  480. {
  481. for (const auto &sym_it: composites) {
  482. f(sym_it);
  483. }
  484. }
  485. /**
  486. * Iterate over all composites using a specific functor
  487. * @tparam Functor
  488. * @param f
  489. */
  490. template<typename Functor>
  491. auto connfilters_foreach(Functor f) -> bool
  492. {
  493. return std::all_of(std::begin(connfilters), std::end(connfilters),
  494. [&](const auto &sym_it) {
  495. return f(sym_it);
  496. });
  497. }
  498. template<typename Functor>
  499. auto prefilters_foreach(Functor f) -> bool
  500. {
  501. return std::all_of(std::begin(prefilters), std::end(prefilters),
  502. [&](const auto &sym_it) {
  503. return f(sym_it);
  504. });
  505. }
  506. template<typename Functor>
  507. auto postfilters_foreach(Functor f) -> bool
  508. {
  509. return std::all_of(std::begin(postfilters), std::end(postfilters),
  510. [&](const auto &sym_it) {
  511. return f(sym_it);
  512. });
  513. }
  514. template<typename Functor>
  515. auto idempotent_foreach(Functor f) -> bool
  516. {
  517. return std::all_of(std::begin(idempotent), std::end(idempotent),
  518. [&](const auto &sym_it) {
  519. return f(sym_it);
  520. });
  521. }
  522. template<typename Functor>
  523. auto filters_foreach(Functor f) -> bool
  524. {
  525. return std::all_of(std::begin(filters), std::end(filters),
  526. [&](const auto &sym_it) {
  527. return f(sym_it);
  528. });
  529. }
  530. /**
  531. * Resort cache if anything has been changed since last time
  532. * @return
  533. */
  534. auto maybe_resort() -> bool;
  535. /**
  536. * Returns current set of items ordered for sharing ownership
  537. * @return
  538. */
  539. auto get_cache_order() const -> auto
  540. {
  541. return items_by_order;
  542. }
  543. /**
  544. * Get last profile timestamp
  545. * @return
  546. */
  547. auto get_last_profile() const -> auto
  548. {
  549. return last_profile;
  550. }
  551. /**
  552. * Sets last profile timestamp
  553. * @param last_profile
  554. * @return
  555. */
  556. auto set_last_profile(double last_profile)
  557. {
  558. symcache::last_profile = last_profile;
  559. }
  560. /**
  561. * Process settings elt identified by id
  562. * @param elt
  563. */
  564. auto process_settings_elt(struct rspamd_config_settings_elt *elt) -> void;
  565. /**
  566. * Returns maximum timeout that is requested by all rules
  567. * @return
  568. */
  569. auto get_max_timeout(std::vector<std::pair<double, const cache_item *>> &elts) const -> double;
  570. };
  571. }// namespace rspamd::symcache
  572. #endif//RSPAMD_SYMCACHE_INTERNAL_HXX