You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

rspamd_symcache.h 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. /*
  2. * Copyright 2023 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef RSPAMD_SYMBOLS_CACHE_H
  17. #define RSPAMD_SYMBOLS_CACHE_H
  18. #include "config.h"
  19. #include "ucl.h"
  20. #include "cfg_file.h"
  21. #include "contrib/libev/ev.h"
  22. #include <lua.h>
  23. #ifdef __cplusplus
  24. extern "C" {
  25. #endif
  26. struct rspamd_task;
  27. struct rspamd_config;
  28. struct rspamd_symcache;
  29. struct rspamd_worker;
  30. struct rspamd_symcache_dynamic_item;
  31. struct rspamd_symcache_item;
  32. struct rspamd_config_settings_elt;
  33. typedef void (*symbol_func_t)(struct rspamd_task *task, /* symbol callback type: the type of the `func` argument of rspamd_symcache_add_symbol() */
  34. struct rspamd_symcache_dynamic_item *item, /* NOTE(review): presumably the dynamic item being executed — confirm */
  35. gpointer user_data); /* opaque pointer; rspamd_symcache_add_symbol() takes a matching `user_data` argument */
  36. enum rspamd_symbol_type { /* bit flags: every enumerator below is a distinct single bit (1u << n) */
  37. SYMBOL_TYPE_NORMAL = (1u << 0u),
  38. SYMBOL_TYPE_VIRTUAL = (1u << 1u),
  39. SYMBOL_TYPE_CALLBACK = (1u << 2u),
  40. SYMBOL_TYPE_GHOST = (1u << 3u),
  41. SYMBOL_TYPE_SKIPPED = (1u << 4u),
  42. SYMBOL_TYPE_COMPOSITE = (1u << 5u),
  43. SYMBOL_TYPE_CLASSIFIER = (1u << 6u),
  44. SYMBOL_TYPE_FINE = (1u << 7u),
  45. SYMBOL_TYPE_EMPTY = (1u << 8u), /* Allow execution on empty tasks */
  46. SYMBOL_TYPE_CONNFILTER = (1u << 9u), /* Connection stage filter */
  47. SYMBOL_TYPE_PREFILTER = (1u << 10u),
  48. SYMBOL_TYPE_POSTFILTER = (1u << 11u),
  49. SYMBOL_TYPE_NOSTAT = (1u << 12u), /* Skip as statistical symbol */
  50. SYMBOL_TYPE_IDEMPOTENT = (1u << 13u), /* Symbol cannot change metric */
  51. SYMBOL_TYPE_TRIVIAL = (1u << 14u), /* Symbol is trivial */
  52. SYMBOL_TYPE_MIME_ONLY = (1u << 15u), /* Symbol is mime only */
  53. SYMBOL_TYPE_EXPLICIT_DISABLE = (1u << 16u), /* Symbol should be disabled explicitly only */
  54. SYMBOL_TYPE_IGNORE_PASSTHROUGH = (1u << 17u), /* Symbol ignores passthrough result */
  55. SYMBOL_TYPE_EXPLICIT_ENABLE = (1u << 18u), /* Symbol should be enabled explicitly only */
  56. SYMBOL_TYPE_USE_CORO = (1u << 19u), /* Symbol uses lua coroutines */
  57. };
  58. /**
  59. * Abstract structure for saving callback data for symbols (the type returned by rspamd_symcache_get_cbdata())
  60. */
  61. struct rspamd_abstract_callback_data {
  62. uint64_t magic; /* NOTE(review): presumably identifies the concrete data type stored in `data` — confirm */
  63. char data[]; /* flexible array member: variable-length payload placed after `magic` */
  64. };
  65. /**
  66. * Shared memory block specific for each symbol; the rspamd_symcache_item_stat() function returns a const pointer to this structure
  67. */
  68. struct rspamd_symcache_item_stat {
  69. struct rspamd_counter_data time_counter;
  70. double avg_time;
  71. double weight;
  72. unsigned int hits; /* NOTE(review): the relation between `hits` and `total_hits` is not visible in this header — confirm */
  73. uint64_t total_hits;
  74. struct rspamd_counter_data frequency_counter;
  75. double avg_frequency;
  76. double stddev_frequency;
  77. };
  78. /**
  79. * Creates new cache structure; takes the configuration object `cfg`
  80. * @return pointer to the new cache structure (the removal function declared in this header is rspamd_symcache_destroy())
  81. */
  82. struct rspamd_symcache *rspamd_symcache_new(struct rspamd_config *cfg);
  83. /**
  84. * Remove the cache structure, syncing data if needed
  85. * @param cache cache structure to remove
  86. */
  87. void rspamd_symcache_destroy(struct rspamd_symcache *cache);
  88. /**
  89. * Saves symbols cache to disk if possible
  90. * @param cache symbols cache to save
  91. */
  92. void rspamd_symcache_save(struct rspamd_symcache *cache);
  93. /**
  94. * Load symbols cache from file, must be called _after_ init_symbols_cache (NOTE(review): no `init_symbols_cache` is declared in this header — possibly a stale name; confirm which call is meant)
  95. */
  96. gboolean rspamd_symcache_init(struct rspamd_symcache *cache);
  97. /**
  98. * Generic function to register a symbol
  99. * @param cache symbols cache
  100. * @param name symbol name
  101. * @param priority symbol priority
  102. * @param func callback of type symbol_func_t
  103. * @param user_data opaque pointer; NOTE(review): presumably handed to `func` as its `user_data` argument — confirm
  104. * @param type NOTE(review): presumably a combination of enum rspamd_symbol_type flags — confirm
  105. * @param parent NOTE(review): presumably the id of a parent symbol — confirm
  106. * @return int; NOTE(review): presumably the id of the registered symbol — confirm
  107. */
  108. int rspamd_symcache_add_symbol(struct rspamd_symcache *cache,
  109. const char *name,
  110. int priority,
  111. symbol_func_t func,
  112. gpointer user_data,
  113. int type,
  114. int parent);
  115. /**
  116. * Adds augmentation to the symbol; `value` is a string passed along with the augmentation
  117. * @param cache symbols cache
  118. * @param sym_id id of the symbol
  119. * @param augmentation augmentation string
  120. * @return bool; NOTE(review): presumably true when the augmentation has been added — confirm
  121. */
  122. bool rspamd_symcache_add_symbol_augmentation(struct rspamd_symcache *cache,
  123. int sym_id,
  124. const char *augmentation,
  125. const char *value);
  126. /**
  127. * Add callback to be executed whenever symbol has peak value
  128. * @param cache symbols cache
  129. * @param cbref callback reference; NOTE(review): presumably a Lua reference as returned by luaL_ref — confirm
  130. */
  131. void rspamd_symcache_set_peak_callback(struct rspamd_symcache *cache,
  132. int cbref);
  133. /**
  134. * Add a delayed condition to the specific symbol in cache, so the symbol may be absent
  135. * at the moment of addition
  136. * @param cache symbols cache
  137. * @param sym symbol, given as a string (not a numeric id)
  138. * @param L lua state pointer
  139. * @param cbref callback reference (returned by luaL_ref)
  140. * @return TRUE if condition has been added
  141. */
  142. gboolean rspamd_symcache_add_condition_delayed(struct rspamd_symcache *cache,
  143. const char *sym,
  144. lua_State *L, int cbref);
  145. /**
  146. * Find symbol in cache by name and return its id, resolving virtual symbols if
  147. * applicable
  148. * @param cache symbols cache
  149. * @param name symbol name to look up
  150. * @return id of symbol or (-1) if a symbol has not been found
  151. */
  152. int rspamd_symcache_find_symbol(struct rspamd_symcache *cache,
  153. const char *name);
  154. /**
  155. * Get statistics for a specific symbol
  156. * @param cache symbols cache
  157. * @param name symbol name
  158. * @param frequency,freq_stddev non-const pointers; NOTE(review): presumably outputs for the frequency and its standard deviation — confirm
  159. * @param tm,nhits non-const pointers; NOTE(review): presumably outputs for the time value and the number of hits — confirm
  160. * @return gboolean; NOTE(review): presumably TRUE if the symbol was found — confirm
  161. */
  162. gboolean rspamd_symcache_stat_symbol(struct rspamd_symcache *cache,
  163. const char *name,
  164. double *frequency,
  165. double *freq_stddev,
  166. double *tm,
  167. unsigned int *nhits);
  168. /**
  169. * Returns number of symbols registered in symbols cache
  170. * @param cache symbols cache
  171. * @return number of symbols in the cache
  172. */
  173. unsigned int rspamd_symcache_stats_symbols_count(struct rspamd_symcache *cache);
  174. /**
  175. * Validate cache items against their weights defined in metrics
  176. * @param cache symbols cache
  177. * @param cfg configuration
  178. * @param strict do strict checks - symbols MUST be described in metrics
  179. */
  180. gboolean rspamd_symcache_validate(struct rspamd_symcache *cache,
  181. struct rspamd_config *cfg,
  182. gboolean strict);
  183. /**
  184. * Call function for cached symbol using saved callback
  185. * @param task task object
  186. * @param cache symbols cache
  187. * @param stage processing stage; NOTE(review): the set of valid stage values is not defined in this header
  188. */
  189. gboolean rspamd_symcache_process_symbols(struct rspamd_task *task,
  190. struct rspamd_symcache *cache,
  191. unsigned int stage);
  192. /**
  193. * Return statistics about the cache as ucl object (array of objects one per item)
  194. * @param cache symbols cache
  195. * @return ucl object holding the statistics; NOTE(review): ownership of the returned object is not stated here
  196. */
  197. ucl_object_t *rspamd_symcache_counters(struct rspamd_symcache *cache);
  198. /**
  199. * Start cache reloading; also takes the worker `w` and returns a `void *` (NOTE(review): the meaning of the returned pointer is not documented here)
  200. * @param cache symbols cache
  201. * @param ev_base libev event loop
  202. */
  203. void *rspamd_symcache_start_refresh(struct rspamd_symcache *cache,
  204. struct ev_loop *ev_base,
  205. struct rspamd_worker *w);
  206. /**
  207. * Increases counter for a specific symbol
  208. * @param _cache symbols cache
  209. * @param item,sym_name the symbol, as a cache item and as a name; NOTE(review): how the two are combined is not visible here
  210. */
  211. void rspamd_symcache_inc_frequency(struct rspamd_symcache *_cache,
  212. struct rspamd_symcache_item *item,
  213. const char *sym_name);
  214. /**
  215. * Add delayed dependency that is resolved on cache post-load routine
  216. * @param cache symbols cache
  217. * @param from string naming one end of the dependency
  218. * @param to string naming the other end; NOTE(review): confirm which of `from`/`to` depends on which
  219. */
  220. void rspamd_symcache_add_delayed_dependency(struct rspamd_symcache *cache,
  221. const char *from, const char *to);
  222. /**
  223. * Get abstract callback data for a symbol (or its parent symbol)
  224. * @param cache cache object
  225. * @param symbol symbol name
  226. * @return abstract callback data (struct rspamd_abstract_callback_data) or NULL if symbol is absent or has no data attached
  227. */
  228. struct rspamd_abstract_callback_data *rspamd_symcache_get_cbdata(
  229. struct rspamd_symcache *cache, const char *symbol);
  230. /**
  231. * Returns symbol's parent name (or symbol name itself)
  232. * @param cache symbols cache
  233. * @param symbol symbol name
  234. * @return parent name, or the symbol name itself; NOTE(review): the result for an unknown symbol is not stated here
  235. */
  236. const char *rspamd_symcache_get_parent(struct rspamd_symcache *cache,
  237. const char *symbol);
  238. unsigned int rspamd_symcache_get_symbol_flags(struct rspamd_symcache *cache, /* flags of the symbol named `symbol`; NOTE(review): presumably enum rspamd_symbol_type bits — confirm */
  239. const char *symbol);
  240. void rspamd_symcache_get_symbol_details(struct rspamd_symcache *cache, /* NOTE(review): presumably writes details of `symbol` into `this_sym_ucl` — confirm */
  241. const char *symbol,
  242. ucl_object_t *this_sym_ucl);
  243. /**
  244. * Process settings for task
  245. * @param task task object
  246. * @param cache symbols cache
  247. * @return gboolean; NOTE(review): the meaning of TRUE/FALSE is not documented here
  248. */
  249. gboolean rspamd_symcache_process_settings(struct rspamd_task *task,
  250. struct rspamd_symcache *cache);
  251. /**
  252. * Checks if a symbol specified has been checked (or disabled)
  253. * @param task task object
  254. * @param cache symbols cache
  255. * @param symbol symbol name
  256. * @return result of the check as a gboolean
  257. */
  258. gboolean rspamd_symcache_is_checked(struct rspamd_task *task,
  259. struct rspamd_symcache *cache,
  260. const char *symbol);
  261. /**
  262. * Returns checksum for all cache items
  263. * @param cache symbols cache
  264. * @return 64-bit checksum value
  265. */
  266. uint64_t rspamd_symcache_get_cksum(struct rspamd_symcache *cache);
  267. /**
  268. * Checks if a symbol is enabled (not checked and conditions return true if present)
  269. * @param task task object
  270. * @param cache symbols cache
  271. * @param symbol symbol name
  272. * @return result of the check as a gboolean
  273. */
  274. gboolean rspamd_symcache_is_symbol_enabled(struct rspamd_task *task,
  275. struct rspamd_symcache *cache,
  276. const char *symbol);
  277. /**
  278. * Enable this symbol for task
  279. * @param task task object
  280. * @param cache symbols cache
  281. * @param symbol symbol name
  282. * @return TRUE if a symbol has been enabled (not executed before)
  283. */
  284. gboolean rspamd_symcache_enable_symbol(struct rspamd_task *task,
  285. struct rspamd_symcache *cache,
  286. const char *symbol);
  287. /**
  288. * Disable this symbol for task
  289. * @param task task object
  290. * @param cache symbols cache
  291. * @param symbol symbol name
  292. * @return TRUE if a symbol has been disabled (not executed before)
  293. */
  294. gboolean rspamd_symcache_disable_symbol(struct rspamd_task *task,
  295. struct rspamd_symcache *cache,
  296. const char *symbol);
  297. /**
  298. * Disable execution of a symbol or a pattern (a string enclosed in `//`) permanently.
  299. * Unlike rspamd_symcache_disable_symbol(), this function takes no task argument.
  300. * @param cache symbols cache
  301. * @param symbol symbol name or pattern
  302. * (no return value)
  303. */
  304. void rspamd_symcache_disable_symbol_static(struct rspamd_symcache *cache,
  305. const char *symbol);
  306. /**
  307. * Add a symbol or a pattern to the list of explicitly and statically enabled symbols
  308. * @param cache symbols cache
  309. * @param symbol symbol name or pattern
  310. * (no return value)
  311. */
  312. void rspamd_symcache_enable_symbol_static(struct rspamd_symcache *cache,
  313. const char *symbol);
  314. /**
  315. * Process specific function for each cache element (in order they are added)
  316. * @param cache symbols cache
  317. * @param func function taking a cache item and a gpointer userdata argument
  318. * @param ud userdata pointer; NOTE(review): presumably forwarded to `func` — confirm
  319. */
  320. void rspamd_symcache_foreach(struct rspamd_symcache *cache,
  321. void (*func)(struct rspamd_symcache_item *item, gpointer /* userdata */),
  322. gpointer ud);
  323. /**
  324. * Returns the current item being processed (if any)
  325. * @param task task object
  326. * @return current dynamic item; NOTE(review): presumably NULL when there is none — confirm
  327. */
  328. struct rspamd_symcache_dynamic_item *rspamd_symcache_get_cur_item(struct rspamd_task *task);
  329. /**
  330. * Replaces the current item being processed.
  331. * Returns the current item being processed (if any); NOTE(review): unclear from this header whether that is the item before or after the replacement — confirm
  332. * @param task task object
  333. * @param item item to set as the current one
  334. * @return a dynamic item as described above
  335. */
  336. struct rspamd_symcache_dynamic_item *rspamd_symcache_set_cur_item(struct rspamd_task *task,
  337. struct rspamd_symcache_dynamic_item *item);
  338. /**
  339. * Finalize the current async element potentially calling its deps; takes the task and the dynamic item
  340. */
  341. void rspamd_symcache_finalize_item(struct rspamd_task *task,
  342. struct rspamd_symcache_dynamic_item *item);
  343. /**
  344. * Increase number of async events pending for an item. The rspamd_symcache_item_async_inc() macro below calls this function with G_STRLOC as `loc`.
  345. */
  346. unsigned int rspamd_symcache_item_async_inc_full(struct rspamd_task *task,
  347. struct rspamd_symcache_dynamic_item *item,
  348. const char *subsystem,
  349. const char *loc);
  350. #define rspamd_symcache_item_async_inc(task, item, subsystem) \
  351. rspamd_symcache_item_async_inc_full(task, item, subsystem, G_STRLOC)
  352. /**
  353. * Decrease number of async events pending for an item, asserts if no events pending. The rspamd_symcache_item_async_dec() macro below calls this function with G_STRLOC as `loc`.
  354. */
  355. unsigned int rspamd_symcache_item_async_dec_full(struct rspamd_task *task,
  356. struct rspamd_symcache_dynamic_item *item,
  357. const char *subsystem,
  358. const char *loc);
  359. #define rspamd_symcache_item_async_dec(task, item, subsystem) \
  360. rspamd_symcache_item_async_dec_full(task, item, subsystem, G_STRLOC)
  361. /**
  362. * Decrease number of async events pending for an item, asserts if no events pending
  363. * If no events are left, this function calls `rspamd_symcache_finalize_item` and returns TRUE
  364. * @param task task object
  365. * @param item dynamic item; the rspamd_symcache_item_async_dec_check() macro below supplies G_STRLOC as `loc`
  366. * @return TRUE if no events were left (see above)
  367. */
  368. gboolean rspamd_symcache_item_async_dec_check_full(struct rspamd_task *task,
  369. struct rspamd_symcache_dynamic_item *item,
  370. const char *subsystem,
  371. const char *loc);
  372. #define rspamd_symcache_item_async_dec_check(task, item, subsystem) \
  373. rspamd_symcache_item_async_dec_check_full(task, item, subsystem, G_STRLOC)
  374. /**
  375. * Disables execution of all symbols, excluding those specified in `skip_mask`
  376. * @param task task object
  377. * @param cache symbols cache
  378. * @param skip_mask mask selecting the symbols to exclude; NOTE(review): presumably enum rspamd_symbol_type bits — confirm
  379. */
  380. void rspamd_symcache_disable_all_symbols(struct rspamd_task *task,
  381. struct rspamd_symcache *cache,
  382. unsigned int skip_mask);
  383. /**
  384. * Iterates over the list of the enabled composites calling specified function
  385. * @param task task object
  386. * @param cache symbols cache
  387. * @param func callback of the GLib GHFunc type
  388. * @param fd gpointer; NOTE(review): presumably the user data handed to `func` — confirm
  389. */
  390. void rspamd_symcache_composites_foreach(struct rspamd_task *task,
  391. struct rspamd_symcache *cache,
  392. GHFunc func,
  393. gpointer fd);
  394. /**
  395. * Sets allowed settings ids for a symbol
  396. * @param cache symbols cache
  397. * @param symbol symbol name
  398. * @param ids array of settings ids (read-only for this function)
  399. * @param nids NOTE(review): presumably the number of elements in `ids` — confirm; the meaning of the bool result is not documented here
  400. */
  401. bool rspamd_symcache_set_allowed_settings_ids(struct rspamd_symcache *cache,
  402. const char *symbol,
  403. const uint32_t *ids,
  404. unsigned int nids);
  405. /**
  406. * Sets denied (forbidden) settings ids for a symbol
  407. * @param cache symbols cache
  408. * @param symbol symbol name
  409. * @param ids array of settings ids (read-only for this function)
  410. * @param nids NOTE(review): presumably the number of elements in `ids` — confirm; the meaning of the bool result is not documented here
  411. */
  412. bool rspamd_symcache_set_forbidden_settings_ids(struct rspamd_symcache *cache,
  413. const char *symbol,
  414. const uint32_t *ids,
  415. unsigned int nids);
  416. /**
  417. * Returns allowed ids for a symbol as a constant array
  418. * @param cache symbols cache
  419. * @param symbol symbol name
  420. * @param nids non-const pointer; NOTE(review): presumably receives the number of elements in the returned array — confirm
  421. * @return constant array of ids
  422. */
  423. const uint32_t *rspamd_symcache_get_allowed_settings_ids(struct rspamd_symcache *cache,
  424. const char *symbol,
  425. unsigned int *nids);
  426. /**
  427. * Returns denied (forbidden) ids for a symbol as a constant array
  428. * @param cache symbols cache
  429. * @param symbol symbol name
  430. * @param nids non-const pointer; NOTE(review): presumably receives the number of elements in the returned array — confirm
  431. * @return constant array of ids
  432. */
  433. const uint32_t *rspamd_symcache_get_forbidden_settings_ids(struct rspamd_symcache *cache,
  434. const char *symbol,
  435. unsigned int *nids);
  436. /**
  437. * Processes settings_elt in cache and converts it to a set of
  438. * adjustments for forbidden/allowed settings_ids for each symbol
  439. * @param cache symbols cache
  440. * @param elt settings element to process
  441. */
  442. void rspamd_symcache_process_settings_elt(struct rspamd_symcache *cache,
  443. struct rspamd_config_settings_elt *elt);
  444. /**
  445. * Check if a symbol is allowed for execution/insertion, this does not involve
  446. * condition scripts to be checked (so it is intended to be fast).
  447. * @param task task object
  448. * @param item cache item to check
  449. * @param exec_only NOTE(review): presumably limits the check to execution (as opposed to insertion) — confirm
  450. * @return result of the check as a gboolean
  451. */
  452. gboolean rspamd_symcache_is_item_allowed(struct rspamd_task *task,
  453. struct rspamd_symcache_item *item,
  454. gboolean exec_only);
  455. /**
  456. * Returns symcache item flags; two variants follow: one takes (`task`, `dyn_item`), the other takes a cache `item`
  457. * @param item cache item (rspamd_symcache_item_flags); the dyn variant takes `task` and `dyn_item` instead
  458. * @return flags as an int
  459. */
  460. int rspamd_symcache_dyn_item_flags(struct rspamd_task *task,
  461. struct rspamd_symcache_dynamic_item *dyn_item);
  462. int rspamd_symcache_item_flags(struct rspamd_symcache_item *item);
  463. /**
  464. * Returns cache item name; two variants follow: one takes (`task`, `dyn_item`), the other takes a cache `item`
  465. * @param item cache item (rspamd_symcache_item_name); the dyn variant takes `task` and `dyn_item` instead
  466. * @return item name as a constant string
  467. */
  468. const char *rspamd_symcache_dyn_item_name(struct rspamd_task *task,
  469. struct rspamd_symcache_dynamic_item *dyn_item);
  470. const char *rspamd_symcache_item_name(struct rspamd_symcache_item *item);
  471. /**
  472. * Returns the current item stat
  473. * @param item cache item
  474. * @return const pointer to the item's struct rspamd_symcache_item_stat
  475. */
  476. const struct rspamd_symcache_item_stat *
  477. rspamd_symcache_item_stat(struct rspamd_symcache_item *item);
  478. /**
  479. * Enable profiling for task (e.g. when a slow rule has been found)
  480. * @param task task object to enable profiling for
  481. */
  482. void rspamd_symcache_enable_profile(struct rspamd_task *task);
  483. struct rspamd_symcache_timeout_item { /* element type of the `items` member of struct rspamd_symcache_timeout_result */
  484. double timeout; /* NOTE(review): units are not stated in this header */
  485. const struct rspamd_symcache_item *item;
  486. };
  487. struct rspamd_symcache_timeout_result { /* returned by rspamd_symcache_get_max_timeout(), freed with rspamd_symcache_timeout_result_free() */
  488. double max_timeout;
  489. struct rspamd_symcache_timeout_item *items;
  490. size_t nitems; /* NOTE(review): presumably the number of entries in `items` — confirm */
  491. };
  492. /**
  493. * Gets maximum timeout announced by symbols cache
  494. * @param cache symbols cache
  495. * @return new symcache timeout_result structure, that should be freed by call
  496. * `rspamd_symcache_timeout_result_free`
  497. */
  498. struct rspamd_symcache_timeout_result *rspamd_symcache_get_max_timeout(struct rspamd_symcache *cache);
  499. /**
  500. * Frees results obtained from rspamd_symcache_get_max_timeout()
  501. * @param res result structure to free
  502. */
  503. void rspamd_symcache_timeout_result_free(struct rspamd_symcache_timeout_result *res);
  504. /**
  505. * Destroy internal state of the symcache runtime
  506. * @param task task object whose symcache runtime state is destroyed
  507. */
  508. void rspamd_symcache_runtime_destroy(struct rspamd_task *task);
  509. #ifdef __cplusplus
  510. }
  511. #endif
  512. #endif