You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

symbols_cache.c 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "util.h"
  18. #include "rspamd.h"
  19. #include "message.h"
  20. #include "symbols_cache.h"
  21. #include "cfg_file.h"
  22. #include "lua/lua_common.h"
  23. #include "unix-std.h"
  24. #include <math.h>
  /*
   * Logging helpers for the symbols cache. Each macro forwards its arguments
   * to rspamd_default_log_function at a fixed GLib log level, tagging the
   * message with the cache memory pool tag name and the configuration
   * checksum.
   *
   * The expansions reference an identifier named `cache`, so they can only be
   * used where a non-NULL `struct symbols_cache *cache` (with a valid
   * static_pool and cfg) is in scope.
   */
  #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
      cache->static_pool->tag.tagname, cache->cfg->checksum, \
      G_STRFUNC, \
      __VA_ARGS__)
  #define msg_warn_cache(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
      cache->static_pool->tag.tagname, cache->cfg->checksum, \
      G_STRFUNC, \
      __VA_ARGS__)
  #define msg_info_cache(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
      cache->static_pool->tag.tagname, cache->cfg->checksum, \
      G_STRFUNC, \
      __VA_ARGS__)
  #define msg_debug_cache(...) rspamd_default_log_function (G_LOG_LEVEL_DEBUG, \
      cache->static_pool->tag.tagname, cache->cfg->checksum, \
      G_STRFUNC, \
      __VA_ARGS__)
  /*
   * Magic bytes stored at the very start of the on-disk cache file; the loader
   * rejects any file whose first 8 bytes differ.
   */
  static const guchar rspamd_symbols_cache_magic[8] = {'r', 's', 'c', 1, 0, 0, 0, 0 };

  /*
   * Fixed-size header that precedes the serialized items in the cache file.
   * In the code visible here only `magic` is filled in and checked; the other
   * fields are written as zeroes.
   */
  struct rspamd_symbols_cache_header {
      guchar magic[8];       /* must equal rspamd_symbols_cache_magic */
      guint nitems;          /* NOTE(review): not set or read in the visible code */
      guchar checksum[64];   /* NOTE(review): not set or read in the visible code */
      guchar unused[128];    /* padding */
  };
  /*
   * The symbols cache itself: all registered symbols plus the bookkeeping
   * needed to order and validate them.
   */
  struct symbols_cache {
      /* Hash table for fast access: symbol name -> struct cache_item */
      GHashTable *items_by_symbol;
      /* All items, sorted with cache_logic_cmp */
      GPtrArray *items_by_order;
      /* All items, indexed by cache_item.id */
      GPtrArray *items_by_id;
      /* List of struct delayed_cache_dependency, resolved in post_cache_init */
      GList *delayed_deps;
      /* List of struct delayed_cache_condition, resolved in post_cache_init */
      GList *delayed_conditions;
      /* Pool that owns the items and their auxiliary data */
      rspamd_mempool_t *static_pool;
      /* Largest absolute weight seen among items (starts at 1.0) */
      gdouble max_weight;
      /* Number of registered items; also the id given to the next item */
      guint used_items;
      /* Sum of loaded item frequencies (starts at 1) */
      guint64 total_freq;
      struct rspamd_config *cfg;
      /* Mutex obtained from static_pool */
      rspamd_mempool_mutex_t *mtx;
      /* Initialised to CACHE_RELOAD_TIME */
      gdouble reload_time;
      /* NOTE(review): presumably the periodic re-sort timer; not used in the visible code */
      struct event resort_ev;
  };
  /* Per-process running average maintained by rspamd_set_counter */
  struct counter_data {
      gdouble value;   /* current cumulative moving average */
      gint number;     /* number of samples folded into `value` */
  };
  /*
   * A single registered symbol. The item itself is allocated from the shared
   * part of the cache pool, while `cd` is allocated from the non-shared part.
   */
  struct cache_item {
      /* This block is likely shared */
      gdouble avg_time;      /* loaded from / saved to the cache file as "time" */
      gdouble weight;        /* taken from the metric configuration during validation */
      guint32 frequency;     /* loaded from / saved to the cache file as "frequency" */
      guint32 avg_counter;   /* loaded from / saved to the cache file as "count" */
      /* Per process counter */
      struct counter_data *cd;
      gchar *symbol;         /* symbol name, NULL for unnamed items */
      enum rspamd_symbol_type type;
      /* Callback data */
      symbol_func_t func;
      gpointer user_data;
      /* Condition of execution: Lua registry reference, -1 when unset */
      gint condition_cb;
      /* Parent symbol id for virtual symbols, -1 when there is no parent */
      gint parent;
      /* Priority */
      gint priority;
      /* Index of this item in symbols_cache.items_by_id */
      gint id;
      /* Dependencies: arrays of struct cache_dependency */
      GPtrArray *deps;       /* items this item depends on */
      GPtrArray *rdeps;      /* items that depend on this item */
  };
  /* One edge of the dependency graph, stored in cache_item.deps / rdeps */
  struct cache_dependency {
      struct cache_item *item;   /* item at the other end of the edge */
      gchar *sym;                /* name of the symbol depended upon */
      gint id;                   /* id of `item` */
  };
  /*
   * Dependency recorded by name and resolved later in post_cache_init.
   * Both strings are released with g_free when the cache is destroyed.
   */
  struct delayed_cache_dependency {
      gchar *from;   /* symbol that gets the dependency */
      gchar *to;     /* symbol it depends on */
  };
  /*
   * Condition registered for a symbol that was not yet known; resolved later
   * in post_cache_init.
   */
  struct delayed_cache_condition {
      gchar *sym;     /* symbol name (g_strdup'ed copy) */
      gint cbref;     /* Lua registry reference of the condition callback */
      lua_State *L;   /* Lua state that owns `cbref` */
  };
  /* Per-task processing state (stored as task->checkpoint) */
  struct cache_savepoint {
      /*
       * Bit set with two bits per item: bit (id * 2) is tested before an item
       * is checked, bit (id * 2 + 1) is set once its watcher has fired.
       */
      guchar *processed_bits;
      guint pass;
      /* Metric result selected as the one with the largest reject score */
      struct metric_result *rs;
      /* Reject limit for `rs`; 0 means not yet computed, -1 means no limit */
      gdouble lim;
      /* Items waiting for their dependencies */
      GPtrArray *waitq;
  };
  /* XXX: Maybe make it configurable */
  #define CACHE_RELOAD_TIME 60.0
  /*
   * Floor values for weight, frequency and time used by SCORE_FUN, so that a
   * zero or negative input never yields a zero score or a division by a tiny
   * number.
   */
  #define TIME_ALPHA (1.0)
  #define WEIGHT_ALPHA (0.001)
  #define FREQ_ALPHA (0.001)
  /*
   * Ordering score of an item: weight * frequency / time, with each term
   * clamped to its *_ALPHA floor. Every argument is parenthesised so that any
   * expression may be passed; note that each argument is evaluated twice.
   */
  #define SCORE_FUN(w, f, t) (((w) > 0 ? (w) : WEIGHT_ALPHA) \
      * ((f) > 0 ? (f) : FREQ_ALPHA) \
      / ((t) > TIME_ALPHA ? (t) : TIME_ALPHA))
  /*
   * Forward declarations: both functions are called from the watcher callback
   * before their definitions appear in the file.
   */
  static gboolean rspamd_symbols_cache_check_symbol (struct rspamd_task *task,
      struct symbols_cache *cache,
      struct cache_item *item,
      struct cache_savepoint *checkpoint,
      gdouble *total_diff);
  static gboolean rspamd_symbols_cache_check_deps (struct rspamd_task *task,
      struct symbols_cache *cache,
      struct cache_item *item,
      struct cache_savepoint *checkpoint);
  131. gint
  132. cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
  133. {
  134. const struct cache_item *i1 = *(struct cache_item **)p1,
  135. *i2 = *(struct cache_item **)p2;
  136. struct symbols_cache *cache = ud;
  137. double w1, w2;
  138. double weight1, weight2;
  139. double f1 = 0, f2 = 0, t1, t2;
  140. if (i1->deps->len != 0 || i2->deps->len != 0) {
  141. /* TODO: handle complex dependencies */
  142. w1 = -(i1->deps->len);
  143. w2 = -(i2->deps->len);
  144. }
  145. else if (i1->priority == i2->priority) {
  146. f1 = (double)i1->frequency / (double)cache->total_freq;
  147. f2 = (double)i2->frequency / (double)cache->total_freq;
  148. weight1 = fabs (i1->weight) / cache->max_weight;
  149. weight2 = fabs (i2->weight) / cache->max_weight;
  150. t1 = i1->avg_time;
  151. t2 = i2->avg_time;
  152. w1 = SCORE_FUN (weight1, f1, t1);
  153. w2 = SCORE_FUN (weight2, f2, t2);
  154. msg_debug_cache ("%s -> %.2f, %s -> %.2f", i1->symbol, w1 * 1000.0,
  155. i2->symbol, w2 * 1000.0);
  156. }
  157. else {
  158. /* Strict sorting */
  159. w1 = abs (i1->priority);
  160. w2 = abs (i2->priority);
  161. msg_debug_cache ("priority: %s -> %.2f, %s -> %.2f", i1->symbol, w1 * 1000.0,
  162. i2->symbol, w2 * 1000.0);
  163. }
  164. if (w2 > w1) {
  165. return 1;
  166. }
  167. else if (w2 < w1) {
  168. return -1;
  169. }
  170. return 0;
  171. }
  172. /**
  173. * Set counter for a symbol
  174. */
  175. static double
  176. rspamd_set_counter (struct cache_item *item, gdouble value)
  177. {
  178. struct counter_data *cd;
  179. cd = item->cd;
  180. /* Cumulative moving average using per-process counter data */
  181. if (cd->number == 0) {
  182. cd->value = 0;
  183. }
  184. cd->value = cd->value + (value - cd->value) / (gdouble)(++cd->number);
  185. return cd->value;
  186. }
  187. /* Sort items in logical order */
  188. static void
  189. post_cache_init (struct symbols_cache *cache)
  190. {
  191. struct cache_item *it, *dit;
  192. struct cache_dependency *dep, *rdep;
  193. struct delayed_cache_dependency *ddep;
  194. struct delayed_cache_condition *dcond;
  195. GList *cur;
  196. guint i, j;
  197. gint id;
  198. g_ptr_array_sort_with_data (cache->items_by_order, cache_logic_cmp, cache);
  199. cur = cache->delayed_deps;
  200. while (cur) {
  201. ddep = cur->data;
  202. id = rspamd_symbols_cache_find_symbol (cache, ddep->from);
  203. if (id != -1) {
  204. it = g_ptr_array_index (cache->items_by_id, id);
  205. }
  206. else {
  207. it = NULL;
  208. }
  209. if (it == NULL) {
  210. msg_err_cache ("cannot register delayed dependency between %s and %s, "
  211. "%s is missing", ddep->from, ddep->to, ddep->from);
  212. }
  213. else {
  214. rspamd_symbols_cache_add_dependency (cache, it->id, ddep->to);
  215. }
  216. cur = g_list_next (cur);
  217. }
  218. cur = cache->delayed_conditions;
  219. while (cur) {
  220. dcond = cur->data;
  221. id = rspamd_symbols_cache_find_symbol (cache, dcond->sym);
  222. if (id != -1) {
  223. it = g_ptr_array_index (cache->items_by_id, id);
  224. }
  225. else {
  226. it = NULL;
  227. }
  228. if (it == NULL) {
  229. msg_err_cache (
  230. "cannot register delayed condition for %s",
  231. dcond->sym);
  232. luaL_unref (dcond->L, LUA_REGISTRYINDEX, dcond->cbref);
  233. }
  234. else {
  235. rspamd_symbols_cache_add_condition (cache, it->id, dcond->L,
  236. dcond->cbref);
  237. }
  238. cur = g_list_next (cur);
  239. }
  240. for (i = 0; i < cache->items_by_id->len; i ++) {
  241. it = g_ptr_array_index (cache->items_by_id, i);
  242. for (j = 0; j < it->deps->len; j ++) {
  243. dep = g_ptr_array_index (it->deps, j);
  244. dit = g_hash_table_lookup (cache->items_by_symbol, dep->sym);
  245. if (dit != NULL) {
  246. if (dit->parent != -1) {
  247. dit = g_ptr_array_index (cache->items_by_id, dit->parent);
  248. }
  249. rdep = rspamd_mempool_alloc (cache->static_pool, sizeof (*rdep));
  250. rdep->sym = dep->sym;
  251. rdep->item = it;
  252. rdep->id = i;
  253. g_ptr_array_add (dit->rdeps, rdep);
  254. dep->item = dit;
  255. dep->id = dit->id;
  256. msg_debug_cache ("add dependency from %d on %d", it->id, dit->id);
  257. }
  258. else {
  259. msg_err_cache ("cannot find dependency on symbol %s", dep->sym);
  260. }
  261. }
  262. }
  263. }
  264. static gboolean
  265. rspamd_symbols_cache_load_items (struct symbols_cache *cache, const gchar *name)
  266. {
  267. struct rspamd_symbols_cache_header *hdr;
  268. struct stat st;
  269. struct ucl_parser *parser;
  270. ucl_object_t *top;
  271. const ucl_object_t *cur, *elt;
  272. ucl_object_iter_t it;
  273. struct cache_item *item, *parent;
  274. const guchar *p;
  275. gint fd;
  276. gpointer map;
  277. fd = open (name, O_RDONLY);
  278. if (fd == -1) {
  279. msg_info_cache ("cannot open file %s, error %d, %s", name,
  280. errno, strerror (errno));
  281. return FALSE;
  282. }
  283. if (fstat (fd, &st) == -1) {
  284. close (fd);
  285. msg_info_cache ("cannot stat file %s, error %d, %s", name,
  286. errno, strerror (errno));
  287. return FALSE;
  288. }
  289. if (st.st_size < (gint)sizeof (*hdr)) {
  290. close (fd);
  291. errno = EINVAL;
  292. msg_info_cache ("cannot use file %s, error %d, %s", name,
  293. errno, strerror (errno));
  294. return FALSE;
  295. }
  296. map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  297. if (map == MAP_FAILED) {
  298. close (fd);
  299. msg_info_cache ("cannot mmap file %s, error %d, %s", name,
  300. errno, strerror (errno));
  301. return FALSE;
  302. }
  303. close (fd);
  304. hdr = map;
  305. if (memcmp (hdr->magic, rspamd_symbols_cache_magic,
  306. sizeof (rspamd_symbols_cache_magic)) != 0) {
  307. msg_info_cache ("cannot use file %s, bad magic", name);
  308. munmap (map, st.st_size);
  309. return FALSE;
  310. }
  311. parser = ucl_parser_new (0);
  312. p = (const guchar *)(hdr + 1);
  313. if (!ucl_parser_add_chunk (parser, p, st.st_size - sizeof (*hdr))) {
  314. msg_info_cache ("cannot use file %s, cannot parse: %s", name,
  315. ucl_parser_get_error (parser));
  316. munmap (map, st.st_size);
  317. ucl_parser_free (parser);
  318. return FALSE;
  319. }
  320. top = ucl_parser_get_object (parser);
  321. munmap (map, st.st_size);
  322. ucl_parser_free (parser);
  323. if (top == NULL || ucl_object_type (top) != UCL_OBJECT) {
  324. msg_info_cache ("cannot use file %s, bad object", name);
  325. ucl_object_unref (top);
  326. return FALSE;
  327. }
  328. it = ucl_object_iterate_new (top);
  329. while ((cur = ucl_object_iterate_safe (it, true))) {
  330. item = g_hash_table_lookup (cache->items_by_symbol, ucl_object_key (cur));
  331. if (item) {
  332. /* Copy saved info */
  333. /*
  334. * XXX: don't save or load weight, it should be obtained from the
  335. * metric
  336. */
  337. #if 0
  338. elt = ucl_object_find_key (cur, "weight");
  339. if (elt) {
  340. w = ucl_object_todouble (elt);
  341. if (w != 0) {
  342. item->weight = w;
  343. }
  344. }
  345. #endif
  346. elt = ucl_object_find_key (cur, "time");
  347. if (elt) {
  348. item->avg_time = ucl_object_todouble (elt);
  349. }
  350. elt = ucl_object_find_key (cur, "count");
  351. if (elt) {
  352. item->avg_counter = ucl_object_toint (elt);
  353. }
  354. elt = ucl_object_find_key (cur, "frequency");
  355. if (elt) {
  356. item->frequency = ucl_object_toint (elt);
  357. }
  358. if ((item->type & SYMBOL_TYPE_VIRTUAL) && item->parent != -1) {
  359. g_assert (item->parent < (gint)cache->items_by_id->len);
  360. parent = g_ptr_array_index (cache->items_by_id, item->parent);
  361. if (parent->weight < item->weight) {
  362. parent->weight = item->weight;
  363. }
  364. /*
  365. * We maintain avg_time for virtual symbols equal to the
  366. * parent item avg_time
  367. */
  368. parent->avg_time = item->avg_time;
  369. parent->avg_counter = item->avg_counter;
  370. }
  371. if (fabs (item->weight) > cache->max_weight) {
  372. cache->max_weight = fabs (item->weight);
  373. }
  374. cache->total_freq += item->frequency;
  375. }
  376. }
  377. ucl_object_iterate_free (it);
  378. ucl_object_unref (top);
  379. return TRUE;
  380. }
  381. static gboolean
  382. rspamd_symbols_cache_save_items (struct symbols_cache *cache, const gchar *name)
  383. {
  384. struct rspamd_symbols_cache_header hdr;
  385. ucl_object_t *top, *elt;
  386. GHashTableIter it;
  387. struct cache_item *item;
  388. struct ucl_emitter_functions *efunc;
  389. gpointer k, v;
  390. gint fd;
  391. FILE *f;
  392. bool ret;
  393. fd = open (name, O_CREAT | O_TRUNC | O_WRONLY, 00644);
  394. if (fd == -1) {
  395. msg_info_cache ("cannot open file %s, error %d, %s", name,
  396. errno, strerror (errno));
  397. return FALSE;
  398. }
  399. memset (&hdr, 0, sizeof (hdr));
  400. memcpy (hdr.magic, rspamd_symbols_cache_magic,
  401. sizeof (rspamd_symbols_cache_magic));
  402. if (write (fd, &hdr, sizeof (hdr)) == -1) {
  403. msg_info_cache ("cannot write to file %s, error %d, %s", name,
  404. errno, strerror (errno));
  405. close (fd);
  406. return FALSE;
  407. }
  408. top = ucl_object_typed_new (UCL_OBJECT);
  409. g_hash_table_iter_init (&it, cache->items_by_symbol);
  410. while (g_hash_table_iter_next (&it, &k, &v)) {
  411. item = v;
  412. elt = ucl_object_typed_new (UCL_OBJECT);
  413. ucl_object_insert_key (elt, ucl_object_fromdouble (item->weight),
  414. "weight", 0, false);
  415. ucl_object_insert_key (elt, ucl_object_fromdouble (item->avg_time),
  416. "time", 0, false);
  417. ucl_object_insert_key (elt, ucl_object_fromdouble (item->avg_counter),
  418. "count", 0, false);
  419. ucl_object_insert_key (elt, ucl_object_fromint (item->frequency),
  420. "frequency", 0, false);
  421. ucl_object_insert_key (top, elt, k, 0, false);
  422. }
  423. f = fdopen (fd, "a");
  424. g_assert (f != NULL);
  425. efunc = ucl_object_emit_file_funcs (f);
  426. ret = ucl_object_emit_full (top, UCL_EMIT_JSON_COMPACT, efunc);
  427. ucl_object_emit_funcs_free (efunc);
  428. fclose (f);
  429. return ret;
  430. }
  431. gint
  432. rspamd_symbols_cache_add_symbol (struct symbols_cache *cache,
  433. const gchar *name,
  434. gint priority,
  435. symbol_func_t func,
  436. gpointer user_data,
  437. enum rspamd_symbol_type type,
  438. gint parent)
  439. {
  440. struct cache_item *item = NULL;
  441. g_assert (cache != NULL);
  442. if (name == NULL && type != SYMBOL_TYPE_CALLBACK) {
  443. msg_warn_cache ("no name for non-callback symbol!");
  444. }
  445. else if (type == SYMBOL_TYPE_VIRTUAL && parent == -1) {
  446. msg_warn_cache ("no parent symbol is associated with virtual symbol %s",
  447. name);
  448. }
  449. if (name != NULL && type != SYMBOL_TYPE_CALLBACK) {
  450. if (g_hash_table_lookup (cache->items_by_symbol, name) != NULL) {
  451. msg_err_cache ("skip duplicate symbol registration for %s", name);
  452. return -1;
  453. }
  454. }
  455. item = rspamd_mempool_alloc0_shared (cache->static_pool,
  456. sizeof (struct cache_item));
  457. item->condition_cb = -1;
  458. /*
  459. * We do not share cd to skip locking, instead we'll just calculate it on
  460. * save or accumulate
  461. */
  462. item->cd = rspamd_mempool_alloc0 (cache->static_pool,
  463. sizeof (struct counter_data));
  464. if (name != NULL) {
  465. item->symbol = rspamd_mempool_strdup (cache->static_pool, name);
  466. }
  467. item->func = func;
  468. item->user_data = user_data;
  469. item->priority = priority;
  470. item->type = type;
  471. if ((type & SYMBOL_TYPE_FINE) && item->priority == 0) {
  472. /* Make priority for negative weighted symbols */
  473. item->priority = 1;
  474. }
  475. item->id = cache->used_items;
  476. item->parent = parent;
  477. cache->used_items ++;
  478. msg_debug_cache ("used items: %d, added symbol: %s", cache->used_items, name);
  479. rspamd_set_counter (item, 0);
  480. g_ptr_array_add (cache->items_by_id, item);
  481. g_ptr_array_add (cache->items_by_order, item);
  482. item->deps = g_ptr_array_new ();
  483. item->rdeps = g_ptr_array_new ();
  484. rspamd_mempool_add_destructor (cache->static_pool,
  485. rspamd_ptr_array_free_hard, item->deps);
  486. rspamd_mempool_add_destructor (cache->static_pool,
  487. rspamd_ptr_array_free_hard, item->rdeps);
  488. if (name != NULL && type != SYMBOL_TYPE_CALLBACK) {
  489. g_hash_table_insert (cache->items_by_symbol, item->symbol, item);
  490. }
  491. return item->id;
  492. }
  493. gboolean
  494. rspamd_symbols_cache_add_condition (struct symbols_cache *cache, gint id,
  495. lua_State *L, gint cbref)
  496. {
  497. struct cache_item *item;
  498. g_assert (cache != NULL);
  499. if (id < 0 || id >= (gint)cache->items_by_id->len) {
  500. return FALSE;
  501. }
  502. item = g_ptr_array_index (cache->items_by_id, id);
  503. if (item->condition_cb != -1) {
  504. /* We already have a condition, so we need to remove old cbref first */
  505. msg_warn_cache ("rewriting condition for symbol %s", item->symbol);
  506. luaL_unref (L, LUA_REGISTRYINDEX, item->condition_cb);
  507. }
  508. item->condition_cb = cbref;
  509. msg_debug_cache ("adding condition at lua ref %d to %s (%d)",
  510. cbref, item->symbol, item->id);
  511. return TRUE;
  512. }
  513. gboolean rspamd_symbols_cache_add_condition_delayed (struct symbols_cache *cache,
  514. const gchar *sym, lua_State *L, gint cbref)
  515. {
  516. gint id;
  517. struct delayed_cache_condition *ncond;
  518. g_assert (cache != NULL);
  519. g_assert (sym != NULL);
  520. id = rspamd_symbols_cache_find_symbol (cache, sym);
  521. if (id != -1) {
  522. /* We already know id, so just register a direct condition */
  523. return rspamd_symbols_cache_add_condition (cache, id, L, cbref);
  524. }
  525. ncond = g_slice_alloc (sizeof (*ncond));
  526. ncond->sym = g_strdup (sym);
  527. ncond->cbref = cbref;
  528. ncond->L = L;
  529. cache->delayed_conditions = g_list_prepend (cache->delayed_conditions, ncond);
  530. return TRUE;
  531. }
  532. void
  533. rspamd_symbols_cache_destroy (struct symbols_cache *cache)
  534. {
  535. GList *cur;
  536. struct delayed_cache_dependency *ddep;
  537. struct delayed_cache_condition *dcond;
  538. if (cache != NULL) {
  539. if (cache->cfg->cache_filename) {
  540. /* Try to sync values to the disk */
  541. if (!rspamd_symbols_cache_save_items (cache,
  542. cache->cfg->cache_filename)) {
  543. msg_err_cache ("cannot save cache data to %s",
  544. cache->cfg->cache_filename);
  545. }
  546. }
  547. if (cache->delayed_deps) {
  548. cur = cache->delayed_deps;
  549. while (cur) {
  550. ddep = cur->data;
  551. g_free (ddep->from);
  552. g_free (ddep->to);
  553. g_slice_free1 (sizeof (*ddep), ddep);
  554. cur = g_list_next (cur);
  555. }
  556. g_list_free (cache->delayed_deps);
  557. }
  558. if (cache->delayed_conditions) {
  559. cur = cache->delayed_conditions;
  560. while (cur) {
  561. dcond = cur->data;
  562. g_free (dcond->sym);
  563. g_slice_free1 (sizeof (*dcond), dcond);
  564. cur = g_list_next (cur);
  565. }
  566. g_list_free (cache->delayed_conditions);
  567. }
  568. g_hash_table_destroy (cache->items_by_symbol);
  569. rspamd_mempool_delete (cache->static_pool);
  570. g_ptr_array_free (cache->items_by_id, TRUE);
  571. g_ptr_array_free (cache->items_by_order, TRUE);
  572. g_slice_free1 (sizeof (*cache), cache);
  573. }
  574. }
  575. struct symbols_cache*
  576. rspamd_symbols_cache_new (struct rspamd_config *cfg)
  577. {
  578. struct symbols_cache *cache;
  579. cache = g_slice_alloc0 (sizeof (struct symbols_cache));
  580. cache->static_pool =
  581. rspamd_mempool_new (rspamd_mempool_suggest_size (), "symcache");
  582. cache->items_by_symbol = g_hash_table_new (rspamd_str_hash,
  583. rspamd_str_equal);
  584. cache->items_by_order = g_ptr_array_new ();
  585. cache->items_by_id = g_ptr_array_new ();
  586. cache->mtx = rspamd_mempool_get_mutex (cache->static_pool);
  587. cache->reload_time = CACHE_RELOAD_TIME;
  588. cache->total_freq = 1;
  589. cache->max_weight = 1.0;
  590. cache->cfg = cfg;
  591. return cache;
  592. }
  593. gboolean
  594. rspamd_symbols_cache_init (struct symbols_cache* cache)
  595. {
  596. gboolean res;
  597. g_assert (cache != NULL);
  598. /* Just in-memory cache */
  599. if (cache->cfg->cache_filename == NULL) {
  600. post_cache_init (cache);
  601. return TRUE;
  602. }
  603. /* Copy saved cache entries */
  604. res = rspamd_symbols_cache_load_items (cache, cache->cfg->cache_filename);
  605. return res;
  606. }
  607. static void
  608. rspamd_symbols_cache_validate_cb (gpointer k, gpointer v, gpointer ud)
  609. {
  610. struct cache_item *item = v, *parent;
  611. struct symbols_cache *cache = (struct symbols_cache *)ud;
  612. GList *cur;
  613. struct metric *m;
  614. struct rspamd_symbol_def *s;
  615. gboolean skipped, ghost;
  616. gint p1, p2;
  617. ghost = item->weight == 0 ? TRUE : FALSE;
  618. /* Check whether this item is skipped */
  619. skipped = !ghost;
  620. if ((item->type &
  621. (SYMBOL_TYPE_NORMAL|SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_COMPOSITE|SYMBOL_TYPE_CLASSIFIER))
  622. && cache->cfg
  623. && g_hash_table_lookup (cache->cfg->metrics_symbols, item->symbol) == NULL) {
  624. cur = g_list_first (cache->cfg->metrics_list);
  625. while (cur) {
  626. m = cur->data;
  627. if (m->accept_unknown_symbols) {
  628. GList *mlist;
  629. skipped = FALSE;
  630. item->weight = m->unknown_weight;
  631. s = rspamd_mempool_alloc0 (cache->static_pool,
  632. sizeof (*s));
  633. s->name = item->symbol;
  634. s->weight_ptr = &item->weight;
  635. g_hash_table_insert (m->symbols, item->symbol, s);
  636. mlist = g_hash_table_lookup (cache->cfg->metrics_symbols,
  637. item->symbol);
  638. mlist = g_list_prepend (mlist, m);
  639. g_hash_table_insert (cache->cfg->metrics_symbols,
  640. item->symbol, mlist);
  641. msg_info_cache ("adding unknown symbol %s to metric %s", item->symbol,
  642. m->name);
  643. }
  644. cur = g_list_next (cur);
  645. }
  646. }
  647. else {
  648. skipped = FALSE;
  649. }
  650. if (skipped) {
  651. item->type |= SYMBOL_TYPE_SKIPPED;
  652. msg_warn_cache ("symbol %s is not registered in any metric, so skip its check",
  653. item->symbol);
  654. }
  655. if (ghost) {
  656. msg_debug_cache ("symbol %s is registered as ghost symbol, it won't be inserted "
  657. "to any metric", item->symbol);
  658. }
  659. if (item->weight < 0 && item->priority == 0) {
  660. item->priority ++;
  661. }
  662. if ((item->type & SYMBOL_TYPE_VIRTUAL) && item->parent != -1) {
  663. g_assert (item->parent < (gint)cache->items_by_id->len);
  664. parent = g_ptr_array_index (cache->items_by_id, item->parent);
  665. if (fabs (parent->weight) < fabs (item->weight)) {
  666. parent->weight = item->weight;
  667. }
  668. p1 = abs (item->priority);
  669. p2 = abs (parent->priority);
  670. if (p1 != p2) {
  671. parent->priority = MAX (p1, p2);
  672. item->priority = parent->priority;
  673. }
  674. }
  675. if (fabs (item->weight) > cache->max_weight) {
  676. cache->max_weight = fabs (item->weight);
  677. }
  678. }
  679. static void
  680. rspamd_symbols_cache_metric_validate_cb (gpointer k, gpointer v, gpointer ud)
  681. {
  682. struct symbols_cache *cache = (struct symbols_cache *)ud;
  683. const gchar *sym = k;
  684. struct rspamd_symbol_def *s = (struct rspamd_symbol_def *)v;
  685. gdouble weight;
  686. struct cache_item *item;
  687. weight = *s->weight_ptr;
  688. item = g_hash_table_lookup (cache->items_by_symbol, sym);
  689. if (item) {
  690. item->weight = weight;
  691. }
  692. }
  693. gboolean
  694. rspamd_symbols_cache_validate (struct symbols_cache *cache,
  695. struct rspamd_config *cfg,
  696. gboolean strict)
  697. {
  698. struct cache_item *item;
  699. GHashTableIter it;
  700. GList *cur;
  701. gpointer k, v;
  702. struct rspamd_symbol_def *sym_def;
  703. struct metric *metric;
  704. gboolean ignore_symbol = FALSE, ret = TRUE;
  705. if (cache == NULL) {
  706. msg_err_cache ("empty cache is invalid");
  707. return FALSE;
  708. }
  709. /* Now adjust symbol weights according to default metric */
  710. if (cfg->default_metric != NULL) {
  711. g_hash_table_foreach (cfg->default_metric->symbols,
  712. rspamd_symbols_cache_metric_validate_cb,
  713. cache);
  714. }
  715. g_hash_table_foreach (cache->items_by_symbol,
  716. rspamd_symbols_cache_validate_cb,
  717. cache);
  718. /* Now check each metric item and find corresponding symbol in a cache */
  719. g_hash_table_iter_init (&it, cfg->metrics_symbols);
  720. while (g_hash_table_iter_next (&it, &k, &v)) {
  721. ignore_symbol = FALSE;
  722. cur = v;
  723. while (cur) {
  724. metric = cur->data;
  725. sym_def = g_hash_table_lookup (metric->symbols, k);
  726. if (sym_def && (sym_def->flags & RSPAMD_SYMBOL_FLAG_IGNORE)) {
  727. ignore_symbol = TRUE;
  728. break;
  729. }
  730. cur = g_list_next (cur);
  731. }
  732. if (!ignore_symbol) {
  733. item = g_hash_table_lookup (cache->items_by_symbol, k);
  734. if (item == NULL) {
  735. msg_warn_cache (
  736. "symbol '%s' has its score defined but there is no "
  737. "corresponding rule registered",
  738. k);
  739. if (strict) {
  740. ret = FALSE;
  741. }
  742. }
  743. }
  744. }
  745. post_cache_init (cache);
  746. return ret;
  747. }
  748. static gboolean
  749. check_metric_settings (struct rspamd_task *task, struct metric *metric,
  750. double *score)
  751. {
  752. const ucl_object_t *mobj, *reject, *act;
  753. double val;
  754. if (task->settings == NULL) {
  755. return FALSE;
  756. }
  757. mobj = ucl_object_find_key (task->settings, metric->name);
  758. if (mobj != NULL) {
  759. act = ucl_object_find_key (mobj, "actions");
  760. if (act != NULL) {
  761. reject = ucl_object_find_key (act,
  762. rspamd_action_to_str (METRIC_ACTION_REJECT));
  763. if (reject != NULL && ucl_object_todouble_safe (reject, &val)) {
  764. *score = val;
  765. return TRUE;
  766. }
  767. }
  768. }
  769. return FALSE;
  770. }
  771. /* Return true if metric has score that is more than spam score for it */
  772. static gboolean
  773. rspamd_symbols_cache_metric_limit (struct rspamd_task *task,
  774. struct cache_savepoint *cp)
  775. {
  776. struct metric_result *res;
  777. GList *cur;
  778. struct metric *metric;
  779. double ms;
  780. if (task->flags & RSPAMD_TASK_FLAG_PASS_ALL) {
  781. return FALSE;
  782. }
  783. cur = task->cfg->metrics_list;
  784. if (cp->lim == 0.0) {
  785. /*
  786. * Look for metric that has the maximum reject score
  787. */
  788. while (cur) {
  789. metric = cur->data;
  790. res = g_hash_table_lookup (task->results, metric->name);
  791. if (res) {
  792. if (!check_metric_settings (task, metric, &ms)) {
  793. ms = metric->actions[METRIC_ACTION_REJECT].score;
  794. }
  795. if (cp->lim < ms) {
  796. cp->rs = res;
  797. cp->lim = ms;
  798. }
  799. }
  800. cur = g_list_next (cur);
  801. }
  802. }
  803. if (cp->rs) {
  804. if (cp->rs->score > cp->lim) {
  805. return TRUE;
  806. }
  807. }
  808. else {
  809. /* No reject score define, always check all rules */
  810. cp->lim = -1;
  811. }
  812. return FALSE;
  813. }
  814. static void
  815. rspamd_symbols_cache_watcher_cb (gpointer sessiond, gpointer ud)
  816. {
  817. struct rspamd_task *task = sessiond;
  818. struct cache_item *item = ud, *it;
  819. struct cache_savepoint *checkpoint;
  820. struct symbols_cache *cache;
  821. gint i, remain = 0;
  822. checkpoint = task->checkpoint;
  823. cache = task->cfg->cache;
  824. /* Specify that we are done with this item */
  825. setbit (checkpoint->processed_bits, item->id * 2 + 1);
  826. if (checkpoint->pass > 0) {
  827. for (i = 0; i < (gint)checkpoint->waitq->len; i ++) {
  828. it = g_ptr_array_index (checkpoint->waitq, i);
  829. if (!isset (checkpoint->processed_bits, it->id * 2)) {
  830. if (!rspamd_symbols_cache_check_deps (task, cache, it,
  831. checkpoint)) {
  832. remain ++;
  833. break;
  834. }
  835. rspamd_symbols_cache_check_symbol (task, cache, it, checkpoint,
  836. NULL);
  837. }
  838. }
  839. }
  840. msg_debug_task ("finished watcher, %ud symbols waiting", remain);
  841. }
  842. static gboolean
  843. rspamd_symbols_cache_check_symbol (struct rspamd_task *task,
  844. struct symbols_cache *cache,
  845. struct cache_item *item,
  846. struct cache_savepoint *checkpoint,
  847. gdouble *total_diff)
  848. {
  849. guint pending_before, pending_after;
  850. double t1, t2;
  851. gdouble diff;
  852. struct rspamd_task **ptask;
  853. lua_State *L;
  854. gboolean check = TRUE;
  855. const gdouble slow_diff_limit = 1e5;
  856. if (item->type & (SYMBOL_TYPE_NORMAL|SYMBOL_TYPE_CALLBACK)) {
  857. g_assert (item->func != NULL);
  858. /* Check has been started */
  859. setbit (checkpoint->processed_bits, item->id * 2);
  860. if (item->condition_cb != -1) {
  861. /* We also executes condition callback to check if we need this symbol */
  862. L = task->cfg->lua_state;
  863. lua_rawgeti (L, LUA_REGISTRYINDEX, item->condition_cb);
  864. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  865. rspamd_lua_setclass (L, "rspamd{task}", -1);
  866. *ptask = task;
  867. if (lua_pcall (L, 1, 1, 0) != 0) {
  868. msg_info_task ("call to condition for %s failed: %s",
  869. item->symbol, lua_tostring (L, -1));
  870. }
  871. else {
  872. check = lua_toboolean (L, -1);
  873. lua_pop (L, 1);
  874. }
  875. }
  876. if (check) {
  877. t1 = rspamd_get_ticks ();
  878. pending_before = rspamd_session_events_pending (task->s);
  879. /* Watch for events appeared */
  880. rspamd_session_watch_start (task->s, rspamd_symbols_cache_watcher_cb,
  881. item);
  882. msg_debug_task ("execute %s, %d", item->symbol, item->id);
  883. item->func (task, item->user_data);
  884. t2 = rspamd_get_ticks ();
  885. diff = (t2 - t1) * 1e6;
  886. if (total_diff) {
  887. *total_diff += diff;
  888. }
  889. if (diff > slow_diff_limit) {
  890. msg_info_task ("slow rule: %s: %d ms", item->symbol,
  891. (gint)(diff / 1000.));
  892. }
  893. rspamd_set_counter (item, diff);
  894. rspamd_session_watch_stop (task->s);
  895. pending_after = rspamd_session_events_pending (task->s);
  896. if (pending_before == pending_after) {
  897. /* No new events registered */
  898. setbit (checkpoint->processed_bits, item->id * 2 + 1);
  899. return TRUE;
  900. }
  901. return FALSE;
  902. }
  903. else {
  904. msg_debug_task ("skipping check of %s as its condition is false",
  905. item->symbol);
  906. setbit (checkpoint->processed_bits, item->id * 2 + 1);
  907. return TRUE;
  908. }
  909. }
  910. else {
  911. setbit (checkpoint->processed_bits, item->id * 2);
  912. setbit (checkpoint->processed_bits, item->id * 2 + 1);
  913. return TRUE;
  914. }
  915. }
  916. static gboolean
  917. rspamd_symbols_cache_check_deps (struct rspamd_task *task,
  918. struct symbols_cache *cache,
  919. struct cache_item *item,
  920. struct cache_savepoint *checkpoint)
  921. {
  922. struct cache_dependency *dep;
  923. guint i;
  924. gboolean ret = TRUE;
  925. if (item->deps != NULL && item->deps->len > 0) {
  926. for (i = 0; i < item->deps->len; i ++) {
  927. dep = g_ptr_array_index (item->deps, i);
  928. if (dep->item == NULL) {
  929. /* Assume invalid deps as done */
  930. continue;
  931. }
  932. if (!isset (checkpoint->processed_bits, dep->id * 2 + 1)) {
  933. if (!isset (checkpoint->processed_bits, dep->id * 2)) {
  934. /* Not started */
  935. if (!rspamd_symbols_cache_check_deps (task, cache,
  936. dep->item,
  937. checkpoint)) {
  938. g_ptr_array_add (checkpoint->waitq, item);
  939. ret = FALSE;
  940. msg_debug_task ("delayed dependency %d for symbol %d",
  941. dep->id, item->id);
  942. }
  943. else if (!rspamd_symbols_cache_check_symbol (task, cache,
  944. dep->item,
  945. checkpoint,
  946. NULL)) {
  947. /* Now started, but has events pending */
  948. ret = FALSE;
  949. msg_debug_task ("started check of %d symbol as dep for "
  950. "%d",
  951. dep->id, item->id);
  952. }
  953. else {
  954. msg_debug_task ("dependency %d for symbol %d is "
  955. "already processed",
  956. dep->id, item->id);
  957. }
  958. }
  959. else {
  960. /* Started but not finished */
  961. ret = FALSE;
  962. }
  963. }
  964. else {
  965. msg_debug_task ("dependency %d for symbol %d is already "
  966. "checked",
  967. dep->id, item->id);
  968. }
  969. }
  970. }
  971. return ret;
  972. }
  973. gboolean
  974. rspamd_symbols_cache_process_symbols (struct rspamd_task * task,
  975. struct symbols_cache *cache)
  976. {
  977. struct cache_item *item = NULL;
  978. struct cache_savepoint *checkpoint;
  979. gint i;
  980. gdouble total_microseconds = 0;
  981. const gdouble max_microseconds = 3e5;
  982. guint start_events_pending;
  983. g_assert (cache != NULL);
  984. if (task->checkpoint == NULL) {
  985. checkpoint = rspamd_mempool_alloc0 (task->task_pool, sizeof (*checkpoint));
  986. /* Bit 0: check started, Bit 1: check finished */
  987. checkpoint->processed_bits = rspamd_mempool_alloc0 (task->task_pool,
  988. NBYTES (cache->used_items) * 2);
  989. checkpoint->waitq = g_ptr_array_new ();
  990. rspamd_mempool_add_destructor (task->task_pool,
  991. rspamd_ptr_array_free_hard, checkpoint->waitq);
  992. task->checkpoint = checkpoint;
  993. rspamd_create_metric_result (task, DEFAULT_METRIC);
  994. if (task->settings) {
  995. const ucl_object_t *wl;
  996. wl = ucl_object_find_key (task->settings, "whitelist");
  997. if (wl != NULL) {
  998. msg_info_task ("<%s> is whitelisted", task->message_id);
  999. task->flags |= RSPAMD_TASK_FLAG_SKIP;
  1000. return TRUE;
  1001. }
  1002. }
  1003. }
  1004. else {
  1005. checkpoint = task->checkpoint;
  1006. }
  1007. msg_debug_task ("symbols processing stage at pass: %d", checkpoint->pass);
  1008. start_events_pending = rspamd_session_events_pending (task->s);
  1009. if (checkpoint->pass == 0) {
  1010. /*
  1011. * On the first pass we check symbols that do not have dependencies
  1012. * If we figure out symbol that has no dependencies satisfied, then
  1013. * we just save it for another pass
  1014. */
  1015. for (i = 0; i < (gint)cache->used_items; i ++) {
  1016. if (rspamd_symbols_cache_metric_limit (task, checkpoint)) {
  1017. msg_info_task ("<%s> has already scored more than %.2f, so do "
  1018. "not "
  1019. "plan any more checks", task->message_id,
  1020. checkpoint->rs->score);
  1021. return TRUE;
  1022. }
  1023. item = g_ptr_array_index (cache->items_by_order, i);
  1024. if (!isset (checkpoint->processed_bits, item->id * 2)) {
  1025. if (!rspamd_symbols_cache_check_deps (task, cache, item,
  1026. checkpoint)) {
  1027. msg_debug_task ("blocked execution of %d unless deps are "
  1028. "resolved",
  1029. item->id);
  1030. g_ptr_array_add (checkpoint->waitq, item);
  1031. continue;
  1032. }
  1033. rspamd_symbols_cache_check_symbol (task, cache, item,
  1034. checkpoint, &total_microseconds);
  1035. }
  1036. if (total_microseconds > max_microseconds) {
  1037. /* Maybe we should stop and check pending events? */
  1038. if (rspamd_session_events_pending (task->s) >
  1039. start_events_pending) {
  1040. msg_debug_task ("trying to check async events after spending "
  1041. "%d microseconds processing symbols",
  1042. (gint)total_microseconds);
  1043. return TRUE;
  1044. }
  1045. }
  1046. }
  1047. checkpoint->pass ++;
  1048. }
  1049. else {
  1050. /* We just go through the blocked symbols and check if they are ready */
  1051. for (i = 0; i < (gint)checkpoint->waitq->len; i ++) {
  1052. item = g_ptr_array_index (checkpoint->waitq, i);
  1053. if (!isset (checkpoint->processed_bits, item->id * 2)) {
  1054. if (!rspamd_symbols_cache_check_deps (task, cache, item,
  1055. checkpoint)) {
  1056. break;
  1057. }
  1058. rspamd_symbols_cache_check_symbol (task, cache, item,
  1059. checkpoint, &total_microseconds);
  1060. }
  1061. if (total_microseconds > max_microseconds) {
  1062. /* Maybe we should stop and check pending events? */
  1063. if (rspamd_session_events_pending (task->s) >
  1064. start_events_pending) {
  1065. msg_debug_task ("trying to check async events after spending "
  1066. "%d microseconds processing symbols",
  1067. (gint)total_microseconds);
  1068. return TRUE;
  1069. }
  1070. }
  1071. }
  1072. }
  1073. return TRUE;
  1074. }
  1075. struct counters_cbdata {
  1076. ucl_object_t *top;
  1077. struct symbols_cache *cache;
  1078. };
  1079. static void
  1080. rspamd_symbols_cache_counters_cb (gpointer v, gpointer ud)
  1081. {
  1082. struct counters_cbdata *cbd = ud;
  1083. ucl_object_t *obj, *top;
  1084. struct cache_item *item = v, *parent;
  1085. top = cbd->top;
  1086. if (!(item->type & SYMBOL_TYPE_CALLBACK)) {
  1087. obj = ucl_object_typed_new (UCL_OBJECT);
  1088. ucl_object_insert_key (obj, ucl_object_fromstring (item->symbol),
  1089. "symbol", 0, false);
  1090. if ((item->type & SYMBOL_TYPE_VIRTUAL) && item->parent != -1) {
  1091. g_assert (item->parent < (gint)cbd->cache->items_by_id->len);
  1092. parent = g_ptr_array_index (cbd->cache->items_by_id,
  1093. item->parent);
  1094. ucl_object_insert_key (obj, ucl_object_fromdouble (item->weight),
  1095. "weight", 0, false);
  1096. ucl_object_insert_key (obj, ucl_object_fromint (item->frequency),
  1097. "frequency", 0, false);
  1098. ucl_object_insert_key (obj, ucl_object_fromdouble (parent->avg_time),
  1099. "time", 0, false);
  1100. }
  1101. else {
  1102. ucl_object_insert_key (obj, ucl_object_fromdouble (item->weight),
  1103. "weight", 0, false);
  1104. ucl_object_insert_key (obj, ucl_object_fromint (item->frequency),
  1105. "frequency", 0, false);
  1106. ucl_object_insert_key (obj, ucl_object_fromdouble (item->avg_time),
  1107. "time", 0, false);
  1108. }
  1109. ucl_array_append (top, obj);
  1110. }
  1111. }
  1112. ucl_object_t *
  1113. rspamd_symbols_cache_counters (struct symbols_cache * cache)
  1114. {
  1115. ucl_object_t *top;
  1116. struct counters_cbdata cbd;
  1117. g_assert (cache != NULL);
  1118. top = ucl_object_typed_new (UCL_ARRAY);
  1119. cbd.top = top;
  1120. cbd.cache = cache;
  1121. g_ptr_array_foreach (cache->items_by_order,
  1122. rspamd_symbols_cache_counters_cb, &cbd);
  1123. return top;
  1124. }
  1125. static void
  1126. rspamd_symbols_cache_resort_cb (gint fd, short what, gpointer ud)
  1127. {
  1128. struct timeval tv;
  1129. gdouble tm;
  1130. struct symbols_cache *cache = ud;
  1131. struct cache_item *item, *parent;
  1132. guint i;
  1133. /* Plan new event */
  1134. tm = rspamd_time_jitter (cache->reload_time, 0);
  1135. msg_debug_cache ("resort symbols cache, next reload in %.2f seconds", tm);
  1136. g_assert (cache != NULL);
  1137. evtimer_set (&cache->resort_ev, rspamd_symbols_cache_resort_cb, cache);
  1138. double_to_tv (tm, &tv);
  1139. event_add (&cache->resort_ev, &tv);
  1140. rspamd_mempool_lock_mutex (cache->mtx);
  1141. /* Gather stats from shared execution times */
  1142. for (i = 0; i < cache->items_by_order->len; i ++) {
  1143. item = g_ptr_array_index (cache->items_by_order, i);
  1144. if (item->type & (SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_NORMAL)) {
  1145. if (item->cd->number > 0) {
  1146. item->avg_counter += item->cd->number + 1;
  1147. item->avg_time = item->avg_time +
  1148. (item->cd->value - item->avg_time) /
  1149. (gdouble)item->avg_counter;
  1150. item->cd->value = item->avg_time;
  1151. item->cd->number = item->avg_counter;
  1152. }
  1153. }
  1154. }
  1155. /* Sync virtual symbols */
  1156. for (i = 0; i < cache->items_by_id->len; i ++) {
  1157. item = g_ptr_array_index (cache->items_by_id, i);
  1158. if (item->parent != -1) {
  1159. parent = g_ptr_array_index (cache->items_by_id, item->parent);
  1160. if (parent) {
  1161. item->avg_time = parent->avg_time;
  1162. item->avg_counter = parent->avg_counter;
  1163. }
  1164. }
  1165. }
  1166. rspamd_mempool_unlock_mutex (cache->mtx);
  1167. g_ptr_array_sort_with_data (cache->items_by_order, cache_logic_cmp, cache);
  1168. }
  1169. void
  1170. rspamd_symbols_cache_start_refresh (struct symbols_cache * cache,
  1171. struct event_base *ev_base)
  1172. {
  1173. struct timeval tv;
  1174. gdouble tm;
  1175. tm = rspamd_time_jitter (cache->reload_time, 0);
  1176. g_assert (cache != NULL);
  1177. evtimer_set (&cache->resort_ev, rspamd_symbols_cache_resort_cb, cache);
  1178. event_base_set (ev_base, &cache->resort_ev);
  1179. double_to_tv (tm, &tv);
  1180. event_add (&cache->resort_ev, &tv);
  1181. }
  1182. void
  1183. rspamd_symbols_cache_inc_frequency (struct symbols_cache *cache,
  1184. const gchar *symbol)
  1185. {
  1186. struct cache_item *item, *parent;
  1187. g_assert (cache != NULL);
  1188. item = g_hash_table_lookup (cache->items_by_symbol, symbol);
  1189. if (item != NULL) {
  1190. /* We assume ++ as atomic op */
  1191. item->frequency ++;
  1192. cache->total_freq ++;
  1193. /* For virtual symbols we also increase counter for parent */
  1194. if (item->parent != -1) {
  1195. parent = g_ptr_array_index (cache->items_by_id, item->parent);
  1196. parent->frequency ++;
  1197. }
  1198. }
  1199. }
  1200. void
  1201. rspamd_symbols_cache_add_dependency (struct symbols_cache *cache,
  1202. gint id_from, const gchar *to)
  1203. {
  1204. struct cache_item *source;
  1205. struct cache_dependency *dep;
  1206. g_assert (id_from < (gint)cache->items_by_id->len);
  1207. source = g_ptr_array_index (cache->items_by_id, id_from);
  1208. dep = rspamd_mempool_alloc (cache->static_pool, sizeof (*dep));
  1209. dep->id = id_from;
  1210. dep->sym = rspamd_mempool_strdup (cache->static_pool, to);
  1211. /* Will be filled later */
  1212. dep->item = NULL;
  1213. g_ptr_array_add (source->deps, dep);
  1214. }
  1215. void
  1216. rspamd_symbols_cache_add_delayed_dependency (struct symbols_cache *cache,
  1217. const gchar *from, const gchar *to)
  1218. {
  1219. struct delayed_cache_dependency *ddep;
  1220. g_assert (from != NULL);
  1221. g_assert (to != NULL);
  1222. ddep = g_slice_alloc (sizeof (*ddep));
  1223. ddep->from = g_strdup (from);
  1224. ddep->to = g_strdup (to);
  1225. cache->delayed_deps = g_list_prepend (cache->delayed_deps, ddep);
  1226. }
  1227. gint
  1228. rspamd_symbols_cache_find_symbol (struct symbols_cache *cache, const gchar *name)
  1229. {
  1230. struct cache_item *item;
  1231. g_assert (cache != NULL);
  1232. if (name == NULL) {
  1233. return -1;
  1234. }
  1235. item = g_hash_table_lookup (cache->items_by_symbol, name);
  1236. if (item != NULL) {
  1237. while (item != NULL && item->parent != -1) {
  1238. item = g_ptr_array_index (cache->items_by_id, item->parent);
  1239. }
  1240. return item ? item->id : -1;
  1241. }
  1242. return -1;
  1243. }