You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

hash.c 15KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "hash.h"
  18. #include "util.h"
  19. #include "khash.h"
  20. /**
  21. * LRU hashing
  22. */
/*
 * Scale factor used by rspamd_lru_hash_update_counter: the chance of
 * incrementing a usage counter is 1 / ((counter - lfu_base_value) * log_base + 1).
 */
static const guint log_base = 10;
/* Number of slots allocated for the eviction candidates pool */
static const guint eviction_candidates = 16;
/*
 * Starting value of the usage counter for a freshly inserted element;
 * also used as the staleness threshold in rspamd_lru_hash_decrease_counter.
 */
static const gdouble lfu_base_value = 5.0;
struct rspamd_lru_volatile_element_s;
/*
 * Hash table object: cache limits and eviction pool state, followed by the
 * bucket arrays of an open addressing table (flags/keys/vals are indexed by
 * the same bucket number).
 */
struct rspamd_lru_hash_s {
	/* Size at which an insertion of a new key triggers eviction */
	guint maxsize;
	/* Lowest lg_usages seen among the elements placed in the eviction pool */
	guint eviction_min_prio;
	/* Number of occupied slots in eviction_pool */
	guint eviction_used;
	/* Array of eviction_candidates pointers to eviction candidates */
	struct rspamd_lru_element_s **eviction_pool;
	/* Optional destructor for stored values */
	GDestroyNotify value_destroy;
	/* Optional destructor for stored keys */
	GDestroyNotify key_destroy;
	/* Key hashing function */
	GHashFunc hfunc;
	/* Key equality function */
	GEqualFunc eqfunc;
	/*
	 * n_buckets:   number of buckets in the arrays below
	 * size:        number of live elements
	 * n_occupied:  buckets that are not empty (live or deleted)
	 * upper_bound: n_occupied threshold that triggers a resize on put
	 */
	khint_t n_buckets, size, n_occupied, upper_bound;
	/* Per-bucket empty/deleted bits, accessed through the __ac_* macros */
	khint32_t *flags;
	/* Keys, one per bucket */
	gpointer *keys;
	/* Elements, one per bucket */
	struct rspamd_lru_volatile_element_s *vals;
};
/* Bit flags stored in rspamd_lru_element_s.flags */
enum rspamd_lru_element_flags {
	/* Element inserted with ttl == 0: never checked for expiration */
	RSPAMD_LRU_ELEMENT_NORMAL = 0,
	/* Element inserted with a ttl: creation_time and ttl are meaningful */
	RSPAMD_LRU_ELEMENT_VOLATILE = (1 << 0),
	/* Element is skipped by the full-scan eviction (set temporarily on insert) */
	RSPAMD_LRU_ELEMENT_IMMORTAL = (1 << 1),
};
/* Per-element bookkeeping shared by normal and volatile elements */
struct rspamd_lru_element_s {
	/* Last access stamp as produced by TIME_TO_TS */
	guint16 last;
	/* Usage counter, incremented probabilistically on lookup; saturates at 255 */
	guint8 lg_usages;
	/* Index inside the eviction pool or (guint8)-1 when not in the pool */
	guint8 eviction_pos;
	/* Combination of enum rspamd_lru_element_flags */
	guint8 flags;
	/* User value */
	gpointer data;
};
/*
 * Element as stored in the table. The generic part is the first member, so
 * the code casts between a pointer to `e` and a pointer to this structure.
 */
struct rspamd_lru_volatile_element_s {
	struct rspamd_lru_element_s e;
	/* Time passed to insert; set only for elements inserted with ttl != 0 */
	time_t creation_time;
	/* Lifetime compared against (now - creation_time) */
	time_t ttl;
};
typedef struct rspamd_lru_volatile_element_s rspamd_lru_vol_element_t;
/* Converts a time value to a 16 bit stamp: (t / 60) truncated to 16 bits */
#define TIME_TO_TS(t) ((guint16)(((t) / 60) & 0xFFFFU))
  60. static rspamd_lru_vol_element_t *
  61. rspamd_lru_hash_get (const rspamd_lru_hash_t *h, gconstpointer key)
  62. {
  63. if (h->n_buckets) {
  64. khint_t k, i, last, mask, step = 0;
  65. mask = h->n_buckets - 1;
  66. k = h->hfunc (key);
  67. i = k & mask;
  68. last = i;
  69. while (!__ac_isempty(h->flags, i) &&
  70. (__ac_isdel(h->flags, i) || !h->eqfunc(h->keys[i], key))) {
  71. i = (i + (++step)) & mask;
  72. if (i == last) {
  73. return NULL;
  74. }
  75. }
  76. return __ac_iseither(h->flags, i) ? NULL : &h->vals[i];
  77. }
  78. return NULL;
  79. }
  80. static int
  81. rspamd_lru_hash_resize (rspamd_lru_hash_t *h,
  82. khint_t new_n_buckets)
  83. {
  84. /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */
  85. khint32_t *new_flags = 0;
  86. khint_t j = 1;
  87. kroundup32(new_n_buckets);
  88. if (new_n_buckets < 4) {
  89. new_n_buckets = 4;
  90. }
  91. if (h->size >= (khint_t) (new_n_buckets * __ac_HASH_UPPER + 0.5)) {
  92. j = 0;
  93. /* requested size is too small */
  94. }
  95. else {
  96. /* hash table size to be changed (shrink or expand); rehash */
  97. new_flags = (khint32_t *) g_malloc(__ac_fsize (new_n_buckets) * sizeof (khint32_t));
  98. if (!new_flags) {
  99. return -1;
  100. }
  101. memset(new_flags, 0xaa, __ac_fsize (new_n_buckets) * sizeof (khint32_t));
  102. if (h->n_buckets < new_n_buckets) {
  103. /* expand */
  104. gpointer *new_keys = (gpointer *) g_realloc((void *) h->keys,
  105. new_n_buckets * sizeof (gpointer));
  106. if (!new_keys) {
  107. g_free(new_flags);
  108. return -1;
  109. }
  110. h->keys = new_keys;
  111. rspamd_lru_vol_element_t *new_vals =
  112. (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
  113. new_n_buckets * sizeof (rspamd_lru_vol_element_t));
  114. if (!new_vals) {
  115. g_free(new_flags);
  116. return -1;
  117. }
  118. h->vals = new_vals;
  119. }
  120. /* Shrink */
  121. }
  122. if (j) {
  123. /* rehashing is needed */
  124. h->eviction_used = 0;
  125. for (j = 0; j != h->n_buckets; ++j) {
  126. if (__ac_iseither(h->flags, j) == 0) {
  127. gpointer key = h->keys[j];
  128. rspamd_lru_vol_element_t val;
  129. khint_t new_mask;
  130. new_mask = new_n_buckets - 1;
  131. val = h->vals[j];
  132. val.e.eviction_pos = (guint8)-1;
  133. __ac_set_isdel_true(h->flags, j);
  134. while (1) { /* kick-out process; sort of like in Cuckoo hashing */
  135. khint_t k, i, step = 0;
  136. k = h->hfunc(key);
  137. i = k & new_mask;
  138. while (!__ac_isempty(new_flags, i)) {
  139. i = (i + (++step)) & new_mask;
  140. }
  141. __ac_set_isempty_false(new_flags, i);
  142. if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) {
  143. /* kick out the existing element */
  144. {
  145. gpointer tmp = h->keys[i];
  146. h->keys[i] = key;
  147. key = tmp;
  148. }
  149. {
  150. rspamd_lru_vol_element_t tmp = h->vals[i];
  151. h->vals[i] = val;
  152. val = tmp;
  153. val.e.eviction_pos = (guint8)-1;
  154. }
  155. __ac_set_isdel_true(h->flags, i);
  156. /* mark it as deleted in the old hash table */
  157. } else { /* write the element and jump out of the loop */
  158. h->keys[i] = key;
  159. h->vals[i] = val;
  160. break;
  161. }
  162. }
  163. }
  164. }
  165. if (h->n_buckets > new_n_buckets) {
  166. /* shrink the hash table */
  167. h->keys = (gpointer *) g_realloc((void *) h->keys,
  168. new_n_buckets * sizeof (gpointer));
  169. h->vals = (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
  170. new_n_buckets * sizeof (rspamd_lru_vol_element_t));
  171. }
  172. g_free(h->flags); /* free the working space */
  173. h->flags = new_flags;
  174. h->n_buckets = new_n_buckets;
  175. h->n_occupied = h->size;
  176. h->upper_bound = (khint_t) (h->n_buckets * __ac_HASH_UPPER + 0.5);
  177. }
  178. return 0;
  179. }
  180. static rspamd_lru_vol_element_t *
  181. rspamd_lru_hash_put (rspamd_lru_hash_t *h, gpointer key, int *ret)
  182. {
  183. khint_t x;
  184. if (h->n_occupied >= h->upper_bound) {
  185. /* update the hash table */
  186. if (h->n_buckets > (h->size << 1)) {
  187. if (rspamd_lru_hash_resize (h, h->n_buckets - 1) < 0) {
  188. /* clear "deleted" elements */
  189. *ret = -1;
  190. return NULL;
  191. }
  192. }
  193. else if (rspamd_lru_hash_resize (h, h->n_buckets + 1) < 0) {
  194. /* expand the hash table */
  195. *ret = -1;
  196. return NULL;
  197. }
  198. }
  199. khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
  200. x = site = h->n_buckets;
  201. k = h->hfunc(key);
  202. i = k & mask;
  203. if (__ac_isempty(h->flags, i)) {
  204. x = i; /* for speed up */
  205. }
  206. else {
  207. last = i;
  208. while (!__ac_isempty(h->flags, i) &&
  209. (__ac_isdel(h->flags, i) ||
  210. !h->eqfunc (h->keys[i], key))) {
  211. if (__ac_isdel(h->flags, i)) {
  212. site = i;
  213. }
  214. i = (i + (++step)) & mask;
  215. if (i == last) {
  216. x = site;
  217. break;
  218. }
  219. }
  220. if (x == h->n_buckets) {
  221. if (__ac_isempty(h->flags, i) && site != h->n_buckets) {
  222. x = site;
  223. }
  224. else {
  225. x = i;
  226. }
  227. }
  228. }
  229. if (__ac_isempty(h->flags, x)) { /* not present at all */
  230. h->keys[x] = key;
  231. __ac_set_isboth_false(h->flags, x);
  232. ++h->size;
  233. ++h->n_occupied;
  234. *ret = 1;
  235. }
  236. else if (__ac_isdel(h->flags, x)) { /* deleted */
  237. h->keys[x] = key;
  238. __ac_set_isboth_false(h->flags, x);
  239. ++h->size;
  240. *ret = 2;
  241. }
  242. else {
  243. /* Don't touch h->keys[x] if present and not deleted */
  244. *ret = 0;
  245. }
  246. return &h->vals[x];
  247. }
  248. static void
  249. rspamd_lru_hash_del (rspamd_lru_hash_t *h, rspamd_lru_vol_element_t *elt)
  250. {
  251. khint_t x = elt - h->vals;
  252. if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {
  253. __ac_set_isdel_true(h->flags, x);
  254. --h->size;
  255. if (h->key_destroy) {
  256. h->key_destroy (h->keys[x]);
  257. }
  258. if (h->value_destroy) {
  259. h->value_destroy (elt->e.data);
  260. }
  261. }
  262. }
  263. static void
  264. rspamd_lru_hash_remove_evicted (rspamd_lru_hash_t *hash,
  265. rspamd_lru_element_t *elt)
  266. {
  267. guint i;
  268. rspamd_lru_element_t *cur;
  269. g_assert (hash->eviction_used > 0);
  270. g_assert (elt->eviction_pos < hash->eviction_used);
  271. memmove (&hash->eviction_pool[elt->eviction_pos],
  272. &hash->eviction_pool[elt->eviction_pos + 1],
  273. sizeof (rspamd_lru_element_t *) *
  274. (eviction_candidates - elt->eviction_pos - 1));
  275. hash->eviction_used--;
  276. if (hash->eviction_used > 0) {
  277. /* We also need to update min_prio and renumber eviction list */
  278. hash->eviction_min_prio = G_MAXUINT;
  279. for (i = 0; i < hash->eviction_used; i ++) {
  280. cur = hash->eviction_pool[i];
  281. if (hash->eviction_min_prio > cur->lg_usages) {
  282. hash->eviction_min_prio = cur->lg_usages;
  283. }
  284. cur->eviction_pos = i;
  285. }
  286. }
  287. else {
  288. hash->eviction_min_prio = G_MAXUINT;
  289. }
  290. }
  291. static void
  292. rspamd_lru_hash_update_counter (rspamd_lru_element_t *elt)
  293. {
  294. guint8 counter = elt->lg_usages;
  295. if (counter != 255) {
  296. double r, baseval, p;
  297. r = rspamd_random_double_fast ();
  298. baseval = counter - lfu_base_value;
  299. if (baseval < 0) {
  300. baseval = 0;
  301. }
  302. p = 1.0 / (baseval * log_base + 1);
  303. if (r < p) {
  304. elt->lg_usages ++;
  305. }
  306. }
  307. }
  308. static inline void
  309. rspamd_lru_hash_decrease_counter (rspamd_lru_element_t *elt, time_t now)
  310. {
  311. if (now - elt->last > lfu_base_value) {
  312. /* Penalise counters for outdated records */
  313. elt->lg_usages /= 2;
  314. }
  315. }
  316. static gboolean
  317. rspamd_lru_hash_maybe_evict (rspamd_lru_hash_t *hash,
  318. rspamd_lru_element_t *elt)
  319. {
  320. guint i;
  321. rspamd_lru_element_t *cur;
  322. if (elt->eviction_pos == (guint8)-1) {
  323. if (hash->eviction_used < eviction_candidates) {
  324. /* There are free places in eviction pool */
  325. hash->eviction_pool[hash->eviction_used] = elt;
  326. elt->eviction_pos = hash->eviction_used;
  327. hash->eviction_used ++;
  328. if (hash->eviction_min_prio > elt->lg_usages) {
  329. hash->eviction_min_prio = elt->lg_usages;
  330. }
  331. return TRUE;
  332. }
  333. else {
  334. /* Find any candidate that has higher usage count */
  335. for (i = 0; i < hash->eviction_used; i ++) {
  336. cur = hash->eviction_pool[i];
  337. if (cur->lg_usages > elt->lg_usages) {
  338. cur->eviction_pos = -1;
  339. elt->eviction_pos = i;
  340. hash->eviction_pool[i] = elt;
  341. if (hash->eviction_min_prio > elt->lg_usages) {
  342. hash->eviction_min_prio = elt->lg_usages;
  343. }
  344. return TRUE;
  345. }
  346. }
  347. }
  348. }
  349. else {
  350. /* Already in the eviction list */
  351. return TRUE;
  352. }
  353. return FALSE;
  354. }
  355. static void
  356. rspamd_lru_hash_remove_node (rspamd_lru_hash_t *hash, rspamd_lru_element_t *elt)
  357. {
  358. if (elt->eviction_pos != (guint8)-1) {
  359. rspamd_lru_hash_remove_evicted (hash, elt);
  360. }
  361. rspamd_lru_hash_del (hash, (rspamd_lru_vol_element_t *)elt);
  362. }
  363. static void
  364. rspamd_lru_hash_evict (rspamd_lru_hash_t *hash, time_t now)
  365. {
  366. double r;
  367. guint i;
  368. rspamd_lru_element_t *elt = NULL;
  369. guint nexpired = 0;
  370. /*
  371. * We either evict one node from the eviction list
  372. * or, at some probability scan all table and update eviction
  373. * list first
  374. */
  375. r = rspamd_random_double_fast ();
  376. if (r < ((double)eviction_candidates) / hash->maxsize) {
  377. /* Full hash scan */
  378. rspamd_lru_vol_element_t *cur;
  379. rspamd_lru_element_t *selected = NULL;
  380. kh_foreach_value_ptr (hash, cur, {
  381. rspamd_lru_element_t *node = &cur->e;
  382. if (node->flags & RSPAMD_LRU_ELEMENT_IMMORTAL) {
  383. continue;
  384. }
  385. if (node->flags & RSPAMD_LRU_ELEMENT_VOLATILE) {
  386. /* If element is expired, just remove it */
  387. if (now - cur->creation_time > cur->ttl) {
  388. rspamd_lru_hash_remove_node (hash, node);
  389. nexpired ++;
  390. continue;
  391. }
  392. }
  393. else {
  394. rspamd_lru_hash_decrease_counter (node, now);
  395. if (rspamd_lru_hash_maybe_evict (hash, node)) {
  396. if (selected && node->lg_usages < selected->lg_usages) {
  397. selected = node;
  398. }
  399. else if (selected == NULL) {
  400. selected = node;
  401. }
  402. }
  403. }
  404. });
  405. if (selected) {
  406. elt = selected;
  407. }
  408. }
  409. else {
  410. /* Fast random eviction */
  411. for (i = 0; i < hash->eviction_used; i ++) {
  412. elt = hash->eviction_pool[i];
  413. if (elt->lg_usages <= hash->eviction_min_prio) {
  414. break;
  415. }
  416. }
  417. }
  418. /* Evict if nothing else has been cleaned */
  419. if (elt && nexpired == 0) {
  420. rspamd_lru_hash_remove_node (hash, elt);
  421. }
  422. }
  423. rspamd_lru_hash_t *
  424. rspamd_lru_hash_new_full (gint maxsize,
  425. GDestroyNotify key_destroy,
  426. GDestroyNotify value_destroy,
  427. GHashFunc hf,
  428. GEqualFunc cmpf)
  429. {
  430. rspamd_lru_hash_t *h;
  431. if (maxsize < eviction_candidates * 2) {
  432. maxsize = eviction_candidates * 2;
  433. }
  434. h = g_malloc0 (sizeof (rspamd_lru_hash_t));
  435. h->hfunc = hf;
  436. h->eqfunc = cmpf;
  437. h->eviction_pool = g_malloc0 (sizeof (rspamd_lru_element_t *) *
  438. eviction_candidates);
  439. h->maxsize = maxsize;
  440. h->value_destroy = value_destroy;
  441. h->key_destroy = key_destroy;
  442. h->eviction_min_prio = G_MAXUINT;
  443. /* Preallocate some elements */
  444. rspamd_lru_hash_resize (h, MIN (h->maxsize, 128));
  445. return h;
  446. }
  447. rspamd_lru_hash_t *
  448. rspamd_lru_hash_new (gint maxsize,
  449. GDestroyNotify key_destroy,
  450. GDestroyNotify value_destroy)
  451. {
  452. return rspamd_lru_hash_new_full (maxsize,
  453. key_destroy, value_destroy,
  454. rspamd_strcase_hash, rspamd_strcase_equal);
  455. }
  456. gpointer
  457. rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gconstpointer key, time_t now)
  458. {
  459. rspamd_lru_element_t *res;
  460. rspamd_lru_vol_element_t *vnode;
  461. vnode = rspamd_lru_hash_get (hash, (gpointer)key);
  462. if (vnode != NULL) {
  463. res = &vnode->e;
  464. if (res->flags & RSPAMD_LRU_ELEMENT_VOLATILE) {
  465. /* Check ttl */
  466. if (now - vnode->creation_time > vnode->ttl) {
  467. rspamd_lru_hash_remove_node (hash, res);
  468. return NULL;
  469. }
  470. }
  471. now = TIME_TO_TS(now);
  472. res->last = MAX (res->last, now);
  473. rspamd_lru_hash_update_counter (res);
  474. rspamd_lru_hash_maybe_evict (hash, res);
  475. return res->data;
  476. }
  477. return NULL;
  478. }
  479. gboolean
  480. rspamd_lru_hash_remove (rspamd_lru_hash_t *hash,
  481. gconstpointer key)
  482. {
  483. rspamd_lru_vol_element_t *res;
  484. res = rspamd_lru_hash_get (hash, key);
  485. if (res != NULL) {
  486. rspamd_lru_hash_remove_node (hash, &res->e);
  487. return TRUE;
  488. }
  489. return FALSE;
  490. }
  491. void
  492. rspamd_lru_hash_insert (rspamd_lru_hash_t *hash,
  493. gpointer key,
  494. gpointer value,
  495. time_t now,
  496. guint ttl)
  497. {
  498. rspamd_lru_element_t *node;
  499. rspamd_lru_vol_element_t *vnode;
  500. gint ret;
  501. vnode = rspamd_lru_hash_put (hash, key, &ret);
  502. node = &vnode->e;
  503. if (ret == 0) {
  504. /* Existing element, be careful about destructors */
  505. if (hash->value_destroy) {
  506. /* Remove old data */
  507. hash->value_destroy (vnode->e.data);
  508. }
  509. if (hash->key_destroy) {
  510. /* Here are dragons! */
  511. goffset off = vnode - hash->vals;
  512. hash->key_destroy (hash->keys[off]);
  513. hash->keys[off] = key;
  514. }
  515. }
  516. if (ttl == 0) {
  517. node->flags = RSPAMD_LRU_ELEMENT_NORMAL;
  518. }
  519. else {
  520. vnode->creation_time = now;
  521. vnode->ttl = ttl;
  522. node->flags = RSPAMD_LRU_ELEMENT_VOLATILE;
  523. }
  524. node->data = value;
  525. node->lg_usages = (guint8)lfu_base_value;
  526. node->last = TIME_TO_TS (now);
  527. node->eviction_pos = (guint8)-1;
  528. if (ret != 0) {
  529. /* Also need to check maxsize */
  530. if (kh_size (hash) >= hash->maxsize) {
  531. node->flags |= RSPAMD_LRU_ELEMENT_IMMORTAL;
  532. rspamd_lru_hash_evict (hash, now);
  533. node->flags &= ~RSPAMD_LRU_ELEMENT_IMMORTAL;
  534. }
  535. }
  536. rspamd_lru_hash_maybe_evict (hash, node);
  537. }
  538. void
  539. rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash)
  540. {
  541. if (hash) {
  542. if (hash->key_destroy || hash->value_destroy) {
  543. gpointer k;
  544. rspamd_lru_vol_element_t cur;
  545. kh_foreach (hash, k, cur, {
  546. if (hash->key_destroy) {
  547. hash->key_destroy (k);
  548. }
  549. if (hash->value_destroy) {
  550. hash->value_destroy (cur.e.data);
  551. }
  552. });
  553. }
  554. g_free (hash->keys);
  555. g_free (hash->vals);
  556. g_free (hash->flags);
  557. g_free (hash->eviction_pool);
  558. g_free (hash);
  559. }
  560. }
  561. gpointer
  562. rspamd_lru_hash_element_data (rspamd_lru_element_t *elt)
  563. {
  564. return elt->data;
  565. }
  566. int
  567. rspamd_lru_hash_foreach (rspamd_lru_hash_t *h, int it, gpointer *k,
  568. gpointer *v)
  569. {
  570. gint i;
  571. g_assert (it >= 0);
  572. for (i = it; i != kh_end (h); ++i) {
  573. if (!kh_exist (h, i)) {
  574. continue;
  575. }
  576. *k = h->keys[i];
  577. *v = h->vals[i].e.data;
  578. break;
  579. }
  580. if (i == kh_end (h)) {
  581. return -1;
  582. }
  583. return i + 1;
  584. }
  585. guint
  586. rspamd_lru_hash_size (rspamd_lru_hash_t *hash)
  587. {
  588. return kh_size (hash);
  589. }
  590. /**
  591. * Returns hash capacity
  592. * @param hash hash object
  593. */
  594. guint
  595. rspamd_lru_hash_capacity (rspamd_lru_hash_t *hash)
  596. {
  597. return hash->maxsize;
  598. }