You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

hash.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "hash.h"
  18. #include "util.h"
  19. #include "khash.h"
  20. /**
  21. * LRU hashing
  22. */
  23. static const guint log_base = 10;
  24. static const guint eviction_candidates = 16;
  25. static const gdouble lfu_base_value = 5.0;
  26. struct rspamd_lru_volatile_element_s;
  27. struct rspamd_lru_hash_s {
  28. guint maxsize;
  29. guint eviction_min_prio;
  30. guint eviction_used;
  31. struct rspamd_lru_element_s **eviction_pool;
  32. GDestroyNotify value_destroy;
  33. GDestroyNotify key_destroy;
  34. GHashFunc hfunc;
  35. GEqualFunc eqfunc;
  36. khint_t n_buckets, size, n_occupied, upper_bound;
  37. khint32_t *flags;
  38. gpointer *keys;
  39. struct rspamd_lru_volatile_element_s *vals;
  40. };
  41. enum rspamd_lru_element_flags {
  42. RSPAMD_LRU_ELEMENT_NORMAL = 0,
  43. RSPAMD_LRU_ELEMENT_VOLATILE = (1 << 0),
  44. RSPAMD_LRU_ELEMENT_IMMORTAL = (1 << 1),
  45. };
  46. struct rspamd_lru_element_s {
  47. guint16 last;
  48. guint8 lg_usages;
  49. guint8 eviction_pos;
  50. guint8 flags;
  51. gpointer data;
  52. };
  53. struct rspamd_lru_volatile_element_s {
  54. struct rspamd_lru_element_s e;
  55. time_t creation_time;
  56. time_t ttl;
  57. };
  58. typedef struct rspamd_lru_volatile_element_s rspamd_lru_vol_element_t;
  59. #define TIME_TO_TS(t) ((guint16) (((t) / 60) & 0xFFFFU))
  60. static rspamd_lru_vol_element_t *
  61. rspamd_lru_hash_get(const rspamd_lru_hash_t *h, gconstpointer key)
  62. {
  63. if (h->n_buckets) {
  64. khint_t k, i, last, mask, step = 0;
  65. mask = h->n_buckets - 1;
  66. k = h->hfunc(key);
  67. i = k & mask;
  68. last = i;
  69. while (!__ac_isempty(h->flags, i) &&
  70. (__ac_isdel(h->flags, i) || !h->eqfunc(h->keys[i], key))) {
  71. i = (i + (++step)) & mask;
  72. if (i == last) {
  73. return NULL;
  74. }
  75. }
  76. return __ac_iseither(h->flags, i) ? NULL : &h->vals[i];
  77. }
  78. return NULL;
  79. }
  80. static int
  81. rspamd_lru_hash_resize(rspamd_lru_hash_t *h,
  82. khint_t new_n_buckets)
  83. {
  84. /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */
  85. khint32_t *new_flags = 0;
  86. khint_t j = 1;
  87. kroundup32(new_n_buckets);
  88. if (new_n_buckets < 4) {
  89. new_n_buckets = 4;
  90. }
  91. if (h->size >= (khint_t) (new_n_buckets * __ac_HASH_UPPER + 0.5)) {
  92. j = 0;
  93. /* requested size is too small */
  94. }
  95. else {
  96. /* hash table size to be changed (shrink or expand); rehash */
  97. new_flags = (khint32_t *) g_malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t));
  98. if (!new_flags) {
  99. return -1;
  100. }
  101. memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t));
  102. if (h->n_buckets < new_n_buckets) {
  103. /* expand */
  104. gpointer *new_keys = (gpointer *) g_realloc((void *) h->keys,
  105. new_n_buckets * sizeof(gpointer));
  106. if (!new_keys) {
  107. g_free(new_flags);
  108. return -1;
  109. }
  110. h->keys = new_keys;
  111. rspamd_lru_vol_element_t *new_vals =
  112. (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
  113. new_n_buckets * sizeof(rspamd_lru_vol_element_t));
  114. if (!new_vals) {
  115. g_free(new_flags);
  116. return -1;
  117. }
  118. h->vals = new_vals;
  119. }
  120. /* Shrink */
  121. }
  122. if (j) {
  123. /* rehashing is needed */
  124. h->eviction_used = 0;
  125. for (j = 0; j != h->n_buckets; ++j) {
  126. if (__ac_iseither(h->flags, j) == 0) {
  127. gpointer key = h->keys[j];
  128. rspamd_lru_vol_element_t val;
  129. khint_t new_mask;
  130. new_mask = new_n_buckets - 1;
  131. val = h->vals[j];
  132. val.e.eviction_pos = (guint8) -1;
  133. __ac_set_isdel_true(h->flags, j);
  134. while (1) { /* kick-out process; sort of like in Cuckoo hashing */
  135. khint_t k, i, step = 0;
  136. k = h->hfunc(key);
  137. i = k & new_mask;
  138. while (!__ac_isempty(new_flags, i)) {
  139. i = (i + (++step)) & new_mask;
  140. }
  141. __ac_set_isempty_false(new_flags, i);
  142. if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) {
  143. /* kick out the existing element */
  144. {
  145. gpointer tmp = h->keys[i];
  146. h->keys[i] = key;
  147. key = tmp;
  148. }
  149. {
  150. rspamd_lru_vol_element_t tmp = h->vals[i];
  151. h->vals[i] = val;
  152. val = tmp;
  153. val.e.eviction_pos = (guint8) -1;
  154. }
  155. __ac_set_isdel_true(h->flags, i);
  156. /* mark it as deleted in the old hash table */
  157. }
  158. else { /* write the element and jump out of the loop */
  159. h->keys[i] = key;
  160. h->vals[i] = val;
  161. break;
  162. }
  163. }
  164. }
  165. }
  166. if (h->n_buckets > new_n_buckets) {
  167. /* shrink the hash table */
  168. h->keys = (gpointer *) g_realloc((void *) h->keys,
  169. new_n_buckets * sizeof(gpointer));
  170. h->vals = (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
  171. new_n_buckets * sizeof(rspamd_lru_vol_element_t));
  172. }
  173. g_free(h->flags); /* free the working space */
  174. h->flags = new_flags;
  175. h->n_buckets = new_n_buckets;
  176. h->n_occupied = h->size;
  177. h->upper_bound = (khint_t) (h->n_buckets * __ac_HASH_UPPER + 0.5);
  178. }
  179. return 0;
  180. }
  181. static rspamd_lru_vol_element_t *
  182. rspamd_lru_hash_put(rspamd_lru_hash_t *h, gpointer key, int *ret)
  183. {
  184. khint_t x;
  185. if (h->n_occupied >= h->upper_bound) {
  186. /* update the hash table */
  187. if (h->n_buckets > (h->size << 1)) {
  188. if (rspamd_lru_hash_resize(h, h->n_buckets - 1) < 0) {
  189. /* clear "deleted" elements */
  190. *ret = -1;
  191. return NULL;
  192. }
  193. }
  194. else if (rspamd_lru_hash_resize(h, h->n_buckets + 1) < 0) {
  195. /* expand the hash table */
  196. *ret = -1;
  197. return NULL;
  198. }
  199. }
  200. khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
  201. x = site = h->n_buckets;
  202. k = h->hfunc(key);
  203. i = k & mask;
  204. if (__ac_isempty(h->flags, i)) {
  205. x = i; /* for speed up */
  206. }
  207. else {
  208. last = i;
  209. while (!__ac_isempty(h->flags, i) &&
  210. (__ac_isdel(h->flags, i) ||
  211. !h->eqfunc(h->keys[i], key))) {
  212. if (__ac_isdel(h->flags, i)) {
  213. site = i;
  214. }
  215. i = (i + (++step)) & mask;
  216. if (i == last) {
  217. x = site;
  218. break;
  219. }
  220. }
  221. if (x == h->n_buckets) {
  222. if (__ac_isempty(h->flags, i) && site != h->n_buckets) {
  223. x = site;
  224. }
  225. else {
  226. x = i;
  227. }
  228. }
  229. }
  230. if (__ac_isempty(h->flags, x)) { /* not present at all */
  231. h->keys[x] = key;
  232. __ac_set_isboth_false(h->flags, x);
  233. ++h->size;
  234. ++h->n_occupied;
  235. *ret = 1;
  236. }
  237. else if (__ac_isdel(h->flags, x)) { /* deleted */
  238. h->keys[x] = key;
  239. __ac_set_isboth_false(h->flags, x);
  240. ++h->size;
  241. *ret = 2;
  242. }
  243. else {
  244. /* Don't touch h->keys[x] if present and not deleted */
  245. *ret = 0;
  246. }
  247. return &h->vals[x];
  248. }
  249. static void
  250. rspamd_lru_hash_del(rspamd_lru_hash_t *h, rspamd_lru_vol_element_t *elt)
  251. {
  252. khint_t x = elt - h->vals;
  253. if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {
  254. __ac_set_isdel_true(h->flags, x);
  255. --h->size;
  256. if (h->key_destroy) {
  257. h->key_destroy(h->keys[x]);
  258. }
  259. if (h->value_destroy) {
  260. h->value_destroy(elt->e.data);
  261. }
  262. }
  263. }
  264. static void
  265. rspamd_lru_hash_remove_evicted(rspamd_lru_hash_t *hash,
  266. rspamd_lru_element_t *elt)
  267. {
  268. guint i;
  269. rspamd_lru_element_t *cur;
  270. g_assert(hash->eviction_used > 0);
  271. g_assert(elt->eviction_pos < hash->eviction_used);
  272. memmove(&hash->eviction_pool[elt->eviction_pos],
  273. &hash->eviction_pool[elt->eviction_pos + 1],
  274. sizeof(rspamd_lru_element_t *) *
  275. (eviction_candidates - elt->eviction_pos - 1));
  276. hash->eviction_used--;
  277. if (hash->eviction_used > 0) {
  278. /* We also need to update min_prio and renumber eviction list */
  279. hash->eviction_min_prio = G_MAXUINT;
  280. for (i = 0; i < hash->eviction_used; i++) {
  281. cur = hash->eviction_pool[i];
  282. if (hash->eviction_min_prio > cur->lg_usages) {
  283. hash->eviction_min_prio = cur->lg_usages;
  284. }
  285. cur->eviction_pos = i;
  286. }
  287. }
  288. else {
  289. hash->eviction_min_prio = G_MAXUINT;
  290. }
  291. }
  292. static void
  293. rspamd_lru_hash_update_counter(rspamd_lru_element_t *elt)
  294. {
  295. guint8 counter = elt->lg_usages;
  296. if (counter != 255) {
  297. double r, baseval, p;
  298. r = rspamd_random_double_fast();
  299. baseval = counter - lfu_base_value;
  300. if (baseval < 0) {
  301. baseval = 0;
  302. }
  303. p = 1.0 / (baseval * log_base + 1);
  304. if (r < p) {
  305. elt->lg_usages++;
  306. }
  307. }
  308. }
  309. static inline void
  310. rspamd_lru_hash_decrease_counter(rspamd_lru_element_t *elt, time_t now)
  311. {
  312. if (now - elt->last > lfu_base_value) {
  313. /* Penalise counters for outdated records */
  314. elt->lg_usages /= 2;
  315. }
  316. }
  317. static gboolean
  318. rspamd_lru_hash_maybe_evict(rspamd_lru_hash_t *hash,
  319. rspamd_lru_element_t *elt)
  320. {
  321. guint i;
  322. rspamd_lru_element_t *cur;
  323. if (elt->eviction_pos == (guint8) -1) {
  324. if (hash->eviction_used < eviction_candidates) {
  325. /* There are free places in eviction pool */
  326. hash->eviction_pool[hash->eviction_used] = elt;
  327. elt->eviction_pos = hash->eviction_used;
  328. hash->eviction_used++;
  329. if (hash->eviction_min_prio > elt->lg_usages) {
  330. hash->eviction_min_prio = elt->lg_usages;
  331. }
  332. return TRUE;
  333. }
  334. else {
  335. /* Find any candidate that has higher usage count */
  336. for (i = 0; i < hash->eviction_used; i++) {
  337. cur = hash->eviction_pool[i];
  338. if (cur->lg_usages > elt->lg_usages) {
  339. cur->eviction_pos = -1;
  340. elt->eviction_pos = i;
  341. hash->eviction_pool[i] = elt;
  342. if (hash->eviction_min_prio > elt->lg_usages) {
  343. hash->eviction_min_prio = elt->lg_usages;
  344. }
  345. return TRUE;
  346. }
  347. }
  348. }
  349. }
  350. else {
  351. /* Already in the eviction list */
  352. return TRUE;
  353. }
  354. return FALSE;
  355. }
  356. static void
  357. rspamd_lru_hash_remove_node(rspamd_lru_hash_t *hash, rspamd_lru_element_t *elt)
  358. {
  359. if (elt->eviction_pos != (guint8) -1) {
  360. rspamd_lru_hash_remove_evicted(hash, elt);
  361. }
  362. rspamd_lru_hash_del(hash, (rspamd_lru_vol_element_t *) elt);
  363. }
  364. static void
  365. rspamd_lru_hash_evict(rspamd_lru_hash_t *hash, time_t now)
  366. {
  367. double r;
  368. guint i;
  369. rspamd_lru_element_t *elt = NULL;
  370. guint nexpired = 0;
  371. /*
  372. * We either evict one node from the eviction list
  373. * or, at some probability scan all table and update eviction
  374. * list first
  375. */
  376. r = rspamd_random_double_fast();
  377. if (r < ((double) eviction_candidates) / hash->maxsize) {
  378. /* Full hash scan */
  379. rspamd_lru_vol_element_t *cur;
  380. rspamd_lru_element_t *selected = NULL;
  381. kh_foreach_value_ptr(hash, cur, {
  382. rspamd_lru_element_t *node = &cur->e;
  383. if (node->flags & RSPAMD_LRU_ELEMENT_IMMORTAL) {
  384. continue;
  385. }
  386. if (node->flags & RSPAMD_LRU_ELEMENT_VOLATILE) {
  387. /* If element is expired, just remove it */
  388. if (now - cur->creation_time > cur->ttl) {
  389. rspamd_lru_hash_remove_node(hash, node);
  390. nexpired++;
  391. continue;
  392. }
  393. }
  394. else {
  395. rspamd_lru_hash_decrease_counter(node, now);
  396. if (rspamd_lru_hash_maybe_evict(hash, node)) {
  397. if (selected && node->lg_usages < selected->lg_usages) {
  398. selected = node;
  399. }
  400. else if (selected == NULL) {
  401. selected = node;
  402. }
  403. }
  404. }
  405. });
  406. if (selected) {
  407. elt = selected;
  408. }
  409. }
  410. else {
  411. /* Fast random eviction */
  412. for (i = 0; i < hash->eviction_used; i++) {
  413. elt = hash->eviction_pool[i];
  414. if (elt->lg_usages <= hash->eviction_min_prio) {
  415. break;
  416. }
  417. }
  418. }
  419. /* Evict if nothing else has been cleaned */
  420. if (elt && nexpired == 0) {
  421. rspamd_lru_hash_remove_node(hash, elt);
  422. }
  423. }
  424. rspamd_lru_hash_t *
  425. rspamd_lru_hash_new_full(gint maxsize,
  426. GDestroyNotify key_destroy,
  427. GDestroyNotify value_destroy,
  428. GHashFunc hf,
  429. GEqualFunc cmpf)
  430. {
  431. rspamd_lru_hash_t *h;
  432. if (maxsize < eviction_candidates * 2) {
  433. maxsize = eviction_candidates * 2;
  434. }
  435. h = g_malloc0(sizeof(rspamd_lru_hash_t));
  436. h->hfunc = hf;
  437. h->eqfunc = cmpf;
  438. h->eviction_pool = g_malloc0(sizeof(rspamd_lru_element_t *) *
  439. eviction_candidates);
  440. h->maxsize = maxsize;
  441. h->value_destroy = value_destroy;
  442. h->key_destroy = key_destroy;
  443. h->eviction_min_prio = G_MAXUINT;
  444. /* Preallocate some elements */
  445. rspamd_lru_hash_resize(h, MIN(h->maxsize, 128));
  446. return h;
  447. }
  448. rspamd_lru_hash_t *
  449. rspamd_lru_hash_new(gint maxsize,
  450. GDestroyNotify key_destroy,
  451. GDestroyNotify value_destroy)
  452. {
  453. return rspamd_lru_hash_new_full(maxsize,
  454. key_destroy, value_destroy,
  455. rspamd_strcase_hash, rspamd_strcase_equal);
  456. }
  457. gpointer
  458. rspamd_lru_hash_lookup(rspamd_lru_hash_t *hash, gconstpointer key, time_t now)
  459. {
  460. rspamd_lru_element_t *res;
  461. rspamd_lru_vol_element_t *vnode;
  462. vnode = rspamd_lru_hash_get(hash, (gpointer) key);
  463. if (vnode != NULL) {
  464. res = &vnode->e;
  465. if (res->flags & RSPAMD_LRU_ELEMENT_VOLATILE) {
  466. /* Check ttl */
  467. if (now - vnode->creation_time > vnode->ttl) {
  468. rspamd_lru_hash_remove_node(hash, res);
  469. return NULL;
  470. }
  471. }
  472. now = TIME_TO_TS(now);
  473. res->last = MAX(res->last, now);
  474. rspamd_lru_hash_update_counter(res);
  475. rspamd_lru_hash_maybe_evict(hash, res);
  476. return res->data;
  477. }
  478. return NULL;
  479. }
  480. gboolean
  481. rspamd_lru_hash_remove(rspamd_lru_hash_t *hash,
  482. gconstpointer key)
  483. {
  484. rspamd_lru_vol_element_t *res;
  485. res = rspamd_lru_hash_get(hash, key);
  486. if (res != NULL) {
  487. rspamd_lru_hash_remove_node(hash, &res->e);
  488. return TRUE;
  489. }
  490. return FALSE;
  491. }
  492. void rspamd_lru_hash_insert(rspamd_lru_hash_t *hash,
  493. gpointer key,
  494. gpointer value,
  495. time_t now,
  496. guint ttl)
  497. {
  498. rspamd_lru_element_t *node;
  499. rspamd_lru_vol_element_t *vnode;
  500. gint ret;
  501. vnode = rspamd_lru_hash_put(hash, key, &ret);
  502. node = &vnode->e;
  503. if (ret == 0) {
  504. /* Existing element, be careful about destructors */
  505. if (hash->value_destroy) {
  506. /* Remove old data */
  507. hash->value_destroy(vnode->e.data);
  508. }
  509. if (hash->key_destroy) {
  510. /* Here are dragons! */
  511. goffset off = vnode - hash->vals;
  512. hash->key_destroy(hash->keys[off]);
  513. hash->keys[off] = key;
  514. }
  515. }
  516. if (ttl == 0) {
  517. node->flags = RSPAMD_LRU_ELEMENT_NORMAL;
  518. }
  519. else {
  520. vnode->creation_time = now;
  521. vnode->ttl = ttl;
  522. node->flags = RSPAMD_LRU_ELEMENT_VOLATILE;
  523. }
  524. node->data = value;
  525. node->lg_usages = (guint8) lfu_base_value;
  526. node->last = TIME_TO_TS(now);
  527. node->eviction_pos = (guint8) -1;
  528. if (ret != 0) {
  529. /* Also need to check maxsize */
  530. if (kh_size(hash) >= hash->maxsize) {
  531. node->flags |= RSPAMD_LRU_ELEMENT_IMMORTAL;
  532. rspamd_lru_hash_evict(hash, now);
  533. node->flags &= ~RSPAMD_LRU_ELEMENT_IMMORTAL;
  534. }
  535. }
  536. rspamd_lru_hash_maybe_evict(hash, node);
  537. }
  538. void rspamd_lru_hash_destroy(rspamd_lru_hash_t *hash)
  539. {
  540. if (hash) {
  541. if (hash->key_destroy || hash->value_destroy) {
  542. gpointer k;
  543. rspamd_lru_vol_element_t cur;
  544. kh_foreach(hash, k, cur, {
  545. if (hash->key_destroy) {
  546. hash->key_destroy(k);
  547. }
  548. if (hash->value_destroy) {
  549. hash->value_destroy(cur.e.data);
  550. }
  551. });
  552. }
  553. g_free(hash->keys);
  554. g_free(hash->vals);
  555. g_free(hash->flags);
  556. g_free(hash->eviction_pool);
  557. g_free(hash);
  558. }
  559. }
  560. gpointer
  561. rspamd_lru_hash_element_data(rspamd_lru_element_t *elt)
  562. {
  563. return elt->data;
  564. }
  565. int rspamd_lru_hash_foreach(rspamd_lru_hash_t *h, int it, gpointer *k,
  566. gpointer *v)
  567. {
  568. gint i;
  569. g_assert(it >= 0);
  570. for (i = it; i != kh_end(h); ++i) {
  571. if (!kh_exist(h, i)) {
  572. continue;
  573. }
  574. *k = h->keys[i];
  575. *v = h->vals[i].e.data;
  576. break;
  577. }
  578. if (i == kh_end(h)) {
  579. return -1;
  580. }
  581. return i + 1;
  582. }
  583. guint rspamd_lru_hash_size(rspamd_lru_hash_t *hash)
  584. {
  585. return kh_size(hash);
  586. }
  587. /**
  588. * Returns hash capacity
  589. * @param hash hash object
  590. */
  591. guint rspamd_lru_hash_capacity(rspamd_lru_hash_t *hash)
  592. {
  593. return hash->maxsize;
  594. }