You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ucl_hash.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. /* Copyright (c) 2013, Vsevolod Stakhov
  2. * All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. * * Redistributions of source code must retain the above copyright
  7. * notice, this list of conditions and the following disclaimer.
  8. * * Redistributions in binary form must reproduce the above copyright
  9. * notice, this list of conditions and the following disclaimer in the
  10. * documentation and/or other materials provided with the distribution.
  11. *
  12. * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
  13. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  14. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  15. * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  16. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. */
  23. #include "ucl_internal.h"
  24. #include "ucl_hash.h"
  25. #include "khash.h"
  26. #include "utlist.h"
  27. #include "cryptobox.h"
  28. #include "libutil/str_util.h"
  29. #include "ucl.h"
  30. #include <time.h>
  31. #include <limits.h>
  32. struct ucl_hash_elt {
  33. const ucl_object_t *obj;
  34. struct ucl_hash_elt *prev, *next;
  35. };
  36. struct ucl_hash_struct {
  37. void *hash;
  38. struct ucl_hash_elt *head;
  39. bool caseless;
  40. };
  41. static uint64_t
  42. ucl_hash_seed (void)
  43. {
  44. static uint64_t seed;
  45. if (seed == 0) {
  46. #ifdef UCL_RANDOM_FUNCTION
  47. seed = UCL_RANDOM_FUNCTION;
  48. #else
  49. /* Not very random but can be useful for our purposes */
  50. seed = time (NULL);
  51. #endif
  52. }
  53. return seed;
  54. }
  55. extern const guchar lc_map[256];
  56. static inline uint32_t
  57. ucl_hash_func (const ucl_object_t *o)
  58. {
  59. return (uint32_t)rspamd_cryptobox_fast_hash (o->key, o->keylen, 0xb9a1ef83c4561c95ULL);
  60. }
  61. static inline int
  62. ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
  63. {
  64. if (k1->keylen == k2->keylen) {
  65. return memcmp (k1->key, k2->key, k1->keylen) == 0;
  66. }
  67. return 0;
  68. }
  69. KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt *, 1,
  70. ucl_hash_func, ucl_hash_equal)
  71. static inline uint32_t
  72. ucl_hash_caseless_func (const ucl_object_t *o)
  73. {
  74. unsigned len = o->keylen;
  75. unsigned leftover = o->keylen % 4;
  76. unsigned fp, i;
  77. const uint8_t* s = (const uint8_t*)o->key;
  78. union {
  79. struct {
  80. unsigned char c1, c2, c3, c4;
  81. } c;
  82. uint32_t pp;
  83. } u;
  84. uint64_t h = 0xe5ae6ab1ef9f3b54ULL;
  85. rspamd_cryptobox_fast_hash_state_t hst;
  86. fp = len - leftover;
  87. rspamd_cryptobox_fast_hash_init (&hst, h);
  88. for (i = 0; i != fp; i += 4) {
  89. u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
  90. u.c.c1 = lc_map[u.c.c1];
  91. u.c.c2 = lc_map[u.c.c2];
  92. u.c.c3 = lc_map[u.c.c3];
  93. u.c.c4 = lc_map[u.c.c4];
  94. rspamd_cryptobox_fast_hash_update (&hst, &u, sizeof (u));
  95. }
  96. u.pp = 0;
  97. switch (leftover) {
  98. case 3:
  99. u.c.c3 = lc_map[(unsigned char)s[i++]];
  100. case 2:
  101. /* fallthrough */
  102. u.c.c2 = lc_map[(unsigned char)s[i++]];
  103. case 1:
  104. /* fallthrough */
  105. u.c.c1 = lc_map[(unsigned char)s[i]];
  106. rspamd_cryptobox_fast_hash_update (&hst, &u, sizeof (u));
  107. break;
  108. }
  109. return (uint32_t)rspamd_cryptobox_fast_hash_final (&hst);
  110. }
  111. static inline bool
  112. ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2)
  113. {
  114. if (k1->keylen == k2->keylen) {
  115. return rspamd_lc_cmp (k1->key, k2->key, k1->keylen) == 0;
  116. }
  117. return false;
  118. }
  119. KHASH_INIT (ucl_hash_caseless_node, const ucl_object_t *, struct ucl_hash_elt *, 1,
  120. ucl_hash_caseless_func, ucl_hash_caseless_equal)
  121. ucl_hash_t*
  122. ucl_hash_create (bool ignore_case)
  123. {
  124. ucl_hash_t *new;
  125. new = UCL_ALLOC (sizeof (ucl_hash_t));
  126. if (new != NULL) {
  127. void *h;
  128. new->head = NULL;
  129. new->caseless = ignore_case;
  130. if (ignore_case) {
  131. h = (void *)kh_init (ucl_hash_caseless_node);
  132. }
  133. else {
  134. h = (void *)kh_init (ucl_hash_node);
  135. }
  136. if (h == NULL) {
  137. UCL_FREE (sizeof (ucl_hash_t), new);
  138. return NULL;
  139. }
  140. new->hash = h;
  141. }
  142. return new;
  143. }
  144. void ucl_hash_destroy (ucl_hash_t* hashlin, ucl_hash_free_func func)
  145. {
  146. if (hashlin == NULL) {
  147. return;
  148. }
  149. if (func != NULL) {
  150. /* Iterate over the hash first */
  151. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  152. hashlin->hash;
  153. khiter_t k;
  154. const ucl_object_t *cur, *tmp;
  155. for (k = kh_begin (h); k != kh_end (h); ++k) {
  156. if (kh_exist (h, k)) {
  157. cur = (kh_value (h, k))->obj;
  158. while (cur != NULL) {
  159. tmp = cur->next;
  160. func (__DECONST (ucl_object_t *, cur));
  161. cur = tmp;
  162. }
  163. }
  164. }
  165. }
  166. if (hashlin->caseless) {
  167. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  168. hashlin->hash;
  169. kh_destroy (ucl_hash_caseless_node, h);
  170. }
  171. else {
  172. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  173. hashlin->hash;
  174. kh_destroy (ucl_hash_node, h);
  175. }
  176. struct ucl_hash_elt *cur, *tmp;
  177. DL_FOREACH_SAFE(hashlin->head, cur, tmp) {
  178. UCL_FREE(sizeof(*cur), cur);
  179. }
  180. UCL_FREE (sizeof (*hashlin), hashlin);
  181. }
  182. bool
  183. ucl_hash_insert (ucl_hash_t* hashlin, const ucl_object_t *obj,
  184. const char *key, unsigned keylen)
  185. {
  186. khiter_t k;
  187. int ret;
  188. struct ucl_hash_elt **pelt, *elt;
  189. if (hashlin == NULL) {
  190. return false;
  191. }
  192. if (hashlin->caseless) {
  193. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  194. hashlin->hash;
  195. k = kh_put (ucl_hash_caseless_node, h, obj, &ret);
  196. if (ret > 0) {
  197. elt = UCL_ALLOC(sizeof(*elt));
  198. pelt = &kh_value (h, k);
  199. *pelt = elt;
  200. DL_APPEND(hashlin->head, elt);
  201. elt->obj = obj;
  202. }
  203. else if (ret < 0) {
  204. goto e0;
  205. }
  206. }
  207. else {
  208. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  209. hashlin->hash;
  210. k = kh_put (ucl_hash_node, h, obj, &ret);
  211. if (ret > 0) {
  212. elt = UCL_ALLOC(sizeof(*elt));
  213. pelt = &kh_value (h, k);
  214. *pelt = elt;
  215. DL_APPEND(hashlin->head, elt);
  216. elt->obj = obj;
  217. } else if (ret < 0) {
  218. goto e0;
  219. }
  220. }
  221. return true;
  222. e0:
  223. return false;
  224. }
  225. void ucl_hash_replace (ucl_hash_t* hashlin, const ucl_object_t *old,
  226. const ucl_object_t *new)
  227. {
  228. khiter_t k;
  229. int ret;
  230. struct ucl_hash_elt *elt, *nelt;
  231. if (hashlin == NULL) {
  232. return;
  233. }
  234. if (hashlin->caseless) {
  235. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  236. hashlin->hash;
  237. k = kh_put (ucl_hash_caseless_node, h, old, &ret);
  238. if (ret == 0) {
  239. elt = kh_value(h, k);
  240. kh_del (ucl_hash_caseless_node, h, k);
  241. k = kh_put (ucl_hash_caseless_node, h, new, &ret);
  242. nelt = UCL_ALLOC(sizeof(*nelt));
  243. nelt->obj = new;
  244. kh_value(h, k) = nelt;
  245. DL_REPLACE_ELEM(hashlin->head, elt, nelt);
  246. UCL_FREE(sizeof(*elt), elt);
  247. }
  248. }
  249. else {
  250. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  251. hashlin->hash;
  252. k = kh_put (ucl_hash_node, h, old, &ret);
  253. if (ret == 0) {
  254. elt = kh_value (h, k);
  255. kh_del (ucl_hash_node, h, k);
  256. k = kh_put (ucl_hash_node, h, new, &ret);
  257. nelt = UCL_ALLOC(sizeof(*nelt));
  258. nelt->obj = new;
  259. kh_value(h, k) = nelt;
  260. DL_REPLACE_ELEM(hashlin->head, elt, nelt);
  261. UCL_FREE(sizeof(*elt), elt);
  262. }
  263. }
  264. }
  265. struct ucl_hash_real_iter {
  266. const struct ucl_hash_elt *cur;
  267. };
  268. #define UHI_SETERR(ep, ern) {if (ep != NULL) *ep = (ern);}
  269. const void*
  270. ucl_hash_iterate2 (ucl_hash_t *hashlin, ucl_hash_iter_t *iter, int *ep)
  271. {
  272. struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *)(*iter);
  273. const ucl_object_t *ret = NULL;
  274. if (hashlin == NULL) {
  275. UHI_SETERR(ep, EINVAL);
  276. return NULL;
  277. }
  278. if (it == NULL) {
  279. it = UCL_ALLOC (sizeof (*it));
  280. if (it == NULL) {
  281. UHI_SETERR(ep, ENOMEM);
  282. return NULL;
  283. }
  284. it->cur = hashlin->head;
  285. }
  286. UHI_SETERR(ep, 0);
  287. if (it->cur) {
  288. ret = it->cur->obj;
  289. it->cur = it->cur->next;
  290. }
  291. else {
  292. UCL_FREE (sizeof (*it), it);
  293. *iter = NULL;
  294. return NULL;
  295. }
  296. *iter = it;
  297. return ret;
  298. }
  299. bool
  300. ucl_hash_iter_has_next (ucl_hash_t *hashlin, ucl_hash_iter_t iter)
  301. {
  302. struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *)(iter);
  303. return it->cur != NULL;
  304. }
  305. const ucl_object_t*
  306. ucl_hash_search (ucl_hash_t* hashlin, const char *key, unsigned keylen)
  307. {
  308. khiter_t k;
  309. const ucl_object_t *ret = NULL;
  310. ucl_object_t search;
  311. struct ucl_hash_elt *elt;
  312. search.key = key;
  313. search.keylen = keylen;
  314. if (hashlin == NULL) {
  315. return NULL;
  316. }
  317. if (hashlin->caseless) {
  318. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  319. hashlin->hash;
  320. k = kh_get (ucl_hash_caseless_node, h, &search);
  321. if (k != kh_end (h)) {
  322. elt = kh_value (h, k);
  323. ret = elt->obj;
  324. }
  325. }
  326. else {
  327. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  328. hashlin->hash;
  329. k = kh_get (ucl_hash_node, h, &search);
  330. if (k != kh_end (h)) {
  331. elt = kh_value (h, k);
  332. ret = elt->obj;
  333. }
  334. }
  335. return ret;
  336. }
  337. void
  338. ucl_hash_delete (ucl_hash_t* hashlin, const ucl_object_t *obj)
  339. {
  340. khiter_t k;
  341. struct ucl_hash_elt *elt;
  342. if (hashlin == NULL) {
  343. return;
  344. }
  345. if (hashlin->caseless) {
  346. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  347. hashlin->hash;
  348. k = kh_get (ucl_hash_caseless_node, h, obj);
  349. if (k != kh_end (h)) {
  350. elt = kh_value (h, k);
  351. DL_DELETE(hashlin->head, elt);
  352. kh_del (ucl_hash_caseless_node, h, k);
  353. UCL_FREE(sizeof(*elt), elt);
  354. }
  355. }
  356. else {
  357. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  358. hashlin->hash;
  359. k = kh_get (ucl_hash_node, h, obj);
  360. if (k != kh_end (h)) {
  361. elt = kh_value (h, k);
  362. DL_DELETE(hashlin->head, elt);
  363. kh_del (ucl_hash_node, h, k);
  364. UCL_FREE(sizeof(*elt), elt);
  365. }
  366. }
  367. }
  368. bool
  369. ucl_hash_reserve (ucl_hash_t *hashlin, size_t sz)
  370. {
  371. if (hashlin == NULL) {
  372. return false;
  373. }
  374. if (sz > kh_size((khash_t(ucl_hash_node) *)hashlin->hash)) {
  375. if (hashlin->caseless) {
  376. khash_t(ucl_hash_caseless_node) *h = (khash_t(
  377. ucl_hash_caseless_node) *)
  378. hashlin->hash;
  379. kh_resize (ucl_hash_caseless_node, h, sz * 2);
  380. } else {
  381. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  382. hashlin->hash;
  383. kh_resize (ucl_hash_node, h, sz * 2);
  384. }
  385. }
  386. return true;
  387. }
  388. static int
  389. ucl_hash_cmp_icase (const void *a, const void *b)
  390. {
  391. const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *)a,
  392. *ob = (const struct ucl_hash_elt *)b;
  393. if (oa->obj->keylen == ob->obj->keylen) {
  394. return rspamd_lc_cmp (oa->obj->key, ob->obj->key, oa->obj->keylen);
  395. }
  396. return ((int)(oa->obj->keylen)) - ob->obj->keylen;
  397. }
  398. static int
  399. ucl_hash_cmp_case_sens (const void *a, const void *b)
  400. {
  401. const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *)a,
  402. *ob = (const struct ucl_hash_elt *)b;
  403. if (oa->obj->keylen == ob->obj->keylen) {
  404. return memcmp (oa->obj->key, ob->obj->key, oa->obj->keylen);
  405. }
  406. return ((int)(oa->obj->keylen)) - ob->obj->keylen;
  407. }
  408. void
  409. ucl_hash_sort (ucl_hash_t *hashlin, enum ucl_object_keys_sort_flags fl)
  410. {
  411. if (fl & UCL_SORT_KEYS_ICASE) {
  412. DL_SORT(hashlin->head, ucl_hash_cmp_icase);
  413. }
  414. else {
  415. DL_SORT(hashlin->head, ucl_hash_cmp_case_sens);
  416. }
  417. if (fl & UCL_SORT_KEYS_RECURSIVE) {
  418. struct ucl_hash_elt *elt;
  419. DL_FOREACH(hashlin->head, elt) {
  420. if (ucl_object_type (elt->obj) == UCL_OBJECT) {
  421. ucl_hash_sort (elt->obj->value.ov, fl);
  422. }
  423. }
  424. }
  425. }