You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ucl_hash.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. /* Copyright (c) 2013, Vsevolod Stakhov
  2. * All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. * * Redistributions of source code must retain the above copyright
  7. * notice, this list of conditions and the following disclaimer.
  8. * * Redistributions in binary form must reproduce the above copyright
  9. * notice, this list of conditions and the following disclaimer in the
  10. * documentation and/or other materials provided with the distribution.
  11. *
  12. * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
  13. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  14. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  15. * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  16. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. */
  23. #include "ucl_internal.h"
  24. #include "ucl_hash.h"
  25. #include "khash.h"
  26. #include "utlist.h"
  27. #include "cryptobox.h"
  28. #include "libutil/str_util.h"
  29. #include "ucl.h"
  30. #include <time.h>
  31. #include <limits.h>
  32. struct ucl_hash_elt {
  33. const ucl_object_t *obj;
  34. struct ucl_hash_elt *prev, *next;
  35. };
  36. struct ucl_hash_struct {
  37. void *hash;
  38. struct ucl_hash_elt *head;
  39. bool caseless;
  40. };
  41. static uint64_t
  42. ucl_hash_seed(void)
  43. {
  44. static uint64_t seed;
  45. if (seed == 0) {
  46. #ifdef UCL_RANDOM_FUNCTION
  47. seed = UCL_RANDOM_FUNCTION;
  48. #else
  49. /* Not very random but can be useful for our purposes */
  50. seed = time(NULL);
  51. #endif
  52. }
  53. return seed;
  54. }
  55. extern const unsigned char lc_map[256];
  56. static inline uint32_t
  57. ucl_hash_func(const ucl_object_t *o)
  58. {
  59. return (uint32_t) rspamd_cryptobox_fast_hash(o->key, o->keylen, 0xb9a1ef83c4561c95ULL);
  60. }
  61. static inline int
  62. ucl_hash_equal(const ucl_object_t *k1, const ucl_object_t *k2)
  63. {
  64. if (k1->keylen == k2->keylen) {
  65. return memcmp(k1->key, k2->key, k1->keylen) == 0;
  66. }
  67. return 0;
  68. }
  69. KHASH_INIT(ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt *, 1,
  70. ucl_hash_func, ucl_hash_equal)
  71. static inline uint32_t
  72. ucl_hash_caseless_func(const ucl_object_t *o)
  73. {
  74. unsigned len = o->keylen;
  75. unsigned leftover = o->keylen % 4;
  76. unsigned fp, i;
  77. const uint8_t *s = (const uint8_t *) o->key;
  78. union {
  79. struct {
  80. unsigned char c1, c2, c3, c4;
  81. } c;
  82. uint32_t pp;
  83. } u;
  84. uint64_t h = 0xe5ae6ab1ef9f3b54ULL;
  85. rspamd_cryptobox_fast_hash_state_t hst;
  86. fp = len - leftover;
  87. rspamd_cryptobox_fast_hash_init(&hst, h);
  88. for (i = 0; i != fp; i += 4) {
  89. u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
  90. u.c.c1 = lc_map[u.c.c1];
  91. u.c.c2 = lc_map[u.c.c2];
  92. u.c.c3 = lc_map[u.c.c3];
  93. u.c.c4 = lc_map[u.c.c4];
  94. rspamd_cryptobox_fast_hash_update(&hst, &u, sizeof(u));
  95. }
  96. u.pp = 0;
  97. switch (leftover) {
  98. case 3:
  99. u.c.c3 = lc_map[(unsigned char) s[i++]];
  100. case 2:
  101. /* fallthrough */
  102. u.c.c2 = lc_map[(unsigned char) s[i++]];
  103. case 1:
  104. /* fallthrough */
  105. u.c.c1 = lc_map[(unsigned char) s[i]];
  106. rspamd_cryptobox_fast_hash_update(&hst, &u, sizeof(u));
  107. break;
  108. }
  109. return (uint32_t) rspamd_cryptobox_fast_hash_final(&hst);
  110. }
  111. static inline bool
  112. ucl_hash_caseless_equal(const ucl_object_t *k1, const ucl_object_t *k2)
  113. {
  114. if (k1->keylen == k2->keylen) {
  115. return rspamd_lc_cmp(k1->key, k2->key, k1->keylen) == 0;
  116. }
  117. return false;
  118. }
  119. KHASH_INIT(ucl_hash_caseless_node, const ucl_object_t *, struct ucl_hash_elt *, 1,
  120. ucl_hash_caseless_func, ucl_hash_caseless_equal)
  121. ucl_hash_t *
  122. ucl_hash_create(bool ignore_case)
  123. {
  124. ucl_hash_t *new;
  125. new = UCL_ALLOC(sizeof(ucl_hash_t));
  126. if (new != NULL) {
  127. void *h;
  128. new->head = NULL;
  129. new->caseless = ignore_case;
  130. if (ignore_case) {
  131. h = (void *) kh_init(ucl_hash_caseless_node);
  132. }
  133. else {
  134. h = (void *) kh_init(ucl_hash_node);
  135. }
  136. if (h == NULL) {
  137. UCL_FREE(sizeof(ucl_hash_t), new);
  138. return NULL;
  139. }
  140. new->hash = h;
  141. }
  142. return new;
  143. }
  144. void ucl_hash_destroy(ucl_hash_t *hashlin, ucl_hash_free_func func)
  145. {
  146. if (hashlin == NULL) {
  147. return;
  148. }
  149. if (func != NULL) {
  150. /* Iterate over the hash first */
  151. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  152. hashlin->hash;
  153. khiter_t k;
  154. const ucl_object_t *cur, *tmp;
  155. for (k = kh_begin(h); k != kh_end(h); ++k) {
  156. if (kh_exist(h, k)) {
  157. cur = (kh_value(h, k))->obj;
  158. while (cur != NULL) {
  159. tmp = cur->next;
  160. func(__DECONST(ucl_object_t *, cur));
  161. cur = tmp;
  162. }
  163. }
  164. }
  165. }
  166. if (hashlin->caseless) {
  167. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  168. hashlin->hash;
  169. kh_destroy(ucl_hash_caseless_node, h);
  170. }
  171. else {
  172. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  173. hashlin->hash;
  174. kh_destroy(ucl_hash_node, h);
  175. }
  176. struct ucl_hash_elt *cur, *tmp;
  177. DL_FOREACH_SAFE(hashlin->head, cur, tmp)
  178. {
  179. UCL_FREE(sizeof(*cur), cur);
  180. }
  181. UCL_FREE(sizeof(*hashlin), hashlin);
  182. }
  183. bool ucl_hash_insert(ucl_hash_t *hashlin, const ucl_object_t *obj,
  184. const char *key, unsigned keylen)
  185. {
  186. khiter_t k;
  187. int ret;
  188. struct ucl_hash_elt **pelt, *elt;
  189. if (hashlin == NULL) {
  190. return false;
  191. }
  192. if (hashlin->caseless) {
  193. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  194. hashlin->hash;
  195. k = kh_put(ucl_hash_caseless_node, h, obj, &ret);
  196. if (ret > 0) {
  197. elt = UCL_ALLOC(sizeof(*elt));
  198. pelt = &kh_value(h, k);
  199. *pelt = elt;
  200. DL_APPEND(hashlin->head, elt);
  201. elt->obj = obj;
  202. }
  203. else if (ret < 0) {
  204. goto e0;
  205. }
  206. }
  207. else {
  208. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  209. hashlin->hash;
  210. k = kh_put(ucl_hash_node, h, obj, &ret);
  211. if (ret > 0) {
  212. elt = UCL_ALLOC(sizeof(*elt));
  213. pelt = &kh_value(h, k);
  214. *pelt = elt;
  215. DL_APPEND(hashlin->head, elt);
  216. elt->obj = obj;
  217. }
  218. else if (ret < 0) {
  219. goto e0;
  220. }
  221. }
  222. return true;
  223. e0:
  224. return false;
  225. }
  226. void ucl_hash_replace(ucl_hash_t *hashlin, const ucl_object_t *old,
  227. const ucl_object_t *new)
  228. {
  229. khiter_t k;
  230. int ret;
  231. struct ucl_hash_elt *elt, *nelt;
  232. if (hashlin == NULL) {
  233. return;
  234. }
  235. if (hashlin->caseless) {
  236. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  237. hashlin->hash;
  238. k = kh_put(ucl_hash_caseless_node, h, old, &ret);
  239. if (ret == 0) {
  240. elt = kh_value(h, k);
  241. kh_del(ucl_hash_caseless_node, h, k);
  242. k = kh_put(ucl_hash_caseless_node, h, new, &ret);
  243. nelt = UCL_ALLOC(sizeof(*nelt));
  244. nelt->obj = new;
  245. kh_value(h, k) = nelt;
  246. DL_REPLACE_ELEM(hashlin->head, elt, nelt);
  247. UCL_FREE(sizeof(*elt), elt);
  248. }
  249. }
  250. else {
  251. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  252. hashlin->hash;
  253. k = kh_put(ucl_hash_node, h, old, &ret);
  254. if (ret == 0) {
  255. elt = kh_value(h, k);
  256. kh_del(ucl_hash_node, h, k);
  257. k = kh_put(ucl_hash_node, h, new, &ret);
  258. nelt = UCL_ALLOC(sizeof(*nelt));
  259. nelt->obj = new;
  260. kh_value(h, k) = nelt;
  261. DL_REPLACE_ELEM(hashlin->head, elt, nelt);
  262. UCL_FREE(sizeof(*elt), elt);
  263. }
  264. }
  265. }
  266. struct ucl_hash_real_iter {
  267. const struct ucl_hash_elt *cur;
  268. };
  269. #define UHI_SETERR(ep, ern) \
  270. { \
  271. if (ep != NULL) *ep = (ern); \
  272. }
  273. const void *
  274. ucl_hash_iterate2(ucl_hash_t *hashlin, ucl_hash_iter_t *iter, int *ep)
  275. {
  276. struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *) (*iter);
  277. const ucl_object_t *ret = NULL;
  278. if (hashlin == NULL) {
  279. UHI_SETERR(ep, EINVAL);
  280. return NULL;
  281. }
  282. if (it == NULL) {
  283. it = UCL_ALLOC(sizeof(*it));
  284. if (it == NULL) {
  285. UHI_SETERR(ep, ENOMEM);
  286. return NULL;
  287. }
  288. it->cur = hashlin->head;
  289. }
  290. UHI_SETERR(ep, 0);
  291. if (it->cur) {
  292. ret = it->cur->obj;
  293. it->cur = it->cur->next;
  294. }
  295. else {
  296. UCL_FREE(sizeof(*it), it);
  297. *iter = NULL;
  298. return NULL;
  299. }
  300. *iter = it;
  301. return ret;
  302. }
  303. bool ucl_hash_iter_has_next(ucl_hash_t *hashlin, ucl_hash_iter_t iter)
  304. {
  305. struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *) (iter);
  306. return it->cur != NULL;
  307. }
  308. const ucl_object_t *
  309. ucl_hash_search(ucl_hash_t *hashlin, const char *key, unsigned keylen)
  310. {
  311. khiter_t k;
  312. const ucl_object_t *ret = NULL;
  313. ucl_object_t search;
  314. struct ucl_hash_elt *elt;
  315. search.key = key;
  316. search.keylen = keylen;
  317. if (hashlin == NULL) {
  318. return NULL;
  319. }
  320. if (hashlin->caseless) {
  321. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  322. hashlin->hash;
  323. k = kh_get(ucl_hash_caseless_node, h, &search);
  324. if (k != kh_end(h)) {
  325. elt = kh_value(h, k);
  326. ret = elt->obj;
  327. }
  328. }
  329. else {
  330. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  331. hashlin->hash;
  332. k = kh_get(ucl_hash_node, h, &search);
  333. if (k != kh_end(h)) {
  334. elt = kh_value(h, k);
  335. ret = elt->obj;
  336. }
  337. }
  338. return ret;
  339. }
  340. void ucl_hash_delete(ucl_hash_t *hashlin, const ucl_object_t *obj)
  341. {
  342. khiter_t k;
  343. struct ucl_hash_elt *elt;
  344. if (hashlin == NULL) {
  345. return;
  346. }
  347. if (hashlin->caseless) {
  348. khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
  349. hashlin->hash;
  350. k = kh_get(ucl_hash_caseless_node, h, obj);
  351. if (k != kh_end(h)) {
  352. elt = kh_value(h, k);
  353. DL_DELETE(hashlin->head, elt);
  354. kh_del(ucl_hash_caseless_node, h, k);
  355. UCL_FREE(sizeof(*elt), elt);
  356. }
  357. }
  358. else {
  359. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  360. hashlin->hash;
  361. k = kh_get(ucl_hash_node, h, obj);
  362. if (k != kh_end(h)) {
  363. elt = kh_value(h, k);
  364. DL_DELETE(hashlin->head, elt);
  365. kh_del(ucl_hash_node, h, k);
  366. UCL_FREE(sizeof(*elt), elt);
  367. }
  368. }
  369. }
  370. bool ucl_hash_reserve(ucl_hash_t *hashlin, size_t sz)
  371. {
  372. if (hashlin == NULL) {
  373. return false;
  374. }
  375. if (sz > kh_size((khash_t(ucl_hash_node) *) hashlin->hash)) {
  376. if (hashlin->caseless) {
  377. khash_t(ucl_hash_caseless_node) *h = (khash_t(
  378. ucl_hash_caseless_node) *)
  379. hashlin->hash;
  380. kh_resize(ucl_hash_caseless_node, h, sz * 2);
  381. }
  382. else {
  383. khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
  384. hashlin->hash;
  385. kh_resize(ucl_hash_node, h, sz * 2);
  386. }
  387. }
  388. return true;
  389. }
  390. static int
  391. ucl_hash_cmp_icase(const void *a, const void *b)
  392. {
  393. const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *) a,
  394. *ob = (const struct ucl_hash_elt *) b;
  395. if (oa->obj->keylen == ob->obj->keylen) {
  396. return rspamd_lc_cmp(oa->obj->key, ob->obj->key, oa->obj->keylen);
  397. }
  398. return ((int) (oa->obj->keylen)) - ob->obj->keylen;
  399. }
  400. static int
  401. ucl_hash_cmp_case_sens(const void *a, const void *b)
  402. {
  403. const struct ucl_hash_elt *oa = (const struct ucl_hash_elt *) a,
  404. *ob = (const struct ucl_hash_elt *) b;
  405. if (oa->obj->keylen == ob->obj->keylen) {
  406. return memcmp(oa->obj->key, ob->obj->key, oa->obj->keylen);
  407. }
  408. return ((int) (oa->obj->keylen)) - ob->obj->keylen;
  409. }
  410. void ucl_hash_sort(ucl_hash_t *hashlin, enum ucl_object_keys_sort_flags fl)
  411. {
  412. if (fl & UCL_SORT_KEYS_ICASE) {
  413. DL_SORT(hashlin->head, ucl_hash_cmp_icase);
  414. }
  415. else {
  416. DL_SORT(hashlin->head, ucl_hash_cmp_case_sens);
  417. }
  418. if (fl & UCL_SORT_KEYS_RECURSIVE) {
  419. struct ucl_hash_elt *elt;
  420. DL_FOREACH(hashlin->head, elt)
  421. {
  422. if (ucl_object_type(elt->obj) == UCL_OBJECT) {
  423. ucl_hash_sort(elt->obj->value.ov, fl);
  424. }
  425. }
  426. }
  427. }