From 1b39a1b22f04395501138a8b31196704dbc21e59 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 10 Jun 2016 14:46:03 +0100 Subject: [PATCH] [Fix] Improve strcase hash used in uthash --- src/libutil/str_util.c | 3 +- src/libutil/str_util.h | 2 ++ src/libutil/uthash_strcase.h | 62 ++++++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 67aa63aa8..1ce81bc9e 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -17,9 +17,10 @@ #include "util.h" #include "cryptobox.h" #include "url.h" +#include "str_util.h" #include -static const guchar lc_map[256] = { +const guchar lc_map[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index a63b160dd..695a8d022 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -308,4 +308,6 @@ gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b); /* Compare two urls for building emails hash */ gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b); +extern const guchar lc_map[256]; + #endif /* SRC_LIBUTIL_STR_UTIL_H_ */ diff --git a/src/libutil/uthash_strcase.h b/src/libutil/uthash_strcase.h index 5d1df130f..45ed84f67 100644 --- a/src/libutil/uthash_strcase.h +++ b/src/libutil/uthash_strcase.h @@ -16,30 +16,66 @@ #ifndef UTHASH_STRCASE_H_ #define UTHASH_STRCASE_H_ -#include "xxhash.h" - /* Utils for uthash tuning */ #ifndef HASH_CASELESS #define HASH_FUNCTION(key,keylen,num_bkts,hashv,bkt) do {\ - hashv = XXH32(key, keylen, 0); \ + hashv = mum(key, keylen, 0xdeadbabe); \ bkt = (hashv) & (num_bkts-1); \ } while (0) #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) #else #define HASH_FUNCTION(key,keylen,num_bkts,hashv,bkt) do {\ - XXH32_state_t xxh; \ - XXH32_reset(&xxh, 0xdead); \ - unsigned char *p = (unsigned char *)key, t; \ - for (unsigned int i = 0; i < keylen; i ++) { \ - t = g_ascii_tolower(p[i]); \ - XXH32_update(&xxh, &t, 1); \ - } \ - hashv = XXH32_digest(&xxh); \ - bkt = (hashv) & (num_bkts-1); \ + unsigned len = keylen; \ + unsigned leftover = keylen % 8; \ + unsigned fp, i; \ + const uint8_t* s = (const uint8_t*)key; \ + union { \ + struct { \ + unsigned char c1, c2, c3, c4, c5, c6, c7, c8; \ + } c; \ + uint64_t pp; \ + } u; \ + uint64_t r; \ + fp = len - leftover; \ + r = 0xdeadbabe; \ + for (i = 0; i != fp; i += 8) { \ + u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; \ + u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7]; \ + u.c.c1 = lc_map[u.c.c1]; \ + u.c.c2 = lc_map[u.c.c2]; \ + u.c.c3 = lc_map[u.c.c3]; \ + u.c.c4 = lc_map[u.c.c4]; \ + u.c.c1 = lc_map[u.c.c5]; \ + u.c.c2 = lc_map[u.c.c6]; \ + u.c.c3 = lc_map[u.c.c7]; \ + u.c.c4 = lc_map[u.c.c8]; \ + r = mum_hash_step (r, u.pp); \ + } \ + u.pp = 0; \ + switch (leftover) { \ + case 7: \ + u.c.c7 = lc_map[(unsigned char)s[i++]]; \ + case 6: \ + u.c.c6 = lc_map[(unsigned char)s[i++]]; \ + case 5: \ + u.c.c5 = lc_map[(unsigned char)s[i++]]; \ + case 4: \ + u.c.c4 = lc_map[(unsigned char)s[i++]]; \ + case 3: \ + u.c.c3 = lc_map[(unsigned char)s[i++]]; \ + case 2: \ + u.c.c2 = lc_map[(unsigned char)s[i++]]; \ + case 1: \ + u.c.c1 = lc_map[(unsigned char)s[i]]; \ + r = mum_hash_step (r, u.pp); \ + break; \ + } \ + hashv = mum_hash_finish (r); \ + bkt = (hashv) & (num_bkts-1); \ } while (0) -#define HASH_KEYCMP(a,b,len) strncasecmp(a,b,len) +#define HASH_KEYCMP(a,b,len) rspamd_lc_cmp(a,b,len) #endif #include "uthash.h" -- 2.39.5