]> source.dussan.org Git - rspamd.git/commitdiff
[Fix] Improve strcase hash used in uthash
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 10 Jun 2016 13:46:03 +0000 (14:46 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 10 Jun 2016 13:46:03 +0000 (14:46 +0100)
src/libutil/str_util.c
src/libutil/str_util.h
src/libutil/uthash_strcase.h

index 67aa63aa80a949da8a7e84761e61adfd55a01021..1ce81bc9e6ac412791e73d003cf56fcc28fba5a0 100644 (file)
 #include "util.h"
 #include "cryptobox.h"
 #include "url.h"
+#include "str_util.h"
 #include <math.h>
 
-static const guchar lc_map[256] = {
+const guchar lc_map[256] = {
                0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
                0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
index a63b160ddd190866441acd05df33c00baadaad66..695a8d022e7ccfd80d6bb533a9ca4716d7f8e256 100644 (file)
@@ -308,4 +308,6 @@ gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
 /* Compare two urls for building emails hash */
 gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
 
+extern const guchar lc_map[256];
+
 #endif /* SRC_LIBUTIL_STR_UTIL_H_ */
index 5d1df130fdb4a0b4f3b932086424baf8634f2b68..45ed84f67d3417b69b9ff7db06dc74e29ee5be35 100644 (file)
 #ifndef UTHASH_STRCASE_H_
 #define UTHASH_STRCASE_H_
 
-#include "xxhash.h"
-
 
 /* Utils for uthash tuning */
 #ifndef HASH_CASELESS
 #define HASH_FUNCTION(key,keylen,num_bkts,hashv,bkt) do {\
-       hashv = XXH32(key, keylen, 0); \
+       hashv = mum(key, keylen, 0xdeadbabe); /* hash the raw key bytes; 0xdeadbabe is presumably the seed - confirm against mum's prototype */ \
        bkt = (hashv) & (num_bkts-1); \
 } while (0)
 
 #define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
 #else
 #define HASH_FUNCTION(key,keylen,num_bkts,hashv,bkt) do {\
-       XXH32_state_t xxh; \
-       XXH32_reset(&xxh, 0xdead);      \
-       unsigned char *p = (unsigned char *)key, t;     \
-       for (unsigned int i = 0; i < keylen; i ++) {    \
-               t = g_ascii_tolower(p[i]);      \
-               XXH32_update(&xxh, &t, 1);      \
-       }       \
-       hashv = XXH32_digest(&xxh);     \
-       bkt = (hashv) & (num_bkts-1);   \
+       unsigned len = keylen; \
+       unsigned leftover = keylen % 8; \
+       unsigned fp, i; \
+       const uint8_t* s = (const uint8_t*)key; \
+       union { \
+               struct { \
+                       unsigned char c1, c2, c3, c4, c5, c6, c7, c8; \
+               } c; \
+               uint64_t pp; \
+       } u; \
+       uint64_t r; \
+       fp = len - leftover; \
+       r = 0xdeadbabe; \
+       for (i = 0; i != fp; i += 8) { \
+               u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; \
+               u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7]; \
+               u.c.c1 = lc_map[u.c.c1]; \
+               u.c.c2 = lc_map[u.c.c2]; \
+               u.c.c3 = lc_map[u.c.c3]; \
+               u.c.c4 = lc_map[u.c.c4]; \
+               u.c.c1 = lc_map[u.c.c5]; \
+               u.c.c2 = lc_map[u.c.c6]; \
+               u.c.c3 = lc_map[u.c.c7]; \
+               u.c.c4 = lc_map[u.c.c8]; \
+               r = mum_hash_step (r, u.pp); \
+       } \
+       u.pp = 0; \
+       switch (leftover) { \
+       case 7: \
+               u.c.c7 = lc_map[(unsigned char)s[i++]]; \
+       case 6: \
+               u.c.c6 = lc_map[(unsigned char)s[i++]]; \
+       case 5: \
+               u.c.c5 = lc_map[(unsigned char)s[i++]]; \
+       case 4: \
+               u.c.c4 = lc_map[(unsigned char)s[i++]]; \
+       case 3: \
+               u.c.c3 = lc_map[(unsigned char)s[i++]]; \
+       case 2: \
+               u.c.c2 = lc_map[(unsigned char)s[i++]]; \
+       case 1: \
+               u.c.c1 = lc_map[(unsigned char)s[i]]; \
+               r = mum_hash_step (r, u.pp); \
+               break; \
+       } \
+       hashv = mum_hash_finish (r); \
+       bkt = (hashv) & (num_bkts-1); \
 } while (0)
-#define HASH_KEYCMP(a,b,len) strncasecmp(a,b,len)
+#define HASH_KEYCMP(a,b,len) rspamd_lc_cmp(a,b,len) /* replaces strncasecmp; presumably a caseless compare over len bytes - confirm in str_util.h */
 #endif
 
 #include "uthash.h"