From 63eb2e602070feb5536c08009e494cbab20ad921 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 11 May 2016 14:59:29 +0100
Subject: [Feature] Add and use mumhash for non-crypto hashing

---
 contrib/mumhash/mum.h | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 369 insertions(+)
 create mode 100644 contrib/mumhash/mum.h
(limited to 'contrib')

diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h
new file mode 100644
index 000000000..9391de0a6
--- /dev/null
+++ b/contrib/mumhash/mum.h
@@ -0,0 +1,369 @@
+/* Copyright (c) 2016 Vladimir Makarov
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* This file implements MUM (MUltiply and Mix) hashing.  We randomize
+   input data by a 64x64-bit multiplication, mix the high and low parts
+   of the multiplication result with an addition, and then mix the sum
+   into the current state.  For the multiplication we use prime numbers
+   randomly generated with equal probability of their bit values.  When
+   all primes have been used once, the state is randomized and the same
+   prime numbers are reused for data randomization.
+
+   MUM hashing passes all SMHasher tests.  A pseudo-random number
+   generator based on MUM also passes the NIST Statistical Test Suite
+   for Random and Pseudorandom Number Generators for Cryptographic
+   Applications (version 2.2.1) with 1000 bitstreams, each containing
+   1M bits.  MUM hashing is also faster than Spooky64 and City64 on
+   small strings (at least up to 512 bits) on Haswell and Power7.  The
+   MUM bulk speed (speed on very long data) is higher than Spooky's and
+   City's on Power7.  On Haswell the bulk speed is higher than Spooky's
+   and close to City's. */
+
+#ifndef __MUM_HASH__
+#define __MUM_HASH__
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef _MSC_VER
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+#ifdef __GNUC__
+#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
+#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
+#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#else
+#define _MUM_ATTRIBUTE_UNUSED
+#define _MUM_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
+#endif
+
+/* Macro saying to use 128-bit integers implemented by GCC for some
+   targets. */
+#ifndef _MUM_USE_INT128
+/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
+   HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
+   otherwise int. */
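As an aside, the 128-bit path gated by _MUM_USE_INT128 boils down to one widening multiply plus one fold. A minimal stand-alone sketch (not part of the patch; the function name is ad hoc) of the core MUM step on a compiler that provides __uint128_t:

#include <stdint.h>

static inline uint64_t
mum_step_sketch (uint64_t v, uint64_t p)
{
  /* Widening 64x64->128-bit multiply. */
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  /* Mixing the high and low halves with an addition makes the result
     depend on all 128 bits of the product. */
  return (uint64_t) (r >> 64) + (uint64_t) r;
}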
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+#define _MUM_USE_INT128 1
+#else
+#define _MUM_USE_INT128 0
+#endif
+#endif
+
+
+/* Here are different primes randomly generated with equal probability
+   of their bit values.  They are used to randomize input values. */
+static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
+static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
+static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
+static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
+static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
+static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
+static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
+
+static uint64_t _mum_primes [] = {
+  0x9ebdcae10d981691, 0x32b9b9b97a27ac7d, 0x29b5584d83d35bbd, 0x4b04e0e61401255f,
+  0x25e8f7b1f1c9d027, 0x80d4c8c000f3e881, 0xbd1255431904b9dd, 0x8a3bd4485eee6d81,
+  0x3bc721b2aad05197, 0x71b1a19b907d6e33, 0x525e6c1084a8534b, 0x9e4c2cd340c1299f,
+  0xde3add92e94caa37, 0x7e14eadb1f65311d, 0x3f5aa40f89812853, 0x33b15a3b587d15c9,
+};
+
+/* Multiply 64-bit V and P and return the sum of the high and low parts
+   of the result. */
+static inline uint64_t
+_mum (uint64_t v, uint64_t p) {
+  uint64_t hi, lo;
+#if _MUM_USE_INT128
+#if defined(__aarch64__)
+  /* AARCH64 needs 2 insns to calculate the 128-bit result of the
+     multiplication.  If we use generic code, we actually call a
+     function doing 128x128->128 bit multiplication.  That function is
+     very slow. */
+  lo = v * p;
+  asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
+#else
+  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
+  hi = (uint64_t) (r >> 64);
+  lo = (uint64_t) r;
+#endif
+#else
+  /* Implementation of 64x64->128-bit multiplication by four 32x32->64
+     bit multiplications. */
+  uint64_t hv = v >> 32, hp = p >> 32;
+  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+  uint64_t rh = hv * hp;
+  uint64_t rm_0 = hv * lp;
+  uint64_t rm_1 = hp * lv;
+  uint64_t rl = lv * lp;
+  uint64_t t, carry = 0;
+
+  /* We could ignore a carry bit here if we did not care about getting
+     the same hash on 32-bit and 64-bit targets. */
+  t = rl + (rm_0 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry = t < rl;
+#endif
+  lo = t + (rm_1 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry += lo < t;
+#endif
+  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
+#endif
+  /* We could use XOR here too but, for some reason, on Haswell and
+     Power7 using an addition improves hashing performance by 10% for
+     small strings. */
+  return hi + lo;
+}
+
+#if defined(_MSC_VER)
+#define _mum_bswap64(x) _byteswap_uint64 (x)
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define _mum_bswap64(x) OSSwapInt64 (x)
+#elif defined(__GNUC__)
+#define _mum_bswap64(x) __builtin_bswap64 (x)
+#else
+#include <byteswap.h>
+#define _mum_bswap64(x) bswap64 (x)
+#endif
+
+static inline uint64_t
+_mum_le (uint64_t v) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+  return v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  return _mum_bswap64 (v);
+#else
+#error "Unknown endianness"
+#endif
+}
+
+/* Macro defining how many times the most nested loop in
+   _mum_hash_aligned will be unrolled by the compiler (although the
+   compiler can make its own decision :).  Use only a constant here to
+   help the compiler unroll the major loop.
+
+   The macro value affects the resulting hash for strings > 128 bits.
+   The unroll factor greatly affects the hashing speed.  We prefer the
+   speed. */
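The portable branch of _mum () above decomposes the 64x64 multiply into four 32x32->64 products, propagating carries only when MUM_TARGET_INDEPENDENT_HASH is defined. A quick self-check sketch (not part of the patch; names are ad hoc) comparing that decomposition, with carries always on, against the compiler's native 128-bit product:

#include <assert.h>
#include <stdint.h>

static void
check_mum_decomposition (uint64_t v, uint64_t p)
{
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  uint64_t hv = v >> 32, hp = p >> 32;
  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
  uint64_t rm_0 = hv * lp, rm_1 = hp * lv, rl = lv * lp;
  uint64_t t = rl + (rm_0 << 32), carry = t < rl;
  uint64_t lo = t + (rm_1 << 32);
  carry += lo < t;
  uint64_t hi = hv * hp + (rm_0 >> 32) + (rm_1 >> 32) + carry;
  /* With the carry bits accounted for, both paths must agree. */
  assert (hi == (uint64_t) (r >> 64) && lo == (uint64_t) r);
}

int
main (void)
{
  /* sample inputs; the first pair reuses two primes from the file */
  check_mum_decomposition (0x2e0bb864e9ea7df5ULL, 0xcdb32970830fcaa1ULL);
  check_mum_decomposition (~0ULL, ~0ULL);
  return 0;
}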
+#ifndef _MUM_UNROLL_FACTOR_POWER
+#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 3
+#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 4
+#else
+#define _MUM_UNROLL_FACTOR_POWER 2
+#endif
+#endif
+
+#if _MUM_UNROLL_FACTOR_POWER < 1
+#error "too small unroll factor"
+#elif _MUM_UNROLL_FACTOR_POWER > 4
+#error "not enough primes for such an unroll factor"
+#endif
+
+#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
+
+static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
+_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
+  uint64_t result = start;
+  const unsigned char *str = (const unsigned char *) key;
+  union {uint64_t i; unsigned char s[sizeof (uint64_t)];} p;
+  int i;
+  size_t n;
+
+  result = _mum (result, _mum_block_start_prime);
+  while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
+    /* This loop could be vectorized when we have vector insns for
+       64x64->128-bit multiplication.  AVX2 currently has only a
+       vector insn for four 32x32->64-bit multiplications. */
+    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
+      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+    /* We will use the same prime numbers on the next iterations --
+       randomize the state. */
+    result = _mum (result, _mum_unroll_prime);
+  }
+  n = len / sizeof (uint64_t);
+  for (i = 0; i < n; i++)
+    result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+  len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
+  if (len) {
+    p.i = 0; memcpy (p.s, str, len);
+    result ^= _mum (_mum_le (((uint64_t *) p.s)[0]), _mum_tail_prime);
+  }
+  return result;
+}
+
+/* Final randomization of H. */
+static inline uint64_t
+_mum_final (uint64_t h) {
+  h ^= _mum (h, _mum_finish_prime1);
+  h ^= _mum (h, _mum_finish_prime2);
+  return h;
+}
+
+#if defined(__x86_64__) && defined(__GNUC__)
+
+/* We want to use the BMI2 insn MULX instead of the generic x86-64 MULQ
+   where possible.  Although on modern Intel processors MULQ takes
+   3 cycles vs. 4 for MULX, MULX permits more freedom in insn
+   scheduling as it uses fewer fixed registers. */
+static inline uint64_t _MUM_TARGET("arch=haswell")
+_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
+  return _mum_final (_mum_hash_aligned (seed + len, key, len));
+}
+#endif
+
+#ifndef _MUM_UNALIGNED_ACCESS
+#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
+    || defined(__s390__) || defined(__m32c__) || defined(cris) \
+    || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
+    || defined(__aarch64__)
+#define _MUM_UNALIGNED_ACCESS 1
+#else
+#define _MUM_UNALIGNED_ACCESS 0
+#endif
+#endif
+
+/* When aligned access to the data being hashed is required, we move
+   part of the unaligned data into an aligned block of the given size,
+   process it, and repeat block by block. */
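The _MUM_UNALIGNED_ACCESS whitelist above matters because ((uint64_t *) str)[i] in _mum_hash_aligned is only well-defined when the pointer is suitably aligned; on other targets the bytes are first staged through an aligned buffer, as _mum_hash_default below does. A portable alternative idiom (not used by this patch; shown only for contrast) is a memcpy-based load, which compilers lower to a single instruction on targets that allow unaligned reads:

#include <stdint.h>
#include <string.h>

static inline uint64_t
read_u64_portable (const void *p)
{
  uint64_t v;
  /* memcpy avoids unaligned-access undefined behaviour; GCC and clang
     compile this to a plain 64-bit load where the target permits it. */
  memcpy (&v, p, sizeof (v));
  return v;
}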
+#ifndef _MUM_BLOCK_LEN
+#define _MUM_BLOCK_LEN 1024
+#endif
+
+#if _MUM_BLOCK_LEN < 8
+#error "too small block length"
+#endif
+
+static inline uint64_t
+#if defined(__x86_64__)
+_MUM_TARGET("inline-all-stringops")
+#endif
+_mum_hash_default (const void *key, size_t len, uint64_t seed) {
+  uint64_t result;
+  const unsigned char *str = (const unsigned char *) key;
+  size_t block_len;
+  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
+
+  result = seed + len;
+  if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
+    result = _mum_hash_aligned (result, key, len);
+  else {
+    while (len != 0) {
+      block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
+      memcpy (buf, str, block_len);
+      result = _mum_hash_aligned (result, buf, block_len);
+      len -= block_len;
+      str += block_len;
+    }
+  }
+  return _mum_final (result);
+}
+
+static inline uint64_t
+_mum_next_factor (void) {
+  uint64_t start = 0;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    start = (start << 8) | rand() % 256;
+  return start;
+}
+
+/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
+
+/* Set random multipliers depending on SEED. */
+static inline void
+mum_hash_randomize (uint64_t seed) {
+  int i;
+
+  srand (seed);
+  _mum_hash_step_prime = _mum_next_factor ();
+  _mum_key_step_prime = _mum_next_factor ();
+  _mum_finish_prime1 = _mum_next_factor ();
+  _mum_finish_prime2 = _mum_next_factor ();
+  _mum_block_start_prime = _mum_next_factor ();
+  _mum_unroll_prime = _mum_next_factor ();
+  _mum_tail_prime = _mum_next_factor ();
+  for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++)
+    _mum_primes[i] = _mum_next_factor ();
+}
+
+/* Start hashing data with SEED.  Return the state. */
+static inline uint64_t
+mum_hash_init (uint64_t seed) {
+  return seed;
+}
+
+/* Process data KEY with the state H and return the updated state. */
+static inline uint64_t
+mum_hash_step (uint64_t h, uint64_t key)
+{
+  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
+}
+
+/* Return the result of hashing using the current state H. */
+static inline uint64_t
+mum_hash_finish (uint64_t h) {
+  return _mum_final (h);
+}
+
+/* Fast hashing of KEY with SEED.  The hash is always the same for the
+   same key on any target. */
+static inline uint64_t
+mum_hash64 (uint64_t key, uint64_t seed) {
+  return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
+}
+
+/* Hash data KEY of length LEN and SEED.  The hash depends on the
+   target endianness and the unroll factor. */
+static inline uint64_t
+mum_hash (const void *key, size_t len, uint64_t seed) {
+#if defined(__x86_64__) && defined(__GNUC__)
+  static int avx2_support = 0;
+
+  if (avx2_support > 0)
+    return _mum_hash_avx2 (key, len, seed);
+#if 0
+  else if (! avx2_support) {
+    __builtin_cpu_init ();
+    avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
+    if (avx2_support > 0)
+      return _mum_hash_avx2 (key, len, seed);
+  }
+#endif
+#endif
+  return _mum_hash_default (key, len, seed);
+}
+
+#endif
--
cgit v1.2.3
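Before the follow-up fixes, it is worth seeing how the interface above is meant to be driven. A minimal usage sketch (not part of the patches; main() and the sample data are ad hoc):

#include <stdio.h>
#include "mum.h"

int
main (void)
{
  const char data[] = "some non-crypto payload";
  /* One-shot hashing of a buffer with a seed. */
  uint64_t h1 = mum_hash (data, sizeof (data) - 1, 42);
  /* Incremental hashing of fixed 64-bit words. */
  uint64_t state = mum_hash_init (42);
  state = mum_hash_step (state, 0x123456789abcdef0ULL);
  state = mum_hash_step (state, 0x0fedcba987654321ULL);
  printf ("%llx %llx\n", (unsigned long long) h1,
          (unsigned long long) mum_hash_finish (state));
  return 0;
}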
From bcb5eaadd0b810b399fbdb133253bbc9ec7f060e Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 11 May 2016 15:14:49 +0100
Subject: [Fix] Fix compilation issue

---
 contrib/mumhash/mum.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
(limited to 'contrib')

diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h
index 9391de0a6..a9a661661 100644
--- a/contrib/mumhash/mum.h
+++ b/contrib/mumhash/mum.h
@@ -232,7 +232,7 @@ _mum_final (uint64_t h) {
 }
 
 #if defined(__x86_64__) && defined(__GNUC__)
-
+#if 0
 /* We want to use the BMI2 insn MULX instead of the generic x86-64 MULQ
    where possible.  Although on modern Intel processors MULQ takes
    3 cycles vs. 4 for MULX, MULX permits more freedom in insn
@@ -242,6 +242,7 @@ _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
   return _mum_final (_mum_hash_aligned (seed + len, key, len));
 }
 #endif
+#endif
 
 #ifndef _MUM_UNALIGNED_ACCESS
 #if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
@@ -350,11 +351,12 @@ mum_hash64 (uint64_t key, uint64_t seed) {
 static inline uint64_t
 mum_hash (const void *key, size_t len, uint64_t seed) {
 #if defined(__x86_64__) && defined(__GNUC__)
-  static int avx2_support = 0;
+#if 0
+  static int avx2_support = 0;
 
   if (avx2_support > 0)
     return _mum_hash_avx2 (key, len, seed);
-#if 0
+
   else if (! avx2_support) {
     __builtin_cpu_init ();
     avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
--
cgit v1.2.3
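The block this fix moves fully under #if 0 is a standard GCC runtime-dispatch pattern. In isolation (a sketch, not part of the patch; it assumes the AVX2 variant is actually compiled in) it would look like this:

static inline uint64_t
hash_dispatch_sketch (const void *key, size_t len, uint64_t seed)
{
  static int fast_path = 0;            /* 0 = unknown, 1 = yes, -1 = no */

  if (fast_path == 0) {
    __builtin_cpu_init ();             /* populate GCC's CPU model data */
    fast_path = __builtin_cpu_supports ("avx2") ? 1 : -1;
  }
  return fast_path > 0 ? _mum_hash_avx2 (key, len, seed)
                       : _mum_hash_default (key, len, seed);
}

The static flag is benignly racy: concurrent callers can only ever store the same value for the same CPU.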
From 931615a2e0c1069c161bf2c9516732f576f20ee3 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 12 May 2016 09:48:29 +0100
Subject: [Feature] Further micro-optimizations for hashing and shingles

---
 config.h.in                  |  2 ++
 contrib/mumhash/mum.h        |  5 +++--
 src/libcryptobox/cryptobox.c | 13 +++++------
 src/libcryptobox/cryptobox.h |  3 ++-
 src/libutil/shingles.c       | 53 ++++++++++++++++++++++----------------------
 src/libutil/shingles.h       |  1 +
 test/rspamd_shingles_test.c  |  6 ++---
 7 files changed, 44 insertions(+), 39 deletions(-)
(limited to 'contrib')

diff --git a/config.h.in b/config.h.in
index d858f65c6..c837500f6 100644
--- a/config.h.in
+++ b/config.h.in
@@ -323,8 +323,10 @@ typedef off_t goffset;
 #  define RSPAMD_ALIGNED(x) __declspec(align(x))
 #elif defined(__GNUC__)
 #  define RSPAMD_ALIGNED(x) __attribute__((aligned(x)))
+#  define RSPAMD_OPTIMIZE(x) __attribute__((__optimize__ (x)))
 #else
 #  define RSPAMD_ALIGNED(x)
+#  define RSPAMD_OPTIMIZE(x)
 #endif
 #endif

diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h
index a9a661661..1daebf3de 100644
--- a/contrib/mumhash/mum.h
+++ b/contrib/mumhash/mum.h
@@ -43,6 +43,7 @@
 #ifndef __MUM_HASH__
 #define __MUM_HASH__
 
+#include "config.h"
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
 #ifdef _MSC_VER
 typedef unsigned __int32 uint32_t;
 typedef unsigned __int64 uint64_t;
 #else
 #include <stdint.h>
 #endif
@@ -56,8 +57,8 @@
 #ifdef __GNUC__
 #define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
-#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
-#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#define _MUM_OPTIMIZE(opts) RSPAMD_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
 #else
 #define _MUM_ATTRIBUTE_UNUSED
 #define _MUM_OPTIMIZE(opts)

diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
index d8fc737d7..162fcd662 100644
--- a/src/libcryptobox/cryptobox.c
+++ b/src/libcryptobox/cryptobox.c
@@ -1459,20 +1459,17 @@ guint64
 rspamd_cryptobox_fast_hash (const void *data,
 		gsize len, guint64 seed)
 {
-	if (len % 2 == 0) {
+	if (len > 8 && len % 8 == 0) {
 		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_MUMHASH,
 				data, len, seed);
 	}
 	else {
 #if defined(__LP64__) || defined(_LP64)
-		if (len > 8) {
-			return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
-					data, len, seed);
-		}
+		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+				data, len, seed);
 #endif
 	}
-
 	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH32,
 			data, len, seed);
 }
@@ -1490,7 +1487,9 @@ rspamd_cryptobox_fast_hash_specific (
 	case RSPAMD_CRYPTOBOX_XXHASH64:
 		return XXH64 (data, len, seed);
 	case RSPAMD_CRYPTOBOX_MUMHASH:
-	default:
 		return mum_hash (data, len, seed);
+	case RSPAMD_CRYPTOBOX_HASHFAST:
+	default:
+		return rspamd_cryptobox_fast_hash (data, len, seed);
 	}
 }

diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index 18e66df60..c44044f48 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -357,7 +357,8 @@ guint64 rspamd_cryptobox_fast_hash (const void *data,
 enum rspamd_cryptobox_fast_hash_type {
 	RSPAMD_CRYPTOBOX_XXHASH64 = 0,
 	RSPAMD_CRYPTOBOX_XXHASH32,
-	RSPAMD_CRYPTOBOX_MUMHASH
+	RSPAMD_CRYPTOBOX_MUMHASH,
+	RSPAMD_CRYPTOBOX_HASHFAST
 };
 /**
  * Platform independent version
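The length-based dispatch this commit introduces is easy to misread in diff form; restated as an equivalent sketch (not part of the patch; glib types assumed from the headers above):

static guint64
fast_hash_sketch (const void *data, gsize len, guint64 seed)
{
	if (len > 8 && len % 8 == 0) {
		/* whole 64-bit words: mumhash's fastest case */
		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_MUMHASH,
				data, len, seed);
	}
#if defined(__LP64__) || defined(_LP64)
	/* odd lengths on 64-bit builds */
	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
			data, len, seed);
#else
	/* odd lengths on 32-bit builds */
	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH32,
			data, len, seed);
#endif
}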
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
index 3e238fa5c..8d1b147db 100644
--- a/src/libutil/shingles.c
+++ b/src/libutil/shingles.c
@@ -19,7 +19,7 @@
 
 #define SHINGLES_WINDOW 3
 
-struct rspamd_shingle*
+struct rspamd_shingle* RSPAMD_OPTIMIZE("unroll-loops")
 rspamd_shingles_generate (GArray *input,
 		const guchar key[16],
 		rspamd_mempool_t *pool,
@@ -28,7 +28,7 @@ rspamd_shingles_generate (GArray *input,
 		enum rspamd_shingle_alg alg)
 {
 	struct rspamd_shingle *res;
-	GArray *hashes[RSPAMD_SHINGLE_SIZE];
+	guint64 **hashes;
 	rspamd_sipkey_t keys[RSPAMD_SHINGLE_SIZE];
 	guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key;
 	const guchar *cur_key;
@@ -36,7 +36,8 @@ rspamd_shingles_generate (GArray *input,
 	rspamd_ftok_t *word;
 	rspamd_cryptobox_hash_state_t bs;
 	guint64 val;
-	gint i, j, k, beg = 0;
+	gint i, j, k;
+	gsize hlen, beg = 0;
 	enum rspamd_cryptobox_fast_hash_type ht;
 
 	if (pool != NULL) {
@@ -52,9 +53,11 @@ rspamd_shingles_generate (GArray *input,
 	out_key = (guchar *)&keys[0];
 
 	/* Init hashes pipes and keys */
+	hashes = g_slice_alloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE);
+	hlen = input->len > SHINGLES_WINDOW ?
+			(input->len - SHINGLES_WINDOW + 1) : 1;
+
 	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
-		hashes[i] = g_array_sized_new (FALSE, FALSE, sizeof (guint64),
-				input->len + SHINGLES_WINDOW);
+		hashes[i] = g_slice_alloc (hlen * sizeof (guint64));
 		/*
 		 * To generate a set of hashes we just apply sha256 to the
 		 * initial key as many times as many hashes are required and
@@ -80,29 +83,34 @@ rspamd_shingles_generate (GArray *input,
 				word = &g_array_index (input, rspamd_ftok_t, j);
 				row = rspamd_fstring_append (row, word->begin, word->len);
 			}
-			beg++;
 
 			/* Now we need to create a new row here */
 			for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
 				rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len,
 						keys[j]);
-				g_array_append_val (hashes[j], val);
+				g_assert (hlen > beg);
+				hashes[j][beg] = val;
 			}
 
+			beg++;
+
 			row = rspamd_fstring_assign (row, "", 0);
 		}
 	}
	}
 	else {
 		guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE];
-		guint64 RSPAMD_ALIGNED(32) tmpbuf[16];
-		guint rlen;
 
-		if (alg == RSPAMD_SHINGLES_XXHASH) {
+		switch (alg) {
+		case RSPAMD_SHINGLES_XXHASH:
 			ht = RSPAMD_CRYPTOBOX_XXHASH64;
-		}
-		else {
+			break;
+		case RSPAMD_SHINGLES_MUMHASH:
 			ht = RSPAMD_CRYPTOBOX_MUMHASH;
+			break;
+		default:
+			ht = RSPAMD_CRYPTOBOX_HASHFAST;
+			break;
 		}
 
 		memset (res, 0, sizeof (res));
@@ -119,26 +127,17 @@ rspamd_shingles_generate (GArray *input,
 				word = &g_array_index (input, rspamd_ftok_t, beg);
 
 				/* Insert the last element to the pipe */
-				if (word->len >= sizeof (tmpbuf)) {
-					rlen = sizeof (tmpbuf);
-					memcpy (tmpbuf, word->begin, rlen);
-				}
-				else {
-					rlen = word->len / sizeof (guint64) + 1;
-					memset (tmpbuf, 0, rlen * sizeof (guint64));
-					memcpy (tmpbuf, word->begin, word->len);
-				}
-
 				res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] =
 						rspamd_cryptobox_fast_hash_specific (ht,
-								tmpbuf,rlen * sizeof (guint64),
+								word->begin, word->len,
 								*(guint64 *)keys[j]);
 				val = 0;
 
 				for (k = 0; k < SHINGLES_WINDOW; k ++) {
 					val ^= res[j * SHINGLES_WINDOW + k];
 				}
 
-				g_array_append_val (hashes[j], val);
+				g_assert (hlen > beg);
+				hashes[j][beg] = val;
 			}
 			beg++;
 		}
@@ -147,11 +146,13 @@ rspamd_shingles_generate (GArray *input,
 
 	/* Now we need to filter all hashes and make a shingles result */
 	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
-		res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len,
+		res->hashes[i] = filter (hashes[i], hlen,
 				i, key, filterd);
-		g_array_free (hashes[i], TRUE);
+		g_slice_free1 (hlen * sizeof (guint64), hashes[i]);
 	}
 
+	g_slice_free1 (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE, hashes);
+
 	rspamd_fstring_free (row);
 
 	return res;

diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h
index a46a2fc0f..fd7d3bfc7 100644
--- a/src/libutil/shingles.h
+++ b/src/libutil/shingles.h
@@ -29,6 +29,7 @@ enum rspamd_shingle_alg {
 	RSPAMD_SHINGLES_OLD = 0,
 	RSPAMD_SHINGLES_XXHASH,
 	RSPAMD_SHINGLES_MUMHASH,
+	RSPAMD_SHINGLES_FAST
 };
 
 /**

diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c
index a4289497e..5669137d4 100644
--- a/test/rspamd_shingles_test.c
+++ b/test/rspamd_shingles_test.c
@@ -93,10 +93,10 @@ test_case (gsize cnt, gsize max_len, gdouble perm_factor,
 	ottery_rand_bytes (key, sizeof (key));
 	input = generate_fuzzy_words (cnt, max_len);
-	ts1 = rspamd_get_ticks ();
+	ts1 = rspamd_get_virtual_ticks ();
 	sgl = rspamd_shingles_generate (input, key,
 			NULL, rspamd_shingles_default_filter, NULL, alg);
-	ts2 = rspamd_get_ticks ();
+	ts2 = rspamd_get_virtual_ticks ();
 	permute_vector (input, perm_factor);
 	sgl_permuted = rspamd_shingles_generate (input, key,
 			NULL, rspamd_shingles_default_filter, NULL, alg);
@@ -118,7 +118,7 @@ rspamd_shingles_test_func (void)
 {
 	enum rspamd_shingle_alg alg = RSPAMD_SHINGLES_OLD;
 
-	for (alg = RSPAMD_SHINGLES_OLD; alg <= RSPAMD_SHINGLES_MUMHASH; alg ++) {
+	for (alg = RSPAMD_SHINGLES_OLD; alg <= RSPAMD_SHINGLES_FAST; alg ++) {
 		test_case (200, 10, 0.1, alg);
 		test_case (500, 20, 0.01, alg);
 		test_case (5000, 20, 0.01, alg);
--
cgit v1.2.3
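The hot loop of this commit is easier to follow outside the diff: for each of the RSPAMD_SHINGLE_SIZE pipes, a window of SHINGLES_WINDOW word hashes is kept and XORed into one value per position, yielding hlen = nwords - SHINGLES_WINDOW + 1 values. A compact sketch of the idea (not part of the patch; names are ad hoc and boundary handling is simplified):

#include <stdint.h>
#include <stddef.h>

#define WINDOW 3

/* hashes_out[pos] receives the XOR of the window of word hashes ending
   at word pos; word_hashes stands in for the per-word fast hashes that
   rspamd_cryptobox_fast_hash_specific () produces. */
static void
rolling_shingle_sketch (const uint64_t *word_hashes, size_t nwords,
                        uint64_t *hashes_out)
{
  uint64_t win[WINDOW] = {0, 0, 0};
  size_t i, k, pos = 0;

  for (i = 0; i < nwords; i++) {
    /* shift the window and insert the new word hash at the end */
    for (k = 0; k < WINDOW - 1; k++)
      win[k] = win[k + 1];
    win[WINDOW - 1] = word_hashes[i];
    if (i >= WINDOW - 1) {
      uint64_t val = 0;
      for (k = 0; k < WINDOW; k++)
        val ^= win[k];
      hashes_out[pos++] = val;
    }
  }
}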
From b00d4cd3cc0e70e90f50a8fed294754a4c6e9475 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 12 May 2016 12:45:17 +0100
Subject: [Feature] Use metrohash as well

---
 contrib/metrohash/metro.h    | 141 +++++++++++++++++++++++++++++++++++++++++++
 src/libcryptobox/cryptobox.c |  36 ++++++++---
 src/libcryptobox/cryptobox.h |   4 +-
 3 files changed, 170 insertions(+), 11 deletions(-)
 create mode 100644 contrib/metrohash/metro.h
(limited to 'contrib')

diff --git a/contrib/metrohash/metro.h b/contrib/metrohash/metro.h
new file mode 100644
index 000000000..36c44d65c
--- /dev/null
+++ b/contrib/metrohash/metro.h
@@ -0,0 +1,141 @@
+/*-
+The MIT License (MIT)
+
+Copyright (c) 2015 J. Andrew Rogers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+ */
+#ifndef CONTRIB_METROHASH_METRO_H_
+#define CONTRIB_METROHASH_METRO_H_
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef _MSC_VER
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* Rotate right idiom recognized by the compiler */
+inline static uint64_t rotate_right(uint64_t v, unsigned k)
+{
+    return (v >> k) | (v << (64 - k));
+}
+
+inline static uint64_t read_u64(const void * const ptr)
+{
+    return * (uint64_t *) ptr;
+}
+
+inline static uint64_t read_u32(const void * const ptr)
+{
+    return * (uint32_t *) ptr;
+}
+
+inline static uint64_t read_u16(const void * const ptr)
+{
+    return * (uint16_t *) ptr;
+}
+
+inline static uint64_t read_u8 (const void * const ptr)
+{
+    return * (uint8_t *) ptr;
+}
+
+static inline uint64_t metrohash64_1(const uint8_t * key, uint64_t len, uint64_t seed)
+{
+    static const uint64_t k0 = 0xC83A91E1;
+    static const uint64_t k1 = 0x8648DBDB;
+    static const uint64_t k2 = 0x7BDEC03B;
+    static const uint64_t k3 = 0x2F5870A5;
+
+    const uint8_t * ptr = key;
+    const uint8_t * const end = ptr + len;
+
+    uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len;
+
+    if (len >= 32)
+    {
+        uint64_t v[4];
+        v[0] = hash;
+        v[1] = hash;
+        v[2] = hash;
+        v[3] = hash;
+
+        do
+        {
+            v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
+            v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
+            v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
+            v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
+        }
+        while (ptr <= (end - 32));
+
+        v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1;
+        v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0;
+        v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1;
+        v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
+        hash += v[0] ^ v[1];
+    }
+
+    if ((end - ptr) >= 16)
+    {
+        uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1;
+        uint64_t v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2;
+        v0 ^= rotate_right(v0 * k0, 35) + v1;
+        v1 ^= rotate_right(v1 * k3, 35) + v0;
+        hash += v1;
+    }
+
+    if ((end - ptr) >= 8)
+    {
+        hash += read_u64(ptr) * k3; ptr += 8;
+        hash ^= rotate_right(hash, 33) * k1;
+    }
+
+    if ((end - ptr) >= 4)
+    {
+        hash += read_u32(ptr) * k3; ptr += 4;
+        hash ^= rotate_right(hash, 15) * k1;
+    }
+
+    if ((end - ptr) >= 2)
+    {
+        hash += read_u16(ptr) * k3; ptr += 2;
+        hash ^= rotate_right(hash, 13) * k1;
+    }
+
+    if ((end - ptr) >= 1)
+    {
+        hash += read_u8 (ptr) * k3;
+        hash ^= rotate_right(hash, 25) * k1;
+    }
+
+    hash ^= rotate_right(hash, 33);
+    hash *= k0;
+    hash ^= rotate_right(hash, 33);
+
+    return hash;
+}
+
+#endif /* CONTRIB_METROHASH_METRO_H_ */
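Like mum.h, the header is self-contained, so exercising it takes only a few lines. A quick usage sketch (not part of the patch; the input string and seed are arbitrary):

#include <stdio.h>
#include "metro.h"

int
main (void)
{
  const uint8_t key[] = "some non-crypto payload";
  /* note that read_u64 () above does direct unaligned loads, so the
     header is only suitable for targets that tolerate them */
  printf ("%016llx\n",
          (unsigned long long) metrohash64_1 (key, sizeof (key) - 1, 0));
  return 0;
}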
diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
index c6eb3310a..71180da12 100644
--- a/src/libcryptobox/cryptobox.c
+++ b/src/libcryptobox/cryptobox.c
@@ -34,7 +34,7 @@
 #include "xxhash.h"
 #define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */
 #include "../../contrib/mumhash/mum.h"
-
+#include "../../contrib/metrohash/metro.h"
 #ifdef HAVE_CPUID_H
 #include <cpuid.h>
 #endif
@@ -1455,25 +1455,39 @@ rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st)
 /**
  * One in all function
  */
-guint64
-rspamd_cryptobox_fast_hash (const void *data,
+static inline guint64
+rspamd_cryptobox_fast_hash_machdep (const void *data,
 		gsize len, guint64 seed)
 {
 	if (len >= 8 && len % 8 == 0) {
-		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_MUMHASH,
-				data, len, seed);
+		return mum_hash (data, len, seed);
 	}
 	else {
 #if defined(__LP64__) || defined(_LP64)
-		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
-				data, len, seed);
+		return metrohash64_1 (data, len, seed);
 #endif
 	}
 
-	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH32,
-			data, len, seed);
+	return XXH32 (data, len, seed);
 }
 
+static inline guint64
+rspamd_cryptobox_fast_hash_indep (const void *data,
+		gsize len, guint64 seed)
+{
+	if (len >= 8 && len % 8 == 0) {
+		return mum_hash (data, len, seed);
+	}
+
+	return metrohash64_1 (data, len, seed);
+}
+
+guint64
+rspamd_cryptobox_fast_hash (const void *data,
+		gsize len, guint64 seed)
+{
+	return rspamd_cryptobox_fast_hash_machdep (data, len, seed);
+}
 
 guint64
 rspamd_cryptobox_fast_hash_specific (
@@ -1488,8 +1502,10 @@ rspamd_cryptobox_fast_hash_specific (
 		return XXH64 (data, len, seed);
 	case RSPAMD_CRYPTOBOX_MUMHASH:
 		return mum_hash (data, len, seed);
+	case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT:
+		return rspamd_cryptobox_fast_hash_indep (data, len, seed);
 	case RSPAMD_CRYPTOBOX_HASHFAST:
 	default:
-		return rspamd_cryptobox_fast_hash (data, len, seed);
+		return rspamd_cryptobox_fast_hash_machdep (data, len, seed);
 	}
 }

diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index c44044f48..6facf0a0e 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -358,7 +358,9 @@ enum rspamd_cryptobox_fast_hash_type {
 	RSPAMD_CRYPTOBOX_XXHASH64 = 0,
 	RSPAMD_CRYPTOBOX_XXHASH32,
 	RSPAMD_CRYPTOBOX_MUMHASH,
-	RSPAMD_CRYPTOBOX_HASHFAST
+	RSPAMD_CRYPTOBOX_METROHASH,
+	RSPAMD_CRYPTOBOX_HASHFAST,
+	RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT
 };
 /**
  * Platform independent version
--
cgit v1.2.3
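Taken together, the final commit leaves callers with two entry points: the machine-dependent fast path (mum/metro/xxhash picked per length and word size, with no stability guarantee across platforms) and an explicitly platform-independent variant. A minimal usage sketch (not from the patches; the function names and the seed are ad hoc):

#include <glib.h>
#include "cryptobox.h"

/* Hash a key for a purely in-memory table: the machine-dependent
 * variant is the fastest choice. */
static guint64
hash_for_local_table (const void *key, gsize len)
{
	return rspamd_cryptobox_fast_hash (key, len, 0xdeadbeefULL);
}

/* Hash a key whose value is stored or compared across builds: ask
 * explicitly for the independent variant (mum with
 * MUM_TARGET_INDEPENDENT_HASH, metrohash for the tail cases). */
static guint64
hash_for_storage (const void *key, gsize len)
{
	return rspamd_cryptobox_fast_hash_specific (
			RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, key, len, 0xdeadbeefULL);
}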