From 63eb2e602070feb5536c08009e494cbab20ad921 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 11 May 2016 14:59:29 +0100
Subject: [Feature] Add and use mumhash for non-crypto hashing

---
 contrib/mumhash/mum.h | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 369 insertions(+)
 create mode 100644 contrib/mumhash/mum.h
(limited to 'contrib')

diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h
new file mode 100644
index 000000000..9391de0a6
--- /dev/null
+++ b/contrib/mumhash/mum.h
@@ -0,0 +1,369 @@
+/* Copyright (c) 2016 Vladimir Makarov
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* This file implements MUM (MUltiply and Mix) hashing.  We randomize
+   input data by a 64x64-bit multiplication, mix the high and low parts
+   of the multiplication result with an addition, and then mix the sum
+   into the current state.  For the multiplication we use prime numbers
+   randomly generated with equal probability of their bit values.  When
+   all primes have been used once, the state is randomized and the same
+   prime numbers are reused for data randomization.
+
+   MUM hashing passes all SMHasher tests.  A pseudo-random number
+   generator based on MUM also passes the NIST Statistical Test Suite
+   for Random and Pseudorandom Number Generators for Cryptographic
+   Applications (version 2.2.1) with 1000 bitstreams, each containing
+   1M bits.  MUM hashing is also faster than Spooky64 and City64 on
+   small strings (at least up to 512 bits) on Haswell and Power7.  The
+   MUM bulk speed (speed on very long data) is higher than Spooky's and
+   City's on Power7.  On Haswell the bulk speed is higher than Spooky's
+   and close to City's. */
+
+#ifndef __MUM_HASH__
+#define __MUM_HASH__
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef _MSC_VER
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+#ifdef __GNUC__
+#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
+#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
+#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#else
+#define _MUM_ATTRIBUTE_UNUSED
+#define _MUM_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
+#endif
+
+/* Macro saying to use 128-bit integers implemented by GCC for some
+   targets. */
+#ifndef _MUM_USE_INT128
+/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
+   HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
+   otherwise int. */
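As an aside, the 128-bit path gated by _MUM_USE_INT128 boils down to one widening multiply plus one fold. A minimal stand-alone sketch (not part of the patch; the function name is ad hoc) of the core MUM step on a compiler that provides __uint128_t:

#include <stdint.h>

static inline uint64_t
mum_step_sketch (uint64_t v, uint64_t p)
{
  /* Widening 64x64->128-bit multiply. */
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  /* Mixing the high and low halves with an addition makes the result
     depend on all 128 bits of the product. */
  return (uint64_t) (r >> 64) + (uint64_t) r;
}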
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+#define _MUM_USE_INT128 1
+#else
+#define _MUM_USE_INT128 0
+#endif
+#endif
+
+
+/* Here are different primes randomly generated with equal probability
+   of their bit values.  They are used to randomize input values. */
+static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
+static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
+static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
+static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
+static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
+static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
+static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
+
+static uint64_t _mum_primes [] = {
+  0x9ebdcae10d981691, 0x32b9b9b97a27ac7d, 0x29b5584d83d35bbd, 0x4b04e0e61401255f,
+  0x25e8f7b1f1c9d027, 0x80d4c8c000f3e881, 0xbd1255431904b9dd, 0x8a3bd4485eee6d81,
+  0x3bc721b2aad05197, 0x71b1a19b907d6e33, 0x525e6c1084a8534b, 0x9e4c2cd340c1299f,
+  0xde3add92e94caa37, 0x7e14eadb1f65311d, 0x3f5aa40f89812853, 0x33b15a3b587d15c9,
+};
+
+/* Multiply 64-bit V and P and return the sum of the high and low parts
+   of the result. */
+static inline uint64_t
+_mum (uint64_t v, uint64_t p) {
+  uint64_t hi, lo;
+#if _MUM_USE_INT128
+#if defined(__aarch64__)
+  /* AARCH64 needs 2 insns to calculate the 128-bit result of the
+     multiplication.  If we use generic code, we actually call a
+     function doing 128x128->128 bit multiplication.  That function is
+     very slow. */
+  lo = v * p;
+  asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
+#else
+  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
+  hi = (uint64_t) (r >> 64);
+  lo = (uint64_t) r;
+#endif
+#else
+  /* Implementation of 64x64->128-bit multiplication by four 32x32->64
+     bit multiplications. */
+  uint64_t hv = v >> 32, hp = p >> 32;
+  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+  uint64_t rh = hv * hp;
+  uint64_t rm_0 = hv * lp;
+  uint64_t rm_1 = hp * lv;
+  uint64_t rl = lv * lp;
+  uint64_t t, carry = 0;
+
+  /* We could ignore a carry bit here if we did not care about getting
+     the same hash on 32-bit and 64-bit targets. */
+  t = rl + (rm_0 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry = t < rl;
+#endif
+  lo = t + (rm_1 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry += lo < t;
+#endif
+  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
+#endif
+  /* We could use XOR here too but, for some reason, on Haswell and
+     Power7 using an addition improves hashing performance by 10% for
+     small strings. */
+  return hi + lo;
+}
+
+#if defined(_MSC_VER)
+#define _mum_bswap64(x) _byteswap_uint64 (x)
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define _mum_bswap64(x) OSSwapInt64 (x)
+#elif defined(__GNUC__)
+#define _mum_bswap64(x) __builtin_bswap64 (x)
+#else
+#include <byteswap.h>
+#define _mum_bswap64(x) bswap64 (x)
+#endif
+
+static inline uint64_t
+_mum_le (uint64_t v) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+  return v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  return _mum_bswap64 (v);
+#else
+#error "Unknown endianness"
+#endif
+}
+
+/* Macro defining how many times the most nested loop in
+   _mum_hash_aligned will be unrolled by the compiler (although the
+   compiler can make its own decision :).  Use only a constant here to
+   help the compiler unroll the major loop.
+
+   The macro value affects the resulting hash for strings > 128 bits.
+   The unroll factor greatly affects the hashing speed.  We prefer the
+   speed. */
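The portable branch of _mum () above decomposes the 64x64 multiply into four 32x32->64 products, propagating carries only when MUM_TARGET_INDEPENDENT_HASH is defined. A quick self-check sketch (not part of the patch; names are ad hoc) comparing that decomposition, with carries always on, against the compiler's native 128-bit product:

#include <assert.h>
#include <stdint.h>

static void
check_mum_decomposition (uint64_t v, uint64_t p)
{
  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
  uint64_t hv = v >> 32, hp = p >> 32;
  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
  uint64_t rm_0 = hv * lp, rm_1 = hp * lv, rl = lv * lp;
  uint64_t t = rl + (rm_0 << 32), carry = t < rl;
  uint64_t lo = t + (rm_1 << 32);
  carry += lo < t;
  uint64_t hi = hv * hp + (rm_0 >> 32) + (rm_1 >> 32) + carry;
  /* With the carry bits accounted for, both paths must agree. */
  assert (hi == (uint64_t) (r >> 64) && lo == (uint64_t) r);
}

int
main (void)
{
  /* sample inputs; the first pair reuses two primes from the file */
  check_mum_decomposition (0x2e0bb864e9ea7df5ULL, 0xcdb32970830fcaa1ULL);
  check_mum_decomposition (~0ULL, ~0ULL);
  return 0;
}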
+#ifndef _MUM_UNROLL_FACTOR_POWER
+#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 3
+#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 4
+#else
+#define _MUM_UNROLL_FACTOR_POWER 2
+#endif
+#endif
+
+#if _MUM_UNROLL_FACTOR_POWER < 1
+#error "too small unroll factor"
+#elif _MUM_UNROLL_FACTOR_POWER > 4
+#error "not enough primes for such an unroll factor"
+#endif
+
+#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
+
+static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
+_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
+  uint64_t result = start;
+  const unsigned char *str = (const unsigned char *) key;
+  union {uint64_t i; unsigned char s[sizeof (uint64_t)];} p;
+  int i;
+  size_t n;
+
+  result = _mum (result, _mum_block_start_prime);
+  while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
+    /* This loop could be vectorized when we have vector insns for
+       64x64->128-bit multiplication.  AVX2 currently has only a
+       vector insn for four 32x32->64-bit multiplications. */
+    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
+      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+    /* We will use the same prime numbers on the next iterations --
+       randomize the state. */
+    result = _mum (result, _mum_unroll_prime);
+  }
+  n = len / sizeof (uint64_t);
+  for (i = 0; i < n; i++)
+    result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+  len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
+  if (len) {
+    p.i = 0; memcpy (p.s, str, len);
+    result ^= _mum (_mum_le (((uint64_t *) p.s)[0]), _mum_tail_prime);
+  }
+  return result;
+}
+
+/* Final randomization of H. */
+static inline uint64_t
+_mum_final (uint64_t h) {
+  h ^= _mum (h, _mum_finish_prime1);
+  h ^= _mum (h, _mum_finish_prime2);
+  return h;
+}
+
+#if defined(__x86_64__) && defined(__GNUC__)
+
+/* We want to use the BMI2 insn MULX instead of the generic x86-64 MULQ
+   where possible.  Although on modern Intel processors MULQ takes
+   3 cycles vs. 4 for MULX, MULX permits more freedom in insn
+   scheduling as it uses fewer fixed registers. */
+static inline uint64_t _MUM_TARGET("arch=haswell")
+_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
+  return _mum_final (_mum_hash_aligned (seed + len, key, len));
+}
+#endif
+
+#ifndef _MUM_UNALIGNED_ACCESS
+#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
+    || defined(__s390__) || defined(__m32c__) || defined(cris) \
+    || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
+    || defined(__aarch64__)
+#define _MUM_UNALIGNED_ACCESS 1
+#else
+#define _MUM_UNALIGNED_ACCESS 0
+#endif
+#endif
+
+/* When aligned access to the data being hashed is required, we move
+   part of the unaligned data into an aligned block of the given size,
+   process it, and repeat block by block. */
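The _MUM_UNALIGNED_ACCESS whitelist above matters because ((uint64_t *) str)[i] in _mum_hash_aligned is only well-defined when the pointer is suitably aligned; on other targets the bytes are first staged through an aligned buffer, as _mum_hash_default below does. A portable alternative idiom (not used by this patch; shown only for contrast) is a memcpy-based load, which compilers lower to a single instruction on targets that allow unaligned reads:

#include <stdint.h>
#include <string.h>

static inline uint64_t
read_u64_portable (const void *p)
{
  uint64_t v;
  /* memcpy avoids unaligned-access undefined behaviour; GCC and clang
     compile this to a plain 64-bit load where the target permits it. */
  memcpy (&v, p, sizeof (v));
  return v;
}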
+#ifndef _MUM_BLOCK_LEN
+#define _MUM_BLOCK_LEN 1024
+#endif
+
+#if _MUM_BLOCK_LEN < 8
+#error "too small block length"
+#endif
+
+static inline uint64_t
+#if defined(__x86_64__)
+_MUM_TARGET("inline-all-stringops")
+#endif
+_mum_hash_default (const void *key, size_t len, uint64_t seed) {
+  uint64_t result;
+  const unsigned char *str = (const unsigned char *) key;
+  size_t block_len;
+  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
+
+  result = seed + len;
+  if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
+    result = _mum_hash_aligned (result, key, len);
+  else {
+    while (len != 0) {
+      block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
+      memcpy (buf, str, block_len);
+      result = _mum_hash_aligned (result, buf, block_len);
+      len -= block_len;
+      str += block_len;
+    }
+  }
+  return _mum_final (result);
+}
+
+static inline uint64_t
+_mum_next_factor (void) {
+  uint64_t start = 0;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    start = (start << 8) | rand() % 256;
+  return start;
+}
+
+/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
+
+/* Set random multipliers depending on SEED. */
+static inline void
+mum_hash_randomize (uint64_t seed) {
+  int i;
+
+  srand (seed);
+  _mum_hash_step_prime = _mum_next_factor ();
+  _mum_key_step_prime = _mum_next_factor ();
+  _mum_finish_prime1 = _mum_next_factor ();
+  _mum_finish_prime2 = _mum_next_factor ();
+  _mum_block_start_prime = _mum_next_factor ();
+  _mum_unroll_prime = _mum_next_factor ();
+  _mum_tail_prime = _mum_next_factor ();
+  for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++)
+    _mum_primes[i] = _mum_next_factor ();
+}
+
+/* Start hashing data with SEED.  Return the state. */
+static inline uint64_t
+mum_hash_init (uint64_t seed) {
+  return seed;
+}
+
+/* Process data KEY with the state H and return the updated state. */
+static inline uint64_t
+mum_hash_step (uint64_t h, uint64_t key)
+{
+  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
+}
+
+/* Return the result of hashing using the current state H. */
+static inline uint64_t
+mum_hash_finish (uint64_t h) {
+  return _mum_final (h);
+}
+
+/* Fast hashing of KEY with SEED.  The hash is always the same for the
+   same key on any target. */
+static inline uint64_t
+mum_hash64 (uint64_t key, uint64_t seed) {
+  return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
+}
+
+/* Hash data KEY of length LEN and SEED.  The hash depends on the
+   target endianness and the unroll factor. */
+static inline uint64_t
+mum_hash (const void *key, size_t len, uint64_t seed) {
+#if defined(__x86_64__) && defined(__GNUC__)
+  static int avx2_support = 0;
+
+  if (avx2_support > 0)
+    return _mum_hash_avx2 (key, len, seed);
+#if 0
+  else if (! avx2_support) {
+    __builtin_cpu_init ();
+    avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
+    if (avx2_support > 0)
+      return _mum_hash_avx2 (key, len, seed);
+  }
+#endif
+#endif
+  return _mum_hash_default (key, len, seed);
+}
+
+#endif
--
cgit v1.2.3
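Before the follow-up fixes, it is worth seeing how the interface above is meant to be driven. A minimal usage sketch (not part of the patches; main() and the sample data are ad hoc):

#include <stdio.h>
#include "mum.h"

int
main (void)
{
  const char data[] = "some non-crypto payload";
  /* One-shot hashing of a buffer with a seed. */
  uint64_t h1 = mum_hash (data, sizeof (data) - 1, 42);
  /* Incremental hashing of fixed 64-bit words. */
  uint64_t state = mum_hash_init (42);
  state = mum_hash_step (state, 0x123456789abcdef0ULL);
  state = mum_hash_step (state, 0x0fedcba987654321ULL);
  printf ("%llx %llx\n", (unsigned long long) h1,
          (unsigned long long) mum_hash_finish (state));
  return 0;
}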
From bcb5eaadd0b810b399fbdb133253bbc9ec7f060e Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 11 May 2016 15:14:49 +0100
Subject: [Fix] Fix compilation issue

---
 contrib/mumhash/mum.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
(limited to 'contrib')

diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h
index 9391de0a6..a9a661661 100644
--- a/contrib/mumhash/mum.h
+++ b/contrib/mumhash/mum.h
@@ -232,7 +232,7 @@ _mum_final (uint64_t h) {
 }
 
 #if defined(__x86_64__) && defined(__GNUC__)
-
+#if 0
 /* We want to use the BMI2 insn MULX instead of the generic x86-64 MULQ
    where possible.  Although on modern Intel processors MULQ takes
    3 cycles vs. 4 for MULX, MULX permits more freedom in insn
@@ -242,6 +242,7 @@ _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
   return _mum_final (_mum_hash_aligned (seed + len, key, len));
 }
 #endif
+#endif
 
 #ifndef _MUM_UNALIGNED_ACCESS
 #if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
@@ -350,11 +351,12 @@ mum_hash64 (uint64_t key, uint64_t seed) {
 static inline uint64_t
 mum_hash (const void *key, size_t len, uint64_t seed) {
 #if defined(__x86_64__) && defined(__GNUC__)
-  static int avx2_support = 0;
+#if 0
+  static int avx2_support = 0;
 
   if (avx2_support > 0)
     return _mum_hash_avx2 (key, len, seed);
-#if 0
+
   else if (! avx2_support) {
     __builtin_cpu_init ();
     avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
--
cgit v1.2.3
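The block this fix moves fully under #if 0 is a standard GCC runtime-dispatch pattern. In isolation (a sketch, not part of the patch; it assumes the AVX2 variant is actually compiled in) it would look like this:

static inline uint64_t
hash_dispatch_sketch (const void *key, size_t len, uint64_t seed)
{
  static int fast_path = 0;            /* 0 = unknown, 1 = yes, -1 = no */

  if (fast_path == 0) {
    __builtin_cpu_init ();             /* populate GCC's CPU model data */
    fast_path = __builtin_cpu_supports ("avx2") ? 1 : -1;
  }
  return fast_path > 0 ? _mum_hash_avx2 (key, len, seed)
                       : _mum_hash_default (key, len, seed);
}

The static flag is benignly racy: concurrent callers can only ever store the same value for the same CPU.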
From 931615a2e0c1069c161bf2c9516732f576f20ee3 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 12 May 2016 09:48:29 +0100
Subject: [Feature] Further micro-optimizations for hashing and shingles

---
 config.h.in                  |  2 ++
 contrib/mumhash/mum.h        |  5 +++--
 src/libcryptobox/cryptobox.c | 13 +++++------
 src/libcryptobox/cryptobox.h |  3 ++-
 src/libutil/shingles.c       | 53 ++++++++++++++++++++++----------------------
 src/libutil/shingles.h       |  1 +
 test/rspamd_shingles_test.c  |  6 ++---
 7 files changed, 44 insertions(+), 39 deletions(-)
(limited to 'contrib')

diff --git a/config.h.in b/config.h.in
index d858f65c6..c837500f6 100644
--- a/config.h.in
+++ b/config.h.in
@@ -323,8 +323,10 @@ typedef off_t goffset;
 #  define RSPAMD_ALIGNED(x) __declspec(align(x))
 #elif defined(__GNUC__)
 #  define RSPAMD_ALIGNED(x) __attribute__((aligned(x)))
+#  define RSPAMD_OPTIMIZE(x) __attribute__((__optimize__ (x)))
 #else
 #  define RSPAMD_ALIGNED(x)
+#  define RSPAMD_OPTIMIZE(x)
 #endif
 #endif

diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h
index a9a661661..1daebf3de 100644
--- a/contrib/mumhash/mum.h
+++ b/contrib/mumhash/mum.h
@@ -43,6 +43,7 @@
 #ifndef __MUM_HASH__
 #define __MUM_HASH__
 
+#include "config.h"
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
 #ifdef _MSC_VER
 typedef unsigned __int32 uint32_t;
 typedef unsigned __int64 uint64_t;
 #else
 #include <stdint.h>
 #endif
@@ -56,8 +57,8 @@
 #ifdef __GNUC__
 #define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
-#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
-#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#define _MUM_OPTIMIZE(opts) RSPAMD_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
 #else
 #define _MUM_ATTRIBUTE_UNUSED
 #define _MUM_OPTIMIZE(opts)

diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
index d8fc737d7..162fcd662 100644
--- a/src/libcryptobox/cryptobox.c
+++ b/src/libcryptobox/cryptobox.c
@@ -1459,20 +1459,17 @@ guint64
 rspamd_cryptobox_fast_hash (const void *data,
 		gsize len, guint64 seed)
 {
-	if (len % 2 == 0) {
+	if (len > 8 && len % 8 == 0) {
 		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_MUMHASH,
 				data, len, seed);
 	}
 	else {
 #if defined(__LP64__) || defined(_LP64)
-		if (len > 8) {
-			return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
-					data, len, seed);
-		}
+		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+				data, len, seed);
 #endif
 	}
-
 	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH32,
 			data, len, seed);
 }
@@ -1490,7 +1487,9 @@ rspamd_cryptobox_fast_hash_specific (
 	case RSPAMD_CRYPTOBOX_XXHASH64:
 		return XXH64 (data, len, seed);
 	case RSPAMD_CRYPTOBOX_MUMHASH:
-	default:
 		return mum_hash (data, len, seed);
+	case RSPAMD_CRYPTOBOX_HASHFAST:
+	default:
+		return rspamd_cryptobox_fast_hash (data, len, seed);
 	}
 }

diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index 18e66df60..c44044f48 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -357,7 +357,8 @@ guint64 rspamd_cryptobox_fast_hash (const void *data,
 enum rspamd_cryptobox_fast_hash_type {
 	RSPAMD_CRYPTOBOX_XXHASH64 = 0,
 	RSPAMD_CRYPTOBOX_XXHASH32,
-	RSPAMD_CRYPTOBOX_MUMHASH
+	RSPAMD_CRYPTOBOX_MUMHASH,
+	RSPAMD_CRYPTOBOX_HASHFAST
 };
 /**
  * Platform independent version
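The length-based dispatch this commit introduces is easy to misread in diff form; restated as an equivalent sketch (not part of the patch; glib types assumed from the headers above):

static guint64
fast_hash_sketch (const void *data, gsize len, guint64 seed)
{
	if (len > 8 && len % 8 == 0) {
		/* whole 64-bit words: mumhash's fastest case */
		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_MUMHASH,
				data, len, seed);
	}
#if defined(__LP64__) || defined(_LP64)
	/* odd lengths on 64-bit builds */
	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
			data, len, seed);
#else
	/* odd lengths on 32-bit builds */
	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH32,
			data, len, seed);
#endif
}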
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
index 3e238fa5c..8d1b147db 100644
--- a/src/libutil/shingles.c
+++ b/src/libutil/shingles.c
@@ -19,7 +19,7 @@
 
 #define SHINGLES_WINDOW 3
 
-struct rspamd_shingle*
+struct rspamd_shingle* RSPAMD_OPTIMIZE("unroll-loops")
 rspamd_shingles_generate (GArray *input,
 		const guchar key[16],
 		rspamd_mempool_t *pool,
@@ -28,7 +28,7 @@ rspamd_shingles_generate (GArray *input,
 		enum rspamd_shingle_alg alg)
 {
 	struct rspamd_shingle *res;
-	GArray *hashes[RSPAMD_SHINGLE_SIZE];
+	guint64 **hashes;
 	rspamd_sipkey_t keys[RSPAMD_SHINGLE_SIZE];
 	guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key;
 	const guchar *cur_key;
@@ -36,7 +36,8 @@ rspamd_shingles_generate (GArray *input,
 	rspamd_ftok_t *word;
 	rspamd_cryptobox_hash_state_t bs;
 	guint64 val;
-	gint i, j, k, beg = 0;
+	gint i, j, k;
+	gsize hlen, beg = 0;
 	enum rspamd_cryptobox_fast_hash_type ht;
 
 	if (pool != NULL) {
@@ -52,9 +53,11 @@ rspamd_shingles_generate (GArray *input,
 	out_key = (guchar *)&keys[0];
 
 	/* Init hashes pipes and keys */
+	hashes = g_slice_alloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE);
+	hlen = input->len > SHINGLES_WINDOW ?
+			(input->len - SHINGLES_WINDOW + 1) : 1;
+
 	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
-		hashes[i] = g_array_sized_new (FALSE, FALSE, sizeof (guint64),
-				input->len + SHINGLES_WINDOW);
+		hashes[i] = g_slice_alloc (hlen * sizeof (guint64));
 		/*
 		 * To generate a set of hashes we just apply sha256 to the
 		 * initial key as many times as many hashes are required and
@@ -80,29 +83,34 @@ rspamd_shingles_generate (GArray *input,
 				word = &g_array_index (input, rspamd_ftok_t, j);
 				row = rspamd_fstring_append (row, word->begin, word->len);
 			}
-			beg++;
 
 			/* Now we need to create a new row here */
 			for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
 				rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len,
 						keys[j]);
-				g_array_append_val (hashes[j], val);
+				g_assert (hlen > beg);
+				hashes[j][beg] = val;
 			}
 
+			beg++;
+
 			row = rspamd_fstring_assign (row, "", 0);
 		}
 	}
	}
 	else {
 		guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE];
-		guint64 RSPAMD_ALIGNED(32) tmpbuf[16];
-		guint rlen;
 
-		if (alg == RSPAMD_SHINGLES_XXHASH) {
+		switch (alg) {
+		case RSPAMD_SHINGLES_XXHASH:
 			ht = RSPAMD_CRYPTOBOX_XXHASH64;
-		}
-		else {
+			break;
+		case RSPAMD_SHINGLES_MUMHASH:
 			ht = RSPAMD_CRYPTOBOX_MUMHASH;
+			break;
+		default:
+			ht = RSPAMD_CRYPTOBOX_HASHFAST;
+			break;
 		}
 
 		memset (res, 0, sizeof (res));
@@ -119,26 +127,17 @@ rspamd_shingles_generate (GArray *input,
 				word = &g_array_index (input, rspamd_ftok_t, beg);
 
 				/* Insert the last element to the pipe */
-				if (word->len >= sizeof (tmpbuf)) {
-					rlen = sizeof (tmpbuf);
-					memcpy (tmpbuf, word->begin, rlen);
-				}
-				else {
-					rlen = word->len / sizeof (guint64) + 1;
-					memset (tmpbuf, 0, rlen * sizeof (guint64));
-					memcpy (tmpbuf, word->begin, word->len);
-				}
-
 				res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] =
 						rspamd_cryptobox_fast_hash_specific (ht,
-								tmpbuf,rlen * sizeof (guint64),
+								word->begin, word->len,
 								*(guint64 *)keys[j]);
 				val = 0;
 
 				for (k = 0; k < SHINGLES_WINDOW; k ++) {
 					val ^= res[j * SHINGLES_WINDOW + k];
 				}
 
-				g_array_append_val (hashes[j], val);
+				g_assert (hlen > beg);
+				hashes[j][beg] = val;
 			}
 			beg++;
 		}
@@ -147,11 +146,13 @@ rspamd_shingles_generate (GArray *input,
 
 	/* Now we need to filter all hashes and make a shingles result */
 	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
-		res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len,
+		res->hashes[i] = filter (hashes[i], hlen,
 				i, key, filterd);
-		g_array_free (hashes[i], TRUE);
+		g_slice_free1 (hlen * sizeof (guint64), hashes[i]);
 	}
 
+	g_slice_free1 (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE, hashes);
+
 	rspamd_fstring_free (row);
 
 	return res;

diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h
index a46a2fc0f..fd7d3bfc7 100644
--- a/src/libutil/shingles.h
+++ b/src/libutil/shingles.h
@@ -29,6 +29,7 @@ enum rspamd_shingle_alg {
 	RSPAMD_SHINGLES_OLD = 0,
 	RSPAMD_SHINGLES_XXHASH,
 	RSPAMD_SHINGLES_MUMHASH,
+	RSPAMD_SHINGLES_FAST
 };
 
 /**

diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c
index a4289497e..5669137d4 100644
--- a/test/rspamd_shingles_test.c
+++ b/test/rspamd_shingles_test.c
@@ -93,10 +93,10 @@ test_case (gsize cnt, gsize max_len, gdouble perm_factor,
 	ottery_rand_bytes (key, sizeof (key));
 	input = generate_fuzzy_words (cnt, max_len);
-	ts1 = rspamd_get_ticks ();
+	ts1 = rspamd_get_virtual_ticks ();
 	sgl = rspamd_shingles_generate (input, key,
 			NULL, rspamd_shingles_default_filter, NULL, alg);
-	ts2 = rspamd_get_ticks ();
+	ts2 = rspamd_get_virtual_ticks ();
 	permute_vector (input, perm_factor);
 	sgl_permuted = rspamd_shingles_generate (input, key,
 			NULL, rspamd_shingles_default_filter, NULL, alg);
@@ -118,7 +118,7 @@ rspamd_shingles_test_func (void)
 {
 	enum rspamd_shingle_alg alg = RSPAMD_SHINGLES_OLD;
 
-	for (alg = RSPAMD_SHINGLES_OLD; alg <= RSPAMD_SHINGLES_MUMHASH; alg ++) {
+	for (alg = RSPAMD_SHINGLES_OLD; alg <= RSPAMD_SHINGLES_FAST; alg ++) {
 		test_case (200, 10, 0.1, alg);
 		test_case (500, 20, 0.01, alg);
 		test_case (5000, 20, 0.01, alg);
--
cgit v1.2.3
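The hot loop of this commit is easier to follow outside the diff: for each of the RSPAMD_SHINGLE_SIZE pipes, a window of SHINGLES_WINDOW word hashes is kept and XORed into one value per position, yielding hlen = nwords - SHINGLES_WINDOW + 1 values. A compact sketch of the idea (not part of the patch; names are ad hoc and boundary handling is simplified):

#include <stdint.h>
#include <stddef.h>

#define WINDOW 3

/* hashes_out[pos] receives the XOR of the window of word hashes ending
   at word pos; word_hashes stands in for the per-word fast hashes that
   rspamd_cryptobox_fast_hash_specific () produces. */
static void
rolling_shingle_sketch (const uint64_t *word_hashes, size_t nwords,
                        uint64_t *hashes_out)
{
  uint64_t win[WINDOW] = {0, 0, 0};
  size_t i, k, pos = 0;

  for (i = 0; i < nwords; i++) {
    /* shift the window and insert the new word hash at the end */
    for (k = 0; k < WINDOW - 1; k++)
      win[k] = win[k + 1];
    win[WINDOW - 1] = word_hashes[i];
    if (i >= WINDOW - 1) {
      uint64_t val = 0;
      for (k = 0; k < WINDOW; k++)
        val ^= win[k];
      hashes_out[pos++] = val;
    }
  }
}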
From b00d4cd3cc0e70e90f50a8fed294754a4c6e9475 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 12 May 2016 12:45:17 +0100
Subject: [Feature] Use metrohash as well

---
 contrib/metrohash/metro.h    | 141 +++++++++++++++++++++++++++++++++++++++++++
 src/libcryptobox/cryptobox.c |  36 ++++++++---
 src/libcryptobox/cryptobox.h |   4 +-
 3 files changed, 170 insertions(+), 11 deletions(-)
 create mode 100644 contrib/metrohash/metro.h
(limited to 'contrib')

diff --git a/contrib/metrohash/metro.h b/contrib/metrohash/metro.h
new file mode 100644
index 000000000..36c44d65c
--- /dev/null
+++ b/contrib/metrohash/metro.h
@@ -0,0 +1,141 @@
+/*-
+The MIT License (MIT)
+
+Copyright (c) 2015 J. Andrew Rogers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+ */
+#ifndef CONTRIB_METROHASH_METRO_H_
+#define CONTRIB_METROHASH_METRO_H_
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef _MSC_VER
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* Rotate right idiom recognized by the compiler */
+inline static uint64_t rotate_right(uint64_t v, unsigned k)
+{
+    return (v >> k) | (v << (64 - k));
+}
+
+inline static uint64_t read_u64(const void * const ptr)
+{
+    return * (uint64_t *) ptr;
+}
+
+inline static uint64_t read_u32(const void * const ptr)
+{
+    return * (uint32_t *) ptr;
+}
+
+inline static uint64_t read_u16(const void * const ptr)
+{
+    return * (uint16_t *) ptr;
+}
+
+inline static uint64_t read_u8 (const void * const ptr)
+{
+    return * (uint8_t *) ptr;
+}
+
+static inline uint64_t metrohash64_1(const uint8_t * key, uint64_t len, uint64_t seed)
+{
+    static const uint64_t k0 = 0xC83A91E1;
+    static const uint64_t k1 = 0x8648DBDB;
+    static const uint64_t k2 = 0x7BDEC03B;
+    static const uint64_t k3 = 0x2F5870A5;
+
+    const uint8_t * ptr = key;
+    const uint8_t * const end = ptr + len;
+
+    uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len;
+
+    if (len >= 32)
+    {
+        uint64_t v[4];
+        v[0] = hash;
+        v[1] = hash;
+        v[2] = hash;
+        v[3] = hash;
+
+        do
+        {
+            v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
+            v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
+            v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
+            v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
+        }
+        while (ptr <= (end - 32));
+
+        v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1;
+        v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0;
+        v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1;
+        v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
+        hash += v[0] ^ v[1];
+    }
+
+    if ((end - ptr) >= 16)
+    {
+        uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1;
+        uint64_t v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2;
+        v0 ^= rotate_right(v0 * k0, 35) + v1;
+        v1 ^= rotate_right(v1 * k3, 35) + v0;
+        hash += v1;
+    }
+
+    if ((end - ptr) >= 8)
+    {
+        hash += read_u64(ptr) * k3; ptr += 8;
+        hash ^= rotate_right(hash, 33) * k1;
+    }
+
+    if ((end - ptr) >= 4)
+    {
+        hash += read_u32(ptr) * k3; ptr += 4;
+        hash ^= rotate_right(hash, 15) * k1;
+    }
+
+    if ((end - ptr) >= 2)
+    {
+        hash += read_u16(ptr) * k3; ptr += 2;
+        hash ^= rotate_right(hash, 13) * k1;
+    }
+
+    if ((end - ptr) >= 1)
+    {
+        hash += read_u8 (ptr) * k3;
+        hash ^= rotate_right(hash, 25) * k1;
+    }
+
+    hash ^= rotate_right(hash, 33);
+    hash *= k0;
+    hash ^= rotate_right(hash, 33);
+
+    return hash;
+}
+
+#endif /* CONTRIB_METROHASH_METRO_H_ */
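Like mum.h, the header is self-contained, so exercising it takes only a few lines. A quick usage sketch (not part of the patch; the input string and seed are arbitrary):

#include <stdio.h>
#include "metro.h"

int
main (void)
{
  const uint8_t key[] = "some non-crypto payload";
  /* note that read_u64 () above does direct unaligned loads, so the
     header is only suitable for targets that tolerate them */
  printf ("%016llx\n",
          (unsigned long long) metrohash64_1 (key, sizeof (key) - 1, 0));
  return 0;
}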
diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
index c6eb3310a..71180da12 100644
--- a/src/libcryptobox/cryptobox.c
+++ b/src/libcryptobox/cryptobox.c
@@ -34,7 +34,7 @@
 #include "xxhash.h"
 #define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */
 #include "../../contrib/mumhash/mum.h"
-
+#include "../../contrib/metrohash/metro.h"
 #ifdef HAVE_CPUID_H
 #include <cpuid.h>
 #endif
@@ -1455,25 +1455,39 @@ rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st)
 /**
  * One in all function
  */
-guint64
-rspamd_cryptobox_fast_hash (const void *data,
+static inline guint64
+rspamd_cryptobox_fast_hash_machdep (const void *data,
 		gsize len, guint64 seed)
 {
 	if (len >= 8 && len % 8 == 0) {
-		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_MUMHASH,
-				data, len, seed);
+		return mum_hash (data, len, seed);
 	}
 	else {
 #if defined(__LP64__) || defined(_LP64)
-		return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
-				data, len, seed);
+		return metrohash64_1 (data, len, seed);
 #endif
 	}
 
-	return rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH32,
-			data, len, seed);
+	return XXH32 (data, len, seed);
 }
 
+static inline guint64
+rspamd_cryptobox_fast_hash_indep (const void *data,
+		gsize len, guint64 seed)
+{
+	if (len >= 8 && len % 8 == 0) {
+		return mum_hash (data, len, seed);
+	}
+
+	return metrohash64_1 (data, len, seed);
+}
+
+guint64
+rspamd_cryptobox_fast_hash (const void *data,
+		gsize len, guint64 seed)
+{
+	return rspamd_cryptobox_fast_hash_machdep (data, len, seed);
+}
 
 guint64
 rspamd_cryptobox_fast_hash_specific (
@@ -1488,8 +1502,10 @@ rspamd_cryptobox_fast_hash_specific (
 		return XXH64 (data, len, seed);
 	case RSPAMD_CRYPTOBOX_MUMHASH:
 		return mum_hash (data, len, seed);
+	case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT:
+		return rspamd_cryptobox_fast_hash_indep (data, len, seed);
 	case RSPAMD_CRYPTOBOX_HASHFAST:
 	default:
-		return rspamd_cryptobox_fast_hash (data, len, seed);
+		return rspamd_cryptobox_fast_hash_machdep (data, len, seed);
 	}
 }

diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index c44044f48..6facf0a0e 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -358,7 +358,9 @@ enum rspamd_cryptobox_fast_hash_type {
 	RSPAMD_CRYPTOBOX_XXHASH64 = 0,
 	RSPAMD_CRYPTOBOX_XXHASH32,
 	RSPAMD_CRYPTOBOX_MUMHASH,
-	RSPAMD_CRYPTOBOX_HASHFAST
+	RSPAMD_CRYPTOBOX_METROHASH,
+	RSPAMD_CRYPTOBOX_HASHFAST,
+	RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT
 };
 /**
  * Platform independent version
--
cgit v1.2.3
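Taken together, the final commit leaves callers with two entry points: the machine-dependent fast path (mum/metro/xxhash picked per length and word size, with no stability guarantee across platforms) and an explicitly platform-independent variant. A minimal usage sketch (not from the patches; the function names and the seed are ad hoc):

#include <glib.h>
#include "cryptobox.h"

/* Hash a key for a purely in-memory table: the machine-dependent
 * variant is the fastest choice. */
static guint64
hash_for_local_table (const void *key, gsize len)
{
	return rspamd_cryptobox_fast_hash (key, len, 0xdeadbeefULL);
}

/* Hash a key whose value is stored or compared across builds: ask
 * explicitly for the independent variant (mum with
 * MUM_TARGET_INDEPENDENT_HASH, metrohash for the tail cases). */
static guint64
hash_for_storage (const void *key, gsize len)
{
	return rspamd_cryptobox_fast_hash_specific (
			RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, key, len, 0xdeadbeefULL);
}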