@@ -1242,6 +1242,7 @@ ADD_SUBDIRECTORY(contrib/librdns) | |||
ADD_SUBDIRECTORY(contrib/aho-corasick) | |||
ADD_SUBDIRECTORY(contrib/lpeg) | |||
ADD_SUBDIRECTORY(contrib/linenoise) | |||
ADD_SUBDIRECTORY(contrib/t1ha) | |||
IF (ENABLE_SNOWBALL MATCHES "ON") | |||
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES stemmer) | |||
@@ -1252,6 +1253,7 @@ ENDIF() | |||
IF(ENABLE_HYPERSCAN MATCHES "OFF") | |||
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-actrie) | |||
ENDIF() | |||
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-t1ha) | |||
IF(ENABLE_CLANG_PLUGIN MATCHES "ON") | |||
ADD_SUBDIRECTORY(clang-plugin) |
@@ -0,0 +1,14 @@ | |||
SET(T1HASRC t1ha0.c | |||
t1ha0_ia32aes_noavx.c | |||
t1ha1.c | |||
t1ha2.c) | |||
ADD_LIBRARY(rspamd-t1ha STATIC ${T1HASRC}) | |||
SET_TARGET_PROPERTIES(rspamd-t1ha PROPERTIES VERSION ${RSPAMD_VERSION}) | |||
ADD_DEFINITIONS("-DT1HA_USE_FAST_ONESHOT_READ=1") | |||
IF(ENABLE_FULL_DEBUG MATCHES "OFF")
IF("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
SET_TARGET_PROPERTIES(rspamd-t1ha PROPERTIES COMPILE_FLAGS "-O3")
ENDIF()
ENDIF()
@@ -0,0 +1,21 @@ | |||
Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, | |||
Fast Positive Hash. | |||
Portions Copyright (c) 2010-2013 Leonid Yuriev <leo@yuriev.ru>, | |||
The 1Hippeus project (t1h). | |||
This software is provided 'as-is', without any express or implied | |||
warranty. In no event will the authors be held liable for any damages | |||
arising from the use of this software. | |||
Permission is granted to anyone to use this software for any purpose, | |||
including commercial applications, and to alter it and redistribute it | |||
freely, subject to the following restrictions: | |||
1. The origin of this software must not be misrepresented; you must not | |||
claim that you wrote the original software. If you use this software | |||
in a product, an acknowledgement in the product documentation would be | |||
appreciated but is not required. | |||
2. Altered source versions must be plainly marked as such, and must not be | |||
misrepresented as being the original software. | |||
3. This notice may not be removed or altered from any source distribution. |
@@ -1,8 +1,8 @@ | |||
/* | |||
 * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
* Fast Positive Hash. | |||
* | |||
 * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
* The 1Hippeus project (t1h). | |||
* | |||
* This software is provided 'as-is', without any express or implied | |||
@@ -23,534 +23,400 @@ | |||
*/ | |||
/* | |||
 * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
* by [Positive Technologies](https://www.ptsecurity.ru) | |||
* | |||
 * Briefly, it is a 64-bit Hash Function:
 *  1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *     but portable; it runs without penalty on any 64-bit CPU.
 *  2. In most cases up to 15% faster than City64, xxHash, mum-hash,
 *     metro-hash and all other portable hash functions (which do not use
 *     specific hardware tricks).
 *  3. Not suitable for cryptography.
 *
 * The Future will be Positive. Everything will be fine.
* | |||
* ACKNOWLEDGEMENT: | |||
 * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
* for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! | |||
*/ | |||
#pragma once

#include "config.h"
#include <string.h>
#include <stddef.h>

#ifndef __has_attribute
#define __has_attribute(x) (0)
#endif
#ifndef __has_builtin
#define __has_builtin(x) (0)
#endif
#ifndef __has_include
#define __has_include(x) (0)
#endif
#ifndef __GNUC_PREREQ
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#define __GNUC_PREREQ(maj, min)                                                \
  ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#else
#define __GNUC_PREREQ(maj, min) 0
#endif
#endif /* __GNUC_PREREQ */

#ifndef __CLANG_PREREQ
#ifdef __clang__
#define __CLANG_PREREQ(maj, min)                                               \
  ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min))
#else
#define __CLANG_PREREQ(maj, min) (0)
#endif
#endif /* __CLANG_PREREQ */
/*****************************************************************************/ | |||
#ifdef _MSC_VER | |||
/* Avoid '16' bytes padding added after data member 't1ha_context::total' | |||
* and other warnings from std-headers if warning-level > 3. */ | |||
#pragma warning(push, 3) | |||
#endif | |||
#if defined(__cplusplus) && __cplusplus >= 201103L | |||
#include <climits> | |||
#include <cstddef> | |||
#include <cstdint> | |||
#else | |||
#include <limits.h> | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#endif | |||
/*****************************************************************************/ | |||
#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \ | |||
defined(i486) || defined(__i486) || defined(__i486__) || \ | |||
    defined(i586) || defined(__i586) || defined(__i586__) || defined(i686) || \
defined(__i686) || defined(__i686__) || defined(_M_IX86) || \ | |||
defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \ | |||
defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \ | |||
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \ | |||
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__) | |||
#ifndef __ia32__ | |||
/* LY: define neutral __ia32__ for x86 and x86-64 archs */ | |||
#define __ia32__ 1 | |||
#endif /* __ia32__ */ | |||
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \ | |||
defined(__amd64) || defined(_M_X64)) | |||
/* LY: define trusty __amd64__ for all AMD64/x86-64 arch */ | |||
#define __amd64__ 1 | |||
#endif /* __amd64__ */ | |||
#endif /* all x86 */ | |||
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \ | |||
    !defined(__ORDER_BIG_ENDIAN__)
/* *INDENT-OFF* */ | |||
/* clang-format off */ | |||
#if defined(__GLIBC__) || defined(__GNU_LIBRARY__) || defined(__ANDROID__) || \ | |||
defined(HAVE_ENDIAN_H) || __has_include(<endian.h>) | |||
#include <endian.h> | |||
#elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) || \ | |||
defined(HAVE_MACHINE_ENDIAN_H) || __has_include(<machine/endian.h>) | |||
#include <machine/endian.h> | |||
#elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include(<sys/isa_defs.h>) | |||
#include <sys/isa_defs.h> | |||
#elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) || \ | |||
(__has_include(<sys/types.h>) && __has_include(<sys/endian.h>)) | |||
#include <sys/endian.h> | |||
#include <sys/types.h> | |||
#elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) || \ | |||
defined(__NETBSD__) || defined(__NetBSD__) || \ | |||
defined(HAVE_SYS_PARAM_H) || __has_include(<sys/param.h>) | |||
#include <sys/param.h> | |||
#endif /* OS */ | |||
/* *INDENT-ON* */ | |||
/* clang-format on */ | |||
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) | |||
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN | |||
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN | |||
#define __BYTE_ORDER__ __BYTE_ORDER | |||
#elif defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN) | |||
#define __ORDER_LITTLE_ENDIAN__ _LITTLE_ENDIAN | |||
#define __ORDER_BIG_ENDIAN__ _BIG_ENDIAN | |||
#define __BYTE_ORDER__ _BYTE_ORDER | |||
#else | |||
#define __ORDER_LITTLE_ENDIAN__ 1234 | |||
#define __ORDER_BIG_ENDIAN__ 4321 | |||
#if defined(__LITTLE_ENDIAN__) || \ | |||
(defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \ | |||
defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \ | |||
defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ | |||
defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || \ | |||
defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) || \ | |||
defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || \ | |||
defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || \ | |||
defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \ | |||
defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || \ | |||
defined(__WINDOWS__) | |||
#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ | |||
#elif defined(__BIG_ENDIAN__) || \ | |||
(defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \ | |||
defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \ | |||
defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \ | |||
defined(__m68k__) || defined(M68000) || defined(__hppa__) || \ | |||
defined(__hppa) || defined(__HPPA__) || defined(__sparc__) || \ | |||
defined(__sparc) || defined(__370__) || defined(__THW_370__) || \ | |||
defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__) | |||
#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__ | |||
#else | |||
#error __BYTE_ORDER__ should be defined. | |||
#endif /* Arch */ | |||
#endif | |||
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ | |||
__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ | |||
#error Unsupported byte order. | |||
#endif | |||
#endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */ | |||
/*****************************************************************************/ | |||
#if !defined(UNALIGNED_OK)
#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) ||                \
    defined(_M_X64) || defined(i386) || defined(_X86_) ||                      \
    defined(__i386__) || defined(_X86_64_)
#define UNALIGNED_OK 1
#else
#define UNALIGNED_OK 0
#endif
#endif /* UNALIGNED_OK */

#ifndef __dll_export
#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__)
#if defined(__GNUC__) || __has_attribute(dllexport)
#define __dll_export __attribute__((dllexport))
#elif defined(_MSC_VER)
#define __dll_export __declspec(dllexport)
#else
#define __dll_export
#endif
#elif defined(__GNUC__) || __has_attribute(visibility)
#define __dll_export __attribute__((visibility("default")))
#else
#define __dll_export
#endif
#endif /* __dll_export */
#ifndef __dll_import
#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__)
#if defined(__GNUC__) || __has_attribute(dllimport)
#define __dll_import __attribute__((dllimport))
#elif defined(_MSC_VER)
#define __dll_import __declspec(dllimport)
#else
#define __dll_import
#endif
#else
#define __dll_import
#endif
#endif /* __dll_import */
#if defined(t1ha_EXPORTS) | |||
#define T1HA_API __dll_export | |||
#elif defined(t1ha_IMPORTS) | |||
#define T1HA_API __dll_import | |||
#else | |||
#define T1HA_API | |||
#endif /* T1HA_API */ | |||
#if __GNUC_PREREQ(4, 4) || defined(__clang__)
#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#endif
#define likely(cond) __builtin_expect(!!(cond), 1)
#define unlikely(cond) __builtin_expect(!!(cond), 0)
#if __GNUC_PREREQ(4, 6) || defined(__clang__)
#define unreachable() __builtin_unreachable()
#else
#define unreachable()                                                          \
  do {                                                                         \
    for (;;)                                                                   \
      ;                                                                        \
  } while (0)
#endif
#define bswap64(v) __builtin_bswap64(v)
#define bswap32(v) __builtin_bswap32(v)
#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
#define bswap16(v) __builtin_bswap16(v)
#endif
#if __GNUC_PREREQ(4, 3) || __has_attribute(unused)
#define maybe_unused __attribute__((unused))
#endif
#elif defined(_MSC_VER)
#include <intrin.h>
#include <stdlib.h>
#define likely(cond) (cond)
#define unlikely(cond) (cond)
#define unreachable() __assume(0)
#define bswap64(v) _byteswap_uint64(v)
#define bswap32(v) _byteswap_ulong(v)
#define bswap16(v) _byteswap_ushort(v)
#define rot64(v, s) _rotr64(v, s)
#define rot32(v, s) _rotr(v, s)
#if defined(_M_ARM64) || defined(_M_X64)
#pragma intrinsic(_umul128)
#define mul_64x64_128(a, b, ph) _umul128(a, b, ph)
#pragma intrinsic(__umulh)
#define mul_64x64_high(a, b) __umulh(a, b)
#endif
#if defined(_M_IX86)
#pragma intrinsic(__emulu)
#define mul_32x32_64(a, b) __emulu(a, b)
#elif defined(_M_ARM)
#define mul_32x32_64(a, b) _arm_umull(a, b)
#endif
#else /* Compiler */
#define likely(cond) (cond)
#define unlikely(cond) (cond)
#define unreachable()                                                          \
  do                                                                           \
    for (;;)                                                                   \
      ;                                                                        \
  while (0)
#endif /* Compiler */

#if defined(_MSC_VER) && defined(__ia32__)
#define T1HA_ALIGN_PREFIX __declspec(align(32)) /* required only for SIMD */
#else
#define T1HA_ALIGN_PREFIX
#endif /* _MSC_VER */

#if defined(__GNUC__) && defined(__ia32__)
#define T1HA_ALIGN_SUFFIX                                                      \
  __attribute__((aligned(32))) /* required only for SIMD */
#else
#define T1HA_ALIGN_SUFFIX
#endif /* GCC x86 */

#ifdef __cplusplus
extern "C" {
#endif
typedef union T1HA_ALIGN_PREFIX t1ha_state256 { | |||
uint8_t bytes[32]; | |||
uint32_t u32[8]; | |||
uint64_t u64[4]; | |||
struct { | |||
uint64_t a, b, c, d; | |||
} n; | |||
} t1ha_state256_t T1HA_ALIGN_SUFFIX; | |||
typedef struct t1ha_context { | |||
t1ha_state256_t state; | |||
t1ha_state256_t buffer; | |||
size_t partial; | |||
uint64_t total; | |||
} t1ha_context_t; | |||
#ifdef _MSC_VER | |||
#pragma warning(pop) | |||
#endif | |||
#ifndef bswap64 | |||
static __inline uint64_t bswap64(uint64_t v) { | |||
return v << 56 | v >> 56 | ((v << 40) & 0x00ff000000000000ull) | | |||
((v << 24) & 0x0000ff0000000000ull) | | |||
((v << 8) & 0x000000ff00000000ull) | | |||
((v >> 8) & 0x00000000ff000000ull) | | |||
((v >> 24) & 0x0000000000ff0000ull) | | |||
((v >> 40) & 0x000000000000ff00ull); | |||
} | |||
#endif /* bswap64 */ | |||
/****************************************************************************** | |||
* | |||
* t1ha2 = 64 and 128-bit, SLIGHTLY MORE ATTENTION FOR QUALITY AND STRENGTH. | |||
* | |||
 *  - The recommended version of "Fast Positive Hash" with good quality
 *    for checksums, hash tables and fingerprinting.
 *  - Portable and extremely efficient on modern 64-bit CPUs.
 *    Designed for 64-bit little-endian platforms;
 *    elsewhere it will run slowly.
 *  - Great hashing quality, and still faster than other non-t1ha hashes.
 *    Provides a streaming mode and a 128-bit result.
 *
 * Note: for performance reasons the 64- and 128-bit results are completely
 * different from each other, i.e. the 64-bit result is NOT any part of
 * the 128-bit one.
*/ | |||
#ifndef bswap32 | |||
static __inline uint32_t bswap32(uint32_t v) { | |||
return v << 24 | v >> 24 | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00); | |||
} | |||
#endif /* bswap32 */ | |||
/* The at-once variant with 64-bit result */ | |||
T1HA_API uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed); | |||
/* The at-once variant with 128-bit result.
 * Argument `extra_result` is NOT optional and MUST be valid.
 * The high 64-bit part of the 128-bit hash is always unconditionally
 * stored to the address given by the `extra_result` argument. */
T1HA_API uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, | |||
const void *__restrict data, size_t length, | |||
uint64_t seed); | |||
/* The init/update/final trinity for streaming.
 * Returns a 64- or 128-bit result depending on the `extra_result` argument. */
T1HA_API void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y); | |||
T1HA_API void t1ha2_update(t1ha_context_t *__restrict ctx, | |||
const void *__restrict data, size_t length); | |||
/* Argument `extra_result` is optional and MAY be NULL.
 *  - If `extra_result` is NOT NULL, the 128-bit hash is calculated,
 *    and the high 64-bit part of it is stored to the address given
 *    by the `extra_result` argument.
 *  - Otherwise the 64-bit hash is calculated
 *    and returned from the function directly.
 *
 * Note: for performance reasons the 64- and 128-bit results are completely
 * different from each other, i.e. the 64-bit result is NOT any part of
 * the 128-bit one. */
T1HA_API uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, | |||
uint64_t *__restrict extra_result /* optional */); | |||
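/* Usage sketch (illustrative only, not part of the upstream API):
 * hashing a buffer in arbitrary chunks with the init/update/final trinity.
 * hash_chunks() and its parameters are hypothetical names. */
#if 0
static uint64_t hash_chunks(const uint8_t *p, size_t n, size_t chunk) {
  t1ha_context_t ctx;
  t1ha2_init(&ctx, /* seed_x */ 42, /* seed_y */ 0);
  while (n != 0) {
    size_t step = (n < chunk) ? n : chunk;
    t1ha2_update(&ctx, p, step);
    p += step;
    n -= step;
  }
  /* NULL extra_result => plain 64-bit hash */
  return t1ha2_final(&ctx, NULL);
}
#endif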
#ifndef bswap16 | |||
static __inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } | |||
#endif /* bswap16 */ | |||
/****************************************************************************** | |||
* | |||
* t1ha1 = 64-bit, BASELINE FAST PORTABLE HASH: | |||
* | |||
 *  - Runs fast on 64-bit platforms; in other cases it may run slowly.
 *  - Portable and stable: returns the same 64-bit result
 *    on all architectures and CPUs.
 *  - Unfortunately it fails the "strict avalanche criteria";
 *    see test results at https://github.com/demerphq/smhasher.
 *
 * This flaw is insignificant for t1ha1()'s purposes and imperceptible
 * from a practical point of view.
 * Nevertheless, the issue is resolved in the newer t1ha2(),
 * which was planned from the start to provide somewhat better quality.
*/ | |||
#ifndef rot64 | |||
static __inline uint64_t rot64(uint64_t v, unsigned s) { | |||
return (v >> s) | (v << (64 - s)); | |||
} | |||
#endif /* rot64 */ | |||
/* The little-endian variant. */ | |||
T1HA_API uint64_t t1ha1_le(const void *data, size_t length, uint64_t seed); | |||
#ifndef rot32 | |||
static __inline uint32_t rot32(uint32_t v, unsigned s) { | |||
return (v >> s) | (v << (32 - s)); | |||
} | |||
#endif /* rot32 */ | |||
/* The big-endian variant. */ | |||
T1HA_API uint64_t t1ha1_be(const void *data, size_t length, uint64_t seed); | |||
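/* Usage sketch (illustrative only): one-shot hashing with t1ha1.
 * Each variant yields a stable value for a given input on any CPU;
 * example_key_hash() is a hypothetical helper. */
#if 0
static uint64_t example_key_hash(void) {
  static const char example_key[] = "example-key";
  return t1ha1_le(example_key, sizeof(example_key) - 1, /* seed */ 0);
}
#endif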
#ifndef mul_32x32_64
static __inline uint64_t mul_32x32_64(uint32_t a, uint32_t b) {
  return a * (uint64_t)b;
}
#endif /* mul_32x32_64 */

/* The historical nickname for the generic little-endian variant. */
static __inline uint64_t t1ha(const void *data, size_t length, uint64_t seed) {
  return t1ha1_le(data, length, seed);
}
/***************************************************************************/ | |||
static __inline uint64_t fetch64(const void *v) { | |||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |||
return *(const uint64_t *)v; | |||
#else | |||
return bswap64(*(const uint64_t *)v); | |||
#endif | |||
} | |||
/****************************************************************************** | |||
* | |||
 * t1ha0 = 64-bit, JUST ONLY FASTER:
 *
 *  - Provides fast-as-possible hashing for the current CPU, including
 *    32-bit systems, engaging the available hardware acceleration.
 *  - It is a facade that selects the most quick-and-dirty hash
 *    for the current processor.  For instance, on IA32 (x86) the actual
 *    function is selected at runtime, depending on the current CPU's
 *    capabilities.
 *
 * BE CAREFUL!!! THIS MEANS:
 *
 *   1. The quality of the hash is traded off against performance.
 *      So the quality and strength of t1ha0() may be lower than t1ha1(),
 *      especially on 32-bit targets, but it is then much faster.
 *      However, it is guaranteed to pass all SMHasher tests.
 *
 *   2. There is no warranty that the hash result will be the same for a
 *      particular key on another machine or another version of libt1ha.
 *
 *      Briefly: such hash results and their derivatives should be used
 *      only at runtime, and should not be persisted or transferred
 *      over a network.
 */
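/* Usage sketch (illustrative only): since t1ha0() may dispatch to different
 * implementations on different machines, restrict it to in-process data,
 * e.g. hash-table bucketing; bucket_of() is a hypothetical helper. */
#if 0
static size_t bucket_of(const void *key, size_t keylen,
                        uint64_t per_process_seed, size_t nbuckets) {
  /* per_process_seed should come from a per-process random source; the
   * result is only meaningful within this process's lifetime. */
  return (size_t)(t1ha0(key, keylen, per_process_seed) % nbuckets);
}
#endif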
static __inline uint64_t fetch32(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint32_t *)v;
#else
  return bswap32(*(const uint32_t *)v);
#endif
}

/* The little-endian variant for 32-bit CPU. */
uint64_t t1ha0_32le(const void *data, size_t length, uint64_t seed);
/* The big-endian variant for 32-bit CPU. */
uint64_t t1ha0_32be(const void *data, size_t length, uint64_t seed);

/* Define T1HA0_AESNI_AVAILABLE to 0 to disable AES-NI support. */
#ifndef T1HA0_AESNI_AVAILABLE
#if (defined(__ia32__) && (!defined(_M_IX86) || _MSC_VER > 1800))
#if defined(__GNUC__) &&                                                       \
    ((defined(__clang__) &&                                                    \
      (__clang_major__ >= 4 || (__clang_major__ >= 3 && __clang_minor__ >= 8))) || \
     ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8) || (__GNUC__ > 4)))
#define T1HA0_AESNI_AVAILABLE 1
#else
#define T1HA0_AESNI_AVAILABLE 0
#endif
#else
#define T1HA0_AESNI_AVAILABLE 0
#endif
#endif /* T1HA0_AESNI_AVAILABLE */
static __inline uint64_t fetch16(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint16_t *)v;
#else
  return bswap16(*(const uint16_t *)v);
#endif
}

/* Define T1HA0_RUNTIME_SELECT to 0 to disable dispatching t1ha0 at runtime. */
#ifndef T1HA0_RUNTIME_SELECT
#if T1HA0_AESNI_AVAILABLE && !defined(__e2k__)
#define T1HA0_RUNTIME_SELECT 1
#else
#define T1HA0_RUNTIME_SELECT 0
#endif
#endif /* T1HA0_RUNTIME_SELECT */
static __inline uint64_t fetch_tail(const void *v, size_t tail) { | |||
const uint8_t *_ = (const uint8_t *)v; | |||
switch (tail & 7) { | |||
case 1: | |||
return _[0]; | |||
case 2: | |||
return fetch16(_); | |||
case 3: | |||
return fetch16(_) | (_[2] << 16); | |||
case 4: | |||
return fetch32(_); | |||
case 5: | |||
return fetch32(_) | ((uint64_t)_[4] << 32); | |||
case 6: | |||
return fetch32(_) | (fetch16(_ + 4) << 32); | |||
case 7: | |||
return fetch32(_) | (fetch16(_ + 4) << 32) | ((uint64_t)_[6] << 48); | |||
case 0: | |||
return fetch64(_); | |||
default: | |||
unreachable(); | |||
} | |||
} | |||
/* xor-mul-xor mixer */ | |||
static __inline uint64_t mix(uint64_t v, uint64_t p) { | |||
static const unsigned s0 = 41; | |||
v *= p; | |||
return v ^ rot64(v, s0); | |||
} | |||
static __inline unsigned add_with_carry(uint64_t *sum, uint64_t addend) { | |||
*sum += addend; | |||
return *sum < addend; | |||
} | |||
#if T1HA0_AESNI_AVAILABLE | |||
uint64_t t1ha0_ia32aes_noavx(const void *data, size_t length, uint64_t seed); | |||
#endif /* T1HA0_AESNI_AVAILABLE */ | |||
/* xor high and low parts of the full 128-bit product */
static __inline uint64_t mux64(uint64_t v, uint64_t p) {
#ifdef __SIZEOF_INT128__
  __uint128_t r = (__uint128_t)v * (__uint128_t)p;
  /* modern GCC could nicely optimize this */
  return r ^ (r >> 64);
#elif defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128
  __uint128 r = (__uint128)v * (__uint128)p;
  return r ^ (r >> 64);
#elif defined(mul_64x64_128)
  uint64_t l, h;
  l = mul_64x64_128(v, p, &h);
  return l ^ h;
#elif defined(mul_64x64_high)
  uint64_t l, h;
  l = v * p;
  h = mul_64x64_high(v, p);
  return l ^ h;
#else
  /* performs 64x64-to-128-bit multiplication */
  uint64_t ll = mul_32x32_64((uint32_t)v, (uint32_t)p);
  uint64_t lh = mul_32x32_64(v >> 32, (uint32_t)p);
  uint64_t hl = mul_32x32_64(p >> 32, (uint32_t)v);
  uint64_t hh =
      mul_32x32_64(v >> 32, p >> 32) + (lh >> 32) + (hl >> 32) +
      /* A few simplifications are possible here for 32-bit architectures,
       * but they would break compatibility with the original 64-bit
       * version: a 32-bit t1ha would then be both (relatively) slow and
       * incompatible, which is clearly a bad trade. */
      add_with_carry(&ll, lh << 32) + add_with_carry(&ll, hl << 32);
  return hh ^ ll;
#endif
}
static uint64_t | |||
t1ha(const void *data, size_t len, uint64_t seed) | |||
{ | |||
/* 'magic' primes */ | |||
static const uint64_t p0 = 17048867929148541611ull; | |||
static const uint64_t p1 = 9386433910765580089ull; | |||
static const uint64_t p2 = 15343884574428479051ull; | |||
static const uint64_t p3 = 13662985319504319857ull; | |||
static const uint64_t p4 = 11242949449147999147ull; | |||
static const uint64_t p5 = 13862205317416547141ull; | |||
static const uint64_t p6 = 14653293970879851569ull; | |||
/* rotations */ | |||
static const unsigned s0 = 41; | |||
static const unsigned s1 = 17; | |||
static const unsigned s2 = 31; | |||
uint64_t a = seed; | |||
uint64_t b = len; | |||
const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK; | |||
uint64_t align[4]; | |||
if (unlikely(len > 32)) { | |||
uint64_t c = rot64(len, s1) + seed; | |||
uint64_t d = len ^ rot64(seed, s1); | |||
const void *detent = (const uint8_t *)data + len - 31; | |||
do { | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_align)) | |||
v = (const uint64_t *)memcpy(&align, v, 32); | |||
uint64_t w0 = fetch64(v + 0); | |||
uint64_t w1 = fetch64(v + 1); | |||
uint64_t w2 = fetch64(v + 2); | |||
uint64_t w3 = fetch64(v + 3); | |||
uint64_t d02 = w0 ^ rot64(w2 + d, s1); | |||
uint64_t c13 = w1 ^ rot64(w3 + c, s1); | |||
c += a ^ rot64(w0, s0); | |||
d -= b ^ rot64(w1, s2); | |||
a ^= p1 * (d02 + w3); | |||
b ^= p0 * (c13 + w2); | |||
data = (const uint64_t *)data + 4; | |||
} while (likely(data < detent)); | |||
a ^= p6 * (rot64(c, s1) + d); | |||
b ^= p5 * (c + rot64(d, s1)); | |||
len &= 31; | |||
} | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_align) && len > 1) | |||
v = (const uint64_t *)memcpy(&align, v, len); | |||
switch (len) { | |||
default: | |||
b += mux64(fetch64(v++), p4); | |||
case 24: | |||
case 23: | |||
case 22: | |||
case 21: | |||
case 20: | |||
case 19: | |||
case 18: | |||
case 17: | |||
a += mux64(fetch64(v++), p3); | |||
case 16: | |||
case 15: | |||
case 14: | |||
case 13: | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
b += mux64(fetch64(v++), p2); | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
a += mux64(fetch_tail(v, len), p1); | |||
case 0: | |||
return mux64(rot64(a + b, s1), p4) + mix(a ^ b, p0); | |||
} | |||
#if T1HA0_RUNTIME_SELECT
#ifdef __ELF__
/* ifunc/gnu_indirect_function will be used on ELF.
 * Please see https://en.wikipedia.org/wiki/Executable_and_Linkable_Format */
T1HA_API uint64_t t1ha0(const void *data, size_t length, uint64_t seed);
#else
/* Otherwise a function pointer will be used.
 * Unfortunately this may cause some calling overhead. */
T1HA_API extern uint64_t (*t1ha0_funcptr)(const void *data, size_t length,
                                          uint64_t seed);
static __inline uint64_t t1ha0(const void *data, size_t length,
                               uint64_t seed) {
  return t1ha0_funcptr(data, length, seed);
}
#endif /* __ELF__ */
#else /* T1HA0_RUNTIME_SELECT */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
static __inline uint64_t t1ha0(const void *data, size_t length,
                               uint64_t seed) {
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
  return t1ha1_be(data, length, seed);
#else
  return t1ha0_32be(data, length, seed);
#endif
}
#else
static __inline uint64_t t1ha0(const void *data, size_t length,
                               uint64_t seed) {
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
  return t1ha1_le(data, length, seed);
#else
  return t1ha0_32le(data, length, seed);
#endif
}
#endif /* __BYTE_ORDER__ */
#endif /* !T1HA0_RUNTIME_SELECT */

static __inline uint32_t tail32_le(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
  uint32_t r = 0;
  switch (tail & 3) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* For most CPUs this code is preferable when neither copying for
   * alignment nor byte reordering is needed. */
  case 0:
    return fetch32(p);
  case 3:
    r = (uint32_t)p[2] << 16;
  /* fall through */
  case 2:
    return r + fetch16(p);
  case 1:
    return p[0];
#else
  /* For most CPUs this code is preferable to copying for alignment
   * and/or byte reordering. */
  case 0:
    r += p[3];
    r <<= 8;
  /* fall through */
  case 3:
    r += p[2];
    r <<= 8;
  /* fall through */
  case 2:
    r += p[1];
    r <<= 8;
  /* fall through */
  case 1:
    return r + p[0];
#endif
  }
  unreachable();
}

static __inline uint32_t tail32_be(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
  switch (tail & 3) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* For most CPUs this code is preferable when neither copying for
   * alignment nor byte reordering is needed. */
  case 1:
    return p[0];
  case 2:
    return fetch16_be(p);
  case 3:
    return fetch16_be(p) << 8 | p[2];
  case 0:
    return fetch32_be(p);
#else
  /* For most CPUs this code is preferable to copying for alignment
   * and/or byte reordering. */
  case 1:
    return p[0];
  case 2:
    return p[1] | (uint32_t)p[0] << 8;
  case 3:
    return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16;
  case 0:
    return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 |
           (uint32_t)p[0] << 24;
#endif
  }
  unreachable();
}
static __inline uint64_t remix32(uint32_t a, uint32_t b) { | |||
static const uint64_t p0 = 17048867929148541611ull; | |||
a ^= rot32(b, 13); | |||
uint64_t l = a | (uint64_t)b << 32; | |||
l *= p0; | |||
l ^= l >> 41; | |||
return l; | |||
}
static __inline void mixup32(uint32_t *a, uint32_t *b, uint32_t v, uint32_t p) { | |||
uint64_t l = mul_32x32_64(*b + v, p); | |||
*a ^= (uint32_t)l; | |||
*b += (uint32_t)(l >> 32); | |||
} | |||
static uint64_t t1ha32(const void *data, size_t len, uint64_t seed) { | |||
/* 32-bit 'magic' primes */ | |||
static const uint32_t q0 = 0x92D78269; | |||
static const uint32_t q1 = 0xCA9B4735; | |||
static const uint32_t q2 = 0xA4ABA1C3; | |||
static const uint32_t q3 = 0xF6499843; | |||
static const uint32_t q4 = 0x86F0FD61; | |||
static const uint32_t q5 = 0xCA2DA6FB; | |||
static const uint32_t q6 = 0xC4BB3575; | |||
/* rotations */ | |||
static const unsigned s1 = 17; | |||
uint32_t a = rot32((uint32_t)len, s1) + (uint32_t)seed; | |||
uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); | |||
const int need_align = (((uintptr_t)data) & 3) != 0 && !UNALIGNED_OK; | |||
uint32_t align[4]; | |||
if (unlikely(len > 16)) { | |||
uint32_t c = ~a; | |||
uint32_t d = rot32(b, 5); | |||
const void *detent = (const uint8_t *)data + len - 15; | |||
do { | |||
const uint32_t *v = (const uint32_t *)data; | |||
if (unlikely(need_align)) | |||
v = (const uint32_t *)memcpy(&align, v, 16); | |||
uint32_t w0 = fetch32(v + 0); | |||
uint32_t w1 = fetch32(v + 1); | |||
uint32_t w2 = fetch32(v + 2); | |||
uint32_t w3 = fetch32(v + 3); | |||
uint32_t c02 = w0 ^ rot32(w2 + c, 11); | |||
uint32_t d13 = w1 + rot32(w3 + d, s1); | |||
c ^= rot32(b + w1, 7); | |||
d ^= rot32(a + w0, 3); | |||
b = q1 * (c02 + w3); | |||
a = q0 * (d13 ^ w2); | |||
data = (const uint32_t *)data + 4; | |||
} while (likely(data < detent)); | |||
c += a; | |||
d += b; | |||
a ^= q6 * (rot32(c, 16) + d); | |||
b ^= q5 * (c + rot32(d, 16)); | |||
len &= 15; | |||
} | |||
const uint8_t *v = (const uint8_t *)data; | |||
if (unlikely(need_align) && len > 4) | |||
v = (const uint8_t *)memcpy(&align, v, len); | |||
switch (len) { | |||
default: | |||
mixup32(&a, &b, fetch32(v), q4); | |||
v += 4; | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
mixup32(&b, &a, fetch32(v), q3); | |||
v += 4; | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
mixup32(&a, &b, fetch32(v), q2); | |||
v += 4; | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
mixup32(&b, &a, tail32_le(v, len), q1); | |||
case 0: | |||
return remix32(a, b); | |||
} | |||
} | |||
#ifdef __cplusplus
}
#endif
@@ -0,0 +1,411 @@ | |||
/* | |||
* Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, | |||
* Fast Positive Hash. | |||
* | |||
* Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>, | |||
* The 1Hippeus project (t1h). | |||
* | |||
* This software is provided 'as-is', without any express or implied | |||
* warranty. In no event will the authors be held liable for any damages | |||
* arising from the use of this software. | |||
* | |||
* Permission is granted to anyone to use this software for any purpose, | |||
* including commercial applications, and to alter it and redistribute it | |||
* freely, subject to the following restrictions: | |||
* | |||
* 1. The origin of this software must not be misrepresented; you must not | |||
* claim that you wrote the original software. If you use this software | |||
* in a product, an acknowledgement in the product documentation would be | |||
* appreciated but is not required. | |||
* 2. Altered source versions must be plainly marked as such, and must not be | |||
* misrepresented as being the original software. | |||
* 3. This notice may not be removed or altered from any source distribution. | |||
*/ | |||
/* | |||
* t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } | |||
* by [Positive Technologies](https://www.ptsecurity.ru) | |||
* | |||
* Briefly, it is a 64-bit Hash Function: | |||
 *  1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *     but portable; it runs without penalty on any 64-bit CPU.
 *  2. In most cases up to 15% faster than City64, xxHash, mum-hash,
 *     metro-hash and all other portable hash functions (which do not use
 *     specific hardware tricks).
 *  3. Not suitable for cryptography.
 *
 * The Future will be Positive. Everything will be fine.
* | |||
* ACKNOWLEDGEMENT: | |||
* The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) | |||
* for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! | |||
*/ | |||
#include "config.h" | |||
#include "t1ha_bits.h" | |||
static __always_inline uint32_t tail32_le(const void *v, size_t tail) { | |||
const uint8_t *p = (const uint8_t *)v; | |||
#ifdef can_read_underside | |||
  /* On some systems (e.g. x86) we can perform a 'oneshot' read, which is
   * a little bit faster: the read may start below the given pointer when
   * the can_read_underside() guard confirms the wider access cannot cross
   * into an unmapped page.  Thanks Marcin Żukowski
   * <marcin.zukowski@gmail.com> for the reminder. */
const unsigned offset = (4 - tail) & 3; | |||
const unsigned shift = offset << 3; | |||
if (likely(can_read_underside(p, 4))) { | |||
p -= offset; | |||
return fetch32_le(p) >> shift; | |||
} | |||
return fetch32_le(p) & ((~UINT32_C(0)) >> shift); | |||
#endif /* 'oneshot' read */ | |||
uint32_t r = 0; | |||
switch (tail & 3) { | |||
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |||
  /* For most CPUs this code is preferable when neither copying for
   * alignment nor byte reordering is needed. */
case 0: | |||
return fetch32_le(p); | |||
case 3: | |||
r = (uint32_t)p[2] << 16; | |||
/* fall through */ | |||
case 2: | |||
return r + fetch16_le(p); | |||
case 1: | |||
return p[0]; | |||
#else | |||
  /* For most CPUs this code is preferable to copying for alignment
   * and/or byte reordering. */
case 0: | |||
r += p[3]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 3: | |||
r += p[2]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 2: | |||
r += p[1]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 1: | |||
return r + p[0]; | |||
#endif | |||
} | |||
unreachable(); | |||
} | |||
static __always_inline uint32_t tail32_be(const void *v, size_t tail) { | |||
const uint8_t *p = (const uint8_t *)v; | |||
#ifdef can_read_underside | |||
  /* On some systems we can perform a 'oneshot' read, which is a little
   * bit faster.  Thanks Marcin Żukowski <marcin.zukowski@gmail.com> for
   * the reminder. */
const unsigned offset = (4 - tail) & 3; | |||
const unsigned shift = offset << 3; | |||
if (likely(can_read_underside(p, 4))) { | |||
p -= offset; | |||
return fetch32_be(p) & ((~UINT32_C(0)) >> shift); | |||
} | |||
return fetch32_be(p) >> shift; | |||
#endif /* 'oneshot' read */ | |||
switch (tail & 3) { | |||
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |||
  /* For most CPUs this code is preferable when neither copying for
   * alignment nor byte reordering is needed. */
case 1: | |||
return p[0]; | |||
case 2: | |||
return fetch16_be(p); | |||
case 3: | |||
return fetch16_be(p) << 8 | p[2]; | |||
case 0: | |||
return fetch32_be(p); | |||
#else | |||
  /* For most CPUs this code is preferable to copying for alignment
   * and/or byte reordering. */
case 1: | |||
return p[0]; | |||
case 2: | |||
return p[1] | (uint32_t)p[0] << 8; | |||
case 3: | |||
return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; | |||
case 0: | |||
return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | | |||
(uint32_t)p[0] << 24; | |||
#endif | |||
} | |||
unreachable(); | |||
} | |||
/***************************************************************************/ | |||
#ifndef rot32 | |||
static __maybe_unused __always_inline uint32_t rot32(uint32_t v, unsigned s) { | |||
return (v >> s) | (v << (32 - s)); | |||
} | |||
#endif /* rot32 */ | |||
static __always_inline void mixup32(uint32_t *a, uint32_t *b, uint32_t v, | |||
uint32_t prime) { | |||
uint64_t l = mul_32x32_64(*b + v, prime); | |||
*a ^= (uint32_t)l; | |||
*b += (uint32_t)(l >> 32); | |||
} | |||
static __always_inline uint64_t final32(uint32_t a, uint32_t b) { | |||
uint64_t l = (b ^ rot32(a, 13)) | (uint64_t)a << 32; | |||
l *= prime_0; | |||
l ^= l >> 41; | |||
l *= prime_4; | |||
l ^= l >> 47; | |||
l *= prime_6; | |||
return l; | |||
} | |||
/* 32-bit 'magic' primes */ | |||
static const uint32_t prime32_0 = UINT32_C(0x92D78269); | |||
static const uint32_t prime32_1 = UINT32_C(0xCA9B4735); | |||
static const uint32_t prime32_2 = UINT32_C(0xA4ABA1C3); | |||
static const uint32_t prime32_3 = UINT32_C(0xF6499843); | |||
static const uint32_t prime32_4 = UINT32_C(0x86F0FD61); | |||
static const uint32_t prime32_5 = UINT32_C(0xCA2DA6FB); | |||
static const uint32_t prime32_6 = UINT32_C(0xC4BB3575); | |||
uint64_t t1ha0_32le(const void *data, size_t len, uint64_t seed) { | |||
uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; | |||
uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); | |||
const int need_align = (((uintptr_t)data) & 3) != 0 && !UNALIGNED_OK; | |||
uint32_t align[4]; | |||
if (unlikely(len > 16)) { | |||
uint32_t c = ~a; | |||
uint32_t d = rot32(b, 5); | |||
const void *detent = (const uint8_t *)data + len - 15; | |||
do { | |||
const uint32_t *v = (const uint32_t *)data; | |||
if (unlikely(need_align)) | |||
v = (const uint32_t *)memcpy(&align, unaligned(v), 16); | |||
uint32_t w0 = fetch32_le(v + 0); | |||
uint32_t w1 = fetch32_le(v + 1); | |||
uint32_t w2 = fetch32_le(v + 2); | |||
uint32_t w3 = fetch32_le(v + 3); | |||
uint32_t c02 = w0 ^ rot32(w2 + c, 11); | |||
uint32_t d13 = w1 + rot32(w3 + d, 17); | |||
c ^= rot32(b + w1, 7); | |||
d ^= rot32(a + w0, 3); | |||
b = prime32_1 * (c02 + w3); | |||
a = prime32_0 * (d13 ^ w2); | |||
data = (const uint32_t *)data + 4; | |||
} while (likely(data < detent)); | |||
c += a; | |||
d += b; | |||
a ^= prime32_6 * (rot32(c, 16) + d); | |||
b ^= prime32_5 * (c + rot32(d, 16)); | |||
len &= 15; | |||
} | |||
const uint8_t *v = (const uint8_t *)data; | |||
if (unlikely(need_align) && len > 4) | |||
v = (const uint8_t *)memcpy(&align, unaligned(v), len); | |||
switch (len) { | |||
default: | |||
mixup32(&a, &b, fetch32_le(v), prime32_4); | |||
v += 4; | |||
/* fall through */ | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
mixup32(&b, &a, fetch32_le(v), prime32_3); | |||
v += 4; | |||
/* fall through */ | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
mixup32(&a, &b, fetch32_le(v), prime32_2); | |||
v += 4; | |||
/* fall through */ | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
mixup32(&b, &a, tail32_le(v, len), prime32_1); | |||
/* fall through */ | |||
case 0: | |||
return final32(a, b); | |||
} | |||
} | |||
uint64_t t1ha0_32be(const void *data, size_t len, uint64_t seed) { | |||
uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; | |||
uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); | |||
const int need_align = (((uintptr_t)data) & 3) != 0 && !UNALIGNED_OK; | |||
uint32_t align[4]; | |||
if (unlikely(len > 16)) { | |||
uint32_t c = ~a; | |||
uint32_t d = rot32(b, 5); | |||
const void *detent = (const uint8_t *)data + len - 15; | |||
do { | |||
const uint32_t *v = (const uint32_t *)data; | |||
if (unlikely(need_align)) | |||
v = (const uint32_t *)memcpy(&align, unaligned(v), 16); | |||
uint32_t w0 = fetch32_be(v + 0); | |||
uint32_t w1 = fetch32_be(v + 1); | |||
uint32_t w2 = fetch32_be(v + 2); | |||
uint32_t w3 = fetch32_be(v + 3); | |||
uint32_t c02 = w0 ^ rot32(w2 + c, 11); | |||
uint32_t d13 = w1 + rot32(w3 + d, 17); | |||
c ^= rot32(b + w1, 7); | |||
d ^= rot32(a + w0, 3); | |||
b = prime32_1 * (c02 + w3); | |||
a = prime32_0 * (d13 ^ w2); | |||
data = (const uint32_t *)data + 4; | |||
} while (likely(data < detent)); | |||
c += a; | |||
d += b; | |||
a ^= prime32_6 * (rot32(c, 16) + d); | |||
b ^= prime32_5 * (c + rot32(d, 16)); | |||
len &= 15; | |||
} | |||
const uint8_t *v = (const uint8_t *)data; | |||
if (unlikely(need_align) && len > 4) | |||
v = (const uint8_t *)memcpy(&align, unaligned(v), len); | |||
switch (len) { | |||
default: | |||
mixup32(&a, &b, fetch32_be(v), prime32_4); | |||
v += 4; | |||
/* fall through */ | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
mixup32(&b, &a, fetch32_be(v), prime32_3); | |||
v += 4; | |||
/* fall through */ | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
mixup32(&a, &b, fetch32_be(v), prime32_2); | |||
v += 4; | |||
/* fall through */ | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
mixup32(&b, &a, tail32_be(v, len), prime32_1); | |||
/* fall through */ | |||
case 0: | |||
return final32(a, b); | |||
} | |||
} | |||
/***************************************************************************/ | |||
#if T1HA0_RUNTIME_SELECT | |||
#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) | |||
static uint64_t x86_cpu_features(void) { | |||
uint32_t features = 0; | |||
uint32_t extended = 0; | |||
#ifdef __GNUC__ | |||
uint32_t eax, ebx, ecx, edx; | |||
const unsigned cpuid_max = __get_cpuid_max(0, NULL); | |||
if (cpuid_max >= 1) { | |||
__cpuid_count(1, 0, eax, ebx, features, edx); | |||
if (cpuid_max >= 7) | |||
__cpuid_count(7, 0, eax, extended, ecx, edx); | |||
} | |||
#elif defined(_MSC_VER) | |||
int info[4]; | |||
__cpuid(info, 0); | |||
const unsigned cpuid_max = info[0]; | |||
if (cpuid_max >= 1) { | |||
__cpuidex(info, 1, 0); | |||
features = info[2]; | |||
if (cpuid_max >= 7) { | |||
__cpuidex(info, 7, 0); | |||
extended = info[1]; | |||
} | |||
} | |||
#endif | |||
return features | (uint64_t)extended << 32; | |||
} | |||
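/* Note: in the 64-bit word assembled above, the low half is CPUID(1).ECX
 * and the high half is CPUID(7,0).EBX; the resolver below tests ECX bit 25
 * (mask 0x02000000), which reports AES-NI support. */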
#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ | |||
static | |||
#if __GNUC_PREREQ(4, 0) || __has_attribute(used) | |||
__attribute__((used)) | |||
#endif | |||
uint64_t (*t1ha0_resolve(void))(const void *, size_t, uint64_t) { | |||
#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) | |||
uint64_t features = x86_cpu_features(); | |||
if (features & UINT32_C(0x02000000) /* check for AES-NI */) { | |||
return t1ha0_ia32aes_noavx; | |||
} | |||
#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ | |||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul | |||
return t1ha1_be; | |||
#else | |||
return t1ha0_32be; | |||
#endif | |||
#else /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ | |||
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul | |||
return t1ha1_le; | |||
#else | |||
return t1ha0_32le; | |||
#endif | |||
#endif /* __BYTE_ORDER__ */ | |||
} | |||
#ifdef __ELF__ | |||
#if __has_attribute(ifunc) | |||
uint64_t t1ha0(const void *data, size_t len, uint64_t seed) | |||
__attribute__((ifunc("t1ha0_resolve"))); | |||
#else | |||
__asm("\t.globl\tt1ha0\n\t.type\tt1ha0, " | |||
"%gnu_indirect_function\n\t.set\tt1ha0,t1ha0_resolve"); | |||
#endif /* ifunc */ | |||
#elif __GNUC_PREREQ(4, 0) || __has_attribute(constructor) | |||
uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t); | |||
static void __attribute__((constructor)) t1ha0_init(void) { | |||
t1ha0_funcptr = t1ha0_resolve(); | |||
} | |||
#else /* ELF */ | |||
static uint64_t t1ha0_proxy(const void *data, size_t len, uint64_t seed) { | |||
t1ha0_funcptr = t1ha0_resolve(); | |||
return t1ha0_funcptr(data, len, seed); | |||
} | |||
uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t) = t1ha0_proxy; | |||
#endif /* !ELF */ | |||
#endif /* T1HA0_RUNTIME_SELECT */ |
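/* Dispatch note (illustrative): with ifunc the dynamic linker binds t1ha0
 * to the resolved implementation at load time; on the fallback paths the
 * first call resolves once, and later calls pay only the cost of an
 * indirect call.  A hedged usage sketch, hash_via_dispatch() being a
 * hypothetical helper: */
#if 0
static uint64_t hash_via_dispatch(const void *buf, size_t buflen) {
  return t1ha0(buf, buflen, /* seed */ 0);
}
#endif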
@@ -0,0 +1,200 @@ | |||
/* | |||
* Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, | |||
* Fast Positive Hash. | |||
* | |||
* Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>, | |||
* The 1Hippeus project (t1h). | |||
* | |||
* This software is provided 'as-is', without any express or implied | |||
* warranty. In no event will the authors be held liable for any damages | |||
* arising from the use of this software. | |||
* | |||
* Permission is granted to anyone to use this software for any purpose, | |||
* including commercial applications, and to alter it and redistribute it | |||
* freely, subject to the following restrictions: | |||
* | |||
* 1. The origin of this software must not be misrepresented; you must not | |||
* claim that you wrote the original software. If you use this software | |||
* in a product, an acknowledgement in the product documentation would be | |||
* appreciated but is not required. | |||
* 2. Altered source versions must be plainly marked as such, and must not be | |||
* misrepresented as being the original software. | |||
* 3. This notice may not be removed or altered from any source distribution. | |||
*/ | |||
/* | |||
* t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } | |||
* by [Positive Technologies](https://www.ptsecurity.ru) | |||
* | |||
* Briefly, it is a 64-bit Hash Function: | |||
 *  1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *     but portable; it runs without penalty on any 64-bit CPU.
 *  2. In most cases up to 15% faster than City64, xxHash, mum-hash,
 *     metro-hash and all other portable hash functions (which do not use
 *     specific hardware tricks).
 *  3. Not suitable for cryptography.
 *
 * The Future will be Positive. Everything will be fine.
* | |||
* ACKNOWLEDGEMENT: | |||
* The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) | |||
* for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! | |||
*/ | |||
#include "t1ha_bits.h" | |||
#if T1HA0_AESNI_AVAILABLE | |||
#pragma GCC push_options | |||
#pragma GCC target("aes") | |||
#ifndef __SSE2__ | |||
#define __SSE2__ | |||
#endif | |||
#ifndef __SSE__ | |||
#define __SSE__ | |||
#endif | |||
#ifndef __AES__ | |||
#define __AES__ | |||
#endif | |||
#include <immintrin.h> | |||
uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { | |||
uint64_t a = seed; | |||
uint64_t b = len; | |||
if (unlikely(len > 32)) { | |||
__m128i x = _mm_set_epi64x(a, b); | |||
__m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_5, prime_6)); | |||
const __m128i *__restrict v = (const __m128i *)data; | |||
const __m128i *__restrict const detent = | |||
(const __m128i *)((const uint8_t *)data + len - 127); | |||
while (v < detent) { | |||
__m128i v0 = _mm_loadu_si128(v + 0); | |||
__m128i v1 = _mm_loadu_si128(v + 1); | |||
__m128i v2 = _mm_loadu_si128(v + 2); | |||
__m128i v3 = _mm_loadu_si128(v + 3); | |||
__m128i v4 = _mm_loadu_si128(v + 4); | |||
__m128i v5 = _mm_loadu_si128(v + 5); | |||
__m128i v6 = _mm_loadu_si128(v + 6); | |||
__m128i v7 = _mm_loadu_si128(v + 7); | |||
__m128i v0y = _mm_aesenc_si128(v0, y); | |||
__m128i v2x6 = _mm_aesenc_si128(v2, _mm_xor_si128(x, v6)); | |||
__m128i v45_67 = | |||
_mm_xor_si128(_mm_aesenc_si128(v4, v5), _mm_add_epi64(v6, v7)); | |||
__m128i v0y7_1 = _mm_aesdec_si128(_mm_sub_epi64(v7, v0y), v1); | |||
__m128i v2x6_3 = _mm_aesenc_si128(v2x6, v3); | |||
x = _mm_aesenc_si128(v45_67, _mm_add_epi64(x, y)); | |||
y = _mm_aesenc_si128(v2x6_3, _mm_xor_si128(v0y7_1, v5)); | |||
v += 8; | |||
} | |||
if (len & 64) { | |||
__m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); | |||
__m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); | |||
x = _mm_aesdec_si128(x, v0y); | |||
y = _mm_aesdec_si128(y, v1x); | |||
__m128i v2y = _mm_add_epi64(y, _mm_loadu_si128(v++)); | |||
__m128i v3x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); | |||
x = _mm_aesdec_si128(x, v2y); | |||
y = _mm_aesdec_si128(y, v3x); | |||
} | |||
if (len & 32) { | |||
__m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); | |||
__m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); | |||
x = _mm_aesdec_si128(x, v0y); | |||
y = _mm_aesdec_si128(y, v1x); | |||
} | |||
if (len & 16) { | |||
y = _mm_add_epi64(x, y); | |||
x = _mm_aesdec_si128(x, _mm_loadu_si128(v++)); | |||
} | |||
x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); | |||
#if defined(__x86_64__) || defined(_M_X64) | |||
#if defined(__SSE4_1__) || defined(__AVX__) | |||
a = _mm_extract_epi64(x, 0); | |||
b = _mm_extract_epi64(x, 1); | |||
#else | |||
a = _mm_cvtsi128_si64(x); | |||
b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); | |||
#endif | |||
#else | |||
#if defined(__SSE4_1__) || defined(__AVX__) | |||
a = (uint32_t)_mm_extract_epi32(x, 0) | | |||
(uint64_t)_mm_extract_epi32(x, 1) << 32; | |||
b = (uint32_t)_mm_extract_epi32(x, 2) | | |||
(uint64_t)_mm_extract_epi32(x, 3) << 32; | |||
#else | |||
a = (uint32_t)_mm_cvtsi128_si32(x); | |||
a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; | |||
x = _mm_unpackhi_epi64(x, x); | |||
b = (uint32_t)_mm_cvtsi128_si32(x); | |||
b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; | |||
#endif | |||
#endif | |||
#ifdef __AVX__ | |||
_mm256_zeroall(); | |||
#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64)) | |||
_mm_empty(); | |||
#endif | |||
data = v; | |||
len &= 15; | |||
} | |||
const uint64_t *v = (const uint64_t *)data; | |||
#ifdef __e2k__ | |||
const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK; | |||
uint64_t align[4]; | |||
if (unlikely(need_align) && len > 8) | |||
v = (const uint64_t *)memcpy(&align, unaligned(v), len); | |||
#endif /* __e2k__ */ | |||
switch (len) { | |||
default: | |||
mixup64(&a, &b, *v++, prime_4); | |||
/* fall through */ | |||
case 24: | |||
case 23: | |||
case 22: | |||
case 21: | |||
case 20: | |||
case 19: | |||
case 18: | |||
case 17: | |||
mixup64(&b, &a, *v++, prime_3); | |||
/* fall through */ | |||
case 16: | |||
case 15: | |||
case 14: | |||
case 13: | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
mixup64(&a, &b, *v++, prime_2); | |||
/* fall through */ | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
mixup64(&b, &a, tail64_le(v, len), prime_1); | |||
/* fall through */ | |||
case 0: | |||
return final64(a, b); | |||
} | |||
} | |||
#endif /* T1HA0_AESNI_AVAILABLE */ | |||
#undef T1HA_IA32AES_NAME |
@@ -0,0 +1,2 @@ | |||
#define T1HA_IA32AES_NAME t1ha0_ia32aes_noavx | |||
#include "t1ha0_ia32aes_a.h" |
@@ -0,0 +1,215 @@ | |||
/* | |||
* Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, | |||
* Fast Positive Hash. | |||
* | |||
* Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>, | |||
* The 1Hippeus project (t1h). | |||
* | |||
* This software is provided 'as-is', without any express or implied | |||
* warranty. In no event will the authors be held liable for any damages | |||
* arising from the use of this software. | |||
* | |||
* Permission is granted to anyone to use this software for any purpose, | |||
* including commercial applications, and to alter it and redistribute it | |||
* freely, subject to the following restrictions: | |||
* | |||
* 1. The origin of this software must not be misrepresented; you must not | |||
* claim that you wrote the original software. If you use this software | |||
* in a product, an acknowledgement in the product documentation would be | |||
* appreciated but is not required. | |||
* 2. Altered source versions must be plainly marked as such, and must not be | |||
* misrepresented as being the original software. | |||
* 3. This notice may not be removed or altered from any source distribution. | |||
*/ | |||
/* | |||
* t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } | |||
* by [Positive Technologies](https://www.ptsecurity.ru) | |||
* | |||
* Briefly, it is a 64-bit Hash Function: | |||
 *  1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *     but portable; it runs without penalty on any 64-bit CPU.
 *  2. In most cases up to 15% faster than City64, xxHash, mum-hash,
 *     metro-hash and all other portable hash functions (which do not use
 *     specific hardware tricks).
 *  3. Not suitable for cryptography.
 *
 * The Future will be Positive. Everything will be fine.
* | |||
* ACKNOWLEDGEMENT: | |||
* The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) | |||
* for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! | |||
*/ | |||
#include "config.h" | |||
#include "t1ha_bits.h" | |||
/* xor-mul-xor mixer */ | |||
static __inline uint64_t mix64(uint64_t v, uint64_t p) { | |||
v *= p; | |||
return v ^ rot64(v, 41); | |||
} | |||
static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) { | |||
  /* LY: for performance reasons on some not-so-high-end CPUs
   * the second mux64() operation was replaced by mix64().
   * Unfortunately this approach fails the "strict avalanche criteria";
   * see test results at https://github.com/demerphq/smhasher. */
return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0); | |||
} | |||
uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) { | |||
uint64_t a = seed; | |||
uint64_t b = len; | |||
const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK; | |||
uint64_t align[4]; | |||
if (unlikely(len > 32)) { | |||
uint64_t c = rot64(len, 17) + seed; | |||
uint64_t d = len ^ rot64(seed, 17); | |||
const void *detent = (const uint8_t *)data + len - 31; | |||
do { | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_align)) | |||
v = (const uint64_t *)memcpy(&align, unaligned(v), 32); | |||
uint64_t w0 = fetch64_le(v + 0); | |||
uint64_t w1 = fetch64_le(v + 1); | |||
uint64_t w2 = fetch64_le(v + 2); | |||
uint64_t w3 = fetch64_le(v + 3); | |||
uint64_t d02 = w0 ^ rot64(w2 + d, 17); | |||
uint64_t c13 = w1 ^ rot64(w3 + c, 17); | |||
c += a ^ rot64(w0, 41); | |||
d -= b ^ rot64(w1, 31); | |||
a ^= prime_1 * (d02 + w3); | |||
b ^= prime_0 * (c13 + w2); | |||
data = (const uint64_t *)data + 4; | |||
} while (likely(data < detent)); | |||
a ^= prime_6 * (rot64(c, 17) + d); | |||
b ^= prime_5 * (c + rot64(d, 17)); | |||
len &= 31; | |||
} | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_align) && len > 8) | |||
v = (const uint64_t *)memcpy(&align, unaligned(v), len); | |||
switch (len) { | |||
default: | |||
b += mux64(fetch64_le(v++), prime_4); | |||
/* fall through */ | |||
case 24: | |||
case 23: | |||
case 22: | |||
case 21: | |||
case 20: | |||
case 19: | |||
case 18: | |||
case 17: | |||
a += mux64(fetch64_le(v++), prime_3); | |||
/* fall through */ | |||
case 16: | |||
case 15: | |||
case 14: | |||
case 13: | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
b += mux64(fetch64_le(v++), prime_2); | |||
/* fall through */ | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
a += mux64(tail64_le(v, len), prime_1); | |||
/* fall through */ | |||
case 0: | |||
return final_weak_avalanche(a, b); | |||
} | |||
} | |||
uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) { | |||
uint64_t a = seed; | |||
uint64_t b = len; | |||
const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK; | |||
uint64_t align[4]; | |||
if (unlikely(len > 32)) { | |||
uint64_t c = rot64(len, 17) + seed; | |||
uint64_t d = len ^ rot64(seed, 17); | |||
const void *detent = (const uint8_t *)data + len - 31; | |||
do { | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_align)) | |||
v = (const uint64_t *)memcpy(&align, unaligned(v), 32); | |||
uint64_t w0 = fetch64_be(v + 0); | |||
uint64_t w1 = fetch64_be(v + 1); | |||
uint64_t w2 = fetch64_be(v + 2); | |||
uint64_t w3 = fetch64_be(v + 3); | |||
uint64_t d02 = w0 ^ rot64(w2 + d, 17); | |||
uint64_t c13 = w1 ^ rot64(w3 + c, 17); | |||
c += a ^ rot64(w0, 41); | |||
d -= b ^ rot64(w1, 31); | |||
a ^= prime_1 * (d02 + w3); | |||
b ^= prime_0 * (c13 + w2); | |||
data = (const uint64_t *)data + 4; | |||
} while (likely(data < detent)); | |||
a ^= prime_6 * (rot64(c, 17) + d); | |||
b ^= prime_5 * (c + rot64(d, 17)); | |||
len &= 31; | |||
} | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_align) && len > 8) | |||
v = (const uint64_t *)memcpy(&align, unaligned(v), len); | |||
switch (len) { | |||
default: | |||
b += mux64(fetch64_be(v++), prime_4); | |||
/* fall through */ | |||
case 24: | |||
case 23: | |||
case 22: | |||
case 21: | |||
case 20: | |||
case 19: | |||
case 18: | |||
case 17: | |||
a += mux64(fetch64_be(v++), prime_3); | |||
/* fall through */ | |||
case 16: | |||
case 15: | |||
case 14: | |||
case 13: | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
b += mux64(fetch64_be(v++), prime_2); | |||
/* fall through */ | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
a += mux64(tail64_be(v, len), prime_1); | |||
/* fall through */ | |||
case 0: | |||
return final_weak_avalanche(a, b); | |||
} | |||
} |
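/* Editor's illustration (not part of the upstream diff): a minimal oneshot
 * usage sketch for the two t1ha1 variants defined above. It assumes only the
 * prototypes as they appear in this file; the seed value is arbitrary. Kept
 * under "#if 0" so it stays purely illustrative. */
#if 0
#include <inttypes.h>
#include <stdio.h>

int main(void) {
  static const char msg[] = "The quick brown fox jumps over the lazy dog";
  const uint64_t seed = UINT64_C(42);
  /* Little-endian flavour: the native, fast path on x86_64. */
  uint64_t h_le = t1ha1_le(msg, sizeof(msg) - 1, seed);
  /* Big-endian flavour: yields a different digest for the same input. */
  uint64_t h_be = t1ha1_be(msg, sizeof(msg) - 1, seed);
  printf("t1ha1_le: %016" PRIx64 "\nt1ha1_be: %016" PRIx64 "\n", h_le, h_be);
  return 0;
}
#endif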
@@ -0,0 +1,297 @@ | |||
/* | |||
* Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, | |||
* Fast Positive Hash. | |||
* | |||
* Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>, | |||
* The 1Hippeus project (t1h). | |||
* | |||
* This software is provided 'as-is', without any express or implied | |||
* warranty. In no event will the authors be held liable for any damages | |||
* arising from the use of this software. | |||
* | |||
* Permission is granted to anyone to use this software for any purpose, | |||
* including commercial applications, and to alter it and redistribute it | |||
* freely, subject to the following restrictions: | |||
* | |||
* 1. The origin of this software must not be misrepresented; you must not | |||
* claim that you wrote the original software. If you use this software | |||
* in a product, an acknowledgement in the product documentation would be | |||
* appreciated but is not required. | |||
* 2. Altered source versions must be plainly marked as such, and must not be | |||
* misrepresented as being the original software. | |||
* 3. This notice may not be removed or altered from any source distribution. | |||
*/ | |||
/* | |||
* t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } | |||
* by [Positive Technologies](https://www.ptsecurity.ru) | |||
* | |||
* Briefly, it is a 64-bit Hash Function: | |||
 * 1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *    but portable; it runs on any 64-bit CPU without penalties.
 * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
 *    and all other portable hash functions (which do not use specific
 *    hardware tricks).
* 3. Not suitable for cryptography. | |||
* | |||
* The Future will Positive. Всё будет хорошо. | |||
* | |||
* ACKNOWLEDGEMENT: | |||
* The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) | |||
* for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! | |||
*/ | |||
#include "config.h" | |||
#include "t1ha_bits.h" | |||
static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x, | |||
uint64_t y) { | |||
s->n.a = x; | |||
s->n.b = y; | |||
} | |||
static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x, | |||
uint64_t y) { | |||
s->n.c = rot64(y, 23) + ~x; | |||
s->n.d = ~y + rot64(x, 19); | |||
} | |||
static __always_inline void update(t1ha_state256_t *__restrict s, | |||
const uint64_t *__restrict v) { | |||
uint64_t w0 = fetch64_le(v + 0); | |||
uint64_t w1 = fetch64_le(v + 1); | |||
uint64_t w2 = fetch64_le(v + 2); | |||
uint64_t w3 = fetch64_le(v + 3); | |||
uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); | |||
uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); | |||
#ifdef __e2k__ | |||
/* FIXME: temporary workaround for lcc's ELBRUS scheduling bug (LY) */ | |||
s->n.c ^= s->n.a + rot64(w0, 57); | |||
s->n.d ^= s->n.b + rot64(w1, 38); | |||
#else | |||
s->n.d ^= s->n.b + rot64(w1, 38); | |||
s->n.c ^= s->n.a + rot64(w0, 57); | |||
#endif | |||
s->n.b ^= prime_6 * (c13 + w2); | |||
s->n.a ^= prime_5 * (d02 + w3); | |||
} | |||
static __always_inline void squash(t1ha_state256_t *s) { | |||
s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23)); | |||
s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d); | |||
} | |||
static __always_inline const void * | |||
loop(bool need_copy4align, uint64_t *__restrict buffer4align, | |||
t1ha_state256_t *__restrict s, const void *__restrict data, size_t len) { | |||
const void *detent = (const uint8_t *)data + len - 31; | |||
do { | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_copy4align)) | |||
v = (const uint64_t *)memcpy(buffer4align, unaligned(v), 32); | |||
update(s, v); | |||
data = (const uint64_t *)data + 4; | |||
} while (likely(data < detent)); | |||
return data; | |||
} | |||
static __always_inline void tail_ab(t1ha_state256_t *__restrict s, | |||
const uint64_t *__restrict v, size_t len) { | |||
switch (len) { | |||
default: | |||
mixup64(&s->n.a, &s->n.b, fetch64_le(v++), prime_4); | |||
/* fall through */ | |||
case 24: | |||
case 23: | |||
case 22: | |||
case 21: | |||
case 20: | |||
case 19: | |||
case 18: | |||
case 17: | |||
mixup64(&s->n.b, &s->n.a, fetch64_le(v++), prime_3); | |||
/* fall through */ | |||
case 16: | |||
case 15: | |||
case 14: | |||
case 13: | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
mixup64(&s->n.a, &s->n.b, fetch64_le(v++), prime_2); | |||
/* fall through */ | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
mixup64(&s->n.b, &s->n.a, tail64_le(v, len), prime_1); | |||
/* fall through */ | |||
case 0: | |||
return; | |||
} | |||
} | |||
static __always_inline void tail_abcd(t1ha_state256_t *__restrict s, | |||
const uint64_t *__restrict v, | |||
size_t len) { | |||
switch (len) { | |||
default: | |||
mixup64(&s->n.a, &s->n.d, fetch64_le(v++), prime_4); | |||
/* fall through */ | |||
case 24: | |||
case 23: | |||
case 22: | |||
case 21: | |||
case 20: | |||
case 19: | |||
case 18: | |||
case 17: | |||
mixup64(&s->n.b, &s->n.a, fetch64_le(v++), prime_3); | |||
/* fall through */ | |||
case 16: | |||
case 15: | |||
case 14: | |||
case 13: | |||
case 12: | |||
case 11: | |||
case 10: | |||
case 9: | |||
mixup64(&s->n.c, &s->n.b, fetch64_le(v++), prime_2); | |||
/* fall through */ | |||
case 8: | |||
case 7: | |||
case 6: | |||
case 5: | |||
case 4: | |||
case 3: | |||
case 2: | |||
case 1: | |||
mixup64(&s->n.d, &s->n.c, tail64_le(v, len), prime_1); | |||
/* fall through */ | |||
case 0: | |||
return; | |||
} | |||
} | |||
static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c, | |||
uint64_t d, uint64_t *h) { | |||
mixup64(&a, &b, rot64(c, 41) ^ d, prime_0); | |||
mixup64(&b, &c, rot64(d, 23) ^ a, prime_6); | |||
mixup64(&c, &d, rot64(a, 19) ^ b, prime_5); | |||
mixup64(&d, &a, rot64(b, 31) ^ c, prime_4); | |||
*h = c + d; | |||
return a ^ b; | |||
} | |||
//------------------------------------------------------------------------------ | |||
uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) { | |||
t1ha_state256_t state; | |||
init_ab(&state, seed, length); | |||
const int need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK; | |||
uint64_t buffer4align[4]; | |||
if (unlikely(length > 32)) { | |||
init_cd(&state, seed, length); | |||
data = loop(need_copy4align, buffer4align, &state, data, length); | |||
squash(&state); | |||
length &= 31; | |||
} | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_copy4align) && length > 8) | |||
v = (const uint64_t *)memcpy(&buffer4align, unaligned(v), length); | |||
tail_ab(&state, v, length); | |||
return final64(state.n.a, state.n.b); | |||
} | |||
uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, | |||
const void *__restrict data, size_t length, | |||
uint64_t seed) { | |||
t1ha_state256_t state; | |||
init_ab(&state, seed, length); | |||
init_cd(&state, seed, length); | |||
const int need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK; | |||
uint64_t buffer4align[4]; | |||
if (unlikely(length > 32)) { | |||
data = loop(need_copy4align, buffer4align, &state, data, length); | |||
length &= 31; | |||
} | |||
const uint64_t *v = (const uint64_t *)data; | |||
if (unlikely(need_copy4align) && length > 8) | |||
v = (const uint64_t *)memcpy(&buffer4align, unaligned(v), length); | |||
tail_abcd(&state, v, length); | |||
return final128(state.n.a, state.n.b, state.n.c, state.n.d, extra_result); | |||
} | |||
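/* Editor's sketch (not upstream code): how the two oneshot entry points
 * above relate. t1ha2_atonce() returns a 64-bit digest; t1ha2_atonce128()
 * additionally writes the other 64 bits of a 128-bit digest through
 * `extra_result`. Illustrative only, hence "#if 0". */
#if 0
#include <inttypes.h>
#include <stdio.h>

int main(void) {
  static const char msg[] = "zerocopy messaging in the spirit of Sparta!";
  uint64_t hi = 0;
  uint64_t h64 = t1ha2_atonce(msg, sizeof(msg) - 1, 0);           /* 64-bit */
  uint64_t lo = t1ha2_atonce128(&hi, msg, sizeof(msg) - 1, 0);    /* 128-bit */
  printf("64-bit:  %016" PRIx64 "\n", h64);
  printf("128-bit: %016" PRIx64 "%016" PRIx64 "\n", hi, lo);
  return 0;
}
#endif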
//------------------------------------------------------------------------------ | |||
void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) { | |||
init_ab(&ctx->state, seed_x, seed_y); | |||
init_cd(&ctx->state, seed_x, seed_y); | |||
ctx->partial = 0; | |||
ctx->total = 0; | |||
} | |||
void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data, | |||
size_t length) { | |||
ctx->total += length; | |||
if (ctx->partial) { | |||
const size_t left = 32 - ctx->partial; | |||
const size_t chunk = (length >= left) ? left : length; | |||
memcpy(ctx->buffer.bytes + ctx->partial, unaligned(data), chunk); | |||
ctx->partial += chunk; | |||
if (ctx->partial < 32) { | |||
assert(left >= length); | |||
return; | |||
} | |||
ctx->partial = 0; | |||
data = (const uint8_t *)data + chunk; | |||
length -= chunk; | |||
update(&ctx->state, ctx->buffer.u64); | |||
} | |||
if (length >= 32) { | |||
const bool need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK; | |||
if (need_copy4align) | |||
data = loop(true, ctx->buffer.u64, &ctx->state, data, length); | |||
else | |||
data = loop(false, NULL, &ctx->state, data, length); | |||
length &= 31; | |||
} | |||
if (length) | |||
memcpy(ctx->buffer.bytes, unaligned(data), ctx->partial = length); | |||
} | |||
uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, | |||
uint64_t *__restrict extra_result) { | |||
uint64_t bytes = (ctx->total << 3) ^ (UINT64_C(1) << 63); | |||
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ | |||
bytes = bswap64(bytes); | |||
#endif | |||
t1ha2_update(ctx, &bytes, 8); | |||
if (likely(!extra_result)) { | |||
squash(&ctx->state); | |||
tail_ab(&ctx->state, ctx->buffer.u64, ctx->partial); | |||
return final64(ctx->state.n.a, ctx->state.n.b); | |||
} | |||
tail_abcd(&ctx->state, ctx->buffer.u64, ctx->partial); | |||
return final128(ctx->state.n.a, ctx->state.n.b, ctx->state.n.c, | |||
ctx->state.n.d, extra_result); | |||
} |
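/* Editor's sketch (not upstream code): the incremental interface above
 * (init/update/final). Thanks to the internal 32-byte buffer, the digest is
 * independent of how the input is split across t1ha2_update() calls; passing
 * a non-NULL `extra_result` to t1ha2_final() yields the 128-bit digest
 * instead. Illustrative only. */
#if 0
#include <assert.h>

int main(void) {
  static const char msg[] = "Fast Positive Hash";
  t1ha_context_t ctx;

  /* Whole buffer in one call... */
  t1ha2_init(&ctx, /* seed_x */ 1, /* seed_y */ 2);
  t1ha2_update(&ctx, msg, sizeof(msg) - 1);
  uint64_t once = t1ha2_final(&ctx, NULL);

  /* ...must match the same bytes fed in two chunks. */
  t1ha2_init(&ctx, 1, 2);
  t1ha2_update(&ctx, msg, 4);
  t1ha2_update(&ctx, msg + 4, sizeof(msg) - 1 - 4);
  assert(once == t1ha2_final(&ctx, NULL));
  return 0;
}
#endif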
@@ -0,0 +1,827 @@ | |||
/* | |||
* Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, | |||
* Fast Positive Hash. | |||
* | |||
* Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>, | |||
* The 1Hippeus project (t1h). | |||
* | |||
* This software is provided 'as-is', without any express or implied | |||
* warranty. In no event will the authors be held liable for any damages | |||
* arising from the use of this software. | |||
* | |||
* Permission is granted to anyone to use this software for any purpose, | |||
* including commercial applications, and to alter it and redistribute it | |||
* freely, subject to the following restrictions: | |||
* | |||
* 1. The origin of this software must not be misrepresented; you must not | |||
* claim that you wrote the original software. If you use this software | |||
* in a product, an acknowledgement in the product documentation would be | |||
* appreciated but is not required. | |||
* 2. Altered source versions must be plainly marked as such, and must not be | |||
* misrepresented as being the original software. | |||
* 3. This notice may not be removed or altered from any source distribution. | |||
*/ | |||
/* | |||
* t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } | |||
* by [Positive Technologies](https://www.ptsecurity.ru) | |||
* | |||
* Briefly, it is a 64-bit Hash Function: | |||
 * 1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *    but portable; it runs on any 64-bit CPU without penalties.
 * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
 *    and all other portable hash functions (which do not use specific
 *    hardware tricks).
* 3. Not suitable for cryptography. | |||
* | |||
* The Future will Positive. Всё будет хорошо. | |||
* | |||
* ACKNOWLEDGEMENT: | |||
* The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) | |||
* for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! | |||
*/ | |||
#pragma once | |||
#if defined(_MSC_VER) | |||
#pragma warning(disable : 4201) /* nameless struct/union */ | |||
#if _MSC_VER > 1800 | |||
#pragma warning(disable : 4464) /* relative include path contains '..' */ | |||
#endif /* 1800 */ | |||
#endif /* MSVC */ | |||
#include "config.h" | |||
#include "t1ha.h" | |||
#ifndef T1HA_USE_FAST_ONESHOT_READ | |||
/* Define it to 1 for slightly faster code.
 * Unfortunately this may trigger false-positive alarms from Valgrind,
 * AddressSanitizer and other similar tools.
 * So, define it to 0 for peace of mind if in doubt. */
#define T1HA_USE_FAST_ONESHOT_READ 1 | |||
#endif /* T1HA_USE_FAST_ONESHOT_READ */ | |||
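/* Editor's note: since the definition above is guarded by #ifndef, the
 * default can be overridden from the build system, e.g.
 *   cc -DT1HA_USE_FAST_ONESHOT_READ=0 ...
 * to keep Valgrind/ASan quiet without editing this header. */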
/*****************************************************************************/ | |||
#include <assert.h> /* for assert() */ | |||
#include <stdbool.h> /* for bool */ | |||
#include <string.h> /* for memcpy() */ | |||
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ | |||
__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ | |||
#error Unsupported byte order. | |||
#endif | |||
#if !defined(UNALIGNED_OK) | |||
#if (defined(__ia32__) || defined(__e2k__) || \ | |||
defined(__ARM_FEATURE_UNALIGNED)) && \ | |||
!defined(__ALIGNED__) | |||
#define UNALIGNED_OK 1 | |||
#else | |||
#define UNALIGNED_OK 0 | |||
#endif | |||
#endif /* UNALIGNED_OK */ | |||
#if UNALIGNED_OK && !defined(PAGESIZE) | |||
#define PAGESIZE 4096 | |||
#endif /* PAGESIZE */ | |||
/***************************************************************************/ | |||
#ifndef __has_builtin | |||
#define __has_builtin(x) (0) | |||
#endif | |||
#if __GNUC_PREREQ(4, 4) || defined(__clang__) | |||
#if defined(__ia32__) || defined(__e2k__) | |||
#include <x86intrin.h> | |||
#endif | |||
#if defined(__ia32__) | |||
#include <cpuid.h> | |||
#endif | |||
#if defined(__e2k__) | |||
#include <e2kbuiltin.h> | |||
#endif | |||
#ifndef likely | |||
#define likely(cond) __builtin_expect(!!(cond), 1) | |||
#endif | |||
#ifndef unlikely | |||
#define unlikely(cond) __builtin_expect(!!(cond), 0) | |||
#endif | |||
#if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable) | |||
#define unreachable() __builtin_unreachable() | |||
#endif | |||
#define bswap64(v) __builtin_bswap64(v) | |||
#define bswap32(v) __builtin_bswap32(v) | |||
#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) | |||
#define bswap16(v) __builtin_bswap16(v) | |||
#endif | |||
#if !defined(__maybe_unused) && (__GNUC_PREREQ(4, 3) || __has_attribute(unused)) | |||
#define __maybe_unused __attribute__((unused)) | |||
#endif | |||
#if !defined(__always_inline) && \ | |||
(__GNUC_PREREQ(3, 2) || __has_attribute(always_inline)) | |||
#define __always_inline __inline __attribute__((always_inline)) | |||
#endif | |||
#if defined(__e2k__) | |||
#if __iset__ >= 3 | |||
#define mul_64x64_high(a, b) __builtin_e2k_umulhd(a, b) | |||
#endif /* __iset__ >= 3 */ | |||
#if __iset__ >= 5 | |||
static __maybe_unused __always_inline unsigned | |||
e2k_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { | |||
*sum = base + addend; | |||
return (unsigned)__builtin_e2k_addcd_c(base, addend, 0); | |||
} | |||
#define add64carry_first(base, addend, sum) \ | |||
e2k_add64carry_first(base, addend, sum) | |||
static __maybe_unused __always_inline unsigned | |||
e2k_add64carry_next(unsigned carry, uint64_t base, uint64_t addend, | |||
uint64_t *sum) { | |||
*sum = __builtin_e2k_addcd(base, addend, carry); | |||
return (unsigned)__builtin_e2k_addcd_c(base, addend, carry); | |||
} | |||
#define add64carry_next(carry, base, addend, sum) \ | |||
e2k_add64carry_next(carry, base, addend, sum) | |||
static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry, | |||
uint64_t base, | |||
uint64_t addend, | |||
uint64_t *sum) { | |||
*sum = __builtin_e2k_addcd(base, addend, carry); | |||
} | |||
#define add64carry_last(carry, base, addend, sum) \ | |||
e2k_add64carry_last(carry, base, addend, sum) | |||
#endif /* __iset__ >= 5 */ | |||
#if 0 /* LY: unreasonable, because alignment is required :( */ | |||
#define fetch64_be(ptr) ((uint64_t)__builtin_e2k_ld_64s_be(ptr)) | |||
#define fetch32_be(ptr) ((uint32_t)__builtin_e2k_ld_32u_be(ptr)) | |||
#endif | |||
#endif /* __e2k__ Elbrus */ | |||
#elif defined(_MSC_VER) | |||
#if _MSC_FULL_VER < 190024218 && defined(_M_IX86) | |||
#pragma message( \ | |||
"For AES-NI at least \"Microsoft C/C++ Compiler\" version 19.00.24218 (Visual Studio 2015 Update 5) is required.") | |||
#endif | |||
#if _MSC_FULL_VER < 191025019 | |||
#pragma message( \ | |||
"It is recommended to use \"Microsoft C/C++ Compiler\" version 19.10.25019 (Visual Studio 2017) or newer.") | |||
#endif | |||
#if _MSC_FULL_VER < 180040629 | |||
#error At least "Microsoft C/C++ Compiler" version 18.00.40629 (Visual Studio 2013 Update 5) is required. | |||
#endif | |||
#pragma warning(push, 1) | |||
#include <intrin.h> | |||
#include <stdlib.h> | |||
#define likely(cond) (cond) | |||
#define unlikely(cond) (cond) | |||
#define unreachable() __assume(0) | |||
#define bswap64(v) _byteswap_uint64(v) | |||
#define bswap32(v) _byteswap_ulong(v) | |||
#define bswap16(v) _byteswap_ushort(v) | |||
#define rot64(v, s) _rotr64(v, s) | |||
#define rot32(v, s) _rotr(v, s) | |||
#define __always_inline __forceinline | |||
#if defined(_M_X64) || defined(_M_IA64) | |||
#pragma intrinsic(_umul128) | |||
#define mul_64x64_128(a, b, ph) _umul128(a, b, ph) | |||
#pragma intrinsic(_addcarry_u64) | |||
#define add64carry_first(base, addend, sum) _addcarry_u64(0, base, addend, sum) | |||
#define add64carry_next(carry, base, addend, sum) \ | |||
_addcarry_u64(carry, base, addend, sum) | |||
#define add64carry_last(carry, base, addend, sum) \ | |||
(void)_addcarry_u64(carry, base, addend, sum) | |||
#endif | |||
#if defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) | |||
#pragma intrinsic(__umulh) | |||
#define mul_64x64_high(a, b) __umulh(a, b) | |||
#endif | |||
#if defined(_M_IX86) | |||
#pragma intrinsic(__emulu) | |||
#define mul_32x32_64(a, b) __emulu(a, b) | |||
#if _MSC_FULL_VER >= 190024231 /* LY: workaround for optimizer bug */ | |||
#pragma intrinsic(_addcarry_u32) | |||
#define add32carry_first(base, addend, sum) _addcarry_u32(0, base, addend, sum) | |||
#define add32carry_next(carry, base, addend, sum) \ | |||
_addcarry_u32(carry, base, addend, sum) | |||
#define add32carry_last(carry, base, addend, sum) \ | |||
(void)_addcarry_u32(carry, base, addend, sum) | |||
static __forceinline char | |||
msvc32_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { | |||
uint32_t *const sum32 = (uint32_t *)sum; | |||
const uint32_t base_32l = (uint32_t)base; | |||
const uint32_t base_32h = (uint32_t)(base >> 32); | |||
const uint32_t addend_32l = (uint32_t)addend; | |||
const uint32_t addend_32h = (uint32_t)(addend >> 32); | |||
return add32carry_next(add32carry_first(base_32l, addend_32l, sum32), | |||
base_32h, addend_32h, sum32 + 1); | |||
} | |||
#define add64carry_first(base, addend, sum) \ | |||
msvc32_add64carry_first(base, addend, sum) | |||
static __forceinline char msvc32_add64carry_next(char carry, uint64_t base, | |||
uint64_t addend, | |||
uint64_t *sum) { | |||
uint32_t *const sum32 = (uint32_t *)sum; | |||
const uint32_t base_32l = (uint32_t)base; | |||
const uint32_t base_32h = (uint32_t)(base >> 32); | |||
const uint32_t addend_32l = (uint32_t)addend; | |||
const uint32_t addend_32h = (uint32_t)(addend >> 32); | |||
return add32carry_next(add32carry_next(carry, base_32l, addend_32l, sum32), | |||
base_32h, addend_32h, sum32 + 1); | |||
} | |||
#define add64carry_next(carry, base, addend, sum) \ | |||
msvc32_add64carry_next(carry, base, addend, sum) | |||
static __forceinline void msvc32_add64carry_last(char carry, uint64_t base, | |||
uint64_t addend, | |||
uint64_t *sum) { | |||
uint32_t *const sum32 = (uint32_t *)sum; | |||
const uint32_t base_32l = (uint32_t)base; | |||
const uint32_t base_32h = (uint32_t)(base >> 32); | |||
const uint32_t addend_32l = (uint32_t)addend; | |||
const uint32_t addend_32h = (uint32_t)(addend >> 32); | |||
add32carry_last(add32carry_next(carry, base_32l, addend_32l, sum32), base_32h, | |||
addend_32h, sum32 + 1); | |||
} | |||
#define add64carry_last(carry, base, addend, sum) \ | |||
msvc32_add64carry_last(carry, base, addend, sum) | |||
#endif /* _MSC_FULL_VER >= 190024231 */ | |||
#elif defined(_M_ARM) | |||
#define mul_32x32_64(a, b) _arm_umull(a, b) | |||
#endif | |||
#pragma warning(pop) | |||
#pragma warning(disable : 4514) /* 'xyz': unreferenced inline function \ | |||
has been removed */ | |||
#pragma warning(disable : 4710) /* 'xyz': function not inlined */ | |||
#pragma warning(disable : 4711) /* function 'xyz' selected for \ | |||
automatic inline expansion */ | |||
#pragma warning(disable : 4127) /* conditional expression is constant */ | |||
#pragma warning(disable : 4702) /* unreachable code */ | |||
#endif /* Compiler */ | |||
#ifndef likely | |||
#define likely(cond) (cond) | |||
#endif | |||
#ifndef unlikely | |||
#define unlikely(cond) (cond) | |||
#endif | |||
#ifndef __maybe_unused | |||
#define __maybe_unused | |||
#endif | |||
#ifndef __always_inline | |||
#define __always_inline __inline | |||
#endif | |||
#ifndef unreachable | |||
#define unreachable() \ | |||
do { \ | |||
} while (1) | |||
#endif | |||
#ifndef bswap64 | |||
#if defined(bswap_64) | |||
#define bswap64 bswap_64 | |||
#elif defined(__bswap_64) | |||
#define bswap64 __bswap_64 | |||
#else | |||
static __always_inline uint64_t bswap64(uint64_t v) { | |||
return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | | |||
((v << 24) & UINT64_C(0x0000ff0000000000)) | | |||
((v << 8) & UINT64_C(0x000000ff00000000)) | | |||
((v >> 8) & UINT64_C(0x00000000ff000000)) | | |||
((v >> 24) & UINT64_C(0x0000000000ff0000)) | | |||
((v >> 40) & UINT64_C(0x000000000000ff00)); | |||
} | |||
#endif | |||
#endif /* bswap64 */ | |||
#ifndef bswap32 | |||
#if defined(bswap_32) | |||
#define bswap32 bswap_32 | |||
#elif defined(__bswap_32) | |||
#define bswap32 __bswap_32 | |||
#else | |||
static __always_inline uint32_t bswap32(uint32_t v) { | |||
return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | | |||
((v >> 8) & UINT32_C(0x0000ff00)); | |||
} | |||
#endif | |||
#endif /* bswap32 */ | |||
#ifndef bswap16 | |||
#if defined(bswap_16) | |||
#define bswap16 bswap_16 | |||
#elif defined(__bswap_16) | |||
#define bswap16 __bswap_16 | |||
#else | |||
static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } | |||
#endif | |||
#endif /* bswap16 */ | |||
#ifndef unaligned | |||
#if defined(__LCC__) | |||
#pragma diag_suppress wrong_entity_for_attribute | |||
#define unaligned(ptr) ((const char __attribute__((packed, aligned(1))) *)(ptr)) | |||
#elif defined(__clang__) | |||
#pragma clang diagnostic ignored "-Wignored-attributes" | |||
#define unaligned(ptr) ((const char __attribute__((packed, aligned(1))) *)(ptr)) | |||
#elif defined(__GNUC__) | |||
#pragma GCC diagnostic ignored "-Wpacked" | |||
#define unaligned(ptr) ((const char __attribute__((packed, aligned(1))) *)(ptr)) | |||
#elif defined(_MSC_VER) | |||
#pragma warning( \ | |||
disable : 4235) /* nonstandard extension used: '__unaligned' \ | |||
* keyword not supported on this architecture */ | |||
#define unaligned(ptr) ((const char __unaligned *)(ptr)) | |||
#else | |||
#define unaligned(ptr) ((const char *)(ptr)) | |||
#endif | |||
#endif /* unaligned */ | |||
/***************************************************************************/ | |||
#ifndef fetch64_le | |||
static __always_inline uint64_t fetch64_le(const void *v) { | |||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |||
return *(const uint64_t *)v; | |||
#else | |||
return bswap64(*(const uint64_t *)v); | |||
#endif | |||
} | |||
#endif /* fetch64_le */ | |||
#ifndef fetch32_le | |||
static __always_inline uint32_t fetch32_le(const void *v) { | |||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |||
return *(const uint32_t *)v; | |||
#else | |||
return bswap32(*(const uint32_t *)v); | |||
#endif | |||
} | |||
#endif /* fetch32_le */ | |||
#ifndef fetch16_le | |||
static __always_inline uint16_t fetch16_le(const void *v) { | |||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |||
return *(const uint16_t *)v; | |||
#else | |||
return bswap16(*(const uint16_t *)v); | |||
#endif | |||
} | |||
#endif /* fetch16_le */ | |||
#if T1HA_USE_FAST_ONESHOT_READ && UNALIGNED_OK && defined(PAGESIZE) && \ | |||
PAGESIZE > 0 && !defined(__SANITIZE_ADDRESS__) | |||
#define can_read_underside(ptr, size) \ | |||
((size) <= sizeof(uintptr_t) && ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) | |||
#endif /* can_fast_read */ | |||
static __always_inline uint64_t tail64_le(const void *v, size_t tail) { | |||
const uint8_t *p = (const uint8_t *)v; | |||
#ifdef can_read_underside | |||
  /* On some systems (e.g. x86) we can perform a 'oneshot' read, which
   * is a little bit faster. Thanks to Marcin Żukowski
   * <marcin.zukowski@gmail.com> for the reminder. */
const unsigned offset = (8 - tail) & 7; | |||
const unsigned shift = offset << 3; | |||
if (likely(can_read_underside(p, 8))) { | |||
p -= offset; | |||
return fetch64_le(p) >> shift; | |||
} | |||
return fetch64_le(p) & ((~UINT64_C(0)) >> shift); | |||
#endif /* 'oneshot' read */ | |||
uint64_t r = 0; | |||
switch (tail & 7) { | |||
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |||
  /* For most CPUs this code is better when copying
   * for alignment or byte reordering is not needed. */
case 0: | |||
return fetch64_le(p); | |||
case 7: | |||
r = (uint64_t)p[6] << 8; | |||
/* fall through */ | |||
case 6: | |||
r += p[5]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 5: | |||
r += p[4]; | |||
r <<= 32; | |||
/* fall through */ | |||
case 4: | |||
return r + fetch32_le(p); | |||
case 3: | |||
r = (uint64_t)p[2] << 16; | |||
/* fall through */ | |||
case 2: | |||
return r + fetch16_le(p); | |||
case 1: | |||
return p[0]; | |||
#else | |||
  /* For most CPUs this code is better than
   * copying for alignment and/or byte reordering. */
case 0: | |||
r = p[7] << 8; | |||
/* fall through */ | |||
case 7: | |||
r += p[6]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 6: | |||
r += p[5]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 5: | |||
r += p[4]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 4: | |||
r += p[3]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 3: | |||
r += p[2]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 2: | |||
r += p[1]; | |||
r <<= 8; | |||
/* fall through */ | |||
case 1: | |||
return r + p[0]; | |||
#endif | |||
} | |||
unreachable(); | |||
} | |||
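/* Editor's worked example for the 'oneshot' path above: with tail == 3,
 * offset = (8 - 3) & 7 = 5 and shift = 40. When can_read_underside() holds,
 * the code reads the 8 bytes ending at p + 2 (i.e. starting at p - 5) and
 * shifts the five low junk bytes out, leaving p[0..2] in the low 24 bits.
 * Otherwise it reads the 8 bytes starting at p (overreading past the tail,
 * but staying within the page) and masks the result down to the low
 * 24 bits. Either way the fetch never crosses a page boundary. */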
#ifndef fetch64_be | |||
static __maybe_unused __always_inline uint64_t fetch64_be(const void *v) { | |||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |||
return *(const uint64_t *)v; | |||
#else | |||
return bswap64(*(const uint64_t *)v); | |||
#endif | |||
} | |||
#endif /* fetch64_be */ | |||
#ifndef fetch32_be | |||
static __maybe_unused __always_inline uint32_t fetch32_be(const void *v) { | |||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |||
return *(const uint32_t *)v; | |||
#else | |||
return bswap32(*(const uint32_t *)v); | |||
#endif | |||
} | |||
#endif /* fetch32_be */ | |||
#ifndef fetch16_be | |||
static __maybe_unused __always_inline uint16_t fetch16_be(const void *v) { | |||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |||
return *(const uint16_t *)v; | |||
#else | |||
return bswap16(*(const uint16_t *)v); | |||
#endif | |||
} | |||
#endif /* fetch16_be */ | |||
static __maybe_unused __always_inline uint64_t tail64_be(const void *v, | |||
size_t tail) { | |||
const uint8_t *p = (const uint8_t *)v; | |||
#ifdef can_read_underside | |||
  /* On some systems we can perform a 'oneshot' read, which is a little bit
   * faster. Thanks to Marcin Żukowski <marcin.zukowski@gmail.com> for the
   * reminder. */
const unsigned offset = (8 - tail) & 7; | |||
const unsigned shift = offset << 3; | |||
if (likely(can_read_underside(p, 8))) { | |||
p -= offset; | |||
return fetch64_be(p) & ((~UINT64_C(0)) >> shift); | |||
} | |||
return fetch64_be(p) >> shift; | |||
#endif /* 'oneshot' read */ | |||
switch (tail & 7) { | |||
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |||
  /* For most CPUs this code is better when copying
   * for alignment or byte reordering is not needed. */
case 1: | |||
return p[0]; | |||
case 2: | |||
return fetch16_be(p); | |||
case 3: | |||
return (uint32_t)fetch16_be(p) << 8 | p[2]; | |||
case 4: | |||
return fetch32_be(p); | |||
case 5: | |||
return (uint64_t)fetch32_be(p) << 8 | p[4]; | |||
case 6: | |||
return (uint64_t)fetch32_be(p) << 16 | fetch16_be(p + 4); | |||
case 7: | |||
return (uint64_t)fetch32_be(p) << 24 | (uint32_t)fetch16_be(p + 4) << 8 | | |||
p[6]; | |||
case 0: | |||
return fetch64_be(p); | |||
#else | |||
  /* For most CPUs this code is better than
   * copying for alignment and/or byte reordering. */
case 1: | |||
return p[0]; | |||
case 2: | |||
return p[1] | (uint32_t)p[0] << 8; | |||
case 3: | |||
return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; | |||
case 4: | |||
return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | | |||
(uint32_t)p[0] << 24; | |||
case 5: | |||
return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | | |||
(uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; | |||
case 6: | |||
return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | | |||
(uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; | |||
case 7: | |||
return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | | |||
(uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | | |||
(uint64_t)p[0] << 48; | |||
case 0: | |||
return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | | |||
(uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | | |||
(uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; | |||
#endif | |||
} | |||
unreachable(); | |||
} | |||
/***************************************************************************/ | |||
#ifndef rot64 | |||
static __always_inline uint64_t rot64(uint64_t v, unsigned s) { | |||
return (v >> s) | (v << (64 - s)); | |||
} | |||
#endif /* rot64 */ | |||
#ifndef mul_32x32_64 | |||
static __always_inline uint64_t mul_32x32_64(uint32_t a, uint32_t b) { | |||
return a * (uint64_t)b; | |||
} | |||
#endif /* mul_32x32_64 */ | |||
#ifndef add64carry_first | |||
static __maybe_unused __always_inline unsigned | |||
add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { | |||
#if __has_builtin(__builtin_addcll) | |||
unsigned long long carryout; | |||
*sum = __builtin_addcll(base, addend, 0, &carryout); | |||
return (unsigned)carryout; | |||
#else | |||
*sum = base + addend; | |||
return *sum < addend; | |||
#endif /* __has_builtin(__builtin_addcll) */ | |||
} | |||
#endif /* add64carry_first */
#ifndef add64carry_next | |||
static __maybe_unused __always_inline unsigned | |||
add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { | |||
#if __has_builtin(__builtin_addcll) | |||
unsigned long long carryout; | |||
*sum = __builtin_addcll(base, addend, carry, &carryout); | |||
return (unsigned)carryout; | |||
#else | |||
*sum = base + addend + carry; | |||
return *sum < addend || (carry && *sum == addend); | |||
#endif /* __has_builtin(__builtin_addcll) */ | |||
} | |||
#endif /* add64carry_next */ | |||
#ifndef add64carry_last | |||
static __maybe_unused __always_inline void | |||
add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { | |||
#if __has_builtin(__builtin_addcll) | |||
unsigned long long carryout; | |||
*sum = __builtin_addcll(base, addend, carry, &carryout); | |||
(void)carryout; | |||
#else | |||
*sum = base + addend + carry; | |||
#endif /* __has_builtin(__builtin_addcll) */ | |||
} | |||
#endif /* add64carry_last */ | |||
#ifndef mul_64x64_128 | |||
static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a, | |||
uint64_t b, | |||
uint64_t *h) { | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
__uint128_t r = (__uint128_t)a * (__uint128_t)b; | |||
  /* modern GCC can optimize this nicely */
*h = (uint64_t)(r >> 64); | |||
return (uint64_t)r; | |||
#elif defined(mul_64x64_high) | |||
*h = mul_64x64_high(a, b); | |||
return a * b; | |||
#else | |||
/* performs 64x64 to 128 bit multiplication */ | |||
const uint64_t ll = mul_32x32_64((uint32_t)a, (uint32_t)b); | |||
const uint64_t lh = mul_32x32_64(a >> 32, (uint32_t)b); | |||
const uint64_t hl = mul_32x32_64((uint32_t)a, b >> 32); | |||
const uint64_t hh = mul_32x32_64(a >> 32, b >> 32); | |||
  /* A few simplifications are possible here for 32-bit architectures,
   * but they would break compatibility with the original 64-bit
   * version. That would be a bad idea: a 32-bit t1ha would still be
   * (relatively) slow, yet no longer compatible. */
uint64_t l; | |||
add64carry_last(add64carry_first(ll, lh << 32, &l), hh, lh >> 32, h); | |||
add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h); | |||
return l; | |||
#endif | |||
} | |||
#endif /* mul_64x64_128() */ | |||
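/* Editor's note on the portable fallback above: it is plain schoolbook
 * multiplication on 32-bit halves. Writing a = ah*2^32 + al and
 * b = bh*2^32 + bl:
 *
 *   a * b = (ah*bh)<<64 + (ah*bl + al*bh)<<32 + al*bl
 *         = hh<<64 + (lh + hl)<<32 + ll
 *
 * The two add64carry_first()/add64carry_last() pairs fold the shifted
 * middle terms into the 128-bit result while propagating carries into *h. */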
#ifndef mul_64x64_high | |||
static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a, | |||
uint64_t b) { | |||
uint64_t h; | |||
mul_64x64_128(a, b, &h); | |||
return h; | |||
} | |||
#endif /* mul_64x64_high */ | |||
/***************************************************************************/ | |||
/* 'magic' primes */ | |||
static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); | |||
static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); | |||
static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); | |||
static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); | |||
static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); | |||
static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); | |||
static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); | |||
/* xor high and low parts of full 128-bit product */ | |||
static __maybe_unused __always_inline uint64_t mux64(uint64_t v, | |||
uint64_t prime) { | |||
uint64_t l, h; | |||
l = mul_64x64_128(v, prime, &h); | |||
return l ^ h; | |||
} | |||
static __always_inline uint64_t final64(uint64_t a, uint64_t b) { | |||
uint64_t x = (a + rot64(b, 41)) * prime_0; | |||
uint64_t y = (rot64(a, 23) + b) * prime_6; | |||
return mux64(x ^ y, prime_5); | |||
} | |||
static __always_inline void mixup64(uint64_t *__restrict a, | |||
uint64_t *__restrict b, uint64_t v, | |||
uint64_t prime) { | |||
uint64_t h; | |||
*a ^= mul_64x64_128(*b + v, prime, &h); | |||
*b += h; | |||
} | |||
/***************************************************************************/ | |||
typedef union t1ha_uint128 { | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
__uint128_t v; | |||
#endif | |||
struct { | |||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |||
uint64_t l, h; | |||
#else | |||
uint64_t h, l; | |||
#endif | |||
}; | |||
} t1ha_uint128_t; | |||
static __always_inline t1ha_uint128_t not128(const t1ha_uint128_t v) { | |||
t1ha_uint128_t r; | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
r.v = ~v.v; | |||
#else | |||
r.l = ~v.l; | |||
r.h = ~v.h; | |||
#endif | |||
return r; | |||
} | |||
static __always_inline t1ha_uint128_t left128(const t1ha_uint128_t v, | |||
unsigned s) { | |||
t1ha_uint128_t r; | |||
assert(s < 128); | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
r.v = v.v << s; | |||
#else | |||
r.l = (s < 64) ? v.l << s : 0; | |||
r.h = (s < 64) ? (v.h << s) | (s ? v.l >> (64 - s) : 0) : v.l << (s - 64); | |||
#endif | |||
return r; | |||
} | |||
static __always_inline t1ha_uint128_t right128(const t1ha_uint128_t v, | |||
unsigned s) { | |||
t1ha_uint128_t r; | |||
assert(s < 128); | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
r.v = v.v >> s; | |||
#else | |||
r.l = (s < 64) ? (s ? v.h << (64 - s) : 0) | (v.l >> s) : v.h >> (s - 64); | |||
r.h = (s < 64) ? v.h >> s : 0; | |||
#endif | |||
return r; | |||
} | |||
static __always_inline t1ha_uint128_t or128(t1ha_uint128_t x, | |||
t1ha_uint128_t y) { | |||
t1ha_uint128_t r; | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
r.v = x.v | y.v; | |||
#else | |||
r.l = x.l | y.l; | |||
r.h = x.h | y.h; | |||
#endif | |||
return r; | |||
} | |||
static __always_inline t1ha_uint128_t xor128(t1ha_uint128_t x, | |||
t1ha_uint128_t y) { | |||
t1ha_uint128_t r; | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
r.v = x.v ^ y.v; | |||
#else | |||
r.l = x.l ^ y.l; | |||
r.h = x.h ^ y.h; | |||
#endif | |||
return r; | |||
} | |||
static __always_inline t1ha_uint128_t rot128(t1ha_uint128_t v, unsigned s) { | |||
s &= 127; | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
v.v = (v.v << (128 - s)) | (v.v >> s); | |||
return v; | |||
#else | |||
return s ? or128(left128(v, 128 - s), right128(v, s)) : v; | |||
#endif | |||
} | |||
static __always_inline t1ha_uint128_t add128(t1ha_uint128_t x, | |||
t1ha_uint128_t y) { | |||
t1ha_uint128_t r; | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
r.v = x.v + y.v; | |||
#else | |||
add64carry_last(add64carry_first(x.l, y.l, &r.l), x.h, y.h, &r.h); | |||
#endif | |||
return r; | |||
} | |||
static __always_inline t1ha_uint128_t mul128(t1ha_uint128_t x, | |||
t1ha_uint128_t y) { | |||
t1ha_uint128_t r; | |||
#if defined(__SIZEOF_INT128__) || \ | |||
(defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) | |||
r.v = x.v * y.v; | |||
#else | |||
r.l = mul_64x64_128(x.l, y.l, &r.h); | |||
r.h += x.l * y.h + y.l * x.h; | |||
#endif | |||
return r; | |||
} |
@@ -1477,78 +1477,31 @@ void rspamd_cryptobox_hash (guchar *out, | |||
rspamd_cryptobox_hash_final (&st, out); | |||
} | |||
/* MUST be 64 bytes at maximum */ | |||
struct rspamd_cryptobox_fast_hash_state_real { | |||
guint64 h; /* current hash value */ | |||
guint64 pos; /* pos in bytes in the buf */ | |||
guint64 buf; | |||
}; | |||
G_STATIC_ASSERT (sizeof (t1ha_context_t) == | |||
sizeof (rspamd_cryptobox_fast_hash_state_t)); | |||
void | |||
rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st, | |||
guint64 seed) | |||
{ | |||
struct rspamd_cryptobox_fast_hash_state_real *rst = | |||
(struct rspamd_cryptobox_fast_hash_state_real *)st; | |||
memset (rst, 0, sizeof (*rst)); | |||
rst->h = seed; | |||
t1ha_context_t *rst = (t1ha_context_t *)st; | |||
t1ha2_init (rst, seed, 0); | |||
} | |||
void | |||
rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st, | |||
const void *data, gsize len) | |||
{ | |||
struct rspamd_cryptobox_fast_hash_state_real *rst = | |||
(struct rspamd_cryptobox_fast_hash_state_real *)st; | |||
const guchar *d = data; | |||
guint leftover; | |||
guint64 n; | |||
leftover = rst->pos; | |||
if (leftover > 0 && len + leftover >= 8) { | |||
n = sizeof (rst->buf) - leftover; | |||
memcpy (((guchar *)&rst->buf) + leftover, d, n); | |||
d += n; | |||
len -= n; | |||
rst->h = mum_hash_step (rst->h, rst->buf); | |||
rst->buf = 0; | |||
rst->pos = 0; | |||
} | |||
while (len > 8) { | |||
#ifdef _MUM_UNALIGNED_ACCESS | |||
rst->h = mum_hash_step (rst->h, *(guint64 *)d); | |||
#else | |||
memcpy (&n, d, sizeof (n)); | |||
rst->h = mum_hash_step (rst->h, n); | |||
#endif | |||
len -= 8; | |||
d += 8; | |||
} | |||
if (len > 0 && rst->pos + len <= 8) { | |||
memcpy (((guchar *)&rst->buf) + rst->pos, d, len); | |||
rst->pos += len; | |||
} | |||
t1ha_context_t *rst = (t1ha_context_t *)st; | |||
t1ha2_update (rst, data, len); | |||
} | |||
guint64 | |||
rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st) | |||
{ | |||
struct rspamd_cryptobox_fast_hash_state_real *rst = | |||
(struct rspamd_cryptobox_fast_hash_state_real *)st; | |||
guint leftover; | |||
t1ha_context_t *rst = (t1ha_context_t *)st; | |||
leftover = rst->pos; | |||
if (leftover > 0) { | |||
memset (((guchar *)&rst->buf) + leftover, 0, sizeof (rst->buf) - leftover); | |||
rst->h = mum_hash_step (rst->h, rst->buf); | |||
} | |||
return mum_hash_finish (rst->h); | |||
return t1ha2_final (rst, NULL); | |||
} | |||
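/* Editor's sketch (not part of the diff): the IUF ("init/update/final")
 * interface above after the switch to t1ha2, using only functions defined
 * in this file. The seed is arbitrary. */
#if 0
static guint64
example_fast_hash (const void *data, gsize len)
{
	rspamd_cryptobox_fast_hash_state_t st;

	rspamd_cryptobox_fast_hash_init (&st, /* seed */ 0xdeadbeefULL);
	rspamd_cryptobox_fast_hash_update (&st, data, len);

	return rspamd_cryptobox_fast_hash_final (&st);
}
#endif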
/** | |||
@@ -1558,18 +1511,14 @@ static inline guint64 | |||
rspamd_cryptobox_fast_hash_machdep (const void *data, | |||
gsize len, guint64 seed) | |||
{ | |||
#if defined(__LP64__) || defined(_LP64) | |||
return t1ha (data, len, seed); | |||
#else | |||
return t1ha32 (data, len, seed); | |||
#endif | |||
return t1ha0 (data, len, seed); | |||
} | |||
static inline guint64 | |||
rspamd_cryptobox_fast_hash_indep (const void *data, | |||
gsize len, guint64 seed) | |||
{ | |||
return t1ha (data, len, seed); | |||
return t1ha2_atonce (data, len, seed); | |||
} | |||
guint64 | |||
@@ -1593,7 +1542,7 @@ rspamd_cryptobox_fast_hash_specific ( | |||
case RSPAMD_CRYPTOBOX_MUMHASH: | |||
return mum_hash (data, len, seed); | |||
case RSPAMD_CRYPTOBOX_T1HA: | |||
return t1ha (data, len, seed); | |||
return t1ha2_atonce (data, len, seed); | |||
case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: | |||
return rspamd_cryptobox_fast_hash_indep (data, len, seed); | |||
case RSPAMD_CRYPTOBOX_HASHFAST: |
@@ -344,7 +344,7 @@ void rspamd_cryptobox_hash (guchar *out, | |||
/* Non crypto hash IUF interface */ | |||
typedef struct RSPAMD_ALIGNED(32) rspamd_cryptobox_fast_hash_state_s { | |||
unsigned char opaque[64]; | |||
unsigned char opaque[64 + sizeof (size_t) + sizeof (uint64_t)]; | |||
} rspamd_cryptobox_fast_hash_state_t; | |||
/** |