From ebc15f59da42f0b01afb74caf844b8d8005d79c7 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sun, 25 Oct 2015 21:36:13 +0000 Subject: [PATCH] Start rework of blake2 implementation. --- src/libcryptobox/CMakeLists.txt | 4 +- src/libcryptobox/blake2/blake2-internal.h | 37 +++ src/libcryptobox/blake2/blake2.c | 292 ++++++++++++++++++++++ src/libcryptobox/blake2/blake2.h | 74 ++++++ src/libcryptobox/blake2/ref.c | 185 ++++++++++++++ 5 files changed, 591 insertions(+), 1 deletion(-) create mode 100644 src/libcryptobox/blake2/blake2-internal.h create mode 100644 src/libcryptobox/blake2/blake2.c create mode 100644 src/libcryptobox/blake2/blake2.h create mode 100644 src/libcryptobox/blake2/ref.c diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt index 3579ebc1a..9cb7ecbe7 100644 --- a/src/libcryptobox/CMakeLists.txt +++ b/src/libcryptobox/CMakeLists.txt @@ -8,6 +8,8 @@ SET(CHACHASRC ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/chacha.c SET(POLYSRC ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/poly1305.c) SET(SIPHASHSRC ${CMAKE_CURRENT_SOURCE_DIR}/siphash/siphash.c ${CMAKE_CURRENT_SOURCE_DIR}/siphash/ref.c) +SET(BLAKE2SRC ${CMAKE_CURRENT_SOURCE_DIR}/blake2/blake2.c + ${CMAKE_CURRENT_SOURCE_DIR}/blake2/ref.c) SET(CURVESRC ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/ref.c ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/curve25519.c) @@ -81,4 +83,4 @@ INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") SET(LIBCRYPTOBOXSRC ${CMAKE_CURRENT_SOURCE_DIR}/cryptobox.c) SET(RSPAMD_CRYPTOBOX ${LIBCRYPTOBOXSRC} ${CHACHASRC} ${POLYSRC} ${SIPHASHSRC} - ${CURVESRC} PARENT_SCOPE) \ No newline at end of file + ${CURVESRC} ${BLAKE2SRC} PARENT_SCOPE) \ No newline at end of file diff --git a/src/libcryptobox/blake2/blake2-internal.h b/src/libcryptobox/blake2/blake2-internal.h new file mode 100644 index 000000000..951ef5c7a --- /dev/null +++ b/src/libcryptobox/blake2/blake2-internal.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef RSPAMD_BLAKE2_INTERNAL_H
+#define RSPAMD_BLAKE2_INTERNAL_H
+
+/* size_t is used below; do not rely on the includer to provide it */
+#include <stddef.h>
+
+#include "blake2.h"
+
+/*
+ * Working layout that the implementation overlays on the opaque
+ * blake2b_state buffer handed out by the public API (blake2.c casts the
+ * public pointer to this type).
+ *
+ *  h        - chaining value, read and written as eight 64-bit words
+ *             serialized byte-wise in little-endian order
+ *  t        - 128-bit count of bytes compressed so far (two 64-bit words)
+ *  f        - finalization flags; the first 8 bytes are set to 0xff before
+ *             the last block is compressed
+ *  leftover - number of bytes currently held in buffer
+ *  buffer   - input that has not been compressed yet (at most one block)
+ */
+typedef struct blake2b_state_internal_t {
+	unsigned char h[64];
+	unsigned char t[16];
+	unsigned char f[16];
+	size_t leftover;
+	unsigned char buffer[BLAKE2B_BLOCKBYTES];
+} blake2b_state_internal;
+
+/*
+ * Compile-time guard: the overlay must fit inside the public opaque state,
+ * otherwise every cast from blake2b_state would write out of bounds.
+ * A negative array size makes the build fail if the layout ever outgrows it.
+ */
+typedef char blake2b_state_internal_fits_opaque[
+	(sizeof (blake2b_state_internal) <= sizeof (blake2b_state)) ? 1 : -1];
+
+#endif
diff --git a/src/libcryptobox/blake2/blake2.c b/src/libcryptobox/blake2/blake2.c
new file mode 100644
index 000000000..f11eb33ef
--- /dev/null
+++ b/src/libcryptobox/blake2/blake2.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * Copyright (c) 2015, Andrew Moon
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+#include "platform_config.h"
+#include "blake2.h"
+#include "blake2-internal.h"
+
+extern unsigned long cpu_config;
+
+/*
+ * Descriptor of one BLAKE2b block-function implementation: the cpu_config
+ * bits it requires (0 = no requirement), a short name, and the entry point.
+ */
+typedef struct blake2b_impl_t {
+	unsigned long cpu_flags;
+	const char *desc;
+
+	void (*blake2b_blocks) (blake2b_state_internal *state,
+			const unsigned char *in,
+			size_t bytes,
+			size_t stride);
+} blake2b_impl_t;
+
+/* stride passed when several consecutive blocks are hashed in one call */
+#define BLAKE2B_STRIDE BLAKE2B_BLOCKBYTES
+/* stride passed when a single (possibly partial) block is hashed */
+#define BLAKE2B_STRIDE_NONE 0
+
+#define BLAKE2B_DECLARE(ext) \
+	void blake2b_blocks_##ext(blake2b_state_internal *state, const unsigned char *in, size_t bytes, size_t stride);
+
+#define BLAKE2B_IMPL(cpuflags, desc, ext) \
+	{(cpuflags), desc, blake2b_blocks_##ext}
+
+#if defined(HAVE_AVX2)
+BLAKE2B_DECLARE(avx2)
+#define BLAKE2B_AVX2 BLAKE2B_IMPL(CPUID_AVX2, "avx2", avx2)
+#endif
+
+#if defined(HAVE_AVX)
+BLAKE2B_DECLARE(avx)
+#define BLAKE2B_AVX BLAKE2B_IMPL(CPUID_AVX, "avx", avx)
+#endif
+
+#if defined(CMAKE_ARCH_x86_64) || defined(CMAKE_ARCH_i386)
+BLAKE2B_DECLARE(x86)
+#define BLAKE2B_X86 BLAKE2B_IMPL(0, "x86", x86)
+#endif
+
+/* the "always runs" version */
+BLAKE2B_DECLARE(ref)
+#define BLAKE2B_GENERIC BLAKE2B_IMPL(0, "generic", ref)
+
+
+
+/* list implementations from most optimized to least, with generic as the last entry */
+static const blake2b_impl_t blake2b_list[] = {
+	/* x86 */
+#if defined(BLAKE2B_AVX2)
+	BLAKE2B_AVX2,
+#endif
+#if defined(BLAKE2B_AVX)
+	BLAKE2B_AVX,
+#endif
+#if defined(BLAKE2B_X86)
+	BLAKE2B_X86,
+#endif
+	BLAKE2B_GENERIC
+};
+
+/*
+ * Implementation in use. Until blake2b_load() has matched the list against
+ * the detected CPU features this must be the generic (last) entry: starting
+ * from the first entry would run the most demanding routine on a CPU that
+ * may not support it.
+ */
+static const blake2b_impl_t *blake2b_opt =
+		&blake2b_list[G_N_ELEMENTS (blake2b_list) - 1];
+
+
+/* is the pointer not aligned on a word boundary? */
+static int
+blake2b_not_aligned (const void *p)
+{
+#if !defined(CPU_8BITS)
+	return ((size_t) p & (sizeof (size_t) - 1)) != 0;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Copy the 64-byte hash out of the internal state.
+ *
+ * state->h is initialized from a little-endian byte table and the reference
+ * block function reads/writes it byte-wise in little-endian order, so it is
+ * already in output order on any host. (The previous big-endian branch
+ * byte-reversed each word of that already-correct serialization.)
+ */
+static void
+blake2b_store_hash (blake2b_state_internal *state, unsigned char *hash)
+{
+	memcpy (hash, state->h, sizeof (state->h));
+}
+
+/*
+ * Initial chaining value: the BLAKE2b IV serialized little-endian, with the
+ * first word xored with the parameter bytes
+ * (digest length 0x40, key length 0, fanout 1, depth 1).
+ */
+static const unsigned char blake2b_init_le[64] = {
+	0x08 ^ 0x40, 0xc9 ^ 0x00, 0xbc ^ 0x01, 0xf3 ^ 0x01, 0x67 ^ 0x00,
+	0xe6 ^ 0x00, 0x09 ^ 0x00, 0x6a ^ 0x00,
+	0x3b, 0xa7, 0xca, 0x84, 0x85, 0xae, 0x67, 0xbb,
+	0x2b, 0xf8, 0x94, 0xfe, 0x72, 0xf3, 0x6e, 0x3c,
+	0xf1, 0x36, 0x1d, 0x5f, 0x3a, 0xf5, 0x4f, 0xa5,
+	0xd1, 0x82, 0xe6, 0xad, 0x7f, 0x52, 0x0e, 0x51,
+	0x1f, 0x6c, 0x3e, 0x2b, 0x8c, 0x68, 0x05, 0x9b,
+	0x6b, 0xbd, 0x41, 0xfb, 0xab, 0xd9, 0x83, 0x1f,
+	0x79, 0x21, 0x7e, 0x13, 0x19, 0xcd, 0xe0, 0x5b,
+};
+
+/* initialize the state in serial mode */
+void
+blake2b_init (blake2b_state *S)
+{
+	blake2b_state_internal *state = (blake2b_state_internal *) S;
+
+	/* the state is kept as little-endian bytes regardless of the host */
+	memcpy (state->h, blake2b_init_le, sizeof (state->h));
+	/* reset each field on its own instead of relying on their adjacency */
+	memset (state->t, 0, sizeof (state->t));
+	memset (state->f, 0, sizeof (state->f));
+	state->leftover = 0;
+}
+
+/*
+ * Initialize the state in serial keyed mode.
+ *
+ * key/keylen: the MAC key, at most BLAKE2B_KEYBYTES bytes. A longer key is
+ * reported on stderr and terminates the process. A zero-length key yields
+ * the same state as blake2b_init().
+ */
+void
+blake2b_keyed_init (blake2b_state *S, const unsigned char *key, size_t keylen)
+{
+	blake2b_state_internal *state = (blake2b_state_internal *) S;
+	unsigned char k[BLAKE2B_BLOCKBYTES] = {0};
+
+	if (keylen > BLAKE2B_KEYBYTES) {
+		fprintf (stderr,
+				"key size larger than %u passed to blake2b_keyed_init",
+				(unsigned) BLAKE2B_KEYBYTES);
+		exit (-1);
+	}
+
+	blake2b_init (S);
+
+	if (keylen > 0) {
+		/*
+		 * RFC 7693: the key length is byte 1 of the parameter block that is
+		 * xored into the initial chaining value, and the key, zero-padded to
+		 * a full block, is hashed as the first block. Without the xor the
+		 * result is not standard keyed BLAKE2b.
+		 */
+		state->h[1] ^= (unsigned char) keylen;
+		memcpy (k, key, keylen);
+		blake2b_update (S, k, BLAKE2B_BLOCKBYTES);
+	}
+}
+
+/* hash inlen bytes from in, which may or may not be word aligned, returns the number of bytes used */
+static size_t
+blake2b_consume_blocks (blake2b_state_internal *state,
+		const unsigned char *in,
+		size_t inlen)
+{
+	/* always need to leave at least BLAKE2B_BLOCKBYTES in case this is the final block */
+	if (inlen <= BLAKE2B_BLOCKBYTES)
+		return 0;
+
+	/* largest multiple of the block size that is strictly below inlen */
+	inlen = ((inlen - 1) & ~((size_t) BLAKE2B_BLOCKBYTES - 1));
+	if (blake2b_not_aligned (in)) {
+		/* copy the unaligned data to a local buffer and process in chunks */
+		unsigned char buffer[16 * BLAKE2B_BLOCKBYTES];
+		size_t left = inlen;
+		while (left) {
+			const size_t bytes = (left > sizeof (buffer)) ? sizeof (buffer)
+					: left;
+			memcpy (buffer, in, bytes);
+			blake2b_opt->blake2b_blocks (state, buffer, bytes, BLAKE2B_STRIDE);
+			in += bytes;
+			left -= bytes;
+		}
+	}
+	else {
+		/* word aligned, handle directly */
+		blake2b_opt->blake2b_blocks (state, in, inlen, BLAKE2B_STRIDE);
+	}
+
+	return inlen;
+}
+
+/* update the hash state with inlen bytes from in */
+void
+blake2b_update (blake2b_state *S, const unsigned char *in, size_t inlen)
+{
+	blake2b_state_internal *state = (blake2b_state_internal *) S;
+	size_t bytes;
+
+	/* blake2b processes the final <=BLOCKBYTES bytes raw, so we can only update if there are at least BLOCKBYTES+1 bytes available */
+	if ((state->leftover + inlen) > BLAKE2B_BLOCKBYTES) {
+		/* handle the previous data, we know there is enough for at least one block */
+		if (state->leftover) {
+			bytes = (BLAKE2B_BLOCKBYTES - state->leftover);
+			memcpy (state->buffer + state->leftover, in, bytes);
+			in += bytes;
+			inlen -= bytes;
+			state->leftover = 0;
+			blake2b_opt->blake2b_blocks (state,
+					state->buffer,
+					BLAKE2B_BLOCKBYTES,
+					BLAKE2B_STRIDE_NONE);
+		}
+
+		/* handle the direct data (if any) */
+		bytes = blake2b_consume_blocks (state, in, inlen);
+		inlen -= bytes;
+		in += bytes;
+	}
+
+	/* handle leftover data; skip the copy for an empty (possibly NULL) input */
+	if (inlen > 0) {
+		memcpy (state->buffer + state->leftover, in, inlen);
+		state->leftover += inlen;
+	}
+}
+
+/* finalize the hash: compress the buffered tail as the last block and write 64 bytes to hash */
+void
+blake2b_final (blake2b_state *S, unsigned char *hash)
+{
+	blake2b_state_internal *state = (blake2b_state_internal *) S;
+
+	/* mark the upcoming block as the last one */
+	memset (&state->f[0], 0xff, 8);
+	blake2b_opt->blake2b_blocks (state,
+			state->buffer,
+			state->leftover,
+			BLAKE2B_STRIDE_NONE);
+	blake2b_store_hash (state, hash);
+}
+
+/* one-shot hash inlen bytes from in */
+void
+blake2b (unsigned char *hash, const unsigned char *in, size_t inlen)
+{
+	blake2b_state S;
+	blake2b_state_internal *state = (blake2b_state_internal *) &S;
+	size_t bytes;
+
+	blake2b_init (&S);
+
+	/* hash until <= 128 bytes left */
+	bytes = blake2b_consume_blocks (state, in, inlen);
+	in += bytes;
+	inlen -= bytes;
+
+	/* final block */
+	memset (&state->f[0], 0xff, 8);
+	blake2b_opt->blake2b_blocks (state, in, inlen, BLAKE2B_STRIDE_NONE);
+	blake2b_store_hash (state, hash);
+}
+
+/* one-shot keyed hash of inlen bytes from in under key/keylen */
+void
+blake2b_keyed (unsigned char *hash,
+		const unsigned char *in,
+		size_t inlen,
+		const unsigned char *key,
+		size_t keylen)
+{
+	blake2b_state S;
+	blake2b_keyed_init (&S, key, keylen);
+	blake2b_update (&S, in, inlen);
+	blake2b_final (&S, hash);
+}
+
+/*
+ * Pick the first (most optimized) list entry whose required CPU bits are all
+ * present in cpu_config. Entries with no requirement (cpu_flags == 0) match
+ * as well, which a plain "flags & cpu_config" test could never do. When no
+ * CPU information is available the generic default is kept.
+ */
+void
+blake2b_load (void)
+{
+	guint i;
+
+	if (cpu_config != 0) {
+		for (i = 0; i < G_N_ELEMENTS (blake2b_list); i++) {
+			if ((blake2b_list[i].cpu_flags & cpu_config) ==
+					blake2b_list[i].cpu_flags) {
+				blake2b_opt = &blake2b_list[i];
+				break;
+			}
+		}
+	}
+}
\ No newline at end of file
diff --git a/src/libcryptobox/blake2/blake2.h b/src/libcryptobox/blake2/blake2.h
new file mode 100644
index 000000000..abc0f1a9c
--- /dev/null
+++ b/src/libcryptobox/blake2/blake2.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RSPAMD_BLAKE2_H
+#define RSPAMD_BLAKE2_H
+
+/* size_t appears in the prototypes below; keep the header self-contained */
+#include <stddef.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* sizes, in bytes, used by the BLAKE2b API */
+enum blake2b_constant {
+	BLAKE2B_BLOCKBYTES = 128,
+	BLAKE2B_OUTBYTES = 64,
+	BLAKE2B_KEYBYTES = 64,
+	BLAKE2B_SALTBYTES = 16,
+	BLAKE2B_PERSONALBYTES = 16
+};
+
+/* opaque hashing context; the implementation overlays its own layout on it */
+typedef struct blake2b_state_t {
+	unsigned char opaque[256];
+} blake2b_state;
+
+/* incremental */
+
+/* reset S for an unkeyed hash */
+void blake2b_init (blake2b_state *S);
+
+/*
+ * reset S for a keyed hash; keylen must not exceed BLAKE2B_KEYBYTES
+ * (a longer key is reported on stderr and the process exits)
+ */
+void blake2b_keyed_init (blake2b_state *S,
+		const unsigned char *key,
+		size_t keylen);
+
+/* feed inlen bytes from in into the hash */
+void blake2b_update (blake2b_state *S,
+		const unsigned char *in,
+		size_t inlen);
+
+/* finish the hash and write 64 bytes to hash */
+void blake2b_final (blake2b_state *S, unsigned char *hash);
+
+/* one-shot */
+
+/* hash inlen bytes from in, writing 64 bytes to hash */
+void blake2b (unsigned char *hash,
+		const unsigned char *in,
+		size_t inlen);
+
+/* keyed variant of blake2b(); same key constraints as blake2b_keyed_init() */
+void blake2b_keyed (unsigned char *hash,
+		const unsigned char *in,
+		size_t inlen,
+		const unsigned char *key,
+		size_t keylen);
+
+/*
+ * choose the block-function implementation according to the global
+ * cpu_config; this is the function blake2.c actually defines, and it had no
+ * prototype anywhere
+ */
+void blake2b_load (void);
+
+/*
+ * NOTE(review): no definition of blake2b_startup is visible in this patch
+ * (blake2.c provides blake2b_load instead). Kept so existing references
+ * still compile - confirm whether it can be dropped.
+ */
+int blake2b_startup (void);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/src/libcryptobox/blake2/ref.c b/src/libcryptobox/blake2/ref.c
new file mode 100644
index 000000000..15b74351b
--- /dev/null
+++ b/src/libcryptobox/blake2/ref.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * Copyright (c) 2015, Andrew Moon
+ * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "blake2.h"
+#include "blake2-internal.h"
+
+typedef uint64_t blake2b_uint64;
+
+/*
+ * External linkage is required: blake2.c declares this function and stores
+ * its address in the implementation list, so a static definition leaves
+ * that reference unresolved at link time.
+ */
+void blake2b_blocks_ref (blake2b_state_internal *S,
+		const unsigned char *in,
+		size_t bytes,
+		size_t stride);
+
+/* message word permutation used by each of the 12 rounds */
+static const unsigned char blake2b_sigma[12][16] = {
+	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+	{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+	{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+	{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+	{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+	{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+	{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+	{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}
+};
+
+/* rotate x right by k bits; k must be in 1..63 (only 16, 24, 32, 63 are used) */
+static blake2b_uint64
+ROTR64 (blake2b_uint64 x, int k)
+{
+	return ((x >> k) | (x << (64 - k)));
+}
+
+/* load a 64-bit word from 8 little-endian bytes, independent of host order */
+static blake2b_uint64
+U8TO64 (const unsigned char *p)
+{
+	return
+			((blake2b_uint64) p[0]) |
+			((blake2b_uint64) p[1] << 8) |
+			((blake2b_uint64) p[2] << 16) |
+			((blake2b_uint64) p[3] << 24) |
+			((blake2b_uint64) p[4] << 32) |
+			((blake2b_uint64) p[5] << 40) |
+			((blake2b_uint64) p[6] << 48) |
+			((blake2b_uint64) p[7] << 56);
+}
+
+/* store a 64-bit word as 8 little-endian bytes, independent of host order */
+static void
+U64TO8 (unsigned char *p, blake2b_uint64 v)
+{
+	p[0] = (v) & 0xff;
+	p[1] = (v >> 8) & 0xff;
+	p[2] = (v >> 16) & 0xff;
+	p[3] = (v >> 24) & 0xff;
+	p[4] = (v >> 32) & 0xff;
+	p[5] = (v >> 40) & 0xff;
+	p[6] = (v >> 48) & 0xff;
+	p[7] = (v >> 56) & 0xff;
+}
+
+/*
+ * Portable BLAKE2b compression.
+ *
+ * S      - internal state; h and t are read at entry and written back at exit
+ * in     - input bytes
+ * bytes  - number of input bytes; the loop runs until at most 128 remain
+ * stride - distance added to in after each block
+ *
+ * When the finalization flag f[0..7] is set the input is treated as the last
+ * block: it is copied into a zero-padded local block first. The callers in
+ * blake2.c pass at most 128 bytes in that case.
+ */
+void
+blake2b_blocks_ref (blake2b_state_internal *S,
+		const unsigned char *in,
+		size_t bytes,
+		size_t stride)
+{
+	const blake2b_uint64 f0 = U8TO64 (&S->f[0]);
+	const blake2b_uint64 f1 = U8TO64 (&S->f[8]);
+
+	const blake2b_uint64 w8 = 0x6a09e667f3bcc908ull;
+	const blake2b_uint64 w9 = 0xbb67ae8584caa73bull;
+	const blake2b_uint64 w10 = 0x3c6ef372fe94f82bull;
+	const blake2b_uint64 w11 = 0xa54ff53a5f1d36f1ull;
+	const blake2b_uint64 w12 = 0x510e527fade682d1ull;
+	const blake2b_uint64 w13 = 0x9b05688c2b3e6c1full;
+	const blake2b_uint64 w14 = 0x1f83d9abfb41bd6bull ^ f0;
+	const blake2b_uint64 w15 = 0x5be0cd19137e2179ull ^ f1;
+
+	/* the byte counter advances by a full block, or by the short tail */
+	const size_t inc = (bytes >= 128) ? 128 : bytes;
+
+	blake2b_uint64 t0 = U8TO64 (&S->t[0]);
+	blake2b_uint64 t1 = U8TO64 (&S->t[8]);
+
+	blake2b_uint64 h[8];
+	blake2b_uint64 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;
+	unsigned char buffer[128];
+
+	size_t i;
+
+	if (f0) {
+		/* last block: zero-pad a local copy */
+		memset (buffer, 0, sizeof (buffer));
+		/* empty input may come with a NULL pointer; do not hand it to memcpy */
+		if (bytes > 0) {
+			memcpy (buffer, in, bytes);
+		}
+		in = buffer;
+	}
+
+	for (i = 0; i < 8; i++)
+		h[i] = U8TO64 (&S->h[i * 8]);
+
+	while (1) {
+		blake2b_uint64 m[16];
+
+		/* 128-bit counter increment with carry */
+		t0 += inc;
+		if (t0 < inc)
+			t1 += 1;
+
+		for (i = 0; i < 16; i++)
+			m[i] = U8TO64 (in + (i * 8));
+
+		v0 = h[0];
+		v1 = h[1];
+		v2 = h[2];
+		v3 = h[3];
+		v4 = h[4];
+		v5 = h[5];
+		v6 = h[6];
+		v7 = h[7];
+		v8 = w8;
+		v9 = w9;
+		v10 = w10;
+		v11 = w11;
+		v12 = w12 ^ t0;
+		v13 = w13 ^ t1;
+		v14 = w14;
+		v15 = w15;
+
+/* one mixing step; wrapped so that it behaves as a single statement */
+#define G(r, x, a, b, c, d) \
+	do { \
+		a += b + m[blake2b_sigma[r][2*x+0]]; \
+		d = ROTR64(d ^ a, 32); \
+		c += d; \
+		b = ROTR64(b ^ c, 24); \
+		a += b + m[blake2b_sigma[r][2*x+1]]; \
+		d = ROTR64(d ^ a, 16); \
+		c += d; \
+		b = ROTR64(b ^ c, 63); \
+	} while (0)
+
+		for (i = 0; i < 12; i++) {
+			G(i, 0, v0, v4, v8, v12);
+			G(i, 1, v1, v5, v9, v13);
+			G(i, 2, v2, v6, v10, v14);
+			G(i, 3, v3, v7, v11, v15);
+			G(i, 4, v0, v5, v10, v15);
+			G(i, 5, v1, v6, v11, v12);
+			G(i, 6, v2, v7, v8, v13);
+			G(i, 7, v3, v4, v9, v14);
+		}
+
+#undef G
+
+		h[0] ^= (v0 ^ v8);
+		h[1] ^= (v1 ^ v9);
+		h[2] ^= (v2 ^ v10);
+		h[3] ^= (v3 ^ v11);
+		h[4] ^= (v4 ^ v12);
+		h[5] ^= (v5 ^ v13);
+		h[6] ^= (v6 ^ v14);
+		h[7] ^= (v7 ^ v15);
+
+		if (bytes <= 128)
+			break;
+		in += stride;
+		bytes -= 128;
+	}
+
+	/* write the chaining value and the counter back, little-endian */
+	for (i = 0; i < 8; i++)
+		U64TO8 (&S->h[i * 8], h[i]);
+	U64TO8 (&S->t[0], t0);
+	U64TO8 (&S->t[8], t1);
+}
-- 
2.39.5