From: Vsevolod Stakhov Date: Fri, 6 Feb 2015 15:04:31 +0000 (+0000) Subject: Add chacha20 driver. X-Git-Tag: 0.9.0~758 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b14cc5bc89dc9d925e1de775ed666354a3672b2b;p=rspamd.git Add chacha20 driver. --- diff --git a/src/libcryptobox/chacha20/chacha.c b/src/libcryptobox/chacha20/chacha.c new file mode 100644 index 000000000..caeba8edc --- /dev/null +++ b/src/libcryptobox/chacha20/chacha.c @@ -0,0 +1,260 @@ +/* Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "cryptobox.h" +#include "chacha.h" +#include "platform_config.h" + +extern unsigned long cpu_config; + +typedef struct chacha_impl_t { + unsigned long cpu_flags; + const char *desc; + void (*chacha) (const chacha_key *key, const chacha_iv *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); + void (*xchacha) (const chacha_key *key, const chacha_iv24 *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); + void (*chacha_blocks) (chacha_state_internal *state, + const unsigned char *in, unsigned char *out, size_t bytes); + void (*hchacha) (const unsigned char key[32], const unsigned char iv[16], + unsigned char out[32], size_t rounds); +} chacha_impl_t; + +#define CHACHA_DECLARE(ext) \ + void chacha_##ext(const chacha_key *key, const chacha_iv *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds); \ + void xchacha_##ext(const chacha_key *key, const chacha_iv24 *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds); \ + void chacha_blocks_##ext(chacha_state_internal *state, const unsigned char *in, unsigned char *out, size_t bytes); \ + void hchacha_##ext(const unsigned char key[32], const unsigned char iv[16], unsigned char out[32], size_t rounds); +#define CHACHA_IMPL(cpuflags, desc, ext) \ + {(cpuflags), desc, chacha_##ext, xchacha_##ext, chacha_blocks_##ext, hchacha_##ext} + +#if defined(HAVE_AVX2) + CHACHA_DECLARE(avx2) + #define CHACHA_AVX2 CHACHA_IMPL(CPUID_AVX2, "avx2", avx2) +#endif +#if defined(HAVE_AVX) + CHACHA_DECLARE(avx) + #define CHACHA_AVX CHACHA_IMPL(CPUID_AVX, "avx", avx) +#endif +#if defined(HAVE_SSE2) + CHACHA_DECLARE(sse2) + #define CHACHA_SSE2 CHACHA_IMPL(CPUID_SSE2, "sse2", sse2) +#endif + +CHACHA_DECLARE(ref) +#define CHACHA_GENERIC CHACHA_IMPL(0, "generic", ref) + +static const chacha_impl_t chacha_list[] = { + CHACHA_GENERIC, +#if defined(CHACHA_AVX2) + CHACHA_AVX2, +#endif +#if defined(CHACHA_AVX) + CHACHA_AVX, +#endif +#if defined(CHACHA_SSE2) + CHACHA_SSE2 +#endif +}; + +static const chacha_impl_t *chacha_impl = &chacha_list[0]; + +static int +chacha_is_aligned (const void *p) +{ + return ((size_t) p & (sizeof(size_t) - 1)) == 0; +} + +void +chacha_load (void) +{ + guint i; + + if (cpu_config != 0) { + for (i = 0; i < G_N_ELEMENTS (chacha_list); i ++) { + if (chacha_list[i].cpu_flags & cpu_config) { + chacha_impl = &chacha_list[i]; + break; + } + } + } +} + +void chacha_init (chacha_state *S, const chacha_key *key, + const chacha_iv *iv, size_t rounds) +{ + chacha_state_internal *state = (chacha_state_internal *) S; + memcpy (state->s + 0, key, 32); + memset (state->s + 32, 0, 8); + memcpy (state->s + 40, iv, 8); + state->rounds = rounds; + state->leftover = 0; +} + +/* processes inlen bytes (can do partial blocks), handling input/ouput alignment */ +static void +chacha_consume (chacha_state_internal *state, + const unsigned char *in, unsigned char *out, size_t inlen) +{ + unsigned char buffer[16 * CHACHA_BLOCKBYTES]; + int in_aligned, out_aligned; + + /* it's ok to call with 0 bytes */ + if (!inlen) + return; + + /* if everything is aligned, handle directly */ + in_aligned = chacha_is_aligned (in); + out_aligned = chacha_is_aligned (out); + if (in_aligned && out_aligned) { + chacha_impl->chacha_blocks (state, in, out, inlen); + return; + } + + /* copy the unaligned data to an aligned buffer and process in chunks */ + while (inlen) { + const size_t bytes = (inlen > sizeof(buffer)) ? sizeof(buffer) : inlen; + const unsigned char *src = in; + unsigned char *dst = (out_aligned) ? out : buffer; + if (!in_aligned) { + memcpy (buffer, in, bytes); + src = buffer; + } + chacha_impl->chacha_blocks (state, src, dst, bytes); + if (!out_aligned) + memcpy (out, buffer, bytes); + if (in) + in += bytes; + out += bytes; + inlen -= bytes; + } +} + +/* hchacha */ +void hchacha (const unsigned char key[32], + const unsigned char iv[16], unsigned char out[32], size_t rounds) +{ + chacha_impl->hchacha (key, iv, out, rounds); +} + +/* update, returns number of bytes written to out */ +size_t +chacha_update (chacha_state *S, const unsigned char *in, unsigned char *out, + size_t inlen) +{ + chacha_state_internal *state = (chacha_state_internal *) S; + unsigned char *out_start = out; + size_t bytes; + + /* enough for at least one block? */ + if ((state->leftover + inlen) >= CHACHA_BLOCKBYTES) { + /* handle the previous data */ + if (state->leftover) { + bytes = (CHACHA_BLOCKBYTES - state->leftover); + if (in) { + memcpy (state->buffer + state->leftover, in, bytes); + in += bytes; + } + chacha_consume (state, (in) ? state->buffer : NULL, out, + CHACHA_BLOCKBYTES); + inlen -= bytes; + out += CHACHA_BLOCKBYTES; + state->leftover = 0; + } + + /* handle the direct data */ + bytes = (inlen & ~(CHACHA_BLOCKBYTES - 1)); + if (bytes) { + chacha_consume (state, in, out, bytes); + inlen -= bytes; + if (in) + in += bytes; + out += bytes; + } + } + + /* handle leftover data */ + if (inlen) { + if (in) + memcpy (state->buffer + state->leftover, in, inlen); + else + memset (state->buffer + state->leftover, 0, inlen); + state->leftover += inlen; + } + + return out - out_start; +} + +/* finalize, write out any leftover data */ +size_t +chacha_final (chacha_state *S, unsigned char *out) +{ + chacha_state_internal *state = (chacha_state_internal *) S; + size_t leftover = state->leftover; + if (leftover) { + if (chacha_is_aligned (out)) { + chacha_impl->chacha_blocks (state, state->buffer, out, leftover); + } + else { + chacha_impl->chacha_blocks (state, state->buffer, state->buffer, + leftover); + memcpy (out, state->buffer, leftover); + } + } + rspamd_explicit_memzero (S, sizeof(chacha_state)); + return leftover; +} + +/* one-shot, input/output assumed to be word aligned */ +void +chacha (const chacha_key *key, const chacha_iv *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds) +{ + chacha_impl->chacha (key, iv, in, out, inlen, rounds); +} + +/* + xchacha, chacha with a 192 bit nonce + */ + +void +xchacha_init (chacha_state *S, const chacha_key *key, + const chacha_iv24 *iv, size_t rounds) +{ + chacha_key subkey; + hchacha (key->b, iv->b, subkey.b, rounds); + chacha_init (S, &subkey, (chacha_iv *) (iv->b + 16), rounds); +} + +/* one-shot, input/output assumed to be word aligned */ +void +xchacha (const chacha_key *key, const chacha_iv24 *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds) +{ + chacha_impl->xchacha (key, iv, in, out, inlen, rounds); +} diff --git a/src/libcryptobox/chacha20/chacha.h b/src/libcryptobox/chacha20/chacha.h new file mode 100644 index 000000000..cd2a63bd9 --- /dev/null +++ b/src/libcryptobox/chacha20/chacha.h @@ -0,0 +1,79 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Andrew Moon, Vsevolod Stakhov + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + + +#ifndef CHACHA_H_ +#define CHACHA_H_ + +enum chacha_constants { + CHACHA_BLOCKBYTES = 64, +}; + +typedef struct chacha_state_internal_t { + unsigned char s[48]; + size_t rounds; + size_t leftover; + unsigned char buffer[CHACHA_BLOCKBYTES]; +} chacha_state_internal; + +typedef struct chacha_state_t { + unsigned char opaque[128]; +} chacha_state; + +typedef struct chacha_key_t { + unsigned char b[32]; +} chacha_key; + +typedef struct chacha_iv_t { + unsigned char b[8]; +} chacha_iv; + +typedef struct chacha_iv24_t { + unsigned char b[24]; +} chacha_iv24; + +void hchacha (const unsigned char key[32], const unsigned char iv[16], + unsigned char out[32], size_t rounds); + +void chacha_init (chacha_state *S, const chacha_key *key, const chacha_iv *iv, + size_t rounds); + +void xchacha_init (chacha_state *S, const chacha_key *key, + const chacha_iv24 *iv, size_t rounds); + +size_t chacha_update (chacha_state *S, const unsigned char *in, + unsigned char *out, size_t inlen); + +size_t chacha_final (chacha_state *S, unsigned char *out); + +void chacha (const chacha_key *key, const chacha_iv *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); +void xchacha (const chacha_key *key, const chacha_iv24 *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); + +void chacha_load (void); + +#endif /* CHACHA_H_ */ diff --git a/src/libcryptobox/chacha20/ref.c b/src/libcryptobox/chacha20/ref.c new file mode 100644 index 000000000..bd6a44a82 --- /dev/null +++ b/src/libcryptobox/chacha20/ref.c @@ -0,0 +1,262 @@ +#include "config.h" +#include "chacha.h" +#include "cryptobox.h" + +#if defined(HAVE_INT32) +typedef uint32_t chacha_int32; +#else +typedef guint32 chacha_int32; +#endif + +/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ +static chacha_int32 +U8TO32(const unsigned char *p) { + return + (((chacha_int32)(p[0]) ) | + ((chacha_int32)(p[1]) << 8) | + ((chacha_int32)(p[2]) << 16) | + ((chacha_int32)(p[3]) << 24)); +} + +/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ +static void +U32TO8(unsigned char *p, chacha_int32 v) { + p[0] = (v ) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; +} + +/* 32 bit left rotate */ +static chacha_int32 +ROTL32(chacha_int32 x, int k) { + return ((x << k) | (x >> (32 - k))) & 0xffffffff; +} + +/* "expand 32-byte k", as 4 little endian 32-bit unsigned integers */ +static const chacha_int32 chacha_constants[4] = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 +}; + +void +chacha_blocks_ref(chacha_state_internal *state, const unsigned char *in, unsigned char *out, size_t bytes) { + chacha_int32 x[16], j[12]; + chacha_int32 t; + unsigned char *ctarget = out, tmp[64]; + size_t i, r; + + if (!bytes) return; + + j[0] = U8TO32(state->s + 0); + j[1] = U8TO32(state->s + 4); + j[2] = U8TO32(state->s + 8); + j[3] = U8TO32(state->s + 12); + j[4] = U8TO32(state->s + 16); + j[5] = U8TO32(state->s + 20); + j[6] = U8TO32(state->s + 24); + j[7] = U8TO32(state->s + 28); + j[8] = U8TO32(state->s + 32); + j[9] = U8TO32(state->s + 36); + j[10] = U8TO32(state->s + 40); + j[11] = U8TO32(state->s + 44); + + r = state->rounds; + + for (;;) { + if (bytes < 64) { + if (in) { + for (i = 0; i < bytes; i++) tmp[i] = in[i]; + in = tmp; + } + ctarget = out; + out = tmp; + } + + x[0] = chacha_constants[0]; + x[1] = chacha_constants[1]; + x[2] = chacha_constants[2]; + x[3] = chacha_constants[3]; + x[4] = j[0]; + x[5] = j[1]; + x[6] = j[2]; + x[7] = j[3]; + x[8] = j[4]; + x[9] = j[5]; + x[10] = j[6]; + x[11] = j[7]; + x[12] = j[8]; + x[13] = j[9]; + x[14] = j[10]; + x[15] = j[11]; + + #define quarter(a,b,c,d) \ + a += b; t = d^a; d = ROTL32(t,16); \ + c += d; t = b^c; b = ROTL32(t,12); \ + a += b; t = d^a; d = ROTL32(t, 8); \ + c += d; t = b^c; b = ROTL32(t, 7); + + #define doubleround() \ + quarter( x[0], x[4], x[8],x[12]) \ + quarter( x[1], x[5], x[9],x[13]) \ + quarter( x[2], x[6],x[10],x[14]) \ + quarter( x[3], x[7],x[11],x[15]) \ + quarter( x[0], x[5],x[10],x[15]) \ + quarter( x[1], x[6],x[11],x[12]) \ + quarter( x[2], x[7], x[8],x[13]) \ + quarter( x[3], x[4], x[9],x[14]) + + i = r; + do { + doubleround() + i -= 2; + } while (i); + + x[0] += chacha_constants[0]; + x[1] += chacha_constants[1]; + x[2] += chacha_constants[2]; + x[3] += chacha_constants[3]; + x[4] += j[0]; + x[5] += j[1]; + x[6] += j[2]; + x[7] += j[3]; + x[8] += j[4]; + x[9] += j[5]; + x[10] += j[6]; + x[11] += j[7]; + x[12] += j[8]; + x[13] += j[9]; + x[14] += j[10]; + x[15] += j[11]; + + if (in) { + U32TO8(out + 0, x[0] ^ U8TO32(in + 0)); + U32TO8(out + 4, x[1] ^ U8TO32(in + 4)); + U32TO8(out + 8, x[2] ^ U8TO32(in + 8)); + U32TO8(out + 12, x[3] ^ U8TO32(in + 12)); + U32TO8(out + 16, x[4] ^ U8TO32(in + 16)); + U32TO8(out + 20, x[5] ^ U8TO32(in + 20)); + U32TO8(out + 24, x[6] ^ U8TO32(in + 24)); + U32TO8(out + 28, x[7] ^ U8TO32(in + 28)); + U32TO8(out + 32, x[8] ^ U8TO32(in + 32)); + U32TO8(out + 36, x[9] ^ U8TO32(in + 36)); + U32TO8(out + 40, x[10] ^ U8TO32(in + 40)); + U32TO8(out + 44, x[11] ^ U8TO32(in + 44)); + U32TO8(out + 48, x[12] ^ U8TO32(in + 48)); + U32TO8(out + 52, x[13] ^ U8TO32(in + 52)); + U32TO8(out + 56, x[14] ^ U8TO32(in + 56)); + U32TO8(out + 60, x[15] ^ U8TO32(in + 60)); + in += 64; + } else { + U32TO8(out + 0, x[0]); + U32TO8(out + 4, x[1]); + U32TO8(out + 8, x[2]); + U32TO8(out + 12, x[3]); + U32TO8(out + 16, x[4]); + U32TO8(out + 20, x[5]); + U32TO8(out + 24, x[6]); + U32TO8(out + 28, x[7]); + U32TO8(out + 32, x[8]); + U32TO8(out + 36, x[9]); + U32TO8(out + 40, x[10]); + U32TO8(out + 44, x[11]); + U32TO8(out + 48, x[12]); + U32TO8(out + 52, x[13]); + U32TO8(out + 56, x[14]); + U32TO8(out + 60, x[15]); + } + + /* increment the 64 bit counter, split in to two 32 bit halves */ + j[8]++; + if (!j[8]) + j[9]++; + + if (bytes <= 64) { + if (bytes < 64) for (i = 0; i < bytes; i++) ctarget[i] = out[i]; + + /* store the counter back to the state */ + U32TO8(state->s + 32, j[8]); + U32TO8(state->s + 36, j[9]); + goto cleanup; + } + bytes -= 64; + out += 64; + } + +cleanup: + rspamd_explicit_memzero(j, sizeof(j)); +} + +void +hchacha_ref(const unsigned char key[32], const unsigned char iv[16], unsigned char out[32], size_t rounds) { + chacha_int32 x[16]; + chacha_int32 t; + + x[0] = chacha_constants[0]; + x[1] = chacha_constants[1]; + x[2] = chacha_constants[2]; + x[3] = chacha_constants[3]; + x[4] = U8TO32(key + 0); + x[5] = U8TO32(key + 4); + x[6] = U8TO32(key + 8); + x[7] = U8TO32(key + 12); + x[8] = U8TO32(key + 16); + x[9] = U8TO32(key + 20); + x[10] = U8TO32(key + 24); + x[11] = U8TO32(key + 28); + x[12] = U8TO32(iv + 0); + x[13] = U8TO32(iv + 4); + x[14] = U8TO32(iv + 8); + x[15] = U8TO32(iv + 12); + + do { + doubleround() + rounds -= 2; + } while (rounds); + + /* indices for the chacha constant */ + U32TO8(out + 0, x[0]); + U32TO8(out + 4, x[1]); + U32TO8(out + 8, x[2]); + U32TO8(out + 12, x[3]); + + /* indices for the iv */ + U32TO8(out + 16, x[12]); + U32TO8(out + 20, x[13]); + U32TO8(out + 24, x[14]); + U32TO8(out + 28, x[15]); +} + +void +chacha_clear_state_ref(chacha_state_internal *state) { + void * (* volatile safe_memset)(void *s, int c, size_t n) = memset; + safe_memset(state, 0, 48); +} + +void +chacha_ref(const chacha_key *key, const chacha_iv *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds) { + chacha_state_internal state; + size_t i; + for (i = 0; i < 32; i++) + state.s[i + 0] = key->b[i]; + for (i = 0; i < 8; i++) + state.s[i + 32] = 0; + for (i = 0; i < 8; i++) + state.s[i + 40] = iv->b[i]; + state.rounds = rounds; + chacha_blocks_ref(&state, in, out, inlen); + chacha_clear_state_ref(&state); +} + +void +xchacha_ref(const chacha_key *key, const chacha_iv24 *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds) { + chacha_state_internal state; + size_t i; + hchacha_ref(key->b, iv->b, &state.s[0], rounds); + for (i = 0; i < 8; i++) + state.s[i + 32] = 0; + for (i = 0; i < 8; i++) + state.s[i + 40] = iv->b[i + 16]; + state.rounds = rounds; + chacha_blocks_ref(&state, in, out, inlen); + chacha_clear_state_ref(&state); +}