author    | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-08 17:41:31 +0100
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-08 17:41:31 +0100
commit    | 9026da71bb262886a275a3e24b1db51ab3395240 (patch)
tree      | 6050f1a73472b3251b9fd18db7e8c96a7cca276c /src/libcryptobox
parent    | c271eb36656a4ff88a9c8c1d59934949260275a3 (diff)
download  | rspamd-9026da71bb262886a275a3e24b1db51ab3395240.tar.gz
          | rspamd-9026da71bb262886a275a3e24b1db51ab3395240.zip
[Rework] Use libsodium instead of hand-crafted crypto implementations
Diffstat (limited to 'src/libcryptobox')
42 files changed, 157 insertions(+), 18989 deletions(-)
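This rework drops the in-tree BLAKE2b (and the other removed primitives) in favour of libsodium. The new call sites are outside `src/libcryptobox` and are not shown in this diffstat, so purely as orientation, here is a minimal sketch of the libsodium entry point that covers what the deleted `blake2b()`/`blake2b_keyed()` below used to do; `hash_with_sodium` is a hypothetical wrapper name, not code from this commit:

```c
/* Illustrative sketch only: libsodium's crypto_generichash() is BLAKE2b.
 * crypto_generichash_BYTES_MAX (64) matches BLAKE2B_OUTBYTES below. */
#include <sodium.h>

int
hash_with_sodium (unsigned char out[crypto_generichash_BYTES_MAX],
		const unsigned char *msg, unsigned long long msglen,
		const unsigned char *key, size_t keylen) /* key may be NULL/0 for plain hashing */
{
	if (sodium_init () < 0) {
		return -1; /* library could not initialise */
	}

	return crypto_generichash (out, crypto_generichash_BYTES_MAX,
			msg, msglen, key, keylen);
}
```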
diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt
index 82a477a8a..8ab390fa1 100644
--- a/src/libcryptobox/CMakeLists.txt
+++ b/src/libcryptobox/CMakeLists.txt
@@ -4,17 +4,7 @@ TARGET_ARCHITECTURE(ARCH)
 SET(CHACHASRC ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/chacha.c
 	${CMAKE_CURRENT_SOURCE_DIR}/chacha20/ref.c)
-SET(POLYSRC ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/poly1305.c)
-SET(SIPHASHSRC ${CMAKE_CURRENT_SOURCE_DIR}/siphash/siphash.c
-	${CMAKE_CURRENT_SOURCE_DIR}/siphash/ref.c)
-SET(BLAKE2SRC ${CMAKE_CURRENT_SOURCE_DIR}/blake2/blake2.c
-	${CMAKE_CURRENT_SOURCE_DIR}/blake2/ref.c)
-SET(CURVESRC ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/ref.c
-	${CMAKE_CURRENT_SOURCE_DIR}/curve25519/curve25519.c)
-
-SET(EDSRC ${CMAKE_CURRENT_SOURCE_DIR}/ed25519/ref.c
-	${CMAKE_CURRENT_SOURCE_DIR}/ed25519/ed25519.c)
 SET(BASE64SRC ${CMAKE_CURRENT_SOURCE_DIR}/base64/ref.c
 	${CMAKE_CURRENT_SOURCE_DIR}/base64/base64.c)
@@ -105,51 +95,21 @@ int main(int argc, char** argv) {
 ASM_OP(HAVE_SSE41 "sse41")
 SET(ASM_CODE "crc32 %eax, %eax")
 ASM_OP(HAVE_SSE42 "sse42")
-
-	if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
-		SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/ref-64.c)
-		SET(CURVESRC ${CURVESRC} ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/curve25519-donna-c64.c)
-	else()
-		SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/ref-32.c)
-		SET(CURVESRC ${CURVESRC} ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/curve25519-donna.c)
-	endif()
-
-	SET(BLAKE2SRC ${BLAKE2SRC} ${CMAKE_CURRENT_SOURCE_DIR}/blake2/x86-64.S)
-ELSEIF("${ARCH}" STREQUAL "i386")
-	IF(NOT HAVE_SLASHMACRO AND NOT HAVE_DOLLARMACRO)
-		MESSAGE(FATAL_ERROR "Your assembler cannot compile macros, please check your CMakeFiles/CMakeError.log")
-	ENDIF()
-	SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/ref-32.c)
-	SET(CURVESRC ${CURVESRC} ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/curve25519-donna.c)
-	SET(BLAKE2SRC ${BLAKE2SRC} ${CMAKE_CURRENT_SOURCE_DIR}/blake2/x86-32.S)
-ELSE()
-	SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/ref-32.c)
 ENDIF()
 IF(HAVE_AVX2)
 	SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx2.S)
-	SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/avx2.S)
-	SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/avx2.S)
 	SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/avx2.c)
 	MESSAGE(STATUS "AVX2 support is added")
 ENDIF(HAVE_AVX2)
 IF(HAVE_AVX)
 	SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx.S)
-	SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/avx.S)
-	SET(CURVESRC ${CURVESRC} ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/avx.S
-		${CMAKE_CURRENT_SOURCE_DIR}/curve25519/avx.c)
-	SET(BLAKE2SRC ${BLAKE2SRC} ${CMAKE_CURRENT_SOURCE_DIR}/blake2/avx.S)
 	MESSAGE(STATUS "AVX support is added")
 ENDIF(HAVE_AVX)
 IF(HAVE_SSE2)
 	SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S)
-	SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/sse2.S)
 	MESSAGE(STATUS "SSE2 support is added")
 ENDIF(HAVE_SSE2)
-IF(HAVE_SSE41)
-	SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/sse41.S)
-	MESSAGE(STATUS "SSE41 support is added")
-ENDIF(HAVE_SSE41)
 IF(HAVE_SSE42)
 	SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/sse42.c)
 	MESSAGE(STATUS "SSE42 support is added")
@@ -162,5 +122,4 @@ SET(LIBCRYPTOBOXSRC ${CMAKE_CURRENT_SOURCE_DIR}/cryptobox.c
 	${CMAKE_CURRENT_SOURCE_DIR}/keypairs_cache.c
 	${CMAKE_CURRENT_SOURCE_DIR}/catena/catena.c)
-SET(RSPAMD_CRYPTOBOX
${LIBCRYPTOBOXSRC} ${CHACHASRC} ${POLYSRC} ${SIPHASHSRC} - ${CURVESRC} ${BLAKE2SRC} ${EDSRC} ${BASE64SRC} PARENT_SCOPE) +SET(RSPAMD_CRYPTOBOX ${LIBCRYPTOBOXSRC} ${CHACHASRC} ${BASE64SRC} PARENT_SCOPE) diff --git a/src/libcryptobox/blake2/avx.S b/src/libcryptobox/blake2/avx.S deleted file mode 100644 index e569f0ba7..000000000 --- a/src/libcryptobox/blake2/avx.S +++ /dev/null @@ -1,689 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -SECTION_TEXT - -GLOBAL_HIDDEN_FN_EXT blake2b_blocks_avx, 4, 16 -pushq %rbp -movq %rsp, %rbp -andq $-64, %rsp -pushq %r12 -pushq %r13 -pushq %r14 -pushq %r15 -pushq %rbx -subq $344, %rsp -LOAD_VAR_PIC 48+blake2b_constants, %r9 -LOAD_VAR_PIC blake2b_constants_ssse3, %rax -leaq 16(%rax), %r8 -vmovdqu 80(%rdi), %xmm0 -cmpq $128, %rdx -vpxor (%r9), %xmm0, %xmm0 -movl $128, %r9d -vmovdqu (%rax), %xmm12 -cmovbe %rdx, %r9 -vmovdqu (%r8), %xmm1 -movq 64(%rdi), %r8 -movq 72(%rdi), %rax -cmpq $0, 80(%rdi) -je blake2b_blocks_avx_L21 -blake2b_blocks_avx_L2: -cmpq $128, %rdx -je blake2b_blocks_avx_L21 -blake2b_blocks_avx_L3: -lea (%rsp), %r10 -testq $64, %rdx -je blake2b_blocks_avx_L5 -blake2b_blocks_avx_L4: -vmovdqu (%rsi), %xmm2 -vmovdqu %xmm2, (%rsp) -lea 64(%rsp), %r10 -vmovdqu 16(%rsi), %xmm3 -vmovdqu %xmm3, 16(%rsp) -vpxor %xmm2, %xmm2, %xmm2 -vmovdqu 32(%rsi), %xmm4 -vmovdqu %xmm4, 32(%rsp) -vmovdqu 48(%rsi), %xmm5 -vmovdqu %xmm5, 48(%rsp) -addq $64, %rsi -jmp blake2b_blocks_avx_L6 -blake2b_blocks_avx_L5: -vpxor %xmm2, %xmm2, %xmm2 -vmovdqu %xmm2, 64(%rsp) -vmovdqu %xmm2, 80(%rsp) -vmovdqu %xmm2, 96(%rsp) -vmovdqu %xmm2, 112(%rsp) -blake2b_blocks_avx_L6: -vmovdqu %xmm2, (%r10) -vmovdqu %xmm2, 16(%r10) -vmovdqu %xmm2, 32(%r10) -vmovdqu %xmm2, 48(%r10) -testq $32, %rdx -je blake2b_blocks_avx_L8 -blake2b_blocks_avx_L7: -vmovdqu (%rsi), %xmm2 -vmovdqu %xmm2, (%r10) -vmovdqu 16(%rsi), %xmm3 -vmovdqu %xmm3, 16(%r10) -addq $32, %rsi -addq $32, %r10 -blake2b_blocks_avx_L8: -testq $16, %rdx -je blake2b_blocks_avx_L10 -blake2b_blocks_avx_L9: -vmovdqu (%rsi), %xmm2 -vmovdqu %xmm2, (%r10) -addq $16, %rsi -addq $16, %r10 -blake2b_blocks_avx_L10: -testq $8, %rdx -je blake2b_blocks_avx_L12 -blake2b_blocks_avx_L11: -movq (%rsi), %r11 -addq $8, %rsi -movq %r11, (%r10) -addq $8, %r10 -blake2b_blocks_avx_L12: -testq $4, %rdx -je blake2b_blocks_avx_L14 -blake2b_blocks_avx_L13: -movl (%rsi), %r11d -addq $4, %rsi -movl %r11d, (%r10) -addq $4, %r10 -blake2b_blocks_avx_L14: -testq $2, %rdx -je blake2b_blocks_avx_L16 -blake2b_blocks_avx_L15: -movzwl (%rsi), %r11d -addq $2, %rsi -movw %r11w, (%r10) -addq $2, %r10 -blake2b_blocks_avx_L16: -testq $1, %rdx -je blake2b_blocks_avx_L18 -blake2b_blocks_avx_L17: -movb (%rsi), %sil -movb %sil, (%r10) -blake2b_blocks_avx_L18: -lea (%rsp), %rsi -blake2b_blocks_avx_L21: -LOAD_VAR_PIC 32+blake2b_constants, %r10 -LOAD_VAR_PIC blake2b_constants, %r11 -vmovdqu (%rdi), %xmm5 -vmovdqu 16(%rdi), %xmm6 -vmovdqu 32(%rdi), %xmm7 -vmovdqu (%r10), %xmm4 -LOAD_VAR_PIC 16+blake2b_constants, %r10 -vmovdqu 48(%rdi), %xmm8 -vmovdqu (%r11), %xmm3 -vmovdqu %xmm3, 176(%rsp) -vmovdqu (%r10), %xmm2 -vmovdqu %xmm2, 160(%rsp) -vmovdqu %xmm4, 144(%rsp) -vmovdqu %xmm8, 240(%rsp) -vmovdqu %xmm7, 256(%rsp) -vmovdqu %xmm6, 224(%rsp) -vmovdqu %xmm5, 208(%rsp) -vmovdqu %xmm0, 192(%rsp) -movq %r9, 272(%rsp) -movq %rdi, 128(%rsp) -movq %rcx, 136(%rsp) -jmp blake2b_blocks_avx_L22 -# align to 31 mod 64 -.p2align 6 -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop 
-blake2b_blocks_avx_L25: -addq 136(%rsp), %rsi -addq $-128, %rdx -blake2b_blocks_avx_L22: -movq 272(%rsp), %rcx -addq %rcx, %r8 -cmpq %rcx, %r8 -lea 1(%rax), %rbx -vmovdqu (%rsi), %xmm15 -vmovdqu 16(%rsi), %xmm5 -vmovdqu 32(%rsi), %xmm3 -vmovdqu 48(%rsi), %xmm6 -cmovb %rbx, %rax -vmovd %r8, %xmm7 -vpunpcklqdq %xmm5, %xmm15, %xmm2 -LOAD_VAR_PIC 96+blake2b_constants, %rcx -vpunpcklqdq %xmm6, %xmm3, %xmm8 -LOAD_VAR_PIC 224+blake2b_constants, %rbx -vpaddq 208(%rsp), %xmm2, %xmm0 -vpaddq 224(%rsp), %xmm8, %xmm10 -vmovd %rax, %xmm14 -vmovdqu 256(%rsp), %xmm4 -vmovdqu 240(%rsp), %xmm11 -vpunpcklqdq %xmm14, %xmm7, %xmm9 -vpaddq %xmm4, %xmm0, %xmm13 -vpaddq %xmm11, %xmm10, %xmm2 -vpxor 144(%rsp), %xmm9, %xmm0 -vpxor 192(%rsp), %xmm2, %xmm10 -vpxor %xmm13, %xmm0, %xmm8 -vpshufd $177, %xmm8, %xmm8 -vpshufd $177, %xmm10, %xmm7 -vpaddq 176(%rsp), %xmm8, %xmm14 -vpaddq 160(%rsp), %xmm7, %xmm9 -vpxor %xmm14, %xmm4, %xmm4 -vpxor %xmm9, %xmm11, %xmm11 -vpshufb %xmm1, %xmm4, %xmm4 -vpshufb %xmm1, %xmm11, %xmm0 -vpunpckhqdq %xmm5, %xmm15, %xmm15 -vpunpckhqdq %xmm6, %xmm3, %xmm6 -vpaddq %xmm15, %xmm13, %xmm13 -vpaddq %xmm6, %xmm2, %xmm6 -vpaddq %xmm4, %xmm13, %xmm10 -vpaddq %xmm0, %xmm6, %xmm15 -vpxor %xmm10, %xmm8, %xmm2 -vpxor %xmm15, %xmm7, %xmm8 -vpshufb %xmm12, %xmm2, %xmm5 -vpshufb %xmm12, %xmm8, %xmm2 -vpaddq %xmm5, %xmm14, %xmm6 -vpaddq %xmm2, %xmm9, %xmm7 -vpxor %xmm6, %xmm4, %xmm4 -vpxor %xmm7, %xmm0, %xmm9 -vpaddq %xmm4, %xmm4, %xmm14 -vpaddq %xmm9, %xmm9, %xmm13 -vpsrlq $63, %xmm4, %xmm0 -vpsrlq $63, %xmm9, %xmm11 -vpor %xmm14, %xmm0, %xmm8 -vpor %xmm13, %xmm11, %xmm4 -vpalignr $8, %xmm8, %xmm4, %xmm0 -vpalignr $8, %xmm4, %xmm8, %xmm14 -vmovdqu 64(%rsi), %xmm9 -vmovdqu 80(%rsi), %xmm8 -vmovdqu 96(%rsi), %xmm4 -vpunpcklqdq %xmm8, %xmm9, %xmm11 -vpaddq %xmm11, %xmm10, %xmm10 -vmovdqu 112(%rsi), %xmm11 -vpaddq %xmm0, %xmm10, %xmm13 -vpunpcklqdq %xmm11, %xmm4, %xmm10 -vpaddq %xmm10, %xmm15, %xmm15 -vpaddq %xmm14, %xmm15, %xmm15 -vpalignr $8, %xmm2, %xmm5, %xmm10 -vpalignr $8, %xmm5, %xmm2, %xmm5 -vpxor %xmm13, %xmm10, %xmm10 -vpxor %xmm15, %xmm5, %xmm2 -vpshufd $177, %xmm10, %xmm10 -vpshufd $177, %xmm2, %xmm2 -vpaddq %xmm10, %xmm7, %xmm7 -vpaddq %xmm2, %xmm6, %xmm5 -vpxor %xmm7, %xmm0, %xmm6 -vpxor %xmm5, %xmm14, %xmm14 -vpshufb %xmm1, %xmm6, %xmm0 -vpshufb %xmm1, %xmm14, %xmm6 -vpunpckhqdq %xmm8, %xmm9, %xmm14 -vpaddq %xmm14, %xmm13, %xmm13 -vpaddq %xmm0, %xmm13, %xmm14 -vpunpckhqdq %xmm11, %xmm4, %xmm13 -vpxor %xmm14, %xmm10, %xmm10 -vpaddq %xmm13, %xmm15, %xmm15 -vpshufb %xmm12, %xmm10, %xmm13 -vpaddq %xmm6, %xmm15, %xmm15 -vpaddq %xmm13, %xmm7, %xmm10 -vpxor %xmm15, %xmm2, %xmm2 -vpxor %xmm10, %xmm0, %xmm0 -vpshufb %xmm12, %xmm2, %xmm2 -vpaddq %xmm2, %xmm5, %xmm5 -vpxor %xmm5, %xmm6, %xmm7 -vpsrlq $63, %xmm0, %xmm6 -vpaddq %xmm0, %xmm0, %xmm0 -vpor %xmm0, %xmm6, %xmm6 -vpsrlq $63, %xmm7, %xmm0 -vpaddq %xmm7, %xmm7, %xmm7 -vpor %xmm7, %xmm0, %xmm0 -vpalignr $8, %xmm0, %xmm6, %xmm7 -vpalignr $8, %xmm6, %xmm0, %xmm6 -vpunpcklqdq %xmm3, %xmm11, %xmm0 -vpaddq %xmm0, %xmm14, %xmm14 -vpaddq %xmm7, %xmm14, %xmm0 -vpunpckhqdq %xmm4, %xmm9, %xmm14 -vpaddq %xmm14, %xmm15, %xmm15 -vpaddq %xmm6, %xmm15, %xmm14 -vpalignr $8, %xmm13, %xmm2, %xmm15 -vpxor %xmm0, %xmm15, %xmm15 -vpshufd $177, %xmm15, %xmm15 -vpalignr $8, %xmm2, %xmm13, %xmm2 -vpxor %xmm14, %xmm2, %xmm13 -vpaddq %xmm15, %xmm5, %xmm2 -vpshufd $177, %xmm13, %xmm13 -vpxor %xmm2, %xmm7, %xmm5 -vpunpcklqdq %xmm9, %xmm8, %xmm7 -vpaddq %xmm13, %xmm10, %xmm10 -vpaddq %xmm7, %xmm0, %xmm9 -vmovdqu 48(%rsi), %xmm0 -vpshufb %xmm1, %xmm5, %xmm5 -vpxor %xmm10, 
%xmm6, %xmm6 -vpshufb %xmm1, %xmm6, %xmm6 -vpaddq %xmm5, %xmm9, %xmm9 -vpalignr $8, %xmm11, %xmm0, %xmm11 -vpxor %xmm9, %xmm15, %xmm15 -vpaddq %xmm11, %xmm14, %xmm7 -vpshufb %xmm12, %xmm15, %xmm11 -vpaddq %xmm6, %xmm7, %xmm14 -vpaddq %xmm11, %xmm2, %xmm2 -vpxor %xmm14, %xmm13, %xmm13 -vpxor %xmm2, %xmm5, %xmm5 -vpshufb %xmm12, %xmm13, %xmm13 -vpaddq %xmm13, %xmm10, %xmm10 -vpxor %xmm10, %xmm6, %xmm15 -vpsrlq $63, %xmm5, %xmm6 -vpaddq %xmm5, %xmm5, %xmm5 -vpsrlq $63, %xmm15, %xmm7 -vpor %xmm5, %xmm6, %xmm6 -vpaddq %xmm15, %xmm15, %xmm15 -vpor %xmm15, %xmm7, %xmm5 -vpalignr $8, %xmm6, %xmm5, %xmm15 -vpalignr $8, %xmm5, %xmm6, %xmm5 -vpshufd $78, (%rsi), %xmm6 -vpaddq %xmm6, %xmm9, %xmm9 -vpunpckhqdq %xmm3, %xmm8, %xmm3 -vpaddq %xmm3, %xmm14, %xmm6 -vpaddq %xmm15, %xmm9, %xmm9 -vpaddq %xmm5, %xmm6, %xmm8 -vpalignr $8, %xmm13, %xmm11, %xmm3 -vpalignr $8, %xmm11, %xmm13, %xmm11 -vpxor %xmm9, %xmm3, %xmm7 -vpshufd $177, %xmm7, %xmm14 -vpxor %xmm8, %xmm11, %xmm13 -vpshufd $177, %xmm13, %xmm3 -vpaddq %xmm14, %xmm10, %xmm6 -vpaddq %xmm3, %xmm2, %xmm10 -vpxor %xmm6, %xmm15, %xmm2 -vmovdqu 16(%rsi), %xmm15 -vpshufb %xmm1, %xmm2, %xmm7 -vpxor %xmm10, %xmm5, %xmm2 -vpshufb %xmm1, %xmm2, %xmm5 -vpunpcklqdq %xmm15, %xmm4, %xmm4 -vpunpckhqdq %xmm15, %xmm0, %xmm0 -vpaddq %xmm4, %xmm9, %xmm2 -vpaddq %xmm0, %xmm8, %xmm8 -vpaddq %xmm7, %xmm2, %xmm2 -vpaddq %xmm5, %xmm8, %xmm0 -vpxor %xmm2, %xmm14, %xmm15 -vpxor %xmm0, %xmm3, %xmm9 -vpshufb %xmm12, %xmm15, %xmm15 -vpshufb %xmm12, %xmm9, %xmm3 -vpaddq %xmm15, %xmm6, %xmm8 -vpaddq %xmm3, %xmm10, %xmm6 -vpxor %xmm8, %xmm7, %xmm10 -vpxor %xmm6, %xmm5, %xmm5 -vpaddq %xmm5, %xmm5, %xmm9 -vpsrlq $63, %xmm10, %xmm4 -vpsrlq $63, %xmm5, %xmm7 -vpaddq %xmm10, %xmm10, %xmm10 -vpor %xmm10, %xmm4, %xmm13 -vpor %xmm9, %xmm7, %xmm11 -vpalignr $8, %xmm11, %xmm13, %xmm4 -vpalignr $8, %xmm13, %xmm11, %xmm7 -vpalignr $8, %xmm15, %xmm3, %xmm9 -vpalignr $8, %xmm3, %xmm15, %xmm10 -blake2b_blocks_avx_L23: -movzbl (%rcx), %edi -movzbl 2(%rcx), %r9d -movzbl 4(%rcx), %r10d -movzbl 6(%rcx), %r11d -vmovq (%rdi,%rsi), %xmm5 -vpinsrq $1, (%r9,%rsi), %xmm5, %xmm14 -vmovq (%r10,%rsi), %xmm3 -vpinsrq $1, (%r11,%rsi), %xmm3, %xmm15 -vpaddq %xmm14, %xmm2, %xmm2 -vpaddq %xmm15, %xmm0, %xmm0 -vpaddq %xmm4, %xmm2, %xmm2 -vpaddq %xmm7, %xmm0, %xmm0 -vpxor %xmm2, %xmm9, %xmm11 -vpxor %xmm0, %xmm10, %xmm10 -vpshufd $177, %xmm11, %xmm3 -movzbl 1(%rcx), %r12d -movzbl 5(%rcx), %r14d -vpshufd $177, %xmm10, %xmm5 -vpaddq %xmm3, %xmm6, %xmm6 -vpaddq %xmm5, %xmm8, %xmm9 -movzbl 3(%rcx), %r13d -vpxor %xmm6, %xmm4, %xmm14 -movzbl 7(%rcx), %r15d -vpxor %xmm9, %xmm7, %xmm15 -vmovq (%r12,%rsi), %xmm4 -vmovq (%r14,%rsi), %xmm11 -vpinsrq $1, (%r13,%rsi), %xmm4, %xmm7 -vpinsrq $1, (%r15,%rsi), %xmm11, %xmm13 -vpshufb %xmm1, %xmm14, %xmm8 -vpshufb %xmm1, %xmm15, %xmm14 -vpaddq %xmm7, %xmm2, %xmm2 -vpaddq %xmm13, %xmm0, %xmm0 -vpaddq %xmm8, %xmm2, %xmm4 -vpaddq %xmm14, %xmm0, %xmm7 -vpxor %xmm4, %xmm3, %xmm10 -vpxor %xmm7, %xmm5, %xmm3 -vpshufb %xmm12, %xmm10, %xmm11 -vpshufb %xmm12, %xmm3, %xmm10 -vpaddq %xmm11, %xmm6, %xmm13 -vpaddq %xmm10, %xmm9, %xmm9 -movzbl 8(%rcx), %edi -vpxor %xmm13, %xmm8, %xmm8 -movzbl 12(%rcx), %r10d -vpxor %xmm9, %xmm14, %xmm2 -movzbl 10(%rcx), %r9d -vpsrlq $63, %xmm8, %xmm6 -movzbl 14(%rcx), %r11d -vpsrlq $63, %xmm2, %xmm0 -vpaddq %xmm8, %xmm8, %xmm5 -vpaddq %xmm2, %xmm2, %xmm14 -vmovq (%rdi,%rsi), %xmm15 -vpor %xmm5, %xmm6, %xmm8 -vmovq (%r10,%rsi), %xmm3 -vpor %xmm14, %xmm0, %xmm6 -vpinsrq $1, (%r9,%rsi), %xmm15, %xmm5 -vpinsrq $1, (%r11,%rsi), %xmm3, %xmm0 -vpalignr $8, %xmm8, %xmm6, 
%xmm2 -vpalignr $8, %xmm6, %xmm8, %xmm14 -vpalignr $8, %xmm10, %xmm11, %xmm8 -vpalignr $8, %xmm11, %xmm10, %xmm11 -vpaddq %xmm5, %xmm4, %xmm4 -vpaddq %xmm0, %xmm7, %xmm7 -vpaddq %xmm2, %xmm4, %xmm15 -vpaddq %xmm14, %xmm7, %xmm0 -vpxor %xmm15, %xmm8, %xmm6 -vpxor %xmm0, %xmm11, %xmm10 -vpshufd $177, %xmm6, %xmm6 -vpshufd $177, %xmm10, %xmm8 -movzbl 9(%rcx), %r12d -movzbl 13(%rcx), %r14d -vpaddq %xmm6, %xmm9, %xmm4 -vpaddq %xmm8, %xmm13, %xmm7 -movzbl 11(%rcx), %r13d -vpxor %xmm4, %xmm2, %xmm9 -movzbl 15(%rcx), %r15d -vpxor %xmm7, %xmm14, %xmm2 -vmovq (%r12,%rsi), %xmm14 -addq $16, %rcx -vmovq (%r14,%rsi), %xmm3 -vpshufb %xmm1, %xmm9, %xmm13 -vpinsrq $1, (%r13,%rsi), %xmm14, %xmm5 -vpinsrq $1, (%r15,%rsi), %xmm3, %xmm9 -vpshufb %xmm1, %xmm2, %xmm11 -vpaddq %xmm5, %xmm15, %xmm15 -vpaddq %xmm9, %xmm0, %xmm0 -vpaddq %xmm13, %xmm15, %xmm2 -vpaddq %xmm11, %xmm0, %xmm0 -vpxor %xmm2, %xmm6, %xmm6 -vpxor %xmm0, %xmm8, %xmm8 -vpshufb %xmm12, %xmm6, %xmm14 -vpshufb %xmm12, %xmm8, %xmm15 -vpaddq %xmm14, %xmm4, %xmm8 -vpaddq %xmm15, %xmm7, %xmm6 -vpxor %xmm8, %xmm13, %xmm4 -vpxor %xmm6, %xmm11, %xmm11 -vpaddq %xmm4, %xmm4, %xmm10 -vpsrlq $63, %xmm4, %xmm7 -vpsrlq $63, %xmm11, %xmm13 -vpaddq %xmm11, %xmm11, %xmm4 -vpor %xmm10, %xmm7, %xmm3 -vpor %xmm4, %xmm13, %xmm11 -vpalignr $8, %xmm11, %xmm3, %xmm4 -vpalignr $8, %xmm3, %xmm11, %xmm7 -vpalignr $8, %xmm15, %xmm14, %xmm10 -vpalignr $8, %xmm14, %xmm15, %xmm9 -cmpq %rbx, %rcx -jb blake2b_blocks_avx_L23 -blake2b_blocks_avx_L24: -movq 32(%rsi), %r13 -movq (%rsi), %r10 -movq 48(%rsi), %r9 -vmovd %r13, %xmm13 -vpinsrq $1, %r9, %xmm13, %xmm14 -vmovd %r10, %xmm3 -movq 16(%rsi), %rbx -vpinsrq $1, %rbx, %xmm3, %xmm15 -vpaddq %xmm14, %xmm0, %xmm0 -vpaddq %xmm7, %xmm0, %xmm3 -vpxor %xmm3, %xmm10, %xmm10 -vpaddq %xmm15, %xmm2, %xmm2 -vpaddq %xmm4, %xmm2, %xmm5 -vpshufd $177, %xmm10, %xmm15 -vpxor %xmm5, %xmm9, %xmm9 -vpshufd $177, %xmm9, %xmm9 -vpaddq %xmm15, %xmm8, %xmm14 -vpaddq %xmm9, %xmm6, %xmm0 -vpxor %xmm14, %xmm7, %xmm7 -vpxor %xmm0, %xmm4, %xmm8 -vpshufb %xmm1, %xmm7, %xmm4 -vpshufb %xmm1, %xmm8, %xmm2 -vmovq 8(%rsi), %xmm7 -movq %r8, 288(%rsp) -movq 24(%rsi), %r8 -vpinsrq $1, %r8, %xmm7, %xmm6 -vpinsrq $1, %r10, %xmm7, %xmm7 -vpaddq %xmm6, %xmm5, %xmm13 -movq 40(%rsi), %rcx -movq 56(%rsi), %rdi -vpaddq %xmm2, %xmm13, %xmm13 -vmovd %rcx, %xmm5 -vpxor %xmm13, %xmm9, %xmm9 -vpinsrq $1, %rdi, %xmm5, %xmm10 -vpshufb %xmm12, %xmm9, %xmm5 -vpaddq %xmm10, %xmm3, %xmm3 -vpaddq %xmm4, %xmm3, %xmm11 -vpaddq %xmm5, %xmm0, %xmm3 -vpxor %xmm11, %xmm15, %xmm8 -vpshufb %xmm12, %xmm8, %xmm10 -vpaddq %xmm10, %xmm14, %xmm8 -vpxor %xmm3, %xmm2, %xmm14 -vpxor %xmm8, %xmm4, %xmm9 -vpsrlq $63, %xmm14, %xmm4 -vpsrlq $63, %xmm9, %xmm0 -vpaddq %xmm14, %xmm14, %xmm14 -movq 64(%rsi), %r15 -vpor %xmm14, %xmm4, %xmm6 -vpaddq %xmm9, %xmm9, %xmm4 -vmovq 96(%rsi), %xmm9 -vpor %xmm4, %xmm0, %xmm2 -movq 112(%rsi), %r14 -vmovd %r15, %xmm15 -vpinsrq $1, %r14, %xmm9, %xmm0 -vpinsrq $1, %rbx, %xmm9, %xmm9 -vpalignr $8, %xmm6, %xmm2, %xmm4 -vpalignr $8, %xmm2, %xmm6, %xmm2 -vpaddq %xmm0, %xmm11, %xmm11 -movq 80(%rsi), %r11 -vpinsrq $1, %r11, %xmm15, %xmm14 -vpaddq %xmm2, %xmm11, %xmm11 -vpalignr $8, %xmm10, %xmm5, %xmm15 -vpalignr $8, %xmm5, %xmm10, %xmm5 -vpxor %xmm11, %xmm5, %xmm10 -vpaddq %xmm14, %xmm13, %xmm13 -vpaddq %xmm4, %xmm13, %xmm6 -vpshufd $177, %xmm10, %xmm14 -vpxor %xmm6, %xmm15, %xmm13 -vpaddq %xmm14, %xmm3, %xmm0 -vpshufd $177, %xmm13, %xmm13 -vpaddq %xmm13, %xmm8, %xmm15 -vpxor %xmm0, %xmm2, %xmm8 -vpxor %xmm15, %xmm4, %xmm3 -vpshufb %xmm1, %xmm8, %xmm5 -vpshufb %xmm1, %xmm3, 
%xmm4 -vmovq 72(%rsi), %xmm8 -movq %rax, 296(%rsp) -movq 88(%rsi), %rax -vpinsrq $1, %rax, %xmm8, %xmm2 -movq 104(%rsi), %r12 -vpaddq %xmm2, %xmm6, %xmm6 -vpinsrq $1, %r12, %xmm8, %xmm8 -vmovd %r12, %xmm3 -vpaddq %xmm4, %xmm6, %xmm10 -vpxor %xmm10, %xmm13, %xmm13 -movq %rsi, 280(%rsp) -movq 120(%rsi), %rsi -vpinsrq $1, %rsi, %xmm3, %xmm6 -vpshufb %xmm12, %xmm13, %xmm3 -vpaddq %xmm6, %xmm11, %xmm11 -vpaddq %xmm5, %xmm11, %xmm6 -vpxor %xmm6, %xmm14, %xmm14 -vpshufb %xmm12, %xmm14, %xmm2 -vpaddq %xmm3, %xmm15, %xmm14 -vpaddq %xmm2, %xmm0, %xmm0 -vpaddq %xmm8, %xmm6, %xmm6 -vpxor %xmm14, %xmm4, %xmm4 -vpxor %xmm0, %xmm5, %xmm13 -vpsrlq $63, %xmm4, %xmm5 -vpsrlq $63, %xmm13, %xmm15 -vpaddq %xmm4, %xmm4, %xmm4 -vpaddq %xmm13, %xmm13, %xmm13 -vpor %xmm4, %xmm5, %xmm11 -vpor %xmm13, %xmm15, %xmm5 -vpalignr $8, %xmm5, %xmm11, %xmm15 -vmovd %r11, %xmm4 -vpalignr $8, %xmm11, %xmm5, %xmm5 -vmovd %r14, %xmm11 -vpinsrq $1, %r13, %xmm11, %xmm13 -vpinsrq $1, %r15, %xmm4, %xmm11 -vpaddq %xmm5, %xmm6, %xmm6 -vpaddq %xmm13, %xmm10, %xmm10 -vpaddq %xmm15, %xmm10, %xmm10 -vpalignr $8, %xmm3, %xmm2, %xmm13 -vpxor %xmm10, %xmm13, %xmm8 -vmovd %rsi, %xmm13 -vpshufd $177, %xmm8, %xmm8 -vpalignr $8, %xmm2, %xmm3, %xmm3 -vpxor %xmm6, %xmm3, %xmm2 -vpaddq %xmm8, %xmm0, %xmm3 -vpaddq %xmm11, %xmm10, %xmm10 -vpxor %xmm3, %xmm15, %xmm0 -vpshufd $177, %xmm2, %xmm2 -vpshufb %xmm1, %xmm0, %xmm0 -vpaddq %xmm2, %xmm14, %xmm14 -vpxor %xmm14, %xmm5, %xmm5 -vpshufb %xmm1, %xmm5, %xmm15 -vpaddq %xmm0, %xmm10, %xmm5 -vpinsrq $1, %r9, %xmm13, %xmm10 -vpaddq %xmm10, %xmm6, %xmm6 -vpaddq %xmm15, %xmm6, %xmm13 -vpxor %xmm5, %xmm8, %xmm10 -vpxor %xmm13, %xmm2, %xmm8 -vpshufb %xmm12, %xmm10, %xmm4 -vpshufb %xmm12, %xmm8, %xmm6 -vpaddq %xmm4, %xmm3, %xmm8 -vpaddq %xmm6, %xmm14, %xmm2 -vpxor %xmm8, %xmm0, %xmm14 -vpxor %xmm2, %xmm15, %xmm15 -vpaddq %xmm14, %xmm14, %xmm0 -vpsrlq $63, %xmm14, %xmm3 -vpsrlq $63, %xmm15, %xmm14 -vpor %xmm0, %xmm3, %xmm10 -vpaddq %xmm15, %xmm15, %xmm3 -vpor %xmm3, %xmm14, %xmm0 -vpaddq %xmm7, %xmm5, %xmm14 -vpalignr $8, %xmm10, %xmm0, %xmm11 -vmovd %rax, %xmm5 -vpaddq %xmm11, %xmm14, %xmm7 -vpinsrq $1, %rcx, %xmm5, %xmm14 -vpalignr $8, %xmm0, %xmm10, %xmm15 -vpaddq %xmm9, %xmm7, %xmm3 -vmovd %rdi, %xmm9 -vpinsrq $1, %r8, %xmm9, %xmm10 -vpaddq %xmm14, %xmm13, %xmm13 -vpaddq %xmm15, %xmm13, %xmm5 -vpalignr $8, %xmm6, %xmm4, %xmm13 -vpalignr $8, %xmm4, %xmm6, %xmm4 -vpxor %xmm7, %xmm13, %xmm14 -vpxor %xmm5, %xmm4, %xmm6 -vpshufd $177, %xmm14, %xmm13 -vpshufd $177, %xmm6, %xmm14 -vpaddq %xmm13, %xmm2, %xmm6 -vpaddq %xmm14, %xmm8, %xmm4 -vpaddq %xmm10, %xmm5, %xmm5 -vpxor %xmm6, %xmm11, %xmm2 -vpxor %xmm4, %xmm15, %xmm8 -vpshufb %xmm1, %xmm2, %xmm2 -vpshufb %xmm1, %xmm8, %xmm8 -vpaddq %xmm2, %xmm3, %xmm7 -vpaddq %xmm8, %xmm5, %xmm5 -vpxor %xmm7, %xmm13, %xmm13 -vpxor %xmm5, %xmm14, %xmm14 -vpshufb %xmm12, %xmm13, %xmm13 -vpshufb %xmm12, %xmm14, %xmm14 -vpaddq %xmm13, %xmm6, %xmm10 -vpaddq %xmm14, %xmm4, %xmm0 -vpxor %xmm10, %xmm2, %xmm2 -vpxor %xmm0, %xmm8, %xmm8 -vpaddq %xmm2, %xmm2, %xmm6 -vpaddq %xmm8, %xmm8, %xmm15 -vpsrlq $63, %xmm2, %xmm4 -vpsrlq $63, %xmm8, %xmm11 -vpor %xmm6, %xmm4, %xmm3 -vpor %xmm15, %xmm11, %xmm9 -vpxor %xmm0, %xmm7, %xmm0 -vpxor 208(%rsp), %xmm0, %xmm7 -vpxor %xmm10, %xmm5, %xmm0 -vpalignr $8, %xmm9, %xmm3, %xmm4 -vpalignr $8, %xmm13, %xmm14, %xmm5 -vpalignr $8, %xmm3, %xmm9, %xmm3 -vpxor %xmm5, %xmm4, %xmm6 -vpalignr $8, %xmm14, %xmm13, %xmm8 -vpxor %xmm8, %xmm3, %xmm9 -vmovdqu %xmm7, 208(%rsp) -vpxor 224(%rsp), %xmm0, %xmm2 -vpxor 256(%rsp), %xmm6, %xmm7 -vpxor 240(%rsp), %xmm9, 
%xmm10 -movq 296(%rsp), %rax -movq 288(%rsp), %r8 -movq 280(%rsp), %rsi -vmovdqu %xmm2, 224(%rsp) -vmovdqu %xmm7, 256(%rsp) -vmovdqu %xmm10, 240(%rsp) -cmpq $128, %rdx -ja blake2b_blocks_avx_L25 -blake2b_blocks_avx_L26: -vmovdqu 240(%rsp), %xmm8 -vmovdqu 256(%rsp), %xmm7 -vmovdqu 224(%rsp), %xmm6 -vmovdqu 208(%rsp), %xmm5 -movq 128(%rsp), %rdi -vmovdqu %xmm5, (%rdi) -vmovdqu %xmm6, 16(%rdi) -vmovdqu %xmm7, 32(%rdi) -vmovdqu %xmm8, 48(%rdi) -movq %r8, 64(%rdi) -movq %rax, 72(%rdi) -addq $344, %rsp -popq %rbx -popq %r15 -popq %r14 -popq %r13 -popq %r12 -movq %rbp, %rsp -popq %rbp -ret -FN_END blake2b_blocks_avx
\ No newline at end of file
diff --git a/src/libcryptobox/blake2/blake2-internal.h b/src/libcryptobox/blake2/blake2-internal.h
deleted file mode 100644
index 18b825900..000000000
--- a/src/libcryptobox/blake2/blake2-internal.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef RSPAMD_BLAKE2_INTERNAL_H
-#define RSPAMD_BLAKE2_INTERNAL_H
-
-#include "blake2.h"
-
-typedef struct blake2b_state_internal_t {
-	unsigned char h[64];
-	unsigned char t[16];
-	unsigned char f[16];
-	size_t leftover;
-	unsigned char buffer[BLAKE2B_BLOCKBYTES];
-} blake2b_state_internal;
-
-#endif
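The public `blake2b_state` (declared in blake2.h, further down in this diff) is a 256-byte opaque blob that blake2.c casts to this internal layout. A compile-time sanity check of that size invariant would look like the following sketch; it is not part of the original sources:

```c
/* Sketch only: the internal state (64 + 16 + 16 + sizeof(size_t) + 128 bytes)
 * must fit into the 256-byte opaque container declared in blake2.h. */
#include "blake2.h"
#include "blake2-internal.h"

_Static_assert (sizeof (blake2b_state_internal) <= sizeof (blake2b_state),
		"blake2b_state_internal must fit into blake2b_state::opaque");
```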
diff --git a/src/libcryptobox/blake2/blake2.c b/src/libcryptobox/blake2/blake2.c
deleted file mode 100644
index bb681b5d3..000000000
--- a/src/libcryptobox/blake2/blake2.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Copyright (c) 2015, Vsevolod Stakhov
- * Copyright (c) 2015, Andrew Moon
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "cryptobox.h"
-#include "platform_config.h"
-#include "blake2.h"
-#include "blake2-internal.h"
-
-extern unsigned long cpu_config;
-
-typedef struct blake2b_impl_t {
-	unsigned long cpu_flags;
-	const char *desc;
-
-	void (*blake2b_blocks) (blake2b_state_internal *state,
-			const unsigned char *in,
-			size_t bytes,
-			size_t stride);
-} blake2b_impl_t;
-
-#define BLAKE2B_STRIDE BLAKE2B_BLOCKBYTES
-#define BLAKE2B_STRIDE_NONE 0
-
-#define BLAKE2B_DECLARE(ext) \
-	void blake2b_blocks_##ext(blake2b_state_internal *state, const unsigned char *in, size_t bytes, size_t stride);
-
-#define BLAKE2B_IMPL(cpuflags, desc, ext) \
-	{(cpuflags), desc, blake2b_blocks_##ext}
-
-#if defined(HAVE_AVX)
-BLAKE2B_DECLARE(avx)
-#define BLAKE2B_AVX BLAKE2B_IMPL(CPUID_AVX, "avx", avx)
-#endif
-
-#if defined(CMAKE_ARCH_x86_64) || defined(CMAKE_ARCH_i386)
-BLAKE2B_DECLARE(x86)
-#define BLAKE2B_X86 BLAKE2B_IMPL(CPUID_SSE2, "x86", x86)
-#endif
-
-/* the "always runs" version */
-BLAKE2B_DECLARE(ref)
-#define BLAKE2B_GENERIC BLAKE2B_IMPL(0, "generic", ref)
-
-/* list implementations from most optimized to least, with generic as the last entry */
-static const blake2b_impl_t blake2b_list[] = {
-	BLAKE2B_GENERIC,
-#if defined(BLAKE2B_AVX)
-	BLAKE2B_AVX,
-#endif
-#if defined(BLAKE2B_X86)
-	BLAKE2B_X86,
-#endif
-};
-
-static const blake2b_impl_t *blake2b_opt = &blake2b_list[0];
-
-
-/* is the pointer not aligned on a word boundary? */
-static int
-blake2b_not_aligned (const void *p)
-{
-#if !defined(CPU_8BITS)
-	return ((size_t) p & (sizeof (size_t) - 1)) != 0;
-#else
-	return 0;
-#endif
-}
-
-static const union endian_test_t {
-	unsigned char b[2];
-	unsigned short s;
-} blake2b_endian_test = {{1, 0}};
-
-/* copy the hash from the internal state */
-static void
-blake2b_store_hash (blake2b_state_internal *state, unsigned char *hash)
-{
-	if (blake2b_endian_test.s == 0x0001) {
-		memcpy (hash, state->h, 64);
-	}
-	else {
-		size_t i, j;
-		for (i = 0; i < 8; i++, hash += 8) {
-			for (j = 0; j < 8; j++)
-				hash[7 - j] = state->h[(i * 8) + j];
-		}
-	}
-}
-
-static const unsigned char blake2b_init_le[64] = {
-	0x08 ^ 0x40, 0xc9 ^ 0x00, 0xbc ^ 0x01, 0xf3 ^ 0x01, 0x67 ^ 0x00,
-	0xe6 ^ 0x00, 0x09 ^ 0x00, 0x6a ^ 0x00,
-	0x3b, 0xa7, 0xca, 0x84, 0x85, 0xae, 0x67, 0xbb,
-	0x2b, 0xf8, 0x94, 0xfe, 0x72, 0xf3, 0x6e, 0x3c,
-	0xf1, 0x36, 0x1d, 0x5f, 0x3a, 0xf5, 0x4f, 0xa5,
-	0xd1, 0x82, 0xe6, 0xad, 0x7f, 0x52, 0x0e, 0x51,
-	0x1f, 0x6c, 0x3e, 0x2b, 0x8c, 0x68, 0x05, 0x9b,
-	0x6b, 0xbd, 0x41, 0xfb, 0xab, 0xd9, 0x83, 0x1f,
-	0x79, 0x21, 0x7e, 0x13, 0x19, 0xcd, 0xe0, 0x5b,
-};
-
-/* initialize the state in serial mode */
-void
-blake2b_init (blake2b_state *S)
-{
-	blake2b_state_internal *state = (blake2b_state_internal *) S;
-	/* assume state is fully little endian for now */
-	memcpy (state, blake2b_init_le, 64);
-	/*memcpy(state, (blake2b_endian_test.s == 1) ? blake2b_init_le : blake2b_init_be, 64);*/
-	memset (state->t,
-			0,
-			sizeof (state->t) + sizeof (state->f) + sizeof (state->leftover));
-}
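The XOR pairs at the start of `blake2b_init_le` look odd but are just the BLAKE2b parameter block folded into the first IV word, stored little endian: h[0] = IV[0] ^ (0x01010000 | (keylen << 8) | outlen), here with outlen = 64 and keylen = 0. A standalone sketch of that arithmetic (not from the original sources):

```c
/* Sketch: reproduce the first eight bytes of blake2b_init_le.
 * outlen = 64 (0x40), keylen = 0, fanout = depth = 1. */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
	uint64_t iv0 = 0x6a09e667f3bcc908ULL;   /* LE bytes: 08 c9 bc f3 67 e6 09 6a */
	uint64_t param = 0x0000000001010040ULL; /* depth, fanout, keylen, outlen */

	printf ("%016llx\n", (unsigned long long) (iv0 ^ param));
	/* prints 6a09e667f2bdc948: exactly the bytes 08^40, c9^00, bc^01, f3^01, ... */
	return 0;
}
```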
-
-/* initialized the state in serial-key'd mode */
-void
-blake2b_keyed_init (blake2b_state *S, const unsigned char *key, size_t keylen)
-{
-	unsigned char k[BLAKE2B_BLOCKBYTES];
-	blake2b_state _ks;
-	blake2b_state_internal *state = (blake2b_state_internal *)S;
-
-	memset (k, 0, sizeof (k));
-
-	if (keylen <= BLAKE2B_KEYBYTES) {
-		memcpy (k, key, keylen);
-		blake2b_init (S);
-		state->h[1] ^= keylen;
-		blake2b_update (S, k, sizeof (k));
-	}
-	else {
-		blake2b_init (S);
-		/*
-		 * We use additional blake2 iteration to store large key
-		 * XXX: it is not compatible with the original implementation but safe
-		 */
-		blake2b_init (&_ks);
-		blake2b_update (&_ks, key, keylen);
-		blake2b_final (&_ks, k);
-		blake2b_keyed_init (S, k, BLAKE2B_KEYBYTES);
-	}
-
-	rspamd_explicit_memzero (k, sizeof (k));
-}
-
-/* hash inlen bytes from in, which may or may not be word aligned, returns the number of bytes used */
-static size_t
-blake2b_consume_blocks (blake2b_state_internal *state,
-		const unsigned char *in,
-		size_t inlen)
-{
-	/* always need to leave at least BLAKE2B_BLOCKBYTES in case this is the final block */
-	if (inlen <= BLAKE2B_BLOCKBYTES)
-		return 0;
-
-	inlen = ((inlen - 1) & ~(BLAKE2B_BLOCKBYTES - 1));
-	if (blake2b_not_aligned (in)) {
-		/* copy the unaligned data to an aligned buffer and process in chunks */
-		unsigned char buffer[16 * BLAKE2B_BLOCKBYTES];
-		size_t left = inlen;
-		while (left) {
-			const size_t bytes = (left > sizeof (buffer)) ? sizeof (buffer)
-					: left;
-			memcpy (buffer, in, bytes);
-			blake2b_opt->blake2b_blocks (state, buffer, bytes, BLAKE2B_STRIDE);
-			in += bytes;
-			left -= bytes;
-		}
-	}
-	else {
-		/* word aligned, handle directly */
-		blake2b_opt->blake2b_blocks (state, in, inlen, BLAKE2B_STRIDE);
-	}
-
-	return inlen;
-}
-
-/* update the hash state with inlen bytes from in */
-void
-blake2b_update (blake2b_state *S, const unsigned char *in, size_t inlen)
-{
-	blake2b_state_internal *state = (blake2b_state_internal *) S;
-	size_t bytes;
-
-	/* blake2b processes the final <=BLOCKBYTES bytes raw, so we can only update if there are at least BLOCKBYTES+1 bytes available */
-	if ((state->leftover + inlen) > BLAKE2B_BLOCKBYTES) {
-		/* handle the previous data, we know there is enough for at least one block */
-		if (state->leftover) {
-			bytes = (BLAKE2B_BLOCKBYTES - state->leftover);
-			memcpy (state->buffer + state->leftover, in, bytes);
-			in += bytes;
-			inlen -= bytes;
-			state->leftover = 0;
-			blake2b_opt->blake2b_blocks (state,
-					state->buffer,
-					BLAKE2B_BLOCKBYTES,
-					BLAKE2B_STRIDE_NONE);
-		}
-
-		/* handle the direct data (if any) */
-		bytes = blake2b_consume_blocks (state, in, inlen);
-		inlen -= bytes;
-		in += bytes;
-	}
-
-	/* handle leftover data */
-	memcpy (state->buffer + state->leftover, in, inlen);
-	state->leftover += inlen;
-}
-
-/* finalize the hash */
-void
-blake2b_final (blake2b_state *S, unsigned char *hash)
-{
-	blake2b_state_internal *state = (blake2b_state_internal *) S;
-	memset (&state->f[0], 0xff, 8);
-	blake2b_opt->blake2b_blocks (state,
-			state->buffer,
-			state->leftover,
-			BLAKE2B_STRIDE_NONE);
-	blake2b_store_hash (state, hash);
-	rspamd_explicit_memzero (state, sizeof (*state));
-}
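For reference, incremental use of this (now removed) API follows the usual init/update/final sequence; `hash_two_chunks` below is a hypothetical helper, not code from the tree:

```c
/* Sketch only: hash a message delivered in two pieces with the removed API. */
#include "blake2.h"

static void
hash_two_chunks (unsigned char out[BLAKE2B_OUTBYTES],
		const unsigned char *part1, size_t len1,
		const unsigned char *part2, size_t len2)
{
	blake2b_state st;

	blake2b_init (&st);
	blake2b_update (&st, part1, len1); /* a tail of <=128 bytes stays buffered */
	blake2b_update (&st, part2, len2);
	blake2b_final (&st, out); /* writes 64 bytes and zeroes the state */
}
```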
-
-/* one-shot hash inlen bytes from in */
-void
-blake2b (unsigned char *hash, const unsigned char *in, size_t inlen)
-{
-	blake2b_state S;
-	blake2b_state_internal *state = (blake2b_state_internal *) &S;
-	size_t bytes;
-
-	blake2b_init (&S);
-
-	/* hash until <= 128 bytes left */
-	bytes = blake2b_consume_blocks (state, in, inlen);
-	in += bytes;
-	inlen -= bytes;
-
-	/* final block */
-	memset (&state->f[0], 0xff, 8);
-	blake2b_opt->blake2b_blocks (state, in, inlen, BLAKE2B_STRIDE_NONE);
-	blake2b_store_hash (state, hash);
-}
-
-void
-blake2b_keyed (unsigned char *hash,
-		const unsigned char *in,
-		size_t inlen,
-		const unsigned char *key,
-		size_t keylen)
-{
-	blake2b_state S;
-	blake2b_keyed_init (&S, key, keylen);
-	blake2b_update (&S, in, inlen);
-	blake2b_final (&S, hash);
-}
-
-const char*
-blake2b_load (void)
-{
-	guint i;
-
-	if (cpu_config != 0) {
-		for (i = 0; i < G_N_ELEMENTS (blake2b_list); i++) {
-			if (blake2b_list[i].cpu_flags & cpu_config) {
-				blake2b_opt = &blake2b_list[i];
-				break;
-			}
-		}
-	}
-
-	return blake2b_opt->desc;
-}
diff --git a/src/libcryptobox/blake2/blake2.h b/src/libcryptobox/blake2/blake2.h
deleted file mode 100644
index 3da1958ae..000000000
--- a/src/libcryptobox/blake2/blake2.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef RSPAMD_BLAKE2_H
-#define RSPAMD_BLAKE2_H
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-
-#define BLAKE2B_BLOCKBYTES 128
-#define BLAKE2B_OUTBYTES 64
-#define BLAKE2B_KEYBYTES 64
-#define BLAKE2B_SALTBYTES 16
-#define BLAKE2B_PERSONALBYTES 16
-
-
-typedef struct blake2b_state_t {
-	unsigned char opaque[256];
-} blake2b_state;
-
-/* incremental */
-void blake2b_init (blake2b_state *S);
-
-void blake2b_keyed_init (blake2b_state *S,
-		const unsigned char *key,
-		size_t keylen);
-
-void blake2b_update (blake2b_state *S,
-		const unsigned char *in,
-		size_t inlen);
-
-void blake2b_final (blake2b_state *S, unsigned char *hash);
-
-/* one-shot */
-void blake2b (unsigned char *hash,
-		const unsigned char *in,
-		size_t inlen);
-
-void blake2b_keyed (unsigned char *hash,
-		const unsigned char *in,
-		size_t inlen,
-		const unsigned char *key,
-		size_t keylen);
-
-const char* blake2b_load (void);
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
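A corresponding sketch for the keyed one-shot path; note the caveat from `blake2b_keyed_init` above: keys longer than BLAKE2B_KEYBYTES are pre-hashed, which the code itself flags as incompatible with the upstream BLAKE2 construction. `mac_message` is a hypothetical wrapper:

```c
/* Sketch only: 64-byte MAC-style digest with the removed keyed API. */
#include "blake2.h"

static void
mac_message (unsigned char out[BLAKE2B_OUTBYTES],
		const unsigned char *msg, size_t msglen,
		const unsigned char *key, size_t keylen)
{
	/* keylen <= BLAKE2B_KEYBYTES stays spec-compatible; longer keys take
	 * the incompatible pre-hashing branch of blake2b_keyed_init. */
	blake2b_keyed (out, msg, msglen, key, keylen);
}
```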
diff --git a/src/libcryptobox/blake2/constants.S b/src/libcryptobox/blake2/constants.S
deleted file mode 100644
index c5c5b9a61..000000000
--- a/src/libcryptobox/blake2/constants.S
+++ /dev/null
@@ -1,30 +0,0 @@
-SECTION_RODATA
-.p2align 6
-blake2b_constants:
-.quad 0x6a09e667f3bcc908
-.quad 0xbb67ae8584caa73b
-.quad 0x3c6ef372fe94f82b
-.quad 0xa54ff53a5f1d36f1
-.quad 0x510e527fade682d1
-.quad 0x9b05688c2b3e6c1f
-.quad 0x1f83d9abfb41bd6b
-.quad 0x5be0cd19137e2179
-
-blake2b_sigma:
-.byte 0,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120
-.byte 112,80,32,64,72,120,104,48,8,96,0,16,88,56,40,24
-.byte 88,64,96,0,40,16,120,104,80,112,24,48,56,8,72,32
-.byte 56,72,24,8,104,96,88,112,16,48,40,80,32,0,120,64
-.byte 72,0,40,56,16,32,80,120,112,8,88,96,48,64,24,104
-.byte 16,96,48,80,0,88,64,24,32,104,56,40,120,112,8,72
-.byte 96,40,8,120,112,104,32,80,0,56,48,24,72,16,64,88
-.byte 104,88,56,112,96,8,24,72,40,0,120,32,64,48,16,80
-.byte 48,120,112,72,88,24,0,64,96,16,104,56,8,32,80,40
-.byte 80,16,64,32,56,48,8,40,120,88,72,112,24,96,104,0
-.byte 0,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120
-.byte 112,80,32,64,72,120,104,48,8,96,0,16,88,56,40,24

-.p2align 4
-blake2b_constants_ssse3:
-.byte 2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9 /* 64 bit rotate right by 16 */
-.byte 3,4,5,6,7,0,1,2,11,12,13,14,15,8,9,10 /* 64 bit rotate right by 24 */
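The assembly `blake2b_sigma` table stores byte offsets rather than the word indices used by the C reference in ref.c below: each entry is the ref.c index times eight, so the assembly can index the 64-bit message words directly, and rows 10-11 repeat rows 0-1 for BLAKE2b's twelve rounds. A one-line sketch of the correspondence (`sigma_byte_offset` is a hypothetical name):

```c
/* Sketch: asm sigma entry = C sigma entry * 8 (byte offset of a 64-bit word);
 * e.g. the second row starts 14,10,4,8 in ref.c and 112,80,32,64 above. */
static inline unsigned char
sigma_byte_offset (unsigned char word_index)
{
	return (unsigned char) (word_index * 8);
}
```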
\ No newline at end of file
diff --git a/src/libcryptobox/blake2/ref.c b/src/libcryptobox/blake2/ref.c
deleted file mode 100644
index ed6f395fc..000000000
--- a/src/libcryptobox/blake2/ref.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (c) 2015, Vsevolod Stakhov
- * Copyright (c) 2015, Andrew Moon
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "blake2.h"
-#include "blake2-internal.h"
-
-typedef uint64_t blake2b_uint64;
-
-static const unsigned char blake2b_sigma[12][16] = {
-	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
-	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
-	{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
-	{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
-	{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
-	{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
-	{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
-	{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
-	{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
-	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}
-};
-
-static blake2b_uint64
-ROTR64 (blake2b_uint64 x, int k)
-{
-	return ((x >> k) | (x << (64 - k)));
-}
-
-static blake2b_uint64
-U8TO64 (const unsigned char *p)
-{
-	return
-		((blake2b_uint64) p[0]) |
-		((blake2b_uint64) p[1] << 8) |
-		((blake2b_uint64) p[2] << 16) |
-		((blake2b_uint64) p[3] << 24) |
-		((blake2b_uint64) p[4] << 32) |
-		((blake2b_uint64) p[5] << 40) |
-		((blake2b_uint64) p[6] << 48) |
-		((blake2b_uint64) p[7] << 56);
-}
-
-static void
-U64TO8 (unsigned char *p, blake2b_uint64 v)
-{
-	p[0] = (v) & 0xff;
-	p[1] = (v >> 8) & 0xff;
-	p[2] = (v >> 16) & 0xff;
-	p[3] = (v >> 24) & 0xff;
-	p[4] = (v >> 32) & 0xff;
-	p[5] = (v >> 40) & 0xff;
-	p[6] = (v >> 48) & 0xff;
-	p[7] = (v >> 56) & 0xff;
-}
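`blake2b_blocks_ref` below applies twelve rounds of the G quarter-round macro; written out as a plain function, the mixing step is the following readability sketch (`g_mix` and `rotr64` are hypothetical names, not part of the deleted file):

```c
#include <stdint.h>

static inline uint64_t
rotr64 (uint64_t x, int k)
{
	return (x >> k) | (x << (64 - k));
}

/* One BLAKE2b quarter-round, equivalent to the G macro in the code below;
 * mx/my stand for the two sigma-selected message words. */
static inline void
g_mix (uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d,
		uint64_t mx, uint64_t my)
{
	*a += *b + mx;
	*d = rotr64 (*d ^ *a, 32);
	*c += *d;
	*b = rotr64 (*b ^ *c, 24);
	*a += *b + my;
	*d = rotr64 (*d ^ *a, 16);
	*c += *d;
	*b = rotr64 (*b ^ *c, 63);
}
```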
-
-void
-blake2b_blocks_ref (blake2b_state_internal *S,
-		const unsigned char *in,
-		size_t bytes,
-		size_t stride)
-{
-	const blake2b_uint64 f0 = U8TO64 (&S->f[0]);
-	const blake2b_uint64 f1 = U8TO64 (&S->f[8]);
-
-	const blake2b_uint64 w8 = 0x6a09e667f3bcc908ull;
-	const blake2b_uint64 w9 = 0xbb67ae8584caa73bull;
-	const blake2b_uint64 w10 = 0x3c6ef372fe94f82bull;
-	const blake2b_uint64 w11 = 0xa54ff53a5f1d36f1ull;
-	const blake2b_uint64 w12 = 0x510e527fade682d1ull;
-	const blake2b_uint64 w13 = 0x9b05688c2b3e6c1full;
-	const blake2b_uint64 w14 = 0x1f83d9abfb41bd6bull ^f0;
-	const blake2b_uint64 w15 = 0x5be0cd19137e2179ull ^f1;
-
-	const size_t inc = (bytes >= 128) ? 128 : bytes;
-
-	blake2b_uint64 t0 = U8TO64 (&S->t[0]);
-	blake2b_uint64 t1 = U8TO64 (&S->t[8]);
-
-	blake2b_uint64 h[8];
-	blake2b_uint64 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;
-	unsigned char buffer[128];
-
-	size_t i;
-
-	if (f0) {
-		memset (buffer, 0, sizeof (buffer));
-		memcpy (buffer, in, bytes);
-		in = buffer;
-	}
-
-	for (i = 0; i < 8; i++)
-		h[i] = U8TO64 (&S->h[i * 8]);
-
-	while (1) {
-		blake2b_uint64 m[16];
-
-		t0 += inc;
-		if (t0 < inc)
-			t1 += 1;
-
-		for (i = 0; i < 16; i++)
-			m[i] = U8TO64 (in + (i * 8));
-
-		v0 = h[0];
-		v1 = h[1];
-		v2 = h[2];
-		v3 = h[3];
-		v4 = h[4];
-		v5 = h[5];
-		v6 = h[6];
-		v7 = h[7];
-		v8 = w8;
-		v9 = w9;
-		v10 = w10;
-		v11 = w11;
-		v12 = w12 ^ t0;
-		v13 = w13 ^ t1;
-		v14 = w14;
-		v15 = w15;
-
-#define G(r, x, a, b, c, d) \
-	a += b + m[blake2b_sigma[r][2*x+0]]; \
-	d = ROTR64(d ^ a, 32); \
-	c += d; \
-	b = ROTR64(b ^ c, 24); \
-	a += b + m[blake2b_sigma[r][2*x+1]]; \
-	d = ROTR64(d ^ a, 16); \
-	c += d; \
-	b = ROTR64(b ^ c, 63);
-
-		for (i = 0; i < 12; i++) {
-			G(i, 0, v0, v4, v8, v12);
-			G(i, 1, v1, v5, v9, v13);
-			G(i, 2, v2, v6, v10, v14);
-			G(i, 3, v3, v7, v11, v15);
-			G(i, 4, v0, v5, v10, v15);
-			G(i, 5, v1, v6, v11, v12);
-			G(i, 6, v2, v7, v8, v13);
-			G(i, 7, v3, v4, v9, v14);
-		}
-
-		h[0] ^= (v0 ^ v8);
-		h[1] ^= (v1 ^ v9);
-		h[2] ^= (v2 ^ v10);
-		h[3] ^= (v3 ^ v11);
-		h[4] ^= (v4 ^ v12);
-		h[5] ^= (v5 ^ v13);
-		h[6] ^= (v6 ^ v14);
-		h[7] ^= (v7 ^ v15);
-
-		if (bytes <= 128)
-			break;
-		in += stride;
-		bytes -= 128;
-	}
-
-	for (i = 0; i < 8; i++)
-		U64TO8 (&S->h[i * 8], h[i]);
-	U64TO8 (&S->t[0], t0);
-	U64TO8 (&S->t[8], t1);
-}
diff --git a/src/libcryptobox/blake2/x86-32.S b/src/libcryptobox/blake2/x86-32.S
deleted file mode 100644
index 12030e57b..000000000
--- a/src/libcryptobox/blake2/x86-32.S
+++ /dev/null
@@ -1,1080 +0,0 @@
-#include "../macro.S"
-#include "constants.S"
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN blake2b_blocks_x86
-pushl %esi
-pushl %edi
-pushl %ebx
-pushl %ebp
-subl $492, %esp
-movl 512(%esp), %eax
-movl 80(%eax), %ebp
-movl %ebp, %edi
-movl 84(%eax), %ebx
-xorl $-79577749, %edi
-movl %edi, 144(%esp)
-movl %ebx, %edi
-xorl $528734635, %edi
-movl %edi, 148(%esp)
-movl 88(%eax), %edi
-movl 92(%eax), %eax
-xorl $327033209, %edi
-xorl $1541459225, %eax
-movl %edi, 152(%esp)
-LOAD_VAR_PIC blake2b_sigma, %ecx
-lea 192(%ecx), %edi
-movl 516(%esp), %esi
-orl %ebx, %ebp
-movl 520(%esp), %edx
-movl %edi, 360(%esp)
-jne blake2b_blocks_x86_L2
-blake2b_blocks_x86_L32:
-cmpl $128, %edx
-jmp blake2b_blocks_x86_L21
-blake2b_blocks_x86_L2:
-cmpl $128, %edx
-je blake2b_blocks_x86_L21
-blake2b_blocks_x86_L3:
-testb $64, %dl
-lea (%esp), %ebp
-je blake2b_blocks_x86_L5
-blake2b_blocks_x86_L4:
-movl (%esi), %ebx
-movl 4(%esi), %ebp
-movl %ebx, (%esp)
-movl %ebp, 4(%esp)
-movl 8(%esi), %edi
-movl 12(%esi), %ebx
-movl %edi, 8(%esp)
-movl %ebx, 12(%esp)
-movl 16(%esi), %ebp
-movl 20(%esi), %edi
-movl %ebp, 16(%esp)
-movl %edi, 20(%esp)
-movl 24(%esi), %ebx
-movl 28(%esi), %ebp
-movl %ebx, 24(%esp)
-movl %ebp, 28(%esp)
-movl 32(%esi), %edi
-movl 36(%esi), %ebx
-movl %edi, 32(%esp)
-movl %ebx, 36(%esp)
-movl 40(%esi), %ebp
-movl 44(%esi), %edi
-movl %ebp, 40(%esp)
-movl %edi, 44(%esp)
-movl 48(%esi), %ebx
-movl 52(%esi), %ebp
-movl %ebx,
48(%esp) -movl %ebp, 52(%esp) -lea 64(%esp), %ebp -movl 56(%esi), %edi -movl 60(%esi), %ebx -addl $64, %esi -movl %edi, 56(%esp) -movl %ebx, 60(%esp) -jmp blake2b_blocks_x86_L6 -blake2b_blocks_x86_L5: -xorl %ebx, %ebx -movl %ebx, 64(%esp) -movl %ebx, 68(%esp) -movl %ebx, 72(%esp) -movl %ebx, 76(%esp) -movl %ebx, 80(%esp) -movl %ebx, 84(%esp) -movl %ebx, 88(%esp) -movl %ebx, 92(%esp) -movl %ebx, 96(%esp) -movl %ebx, 100(%esp) -movl %ebx, 104(%esp) -movl %ebx, 108(%esp) -movl %ebx, 112(%esp) -movl %ebx, 116(%esp) -movl %ebx, 120(%esp) -movl %ebx, 124(%esp) -blake2b_blocks_x86_L6: -xorl %ebx, %ebx -testb $32, %dl -movl %ebx, (%ebp) -movl %ebx, 4(%ebp) -movl %ebx, 8(%ebp) -movl %ebx, 12(%ebp) -movl %ebx, 16(%ebp) -movl %ebx, 20(%ebp) -movl %ebx, 24(%ebp) -movl %ebx, 28(%ebp) -movl %ebx, 32(%ebp) -movl %ebx, 36(%ebp) -movl %ebx, 40(%ebp) -movl %ebx, 44(%ebp) -movl %ebx, 48(%ebp) -movl %ebx, 52(%ebp) -movl %ebx, 56(%ebp) -movl %ebx, 60(%ebp) -je blake2b_blocks_x86_L8 -blake2b_blocks_x86_L7: -movl (%esi), %ebx -movl 4(%esi), %edi -movl %ebx, (%ebp) -movl %edi, 4(%ebp) -movl 8(%esi), %ebx -movl 12(%esi), %edi -movl %ebx, 8(%ebp) -movl %edi, 12(%ebp) -movl 16(%esi), %ebx -movl 20(%esi), %edi -movl %ebx, 16(%ebp) -movl %edi, 20(%ebp) -movl 24(%esi), %ebx -movl 28(%esi), %edi -addl $32, %esi -movl %ebx, 24(%ebp) -movl %edi, 28(%ebp) -addl $32, %ebp -blake2b_blocks_x86_L8: -testb $16, %dl -je blake2b_blocks_x86_L10 -blake2b_blocks_x86_L9: -movl (%esi), %ebx -movl 4(%esi), %edi -movl %ebx, (%ebp) -movl %edi, 4(%ebp) -movl 8(%esi), %ebx -movl 12(%esi), %edi -addl $16, %esi -movl %ebx, 8(%ebp) -movl %edi, 12(%ebp) -addl $16, %ebp -blake2b_blocks_x86_L10: -testb $8, %dl -je blake2b_blocks_x86_L12 -blake2b_blocks_x86_L11: -movl (%esi), %ebx -movl 4(%esi), %edi -addl $8, %esi -movl %ebx, (%ebp) -movl %edi, 4(%ebp) -addl $8, %ebp -blake2b_blocks_x86_L12: -testb $4, %dl -je blake2b_blocks_x86_L14 -blake2b_blocks_x86_L13: -movl (%esi), %ebx -addl $4, %esi -movl %ebx, (%ebp) -addl $4, %ebp -blake2b_blocks_x86_L14: -testb $2, %dl -je blake2b_blocks_x86_L16 -blake2b_blocks_x86_L15: -movzwl (%esi), %ebx -addl $2, %esi -movw %bx, (%ebp) -addl $2, %ebp -blake2b_blocks_x86_L16: -testb $1, %dl -je blake2b_blocks_x86_L18 -blake2b_blocks_x86_L17: -movzbl (%esi), %ebx -movb %bl, (%ebp) -blake2b_blocks_x86_L18: -cmpl $128, %edx -lea (%esp), %esi -blake2b_blocks_x86_L21: -movl 512(%esp), %ebp -lea (%ecx), %ecx -movl %esi, 236(%esp) -movl %ecx, 128(%esp) -movl 68(%ebp), %edi -movl %edi, 228(%esp) -movl 60(%ebp), %edi -movl %edi, 196(%esp) -movl 72(%ebp), %edi -movl %edi, 164(%esp) -movl 76(%ebp), %edi -movl %edi, 200(%esp) -movl 24(%ebp), %edi -movl %edi, 176(%esp) -movl 28(%ebp), %edi -movl %edi, 208(%esp) -movl 16(%ebp), %edi -movl %edi, 184(%esp) -movl 20(%ebp), %edi -movl %edi, 216(%esp) -movl 48(%ebp), %edi -movl %edi, 168(%esp) -movl 52(%ebp), %edi -movl %edi, 204(%esp) -movl 8(%ebp), %edi -movl 64(%ebp), %ebx -movl %edi, 156(%esp) -movl 12(%ebp), %edi -movl %ebx, 192(%esp) -movl 56(%ebp), %ebx -movl %edi, 224(%esp) -movl 40(%ebp), %edi -movl %ebx, 172(%esp) -movl %edx, %ebx -movl %edi, 160(%esp) -movl 44(%ebp), %edi -jbe blake2b_blocks_x86_LL3 -movl $128, %ebx -blake2b_blocks_x86_LL3: -movl %edi, 212(%esp) -movl (%ebp), %edi -movl %edi, 180(%esp) -movl 4(%ebp), %edi -movl %edi, 232(%esp) -movl 32(%ebp), %edi -movl 36(%ebp), %ebp -movl %edi, 188(%esp) -movl %ebp, 220(%esp) -movl %eax, 132(%esp) -movl %ebx, 136(%esp) -movl %edx, 140(%esp) -movl 512(%esp), %esi -jmp blake2b_blocks_x86_L22 -blake2b_blocks_x86_L28: -movl 
524(%esp), %eax -movl 140(%esp), %edx -addl $-128, %edx -addl %eax, 236(%esp) -movl %edx, 140(%esp) -blake2b_blocks_x86_L22: -movl 136(%esp), %edx -xorl %ebx, %ebx -movl 192(%esp), %eax -addl %edx, %eax -movl 228(%esp), %ecx -adcl $0, %ecx -movl %eax, 192(%esp) -movl %eax, 64(%esi) -subl %edx, %eax -movl %ecx, 228(%esp) -movl %ecx, 68(%esi) -sbbl %ebx, %ecx -jae blake2b_blocks_x86_L25 -blake2b_blocks_x86_L23: -movl 164(%esp), %eax -addl $1, %eax -movl 200(%esp), %edx -adcl $0, %edx -movl %eax, 164(%esp) -movl %edx, 200(%esp) -movl %eax, 72(%esi) -movl %edx, 76(%esi) -blake2b_blocks_x86_L25: -movl 152(%esp), %eax -movl %eax, 312(%esp) -movl 172(%esp), %ebp -movl 196(%esp), %ebx -movl 144(%esp), %eax -movl 184(%esp), %edi -movl %ebp, 284(%esp) -movl %ebx, 288(%esp) -movl %eax, 296(%esp) -movl 168(%esp), %ebp -movl 204(%esp), %ebx -movl 212(%esp), %eax -movl %edi, 332(%esp) -movl %ebp, 276(%esp) -movl %ebx, 280(%esp) -movl 148(%esp), %edi -movl %eax, 272(%esp) -movl 224(%esp), %ebp -movl 160(%esp), %ebx -movl 188(%esp), %eax -movl 208(%esp), %ecx -movl %edi, 300(%esp) -movl %ebp, 248(%esp) -movl %ebx, 268(%esp) -movl 180(%esp), %edi -movl %eax, 260(%esp) -movl 176(%esp), %edx -movl 164(%esp), %ebp -movl 232(%esp), %ebx -xorl $725511199, %ebp -movl 128(%esp), %eax -movl %ebp, 348(%esp) -movl %ecx, 256(%esp) -movl 200(%esp), %ebp -movl 216(%esp), %ecx -xorl $-1694144372, %ebp -movl %edi, 240(%esp) -movl %edx, 316(%esp) -movl %ebx, 244(%esp) -movl 220(%esp), %edi -movl %eax, 292(%esp) -movl 192(%esp), %ebx -xorl $-1377402159, %ebx -movl %ebx, 352(%esp) -movl %ecx, 252(%esp) -movl 228(%esp), %ebx -movl %ebp, 356(%esp) -xorl $1359893119, %ebx -movl 132(%esp), %edx -movl 156(%esp), %ecx -movl 332(%esp), %ebp -movl 316(%esp), %esi -movl %edi, 264(%esp) -movl $1595750129, 308(%esp) -movl $-1521486534, 304(%esp) -movl $-23791573, 324(%esp) -movl $1013904242, 320(%esp) -movl $-2067093701, 340(%esp) -movl $-1150833019, 336(%esp) -movl $-205731576, 328(%esp) -movl $1779033703, 344(%esp) -blake2b_blocks_x86_L26: -movl %esi, 316(%esp) -movl %edx, 368(%esp) -movzbl (%eax), %esi -movl 236(%esp), %edx -movl %ecx, 364(%esp) -movl 240(%esp), %ecx -addl (%esi,%edx), %ecx -movl %ebp, 332(%esp) -movl 244(%esp), %ebp -adcl 4(%esi,%edx), %ebp -movl 260(%esp), %edx -addl %edx, %ecx -movl 264(%esp), %esi -adcl %esi, %ebp -xorl %ebp, %ebx -movl 352(%esp), %edi -movl %ecx, 240(%esp) -xorl %ecx, %edi -movl 328(%esp), %ecx -addl %ebx, %ecx -movl %ebx, 372(%esp) -movl 344(%esp), %ebx -adcl %edi, %ebx -xorl %ecx, %edx -xorl %ebx, %esi -movl %edi, 352(%esp) -movl %edx, %edi -movl %ecx, 328(%esp) -movl %esi, %ecx -shrl $24, %esi -shll $8, %edx -orl %edx, %esi -movl %esi, 264(%esp) -movzbl 2(%eax), %edx -movl 236(%esp), %esi -shll $8, %ecx -shrl $24, %edi -orl %edi, %ecx -movl %ecx, 376(%esp) -movl 364(%esp), %ecx -addl (%edx,%esi), %ecx -movl 248(%esp), %edi -movl %ebp, 244(%esp) -movl 268(%esp), %ebp -adcl 4(%edx,%esi), %edi -addl %ebp, %ecx -movl 272(%esp), %edx -adcl %edx, %edi -movl %ebx, 344(%esp) -movl %ecx, 364(%esp) -movl 348(%esp), %ebx -xorl %ecx, %ebx -movl 356(%esp), %ecx -xorl %edi, %ecx -movl %edi, 248(%esp) -movl 340(%esp), %edi -addl %ecx, %edi -movl %ecx, 356(%esp) -movl 336(%esp), %ecx -adcl %ebx, %ecx -xorl %edi, %ebp -xorl %ecx, %edx -movl %ebx, 348(%esp) -movl %edx, %ebx -movl %edi, 340(%esp) -movl %ebp, %edi -shrl $24, %edx -shll $8, %ebp -orl %ebp, %edx -movzbl 4(%eax), %ebp -movl %ecx, 336(%esp) -shll $8, %ebx -shrl $24, %edi -movl 332(%esp), %ecx -orl %edi, %ebx -addl (%ebp,%esi), %ecx -movl 
252(%esp), %edi -adcl 4(%ebp,%esi), %edi -movl 276(%esp), %ebp -addl %ebp, %ecx -movl %edx, 272(%esp) -movl 280(%esp), %edx -adcl %edx, %edi -movl %ebx, 380(%esp) -movl %ecx, 332(%esp) -movl 296(%esp), %ebx -xorl %ecx, %ebx -movl 300(%esp), %ecx -xorl %edi, %ecx -movl %edi, 252(%esp) -movl 324(%esp), %edi -addl %ecx, %edi -movl %ecx, 300(%esp) -movl 320(%esp), %ecx -adcl %ebx, %ecx -xorl %edi, %ebp -xorl %ecx, %edx -movl %ebx, 296(%esp) -movl %edx, %ebx -movl %edi, 324(%esp) -movl %ebp, %edi -shrl $24, %edx -shll $8, %ebp -orl %ebp, %edx -movl %edx, 280(%esp) -movzbl 6(%eax), %edx -movl %ecx, 320(%esp) -shll $8, %ebx -shrl $24, %edi -movl 316(%esp), %ecx -orl %edi, %ebx -addl (%edx,%esi), %ecx -movl 256(%esp), %edi -movl 284(%esp), %ebp -adcl 4(%edx,%esi), %edi -addl %ebp, %ecx -movl 288(%esp), %edx -adcl %edx, %edi -movl %ebx, 384(%esp) -movl %ecx, 316(%esp) -movl 312(%esp), %ebx -xorl %ecx, %ebx -movl 368(%esp), %ecx -xorl %edi, %ecx -movl %edi, 256(%esp) -movl 308(%esp), %edi -addl %ecx, %edi -movl %ecx, 368(%esp) -movl 304(%esp), %ecx -adcl %ebx, %ecx -xorl %edi, %ebp -xorl %ecx, %edx -movl %ebx, 312(%esp) -movl %edx, %ebx -movl %edi, 308(%esp) -movl %ebp, %edi -shrl $24, %edx -shll $8, %ebp -orl %ebp, %edx -movzbl 5(%eax), %ebp -movl %ecx, 304(%esp) -shll $8, %ebx -movl (%ebp,%esi), %ecx -addl 332(%esp), %ecx -movl 4(%ebp,%esi), %esi -adcl 252(%esp), %esi -shrl $24, %edi -orl %edi, %ebx -movl %ebx, 388(%esp) -movl 384(%esp), %ebx -addl %ebx, %ecx -movl %edx, 288(%esp) -movl 280(%esp), %edx -adcl %edx, %esi -movl 300(%esp), %ebp -movl 296(%esp), %edi -xorl %ecx, %ebp -xorl %esi, %edi -movl %ecx, 392(%esp) -movl %ebp, %ecx -movl %esi, 396(%esp) -movl %edi, %esi -shll $16, %esi -shrl $16, %ecx -shrl $16, %edi -orl %ecx, %esi -shll $16, %ebp -orl %ebp, %edi -movl 324(%esp), %ebp -addl %esi, %ebp -movl %esi, 400(%esp) -movl 320(%esp), %esi -adcl %edi, %esi -xorl %ebp, %ebx -xorl %esi, %edx -movl %esi, 320(%esp) -movl %edx, %esi -movl %edi, 296(%esp) -movl %ebx, %edi -shrl $31, %esi -addl %ebx, %ebx -shrl $31, %edi -addl %edx, %edx -orl %ebx, %esi -orl %edx, %edi -movl %esi, 408(%esp) -movzbl 7(%eax), %edx -movl 236(%esp), %esi -movl %edi, 404(%esp) -movl 288(%esp), %edi -movl (%edx,%esi), %ebx -addl 316(%esp), %ebx -movl 4(%edx,%esi), %ecx -movl 388(%esp), %edx -adcl 256(%esp), %ecx -addl %edx, %ebx -movl %ebp, 324(%esp) -adcl %edi, %ecx -movl 368(%esp), %ebp -movl 312(%esp), %esi -xorl %ebx, %ebp -xorl %ecx, %esi -movl %ebx, 412(%esp) -movl %ebp, %ebx -movl %ecx, 416(%esp) -movl %esi, %ecx -shll $16, %ecx -shrl $16, %ebx -shrl $16, %esi -orl %ebx, %ecx -shll $16, %ebp -orl %ebp, %esi -movl 308(%esp), %ebp -addl %ecx, %ebp -movl %ecx, 420(%esp) -movl 304(%esp), %ecx -adcl %esi, %ecx -xorl %ebp, %edx -movl %esi, 312(%esp) -xorl %ecx, %edi -movl %edx, %esi -movl %edi, %ebx -shrl $31, %esi -addl %edi, %edi -orl %edi, %esi -addl %edx, %edx -movl %esi, 424(%esp) -movzbl 3(%eax), %edi -movl 236(%esp), %esi -shrl $31, %ebx -orl %edx, %ebx -movl (%edi,%esi), %edx -addl 364(%esp), %edx -movl %ecx, 304(%esp) -movl 4(%edi,%esi), %ecx -movl 380(%esp), %edi -adcl 248(%esp), %ecx -addl %edi, %edx -movl 272(%esp), %esi -adcl %esi, %ecx -movl %ebp, 308(%esp) -movl %ebx, 428(%esp) -movl 356(%esp), %ebx -movl 348(%esp), %ebp -xorl %edx, %ebx -xorl %ecx, %ebp -movl %edx, 432(%esp) -movl %ebp, %edx -movl %ecx, 436(%esp) -movl %ebx, %ecx -shll $16, %edx -shrl $16, %ecx -shrl $16, %ebp -orl %ecx, %edx -shll $16, %ebx -orl %ebx, %ebp -movl 340(%esp), %ebx -addl %edx, %ebx -movl %edx, 440(%esp) -movl 336(%esp), 
[remainder of the deleted 32-bit BLAKE2b assembly omitted: the tail of the blake2b_blocks_x86 round function, the final feed-forward XOR of the working state into the output, and the stack-restoring epilogue]
diff --git a/src/libcryptobox/blake2/x86-32.S b/src/libcryptobox/blake2/x86-64.S
deleted file mode 100644
index f0de795fb..000000000
--- a/src/libcryptobox/blake2/x86-64.S
+++ /dev/null
@@ -1,1754 +0,0 @@
[1754 lines of deleted x86-64 BLAKE2b assembly omitted: the hand-scheduled blake2b_blocks_x86 implementation, covering final-block padding, the counter carry handling, the unrolled round schedule and the state write-back]
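Taken together, the deletions above drop rspamd's hand-scheduled BLAKE2b block functions; hashing now goes through libsodium's generichash API, which picks its own optimised backend at runtime. A minimal sketch of the one-shot replacement call (function and buffer names here are illustrative, not from the commit):

#include <sodium.h>
#include <stdint.h>

/* One-shot BLAKE2b-512 over a message; this is what replaces the
 * hand-written blake2b_blocks_x86 code paths. */
static int
hash_message (uint8_t out[crypto_generichash_blake2b_BYTES_MAX],
              const uint8_t *msg, size_t msglen)
{
    if (sodium_init () == -1) {
        return -1; /* libsodium must be initialised once per process */
    }

    /* NULL key: plain (unkeyed) BLAKE2b */
    return crypto_generichash_blake2b (out,
            crypto_generichash_blake2b_BYTES_MAX,
            msg, msglen, NULL, 0);
}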
diff --git a/src/libcryptobox/catena/catena.c b/src/libcryptobox/catena/catena.c
index 29950dd6e..5b1da3129 100644
--- a/src/libcryptobox/catena/catena.c
+++ b/src/libcryptobox/catena/catena.c
@@ -22,7 +22,8 @@
 #include "config.h"
 #include "catena.h"
-#include "../blake2/blake2.h"
+
+#include <sodium.h>
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define TO_LITTLE_ENDIAN_64(n) (n)
@@ -49,10 +50,10 @@ static inline void
 __Hash1(const uint8_t *input, const uint32_t inputlen,
 		uint8_t hash[H_LEN])
 {
-	blake2b_state ctx;
-	blake2b_init (&ctx);
-	blake2b_update (&ctx, input, inputlen);
-	blake2b_final (&ctx, hash);
+	crypto_generichash_blake2b_state ctx;
+	crypto_generichash_blake2b_init (&ctx, NULL, 0, H_LEN);
+	crypto_generichash_blake2b_update (&ctx, input, inputlen);
+	crypto_generichash_blake2b_final (&ctx, hash, H_LEN);
 }
 
 /***************************************************/
@@ -61,11 +62,12 @@ static inline void
 __Hash2(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
 		const uint8_t i2len, uint8_t hash[H_LEN])
 {
-	blake2b_state ctx;
-	blake2b_init (&ctx);
-	blake2b_update (&ctx, i1, i1len);
-	blake2b_update (&ctx, i2, i2len);
-	blake2b_final (&ctx, hash);
+	crypto_generichash_blake2b_state ctx;
+
+	crypto_generichash_blake2b_init (&ctx, NULL, 0, H_LEN);
+	crypto_generichash_blake2b_update (&ctx, i1, i1len);
+	crypto_generichash_blake2b_update (&ctx, i2, i2len);
+	crypto_generichash_blake2b_final (&ctx, hash, H_LEN);
 }
 
 /***************************************************/
@@ -75,12 +77,13 @@ void
 __Hash3(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
 		const uint8_t i2len, const uint8_t *i3, const uint8_t i3len,
 		uint8_t hash[H_LEN])
 {
-	blake2b_state ctx;
-	blake2b_init (&ctx);
-	blake2b_update (&ctx, i1, i1len);
-	blake2b_update (&ctx, i2, i2len);
-	blake2b_update (&ctx, i3, i3len);
-	blake2b_final (&ctx, hash);
+	crypto_generichash_blake2b_state ctx;
+
+	crypto_generichash_blake2b_init (&ctx, NULL, 0, H_LEN);
+	crypto_generichash_blake2b_update (&ctx, i1, i1len);
+	crypto_generichash_blake2b_update (&ctx, i2, i2len);
+	crypto_generichash_blake2b_update (&ctx, i3, i3len);
+	crypto_generichash_blake2b_final (&ctx, hash, H_LEN);
 }
 
 /***************************************************/
@@ -90,13 +93,14 @@ void
 __Hash4(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
 		const uint8_t i2len, const uint8_t *i3, const uint8_t i3len,
 		const uint8_t *i4, const uint8_t i4len, uint8_t hash[H_LEN])
 {
-	blake2b_state ctx;
-	blake2b_init (&ctx);
-	blake2b_update (&ctx, i1, i1len);
-	blake2b_update (&ctx, i2, i2len);
-	blake2b_update (&ctx, i3, i3len);
-	blake2b_update (&ctx, i4, i4len);
-	blake2b_final (&ctx, hash);
+	crypto_generichash_blake2b_state ctx;
+
+	crypto_generichash_blake2b_init (&ctx, NULL, 0, H_LEN);
+	crypto_generichash_blake2b_update (&ctx, i1, i1len);
+	crypto_generichash_blake2b_update (&ctx, i2, i2len);
+	crypto_generichash_blake2b_update (&ctx, i3, i3len);
+	crypto_generichash_blake2b_update (&ctx, i4, i4len);
+	crypto_generichash_blake2b_final (&ctx, hash, H_LEN);
 }
 
 /***************************************************/
@@ -107,14 +111,15 @@ void __Hash5(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
 		const uint8_t *i4, const uint8_t i4len, const uint8_t *i5,
 		const uint8_t i5len, uint8_t hash[H_LEN])
 {
-	blake2b_state ctx;
-	blake2b_init (&ctx);
-	blake2b_update (&ctx, i1, i1len);
-	blake2b_update (&ctx, i2, i2len);
-	blake2b_update (&ctx, i3, i3len);
-	blake2b_update (&ctx, i4, i4len);
-	blake2b_update (&ctx, i5, i5len);
-	blake2b_final (&ctx, hash);
+	crypto_generichash_blake2b_state ctx;
+
+	crypto_generichash_blake2b_init (&ctx, NULL, 0, H_LEN);
+	crypto_generichash_blake2b_update (&ctx, i1, i1len);
+	crypto_generichash_blake2b_update (&ctx, i2, i2len);
+	crypto_generichash_blake2b_update (&ctx, i3, i3len);
+	crypto_generichash_blake2b_update (&ctx, i4, i4len);
+	crypto_generichash_blake2b_update (&ctx, i5, i5len);
+	crypto_generichash_blake2b_final (&ctx, hash, H_LEN);
 }
 
 static inline void
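The catena changes all follow one pattern: a stack-allocated blake2b_state with init/update/final becomes libsodium's incremental crypto_generichash_blake2b interface, where the digest length (H_LEN) is fixed at init time and passed again at finalisation. Condensed into a standalone sketch (hash2 is an illustrative name; H_LEN assumed to be 64 as in catena):

#include <sodium.h>
#include <stdint.h>

#define H_LEN 64 /* catena uses the full BLAKE2b output */

/* Multi-input hash in the style of catena's __Hash2 helper */
static void
hash2 (const uint8_t *i1, size_t i1len,
       const uint8_t *i2, size_t i2len,
       uint8_t hash[H_LEN])
{
    crypto_generichash_blake2b_state ctx;

    /* NULL/0 key: unkeyed hash; H_LEN chosen at init and at final */
    crypto_generichash_blake2b_init (&ctx, NULL, 0, H_LEN);
    crypto_generichash_blake2b_update (&ctx, i1, i1len);
    crypto_generichash_blake2b_update (&ctx, i2, i2len);
    crypto_generichash_blake2b_final (&ctx, hash, H_LEN);
}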
diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
index 4d42ddb38..c8560925d 100644
--- a/src/libcryptobox/cryptobox.c
+++ b/src/libcryptobox/cryptobox.c
@@ -23,11 +23,6 @@
 #include "cryptobox.h"
 #include "platform_config.h"
 #include "chacha20/chacha.h"
-#include "poly1305/poly1305.h"
-#include "curve25519/curve25519.h"
-#include "ed25519/ed25519.h"
-#include "blake2/blake2.h"
-#include "siphash/siphash.h"
 #include "catena/catena.h"
 #include "base64/base64.h"
 #include "ottery.h"
@@ -58,6 +53,9 @@
 #include <signal.h>
 #include <setjmp.h>
+#include <stdalign.h>
+
+#include <sodium.h>
 
 unsigned long cpu_config = 0;
 
@@ -69,78 +67,6 @@ static const guchar n0[16] = {0};
 #define cryptobox_align_ptr(p, a) \
     (void *) (((uintptr_t) (p) + ((uintptr_t) a - 1)) & ~((uintptr_t) a - 1))
 
-#ifdef HAVE_WEAK_SYMBOLS
-__attribute__((weak)) void
-_dummy_symbol_to_prevent_lto_memzero(void * const pnt, const size_t len);
-__attribute__((weak)) void
-_dummy_symbol_to_prevent_lto_memzero(void * const pnt, const size_t len)
-{
-	(void) pnt;
-	(void) len;
-}
-
-__attribute__((weak)) void
-_dummy_symbol_to_prevent_lto_memcmp(const unsigned char *b1,
-		const unsigned char *b2,
-		const size_t len);
-__attribute__((weak)) void
-_dummy_symbol_to_prevent_lto_memcmp(const unsigned char *b1,
-		const unsigned char *b2,
-		const size_t len)
-{
-	(void) b1;
-	(void) b2;
-	(void) len;
-}
-#endif
-
-void
-rspamd_explicit_memzero(void * const pnt, const gsize len)
-{
-#if defined(HAVE_MEMSET_S)
-	if (memset_s (pnt, (rsize_t) len, 0, (rsize_t) len) != 0) {
-		g_assert (0);
-	}
-#elif defined(HAVE_EXPLICIT_BZERO)
-	explicit_bzero (pnt, len);
-#elif defined(HAVE_WEAK_SYMBOLS)
-	memset (pnt, 0, len);
-	_dummy_symbol_to_prevent_lto_memzero (pnt, len);
-#else
-	volatile unsigned char *pnt_ = (volatile unsigned char *) pnt;
-	gsize i = (gsize) 0U;
-	while (i < len) {
-		pnt_[i++] = 0U;
-	}
-#endif
-}
-
-gint
-rspamd_cryptobox_memcmp (const void *const b1_, const void *const b2_, gsize len)
-{
-#ifdef HAVE_WEAK_SYMBOLS
-	const unsigned char *b1 = (const unsigned char *) b1_;
-	const unsigned char *b2 = (const unsigned char *) b2_;
-#else
-	const volatile unsigned char *volatile b1 =
-			(const volatile unsigned char *volatile) b1_;
-	const volatile unsigned char *volatile b2 =
-			(const volatile unsigned char *volatile) b2_;
-#endif
-	gsize i;
-	volatile unsigned char d = 0U;
-
-#if HAVE_WEAK_SYMBOLS
-	_dummy_symbol_to_prevent_lto_memcmp (b1, b2, len);
-#endif
-
-	for (i = 0U; i < len; i++) {
-		d |= b1[i] ^ b2[i];
-	}
-
-	return (1 & ((d - 1) >> 8)) - 1;
-}
-
 static void
 rspamd_cryptobox_cpuid (gint cpu[4], gint info)
 {
@@ -370,13 +296,9 @@ rspamd_cryptobox_init (void)
 	ctx->cpu_extensions = buf->str;
 	g_string_free (buf, FALSE);
 	ctx->cpu_config = cpu_config;
+	g_assert (sodium_init () != -1);
 
 	ctx->chacha20_impl = chacha_load ();
-	ctx->poly1305_impl = poly1305_load ();
-	ctx->siphash_impl = siphash_load ();
-	ctx->curve25519_impl = curve25519_load ();
-	ctx->blake2_impl = blake2b_load ();
-	ctx->ed25519_impl = ed25519_load ();
 	ctx->base64_impl = base64_load ();
 #if defined(HAVE_USABLE_OPENSSL) && (OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER))
 	/* Needed for old openssl api, not sure about LibreSSL */
@@ -398,7 +320,7 @@ rspamd_cryptobox_keypair (rspamd_pk_t pk, rspamd_sk_t sk,
 		sk[31] &= 127;
 		sk[31] |= 64;
 
-		curve25519_base (pk, sk);
+		crypto_scalarmult_base (pk, sk);
 	}
 	else {
 #ifndef HAVE_USABLE_OPENSSL
@@ -438,7 +360,7 @@ rspamd_cryptobox_keypair_sig (rspamd_sig_pk_t pk, rspamd_sig_sk_t sk,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		ed25519_keypair (pk, sk);
+		crypto_sign_keypair (pk, sk);
 	}
 	else {
 #ifndef HAVE_USABLE_OPENSSL
@@ -487,8 +409,9 @@ rspamd_cryptobox_nm (rspamd_nm_t nm,
 		e[31] &= 127;
 		e[31] |= 64;
 
-		curve25519 (s, e, pk);
-		hchacha (s, n0, nm, 20);
+		if (crypto_scalarmult (s, e, pk) != -1) {
+			hchacha (s, n0, nm, 20);
+		}
 
 		rspamd_explicit_memzero (e, 32);
 	}
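With curve25519-donna removed, key generation and the shared-point computation in the hunks above reduce to libsodium's scalarmult primitives; rspamd still clamps the secret scalar explicitly and post-processes the shared point with HChaCha20. A hedged sketch of the underlying X25519 flow (names illustrative):

#include <sodium.h>

/* X25519: derive our public key and a raw shared point, as the
 * rspamd_cryptobox_keypair / rspamd_cryptobox_nm hunks now do. */
static int
x25519_shared (unsigned char shared[crypto_scalarmult_BYTES],
               unsigned char pk[crypto_scalarmult_BYTES],
               unsigned char sk[crypto_scalarmult_SCALARBYTES],
               const unsigned char peer_pk[crypto_scalarmult_BYTES])
{
    randombytes_buf (sk, crypto_scalarmult_SCALARBYTES);
    crypto_scalarmult_base (pk, sk); /* pk = sk * basepoint */

    /* Returns -1 for degenerate peer public keys (all-zero output) */
    return crypto_scalarmult (shared, sk, peer_pk);
}

Note that crypto_scalarmult clamps the scalar internally, so the explicit sk[31] masking in the diff is kept mainly for compatibility with keys generated by the old code.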
@@ -517,7 +440,7 @@ rspamd_cryptobox_nm (rspamd_nm_t nm,
 		g_assert (len == sizeof (s));
 
 		/* Still do hchacha iteration since we are not using SHA1 KDF */
-		hchacha (s, n0, nm, 20);
+		crypto_core_hchacha20 (nm, n0, s, NULL);
 
 		EC_KEY_free (lk);
 		EC_POINT_free (ec_pub);
@@ -528,13 +451,13 @@ rspamd_cryptobox_nm (rspamd_nm_t nm,
 }
 
 void
-rspamd_cryptobox_sign (guchar *sig, gsize *siglen_p,
+rspamd_cryptobox_sign (guchar *sig, unsigned long long *siglen_p,
 		const guchar *m, gsize mlen,
 		const rspamd_sk_t sk,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		ed25519_sign (sig, siglen_p, m, mlen, sk);
+		crypto_sign (sig, siglen_p, m, mlen, sk);
 	}
 	else {
 #ifndef HAVE_USABLE_OPENSSL
@@ -591,7 +514,7 @@ rspamd_cryptobox_verify (const guchar *sig,
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
 		if (siglen == rspamd_cryptobox_signature_bytes (RSPAMD_CRYPTOBOX_MODE_25519)) {
-			ret = ed25519_verify (sig, m, mlen, pk);
+			ret = (crypto_sign_verify_detached (sig, m, mlen, pk) == 0);
 		}
 	}
 	else {
@@ -653,7 +576,7 @@ static gsize
 rspamd_cryptobox_auth_ctx_len (enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		return sizeof (poly1305_state) + CRYPTOBOX_ALIGNMENT;
+		return sizeof (crypto_onetimeauth_state) + alignof (crypto_onetimeauth_state);
 	}
 	else {
 #ifndef HAVE_USABLE_OPENSSL
@@ -708,13 +631,13 @@ rspamd_cryptobox_auth_init (void *auth_ctx, void *enc_ctx,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		poly1305_state *mac_ctx;
+		crypto_onetimeauth_state *mac_ctx;
 		guchar RSPAMD_ALIGNED(32) subkey[CHACHA_BLOCKBYTES];
 
 		mac_ctx = cryptobox_align_ptr (auth_ctx, CRYPTOBOX_ALIGNMENT);
 		memset (subkey, 0, sizeof (subkey));
 		chacha_update (enc_ctx, subkey, subkey, sizeof (subkey));
-		poly1305_init (mac_ctx, (const poly1305_key *) subkey);
+		crypto_onetimeauth_init (mac_ctx, subkey);
 		rspamd_explicit_memzero (subkey, sizeof (subkey));
 
 		return mac_ctx;
@@ -739,8 +662,11 @@ rspamd_cryptobox_encrypt_update (void *enc_ctx, const guchar *in, gsize inlen,
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
 		gsize r;
+		chacha_state *s;
 
-		r = chacha_update (enc_ctx, in, out, inlen);
+		s = cryptobox_align_ptr (enc_ctx, CRYPTOBOX_ALIGNMENT);
+
+		r = chacha_update (s, in, out, inlen);
 
 		if (outlen != NULL) {
 			*outlen = r;
@@ -774,7 +700,10 @@ rspamd_cryptobox_auth_update (void *auth_ctx, const guchar *in, gsize inlen,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		poly1305_update (auth_ctx, in, inlen);
+		crypto_onetimeauth_state *mac_ctx;
+
+		mac_ctx = cryptobox_align_ptr (auth_ctx, CRYPTOBOX_ALIGNMENT);
+		crypto_onetimeauth_update (mac_ctx, in, inlen);
 
 		return TRUE;
 	}
@@ -794,7 +723,10 @@ rspamd_cryptobox_encrypt_final (void *enc_ctx, guchar *out, gsize remain,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		return chacha_final (enc_ctx, out);
+		chacha_state *s;
+
+		s = cryptobox_align_ptr (enc_ctx, CRYPTOBOX_ALIGNMENT);
+		return chacha_final (s, out);
 	}
 	else {
 #ifndef HAVE_USABLE_OPENSSL
@@ -817,7 +749,10 @@ rspamd_cryptobox_auth_final (void *auth_ctx, rspamd_mac_t sig,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		poly1305_finish (auth_ctx, sig);
+		crypto_onetimeauth_state *mac_ctx;
+
+		mac_ctx = cryptobox_align_ptr (auth_ctx, CRYPTOBOX_ALIGNMENT);
+		crypto_onetimeauth_final (mac_ctx, sig);
 
 		return TRUE;
 	}
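The signing hunks swap ed25519_sign/ed25519_verify for the crypto_sign family, which is why siglen_p widens to unsigned long long in the prototype. Verification uses the detached variant; a minimal self-contained sketch (sign_and_check is an illustrative helper, not part of the commit):

#include <sodium.h>

/* Detached Ed25519 sign + verify, mirroring how rspamd_cryptobox_verify
 * now calls crypto_sign_verify_detached */
static int
sign_and_check (const unsigned char *m, size_t mlen)
{
    unsigned char pk[crypto_sign_PUBLICKEYBYTES];
    unsigned char sk[crypto_sign_SECRETKEYBYTES];
    unsigned char sig[crypto_sign_BYTES];
    unsigned long long siglen;

    crypto_sign_keypair (pk, sk);
    crypto_sign_detached (sig, &siglen, m, mlen, sk);

    return crypto_sign_verify_detached (sig, m, mlen, pk); /* 0 on success */
}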
@@ -880,13 +815,13 @@ rspamd_cryptobox_auth_verify_init (void *auth_ctx, void *enc_ctx,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519))
 	{
-		poly1305_state *mac_ctx;
+		crypto_onetimeauth_state *mac_ctx;
 		guchar RSPAMD_ALIGNED(32) subkey[CHACHA_BLOCKBYTES];
 
 		mac_ctx = cryptobox_align_ptr (auth_ctx, CRYPTOBOX_ALIGNMENT);
 		memset (subkey, 0, sizeof (subkey));
 		chacha_update (enc_ctx, subkey, subkey, sizeof (subkey));
-		poly1305_init (mac_ctx, (const poly1305_key *) subkey);
+		crypto_onetimeauth_init (mac_ctx, subkey);
 		rspamd_explicit_memzero (subkey, sizeof (subkey));
 
 		return mac_ctx;
@@ -911,8 +846,10 @@ rspamd_cryptobox_decrypt_update (void *enc_ctx, const guchar *in, gsize inlen,
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
 		gsize r;
+		chacha_state *s;
 
-		r = chacha_update (enc_ctx, in, out, inlen);
+		s = cryptobox_align_ptr (enc_ctx, CRYPTOBOX_ALIGNMENT);
+		r = chacha_update (s, in, out, inlen);
 
 		if (outlen != NULL) {
 			*outlen = r;
@@ -945,7 +882,10 @@ rspamd_cryptobox_auth_verify_update (void *auth_ctx,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		poly1305_update (auth_ctx, in, inlen);
+		crypto_onetimeauth_state *mac_ctx;
+
+		mac_ctx = cryptobox_align_ptr (auth_ctx, CRYPTOBOX_ALIGNMENT);
+		crypto_onetimeauth_update (mac_ctx, in, inlen);
 
 		return TRUE;
 	}
@@ -965,7 +905,10 @@ rspamd_cryptobox_decrypt_final (void *enc_ctx, guchar *out, gsize remain,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		chacha_final (enc_ctx, out);
+		chacha_state *s;
+
+		s = cryptobox_align_ptr (enc_ctx, CRYPTOBOX_ALIGNMENT);
+		chacha_final (s, out);
 
 		return TRUE;
 	}
@@ -993,10 +936,12 @@ rspamd_cryptobox_auth_verify_final (void *auth_ctx, const rspamd_mac_t sig,
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
 		rspamd_mac_t mac;
+		crypto_onetimeauth_state *mac_ctx;
 
-		poly1305_finish (auth_ctx, mac);
+		mac_ctx = cryptobox_align_ptr (auth_ctx, CRYPTOBOX_ALIGNMENT);
+		crypto_onetimeauth_final (mac_ctx, mac);
 
-		if (!poly1305_verify (mac, sig)) {
+		if (crypto_verify_16 (mac, sig) != 0) {
 			return FALSE;
 		}
 
@@ -1025,7 +970,10 @@ rspamd_cryptobox_cleanup (void *enc_ctx, void *auth_ctx,
 		enum rspamd_cryptobox_mode mode)
 {
 	if (G_LIKELY (mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
-		rspamd_explicit_memzero (auth_ctx, sizeof (poly1305_state));
+		crypto_onetimeauth_state *mac_ctx;
+
+		mac_ctx = cryptobox_align_ptr (auth_ctx, CRYPTOBOX_ALIGNMENT);
+		rspamd_explicit_memzero (mac_ctx, sizeof (*mac_ctx));
 	}
 	else {
 #ifndef HAVE_USABLE_OPENSSL
@@ -1272,7 +1220,7 @@ rspamd_cryptobox_siphash (unsigned char *out, const unsigned char *in,
 		unsigned long long inlen,
 		const rspamd_sipkey_t k)
 {
-	siphash24 (out, in, inlen, k);
+	crypto_shorthash_siphash24 (out, in, inlen, k);
 }
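Every poly1305_* call site above becomes crypto_onetimeauth_*, with one extra step: crypto_onetimeauth_state carries an alignment requirement, so the opaque context buffer is over-allocated (see rspamd_cryptobox_auth_ctx_len) and re-aligned at each use via cryptobox_align_ptr. The same pattern in isolation, as a sketch (ALIGN_PTR mirrors the macro from the diff; other names are illustrative):

#include <sodium.h>
#include <stdalign.h>
#include <stdint.h>

#define ALIGN_PTR(p, a) \
    ((void *) (((uintptr_t) (p) + ((uintptr_t) (a) - 1)) & ~((uintptr_t) (a) - 1)))

/* MAC a message using a state placed inside a possibly unaligned opaque
 * buffer, as the auth_init/auth_update/auth_final hunks do */
static void
mac_message (unsigned char tag[crypto_onetimeauth_BYTES],
             const unsigned char key[crypto_onetimeauth_KEYBYTES],
             const unsigned char *m, size_t mlen,
             void *opaque /* >= sizeof(state) + alignof(state) bytes */)
{
    crypto_onetimeauth_state *st =
        ALIGN_PTR (opaque, alignof (crypto_onetimeauth_state));

    crypto_onetimeauth_init (st, key);
    crypto_onetimeauth_update (st, m, mlen);
    crypto_onetimeauth_final (st, tag);
}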
@@ -1284,8 +1232,9 @@ rspamd_cryptobox_pbkdf2 (const char *pass, gsize pass_len,
 		const guint8 *salt, gsize salt_len, guint8 *key, gsize key_len,
 		unsigned int rounds)
 {
-	guint8 *asalt, obuf[BLAKE2B_OUTBYTES];
-	guint8 d1[BLAKE2B_OUTBYTES], d2[BLAKE2B_OUTBYTES];
+	guint8 *asalt, obuf[crypto_generichash_blake2b_BYTES_MAX];
+	guint8 d1[crypto_generichash_blake2b_BYTES_MAX],
+			d2[crypto_generichash_blake2b_BYTES_MAX];
 	unsigned int i, j;
 	unsigned int count;
 	gsize r;
@@ -1305,11 +1254,44 @@ rspamd_cryptobox_pbkdf2 (const char *pass, gsize pass_len,
 		asalt[salt_len + 1] = (count >> 16) & 0xff;
 		asalt[salt_len + 2] = (count >> 8) & 0xff;
 		asalt[salt_len + 3] = count & 0xff;
-		blake2b_keyed (d1, asalt, salt_len + 4, pass, pass_len);
+
+		if (pass_len <= crypto_generichash_blake2b_KEYBYTES_MAX) {
+			crypto_generichash_blake2b (d1, sizeof (d1), asalt, salt_len + 4,
+					pass, pass_len);
+		}
+		else {
+			guint8 k[crypto_generichash_blake2b_BYTES_MAX];
+
+			/*
+			 * We use additional blake2 iteration to store large key
+			 * XXX: it is not compatible with the original implementation but safe
+			 */
+			crypto_generichash_blake2b (k, sizeof (k), pass, pass_len,
+					NULL, 0);
+			crypto_generichash_blake2b (d1, sizeof (d1), asalt, salt_len + 4,
+					k, sizeof (k));
+		}
+
 		memcpy (obuf, d1, sizeof(obuf));
 
 		for (i = 1; i < rounds; i++) {
-			blake2b_keyed (d2, d1, BLAKE2B_OUTBYTES, pass, pass_len);
+			if (pass_len <= crypto_generichash_blake2b_KEYBYTES_MAX) {
+				crypto_generichash_blake2b (d2, sizeof (d2), d1, sizeof (d1),
+						pass, pass_len);
+			}
+			else {
+				guint8 k[crypto_generichash_blake2b_BYTES_MAX];
+
+				/*
+				 * We use additional blake2 iteration to store large key
+				 * XXX: it is not compatible with the original implementation but safe
+				 */
+				crypto_generichash_blake2b (k, sizeof (k), pass, pass_len,
+						NULL, 0);
+				crypto_generichash_blake2b (d2, sizeof (d2), d1, sizeof (d1),
+						k, sizeof (k));
+			}
+
 			memcpy (d1, d2, sizeof(d1));
 
 			for (j = 0; j < sizeof(obuf); j++) {
@@ -1317,7 +1299,7 @@ rspamd_cryptobox_pbkdf2 (const char *pass, gsize pass_len,
 			}
 		}
 
-		r = MIN(key_len, BLAKE2B_OUTBYTES);
+		r = MIN(key_len, crypto_generichash_blake2b_BYTES_MAX);
 		memcpy (key, obuf, r);
 		key += r;
 		key_len -= r;
@@ -1443,13 +1425,19 @@ rspamd_cryptobox_mac_bytes (enum rspamd_cryptobox_mode mode)
 }
 
 void
-rspamd_cryptobox_hash_init (void *st, const guchar *key, gsize keylen)
+rspamd_cryptobox_hash_init (void *p, const guchar *key, gsize keylen)
 {
 	if (key != NULL && keylen > 0) {
-		blake2b_keyed_init (st, key, keylen);
+		crypto_generichash_blake2b_state *st = cryptobox_align_ptr (p,
+				alignof(crypto_generichash_blake2b_state));
+		crypto_generichash_blake2b_init (st, key, keylen,
+				crypto_generichash_blake2b_BYTES_MAX);
 	}
 	else {
-		blake2b_init (st);
+		crypto_generichash_blake2b_state *st = cryptobox_align_ptr (p,
+				alignof(crypto_generichash_blake2b_state));
+		crypto_generichash_blake2b_init (st, key, keylen,
+				crypto_generichash_blake2b_BYTES_MAX);
 	}
 }
 
@@ -1457,19 +1445,22 @@ rspamd_cryptobox_hash_init (void *st, const guchar *key, gsize keylen)
 * Update hash with data portion
 */
 void
-rspamd_cryptobox_hash_update (void *st, const guchar *data, gsize len)
+rspamd_cryptobox_hash_update (void *p, const guchar *data, gsize len)
 {
-	blake2b_update (st, data, len);
+	crypto_generichash_blake2b_state *st = cryptobox_align_ptr (p,
+			alignof(crypto_generichash_blake2b_state));
+	crypto_generichash_blake2b_update (st, data, len);
 }
 
 /**
 * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length
 */
 void
-rspamd_cryptobox_hash_final (void *st, guchar *out)
+rspamd_cryptobox_hash_final (void *p, guchar *out)
 {
-	blake2b_final (st, out);
-	rspamd_explicit_memzero (st, rspamd_cryptobox_HASHSTATEBYTES);
+	crypto_generichash_blake2b_state *st = cryptobox_align_ptr (p,
+			alignof(crypto_generichash_blake2b_state));
+	crypto_generichash_blake2b_final (st, out, crypto_generichash_blake2b_BYTES_MAX);
 }
 
 /**
@@ -1481,11 +1472,8 @@ void
 rspamd_cryptobox_hash (guchar *out,
 		const guchar *key, gsize keylen)
 {
-	blake2b_state RSPAMD_ALIGNED(32) st;
-
-	rspamd_cryptobox_hash_init (&st, key, keylen);
-	rspamd_cryptobox_hash_update (&st, data, len);
-	rspamd_cryptobox_hash_final (&st, out);
+	crypto_generichash_blake2b (out, crypto_generichash_blake2b_BYTES_MAX,
+			data, len, key, keylen);
 }
 
 G_STATIC_ASSERT (sizeof (t1ha_context_t) <=
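One behavioural subtlety in the pbkdf2 hunk above: BLAKE2b accepts keys of at most crypto_generichash_blake2b_KEYBYTES_MAX (64) bytes, while the old blake2b_keyed took any length, so over-long passwords are now pre-hashed down to 64 bytes first; as the XXX comment notes, this changes the derived output for such passwords but remains safe. The fallback extracted into a sketch (keyed_hash is an illustrative name):

#include <sodium.h>
#include <stdint.h>

/* Keyed BLAKE2b that tolerates keys longer than the 64-byte limit by
 * pre-hashing them, matching the pbkdf2 fallback above (output differs
 * from the previous blake2b_keyed for such keys). */
static void
keyed_hash (uint8_t out[crypto_generichash_blake2b_BYTES_MAX],
            const uint8_t *in, size_t inlen,
            const uint8_t *key, size_t keylen)
{
    if (keylen <= crypto_generichash_blake2b_KEYBYTES_MAX) {
        crypto_generichash_blake2b (out, crypto_generichash_blake2b_BYTES_MAX,
                                    in, inlen, key, keylen);
    }
    else {
        uint8_t k[crypto_generichash_blake2b_BYTES_MAX];

        /* Compress the over-long key to 64 bytes, then hash keyed */
        crypto_generichash_blake2b (k, sizeof (k), key, keylen, NULL, 0);
        crypto_generichash_blake2b (out, crypto_generichash_blake2b_BYTES_MAX,
                                    in, inlen, k, sizeof (k));
    }
}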
diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index d9e4f51cd..3924d7fe3 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -18,6 +18,8 @@
 
 #include "config.h"
 
+#include <sodium.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -41,7 +43,7 @@ struct rspamd_cryptobox_segment {
 #define rspamd_cryptobox_SIPKEYBYTES 16
 #define rspamd_cryptobox_HASHBYTES 64
 #define rspamd_cryptobox_HASHKEYBYTES 64
-#define rspamd_cryptobox_HASHSTATEBYTES 256
+#define rspamd_cryptobox_HASHSTATEBYTES sizeof(crypto_generichash_blake2b_state) + 64
 #define rspamd_cryptobox_MAX_SIGSKBYTES 64
 #define rspamd_cryptobox_MAX_SIGPKBYTES 32
 #define rspamd_cryptobox_MAX_SIGBYTES 72
@@ -72,12 +74,7 @@ enum rspamd_cryptobox_mode {
 
 struct rspamd_cryptobox_library_ctx {
 	gchar *cpu_extensions;
-	const gchar *curve25519_impl;
-	const gchar *ed25519_impl;
 	const gchar *chacha20_impl;
-	const gchar *poly1305_impl;
-	const gchar *siphash_impl;
-	const gchar *blake2_impl;
 	const gchar *base64_impl;
 	unsigned long cpu_config;
 };
@@ -203,7 +200,7 @@ void rspamd_cryptobox_nm (rspamd_nm_t nm, const rspamd_pk_t pk,
 * @param mlen input length
 * @param sk secret key
 */
-void rspamd_cryptobox_sign (guchar *sig, gsize *siglen_p,
+void rspamd_cryptobox_sign (guchar *sig, unsigned long long *siglen_p,
 		const guchar *m, gsize mlen,
 		const rspamd_sk_t sk,
 		enum rspamd_cryptobox_mode mode);
@@ -229,7 +226,8 @@ bool rspamd_cryptobox_verify (const guchar *sig,
 * @param buf buffer to zero
 * @param buflen length of buffer
 */
-void rspamd_explicit_memzero (void *const buf, gsize buflen);
+
+#define rspamd_explicit_memzero sodium_memzero
 
 /**
 * Constant time memcmp
@@ -238,8 +236,7 @@ void rspamd_explicit_memzero (void *const buf, gsize buflen);
 * @param len
 * @return
 */
-gint
-rspamd_cryptobox_memcmp (const void *const b1_, const void *const b2_, gsize len);
+#define rspamd_cryptobox_memcmp sodium_memcmp
 
 /**
 * Calculates siphash-2-4 for a message
@@ -317,9 +314,7 @@ guint rspamd_cryptobox_mac_bytes (enum rspamd_cryptobox_mode mode);
 guint rspamd_cryptobox_signature_bytes (enum rspamd_cryptobox_mode mode);
 
 /* Hash IUF interface */
-typedef struct rspamd_cryptobox_hash_state_s {
-	unsigned char opaque[256];
-} rspamd_cryptobox_hash_state_t;
+typedef crypto_generichash_blake2b_state rspamd_cryptobox_hash_state_t;
 
 /**
 * Init cryptobox hash state using key if needed, `st` must point to the buffer
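rspamd_explicit_memzero and rspamd_cryptobox_memcmp shrink to macros over their libsodium equivalents. sodium_memcmp compares in constant time and returns only 0 or -1 rather than a lexicographic ordering, which matches how the old helper was used; sodium_memzero survives link-time optimisation without the weak-symbol tricks deleted from cryptobox.c. For instance (check_tag is an illustrative helper):

#include <sodium.h>

static int
check_tag (unsigned char computed[16], const unsigned char expected[16])
{
    /* Constant-time comparison: 0 if equal, -1 otherwise */
    int ok = (sodium_memcmp (computed, expected, 16) == 0);

    /* Wipe the computed MAC; the call is not elided as a dead store */
    sodium_memzero (computed, 16);

    return ok;
}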
diff --git a/src/libcryptobox/curve25519/LICENSE.md b/src/libcryptobox/curve25519/LICENSE.md
deleted file mode 100644
index 44705d0bf..000000000
--- a/src/libcryptobox/curve25519/LICENSE.md
+++ /dev/null
@@ -1,49 +0,0 @@
-Copyright 2008, Google Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-* Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-curve25519-donna: Curve25519 elliptic curve, public key function
-
-http://code.google.com/p/curve25519-donna/
-
-Adam Langley <agl@imperialviolet.org>
-
-Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
-
-More information about curve25519 can be found here
-  http://cr.yp.to/ecdh.html
-
-djb's sample implementation of curve25519 is written in a special assembly
-language called qhasm and uses the floating point registers.
-
-This is, almost, a clean room reimplementation from the curve25519 paper. It
-uses many of the tricks described therein. Only the crecip function is taken
-from the sample implementation.
-
-avx code:
-public domain implementation by Tung Chou
\ No newline at end of file diff --git a/src/libcryptobox/curve25519/avx.S b/src/libcryptobox/curve25519/avx.S deleted file mode 100644 index 061b6c3d6..000000000 --- a/src/libcryptobox/curve25519/avx.S +++ /dev/null @@ -1,3245 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -.text - -.p2align 5 -GLOBAL_HIDDEN_FN_EXT ladder_avx -ladder_avx_local: -mov %rsp,%r11 -and $31,%r11 -add $1856,%r11 -sub %r11,%rsp -movq %r11,1824(%rsp) -movq %r12,1832(%rsp) -movq %r13,1840(%rsp) -movq %r14,1848(%rsp) -vmovdqa v0_0(%rip),%xmm0 -vmovdqa v1_0(%rip),%xmm1 -vmovdqu 0(%rdi),%xmm2 -vmovdqa %xmm2,0(%rsp) -vmovdqu 16(%rdi),%xmm2 -vmovdqa %xmm2,16(%rsp) -vmovdqu 32(%rdi),%xmm2 -vmovdqa %xmm2,32(%rsp) -vmovdqu 48(%rdi),%xmm2 -vmovdqa %xmm2,48(%rsp) -vmovdqu 64(%rdi),%xmm2 -vmovdqa %xmm2,64(%rsp) -vmovdqa %xmm1,80(%rsp) -vmovdqa %xmm0,96(%rsp) -vmovdqa %xmm0,112(%rsp) -vmovdqa %xmm0,128(%rsp) -vmovdqa %xmm0,144(%rsp) -vmovdqa %xmm1,%xmm0 -vpxor %xmm1,%xmm1,%xmm1 -vpxor %xmm2,%xmm2,%xmm2 -vpxor %xmm3,%xmm3,%xmm3 -vpxor %xmm4,%xmm4,%xmm4 -vpxor %xmm5,%xmm5,%xmm5 -vpxor %xmm6,%xmm6,%xmm6 -vpxor %xmm7,%xmm7,%xmm7 -vpxor %xmm8,%xmm8,%xmm8 -vpxor %xmm9,%xmm9,%xmm9 -vmovdqu 0(%rdi),%xmm10 -vmovdqa %xmm10,160(%rsp) -vmovdqu 16(%rdi),%xmm10 -vmovdqa %xmm10,176(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,192(%rsp) -vmovdqu 32(%rdi),%xmm10 -vmovdqa %xmm10,208(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,224(%rsp) -vmovdqu 48(%rdi),%xmm10 -vmovdqa %xmm10,240(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,256(%rsp) -vmovdqu 64(%rdi),%xmm10 -vmovdqa %xmm10,272(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,288(%rsp) -vmovdqu 8(%rdi),%xmm10 -vpmuludq v2_1(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,304(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,320(%rsp) -vmovdqu 24(%rdi),%xmm10 -vpmuludq v2_1(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,336(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,352(%rsp) -vmovdqu 40(%rdi),%xmm10 -vpmuludq v2_1(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,368(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,384(%rsp) -vmovdqu 56(%rdi),%xmm10 -vpmuludq v2_1(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,400(%rsp) -vpmuludq v19_19(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,416(%rsp) -vmovdqu 0(%rdi),%xmm10 -vmovdqu 64(%rdi),%xmm11 -vblendps $12, %xmm11, %xmm10, %xmm10 -vpshufd $2,%xmm10,%xmm10 -vpmuludq v38_1(%rip),%xmm10,%xmm10 -vmovdqa %xmm10,432(%rsp) -movq 0(%rsi),%rdx -movq 8(%rsi),%rcx -movq 16(%rsi),%r8 -movq 24(%rsi),%r9 -shrd $1,%rcx,%rdx -shrd $1,%r8,%rcx -shrd $1,%r9,%r8 -shr $1,%r9 -xorq 0(%rsi),%rdx -xorq 8(%rsi),%rcx -xorq 16(%rsi),%r8 -xorq 24(%rsi),%r9 -leaq 800(%rsp),%rsi -mov $64,%rax - -.p2align 4 -._ladder_small_loop: -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -shr $1,%rdx -shr $1,%rcx -shr $1,%r8 -shr $1,%r9 -and $1,%r10d -and $1,%r11d -and $1,%r12d -and $1,%r13d -neg %r10 -neg %r11 -neg %r12 -neg %r13 -movl %r10d,0(%rsi) -movl %r11d,256(%rsi) -movl %r12d,512(%rsi) -movl %r13d,768(%rsi) -add $4,%rsi -sub $1,%rax -jne ._ladder_small_loop -mov $255,%rdx -add $760,%rsi - -.p2align 4 -._ladder_loop: -sub $1,%rdx -vbroadcastss 0(%rsi),%xmm10 -sub $4,%rsi -vmovdqa 0(%rsp),%xmm11 -vmovdqa 80(%rsp),%xmm12 -vpxor %xmm11,%xmm0,%xmm13 -vpand %xmm10,%xmm13,%xmm13 -vpxor %xmm13,%xmm0,%xmm0 -vpxor %xmm13,%xmm11,%xmm11 -vpxor %xmm12,%xmm1,%xmm13 -vpand %xmm10,%xmm13,%xmm13 -vpxor %xmm13,%xmm1,%xmm1 -vpxor %xmm13,%xmm12,%xmm12 -vmovdqa 16(%rsp),%xmm13 -vmovdqa 96(%rsp),%xmm14 -vpxor %xmm13,%xmm2,%xmm15 -vpand 
%xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm2,%xmm2 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm3,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm3,%xmm3 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,0(%rsp) -vmovdqa %xmm14,16(%rsp) -vmovdqa 32(%rsp),%xmm13 -vmovdqa 112(%rsp),%xmm14 -vpxor %xmm13,%xmm4,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm4,%xmm4 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm5,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm5,%xmm5 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,32(%rsp) -vmovdqa %xmm14,80(%rsp) -vmovdqa 48(%rsp),%xmm13 -vmovdqa 128(%rsp),%xmm14 -vpxor %xmm13,%xmm6,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm6,%xmm6 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm7,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm7,%xmm7 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,48(%rsp) -vmovdqa %xmm14,96(%rsp) -vmovdqa 64(%rsp),%xmm13 -vmovdqa 144(%rsp),%xmm14 -vpxor %xmm13,%xmm8,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm8,%xmm8 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm9,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm9,%xmm9 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,64(%rsp) -vmovdqa %xmm14,112(%rsp) -vpaddq subc0(%rip),%xmm11,%xmm10 -vpsubq %xmm12,%xmm10,%xmm10 -vpaddq %xmm12,%xmm11,%xmm11 -vpunpckhqdq %xmm10,%xmm11,%xmm12 -vpunpcklqdq %xmm10,%xmm11,%xmm10 -vpaddq %xmm1,%xmm0,%xmm11 -vpaddq subc0(%rip),%xmm0,%xmm0 -vpsubq %xmm1,%xmm0,%xmm0 -vpunpckhqdq %xmm11,%xmm0,%xmm1 -vpunpcklqdq %xmm11,%xmm0,%xmm0 -vpmuludq %xmm0,%xmm10,%xmm11 -vpmuludq %xmm1,%xmm10,%xmm13 -vmovdqa %xmm1,128(%rsp) -vpaddq %xmm1,%xmm1,%xmm1 -vpmuludq %xmm0,%xmm12,%xmm14 -vmovdqa %xmm0,144(%rsp) -vpaddq %xmm14,%xmm13,%xmm13 -vpmuludq %xmm1,%xmm12,%xmm0 -vmovdqa %xmm1,448(%rsp) -vpaddq %xmm3,%xmm2,%xmm1 -vpaddq subc2(%rip),%xmm2,%xmm2 -vpsubq %xmm3,%xmm2,%xmm2 -vpunpckhqdq %xmm1,%xmm2,%xmm3 -vpunpcklqdq %xmm1,%xmm2,%xmm1 -vpmuludq %xmm1,%xmm10,%xmm2 -vpaddq %xmm2,%xmm0,%xmm0 -vpmuludq %xmm3,%xmm10,%xmm2 -vmovdqa %xmm3,464(%rsp) -vpaddq %xmm3,%xmm3,%xmm3 -vpmuludq %xmm1,%xmm12,%xmm14 -vmovdqa %xmm1,480(%rsp) -vpaddq %xmm14,%xmm2,%xmm2 -vpmuludq %xmm3,%xmm12,%xmm1 -vmovdqa %xmm3,496(%rsp) -vpaddq %xmm5,%xmm4,%xmm3 -vpaddq subc2(%rip),%xmm4,%xmm4 -vpsubq %xmm5,%xmm4,%xmm4 -vpunpckhqdq %xmm3,%xmm4,%xmm5 -vpunpcklqdq %xmm3,%xmm4,%xmm3 -vpmuludq %xmm3,%xmm10,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vpmuludq %xmm5,%xmm10,%xmm4 -vmovdqa %xmm5,512(%rsp) -vpaddq %xmm5,%xmm5,%xmm5 -vpmuludq %xmm3,%xmm12,%xmm14 -vmovdqa %xmm3,528(%rsp) -vpaddq %xmm14,%xmm4,%xmm4 -vpaddq %xmm7,%xmm6,%xmm3 -vpaddq subc2(%rip),%xmm6,%xmm6 -vpsubq %xmm7,%xmm6,%xmm6 -vpunpckhqdq %xmm3,%xmm6,%xmm7 -vpunpcklqdq %xmm3,%xmm6,%xmm3 -vpmuludq %xmm3,%xmm10,%xmm6 -vpmuludq %xmm5,%xmm12,%xmm14 -vmovdqa %xmm5,544(%rsp) -vpmuludq v19_19(%rip),%xmm5,%xmm5 -vmovdqa %xmm5,560(%rsp) -vpaddq %xmm14,%xmm6,%xmm6 -vpmuludq %xmm7,%xmm10,%xmm5 -vmovdqa %xmm7,576(%rsp) -vpaddq %xmm7,%xmm7,%xmm7 -vpmuludq %xmm3,%xmm12,%xmm14 -vmovdqa %xmm3,592(%rsp) -vpaddq %xmm14,%xmm5,%xmm5 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vmovdqa %xmm3,608(%rsp) -vpaddq %xmm9,%xmm8,%xmm3 -vpaddq subc2(%rip),%xmm8,%xmm8 -vpsubq %xmm9,%xmm8,%xmm8 -vpunpckhqdq %xmm3,%xmm8,%xmm9 -vpunpcklqdq %xmm3,%xmm8,%xmm3 -vmovdqa %xmm3,624(%rsp) -vpmuludq %xmm7,%xmm12,%xmm8 -vmovdqa %xmm7,640(%rsp) -vpmuludq v19_19(%rip),%xmm7,%xmm7 -vmovdqa %xmm7,656(%rsp) -vpmuludq %xmm3,%xmm10,%xmm7 -vpaddq %xmm7,%xmm8,%xmm8 -vpmuludq %xmm9,%xmm10,%xmm7 -vmovdqa %xmm9,672(%rsp) -vpaddq %xmm9,%xmm9,%xmm9 -vpmuludq %xmm3,%xmm12,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq 
v19_19(%rip),%xmm3,%xmm3 -vmovdqa %xmm3,688(%rsp) -vpmuludq v19_19(%rip),%xmm12,%xmm12 -vpmuludq %xmm9,%xmm12,%xmm3 -vmovdqa %xmm9,704(%rsp) -vpaddq %xmm3,%xmm11,%xmm11 -vmovdqa 0(%rsp),%xmm3 -vmovdqa 16(%rsp),%xmm9 -vpaddq subc2(%rip),%xmm3,%xmm10 -vpsubq %xmm9,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm10,%xmm3,%xmm9 -vpunpcklqdq %xmm10,%xmm3,%xmm3 -vpmuludq 144(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpmuludq 128(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm2,%xmm2 -vpmuludq 480(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpmuludq 464(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm4,%xmm4 -vpmuludq 528(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpmuludq 512(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm5,%xmm5 -vpmuludq 592(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpmuludq 576(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vpmuludq 624(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpmuludq 672(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 144(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 448(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm1,%xmm1 -vpmuludq 480(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 496(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 528(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 544(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpmuludq 592(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm9,%xmm9 -vpmuludq 640(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 624(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 704(%rsp),%xmm9,%xmm9 -vpaddq %xmm9,%xmm0,%xmm0 -vmovdqa 32(%rsp),%xmm3 -vmovdqa 80(%rsp),%xmm9 -vpaddq subc2(%rip),%xmm3,%xmm10 -vpsubq %xmm9,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm10,%xmm3,%xmm9 -vpunpcklqdq %xmm10,%xmm3,%xmm3 -vpmuludq 144(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpmuludq 128(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm4,%xmm4 -vpmuludq 480(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpmuludq 464(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm5,%xmm5 -vpmuludq 528(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpmuludq 512(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vpmuludq 592(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpmuludq 576(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm13,%xmm13 -vpmuludq 624(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpmuludq 672(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 144(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 448(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 480(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 496(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpmuludq 528(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm9,%xmm9 -vpmuludq 544(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 592(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 640(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm0,%xmm0 -vpmuludq 624(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 704(%rsp),%xmm9,%xmm9 -vpaddq %xmm9,%xmm1,%xmm1 -vmovdqa 48(%rsp),%xmm3 -vmovdqa 96(%rsp),%xmm9 -vpaddq subc2(%rip),%xmm3,%xmm10 -vpsubq %xmm9,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm10,%xmm3,%xmm9 -vpunpcklqdq %xmm10,%xmm3,%xmm3 -vpmuludq 144(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpmuludq 128(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm5,%xmm5 -vpmuludq 480(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpmuludq 464(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 
-vpmuludq 528(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpmuludq 512(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm13,%xmm13 -vpmuludq 592(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpmuludq 576(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm2,%xmm2 -vpmuludq 624(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpmuludq 672(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 144(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 448(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpmuludq 480(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm9,%xmm9 -vpmuludq 496(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 528(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 544(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm0,%xmm0 -vpmuludq 592(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 640(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm1,%xmm1 -vpmuludq 624(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 704(%rsp),%xmm9,%xmm9 -vpaddq %xmm9,%xmm6,%xmm6 -vmovdqa 64(%rsp),%xmm3 -vmovdqa 112(%rsp),%xmm9 -vpaddq subc2(%rip),%xmm3,%xmm10 -vpsubq %xmm9,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm10,%xmm3,%xmm9 -vpunpcklqdq %xmm10,%xmm3,%xmm3 -vpmuludq 144(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpmuludq 128(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vpmuludq 480(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpmuludq 464(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm13,%xmm13 -vpmuludq 528(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpmuludq 512(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm2,%xmm2 -vpmuludq 592(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpmuludq 576(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm4,%xmm4 -vpmuludq 624(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpmuludq 672(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 144(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm9,%xmm9 -vpmuludq 448(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 480(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 496(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm0,%xmm0 -vpmuludq 528(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 544(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm1,%xmm1 -vpmuludq 592(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 640(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 624(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 704(%rsp),%xmm9,%xmm9 -vpaddq %xmm9,%xmm8,%xmm8 -vpsrlq $25,%xmm4,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpand m25(%rip),%xmm4,%xmm4 -vpsrlq $26,%xmm11,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpand m26(%rip),%xmm11,%xmm11 -vpsrlq $26,%xmm6,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpand m26(%rip),%xmm6,%xmm6 -vpsrlq $25,%xmm13,%xmm3 -vpaddq %xmm3,%xmm0,%xmm0 -vpand m25(%rip),%xmm13,%xmm13 -vpsrlq $25,%xmm5,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpand m25(%rip),%xmm5,%xmm5 -vpsrlq $26,%xmm0,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpand m26(%rip),%xmm0,%xmm0 -vpsrlq $26,%xmm8,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpand m26(%rip),%xmm8,%xmm8 -vpsrlq $25,%xmm2,%xmm3 -vpaddq %xmm3,%xmm1,%xmm1 -vpand m25(%rip),%xmm2,%xmm2 -vpsrlq $25,%xmm7,%xmm3 -vpsllq $4,%xmm3,%xmm9 -vpaddq %xmm3,%xmm11,%xmm11 -vpsllq $1,%xmm3,%xmm3 -vpaddq %xmm3,%xmm9,%xmm9 -vpaddq %xmm9,%xmm11,%xmm11 -vpand m25(%rip),%xmm7,%xmm7 -vpsrlq $26,%xmm1,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpand m26(%rip),%xmm1,%xmm1 -vpsrlq $26,%xmm11,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpand m26(%rip),%xmm11,%xmm11 -vpsrlq $25,%xmm4,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpand m25(%rip),%xmm4,%xmm4 -vpunpcklqdq %xmm13,%xmm11,%xmm3 -vpunpckhqdq 
%xmm13,%xmm11,%xmm9 -vpaddq subc0(%rip),%xmm9,%xmm10 -vpsubq %xmm3,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm3,%xmm10,%xmm9 -vpunpcklqdq %xmm3,%xmm10,%xmm10 -vpmuludq %xmm10,%xmm10,%xmm3 -vpaddq %xmm10,%xmm10,%xmm10 -vpmuludq %xmm9,%xmm10,%xmm11 -vpunpcklqdq %xmm2,%xmm0,%xmm12 -vpunpckhqdq %xmm2,%xmm0,%xmm0 -vpaddq subc2(%rip),%xmm0,%xmm2 -vpsubq %xmm12,%xmm2,%xmm2 -vpaddq %xmm0,%xmm12,%xmm12 -vpunpckhqdq %xmm12,%xmm2,%xmm0 -vpunpcklqdq %xmm12,%xmm2,%xmm2 -vpmuludq %xmm2,%xmm10,%xmm12 -vpaddq %xmm9,%xmm9,%xmm13 -vpmuludq %xmm13,%xmm9,%xmm9 -vpaddq %xmm9,%xmm12,%xmm12 -vpmuludq %xmm0,%xmm10,%xmm9 -vpmuludq %xmm2,%xmm13,%xmm14 -vpaddq %xmm14,%xmm9,%xmm9 -vpunpcklqdq %xmm4,%xmm1,%xmm14 -vpunpckhqdq %xmm4,%xmm1,%xmm1 -vpaddq subc2(%rip),%xmm1,%xmm4 -vpsubq %xmm14,%xmm4,%xmm4 -vpaddq %xmm1,%xmm14,%xmm14 -vpunpckhqdq %xmm14,%xmm4,%xmm1 -vpunpcklqdq %xmm14,%xmm4,%xmm4 -vmovdqa %xmm1,0(%rsp) -vpaddq %xmm1,%xmm1,%xmm1 -vmovdqa %xmm1,16(%rsp) -vpmuludq v19_19(%rip),%xmm1,%xmm1 -vmovdqa %xmm1,32(%rsp) -vpmuludq %xmm4,%xmm10,%xmm1 -vpmuludq %xmm2,%xmm2,%xmm14 -vpaddq %xmm14,%xmm1,%xmm1 -vpmuludq 0(%rsp),%xmm10,%xmm14 -vpmuludq %xmm4,%xmm13,%xmm15 -vpaddq %xmm15,%xmm14,%xmm14 -vpunpcklqdq %xmm5,%xmm6,%xmm15 -vpunpckhqdq %xmm5,%xmm6,%xmm5 -vpaddq subc2(%rip),%xmm5,%xmm6 -vpsubq %xmm15,%xmm6,%xmm6 -vpaddq %xmm5,%xmm15,%xmm15 -vpunpckhqdq %xmm15,%xmm6,%xmm5 -vpunpcklqdq %xmm15,%xmm6,%xmm6 -vmovdqa %xmm6,48(%rsp) -vpmuludq v19_19(%rip),%xmm6,%xmm6 -vmovdqa %xmm6,64(%rsp) -vmovdqa %xmm5,80(%rsp) -vpmuludq v38_38(%rip),%xmm5,%xmm5 -vmovdqa %xmm5,96(%rsp) -vpmuludq 48(%rsp),%xmm10,%xmm5 -vpaddq %xmm0,%xmm0,%xmm6 -vpmuludq %xmm6,%xmm0,%xmm0 -vpaddq %xmm0,%xmm5,%xmm5 -vpmuludq 80(%rsp),%xmm10,%xmm0 -vpmuludq %xmm4,%xmm6,%xmm15 -vpaddq %xmm15,%xmm0,%xmm0 -vpmuludq %xmm6,%xmm13,%xmm15 -vpaddq %xmm15,%xmm1,%xmm1 -vpmuludq %xmm6,%xmm2,%xmm15 -vpaddq %xmm15,%xmm14,%xmm14 -vpunpcklqdq %xmm7,%xmm8,%xmm15 -vpunpckhqdq %xmm7,%xmm8,%xmm7 -vpaddq subc2(%rip),%xmm7,%xmm8 -vpsubq %xmm15,%xmm8,%xmm8 -vpaddq %xmm7,%xmm15,%xmm15 -vpunpckhqdq %xmm15,%xmm8,%xmm7 -vpunpcklqdq %xmm15,%xmm8,%xmm8 -vmovdqa %xmm8,112(%rsp) -vpmuludq v19_19(%rip),%xmm8,%xmm8 -vmovdqa %xmm8,448(%rsp) -vpmuludq 112(%rsp),%xmm10,%xmm8 -vpmuludq %xmm7,%xmm10,%xmm10 -vpmuludq v38_38(%rip),%xmm7,%xmm15 -vpmuludq %xmm15,%xmm7,%xmm7 -vpaddq %xmm7,%xmm8,%xmm8 -vpmuludq %xmm15,%xmm13,%xmm7 -vpaddq %xmm7,%xmm3,%xmm3 -vpmuludq %xmm15,%xmm2,%xmm7 -vpaddq %xmm7,%xmm11,%xmm11 -vpmuludq 80(%rsp),%xmm13,%xmm7 -vpaddq %xmm7,%xmm7,%xmm7 -vpaddq %xmm7,%xmm8,%xmm8 -vpmuludq 16(%rsp),%xmm13,%xmm7 -vpaddq %xmm7,%xmm5,%xmm5 -vpmuludq 48(%rsp),%xmm13,%xmm7 -vpaddq %xmm7,%xmm0,%xmm0 -vpmuludq 112(%rsp),%xmm13,%xmm7 -vpaddq %xmm7,%xmm10,%xmm10 -vpmuludq %xmm15,%xmm6,%xmm7 -vpaddq %xmm7,%xmm12,%xmm12 -vpmuludq %xmm15,%xmm4,%xmm7 -vpaddq %xmm7,%xmm9,%xmm9 -vpaddq %xmm2,%xmm2,%xmm2 -vpmuludq %xmm4,%xmm2,%xmm7 -vpaddq %xmm7,%xmm5,%xmm5 -vpmuludq 448(%rsp),%xmm2,%xmm7 -vpaddq %xmm7,%xmm3,%xmm3 -vpmuludq 448(%rsp),%xmm6,%xmm7 -vpaddq %xmm7,%xmm11,%xmm11 -vpmuludq 0(%rsp),%xmm2,%xmm7 -vpaddq %xmm7,%xmm0,%xmm0 -vpmuludq 48(%rsp),%xmm2,%xmm7 -vpaddq %xmm7,%xmm8,%xmm8 -vpmuludq 80(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpmuludq 96(%rsp),%xmm4,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpmuludq %xmm4,%xmm4,%xmm2 -vpaddq %xmm2,%xmm8,%xmm8 -vpaddq %xmm4,%xmm4,%xmm2 -vpmuludq 448(%rsp),%xmm2,%xmm4 -vpaddq %xmm4,%xmm12,%xmm12 -vpmuludq 16(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vpmuludq 48(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm14,%xmm14 -vpmuludq 96(%rsp),%xmm6,%xmm4 
-vpaddq %xmm4,%xmm3,%xmm3 -vmovdqa 16(%rsp),%xmm4 -vpmuludq 448(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm9,%xmm9 -vpmuludq 16(%rsp),%xmm6,%xmm4 -vpaddq %xmm4,%xmm8,%xmm8 -vpmuludq 48(%rsp),%xmm6,%xmm4 -vpaddq %xmm4,%xmm10,%xmm10 -vpmuludq 80(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm4,%xmm4 -vpaddq %xmm4,%xmm5,%xmm5 -vpmuludq 112(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm0,%xmm0 -vmovdqa 48(%rsp),%xmm4 -vpaddq %xmm4,%xmm4,%xmm4 -vpmuludq 448(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vmovdqa 80(%rsp),%xmm4 -vpaddq %xmm4,%xmm4,%xmm4 -vpmuludq 448(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm14,%xmm14 -vpmuludq 64(%rsp),%xmm2,%xmm4 -vpaddq %xmm4,%xmm3,%xmm3 -vmovdqa 16(%rsp),%xmm4 -vpmuludq 64(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm11,%xmm11 -vmovdqa 16(%rsp),%xmm4 -vpmuludq 96(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm12,%xmm12 -vmovdqa 48(%rsp),%xmm4 -vpmuludq 96(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm9,%xmm9 -vpmuludq 0(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vmovdqa 32(%rsp),%xmm2 -vpmuludq 0(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm3,%xmm3 -vmovdqa 64(%rsp),%xmm2 -vpmuludq 48(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vmovdqa 96(%rsp),%xmm2 -vpmuludq 80(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm1,%xmm1 -vmovdqa 448(%rsp),%xmm2 -vpmuludq 112(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpsrlq $26,%xmm3,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpand m26(%rip),%xmm3,%xmm3 -vpsrlq $25,%xmm14,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpand m25(%rip),%xmm14,%xmm14 -vpsrlq $25,%xmm11,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpand m25(%rip),%xmm11,%xmm11 -vpsrlq $26,%xmm5,%xmm2 -vpaddq %xmm2,%xmm0,%xmm0 -vpand m26(%rip),%xmm5,%xmm5 -vpsrlq $26,%xmm12,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vpand m26(%rip),%xmm12,%xmm12 -vpsrlq $25,%xmm0,%xmm2 -vpaddq %xmm2,%xmm8,%xmm8 -vpand m25(%rip),%xmm0,%xmm0 -vpsrlq $25,%xmm9,%xmm2 -vpaddq %xmm2,%xmm1,%xmm1 -vpand m25(%rip),%xmm9,%xmm9 -vpsrlq $26,%xmm8,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpand m26(%rip),%xmm8,%xmm8 -vpsrlq $26,%xmm1,%xmm2 -vpaddq %xmm2,%xmm14,%xmm14 -vpand m26(%rip),%xmm1,%xmm1 -vpsrlq $25,%xmm10,%xmm2 -vpsllq $4,%xmm2,%xmm4 -vpaddq %xmm2,%xmm3,%xmm3 -vpsllq $1,%xmm2,%xmm2 -vpaddq %xmm2,%xmm4,%xmm4 -vpaddq %xmm4,%xmm3,%xmm3 -vpand m25(%rip),%xmm10,%xmm10 -vpsrlq $25,%xmm14,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpand m25(%rip),%xmm14,%xmm14 -vpsrlq $26,%xmm3,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpand m26(%rip),%xmm3,%xmm3 -vpunpckhqdq %xmm11,%xmm3,%xmm2 -vmovdqa %xmm2,0(%rsp) -vpshufd $0,%xmm3,%xmm2 -vpshufd $0,%xmm11,%xmm3 -vpmuludq 160(%rsp),%xmm2,%xmm4 -vpmuludq 432(%rsp),%xmm3,%xmm6 -vpaddq %xmm6,%xmm4,%xmm4 -vpmuludq 176(%rsp),%xmm2,%xmm6 -vpmuludq 304(%rsp),%xmm3,%xmm7 -vpaddq %xmm7,%xmm6,%xmm6 -vpmuludq 208(%rsp),%xmm2,%xmm7 -vpmuludq 336(%rsp),%xmm3,%xmm11 -vpaddq %xmm11,%xmm7,%xmm7 -vpmuludq 240(%rsp),%xmm2,%xmm11 -vpmuludq 368(%rsp),%xmm3,%xmm13 -vpaddq %xmm13,%xmm11,%xmm11 -vpmuludq 272(%rsp),%xmm2,%xmm2 -vpmuludq 400(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpunpckhqdq %xmm9,%xmm12,%xmm3 -vmovdqa %xmm3,16(%rsp) -vpshufd $0,%xmm12,%xmm3 -vpshufd $0,%xmm9,%xmm9 -vpmuludq 288(%rsp),%xmm3,%xmm12 -vpaddq %xmm12,%xmm4,%xmm4 -vpmuludq 416(%rsp),%xmm9,%xmm12 -vpaddq %xmm12,%xmm4,%xmm4 -vpmuludq 160(%rsp),%xmm3,%xmm12 -vpaddq %xmm12,%xmm6,%xmm6 -vpmuludq 432(%rsp),%xmm9,%xmm12 -vpaddq %xmm12,%xmm6,%xmm6 -vpmuludq 176(%rsp),%xmm3,%xmm12 -vpaddq %xmm12,%xmm7,%xmm7 -vpmuludq 304(%rsp),%xmm9,%xmm12 -vpaddq %xmm12,%xmm7,%xmm7 -vpmuludq 208(%rsp),%xmm3,%xmm12 -vpaddq %xmm12,%xmm11,%xmm11 -vpmuludq 336(%rsp),%xmm9,%xmm12 -vpaddq %xmm12,%xmm11,%xmm11 -vpmuludq 240(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 
-vpmuludq 368(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpunpckhqdq %xmm14,%xmm1,%xmm3 -vmovdqa %xmm3,32(%rsp) -vpshufd $0,%xmm1,%xmm1 -vpshufd $0,%xmm14,%xmm3 -vpmuludq 256(%rsp),%xmm1,%xmm9 -vpaddq %xmm9,%xmm4,%xmm4 -vpmuludq 384(%rsp),%xmm3,%xmm9 -vpaddq %xmm9,%xmm4,%xmm4 -vpmuludq 288(%rsp),%xmm1,%xmm9 -vpaddq %xmm9,%xmm6,%xmm6 -vpmuludq 416(%rsp),%xmm3,%xmm9 -vpaddq %xmm9,%xmm6,%xmm6 -vpmuludq 160(%rsp),%xmm1,%xmm9 -vpaddq %xmm9,%xmm7,%xmm7 -vpmuludq 432(%rsp),%xmm3,%xmm9 -vpaddq %xmm9,%xmm7,%xmm7 -vpmuludq 176(%rsp),%xmm1,%xmm9 -vpaddq %xmm9,%xmm11,%xmm11 -vpmuludq 304(%rsp),%xmm3,%xmm9 -vpaddq %xmm9,%xmm11,%xmm11 -vpmuludq 208(%rsp),%xmm1,%xmm1 -vpaddq %xmm1,%xmm2,%xmm2 -vpmuludq 336(%rsp),%xmm3,%xmm1 -vpaddq %xmm1,%xmm2,%xmm2 -vpunpckhqdq %xmm0,%xmm5,%xmm1 -vmovdqa %xmm1,48(%rsp) -vpshufd $0,%xmm5,%xmm1 -vpshufd $0,%xmm0,%xmm0 -vpmuludq 224(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 352(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 256(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 384(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 288(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq 416(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq 160(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 432(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 176(%rsp),%xmm1,%xmm1 -vpaddq %xmm1,%xmm2,%xmm2 -vpmuludq 304(%rsp),%xmm0,%xmm0 -vpaddq %xmm0,%xmm2,%xmm2 -vpunpckhqdq %xmm10,%xmm8,%xmm0 -vmovdqa %xmm0,64(%rsp) -vpshufd $0,%xmm8,%xmm0 -vpshufd $0,%xmm10,%xmm1 -vpmuludq 192(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 320(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 224(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 352(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 256(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq 384(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq 288(%rsp),%xmm0,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 416(%rsp),%xmm1,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 160(%rsp),%xmm0,%xmm0 -vpaddq %xmm0,%xmm2,%xmm2 -vpmuludq 432(%rsp),%xmm1,%xmm0 -vpaddq %xmm0,%xmm2,%xmm2 -vmovdqa %xmm4,80(%rsp) -vmovdqa %xmm6,96(%rsp) -vmovdqa %xmm7,112(%rsp) -vmovdqa %xmm11,448(%rsp) -vmovdqa %xmm2,496(%rsp) -vmovdqa 144(%rsp),%xmm0 -vpmuludq %xmm0,%xmm0,%xmm1 -vpaddq %xmm0,%xmm0,%xmm0 -vmovdqa 128(%rsp),%xmm2 -vpmuludq %xmm2,%xmm0,%xmm3 -vmovdqa 480(%rsp),%xmm4 -vpmuludq %xmm4,%xmm0,%xmm5 -vmovdqa 464(%rsp),%xmm6 -vpmuludq %xmm6,%xmm0,%xmm7 -vmovdqa 528(%rsp),%xmm8 -vpmuludq %xmm8,%xmm0,%xmm9 -vpmuludq 512(%rsp),%xmm0,%xmm10 -vpmuludq 592(%rsp),%xmm0,%xmm11 -vpmuludq 576(%rsp),%xmm0,%xmm12 -vpmuludq 624(%rsp),%xmm0,%xmm13 -vmovdqa 672(%rsp),%xmm14 -vpmuludq %xmm14,%xmm0,%xmm0 -vpmuludq v38_38(%rip),%xmm14,%xmm15 -vpmuludq %xmm15,%xmm14,%xmm14 -vpaddq %xmm14,%xmm13,%xmm13 -vpaddq %xmm6,%xmm6,%xmm14 -vpmuludq %xmm14,%xmm6,%xmm6 -vpaddq %xmm6,%xmm11,%xmm11 -vpaddq %xmm2,%xmm2,%xmm6 -vpmuludq %xmm6,%xmm2,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpmuludq %xmm15,%xmm6,%xmm2 -vpaddq %xmm2,%xmm1,%xmm1 -vpmuludq %xmm15,%xmm4,%xmm2 -vpaddq %xmm2,%xmm3,%xmm3 -vpmuludq 544(%rsp),%xmm6,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpmuludq 592(%rsp),%xmm6,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpmuludq 640(%rsp),%xmm6,%xmm2 -vpaddq %xmm2,%xmm13,%xmm13 -vpmuludq 624(%rsp),%xmm6,%xmm2 -vpaddq %xmm2,%xmm0,%xmm0 -vpmuludq %xmm4,%xmm6,%xmm2 -vpaddq %xmm2,%xmm7,%xmm7 -vpmuludq %xmm14,%xmm6,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vpmuludq %xmm8,%xmm6,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpmuludq %xmm15,%xmm14,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 
-vpmuludq %xmm15,%xmm8,%xmm2 -vpaddq %xmm2,%xmm7,%xmm7 -vpmuludq %xmm4,%xmm4,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vpmuludq %xmm14,%xmm4,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpaddq %xmm4,%xmm4,%xmm2 -vpmuludq %xmm8,%xmm2,%xmm4 -vpaddq %xmm4,%xmm11,%xmm11 -vpmuludq 688(%rsp),%xmm2,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vpmuludq 688(%rsp),%xmm14,%xmm4 -vpaddq %xmm4,%xmm3,%xmm3 -vpmuludq 512(%rsp),%xmm2,%xmm4 -vpaddq %xmm4,%xmm12,%xmm12 -vpmuludq 592(%rsp),%xmm2,%xmm4 -vpaddq %xmm4,%xmm13,%xmm13 -vpmuludq 576(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm0,%xmm0 -vpmuludq 656(%rsp),%xmm8,%xmm2 -vpaddq %xmm2,%xmm3,%xmm3 -vpmuludq %xmm8,%xmm14,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpmuludq %xmm8,%xmm8,%xmm2 -vpaddq %xmm2,%xmm13,%xmm13 -vpaddq %xmm8,%xmm8,%xmm2 -vpmuludq 688(%rsp),%xmm2,%xmm4 -vpaddq %xmm4,%xmm5,%xmm5 -vpmuludq 544(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm9,%xmm9 -vpmuludq 592(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm10,%xmm10 -vpmuludq 656(%rsp),%xmm14,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vmovdqa 544(%rsp),%xmm4 -vpmuludq 688(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm7,%xmm7 -vpmuludq 544(%rsp),%xmm14,%xmm4 -vpaddq %xmm4,%xmm13,%xmm13 -vpmuludq 592(%rsp),%xmm14,%xmm4 -vpaddq %xmm4,%xmm0,%xmm0 -vpmuludq 640(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm11,%xmm11 -vpmuludq 624(%rsp),%xmm15,%xmm4 -vpaddq %xmm4,%xmm12,%xmm12 -vmovdqa 592(%rsp),%xmm4 -vpaddq %xmm4,%xmm4,%xmm4 -vpmuludq 688(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm9,%xmm9 -vpmuludq 608(%rsp),%xmm2,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vmovdqa 544(%rsp),%xmm4 -vpmuludq 608(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm3,%xmm3 -vmovdqa 544(%rsp),%xmm4 -vpmuludq 656(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm5,%xmm5 -vmovdqa 592(%rsp),%xmm4 -vpmuludq 656(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm7,%xmm7 -vmovdqa 640(%rsp),%xmm4 -vpmuludq 688(%rsp),%xmm4,%xmm4 -vpaddq %xmm4,%xmm10,%xmm10 -vpmuludq 512(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm0,%xmm0 -vmovdqa 560(%rsp),%xmm2 -vpmuludq 512(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm1,%xmm1 -vmovdqa 608(%rsp),%xmm2 -vpmuludq 592(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vmovdqa 656(%rsp),%xmm2 -vpmuludq 576(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vmovdqa 688(%rsp),%xmm2 -vpmuludq 624(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpsrlq $26,%xmm1,%xmm2 -vpaddq %xmm2,%xmm3,%xmm3 -vpand m26(%rip),%xmm1,%xmm1 -vpsrlq $25,%xmm10,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpand m25(%rip),%xmm10,%xmm10 -vpsrlq $25,%xmm3,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpand m25(%rip),%xmm3,%xmm3 -vpsrlq $26,%xmm11,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpand m26(%rip),%xmm11,%xmm11 -vpsrlq $26,%xmm5,%xmm2 -vpaddq %xmm2,%xmm7,%xmm7 -vpand m26(%rip),%xmm5,%xmm5 -vpsrlq $25,%xmm12,%xmm2 -vpaddq %xmm2,%xmm13,%xmm13 -vpand m25(%rip),%xmm12,%xmm12 -vpsrlq $25,%xmm7,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vpand m25(%rip),%xmm7,%xmm7 -vpsrlq $26,%xmm13,%xmm2 -vpaddq %xmm2,%xmm0,%xmm0 -vpand m26(%rip),%xmm13,%xmm13 -vpsrlq $26,%xmm9,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpand m26(%rip),%xmm9,%xmm9 -vpsrlq $25,%xmm0,%xmm2 -vpsllq $4,%xmm2,%xmm4 -vpaddq %xmm2,%xmm1,%xmm1 -vpsllq $1,%xmm2,%xmm2 -vpaddq %xmm2,%xmm4,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vpand m25(%rip),%xmm0,%xmm0 -vpsrlq $25,%xmm10,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpand m25(%rip),%xmm10,%xmm10 -vpsrlq $26,%xmm1,%xmm2 -vpaddq %xmm2,%xmm3,%xmm3 -vpand m26(%rip),%xmm1,%xmm1 -vpunpckhqdq %xmm3,%xmm1,%xmm2 -vpunpcklqdq %xmm3,%xmm1,%xmm1 -vmovdqa %xmm1,464(%rsp) -vpaddq subc0(%rip),%xmm2,%xmm3 -vpsubq %xmm1,%xmm3,%xmm3 -vpunpckhqdq %xmm3,%xmm2,%xmm1 -vpunpcklqdq %xmm3,%xmm2,%xmm2 -vmovdqa %xmm2,480(%rsp) -vmovdqa %xmm1,512(%rsp) -vpsllq $1,%xmm1,%xmm1 -vmovdqa 
%xmm1,528(%rsp) -vpmuludq v121666_121666(%rip),%xmm3,%xmm3 -vmovdqa 80(%rsp),%xmm1 -vpunpcklqdq %xmm1,%xmm3,%xmm2 -vpunpckhqdq %xmm1,%xmm3,%xmm1 -vpunpckhqdq %xmm7,%xmm5,%xmm3 -vpunpcklqdq %xmm7,%xmm5,%xmm4 -vmovdqa %xmm4,544(%rsp) -vpaddq subc2(%rip),%xmm3,%xmm5 -vpsubq %xmm4,%xmm5,%xmm5 -vpunpckhqdq %xmm5,%xmm3,%xmm4 -vpunpcklqdq %xmm5,%xmm3,%xmm3 -vmovdqa %xmm3,560(%rsp) -vmovdqa %xmm4,576(%rsp) -vpsllq $1,%xmm4,%xmm4 -vmovdqa %xmm4,592(%rsp) -vpmuludq v121666_121666(%rip),%xmm5,%xmm5 -vmovdqa 96(%rsp),%xmm3 -vpunpcklqdq %xmm3,%xmm5,%xmm4 -vpunpckhqdq %xmm3,%xmm5,%xmm3 -vpunpckhqdq %xmm10,%xmm9,%xmm5 -vpunpcklqdq %xmm10,%xmm9,%xmm6 -vmovdqa %xmm6,608(%rsp) -vpaddq subc2(%rip),%xmm5,%xmm7 -vpsubq %xmm6,%xmm7,%xmm7 -vpunpckhqdq %xmm7,%xmm5,%xmm6 -vpunpcklqdq %xmm7,%xmm5,%xmm5 -vmovdqa %xmm5,624(%rsp) -vmovdqa %xmm6,640(%rsp) -vpsllq $1,%xmm6,%xmm6 -vmovdqa %xmm6,656(%rsp) -vpmuludq v121666_121666(%rip),%xmm7,%xmm7 -vmovdqa 112(%rsp),%xmm5 -vpunpcklqdq %xmm5,%xmm7,%xmm6 -vpunpckhqdq %xmm5,%xmm7,%xmm5 -vpunpckhqdq %xmm12,%xmm11,%xmm7 -vpunpcklqdq %xmm12,%xmm11,%xmm8 -vmovdqa %xmm8,672(%rsp) -vpaddq subc2(%rip),%xmm7,%xmm9 -vpsubq %xmm8,%xmm9,%xmm9 -vpunpckhqdq %xmm9,%xmm7,%xmm8 -vpunpcklqdq %xmm9,%xmm7,%xmm7 -vmovdqa %xmm7,688(%rsp) -vmovdqa %xmm8,704(%rsp) -vpsllq $1,%xmm8,%xmm8 -vmovdqa %xmm8,720(%rsp) -vpmuludq v121666_121666(%rip),%xmm9,%xmm9 -vmovdqa 448(%rsp),%xmm7 -vpunpcklqdq %xmm7,%xmm9,%xmm8 -vpunpckhqdq %xmm7,%xmm9,%xmm7 -vpunpckhqdq %xmm0,%xmm13,%xmm9 -vpunpcklqdq %xmm0,%xmm13,%xmm0 -vmovdqa %xmm0,448(%rsp) -vpaddq subc2(%rip),%xmm9,%xmm10 -vpsubq %xmm0,%xmm10,%xmm10 -vpunpckhqdq %xmm10,%xmm9,%xmm0 -vpunpcklqdq %xmm10,%xmm9,%xmm9 -vmovdqa %xmm9,736(%rsp) -vmovdqa %xmm0,752(%rsp) -vpsllq $1,%xmm0,%xmm0 -vmovdqa %xmm0,768(%rsp) -vpmuludq v121666_121666(%rip),%xmm10,%xmm10 -vmovdqa 496(%rsp),%xmm0 -vpunpcklqdq %xmm0,%xmm10,%xmm9 -vpunpckhqdq %xmm0,%xmm10,%xmm0 -vpsrlq $26,%xmm2,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpand m26(%rip),%xmm2,%xmm2 -vpsrlq $25,%xmm5,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpand m25(%rip),%xmm5,%xmm5 -vpsrlq $25,%xmm1,%xmm10 -vpaddq %xmm10,%xmm4,%xmm4 -vpand m25(%rip),%xmm1,%xmm1 -vpsrlq $26,%xmm8,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpand m26(%rip),%xmm8,%xmm8 -vpsrlq $26,%xmm4,%xmm10 -vpaddq %xmm10,%xmm3,%xmm3 -vpand m26(%rip),%xmm4,%xmm4 -vpsrlq $25,%xmm7,%xmm10 -vpaddq %xmm10,%xmm9,%xmm9 -vpand m25(%rip),%xmm7,%xmm7 -vpsrlq $25,%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpand m25(%rip),%xmm3,%xmm3 -vpsrlq $26,%xmm9,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpand m26(%rip),%xmm9,%xmm9 -vpsrlq $26,%xmm6,%xmm10 -vpaddq %xmm10,%xmm5,%xmm5 -vpand m26(%rip),%xmm6,%xmm6 -vpsrlq $25,%xmm0,%xmm10 -vpsllq $4,%xmm10,%xmm11 -vpaddq %xmm10,%xmm2,%xmm2 -vpsllq $1,%xmm10,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpaddq %xmm11,%xmm2,%xmm2 -vpand m25(%rip),%xmm0,%xmm0 -vpsrlq $25,%xmm5,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpand m25(%rip),%xmm5,%xmm5 -vpsrlq $26,%xmm2,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpand m26(%rip),%xmm2,%xmm2 -vpunpckhqdq %xmm1,%xmm2,%xmm10 -vmovdqa %xmm10,80(%rsp) -vpunpcklqdq %xmm1,%xmm2,%xmm1 -vpunpckhqdq %xmm3,%xmm4,%xmm2 -vmovdqa %xmm2,96(%rsp) -vpunpcklqdq %xmm3,%xmm4,%xmm2 -vpunpckhqdq %xmm5,%xmm6,%xmm3 -vmovdqa %xmm3,112(%rsp) -vpunpcklqdq %xmm5,%xmm6,%xmm3 -vpunpckhqdq %xmm7,%xmm8,%xmm4 -vmovdqa %xmm4,128(%rsp) -vpunpcklqdq %xmm7,%xmm8,%xmm4 -vpunpckhqdq %xmm0,%xmm9,%xmm5 -vmovdqa %xmm5,144(%rsp) -vpunpcklqdq %xmm0,%xmm9,%xmm0 -vmovdqa 464(%rsp),%xmm5 -vpaddq %xmm5,%xmm1,%xmm1 -vpunpcklqdq %xmm1,%xmm5,%xmm6 -vpunpckhqdq %xmm1,%xmm5,%xmm1 -vpmuludq 
512(%rsp),%xmm6,%xmm5 -vpmuludq 480(%rsp),%xmm1,%xmm7 -vpaddq %xmm7,%xmm5,%xmm5 -vpmuludq 560(%rsp),%xmm6,%xmm7 -vpmuludq 528(%rsp),%xmm1,%xmm8 -vpaddq %xmm8,%xmm7,%xmm7 -vpmuludq 576(%rsp),%xmm6,%xmm8 -vpmuludq 560(%rsp),%xmm1,%xmm9 -vpaddq %xmm9,%xmm8,%xmm8 -vpmuludq 624(%rsp),%xmm6,%xmm9 -vpmuludq 592(%rsp),%xmm1,%xmm10 -vpaddq %xmm10,%xmm9,%xmm9 -vpmuludq 640(%rsp),%xmm6,%xmm10 -vpmuludq 624(%rsp),%xmm1,%xmm11 -vpaddq %xmm11,%xmm10,%xmm10 -vpmuludq 688(%rsp),%xmm6,%xmm11 -vpmuludq 656(%rsp),%xmm1,%xmm12 -vpaddq %xmm12,%xmm11,%xmm11 -vpmuludq 704(%rsp),%xmm6,%xmm12 -vpmuludq 688(%rsp),%xmm1,%xmm13 -vpaddq %xmm13,%xmm12,%xmm12 -vpmuludq 736(%rsp),%xmm6,%xmm13 -vpmuludq 720(%rsp),%xmm1,%xmm14 -vpaddq %xmm14,%xmm13,%xmm13 -vpmuludq 752(%rsp),%xmm6,%xmm14 -vpmuludq 736(%rsp),%xmm1,%xmm15 -vpaddq %xmm15,%xmm14,%xmm14 -vpmuludq 480(%rsp),%xmm6,%xmm6 -vpmuludq v19_19(%rip),%xmm1,%xmm1 -vpmuludq 768(%rsp),%xmm1,%xmm1 -vpaddq %xmm1,%xmm6,%xmm6 -vmovdqa 544(%rsp),%xmm1 -vpaddq %xmm1,%xmm2,%xmm2 -vpunpcklqdq %xmm2,%xmm1,%xmm15 -vpunpckhqdq %xmm2,%xmm1,%xmm1 -vpmuludq 480(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm7,%xmm7 -vpmuludq 512(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm8,%xmm8 -vpmuludq 560(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vpmuludq 576(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpmuludq 624(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpmuludq 640(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpmuludq 688(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm13,%xmm13 -vpmuludq 704(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm15,%xmm15 -vpmuludq 736(%rsp),%xmm15,%xmm2 -vpaddq %xmm2,%xmm6,%xmm6 -vpmuludq 752(%rsp),%xmm15,%xmm15 -vpaddq %xmm15,%xmm5,%xmm5 -vpmuludq 480(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm8,%xmm8 -vpmuludq 528(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vpmuludq 560(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpmuludq 592(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpmuludq 624(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpmuludq 656(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm13,%xmm13 -vpmuludq 688(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm1,%xmm1 -vpmuludq 720(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm6,%xmm6 -vpmuludq 736(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpmuludq 768(%rsp),%xmm1,%xmm1 -vpaddq %xmm1,%xmm7,%xmm7 -vmovdqa 608(%rsp),%xmm1 -vpaddq %xmm1,%xmm3,%xmm3 -vpunpcklqdq %xmm3,%xmm1,%xmm2 -vpunpckhqdq %xmm3,%xmm1,%xmm1 -vpmuludq 480(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm9,%xmm9 -vpmuludq 512(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm10,%xmm10 -vpmuludq 560(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 576(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm12,%xmm12 -vpmuludq 624(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 640(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm2,%xmm2 -vpmuludq 688(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 704(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 736(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq 752(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm8,%xmm8 -vpmuludq 480(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpmuludq 528(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm11,%xmm11 -vpmuludq 560(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpmuludq 592(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm13,%xmm13 -vpmuludq 624(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm1,%xmm1 -vpmuludq 656(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm6,%xmm6 -vpmuludq 688(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpmuludq 720(%rsp),%xmm1,%xmm2 
-vpaddq %xmm2,%xmm7,%xmm7 -vpmuludq 736(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm8,%xmm8 -vpmuludq 768(%rsp),%xmm1,%xmm1 -vpaddq %xmm1,%xmm9,%xmm9 -vmovdqa 672(%rsp),%xmm1 -vpaddq %xmm1,%xmm4,%xmm4 -vpunpcklqdq %xmm4,%xmm1,%xmm2 -vpunpckhqdq %xmm4,%xmm1,%xmm1 -vpmuludq 480(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 512(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm12,%xmm12 -vpmuludq 560(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 576(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm2,%xmm2 -vpmuludq 624(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 640(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 688(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq 704(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpmuludq 736(%rsp),%xmm2,%xmm3 -vpaddq %xmm3,%xmm9,%xmm9 -vpmuludq 752(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpmuludq 480(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpmuludq 528(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm13,%xmm13 -vpmuludq 560(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm1,%xmm1 -vpmuludq 592(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm6,%xmm6 -vpmuludq 624(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm5,%xmm5 -vpmuludq 656(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm7,%xmm7 -vpmuludq 688(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm8,%xmm8 -vpmuludq 720(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm9,%xmm9 -vpmuludq 736(%rsp),%xmm1,%xmm2 -vpaddq %xmm2,%xmm10,%xmm10 -vpmuludq 768(%rsp),%xmm1,%xmm1 -vpaddq %xmm1,%xmm11,%xmm11 -vmovdqa 448(%rsp),%xmm1 -vpaddq %xmm1,%xmm0,%xmm0 -vpunpcklqdq %xmm0,%xmm1,%xmm2 -vpunpckhqdq %xmm0,%xmm1,%xmm0 -vpmuludq 480(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm13,%xmm13 -vpmuludq 512(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm2,%xmm2 -vpmuludq 560(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm6,%xmm6 -vpmuludq 576(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm5,%xmm5 -vpmuludq 624(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm7,%xmm7 -vpmuludq 640(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm8,%xmm8 -vpmuludq 688(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm9,%xmm9 -vpmuludq 704(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm10,%xmm10 -vpmuludq 736(%rsp),%xmm2,%xmm1 -vpaddq %xmm1,%xmm11,%xmm11 -vpmuludq 752(%rsp),%xmm2,%xmm2 -vpaddq %xmm2,%xmm12,%xmm12 -vpmuludq 480(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm14,%xmm14 -vpmuludq v19_19(%rip),%xmm0,%xmm0 -vpmuludq 528(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm6,%xmm6 -vpmuludq 560(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm5,%xmm5 -vpmuludq 592(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm7,%xmm7 -vpmuludq 624(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm8,%xmm8 -vpmuludq 656(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm9,%xmm9 -vpmuludq 688(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm10,%xmm10 -vpmuludq 720(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm11,%xmm11 -vpmuludq 736(%rsp),%xmm0,%xmm1 -vpaddq %xmm1,%xmm12,%xmm12 -vpmuludq 768(%rsp),%xmm0,%xmm0 -vpaddq %xmm0,%xmm13,%xmm13 -vpsrlq $26,%xmm6,%xmm0 -vpaddq %xmm0,%xmm5,%xmm5 -vpand m26(%rip),%xmm6,%xmm6 -vpsrlq $25,%xmm10,%xmm0 -vpaddq %xmm0,%xmm11,%xmm11 -vpand m25(%rip),%xmm10,%xmm10 -vpsrlq $25,%xmm5,%xmm0 -vpaddq %xmm0,%xmm7,%xmm7 -vpand m25(%rip),%xmm5,%xmm5 -vpsrlq $26,%xmm11,%xmm0 -vpaddq %xmm0,%xmm12,%xmm12 -vpand m26(%rip),%xmm11,%xmm11 -vpsrlq $26,%xmm7,%xmm0 -vpaddq %xmm0,%xmm8,%xmm8 -vpand m26(%rip),%xmm7,%xmm7 -vpsrlq $25,%xmm12,%xmm0 -vpaddq %xmm0,%xmm13,%xmm13 -vpand m25(%rip),%xmm12,%xmm12 -vpsrlq $25,%xmm8,%xmm0 -vpaddq %xmm0,%xmm9,%xmm9 -vpand m25(%rip),%xmm8,%xmm8 -vpsrlq $26,%xmm13,%xmm0 -vpaddq %xmm0,%xmm14,%xmm14 -vpand m26(%rip),%xmm13,%xmm13 -vpsrlq $26,%xmm9,%xmm0 -vpaddq 
%xmm0,%xmm10,%xmm10 -vpand m26(%rip),%xmm9,%xmm9 -vpsrlq $25,%xmm14,%xmm0 -vpsllq $4,%xmm0,%xmm1 -vpaddq %xmm0,%xmm6,%xmm6 -vpsllq $1,%xmm0,%xmm0 -vpaddq %xmm0,%xmm1,%xmm1 -vpaddq %xmm1,%xmm6,%xmm6 -vpand m25(%rip),%xmm14,%xmm14 -vpsrlq $25,%xmm10,%xmm0 -vpaddq %xmm0,%xmm11,%xmm11 -vpand m25(%rip),%xmm10,%xmm10 -vpsrlq $26,%xmm6,%xmm0 -vpaddq %xmm0,%xmm5,%xmm5 -vpand m26(%rip),%xmm6,%xmm6 -vpunpckhqdq %xmm5,%xmm6,%xmm1 -vpunpcklqdq %xmm5,%xmm6,%xmm0 -vpunpckhqdq %xmm8,%xmm7,%xmm3 -vpunpcklqdq %xmm8,%xmm7,%xmm2 -vpunpckhqdq %xmm10,%xmm9,%xmm5 -vpunpcklqdq %xmm10,%xmm9,%xmm4 -vpunpckhqdq %xmm12,%xmm11,%xmm7 -vpunpcklqdq %xmm12,%xmm11,%xmm6 -vpunpckhqdq %xmm14,%xmm13,%xmm9 -vpunpcklqdq %xmm14,%xmm13,%xmm8 -cmp $0,%rdx -jne ._ladder_loop -vmovdqu %xmm1,160(%rdi) -vmovdqu %xmm0,80(%rdi) -vmovdqu %xmm3,176(%rdi) -vmovdqu %xmm2,96(%rdi) -vmovdqu %xmm5,192(%rdi) -vmovdqu %xmm4,112(%rdi) -vmovdqu %xmm7,208(%rdi) -vmovdqu %xmm6,128(%rdi) -vmovdqu %xmm9,224(%rdi) -vmovdqu %xmm8,144(%rdi) -movq 1824(%rsp),%r11 -movq 1832(%rsp),%r12 -movq 1840(%rsp),%r13 -movq 1848(%rsp),%r14 -add %r11,%rsp -ret -FN_END ladder_avx - -.p2align 5 -GLOBAL_HIDDEN_FN_EXT ladder_base_avx,2,0 -ladder_base_avx_local: -mov %rsp,%r11 -and $31,%r11 -add $1568,%r11 -sub %r11,%rsp -movq %r11,1536(%rsp) -movq %r12,1544(%rsp) -movq %r13,1552(%rsp) -vmovdqa v0_0(%rip),%xmm0 -vmovdqa v1_0(%rip),%xmm1 -vmovdqa v9_0(%rip),%xmm2 -vmovdqa %xmm2,0(%rsp) -vmovdqa %xmm0,16(%rsp) -vmovdqa %xmm0,32(%rsp) -vmovdqa %xmm0,48(%rsp) -vmovdqa %xmm0,64(%rsp) -vmovdqa %xmm1,80(%rsp) -vmovdqa %xmm0,96(%rsp) -vmovdqa %xmm0,112(%rsp) -vmovdqa %xmm0,128(%rsp) -vmovdqa %xmm0,144(%rsp) -vmovdqa %xmm1,%xmm0 -vpxor %xmm1,%xmm1,%xmm1 -vpxor %xmm2,%xmm2,%xmm2 -vpxor %xmm3,%xmm3,%xmm3 -vpxor %xmm4,%xmm4,%xmm4 -vpxor %xmm5,%xmm5,%xmm5 -vpxor %xmm6,%xmm6,%xmm6 -vpxor %xmm7,%xmm7,%xmm7 -vpxor %xmm8,%xmm8,%xmm8 -vpxor %xmm9,%xmm9,%xmm9 -movq 0(%rsi),%rdx -movq 8(%rsi),%rcx -movq 16(%rsi),%r8 -movq 24(%rsi),%r9 -shrd $1,%rcx,%rdx -shrd $1,%r8,%rcx -shrd $1,%r9,%r8 -shr $1,%r9 -xorq 0(%rsi),%rdx -xorq 8(%rsi),%rcx -xorq 16(%rsi),%r8 -xorq 24(%rsi),%r9 -leaq 512(%rsp),%rsi -mov $64,%rax - -.p2align 4 -._ladder_base_small_loop: -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -shr $1,%rdx -shr $1,%rcx -shr $1,%r8 -shr $1,%r9 -and $1,%r10d -and $1,%r11d -and $1,%r12d -and $1,%r13d -neg %r10 -neg %r11 -neg %r12 -neg %r13 -movl %r10d,0(%rsi) -movl %r11d,256(%rsi) -movl %r12d,512(%rsi) -movl %r13d,768(%rsi) -add $4,%rsi -sub $1,%rax -jne ._ladder_base_small_loop -mov $255,%rdx -add $760,%rsi - -.p2align 4 -._ladder_base_loop: -sub $1,%rdx -vbroadcastss 0(%rsi),%xmm10 -sub $4,%rsi -vmovdqa 0(%rsp),%xmm11 -vmovdqa 80(%rsp),%xmm12 -vpxor %xmm11,%xmm0,%xmm13 -vpand %xmm10,%xmm13,%xmm13 -vpxor %xmm13,%xmm0,%xmm0 -vpxor %xmm13,%xmm11,%xmm11 -vpxor %xmm12,%xmm1,%xmm13 -vpand %xmm10,%xmm13,%xmm13 -vpxor %xmm13,%xmm1,%xmm1 -vpxor %xmm13,%xmm12,%xmm12 -vmovdqa 16(%rsp),%xmm13 -vmovdqa 96(%rsp),%xmm14 -vpxor %xmm13,%xmm2,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm2,%xmm2 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm3,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm3,%xmm3 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,0(%rsp) -vmovdqa %xmm14,16(%rsp) -vmovdqa 32(%rsp),%xmm13 -vmovdqa 112(%rsp),%xmm14 -vpxor %xmm13,%xmm4,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm4,%xmm4 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm5,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm5,%xmm5 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,32(%rsp) -vmovdqa %xmm14,80(%rsp) 
-vmovdqa 48(%rsp),%xmm13 -vmovdqa 128(%rsp),%xmm14 -vpxor %xmm13,%xmm6,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm6,%xmm6 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm7,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm7,%xmm7 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,48(%rsp) -vmovdqa %xmm14,96(%rsp) -vmovdqa 64(%rsp),%xmm13 -vmovdqa 144(%rsp),%xmm14 -vpxor %xmm13,%xmm8,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm8,%xmm8 -vpxor %xmm15,%xmm13,%xmm13 -vpxor %xmm14,%xmm9,%xmm15 -vpand %xmm10,%xmm15,%xmm15 -vpxor %xmm15,%xmm9,%xmm9 -vpxor %xmm15,%xmm14,%xmm14 -vmovdqa %xmm13,64(%rsp) -vmovdqa %xmm14,112(%rsp) -vpaddq subc0(%rip),%xmm11,%xmm10 -vpsubq %xmm12,%xmm10,%xmm10 -vpaddq %xmm12,%xmm11,%xmm11 -vpunpckhqdq %xmm10,%xmm11,%xmm12 -vpunpcklqdq %xmm10,%xmm11,%xmm10 -vpaddq %xmm1,%xmm0,%xmm11 -vpaddq subc0(%rip),%xmm0,%xmm0 -vpsubq %xmm1,%xmm0,%xmm0 -vpunpckhqdq %xmm11,%xmm0,%xmm1 -vpunpcklqdq %xmm11,%xmm0,%xmm0 -vpmuludq %xmm0,%xmm10,%xmm11 -vpmuludq %xmm1,%xmm10,%xmm13 -vmovdqa %xmm1,128(%rsp) -vpaddq %xmm1,%xmm1,%xmm1 -vpmuludq %xmm0,%xmm12,%xmm14 -vmovdqa %xmm0,144(%rsp) -vpaddq %xmm14,%xmm13,%xmm13 -vpmuludq %xmm1,%xmm12,%xmm0 -vmovdqa %xmm1,160(%rsp) -vpaddq %xmm3,%xmm2,%xmm1 -vpaddq subc2(%rip),%xmm2,%xmm2 -vpsubq %xmm3,%xmm2,%xmm2 -vpunpckhqdq %xmm1,%xmm2,%xmm3 -vpunpcklqdq %xmm1,%xmm2,%xmm1 -vpmuludq %xmm1,%xmm10,%xmm2 -vpaddq %xmm2,%xmm0,%xmm0 -vpmuludq %xmm3,%xmm10,%xmm2 -vmovdqa %xmm3,176(%rsp) -vpaddq %xmm3,%xmm3,%xmm3 -vpmuludq %xmm1,%xmm12,%xmm14 -vmovdqa %xmm1,192(%rsp) -vpaddq %xmm14,%xmm2,%xmm2 -vpmuludq %xmm3,%xmm12,%xmm1 -vmovdqa %xmm3,208(%rsp) -vpaddq %xmm5,%xmm4,%xmm3 -vpaddq subc2(%rip),%xmm4,%xmm4 -vpsubq %xmm5,%xmm4,%xmm4 -vpunpckhqdq %xmm3,%xmm4,%xmm5 -vpunpcklqdq %xmm3,%xmm4,%xmm3 -vpmuludq %xmm3,%xmm10,%xmm4 -vpaddq %xmm4,%xmm1,%xmm1 -vpmuludq %xmm5,%xmm10,%xmm4 -vmovdqa %xmm5,224(%rsp) -vpaddq %xmm5,%xmm5,%xmm5 -vpmuludq %xmm3,%xmm12,%xmm14 -vmovdqa %xmm3,240(%rsp) -vpaddq %xmm14,%xmm4,%xmm4 -vpaddq %xmm7,%xmm6,%xmm3 -vpaddq subc2(%rip),%xmm6,%xmm6 -vpsubq %xmm7,%xmm6,%xmm6 -vpunpckhqdq %xmm3,%xmm6,%xmm7 -vpunpcklqdq %xmm3,%xmm6,%xmm3 -vpmuludq %xmm3,%xmm10,%xmm6 -vpmuludq %xmm5,%xmm12,%xmm14 -vmovdqa %xmm5,256(%rsp) -vpmuludq v19_19(%rip),%xmm5,%xmm5 -vmovdqa %xmm5,272(%rsp) -vpaddq %xmm14,%xmm6,%xmm6 -vpmuludq %xmm7,%xmm10,%xmm5 -vmovdqa %xmm7,288(%rsp) -vpaddq %xmm7,%xmm7,%xmm7 -vpmuludq %xmm3,%xmm12,%xmm14 -vmovdqa %xmm3,304(%rsp) -vpaddq %xmm14,%xmm5,%xmm5 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vmovdqa %xmm3,320(%rsp) -vpaddq %xmm9,%xmm8,%xmm3 -vpaddq subc2(%rip),%xmm8,%xmm8 -vpsubq %xmm9,%xmm8,%xmm8 -vpunpckhqdq %xmm3,%xmm8,%xmm9 -vpunpcklqdq %xmm3,%xmm8,%xmm3 -vmovdqa %xmm3,336(%rsp) -vpmuludq %xmm7,%xmm12,%xmm8 -vmovdqa %xmm7,352(%rsp) -vpmuludq v19_19(%rip),%xmm7,%xmm7 -vmovdqa %xmm7,368(%rsp) -vpmuludq %xmm3,%xmm10,%xmm7 -vpaddq %xmm7,%xmm8,%xmm8 -vpmuludq %xmm9,%xmm10,%xmm7 -vmovdqa %xmm9,384(%rsp) -vpaddq %xmm9,%xmm9,%xmm9 -vpmuludq %xmm3,%xmm12,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vmovdqa %xmm3,400(%rsp) -vpmuludq v19_19(%rip),%xmm12,%xmm12 -vpmuludq %xmm9,%xmm12,%xmm3 -vmovdqa %xmm9,416(%rsp) -vpaddq %xmm3,%xmm11,%xmm11 -vmovdqa 0(%rsp),%xmm3 -vmovdqa 16(%rsp),%xmm9 -vpaddq subc2(%rip),%xmm3,%xmm10 -vpsubq %xmm9,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm10,%xmm3,%xmm9 -vpunpcklqdq %xmm10,%xmm3,%xmm3 -vpmuludq 144(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpmuludq 128(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm2,%xmm2 -vpmuludq 192(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 
-vpmuludq 176(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm4,%xmm4 -vpmuludq 240(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpmuludq 224(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm5,%xmm5 -vpmuludq 304(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpmuludq 288(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vpmuludq 336(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpmuludq 384(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 144(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 160(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm1,%xmm1 -vpmuludq 192(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 208(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 240(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 256(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpmuludq 304(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm9,%xmm9 -vpmuludq 352(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 336(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 416(%rsp),%xmm9,%xmm9 -vpaddq %xmm9,%xmm0,%xmm0 -vmovdqa 32(%rsp),%xmm3 -vmovdqa 80(%rsp),%xmm9 -vpaddq subc2(%rip),%xmm3,%xmm10 -vpsubq %xmm9,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm10,%xmm3,%xmm9 -vpunpcklqdq %xmm10,%xmm3,%xmm3 -vpmuludq 144(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpmuludq 128(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm4,%xmm4 -vpmuludq 192(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpmuludq 176(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm5,%xmm5 -vpmuludq 240(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpmuludq 224(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vpmuludq 304(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpmuludq 288(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm13,%xmm13 -vpmuludq 336(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpmuludq 384(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 144(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 160(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm6,%xmm6 -vpmuludq 192(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 208(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpmuludq 240(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm9,%xmm9 -vpmuludq 256(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm11,%xmm11 -vpmuludq 304(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm13,%xmm13 -vpmuludq 352(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm0,%xmm0 -vpmuludq 336(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm2,%xmm2 -vpmuludq 416(%rsp),%xmm9,%xmm9 -vpaddq %xmm9,%xmm1,%xmm1 -vmovdqa 48(%rsp),%xmm3 -vmovdqa 96(%rsp),%xmm9 -vpaddq subc2(%rip),%xmm3,%xmm10 -vpsubq %xmm9,%xmm10,%xmm10 -vpaddq %xmm9,%xmm3,%xmm3 -vpunpckhqdq %xmm10,%xmm3,%xmm9 -vpunpcklqdq %xmm10,%xmm3,%xmm3 -vpmuludq 144(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm6,%xmm6 -vpmuludq 128(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm5,%xmm5 -vpmuludq 192(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm8,%xmm8 -vpmuludq 176(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm7,%xmm7 -vpmuludq v19_19(%rip),%xmm3,%xmm3 -vpmuludq 240(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm11,%xmm11 -vpmuludq 224(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm13,%xmm13 -vpmuludq 304(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm0,%xmm0 -vpmuludq 288(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm2,%xmm2 -vpmuludq 336(%rsp),%xmm3,%xmm10 -vpaddq %xmm10,%xmm1,%xmm1 -vpmuludq 384(%rsp),%xmm3,%xmm3 -vpaddq %xmm3,%xmm4,%xmm4 -vpmuludq 144(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm5,%xmm5 -vpmuludq 160(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm8,%xmm8 -vpmuludq 192(%rsp),%xmm9,%xmm3 -vpaddq %xmm3,%xmm7,%xmm7 -vpmuludq 
[... elided: the remainder of the ._ladder_base_loop body, flattened in the
extracted page. It consists of interleaved vpmuludq/vpaddq multiply-accumulate
chains over the ten 25/26-bit limbs, vpunpck{l,h}qdq limb interleaving via the
subc0/subc2 constants, carry propagation against the m25(%rip)/m26(%rip)
masks with the multiply-by-19 wrap-around fold (vpsrlq/vpsllq/vpaddq), and
the vpmuludq v121666_121666(%rip) step of the Montgomery ladder ...]
-cmp $0,%rdx
-jne ._ladder_base_loop
-vmovdqu %xmm1,80(%rdi)
-vmovdqu %xmm0,0(%rdi)
-vmovdqu %xmm3,96(%rdi)
-vmovdqu %xmm2,16(%rdi)
-vmovdqu %xmm5,112(%rdi)
-vmovdqu %xmm4,32(%rdi)
-vmovdqu %xmm7,128(%rdi)
-vmovdqu %xmm6,48(%rdi)
-vmovdqu %xmm9,144(%rdi)
-vmovdqu %xmm8,64(%rdi)
-movq 1536(%rsp),%r11
-movq 1544(%rsp),%r12
-movq 1552(%rsp),%r13
-add %r11,%rsp
-ret
-FN_END ladder_base_avx
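[Editorial note, not part of the diff: the vectorized ladder above works on
ten 25/26-bit limbs for SIMD friendliness, while the fe51_* helpers that
follow use five 51-bit limbs; in both cases 2^255 == 19 (mod p) folds the
overflow back into the low limb, and the v121666_121666 constant is
a24 = (486662 + 2) / 4 = 121666 from RFC 7748. Below is a portable C sketch
of the multiplication that fe51_mul_avx implements. It is illustrative only:
the name fe51_mul_portable is ours, and it assumes a compiler with
unsigned __int128 (gcc/clang on x86-64) and inputs already reduced below
2^51 per limb.]

#include <stdint.h>

typedef struct { uint64_t v[5]; } fe51;

#define MASK51 (((uint64_t) 1 << 51) - 1)

void fe51_mul_portable (fe51 *r, const fe51 *a, const fe51 *b)
{
    unsigned __int128 t[5];
    uint64_t b19[5], carry;
    int i, j;

    /* Pre-multiply b's limbs by 19 for the wrap-around terms;
     * 19 * 2^51 still fits comfortably in 64 bits */
    for (i = 0; i < 5; i++) {
        b19[i] = 19 * b->v[i];
    }

    /* Schoolbook product: a_j * b_{i-j} lands in result limb i,
     * picking up a factor of 19 whenever the index wraps past limb 4
     * (that is exactly the 2^255 -> 19 reduction) */
    for (i = 0; i < 5; i++) {
        t[i] = 0;
        for (j = 0; j < 5; j++) {
            uint64_t m = (i >= j) ? b->v[i - j] : b19[5 + i - j];
            t[i] += (unsigned __int128) a->v[j] * m;
        }
    }

    /* Carry propagation back down to 51-bit limbs */
    carry = 0;
    for (i = 0; i < 5; i++) {
        t[i] += carry;
        r->v[i] = (uint64_t) t[i] & MASK51;
        carry = (uint64_t) (t[i] >> 51);
    }
    /* Fold the carry out of the top limb back in; the result is
     * congruent mod p but not canonical, which is why fe51_pack_avx
     * does its own final reduction passes */
    r->v[0] += 19 * carry;
}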
-
-.p2align 5
-GLOBAL_HIDDEN_FN_EXT fe51_pack_avx,2,0
-fe51_pack_avx_local:
[... elided: three passes of ._reduceloop (shr $51 / and REDMASK51(%rip) /
imulq $19 fold) to bring the five 51-bit limbs below 2^255-19, a conditional
final subtraction of the modulus via cmovl/cmovne masking, and byte-by-byte
packing of the limbs into the 32-byte little-endian result at
0(%rdi)..31(%rdi) ...]
-FN_END fe51_pack_avx
-
-.p2align 5
-GLOBAL_HIDDEN_FN_EXT fe51_mul_avx,3,0
-fe51_mul_avx_local:
[... elided: 5x5 schoolbook limb multiplication with mulq, imulq $19
pre-multiplications of the high limbs for the wrap-around terms, shld $13
recombination of each 128-bit partial product against REDMASK51(%rip), and a
final carry chain storing the product to 0(%rdi)..32(%rdi) ...]
-FN_END fe51_mul_avx
-
-.p2align 5
-GLOBAL_HIDDEN_FN_EXT fe51_nsquare_avx,4,0
-fe51_nsquare_avx_local:
[... elided: ._loop squaring the element in place n times (count taken from
%rdx), reusing the same imulq $19 / imulq $38 folds and shld $13 carry
recombination as fe51_mul_avx ...]
-FN_END fe51_nsquare_avx
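[Editorial note ahead of the deleted C driver: avx.c below glues the
assembly together. It clamps the scalar, runs the ladder, converts the
(X : Z) result from ten limbs to radix-2^51, divides by Z via fe51_invert,
and packs the affine x-coordinate to 32 bytes. The clamp it applies is the
standard X25519 one from RFC 7748; restated with the intent of each mask
spelled out (a minimal sketch, clamp_scalar is not a name from the diff):]

#include <string.h>

static void clamp_scalar (unsigned char e[32], const unsigned char n[32])
{
    memcpy (e, n, 32);
    e[0] &= 248;   /* clear bits 0..2: force a multiple of the cofactor 8 */
    e[31] &= 127;  /* clear bit 255: keep the scalar below 2^255 */
    e[31] |= 64;   /* set bit 254: fixed top bit, constant ladder length */
}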
diff --git a/src/libcryptobox/curve25519/avx.c b/src/libcryptobox/curve25519/avx.c
deleted file mode 100644
index 27cbf4601..000000000
--- a/src/libcryptobox/curve25519/avx.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "config.h"
-#include "cryptobox.h"
-#include "curve25519.h"
-#include "platform_config.h"
-
-typedef struct {
-	guint64 v[5];
-} fe51;
-typedef guint64 fe[10];
-
-extern void ladder_avx (fe *var, const guchar *p);
-extern void ladder_base_avx (fe *var, const guchar *p);
-extern void fe51_mul_avx (fe51 *a, const fe51 *b, const fe51 *c);
-extern void fe51_pack_avx (guchar *out, const fe51 *var);
-extern void fe51_nsquare_avx (fe51 *a, const fe51 *b, gint n);
-
-static guint64 load_3 (const unsigned char *in)
-{
-	guint64 result;
-	result = (guint64) in[0];
-	result |= ((guint64) in[1]) << 8;
-	result |= ((guint64) in[2]) << 16;
-	return result;
-}
-
-static guint64 load_4 (const unsigned char *in)
-{
-	guint64 result;
-	result = (guint64) in[0];
-	result |= ((guint64) in[1]) << 8;
-	result |= ((guint64) in[2]) << 16;
-	result |= ((guint64) in[3]) << 24;
-	return result;
-}
-
-static void
-fe_frombytes (fe h, const unsigned char *s)
-{
-	guint64 h0 = load_4 (s);
-	guint64 h1 = load_3 (s + 4) << 6;
-	guint64 h2 = load_3 (s + 7) << 5;
-	guint64 h3 = load_3 (s + 10) << 3;
-	guint64 h4 = load_3 (s + 13) << 2;
-	guint64 h5 = load_4 (s + 16);
-	guint64 h6 = load_3 (s + 20) << 7;
-	guint64 h7 = load_3 (s + 23) << 5;
-	guint64 h8 = load_3 (s + 26) << 4;
-	guint64 h9 = (load_3(s + 29) & 8388607) << 2;
-	guint64 carry0;
-	guint64 carry1;
-	guint64 carry2;
-	guint64 carry3;
-	guint64 carry4;
-	guint64 carry5;
-	guint64 carry6;
-	guint64 carry7;
-	guint64 carry8;
-	guint64 carry9;
-
-	carry9 = h9 >> 25;
-	h0 += carry9 * 19;
-	h9 &= 0x1FFFFFF;
-	carry1 = h1 >> 25;
-	h2 += carry1;
-	h1 &= 0x1FFFFFF;
-	carry3 = h3 >> 25;
-	h4 += carry3;
-	h3 &= 0x1FFFFFF;
-	carry5 = h5 >> 25;
-	h6 += carry5;
-	h5 &= 0x1FFFFFF;
-	carry7 = h7 >> 25;
-	h8 += carry7;
-	h7 &= 0x1FFFFFF;
-
-	carry0 = h0 >> 26;
-	h1 += carry0;
-	h0 &= 0x3FFFFFF;
-	carry2 = h2 >> 26;
-	h3 += carry2;
-	h2 &= 0x3FFFFFF;
-	carry4 = h4 >> 26;
-	h5 += carry4;
-	h4 &= 0x3FFFFFF;
-	carry6 = h6 >> 26;
-	h7 += carry6;
-	h6 &= 0x3FFFFFF;
-	carry8 = h8 >> 26;
-	h9 += carry8;
-	h8 &= 0x3FFFFFF;
-
-	h[0] = h0;
-	h[1] = h1;
-	h[2] = h2;
-	h[3] = h3;
-	h[4] = h4;
-	h[5] = h5;
-	h[6] = h6;
-	h[7] = h7;
-	h[8] = h8;
-	h[9] = h9;
-}
-
-#define fe51_square(x, y) fe51_nsquare_avx(x, y, 1)
-
-void fe51_invert (fe51 *r, const fe51 *x)
-{
-	fe51 z2;
-	fe51 z9;
-	fe51 z11;
-	fe51 z2_5_0;
-	fe51 z2_10_0;
-	fe51 z2_20_0;
-	fe51 z2_50_0;
-	fe51 z2_100_0;
-	fe51 t;
-
-	/* 2 */ fe51_square (&z2, x);
-	/* 4 */ fe51_square (&t, &z2);
-	/* 8 */ fe51_square (&t, &t);
-	/* 9 */ fe51_mul_avx (&z9, &t, x);
-	/* 11 */ fe51_mul_avx (&z11, &z9, &z2);
-	/* 22 */ fe51_square (&t, &z11);
-	/* 2^5 - 2^0 = 31 */ fe51_mul_avx (&z2_5_0, &t, &z9);
-
-	/* 2^10 - 2^5 */ fe51_nsquare_avx (&t, &z2_5_0, 5);
-	/* 2^10 - 2^0 */ fe51_mul_avx (&z2_10_0, &t, &z2_5_0);
-
-	/* 2^20 - 2^10 */ fe51_nsquare_avx (&t, &z2_10_0, 10);
-	/* 2^20 - 2^0 */ fe51_mul_avx (&z2_20_0, &t, &z2_10_0);
-
-	/* 2^40 - 2^20 */ fe51_nsquare_avx (&t, &z2_20_0, 20);
-	/* 2^40 - 2^0 */ fe51_mul_avx (&t, &t, &z2_20_0);
-
-	/* 2^50 - 2^10 */ fe51_nsquare_avx (&t, &t, 10);
-	/* 2^50 - 2^0 */ fe51_mul_avx (&z2_50_0, &t, &z2_10_0);
-
-	/* 2^100 - 2^50 */ fe51_nsquare_avx (&t, &z2_50_0, 50);
-	/* 2^100 - 2^0 */ fe51_mul_avx (&z2_100_0, &t, &z2_50_0);
-
-	/* 2^200 - 2^100 */ fe51_nsquare_avx (&t, &z2_100_0, 100);
-	/* 2^200 - 2^0 */ fe51_mul_avx (&t, &t, &z2_100_0);
-
-	/* 2^250 - 2^50 */ fe51_nsquare_avx (&t, &t, 50);
-	/* 2^250 - 2^0 */ fe51_mul_avx (&t, &t, &z2_50_0);
-
-	/* 2^255 - 2^5 */ fe51_nsquare_avx (&t, &t, 5);
-	/* 2^255 - 21 */ fe51_mul_avx (r, &t, &z11);
-}
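[Editorial note on the chain just above: fe51_invert is Fermat inversion.
It computes

    z^(-1) == z^(p - 2) = z^(2^255 - 21)   (mod p),  p = 2^255 - 19

using 254 squarings and 11 multiplications; the bracketed comments track the
exponent accumulated so far, ending at 2^255 - 21.]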
-
-#define x1 var[0]
-#define x2 var[1]
-#define z2 var[2]
-
-void
-scalarmult_avx (unsigned char *q,
-		const unsigned char *n,
-		const unsigned char *p)
-{
-	fe var[3];
-	fe51 x_51;
-	fe51 z_51;
-	unsigned char e[32];
-
-	memcpy (e, n, 32);
-	e[0] &= 248;
-	e[31] &= 127;
-	e[31] |= 64;
-
-	fe_frombytes (x1, p);
-
-	ladder_avx (var, e);
-
-	z_51.v[0] = (z2[1] << 26) + z2[0];
-	z_51.v[1] = (z2[3] << 26) + z2[2];
-	z_51.v[2] = (z2[5] << 26) + z2[4];
-	z_51.v[3] = (z2[7] << 26) + z2[6];
-	z_51.v[4] = (z2[9] << 26) + z2[8];
-
-	x_51.v[0] = (x2[1] << 26) + x2[0];
-	x_51.v[1] = (x2[3] << 26) + x2[2];
-	x_51.v[2] = (x2[5] << 26) + x2[4];
-	x_51.v[3] = (x2[7] << 26) + x2[6];
-	x_51.v[4] = (x2[9] << 26) + x2[8];
-
-	fe51_invert (&z_51, &z_51);
-	fe51_mul_avx (&x_51, &x_51, &z_51);
-	fe51_pack_avx (q, &x_51);
-}
-
-#undef x2
-#undef z2
-#define x2 var[0]
-#define z2 var[1]
-
-int
-scalarmult_base_avx (unsigned char *q, const unsigned char *n)
-{
-	unsigned char e[32];
-
-	fe var[3];
-
-	fe51 x_51;
-	fe51 z_51;
-
-	memcpy (e, n, 32);
-	e[0] &= 248;
-	e[31] &= 127;
-	e[31] |= 64;
-
-	ladder_base_avx (var, e);
-
-	z_51.v[0] = (z2[1] << 26) + z2[0];
-	z_51.v[1] = (z2[3] << 26) + z2[2];
-	z_51.v[2] = (z2[5] << 26) + z2[4];
-	z_51.v[3] = (z2[7] << 26) + z2[6];
-	z_51.v[4] = (z2[9] << 26) + z2[8];
-
-	x_51.v[0] = (x2[1] << 26) + x2[0];
-	x_51.v[1] = (x2[3] << 26) + x2[2];
-	x_51.v[2] = (x2[5] << 26) + x2[4];
-	x_51.v[3] = (x2[7] << 26) + x2[6];
-	x_51.v[4] = (x2[9] << 26) + x2[8];
-
-	fe51_invert (&z_51, &z_51);
-	fe51_mul_avx (&x_51, &x_51, &z_51);
-	fe51_pack_avx (q, &x_51);
-
-	return 0;
-}
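[Editorial note: per the commit message, all of the deleted code above is
superseded by libsodium. A minimal sketch of libsodium's documented API for
the same two entry points; it is not a quote from the rspamd tree, assumes
libsodium is installed, and abbreviates error handling:]

#include <sodium.h>
#include <stdio.h>

int main (void)
{
    unsigned char sk[crypto_scalarmult_SCALARBYTES];
    unsigned char pk[crypto_scalarmult_BYTES];
    unsigned char shared[crypto_scalarmult_BYTES];

    if (sodium_init () < 0) {
        return 1; /* library could not initialise itself */
    }

    randombytes_buf (sk, sizeof (sk));
    /* q = n * basepoint: the scalarmult_base_avx() equivalent;
     * clamping happens inside libsodium */
    crypto_scalarmult_base (pk, sk);
    /* q = n * p for an arbitrary point: the scalarmult_avx() equivalent;
     * may return -1 for degenerate (all-zero) results */
    if (crypto_scalarmult (shared, sk, pk) != 0) {
        return 1;
    }
    printf ("x25519 ok\n");
    return 0;
}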
diff --git a/src/libcryptobox/curve25519/base_constants.h b/src/libcryptobox/curve25519/base_constants.h
deleted file mode 100644
index 48adfcf03..000000000
--- a/src/libcryptobox/curve25519/base_constants.h
+++ /dev/null
@@ -1,1346 +0,0 @@
-static const ge_precomp event_loop[32][8] = {
-{
- {
- { 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605 },
- { -12545711,934262,-2722910,3049990,-727428,9406986,12720692,5043384,19500929,-15469378 },
- { -8738181,4489570,9688441,-14785194,10184609,-12363380,29287919,11864899,-24514362,-4438546 },
- },
[... elided: the rest of the deleted table, 1346 lines in total per the hunk
header: a 32 x 8 array of ge_precomp entries (precomputed base-point
multiples, per the file name), each entry three rows of ten signed limbs in
the same radix-2^25.5 representation as the sample above; the table
continues beyond this excerpt ...]
3753511,-3421066,10617074,2028709,14841030,-6721664,28718732,-15762884,20527771,12988982 }, - }, - { - { -14822485,-5797269,-3707987,12689773,-898983,-10914866,-24183046,-10564943,3299665,-12424953 }, - { -16777703,-15253301,-9642417,4978983,3308785,8755439,6943197,6461331,-25583147,8991218 }, - { -17226263,1816362,-1673288,-6086439,31783888,-8175991,-32948145,7417950,-30242287,1507265 }, - }, - { - { 29692663,6829891,-10498800,4334896,20945975,-11906496,-28887608,8209391,14606362,-10647073 }, - { -3481570,8707081,32188102,5672294,22096700,1711240,-33020695,9761487,4170404,-2085325 }, - { -11587470,14855945,-4127778,-1531857,-26649089,15084046,22186522,16002000,-14276837,-8400798 }, - }, - { - { -4811456,13761029,-31703877,-2483919,-3312471,7869047,-7113572,-9620092,13240845,10965870 }, - { -7742563,-8256762,-14768334,-13656260,-23232383,12387166,4498947,14147411,29514390,4302863 }, - { -13413405,-12407859,20757302,-13801832,14785143,8976368,-5061276,-2144373,17846988,-13971927 }, - }, -}, -{ - { - { -2244452,-754728,-4597030,-1066309,-6247172,1455299,-21647728,-9214789,-5222701,12650267 }, - { -9906797,-16070310,21134160,12198166,-27064575,708126,387813,13770293,-19134326,10958663 }, - { 22470984,12369526,23446014,-5441109,-21520802,-9698723,-11772496,-11574455,-25083830,4271862 }, - }, - { - { -25169565,-10053642,-19909332,15361595,-5984358,2159192,75375,-4278529,-32526221,8469673 }, - { 15854970,4148314,-8893890,7259002,11666551,13824734,-30531198,2697372,24154791,-9460943 }, - { 15446137,-15806644,29759747,14019369,30811221,-9610191,-31582008,12840104,24913809,9815020 }, - }, - { - { -4709286,-5614269,-31841498,-12288893,-14443537,10799414,-9103676,13438769,18735128,9466238 }, - { 11933045,9281483,5081055,-5183824,-2628162,-4905629,-7727821,-10896103,-22728655,16199064 }, - { 14576810,379472,-26786533,-8317236,-29426508,-10812974,-102766,1876699,30801119,2164795 }, - }, - { - { 15995086,3199873,13672555,13712240,-19378835,-4647646,-13081610,-15496269,-13492807,1268052 }, - { -10290614,-3659039,-3286592,10948818,23037027,3794475,-3470338,-12600221,-17055369,3565904 }, - { 29210088,-9419337,-5919792,-4952785,10834811,-13327726,-16512102,-10820713,-27162222,-14030531 }, - }, - { - { -13161890,15508588,16663704,-8156150,-28349942,9019123,-29183421,-3769423,2244111,-14001979 }, - { -5152875,-3800936,-9306475,-6071583,16243069,14684434,-25673088,-16180800,13491506,4641841 }, - { 10813417,643330,-19188515,-728916,30292062,-16600078,27548447,-7721242,14476989,-12767431 }, - }, - { - { 10292079,9984945,6481436,8279905,-7251514,7032743,27282937,-1644259,-27912810,12651324 }, - { -31185513,-813383,22271204,11835308,10201545,15351028,17099662,3988035,21721536,-3148940 }, - { 10202177,-6545839,-31373232,-9574638,-32150642,-8119683,-12906320,3852694,13216206,14842320 }, - }, - { - { -15815640,-10601066,-6538952,-7258995,-6984659,-6581778,-31500847,13765824,-27434397,9900184 }, - { 14465505,-13833331,-32133984,-14738873,-27443187,12990492,33046193,15796406,-7051866,-8040114 }, - { 30924417,-8279620,6359016,-12816335,16508377,9071735,-25488601,15413635,9524356,-7018878 }, - }, - { - { 12274201,-13175547,32627641,-1785326,6736625,13267305,5237659,-5109483,15663516,4035784 }, - { -2951309,8903985,17349946,601635,-16432815,-4612556,-13732739,-15889334,-22258478,4659091 }, - { -16916263,-4952973,-30393711,-15158821,20774812,15897498,5736189,15026997,-2178256,-13455585 }, - }, -}, -{ - { - { -8858980,-2219056,28571666,-10155518,-474467,-10105698,-3801496,278095,23440562,-290208 }, - { 
10226241,-5928702,15139956,120818,-14867693,5218603,32937275,11551483,-16571960,-7442864 }, - { 17932739,-12437276,-24039557,10749060,11316803,7535897,22503767,5561594,-3646624,3898661 }, - }, - { - { 7749907,-969567,-16339731,-16464,-25018111,15122143,-1573531,7152530,21831162,1245233 }, - { 26958459,-14658026,4314586,8346991,-5677764,11960072,-32589295,-620035,-30402091,-16716212 }, - { -12165896,9166947,33491384,13673479,29787085,13096535,6280834,14587357,-22338025,13987525 }, - }, - { - { -24349909,7778775,21116000,15572597,-4833266,-5357778,-4300898,-5124639,-7469781,-2858068 }, - { 9681908,-6737123,-31951644,13591838,-6883821,386950,31622781,6439245,-14581012,4091397 }, - { -8426427,1470727,-28109679,-1596990,3978627,-5123623,-19622683,12092163,29077877,-14741988 }, - }, - { - { 5269168,-6859726,-13230211,-8020715,25932563,1763552,-5606110,-5505881,-20017847,2357889 }, - { 32264008,-15407652,-5387735,-1160093,-2091322,-3946900,23104804,-12869908,5727338,189038 }, - { 14609123,-8954470,-6000566,-16622781,-14577387,-7743898,-26745169,10942115,-25888931,-14884697 }, - }, - { - { 20513500,5557931,-15604613,7829531,26413943,-2019404,-21378968,7471781,13913677,-5137875 }, - { -25574376,11967826,29233242,12948236,-6754465,4713227,-8940970,14059180,12878652,8511905 }, - { -25656801,3393631,-2955415,-7075526,-2250709,9366908,-30223418,6812974,5568676,-3127656 }, - }, - { - { 11630004,12144454,2116339,13606037,27378885,15676917,-17408753,-13504373,-14395196,8070818 }, - { 27117696,-10007378,-31282771,-5570088,1127282,12772488,-29845906,10483306,-11552749,-1028714 }, - { 10637467,-5688064,5674781,1072708,-26343588,-6982302,-1683975,9177853,-27493162,15431203 }, - }, - { - { 20525145,10892566,-12742472,12779443,-29493034,16150075,-28240519,14943142,-15056790,-7935931 }, - { -30024462,5626926,-551567,-9981087,753598,11981191,25244767,-3239766,-3356550,9594024 }, - { -23752644,2636870,-5163910,-10103818,585134,7877383,11345683,-6492290,13352335,-10977084 }, - }, - { - { -1931799,-5407458,3304649,-12884869,17015806,-4877091,-29783850,-7752482,-13215537,-319204 }, - { 20239939,6607058,6203985,3483793,-18386976,-779229,-20723742,15077870,-22750759,14523817 }, - { 27406042,-6041657,27423596,-4497394,4996214,10002360,-28842031,-4545494,-30172742,-4805667 }, - }, -}, -{ - { - { 11374242,12660715,17861383,-12540833,10935568,1099227,-13886076,-9091740,-27727044,11358504 }, - { -12730809,10311867,1510375,10778093,-2119455,-9145702,32676003,11149336,-26123651,4985768 }, - { -19096303,341147,-6197485,-239033,15756973,-8796662,-983043,13794114,-19414307,-15621255 }, - }, - { - { 6490081,11940286,25495923,-7726360,8668373,-8751316,3367603,6970005,-1691065,-9004790 }, - { 1656497,13457317,15370807,6364910,13605745,8362338,-19174622,-5475723,-16796596,-5031438 }, - { -22273315,-13524424,-64685,-4334223,-18605636,-10921968,-20571065,-7007978,-99853,-10237333 }, - }, - { - { 17747465,10039260,19368299,-4050591,-20630635,-16041286,31992683,-15857976,-29260363,-5511971 }, - { 31932027,-4986141,-19612382,16366580,22023614,88450,11371999,-3744247,4882242,-10626905 }, - { 29796507,37186,19818052,10115756,-11829032,3352736,18551198,3272828,-5190932,-4162409 }, - }, - { - { 12501286,4044383,-8612957,-13392385,-32430052,5136599,-19230378,-3529697,330070,-3659409 }, - { 6384877,2899513,17807477,7663917,-2358888,12363165,25366522,-8573892,-271295,12071499 }, - { -8365515,-4042521,25133448,-4517355,-6211027,2265927,-32769618,1936675,-5159697,3829363 }, - }, - { - { 
28425966,-5835433,-577090,-4697198,-14217555,6870930,7921550,-6567787,26333140,14267664 }, - { -11067219,11871231,27385719,-10559544,-4585914,-11189312,10004786,-8709488,-21761224,8930324 }, - { -21197785,-16396035,25654216,-1725397,12282012,11008919,1541940,4757911,-26491501,-16408940 }, - }, - { - { 13537262,-7759490,-20604840,10961927,-5922820,-13218065,-13156584,6217254,-15943699,13814990 }, - { -17422573,15157790,18705543,29619,24409717,-260476,27361681,9257833,-1956526,-1776914 }, - { -25045300,-10191966,15366585,15166509,-13105086,8423556,-29171540,12361135,-18685978,4578290 }, - }, - { - { 24579768,3711570,1342322,-11180126,-27005135,14124956,-22544529,14074919,21964432,8235257 }, - { -6528613,-2411497,9442966,-5925588,12025640,-1487420,-2981514,-1669206,13006806,2355433 }, - { -16304899,-13605259,-6632427,-5142349,16974359,-10911083,27202044,1719366,1141648,-12796236 }, - }, - { - { -12863944,-13219986,-8318266,-11018091,-6810145,-4843894,13475066,-3133972,32674895,13715045 }, - { 11423335,-5468059,32344216,8962751,24989809,9241752,-13265253,16086212,-28740881,-15642093 }, - { -1409668,12530728,-6368726,10847387,19531186,-14132160,-11709148,7791794,-27245943,4383347 }, - }, -}, -{ - { - { -28970898,5271447,-1266009,-9736989,-12455236,16732599,-4862407,-4906449,27193557,6245191 }, - { -15193956,5362278,-1783893,2695834,4960227,12840725,23061898,3260492,22510453,8577507 }, - { -12632451,11257346,-32692994,13548177,-721004,10879011,31168030,13952092,-29571492,-3635906 }, - }, - { - { 3877321,-9572739,32416692,5405324,-11004407,-13656635,3759769,11935320,5611860,8164018 }, - { -16275802,14667797,15906460,12155291,-22111149,-9039718,32003002,-8832289,5773085,-8422109 }, - { -23788118,-8254300,1950875,8937633,18686727,16459170,-905725,12376320,31632953,190926 }, - }, - { - { -24593607,-16138885,-8423991,13378746,14162407,6901328,-8288749,4508564,-25341555,-3627528 }, - { 8884438,-5884009,6023974,10104341,-6881569,-4941533,18722941,-14786005,-1672488,827625 }, - { -32720583,-16289296,-32503547,7101210,13354605,2659080,-1800575,-14108036,-24878478,1541286 }, - }, - { - { 2901347,-1117687,3880376,-10059388,-17620940,-3612781,-21802117,-3567481,20456845,-1885033 }, - { 27019610,12299467,-13658288,-1603234,-12861660,-4861471,-19540150,-5016058,29439641,15138866 }, - { 21536104,-6626420,-32447818,-10690208,-22408077,5175814,-5420040,-16361163,7779328,109896 }, - }, - { - { 30279744,14648750,-8044871,6425558,13639621,-743509,28698390,12180118,23177719,-554075 }, - { 26572847,3405927,-31701700,12890905,-19265668,5335866,-6493768,2378492,4439158,-13279347 }, - { -22716706,3489070,-9225266,-332753,18875722,-1140095,14819434,-12731527,-17717757,-5461437 }, - }, - { - { -5056483,16566551,15953661,3767752,-10436499,15627060,-820954,2177225,8550082,-15114165 }, - { -18473302,16596775,-381660,15663611,22860960,15585581,-27844109,-3582739,-23260460,-8428588 }, - { -32480551,15707275,-8205912,-5652081,29464558,2713815,-22725137,15860482,-21902570,1494193 }, - }, - { - { -19562091,-14087393,-25583872,-9299552,13127842,759709,21923482,16529112,8742704,12967017 }, - { -28464899,1553205,32536856,-10473729,-24691605,-406174,-8914625,-2933896,-29903758,15553883 }, - { 21877909,3230008,9881174,10539357,-4797115,2841332,11543572,14513274,19375923,-12647961 }, - }, - { - { 8832269,-14495485,13253511,5137575,5037871,4078777,24880818,-6222716,2862653,9455043 }, - { 29306751,5123106,20245049,-14149889,9592566,8447059,-2077124,-2990080,15511449,4789663 }, - { 
-20679756,7004547,8824831,-9434977,-4045704,-3750736,-5754762,108893,23513200,16652362 }, - }, -}, -{ - { - { -33256173,4144782,-4476029,-6579123,10770039,-7155542,-6650416,-12936300,-18319198,10212860 }, - { 2756081,8598110,7383731,-6859892,22312759,-1105012,21179801,2600940,-9988298,-12506466 }, - { -24645692,13317462,-30449259,-15653928,21365574,-10869657,11344424,864440,-2499677,-16710063 }, - }, - { - { -26432803,6148329,-17184412,-14474154,18782929,-275997,-22561534,211300,2719757,4940997 }, - { -1323882,3911313,-6948744,14759765,-30027150,7851207,21690126,8518463,26699843,5276295 }, - { -13149873,-6429067,9396249,365013,24703301,-10488939,1321586,149635,-15452774,7159369 }, - }, - { - { 9987780,-3404759,17507962,9505530,9731535,-2165514,22356009,8312176,22477218,-8403385 }, - { 18155857,-16504990,19744716,9006923,15154154,-10538976,24256460,-4864995,-22548173,9334109 }, - { 2986088,-4911893,10776628,-3473844,10620590,-7083203,-21413845,14253545,-22587149,536906 }, - }, - { - { 4377756,8115836,24567078,15495314,11625074,13064599,7390551,10589625,10838060,-15420424 }, - { -19342404,867880,9277171,-3218459,-14431572,-1986443,19295826,-15796950,6378260,699185 }, - { 7895026,4057113,-7081772,-13077756,-17886831,-323126,-716039,15693155,-5045064,-13373962 }, - }, - { - { -7737563,-5869402,-14566319,-7406919,11385654,13201616,31730678,-10962840,-3918636,-9669325 }, - { 10188286,-15770834,-7336361,13427543,22223443,14896287,30743455,7116568,-21786507,5427593 }, - { 696102,13206899,27047647,-10632082,15285305,-9853179,10798490,-4578720,19236243,12477404 }, - }, - { - { -11229439,11243796,-17054270,-8040865,-788228,-8167967,-3897669,11180504,-23169516,7733644 }, - { 17800790,-14036179,-27000429,-11766671,23887827,3149671,23466177,-10538171,10322027,15313801 }, - { 26246234,11968874,32263343,-5468728,6830755,-13323031,-15794704,-101982,-24449242,10890804 }, - }, - { - { -31365647,10271363,-12660625,-6267268,16690207,-13062544,-14982212,16484931,25180797,-5334884 }, - { -586574,10376444,-32586414,-11286356,19801893,10997610,2276632,9482883,316878,13820577 }, - { -9882808,-4510367,-2115506,16457136,-11100081,11674996,30756178,-7515054,30696930,-3712849 }, - }, - { - { 32988917,-9603412,12499366,7910787,-10617257,-11931514,-7342816,-9985397,-32349517,7392473 }, - { -8855661,15927861,9866406,-3649411,-2396914,-16655781,-30409476,-9134995,25112947,-2926644 }, - { -2504044,-436966,25621774,-5678772,15085042,-5479877,-24884878,-13526194,5537438,-13914319 }, - }, -}, -{ - { - { -11225584,2320285,-9584280,10149187,-33444663,5808648,-14876251,-1729667,31234590,6090599 }, - { -9633316,116426,26083934,2897444,-6364437,-2688086,609721,15878753,-6970405,-9034768 }, - { -27757857,247744,-15194774,-9002551,23288161,-10011936,-23869595,6503646,20650474,1804084 }, - }, - { - { -27589786,15456424,8972517,8469608,15640622,4439847,3121995,-10329713,27842616,-202328 }, - { -15306973,2839644,22530074,10026331,4602058,5048462,28248656,5031932,-11375082,12714369 }, - { 20807691,-7270825,29286141,11421711,-27876523,-13868230,-21227475,1035546,-19733229,12796920 }, - }, - { - { 12076899,-14301286,-8785001,-11848922,-25012791,16400684,-17591495,-12899438,3480665,-15182815 }, - { -32361549,5457597,28548107,7833186,7303070,-11953545,-24363064,-15921875,-33374054,2771025 }, - { -21389266,421932,26597266,6860826,22486084,-6737172,-17137485,-4210226,-24552282,15673397 }, - }, - { - { -20184622,2338216,19788685,-9620956,-4001265,-8740893,-20271184,4733254,3727144,-12934448 }, - { 
6120119,814863,-11794402,-622716,6812205,-15747771,2019594,7975683,31123697,-10958981 }, - { 30069250,-11435332,30434654,2958439,18399564,-976289,12296869,9204260,-16432438,9648165 }, - }, - { - { 32705432,-1550977,30705658,7451065,-11805606,9631813,3305266,5248604,-26008332,-11377501 }, - { 17219865,2375039,-31570947,-5575615,-19459679,9219903,294711,15298639,2662509,-16297073 }, - { -1172927,-7558695,-4366770,-4287744,-21346413,-8434326,32087529,-1222777,32247248,-14389861 }, - }, - { - { 14312628,1221556,17395390,-8700143,-4945741,-8684635,-28197744,-9637817,-16027623,-13378845 }, - { -1428825,-9678990,-9235681,6549687,-7383069,-468664,23046502,9803137,17597934,2346211 }, - { 18510800,15337574,26171504,981392,-22241552,7827556,-23491134,-11323352,3059833,-11782870 }, - }, - { - { 10141598,6082907,17829293,-1947643,9830092,13613136,-25556636,-5544586,-33502212,3592096 }, - { 33114168,-15889352,-26525686,-13343397,33076705,8716171,1151462,1521897,-982665,-6837803 }, - { -32939165,-4255815,23947181,-324178,-33072974,-12305637,-16637686,3891704,26353178,693168 }, - }, - { - { 30374239,1595580,-16884039,13186931,4600344,406904,9585294,-400668,31375464,14369965 }, - { -14370654,-7772529,1510301,6434173,-18784789,-6262728,32732230,-13108839,17901441,16011505 }, - { 18171223,-11934626,-12500402,15197122,-11038147,-15230035,-19172240,-16046376,8764035,12309598 }, - }, -}, -{ - { - { 5975908,-5243188,-19459362,-9681747,-11541277,14015782,-23665757,1228319,17544096,-10593782 }, - { 5811932,-1715293,3442887,-2269310,-18367348,-8359541,-18044043,-15410127,-5565381,12348900 }, - { -31399660,11407555,25755363,6891399,-3256938,14872274,-24849353,8141295,-10632534,-585479 }, - }, - { - { -12675304,694026,-5076145,13300344,14015258,-14451394,-9698672,-11329050,30944593,1130208 }, - { 8247766,-6710942,-26562381,-7709309,-14401939,-14648910,4652152,2488540,23550156,-271232 }, - { 17294316,-3788438,7026748,15626851,22990044,113481,2267737,-5908146,-408818,-137719 }, - }, - { - { 16091085,-16253926,18599252,7340678,2137637,-1221657,-3364161,14550936,3260525,-7166271 }, - { -4910104,-13332887,18550887,10864893,-16459325,-7291596,-23028869,-13204905,-12748722,2701326 }, - { -8574695,16099415,4629974,-16340524,-20786213,-6005432,-10018363,9276971,11329923,1862132 }, - }, - { - { 14763076,-15903608,-30918270,3689867,3511892,10313526,-21951088,12219231,-9037963,-940300 }, - { 8894987,-3446094,6150753,3013931,301220,15693451,-31981216,-2909717,-15438168,11595570 }, - { 15214962,3537601,-26238722,-14058872,4418657,-15230761,13947276,10730794,-13489462,-4363670 }, - }, - { - { -2538306,7682793,32759013,263109,-29984731,-7955452,-22332124,-10188635,977108,699994 }, - { -12466472,4195084,-9211532,550904,-15565337,12917920,19118110,-439841,-30534533,-14337913 }, - { 31788461,-14507657,4799989,7372237,8808585,-14747943,9408237,-10051775,12493932,-5409317 }, - }, - { - { -25680606,5260744,-19235809,-6284470,-3695942,16566087,27218280,2607121,29375955,6024730 }, - { 842132,-2794693,-4763381,-8722815,26332018,-12405641,11831880,6985184,-9940361,2854096 }, - { -4847262,-7969331,2516242,-5847713,9695691,-7221186,16512645,960770,12121869,16648078 }, - }, - { - { -15218652,14667096,-13336229,2013717,30598287,-464137,-31504922,-7882064,20237806,2838411 }, - { -19288047,4453152,15298546,-16178388,22115043,-15972604,12544294,-13470457,1068881,-12499905 }, - { -9558883,-16518835,33238498,13506958,30505848,-1114596,-8486907,-2630053,12521378,4845654 }, - }, - { - { 
-28198521,10744108,-2958380,10199664,7759311,-13088600,3409348,-873400,-6482306,-12885870 }, - { -23561822,6230156,-20382013,10655314,-24040585,-11621172,10477734,-1240216,-3113227,13974498 }, - { 12966261,15550616,-32038948,-1615346,21025980,-629444,5642325,7188737,18895762,12629579 }, - }, -}, -{ - { - { 14741879,-14946887,22177208,-11721237,1279741,8058600,11758140,789443,32195181,3895677 }, - { 10758205,15755439,-4509950,9243698,-4879422,6879879,-2204575,-3566119,-8982069,4429647 }, - { -2453894,15725973,-20436342,-10410672,-5803908,-11040220,-7135870,-11642895,18047436,-15281743 }, - }, - { - { -25173001,-11307165,29759956,11776784,-22262383,-15820455,10993114,-12850837,-17620701,-9408468 }, - { 21987233,700364,-24505048,14972008,-7774265,-5718395,32155026,2581431,-29958985,8773375 }, - { -25568350,454463,-13211935,16126715,25240068,8594567,20656846,12017935,-7874389,-13920155 }, - }, - { - { 6028182,6263078,-31011806,-11301710,-818919,2461772,-31841174,-5468042,-1721788,-2776725 }, - { -12278994,16624277,987579,-5922598,32908203,1248608,7719845,-4166698,28408820,6816612 }, - { -10358094,-8237829,19549651,-12169222,22082623,16147817,20613181,13982702,-10339570,5067943 }, - }, - { - { -30505967,-3821767,12074681,13582412,-19877972,2443951,-19719286,12746132,5331210,-10105944 }, - { 30528811,3601899,-1957090,4619785,-27361822,-15436388,24180793,-12570394,27679908,-1648928 }, - { 9402404,-13957065,32834043,10838634,-26580150,-13237195,26653274,-8685565,22611444,-12715406 }, - }, - { - { 22190590,1118029,22736441,15130463,-30460692,-5991321,19189625,-4648942,4854859,6622139 }, - { -8310738,-2953450,-8262579,-3388049,-10401731,-271929,13424426,-3567227,26404409,13001963 }, - { -31241838,-15415700,-2994250,8939346,11562230,-12840670,-26064365,-11621720,-15405155,11020693 }, - }, - { - { 1866042,-7949489,-7898649,-10301010,12483315,13477547,3175636,-12424163,28761762,1406734 }, - { -448555,-1777666,13018551,3194501,-9580420,-11161737,24760585,-4347088,25577411,-13378680 }, - { -24290378,4759345,-690653,-1852816,2066747,10693769,-29595790,9884936,-9368926,4745410 }, - }, - { - { -9141284,6049714,-19531061,-4341411,-31260798,9944276,-15462008,-11311852,10931924,-11931931 }, - { -16561513,14112680,-8012645,4817318,-8040464,-11414606,-22853429,10856641,-20470770,13434654 }, - { 22759489,-10073434,-16766264,-1871422,13637442,-10168091,1765144,-12654326,28445307,-5364710 }, - }, - { - { 29875063,12493613,2795536,-3786330,1710620,15181182,-10195717,-8788675,9074234,1167180 }, - { -26205683,11014233,-9842651,-2635485,-26908120,7532294,-18716888,-9535498,3843903,9367684 }, - { -10969595,-6403711,9591134,9582310,11349256,108879,16235123,8601684,-139197,4242895 }, - }, -}, -{ - { - { 22092954,-13191123,-2042793,-11968512,32186753,-11517388,-6574341,2470660,-27417366,16625501 }, - { -11057722,3042016,13770083,-9257922,584236,-544855,-7770857,2602725,-27351616,14247413 }, - { 6314175,-10264892,-32772502,15957557,-10157730,168750,-8618807,14290061,27108877,-1180880 }, - }, - { - { -8586597,-7170966,13241782,10960156,-32991015,-13794596,33547976,-11058889,-27148451,981874 }, - { 22833440,9293594,-32649448,-13618667,-9136966,14756819,-22928859,-13970780,-10479804,-16197962 }, - { -7768587,3326786,-28111797,10783824,19178761,14905060,22680049,13906969,-15933690,3797899 }, - }, - { - { 21721356,-4212746,-12206123,9310182,-3882239,-13653110,23740224,-2709232,20491983,-8042152 }, - { 9209270,-15135055,-13256557,-6167798,-731016,15289673,25947805,15286587,30997318,-6703063 }, - { 
7392032,16618386,23946583,-8039892,-13265164,-1533858,-14197445,-2321576,17649998,-250080 }, - }, - { - { -9301088,-14193827,30609526,-3049543,-25175069,-1283752,-15241566,-9525724,-2233253,7662146 }, - { -17558673,1763594,-33114336,15908610,-30040870,-12174295,7335080,-8472199,-3174674,3440183 }, - { -19889700,-5977008,-24111293,-9688870,10799743,-16571957,40450,-4431835,4862400,1133 }, - }, - { - { -32856209,-7873957,-5422389,14860950,-16319031,7956142,7258061,311861,-30594991,-7379421 }, - { -3773428,-1565936,28985340,7499440,24445838,9325937,29727763,16527196,18278453,15405622 }, - { -4381906,8508652,-19898366,-3674424,-5984453,15149970,-13313598,843523,-21875062,13626197 }, - }, - { - { 2281448,-13487055,-10915418,-2609910,1879358,16164207,-10783882,3953792,13340839,15928663 }, - { 31727126,-7179855,-18437503,-8283652,2875793,-16390330,-25269894,-7014826,-23452306,5964753 }, - { 4100420,-5959452,-17179337,6017714,-18705837,12227141,-26684835,11344144,2538215,-7570755 }, - }, - { - { -9433605,6123113,11159803,-2156608,30016280,14966241,-20474983,1485421,-629256,-15958862 }, - { -26804558,4260919,11851389,9658551,-32017107,16367492,-20205425,-13191288,11659922,-11115118 }, - { 26180396,10015009,-30844224,-8581293,5418197,9480663,2231568,-10170080,33100372,-1306171 }, - }, - { - { 15121113,-5201871,-10389905,15427821,-27509937,-15992507,21670947,4486675,-5931810,-14466380 }, - { 16166486,-9483733,-11104130,6023908,-31926798,-1364923,2340060,-16254968,-10735770,-10039824 }, - { 28042865,-3557089,-12126526,12259706,-3717498,-6945899,6766453,-8689599,18036436,5803270 }, - }, -}, -{ - { - { -817581,6763912,11803561,1585585,10958447,-2671165,23855391,4598332,-6159431,-14117438 }, - { -31031306,-14256194,17332029,-2383520,31312682,-5967183,696309,50292,-20095739,11763584 }, - { -594563,-2514283,-32234153,12643980,12650761,14811489,665117,-12613632,-19773211,-10713562 }, - }, - { - { 30464590,-11262872,-4127476,-12734478,19835327,-7105613,-24396175,2075773,-17020157,992471 }, - { 18357185,-6994433,7766382,16342475,-29324918,411174,14578841,8080033,-11574335,-10601610 }, - { 19598397,10334610,12555054,2555664,18821899,-10339780,21873263,16014234,26224780,16452269 }, - }, - { - { -30223925,5145196,5944548,16385966,3976735,2009897,-11377804,-7618186,-20533829,3698650 }, - { 14187449,3448569,-10636236,-10810935,-22663880,-3433596,7268410,-10890444,27394301,12015369 }, - { 19695761,16087646,28032085,12999827,6817792,11427614,20244189,-1312777,-13259127,-3402461 }, - }, - { - { 30860103,12735208,-1888245,-4699734,-16974906,2256940,-8166013,12298312,-8550524,-10393462 }, - { -5719826,-11245325,-1910649,15569035,26642876,-7587760,-5789354,-15118654,-4976164,12651793 }, - { -2848395,9953421,11531313,-5282879,26895123,-12697089,-13118820,-16517902,9768698,-2533218 }, - }, - { - { -24719459,1894651,-287698,-4704085,15348719,-8156530,32767513,12765450,4940095,10678226 }, - { 18860224,15980149,-18987240,-1562570,-26233012,-11071856,-7843882,13944024,-24372348,16582019 }, - { -15504260,4970268,-29893044,4175593,-20993212,-2199756,-11704054,15444560,-11003761,7989037 }, - }, - { - { 31490452,5568061,-2412803,2182383,-32336847,4531686,-32078269,6200206,-19686113,-14800171 }, - { -17308668,-15879940,-31522777,-2831,-32887382,16375549,8680158,-16371713,28550068,-6857132 }, - { -28126887,-5688091,16837845,-1820458,-6850681,12700016,-30039981,4364038,1155602,5988841 }, - }, - { - { 21890435,-13272907,-12624011,12154349,-7831873,15300496,23148983,-4470481,24618407,8283181 }, - { 
-33136107,-10512751,9975416,6841041,-31559793,16356536,3070187,-7025928,1466169,10740210 }, - { -1509399,-15488185,-13503385,-10655916,32799044,909394,-13938903,-5779719,-32164649,-15327040 }, - }, - { - { 3960823,-14267803,-28026090,-15918051,-19404858,13146868,15567327,951507,-3260321,-573935 }, - { 24740841,5052253,-30094131,8961361,25877428,6165135,-24368180,14397372,-7380369,-6144105 }, - { -28888365,3510803,-28103278,-1158478,-11238128,-10631454,-15441463,-14453128,-1625486,-6494814 }, - }, -}, -{ - { - { 793299,-9230478,8836302,-6235707,-27360908,-2369593,33152843,-4885251,-9906200,-621852 }, - { 5666233,525582,20782575,-8038419,-24538499,14657740,16099374,1468826,-6171428,-15186581 }, - { -4859255,-3779343,-2917758,-6748019,7778750,11688288,-30404353,-9871238,-1558923,-9863646 }, - }, - { - { 10896332,-7719704,824275,472601,-19460308,3009587,25248958,14783338,-30581476,-15757844 }, - { 10566929,12612572,-31944212,11118703,-12633376,12362879,21752402,8822496,24003793,14264025 }, - { 27713862,-7355973,-11008240,9227530,27050101,2504721,23886875,-13117525,13958495,-5732453 }, - }, - { - { -23481610,4867226,-27247128,3900521,29838369,-8212291,-31889399,-10041781,7340521,-15410068 }, - { 4646514,-8011124,-22766023,-11532654,23184553,8566613,31366726,-1381061,-15066784,-10375192 }, - { -17270517,12723032,-16993061,14878794,21619651,-6197576,27584817,3093888,-8843694,3849921 }, - }, - { - { -9064912,2103172,25561640,-15125738,-5239824,9582958,32477045,-9017955,5002294,-15550259 }, - { -12057553,-11177906,21115585,-13365155,8808712,-12030708,16489530,13378448,-25845716,12741426 }, - { -5946367,10645103,-30911586,15390284,-3286982,-7118677,24306472,15852464,28834118,-7646072 }, - }, - { - { -17335748,-9107057,-24531279,9434953,-8472084,-583362,-13090771,455841,20461858,5491305 }, - { 13669248,-16095482,-12481974,-10203039,-14569770,-11893198,-24995986,11293807,-28588204,-9421832 }, - { 28497928,6272777,-33022994,14470570,8906179,-1225630,18504674,-14165166,29867745,-8795943 }, - }, - { - { -16207023,13517196,-27799630,-13697798,24009064,-6373891,-6367600,-13175392,22853429,-4012011 }, - { 24191378,16712145,-13931797,15217831,14542237,1646131,18603514,-11037887,12876623,-2112447 }, - { 17902668,4518229,-411702,-2829247,26878217,5258055,-12860753,608397,16031844,3723494 }, - }, - { - { -28632773,12763728,-20446446,7577504,33001348,-13017745,17558842,-7872890,23896954,-4314245 }, - { -20005381,-12011952,31520464,605201,2543521,5991821,-2945064,7229064,-9919646,-8826859 }, - { 28816045,298879,-28165016,-15920938,19000928,-1665890,-12680833,-2949325,-18051778,-2082915 }, - }, - { - { 16000882,-344896,3493092,-11447198,-29504595,-13159789,12577740,16041268,-19715240,7847707 }, - { 10151868,10572098,27312476,7922682,14825339,4723128,-32855931,-6519018,-10020567,3852848 }, - { -11430470,15697596,-21121557,-4420647,5386314,15063598,16514493,-15932110,29330899,-15076224 }, - }, -}, -{ - { - { -25499735,-4378794,-15222908,-6901211,16615731,2051784,3303702,15490,-27548796,12314391 }, - { 15683520,-6003043,18109120,-9980648,15337968,-5997823,-16717435,15921866,16103996,-3731215 }, - { -23169824,-10781249,13588192,-1628807,-3798557,-1074929,-19273607,5402699,-29815713,-9841101 }, - }, - { - { 23190676,2384583,-32714340,3462154,-29903655,-1529132,-11266856,8911517,-25205859,2739713 }, - { 21374101,-3554250,-33524649,9874411,15377179,11831242,-33529904,6134907,4931255,11987849 }, - { -7732,-2978858,-16223486,7277597,105524,-322051,-31480539,13861388,-30076310,10117930 }, - }, - { - { 
-29501170,-10744872,-26163768,13051539,-25625564,5089643,-6325503,6704079,12890019,15728940 }, - { -21972360,-11771379,-951059,-4418840,14704840,2695116,903376,-10428139,12885167,8311031 }, - { -17516482,5352194,10384213,-13811658,7506451,13453191,26423267,4384730,1888765,-5435404 }, - }, - { - { -25817338,-3107312,-13494599,-3182506,30896459,-13921729,-32251644,-12707869,-19464434,-3340243 }, - { -23607977,-2665774,-526091,4651136,5765089,4618330,6092245,14845197,17151279,-9854116 }, - { -24830458,-12733720,-15165978,10367250,-29530908,-265356,22825805,-7087279,-16866484,16176525 }, - }, - { - { -23583256,6564961,20063689,3798228,-4740178,7359225,2006182,-10363426,-28746253,-10197509 }, - { -10626600,-4486402,-13320562,-5125317,3432136,-6393229,23632037,-1940610,32808310,1099883 }, - { 15030977,5768825,-27451236,-2887299,-6427378,-15361371,-15277896,-6809350,2051441,-15225865 }, - }, - { - { -3362323,-7239372,7517890,9824992,23555850,295369,5148398,-14154188,-22686354,16633660 }, - { 4577086,-16752288,13249841,-15304328,19958763,-14537274,18559670,-10759549,8402478,-9864273 }, - { -28406330,-1051581,-26790155,-907698,-17212414,-11030789,9453451,-14980072,17983010,9967138 }, - }, - { - { -25762494,6524722,26585488,9969270,24709298,1220360,-1677990,7806337,17507396,3651560 }, - { -10420457,-4118111,14584639,15971087,-15768321,8861010,26556809,-5574557,-18553322,-11357135 }, - { 2839101,14284142,4029895,3472686,14402957,12689363,-26642121,8459447,-5605463,-7621941 }, - }, - { - { -4839289,-3535444,9744961,2871048,25113978,3187018,-25110813,-849066,17258084,-7977739 }, - { 18164541,-10595176,-17154882,-1542417,19237078,-9745295,23357533,-15217008,26908270,12150756 }, - { -30264870,-7647865,5112249,-7036672,-1499807,-6974257,43168,-5537701,-32302074,16215819 }, - }, -}, -{ - { - { -6898905,9824394,-12304779,-4401089,-31397141,-6276835,32574489,12532905,-7503072,-8675347 }, - { -27343522,-16515468,-27151524,-10722951,946346,16291093,254968,7168080,21676107,-1943028 }, - { 21260961,-8424752,-16831886,-11920822,-23677961,3968121,-3651949,-6215466,-3556191,-7913075 }, - }, - { - { 16544754,13250366,-16804428,15546242,-4583003,12757258,-2462308,-8680336,-18907032,-9662799 }, - { -2415239,-15577728,18312303,4964443,-15272530,-12653564,26820651,16690659,25459437,-4564609 }, - { -25144690,11425020,28423002,-11020557,-6144921,-15826224,9142795,-2391602,-6432418,-1644817 }, - }, - { - { -23104652,6253476,16964147,-3768872,-25113972,-12296437,-27457225,-16344658,6335692,7249989 }, - { -30333227,13979675,7503222,-12368314,-11956721,-4621693,-30272269,2682242,25993170,-12478523 }, - { 4364628,5930691,32304656,-10044554,-8054781,15091131,22857016,-10598955,31820368,15075278 }, - }, - { - { 31879134,-8918693,17258761,90626,-8041836,-4917709,24162788,-9650886,-17970238,12833045 }, - { 19073683,14851414,-24403169,-11860168,7625278,11091125,-19619190,2074449,-9413939,14905377 }, - { 24483667,-11935567,-2518866,-11547418,-1553130,15355506,-25282080,9253129,27628530,-7555480 }, - }, - { - { 17597607,8340603,19355617,552187,26198470,-3176583,4593324,-9157582,-14110875,15297016 }, - { 510886,14337390,-31785257,16638632,6328095,2713355,-20217417,-11864220,8683221,2921426 }, - { 18606791,11874196,27155355,-5281482,-24031742,6265446,-25178240,-1278924,4674690,13890525 }, - }, - { - { 13609624,13069022,-27372361,-13055908,24360586,9592974,14977157,9835105,4389687,288396 }, - { 9922506,-519394,13613107,5883594,-18758345,-434263,-12304062,8317628,23388070,16052080 }, - { 
12720016,11937594,-31970060,-5028689,26900120,8561328,-20155687,-11632979,-14754271,-10812892 }, - }, - { - { 15961858,14150409,26716931,-665832,-22794328,13603569,11829573,7467844,-28822128,929275 }, - { 11038231,-11582396,-27310482,-7316562,-10498527,-16307831,-23479533,-9371869,-21393143,2465074 }, - { 20017163,-4323226,27915242,1529148,12396362,15675764,13817261,-9658066,2463391,-4622140 }, - }, - { - { -16358878,-12663911,-12065183,4996454,-1256422,1073572,9583558,12851107,4003896,12673717 }, - { -1731589,-15155870,-3262930,16143082,19294135,13385325,14741514,-9103726,7903886,2348101 }, - { 24536016,-16515207,12715592,-3862155,1511293,10047386,-3842346,-7129159,-28377538,10048127 }, - }, -}, -{ - { - { -12622226,-6204820,30718825,2591312,-10617028,12192840,18873298,-7297090,-32297756,15221632 }, - { -26478122,-11103864,11546244,-1852483,9180880,7656409,-21343950,2095755,29769758,6593415 }, - { -31994208,-2907461,4176912,3264766,12538965,-868111,26312345,-6118678,30958054,8292160 }, - }, - { - { 31429822,-13959116,29173532,15632448,12174511,-2760094,32808831,3977186,26143136,-3148876 }, - { 22648901,1402143,-22799984,13746059,7936347,365344,-8668633,-1674433,-3758243,-2304625 }, - { -15491917,8012313,-2514730,-12702462,-23965846,-10254029,-1612713,-1535569,-16664475,8194478 }, - }, - { - { 27338066,-7507420,-7414224,10140405,-19026427,-6589889,27277191,8855376,28572286,3005164 }, - { 26287124,4821776,25476601,-4145903,-3764513,-15788984,-18008582,1182479,-26094821,-13079595 }, - { -7171154,3178080,23970071,6201893,-17195577,-4489192,-21876275,-13982627,32208683,-1198248 }, - }, - { - { -16657702,2817643,-10286362,14811298,6024667,13349505,-27315504,-10497842,-27672585,-11539858 }, - { 15941029,-9405932,-21367050,8062055,31876073,-238629,-15278393,-1444429,15397331,-4130193 }, - { 8934485,-13485467,-23286397,-13423241,-32446090,14047986,31170398,-1441021,-27505566,15087184 }, - }, - { - { -18357243,-2156491,24524913,-16677868,15520427,-6360776,-15502406,11461896,16788528,-5868942 }, - { -1947386,16013773,21750665,3714552,-17401782,-16055433,-3770287,-10323320,31322514,-11615635 }, - { 21426655,-5650218,-13648287,-5347537,-28812189,-4920970,-18275391,-14621414,13040862,-12112948 }, - }, - { - { 11293895,12478086,-27136401,15083750,-29307421,14748872,14555558,-13417103,1613711,4896935 }, - { -25894883,15323294,-8489791,-8057900,25967126,-13425460,2825960,-4897045,-23971776,-11267415 }, - { -15924766,-5229880,-17443532,6410664,3622847,10243618,20615400,12405433,-23753030,-8436416 }, - }, - { - { -7091295,12556208,-20191352,9025187,-17072479,4333801,4378436,2432030,23097949,-566018 }, - { 4565804,-16025654,20084412,-7842817,1724999,189254,24767264,10103221,-18512313,2424778 }, - { 366633,-11976806,8173090,-6890119,30788634,5745705,-7168678,1344109,-3642553,12412659 }, - }, - { - { -24001791,7690286,14929416,-168257,-32210835,-13412986,24162697,-15326504,-3141501,11179385 }, - { 18289522,-14724954,8056945,16430056,-21729724,7842514,-6001441,-1486897,-18684645,-11443503 }, - { 476239,6601091,-6152790,-9723375,17503545,-4863900,27672959,13403813,11052904,5219329 }, - }, -}, -{ - { - { 20678546,-8375738,-32671898,8849123,-5009758,14574752,31186971,-3973730,9014762,-8579056 }, - { -13644050,-10350239,-15962508,5075808,-1514661,-11534600,-33102500,9160280,8473550,-3256838 }, - { 24900749,14435722,17209120,-15292541,-22592275,9878983,-7689309,-16335821,-24568481,11788948 }, - }, - { - { -3118155,-11395194,-13802089,14797441,9652448,-6845904,-20037437,10410733,-24568470,-1458691 }, - { 
-15659161,16736706,-22467150,10215878,-9097177,7563911,11871841,-12505194,-18513325,8464118 }, - { -23400612,8348507,-14585951,-861714,-3950205,-6373419,14325289,8628612,33313881,-8370517 }, - }, - { - { -20186973,-4967935,22367356,5271547,-1097117,-4788838,-24805667,-10236854,-8940735,-5818269 }, - { -6948785,-1795212,-32625683,-16021179,32635414,-7374245,15989197,-12838188,28358192,-4253904 }, - { -23561781,-2799059,-32351682,-1661963,-9147719,10429267,-16637684,4072016,-5351664,5596589 }, - }, - { - { -28236598,-3390048,12312896,6213178,3117142,16078565,29266239,2557221,1768301,15373193 }, - { -7243358,-3246960,-4593467,-7553353,-127927,-912245,-1090902,-4504991,-24660491,3442910 }, - { -30210571,5124043,14181784,8197961,18964734,-11939093,22597931,7176455,-18585478,13365930 }, - }, - { - { -7877390,-1499958,8324673,4690079,6261860,890446,24538107,-8570186,-9689599,-3031667 }, - { 25008904,-10771599,-4305031,-9638010,16265036,15721635,683793,-11823784,15723479,-15163481 }, - { -9660625,12374379,-27006999,-7026148,-7724114,-12314514,11879682,5400171,519526,-1235876 }, - }, - { - { 22258397,-16332233,-7869817,14613016,-22520255,-2950923,-20353881,7315967,16648397,7605640 }, - { -8081308,-8464597,-8223311,9719710,19259459,-15348212,23994942,-5281555,-9468848,4763278 }, - { -21699244,9220969,-15730624,1084137,-25476107,-2852390,31088447,-7764523,-11356529,728112 }, - }, - { - { 26047220,-11751471,-6900323,-16521798,24092068,9158119,-4273545,-12555558,-29365436,-5498272 }, - { 17510331,-322857,5854289,8403524,17133918,-3112612,-28111007,12327945,10750447,10014012 }, - { -10312768,3936952,9156313,-8897683,16498692,-994647,-27481051,-666732,3424691,7540221 }, - }, - { - { 30322361,-6964110,11361005,-4143317,7433304,4989748,-7071422,-16317219,-9244265,15258046 }, - { 13054562,-2779497,19155474,469045,-12482797,4566042,5631406,2711395,1062915,-5136345 }, - { -19240248,-11254599,-29509029,-7499965,-5835763,13005411,-6066489,12194497,32960380,1459310 }, - }, -}, -{ - { - { 19852034,7027924,23669353,10020366,8586503,-6657907,394197,-6101885,18638003,-11174937 }, - { 31395534,15098109,26581030,8030562,-16527914,-5007134,9012486,-7584354,-6643087,-5442636 }, - { -9192165,-2347377,-1997099,4529534,25766844,607986,-13222,9677543,-32294889,-6456008 }, - }, - { - { -2444496,-149937,29348902,8186665,1873760,12489863,-30934579,-7839692,-7852844,-8138429 }, - { -15236356,-15433509,7766470,746860,26346930,-10221762,-27333451,10754588,-9431476,5203576 }, - { 31834314,14135496,-770007,5159118,20917671,-16768096,-7467973,-7337524,31809243,7347066 }, - }, - { - { -9606723,-11874240,20414459,13033986,13716524,-11691881,19797970,-12211255,15192876,-2087490 }, - { -12663563,-2181719,1168162,-3804809,26747877,-14138091,10609330,12694420,33473243,-13382104 }, - { 33184999,11180355,15832085,-11385430,-1633671,225884,15089336,-11023903,-6135662,14480053 }, - }, - { - { 31308717,-5619998,31030840,-1897099,15674547,-6582883,5496208,13685227,27595050,8737275 }, - { -20318852,-15150239,10933843,-16178022,8335352,-7546022,-31008351,-12610604,26498114,66511 }, - { 22644454,-8761729,-16671776,4884562,-3105614,-13559366,30540766,-4286747,-13327787,-7515095 }, - }, - { - { -28017847,9834845,18617207,-2681312,-3401956,-13307506,8205540,13585437,-17127465,15115439 }, - { 23711543,-672915,31206561,-8362711,6164647,-9709987,-33535882,-1426096,8236921,16492939 }, - { -23910559,-13515526,-26299483,-4503841,25005590,-7687270,19574902,10071562,6708380,-6222424 }, - }, - { - { 
2101391,-4930054,19702731,2367575,-15427167,1047675,5301017,9328700,29955601,-11678310 }, - { 3096359,9271816,-21620864,-15521844,-14847996,-7592937,-25892142,-12635595,-9917575,6216608 }, - { -32615849,338663,-25195611,2510422,-29213566,-13820213,24822830,-6146567,-26767480,7525079 }, - }, - { - { -23066649,-13985623,16133487,-7896178,-3389565,778788,-910336,-2782495,-19386633,11994101 }, - { 21691500,-13624626,-641331,-14367021,3285881,-3483596,-25064666,9718258,-7477437,13381418 }, - { 18445390,-4202236,14979846,11622458,-1727110,-3582980,23111648,-6375247,28535282,15779576 }, - }, - { - { 30098053,3089662,-9234387,16662135,-21306940,11308411,-14068454,12021730,9955285,-16303356 }, - { 9734894,-14576830,-7473633,-9138735,2060392,11313496,-18426029,9924399,20194861,13380996 }, - { -26378102,-7965207,-22167821,15789297,-18055342,-6168792,-1984914,15707771,26342023,10146099 }, - }, -}, -{ - { - { -26016874,-219943,21339191,-41388,19745256,-2878700,-29637280,2227040,21612326,-545728 }, - { -13077387,1184228,23562814,-5970442,-20351244,-6348714,25764461,12243797,-20856566,11649658 }, - { -10031494,11262626,27384172,2271902,26947504,-15997771,39944,6114064,33514190,2333242 }, - }, - { - { -21433588,-12421821,8119782,7219913,-21830522,-9016134,-6679750,-12670638,24350578,-13450001 }, - { -4116307,-11271533,-23886186,4843615,-30088339,690623,-31536088,-10406836,8317860,12352766 }, - { 18200138,-14475911,-33087759,-2696619,-23702521,-9102511,-23552096,-2287550,20712163,6719373 }, - }, - { - { 26656208,6075253,-7858556,1886072,-28344043,4262326,11117530,-3763210,26224235,-3297458 }, - { -17168938,-14854097,-3395676,-16369877,-19954045,14050420,21728352,9493610,18620611,-16428628 }, - { -13323321,13325349,11432106,5964811,18609221,6062965,-5269471,-9725556,-30701573,-16479657 }, - }, - { - { -23860538,-11233159,26961357,1640861,-32413112,-16737940,12248509,-5240639,13735342,1934062 }, - { 25089769,6742589,17081145,-13406266,21909293,-16067981,-15136294,-3765346,-21277997,5473616 }, - { 31883677,-7961101,1083432,-11572403,22828471,13290673,-7125085,12469656,29111212,-5451014 }, - }, - { - { 24244947,-15050407,-26262976,2791540,-14997599,16666678,24367466,6388839,-10295587,452383 }, - { -25640782,-3417841,5217916,16224624,19987036,-4082269,-24236251,-5915248,15766062,8407814 }, - { -20406999,13990231,15495425,16395525,5377168,15166495,-8917023,-4388953,-8067909,2276718 }, - }, - { - { 30157918,12924066,-17712050,9245753,19895028,3368142,-23827587,5096219,22740376,-7303417 }, - { 2041139,-14256350,7783687,13876377,-25946985,-13352459,24051124,13742383,-15637599,13295222 }, - { 33338237,-8505733,12532113,7977527,9106186,-1715251,-17720195,-4612972,-4451357,-14669444 }, - }, - { - { -20045281,5454097,-14346548,6447146,28862071,1883651,-2469266,-4141880,7770569,9620597 }, - { 23208068,7979712,33071466,8149229,1758231,-10834995,30945528,-1694323,-33502340,-14767970 }, - { 1439958,-16270480,-1079989,-793782,4625402,10647766,-5043801,1220118,30494170,-11440799 }, - }, - { - { -5037580,-13028295,-2970559,-3061767,15640974,-6701666,-26739026,926050,-1684339,-13333647 }, - { 13908495,-3549272,30919928,-6273825,-21521863,7989039,9021034,9078865,3353509,4033511 }, - { -29663431,-15113610,32259991,-344482,24295849,-12912123,23161163,8839127,27485041,7356032 }, - }, -}, -{ - { - { 9661027,705443,11980065,-5370154,-1628543,14661173,-6346142,2625015,28431036,-16771834 }, - { -23839233,-8311415,-25945511,7480958,-17681669,-8354183,-22545972,14150565,15970762,4099461 }, - { 
29262576,16756590,26350592,-8793563,8529671,-11208050,13617293,-9937143,11465739,8317062 }, - }, - { - { -25493081,-6962928,32500200,-9419051,-23038724,-2302222,14898637,3848455,20969334,-5157516 }, - { -20384450,-14347713,-18336405,13884722,-33039454,2842114,-21610826,-3649888,11177095,14989547 }, - { -24496721,-11716016,16959896,2278463,12066309,10137771,13515641,2581286,-28487508,9930240 }, - }, - { - { -17751622,-2097826,16544300,-13009300,-15914807,-14949081,18345767,-13403753,16291481,-5314038 }, - { -33229194,2553288,32678213,9875984,8534129,6889387,-9676774,6957617,4368891,9788741 }, - { 16660756,7281060,-10830758,12911820,20108584,-8101676,-21722536,-8613148,16250552,-11111103 }, - }, - { - { -19765507,2390526,-16551031,14161980,1905286,6414907,4689584,10604807,-30190403,4782747 }, - { -1354539,14736941,-7367442,-13292886,7710542,-14155590,-9981571,4383045,22546403,437323 }, - { 31665577,-12180464,-16186830,1491339,-18368625,3294682,27343084,2786261,-30633590,-14097016 }, - }, - { - { -14467279,-683715,-33374107,7448552,19294360,14334329,-19690631,2355319,-19284671,-6114373 }, - { 15121312,-15796162,6377020,-6031361,-10798111,-12957845,18952177,15496498,-29380133,11754228 }, - { -2637277,-13483075,8488727,-14303896,12728761,-1622493,7141596,11724556,22761615,-10134141 }, - }, - { - { 16918416,11729663,-18083579,3022987,-31015732,-13339659,-28741185,-12227393,32851222,11717399 }, - { 11166634,7338049,-6722523,4531520,-29468672,-7302055,31474879,3483633,-1193175,-4030831 }, - { -185635,9921305,31456609,-13536438,-12013818,13348923,33142652,6546660,-19985279,-3948376 }, - }, - { - { -32460596,11266712,-11197107,-7899103,31703694,3855903,-8537131,-12833048,-30772034,-15486313 }, - { -18006477,12709068,3991746,-6479188,-21491523,-10550425,-31135347,-16049879,10928917,3011958 }, - { -6957757,-15594337,31696059,334240,29576716,14796075,-30831056,-12805180,18008031,10258577 }, - }, - { - { -22448644,15655569,7018479,-4410003,-30314266,-1201591,-1853465,1367120,25127874,6671743 }, - { 29701166,-14373934,-10878120,9279288,-17568,13127210,21382910,11042292,25838796,4642684 }, - { -20430234,14955537,-24126347,8124619,-5369288,-5990470,30468147,-13900640,18423289,4177476 }, - }, -}, -}; diff --git a/src/libcryptobox/curve25519/base_constants2.h b/src/libcryptobox/curve25519/base_constants2.h deleted file mode 100644 index 2fe11e29b..000000000 --- a/src/libcryptobox/curve25519/base_constants2.h +++ /dev/null @@ -1,42 +0,0 @@ -static const ge_precomp Bi[8] = { -{ - { 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605 }, - { -12545711,934262,-2722910,3049990,-727428,9406986,12720692,5043384,19500929,-15469378 }, - { -8738181,4489570,9688441,-14785194,10184609,-12363380,29287919,11864899,-24514362,-4438546 }, - }, - { - { 15636291,-9688557,24204773,-7912398,616977,-16685262,27787600,-14772189,28944400,-1550024 }, - { 16568933,4717097,-11556148,-1102322,15682896,-11807043,16354577,-11775962,7689662,11199574 }, - { 30464156,-5976125,-11779434,-15670865,23220365,15915852,7512774,10017326,-17749093,-9920357 }, - }, - { - { 10861363,11473154,27284546,1981175,-30064349,12577861,32867885,14515107,-15438304,10819380 }, - { 4708026,6336745,20377586,9066809,-11272109,6594696,-25653668,12483688,-12668491,5581306 }, - { 19563160,16186464,-29386857,4097519,10237984,-4348115,28542350,13850243,-23678021,-15815942 }, - }, - { - { 5153746,9909285,1723747,-2777874,30523605,5516873,19480852,5230134,-23952439,-15175766 }, - { 
-30269007,-3463509,7665486,10083793,28475525,1649722,20654025,16520125,30598449,7715701 }, - { 28881845,14381568,9657904,3680757,-20181635,7843316,-31400660,1370708,29794553,-1409300 }, - }, - { - { -22518993,-6692182,14201702,-8745502,-23510406,8844726,18474211,-1361450,-13062696,13821877 }, - { -6455177,-7839871,3374702,-4740862,-27098617,-10571707,31655028,-7212327,18853322,-14220951 }, - { 4566830,-12963868,-28974889,-12240689,-7602672,-2830569,-8514358,-10431137,2207753,-3209784 }, - }, - { - { -25154831,-4185821,29681144,7868801,-6854661,-9423865,-12437364,-663000,-31111463,-16132436 }, - { 25576264,-2703214,7349804,-11814844,16472782,9300885,3844789,15725684,171356,6466918 }, - { 23103977,13316479,9739013,-16149481,817875,-15038942,8965339,-14088058,-30714912,16193877 }, - }, - { - { -33521811,3180713,-2394130,14003687,-16903474,-16270840,17238398,4729455,-18074513,9256800 }, - { -25182317,-4174131,32336398,5036987,-21236817,11360617,22616405,9761698,-19827198,630305 }, - { -13720693,2639453,-24237460,-7406481,9494427,-5774029,-6554551,-15960994,-2449256,-14291300 }, - }, - { - { -3151181,-5046075,9282714,6866145,-31907062,-863023,-18940575,15033784,25105118,-7894876 }, - { -24326370,15950226,-31801215,-14592823,-11662737,-5090925,1573892,-2625887,2198790,-15804619 }, - { -3099351,10324967,-2241613,7453183,-5446979,-2735503,-13812022,-16236442,-32461234,-12290683 }, - }, -}; diff --git a/src/libcryptobox/curve25519/constants.S b/src/libcryptobox/curve25519/constants.S deleted file mode 100644 index c8e048c3f..000000000 --- a/src/libcryptobox/curve25519/constants.S +++ /dev/null @@ -1,20 +0,0 @@ -.data - -.p2align 4 - -v0_0: .quad 0, 0 -v1_0: .quad 1, 0 -v2_1: .quad 2, 1 -v19_19: .quad 19, 19 -v9_0: .quad 9, 0 -v9_9: .quad 9, 9 -v38_1: .quad 38, 1 -v38_38: .quad 38, 38 -v121666_121666: .quad 121666, 121666 -m25: .quad 33554431, 33554431 -m26: .quad 67108863, 67108863 - -subc0: .quad 0x07FFFFDA, 0x03FFFFFE -subc2: .quad 0x07FFFFFE, 0x03FFFFFE - -REDMASK51: .quad 0x0007FFFFFFFFFFFF diff --git a/src/libcryptobox/curve25519/curve25519-donna-c64.c b/src/libcryptobox/curve25519/curve25519-donna-c64.c deleted file mode 100644 index c83b27ecb..000000000 --- a/src/libcryptobox/curve25519/curve25519-donna-c64.c +++ /dev/null @@ -1,505 +0,0 @@ -/* Copyright 2008, Google Inc. - * All rights reserved. - * - * Code released into the public domain. - * - * curve25519-donna: Curve25519 elliptic curve, public key function - * - * http://code.google.com/p/curve25519-donna/ - * - * Adam Langley <agl@imperialviolet.org> - * - * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to> - * - * More information about curve25519 can be found here - * http://cr.yp.to/ecdh.html - * - * djb's sample implementation of curve25519 is written in a special assembly - * language called qhasm and uses the floating point registers. - * - * This is, almost, a clean room reimplementation from the curve25519 paper. It - * uses many of the tricks described therein. Only the crecip function is taken - * from the sample implementation. - */ - -#include <string.h> -#include <stdint.h> -#include "curve25519.h" - -typedef uint8_t u8; -typedef uint64_t limb; -typedef limb felem[5]; -// This is a special gcc mode for 128-bit integers. It's implemented on 64-bit -// platforms only as far as I know. 
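The deleted implementation below represents a field element of GF(2^255 - 19) as five 64-bit limbs holding 51 bits each (the felem type above), and uses this 128-bit gcc mode to hold full 64x64-bit products before folding the carries back down. A minimal sketch of that multiply-carry-mask step, written with the modern "unsigned __int128" spelling instead of mode(TI); the helper name is hypothetical and not part of the original file:

#include <stdint.h>

/* Hypothetical illustration, not from the original source: one
 * multiply-and-reduce step as performed throughout fmul and
 * fsquare_times below. The full 64x64-bit product needs 128 bits;
 * the low 51 bits stay in the current limb and everything above
 * is carried into the next one. */
typedef unsigned __int128 u128; /* same as mode(TI) on 64-bit targets */

static uint64_t
mul_step (uint64_t a, uint64_t b, uint64_t *carry)
{
	u128 t = (u128) a * b;

	*carry = (uint64_t) (t >> 51);          /* goes into the next limb */
	return (uint64_t) t & 0x7ffffffffffff;  /* low 51 bits stay here */
}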
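None of this limb arithmetic is kept after this commit: per the commit message, the Curve25519 public-key operations this file provided come from libsodium instead. A rough sketch of the equivalent libsodium calls (not rspamd's actual wrapper, which would sit at the cryptobox layer; everything other than the libsodium API names is hypothetical):

#include <sodium.h>

/* Sketch only: derive a public key and a shared secret the way the
 * deleted donna code used to, via libsodium's X25519 primitives. */
static int
x25519_demo (const unsigned char their_pk[crypto_scalarmult_BYTES],
	unsigned char shared[crypto_scalarmult_BYTES])
{
	unsigned char sk[crypto_scalarmult_SCALARBYTES];
	unsigned char pk[crypto_scalarmult_BYTES];

	if (sodium_init () < 0) {
		return -1;
	}

	randombytes_buf (sk, sizeof (sk));   /* random secret scalar */
	crypto_scalarmult_base (pk, sk);     /* pk = sk * basepoint (9);
	                                      * pk is what a caller would
	                                      * publish to the peer */

	/* shared = sk * their_pk; returns -1 for degenerate peer points */
	return crypto_scalarmult (shared, sk, their_pk);
}

With libsodium performing the scalar multiplication, the precomputed tables and per-architecture assembly removed in this diff are no longer needed in-tree.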
-typedef unsigned uint128_t __attribute__((mode(TI))); - -#undef force_inline -#define force_inline __attribute__((always_inline)) - -/* Sum two numbers: output += in */ -static inline void force_inline -fsum (limb *output, const limb *in) -{ - output[0] += in[0]; - output[1] += in[1]; - output[2] += in[2]; - output[3] += in[3]; - output[4] += in[4]; -} - -/* Find the difference of two numbers: output = in - output - * (note the order of the arguments!) - * - * Assumes that out[i] < 2**52 - * On return, out[i] < 2**55 - */ -static inline void force_inline -fdifference_backwards (felem out, const felem in) -{ - /* 152 is 19 << 3 */ - static const limb two54m152 = (((limb) 1) << 54) - 152; - static const limb two54m8 = (((limb) 1) << 54) - 8; - - out[0] = in[0] + two54m152 - out[0]; - out[1] = in[1] + two54m8 - out[1]; - out[2] = in[2] + two54m8 - out[2]; - out[3] = in[3] + two54m8 - out[3]; - out[4] = in[4] + two54m8 - out[4]; -} - -/* Multiply a number by a scalar: output = in * scalar */ -static inline void force_inline -fscalar_product (felem output, const felem in, const limb scalar) -{ - uint128_t a; - - a = ((uint128_t) in[0]) * scalar; - output[0] = ((limb) a) & 0x7ffffffffffff; - - a = ((uint128_t) in[1]) * scalar + ((limb) (a >> 51)); - output[1] = ((limb) a) & 0x7ffffffffffff; - - a = ((uint128_t) in[2]) * scalar + ((limb) (a >> 51)); - output[2] = ((limb) a) & 0x7ffffffffffff; - - a = ((uint128_t) in[3]) * scalar + ((limb) (a >> 51)); - output[3] = ((limb) a) & 0x7ffffffffffff; - - a = ((uint128_t) in[4]) * scalar + ((limb) (a >> 51)); - output[4] = ((limb) a) & 0x7ffffffffffff; - - output[0] += (a >> 51) * 19; -} - -/* Multiply two numbers: output = in2 * in - * - * output must be distinct to both inputs. The inputs are reduced coefficient - * form, the output is not. - * - * Assumes that in[i] < 2**55 and likewise for in2. 
- * On return, output[i] < 2**52 - */ -static inline void force_inline -fmul (felem output, const felem in2, const felem in) -{ - uint128_t t[5]; - limb r0, r1, r2, r3, r4, s0, s1, s2, s3, s4, c; - - r0 = in[0]; - r1 = in[1]; - r2 = in[2]; - r3 = in[3]; - r4 = in[4]; - - s0 = in2[0]; - s1 = in2[1]; - s2 = in2[2]; - s3 = in2[3]; - s4 = in2[4]; - - t[0] = ((uint128_t) r0) * s0; - t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0; - t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 - + ((uint128_t) r1) * s1; - t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 - + ((uint128_t) r2) * s1; - t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 - + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2; - - r4 *= 19; - r1 *= 19; - r2 *= 19; - r3 *= 19; - - t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 - + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2; - t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 - + ((uint128_t) r3) * s3; - t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4; - t[3] += ((uint128_t) r4) * s4; - - r0 = (limb) t[0] & 0x7ffffffffffff; - c = (limb) (t[0] >> 51); - t[1] += c; - r1 = (limb) t[1] & 0x7ffffffffffff; - c = (limb) (t[1] >> 51); - t[2] += c; - r2 = (limb) t[2] & 0x7ffffffffffff; - c = (limb) (t[2] >> 51); - t[3] += c; - r3 = (limb) t[3] & 0x7ffffffffffff; - c = (limb) (t[3] >> 51); - t[4] += c; - r4 = (limb) t[4] & 0x7ffffffffffff; - c = (limb) (t[4] >> 51); - r0 += c * 19; - c = r0 >> 51; - r0 = r0 & 0x7ffffffffffff; - r1 += c; - c = r1 >> 51; - r1 = r1 & 0x7ffffffffffff; - r2 += c; - - output[0] = r0; - output[1] = r1; - output[2] = r2; - output[3] = r3; - output[4] = r4; -} - -static inline void force_inline -fsquare_times (felem output, const felem in, limb count) -{ - uint128_t t[5]; - limb r0, r1, r2, r3, r4, c; - limb d0, d1, d2, d4, d419; - - r0 = in[0]; - r1 = in[1]; - r2 = in[2]; - r3 = in[3]; - r4 = in[4]; - - do { - d0 = r0 * 2; - d1 = r1 * 2; - d2 = r2 * 2 * 19; - d419 = r4 * 19; - d4 = d419 * 2; - - t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 - + (((uint128_t) d2) * (r3)); - t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 - + (((uint128_t) r3) * (r3 * 19)); - t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 - + (((uint128_t) d4) * (r3)); - t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 - + (((uint128_t) r4) * (d419)); - t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 - + (((uint128_t) r2) * (r2)); - - r0 = (limb) t[0] & 0x7ffffffffffff; - c = (limb) (t[0] >> 51); - t[1] += c; - r1 = (limb) t[1] & 0x7ffffffffffff; - c = (limb) (t[1] >> 51); - t[2] += c; - r2 = (limb) t[2] & 0x7ffffffffffff; - c = (limb) (t[2] >> 51); - t[3] += c; - r3 = (limb) t[3] & 0x7ffffffffffff; - c = (limb) (t[3] >> 51); - t[4] += c; - r4 = (limb) t[4] & 0x7ffffffffffff; - c = (limb) (t[4] >> 51); - r0 += c * 19; - c = r0 >> 51; - r0 = r0 & 0x7ffffffffffff; - r1 += c; - c = r1 >> 51; - r1 = r1 & 0x7ffffffffffff; - r2 += c; - } while (--count); - - output[0] = r0; - output[1] = r1; - output[2] = r2; - output[3] = r3; - output[4] = r4; -} - -/* Load a little-endian 64-bit number */ -static limb load_limb (const u8 *in) -{ - return ((limb) in[0]) | (((limb) in[1]) << 8) | (((limb) in[2]) << 16) - | (((limb) in[3]) << 24) | (((limb) in[4]) << 32) - | (((limb) in[5]) << 40) | (((limb) in[6]) << 48) - | (((limb) in[7]) << 56); -} - -static void store_limb (u8 *out, limb in) -{ - out[0] = in & 0xff; - out[1] = (in >> 8) & 0xff; - out[2] = (in >> 16) & 0xff; - out[3] = (in >> 24) & 0xff; - out[4] = (in 
>> 32) & 0xff; - out[5] = (in >> 40) & 0xff; - out[6] = (in >> 48) & 0xff; - out[7] = (in >> 56) & 0xff; -} - -/* Take a little-endian, 32-byte number and expand it into polynomial form */ -static void fexpand (limb *output, const u8 *in) -{ - output[0] = load_limb (in) & 0x7ffffffffffff; - output[1] = (load_limb (in + 6) >> 3) & 0x7ffffffffffff; - output[2] = (load_limb (in + 12) >> 6) & 0x7ffffffffffff; - output[3] = (load_limb (in + 19) >> 1) & 0x7ffffffffffff; - output[4] = (load_limb (in + 24) >> 12) & 0x7ffffffffffff; -} - -/* Take a fully reduced polynomial form number and contract it into a - * little-endian, 32-byte array - */ -static void fcontract (u8 *output, const felem input) -{ - uint128_t t[5]; - - t[0] = input[0]; - t[1] = input[1]; - t[2] = input[2]; - t[3] = input[3]; - t[4] = input[4]; - - t[1] += t[0] >> 51; - t[0] &= 0x7ffffffffffff; - t[2] += t[1] >> 51; - t[1] &= 0x7ffffffffffff; - t[3] += t[2] >> 51; - t[2] &= 0x7ffffffffffff; - t[4] += t[3] >> 51; - t[3] &= 0x7ffffffffffff; - t[0] += 19 * (t[4] >> 51); - t[4] &= 0x7ffffffffffff; - - t[1] += t[0] >> 51; - t[0] &= 0x7ffffffffffff; - t[2] += t[1] >> 51; - t[1] &= 0x7ffffffffffff; - t[3] += t[2] >> 51; - t[2] &= 0x7ffffffffffff; - t[4] += t[3] >> 51; - t[3] &= 0x7ffffffffffff; - t[0] += 19 * (t[4] >> 51); - t[4] &= 0x7ffffffffffff; - - /* now t is between 0 and 2^255-1, properly carried. */ - /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ - - t[0] += 19; - - t[1] += t[0] >> 51; - t[0] &= 0x7ffffffffffff; - t[2] += t[1] >> 51; - t[1] &= 0x7ffffffffffff; - t[3] += t[2] >> 51; - t[2] &= 0x7ffffffffffff; - t[4] += t[3] >> 51; - t[3] &= 0x7ffffffffffff; - t[0] += 19 * (t[4] >> 51); - t[4] &= 0x7ffffffffffff; - - /* now between 19 and 2^255-1 in both cases, and offset by 19. */ - - t[0] += 0x8000000000000 - 19; - t[1] += 0x8000000000000 - 1; - t[2] += 0x8000000000000 - 1; - t[3] += 0x8000000000000 - 1; - t[4] += 0x8000000000000 - 1; - - /* now between 2^255 and 2^256-20, and offset by 2^255. 
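Spelled out, the constant added here is exactly p = 2^255 - 19 in limb form: (2^51 - 19) + (2^51 - 1)*(2^51 + 2^102 + 2^153 + 2^204) = 2^255 - 19. The contraction is thus a branch-free reduction: the earlier t[0] += 19 (whose top carry wraps back multiplied by 19) maps t and t - p to the same intermediate, adding p then offsets the value into [2^255, 2^256 - 20], and the final mask below strips the bit of weight 2^255, leaving the canonical representative that the four store_limb calls pack.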
*/ - - t[1] += t[0] >> 51; - t[0] &= 0x7ffffffffffff; - t[2] += t[1] >> 51; - t[1] &= 0x7ffffffffffff; - t[3] += t[2] >> 51; - t[2] &= 0x7ffffffffffff; - t[4] += t[3] >> 51; - t[3] &= 0x7ffffffffffff; - t[4] &= 0x7ffffffffffff; - - store_limb (output, t[0] | (t[1] << 51)); - store_limb (output + 8, (t[1] >> 13) | (t[2] << 38)); - store_limb (output + 16, (t[2] >> 26) | (t[3] << 25)); - store_limb (output + 24, (t[3] >> 39) | (t[4] << 12)); -} - -/* Input: Q, Q', Q-Q' - * Output: 2Q, Q+Q' - * - * x2 z2: long form - * x3 z3: long form - * x z: short form, destroyed - * xprime zprime: short form, destroyed - * qmqp: short form, preserved - */ -static void fmonty (limb *x2, limb *z2, /* output 2Q */ -limb *x3, limb *z3, /* output Q + Q' */ -limb *x, limb *z, /* input Q */ -limb *xprime, limb *zprime, /* input Q' */ -const limb *qmqp /* input Q - Q' */) -{ - limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], - zzzprime[5]; - - memcpy (origx, x, 5 * sizeof(limb)); - fsum (x, z); - fdifference_backwards (z, origx); // does x - z - - memcpy (origxprime, xprime, sizeof(limb) * 5); - fsum (xprime, zprime); - fdifference_backwards (zprime, origxprime); - fmul (xxprime, xprime, z); - fmul (zzprime, x, zprime); - memcpy (origxprime, xxprime, sizeof(limb) * 5); - fsum (xxprime, zzprime); - fdifference_backwards (zzprime, origxprime); - fsquare_times (x3, xxprime, 1); - fsquare_times (zzzprime, zzprime, 1); - fmul (z3, zzzprime, qmqp); - - fsquare_times (xx, x, 1); - fsquare_times (zz, z, 1); - fmul (x2, xx, zz); - fdifference_backwards (zz, xx); // does zz = xx - zz - fscalar_product (zzz, zz, 121665); - fsum (zzz, xx); - fmul (z2, zz, zzz); -} - -// ----------------------------------------------------------------------------- -// Maybe swap the contents of two limb arrays (@a and @b), each five limbs -// long. Perform the swap iff @iswap is non-zero. -// -// This function performs the swap without leaking any side-channel -// information.
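The masking idiom described above is worth isolating, since it recurs in both donna variants and in the ref10 code further down: negating a 0/1 flag yields an all-zeros or all-ones word, and masking an XOR-swap with it executes the identical instruction sequence whichever value the flag has. A self-contained sketch (ct_swap64 is a hypothetical name, not from the original file):

#include <stdint.h>

/* Swap *a and *b iff bit == 1; bit must be exactly 0 or 1.
 * There is no branch, so timing is independent of the secret bit. */
static void ct_swap64 (uint64_t *a, uint64_t *b, uint64_t bit)
{
    const uint64_t mask = (uint64_t) 0 - bit;   /* 0x00...0 or 0xff...f */
    const uint64_t x = mask & (*a ^ *b);        /* a^b when swapping, else 0 */

    *a ^= x;
    *b ^= x;
}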
-// ----------------------------------------------------------------------------- -static void swap_conditional (limb a[5], limb b[5], limb iswap) -{ - unsigned i; - const limb swap = -iswap; - - for (i = 0; i < 5; ++i) { - const limb x = swap & (a[i] ^ b[i]); - a[i] ^= x; - b[i] ^= x; - } -} - -/* Calculates nQ where Q is the x-coordinate of a point on the curve - * - * resultx/resultz: the x coordinate of the resulting curve point (short form) - * n: a little endian, 32-byte number - * q: a point of the curve (short form) - */ -static void cmult (limb *resultx, limb *resultz, const u8 *n, const limb *q) -{ - limb a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 }; - limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; - limb e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 }; - limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; - - unsigned i, j; - - memcpy (nqpqx, q, sizeof(limb) * 5); - - for (i = 0; i < 32; ++i) { - u8 byte = n[31 - i]; - for (j = 0; j < 8; ++j) { - const limb bit = byte >> 7; - - swap_conditional (nqx, nqpqx, bit); - swap_conditional (nqz, nqpqz, bit); - fmonty (nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q); - swap_conditional (nqx2, nqpqx2, bit); - swap_conditional (nqz2, nqpqz2, bit); - - t = nqx; - nqx = nqx2; - nqx2 = t; - t = nqz; - nqz = nqz2; - nqz2 = t; - t = nqpqx; - nqpqx = nqpqx2; - nqpqx2 = t; - t = nqpqz; - nqpqz = nqpqz2; - nqpqz2 = t; - - byte <<= 1; - } - } - - memcpy (resultx, nqx, sizeof(limb) * 5); - memcpy (resultz, nqz, sizeof(limb) * 5); -} - -// ----------------------------------------------------------------------------- -// Shamelessly copied from djb's code, tightened a little -// ----------------------------------------------------------------------------- -static void crecip (felem out, const felem z) -{ - felem a, t0, b, c; - - /* 2 */fsquare_times (a, z, 1); // a = 2 - /* 8 */fsquare_times (t0, a, 2); - /* 9 */fmul (b, t0, z); // b = 9 - /* 11 */fmul (a, b, a); // a = 11 - /* 22 */fsquare_times (t0, a, 1); - /* 2^5 - 2^0 = 31 */fmul (b, t0, b); - /* 2^10 - 2^5 */fsquare_times (t0, b, 5); - /* 2^10 - 2^0 */fmul (b, t0, b); - /* 2^20 - 2^10 */fsquare_times (t0, b, 10); - /* 2^20 - 2^0 */fmul (c, t0, b); - /* 2^40 - 2^20 */fsquare_times (t0, c, 20); - /* 2^40 - 2^0 */fmul (t0, t0, c); - /* 2^50 - 2^10 */fsquare_times (t0, t0, 10); - /* 2^50 - 2^0 */fmul (b, t0, b); - /* 2^100 - 2^50 */fsquare_times (t0, b, 50); - /* 2^100 - 2^0 */fmul (c, t0, b); - /* 2^200 - 2^100 */fsquare_times (t0, c, 100); - /* 2^200 - 2^0 */fmul (t0, t0, c); - /* 2^250 - 2^50 */fsquare_times (t0, t0, 50); - /* 2^250 - 2^0 */fmul (t0, t0, b); - /* 2^255 - 2^5 */fsquare_times (t0, t0, 5); - /* 2^255 - 21 */fmul (out, t0, a); -} - -int scalarmult_donna (u8 *mypublic, const u8 *secret, const u8 *basepoint) -{ - limb bp[5], x[5], z[5], zmone[5]; - unsigned char e[32]; - - memcpy (e, secret, 32); - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - - fexpand (bp, basepoint); - cmult (x, z, e, bp); - crecip (zmone, z); - fmul (z, x, zmone); - fcontract (mypublic, z); - - return 0; -} - -int -scalarmult_base_donna (u8 *mypublic, const u8 *secret) -{ - return scalarmult_donna (mypublic, secret, - curve25519_basepoint); -} diff --git a/src/libcryptobox/curve25519/curve25519-donna.c b/src/libcryptobox/curve25519/curve25519-donna.c deleted file mode 100644 index e54bae7b6..000000000 --- a/src/libcryptobox/curve25519/curve25519-donna.c +++ /dev/null @@ -1,919 +0,0 @@ -/* Copyright 2008, Google Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * curve25519-donna: Curve25519 elliptic curve, public key function - * - * http://code.google.com/p/curve25519-donna/ - * - * Adam Langley <agl@imperialviolet.org> - * - * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to> - * - * More information about curve25519 can be found here - * http://cr.yp.to/ecdh.html - * - * djb's sample implementation of curve25519 is written in a special assembly - * language called qhasm and uses the floating point registers. - * - * This is, almost, a clean room reimplementation from the curve25519 paper. It - * uses many of the tricks described therein. Only the crecip function is taken - * from the sample implementation. */ - -#include <string.h> -#include <stdint.h> -#include "curve25519.h" - -#ifdef _MSC_VER -#define inline __inline -#endif - -typedef uint8_t u8; -typedef int32_t s32; -typedef int64_t limb; - -/* Field element representation: - * - * Field elements are written as an array of signed, 64-bit limbs, least - * significant first. The value of the field element is: - * x[0] + 2^26·x[1] + 2^51·x[2] + 2^77·x[3] + ... - * - * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */ - -/* Sum two numbers: output += in */ -static void fsum (limb *output, const limb *in) -{ - unsigned i; - for (i = 0; i < 10; i += 2) { - output[0 + i] = output[0 + i] + in[0 + i]; - output[1 + i] = output[1 + i] + in[1 + i]; - } -} - -/* Find the difference of two numbers: output = in - output - * (note the order of the arguments!). */ -static void fdifference (limb *output, const limb *in) -{ - unsigned i; - for (i = 0; i < 10; ++i) { - output[i] = in[i] - output[i]; - } -} - -/* Multiply a number by a scalar: output = in * scalar */ -static void fscalar_product (limb *output, const limb *in, const limb scalar) -{ - unsigned i; - for (i = 0; i < 10; ++i) { - output[i] = in[i] * scalar; - } -} - -/* Multiply two numbers: output = in2 * in - * - * output must be distinct to both inputs. The inputs are reduced coefficient - * form, the output is not.
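In this mixed radix, limb i carries weight 2^ceil(25.5*i); the bit offsets are 0, 26, 51, 77, 102, 128, 153, 179, 204 and 230. A quick self-check of that formula (illustrative only, not part of the original file):

#include <assert.h>

int main (void)
{
    /* offset(i) = ceil(25.5 * i) = (51 * i + 1) / 2 in integer arithmetic */
    static const int expect[10] = { 0, 26, 51, 77, 102, 128, 153, 179, 204, 230 };
    int i;

    for (i = 0; i < 10; i++) {
        assert ((51 * i + 1) / 2 == expect[i]);
    }

    return 0;
}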
- * - * output[x] <= 14 * the largest product of the input limbs. */ -static void fproduct (limb *output, const limb *in2, const limb *in) -{ - output[0] = ((limb) ((s32) in2[0])) * ((s32) in[0]); - output[1] = ((limb) ((s32) in2[0])) * ((s32) in[1]) - + ((limb) ((s32) in2[1])) * ((s32) in[0]); - output[2] = 2 * ((limb) ((s32) in2[1])) * ((s32) in[1]) - + ((limb) ((s32) in2[0])) * ((s32) in[2]) - + ((limb) ((s32) in2[2])) * ((s32) in[0]); - output[3] = ((limb) ((s32) in2[1])) * ((s32) in[2]) - + ((limb) ((s32) in2[2])) * ((s32) in[1]) - + ((limb) ((s32) in2[0])) * ((s32) in[3]) - + ((limb) ((s32) in2[3])) * ((s32) in[0]); - output[4] = ((limb) ((s32) in2[2])) * ((s32) in[2]) - + 2 - * (((limb) ((s32) in2[1])) * ((s32) in[3]) - + ((limb) ((s32) in2[3])) * ((s32) in[1])) - + ((limb) ((s32) in2[0])) * ((s32) in[4]) - + ((limb) ((s32) in2[4])) * ((s32) in[0]); - output[5] = ((limb) ((s32) in2[2])) * ((s32) in[3]) - + ((limb) ((s32) in2[3])) * ((s32) in[2]) - + ((limb) ((s32) in2[1])) * ((s32) in[4]) - + ((limb) ((s32) in2[4])) * ((s32) in[1]) - + ((limb) ((s32) in2[0])) * ((s32) in[5]) - + ((limb) ((s32) in2[5])) * ((s32) in[0]); - output[6] = 2 - * (((limb) ((s32) in2[3])) * ((s32) in[3]) - + ((limb) ((s32) in2[1])) * ((s32) in[5]) - + ((limb) ((s32) in2[5])) * ((s32) in[1])) - + ((limb) ((s32) in2[2])) * ((s32) in[4]) - + ((limb) ((s32) in2[4])) * ((s32) in[2]) - + ((limb) ((s32) in2[0])) * ((s32) in[6]) - + ((limb) ((s32) in2[6])) * ((s32) in[0]); - output[7] = ((limb) ((s32) in2[3])) * ((s32) in[4]) - + ((limb) ((s32) in2[4])) * ((s32) in[3]) - + ((limb) ((s32) in2[2])) * ((s32) in[5]) - + ((limb) ((s32) in2[5])) * ((s32) in[2]) - + ((limb) ((s32) in2[1])) * ((s32) in[6]) - + ((limb) ((s32) in2[6])) * ((s32) in[1]) - + ((limb) ((s32) in2[0])) * ((s32) in[7]) - + ((limb) ((s32) in2[7])) * ((s32) in[0]); - output[8] = ((limb) ((s32) in2[4])) * ((s32) in[4]) - + 2 - * (((limb) ((s32) in2[3])) * ((s32) in[5]) - + ((limb) ((s32) in2[5])) * ((s32) in[3]) - + ((limb) ((s32) in2[1])) * ((s32) in[7]) - + ((limb) ((s32) in2[7])) * ((s32) in[1])) - + ((limb) ((s32) in2[2])) * ((s32) in[6]) - + ((limb) ((s32) in2[6])) * ((s32) in[2]) - + ((limb) ((s32) in2[0])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[0]); - output[9] = ((limb) ((s32) in2[4])) * ((s32) in[5]) - + ((limb) ((s32) in2[5])) * ((s32) in[4]) - + ((limb) ((s32) in2[3])) * ((s32) in[6]) - + ((limb) ((s32) in2[6])) * ((s32) in[3]) - + ((limb) ((s32) in2[2])) * ((s32) in[7]) - + ((limb) ((s32) in2[7])) * ((s32) in[2]) - + ((limb) ((s32) in2[1])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[1]) - + ((limb) ((s32) in2[0])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[0]); - output[10] = 2 - * (((limb) ((s32) in2[5])) * ((s32) in[5]) - + ((limb) ((s32) in2[3])) * ((s32) in[7]) - + ((limb) ((s32) in2[7])) * ((s32) in[3]) - + ((limb) ((s32) in2[1])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[1])) - + ((limb) ((s32) in2[4])) * ((s32) in[6]) - + ((limb) ((s32) in2[6])) * ((s32) in[4]) - + ((limb) ((s32) in2[2])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[2]); - output[11] = ((limb) ((s32) in2[5])) * ((s32) in[6]) - + ((limb) ((s32) in2[6])) * ((s32) in[5]) - + ((limb) ((s32) in2[4])) * ((s32) in[7]) - + ((limb) ((s32) in2[7])) * ((s32) in[4]) - + ((limb) ((s32) in2[3])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[3]) - + ((limb) ((s32) in2[2])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[2]); - output[12] = ((limb) ((s32) in2[6])) * ((s32) in[6]) - + 2 - * (((limb) 
((s32) in2[5])) * ((s32) in[7]) - + ((limb) ((s32) in2[7])) * ((s32) in[5]) - + ((limb) ((s32) in2[3])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[3])) - + ((limb) ((s32) in2[4])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[4]); - output[13] = ((limb) ((s32) in2[6])) * ((s32) in[7]) - + ((limb) ((s32) in2[7])) * ((s32) in[6]) - + ((limb) ((s32) in2[5])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[5]) - + ((limb) ((s32) in2[4])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[4]); - output[14] = 2 - * (((limb) ((s32) in2[7])) * ((s32) in[7]) - + ((limb) ((s32) in2[5])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[5])) - + ((limb) ((s32) in2[6])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[6]); - output[15] = ((limb) ((s32) in2[7])) * ((s32) in[8]) - + ((limb) ((s32) in2[8])) * ((s32) in[7]) - + ((limb) ((s32) in2[6])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[6]); - output[16] = ((limb) ((s32) in2[8])) * ((s32) in[8]) - + 2 - * (((limb) ((s32) in2[7])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[7])); - output[17] = ((limb) ((s32) in2[8])) * ((s32) in[9]) - + ((limb) ((s32) in2[9])) * ((s32) in[8]); - output[18] = 2 * ((limb) ((s32) in2[9])) * ((s32) in[9]); -} - -/* Reduce a long form to a short form by taking the input mod 2^255 - 19. - * - * On entry: |output[i]| < 14*2^54 - * On exit: |output[0..8]| < 280*2^54 */ -static void freduce_degree (limb *output) -{ - /* Each of these shifts and adds ends up multiplying the value by 19. - * - * For output[0..8], the absolute entry value is < 14*2^54 and we add, at - * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54. */ - output[8] += output[18] << 4; - output[8] += output[18] << 1; - output[8] += output[18]; - output[7] += output[17] << 4; - output[7] += output[17] << 1; - output[7] += output[17]; - output[6] += output[16] << 4; - output[6] += output[16] << 1; - output[6] += output[16]; - output[5] += output[15] << 4; - output[5] += output[15] << 1; - output[5] += output[15]; - output[4] += output[14] << 4; - output[4] += output[14] << 1; - output[4] += output[14]; - output[3] += output[13] << 4; - output[3] += output[13] << 1; - output[3] += output[13]; - output[2] += output[12] << 4; - output[2] += output[12] << 1; - output[2] += output[12]; - output[1] += output[11] << 4; - output[1] += output[11] << 1; - output[1] += output[11]; - output[0] += output[10] << 4; - output[0] += output[10] << 1; - output[0] += output[10]; -} - -#if (-1 & 3) != 3 -#error "This code only works on a two's complement system" -#endif - -/* return v / 2^26, using only shifts and adds. - * - * On entry: v can take any value. */ -static inline limb div_by_2_26 (const limb v) -{ - /* High word of v; no shift needed. */ - const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); - /* Set to all 1s if v was negative; else set to 0s. */ - const int32_t sign = ((int32_t) highword) >> 31; - /* Set to 0x3ffffff if v was negative; else set to 0. */ - const int32_t roundoff = ((uint32_t) sign) >> 6; - /* Should return v / (1<<26) */ - return (v + roundoff) >> 26; -} - -/* return v / (2^25), using only shifts and adds. - * - * On entry: v can take any value. */ -static inline limb div_by_2_25 (const limb v) -{ - /* High word of v; no shift needed*/ - const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); - /* Set to all 1s if v was negative; else set to 0s. */ - const int32_t sign = ((int32_t) highword) >> 31; - /* Set to 0x1ffffff if v was negative; else set to 0. 
*/ - const int32_t roundoff = ((uint32_t) sign) >> 7; - /* Should return v / (1<<25) */ - return (v + roundoff) >> 25; -} - -/* Reduce all coefficients of the short form input so that |x| < 2^26. - * - * On entry: |output[i]| < 280*2^54 */ -static void freduce_coefficients (limb *output) -{ - unsigned i; - - output[10] = 0; - - for (i = 0; i < 10; i += 2) { - limb over = div_by_2_26 (output[i]); - /* The entry condition (that |output[i]| < 280*2^54) means that over is, at - * most, 280*2^28 in the first iteration of this loop. This is added to the - * next limb and we can approximate the resulting bound of that limb by - * 281*2^54. */ - output[i] -= over << 26; - output[i + 1] += over; - - /* For the first iteration, |output[i+1]| < 281*2^54, thus |over| < - * 281*2^29. When this is added to the next limb, the resulting bound can - * be approximated as 281*2^54. - * - * For subsequent iterations of the loop, 281*2^54 remains a conservative - * bound and no overflow occurs. */ - over = div_by_2_25 (output[i + 1]); - output[i + 1] -= over << 25; - output[i + 2] += over; - } - /* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */ - output[0] += output[10] << 4; - output[0] += output[10] << 1; - output[0] += output[10]; - - output[10] = 0; - - /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29 - * So |over| will be no more than 2^16. */ - { - limb over = div_by_2_26 (output[0]); - output[0] -= over << 26; - output[1] += over; - } - - /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The - * bound on |output[1]| is sufficient to meet our needs. */ -} - -/* A helpful wrapper around fproduct: output = in * in2. - * - * On entry: |in[i]| < 2^27 and |in2[i]| < 2^27. - * - * output must be distinct to both inputs. The output is reduced degree - * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26. */ -static void fmul (limb *output, const limb *in, const limb *in2) -{ - limb t[19]; - fproduct (t, in, in2); - /* |t[i]| < 14*2^54 */ - freduce_degree (t); - freduce_coefficients (t); - /* |t[i]| < 2^26 */ - memcpy (output, t, sizeof(limb) * 10); -} - -/* Square a number: output = in**2 - * - * output must be distinct from the input. The inputs are reduced coefficient - * form, the output is not. - * - * output[x] <= 14 * the largest product of the input limbs. 
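A worked check of the rounding repair in div_by_2_26 and div_by_2_25 above: an arithmetic right shift rounds toward minus infinity, while the carry logic needs truncation toward zero (what C's division would give), so negative inputs are first biased by the all-ones roundoff. Assuming the helpers defined above and <assert.h>, this sketch illustrates the behaviour:

static void check_div_rounding (void)
{
    assert (div_by_2_26 (-1) == 0);              /* a bare -1 >> 26 gives -1 */
    assert (div_by_2_26 (-(1 << 26)) == -1);     /* exact multiples unchanged */
    assert (div_by_2_26 ((1 << 26) + 5) == 1);   /* positive inputs unchanged */
}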
*/ -static void fsquare_inner (limb *output, const limb *in) -{ - output[0] = ((limb) ((s32) in[0])) * ((s32) in[0]); - output[1] = 2 * ((limb) ((s32) in[0])) * ((s32) in[1]); - output[2] = 2 - * (((limb) ((s32) in[1])) * ((s32) in[1]) - + ((limb) ((s32) in[0])) * ((s32) in[2])); - output[3] = 2 - * (((limb) ((s32) in[1])) * ((s32) in[2]) - + ((limb) ((s32) in[0])) * ((s32) in[3])); - output[4] = ((limb) ((s32) in[2])) * ((s32) in[2]) - + 4 * ((limb) ((s32) in[1])) * ((s32) in[3]) - + 2 * ((limb) ((s32) in[0])) * ((s32) in[4]); - output[5] = 2 - * (((limb) ((s32) in[2])) * ((s32) in[3]) - + ((limb) ((s32) in[1])) * ((s32) in[4]) - + ((limb) ((s32) in[0])) * ((s32) in[5])); - output[6] = 2 - * (((limb) ((s32) in[3])) * ((s32) in[3]) - + ((limb) ((s32) in[2])) * ((s32) in[4]) - + ((limb) ((s32) in[0])) * ((s32) in[6]) - + 2 * ((limb) ((s32) in[1])) * ((s32) in[5])); - output[7] = 2 - * (((limb) ((s32) in[3])) * ((s32) in[4]) - + ((limb) ((s32) in[2])) * ((s32) in[5]) - + ((limb) ((s32) in[1])) * ((s32) in[6]) - + ((limb) ((s32) in[0])) * ((s32) in[7])); - output[8] = ((limb) ((s32) in[4])) * ((s32) in[4]) - + 2 - * (((limb) ((s32) in[2])) * ((s32) in[6]) - + ((limb) ((s32) in[0])) * ((s32) in[8]) - + 2 - * (((limb) ((s32) in[1])) * ((s32) in[7]) - + ((limb) ((s32) in[3])) - * ((s32) in[5]))); - output[9] = 2 - * (((limb) ((s32) in[4])) * ((s32) in[5]) - + ((limb) ((s32) in[3])) * ((s32) in[6]) - + ((limb) ((s32) in[2])) * ((s32) in[7]) - + ((limb) ((s32) in[1])) * ((s32) in[8]) - + ((limb) ((s32) in[0])) * ((s32) in[9])); - output[10] = 2 - * (((limb) ((s32) in[5])) * ((s32) in[5]) - + ((limb) ((s32) in[4])) * ((s32) in[6]) - + ((limb) ((s32) in[2])) * ((s32) in[8]) - + 2 - * (((limb) ((s32) in[3])) * ((s32) in[7]) - + ((limb) ((s32) in[1])) * ((s32) in[9]))); - output[11] = 2 - * (((limb) ((s32) in[5])) * ((s32) in[6]) - + ((limb) ((s32) in[4])) * ((s32) in[7]) - + ((limb) ((s32) in[3])) * ((s32) in[8]) - + ((limb) ((s32) in[2])) * ((s32) in[9])); - output[12] = ((limb) ((s32) in[6])) * ((s32) in[6]) - + 2 - * (((limb) ((s32) in[4])) * ((s32) in[8]) - + 2 - * (((limb) ((s32) in[5])) * ((s32) in[7]) - + ((limb) ((s32) in[3])) - * ((s32) in[9]))); - output[13] = 2 - * (((limb) ((s32) in[6])) * ((s32) in[7]) - + ((limb) ((s32) in[5])) * ((s32) in[8]) - + ((limb) ((s32) in[4])) * ((s32) in[9])); - output[14] = 2 - * (((limb) ((s32) in[7])) * ((s32) in[7]) - + ((limb) ((s32) in[6])) * ((s32) in[8]) - + 2 * ((limb) ((s32) in[5])) * ((s32) in[9])); - output[15] = 2 - * (((limb) ((s32) in[7])) * ((s32) in[8]) - + ((limb) ((s32) in[6])) * ((s32) in[9])); - output[16] = ((limb) ((s32) in[8])) * ((s32) in[8]) - + 4 * ((limb) ((s32) in[7])) * ((s32) in[9]); - output[17] = 2 * ((limb) ((s32) in[8])) * ((s32) in[9]); - output[18] = 2 * ((limb) ((s32) in[9])) * ((s32) in[9]); -} - -/* fsquare sets output = in^2. - * - * On entry: The |in| argument is in reduced coefficients form and |in[i]| < - * 2^27. - * - * On exit: The |output| argument is in reduced coefficients form (indeed, one - * need only provide storage for 10 limbs) and |out[i]| < 2^26. */ -static void fsquare (limb *output, const limb *in) -{ - limb t[19]; - fsquare_inner (t, in); - /* |t[i]| < 14*2^54 because the largest product of two limbs will be < - * 2^(27+27) and fsquare_inner adds together, at most, 14 of those - * products. 
*/ - freduce_degree (t); - freduce_coefficients (t); - /* |t[i]| < 2^26 */ - memcpy (output, t, sizeof(limb) * 10); -} - -/* Take a little-endian, 32-byte number and expand it into polynomial form */ -static void fexpand (limb *output, const u8 *input) -{ -#define F(n,start,shift,mask) \ - output[n] = ((((limb) input[start + 0]) | \ - ((limb) input[start + 1]) << 8 | \ - ((limb) input[start + 2]) << 16 | \ - ((limb) input[start + 3]) << 24) >> shift) & mask; - F(0, 0, 0, 0x3ffffff); - F(1, 3, 2, 0x1ffffff); - F(2, 6, 3, 0x3ffffff); - F(3, 9, 5, 0x1ffffff); - F(4, 12, 6, 0x3ffffff); - F(5, 16, 0, 0x1ffffff); - F(6, 19, 1, 0x3ffffff); - F(7, 22, 3, 0x1ffffff); - F(8, 25, 4, 0x3ffffff); - F(9, 28, 6, 0x1ffffff); -#undef F -} - -#if (-32 >> 1) != -16 -#error "This code only works when >> does sign-extension on negative numbers" -#endif - -/* s32_eq returns 0xffffffff iff a == b and zero otherwise. */ -static s32 s32_eq (s32 a, s32 b) -{ - a = ~(a ^ b); - a &= a << 16; - a &= a << 8; - a &= a << 4; - a &= a << 2; - a &= a << 1; - return a >> 31; -} - -/* s32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are - * both non-negative. */ -static s32 s32_gte (s32 a, s32 b) -{ - a -= b; - /* a >= 0 iff a >= b. */ - return ~(a >> 31); -} - -/* Take a fully reduced polynomial form number and contract it into a - * little-endian, 32-byte array. - * - * On entry: |input_limbs[i]| < 2^26 */ -static void fcontract (u8 *output, limb *input_limbs) -{ - int i; - int j; - s32 input[10]; - s32 mask; - - /* |input_limbs[i]| < 2^26, so it's valid to convert to an s32. */ - for (i = 0; i < 10; i++) { - input[i] = input_limbs[i]; - } - - for (j = 0; j < 2; ++j) { - for (i = 0; i < 9; ++i) { - if ((i & 1) == 1) { - /* This calculation is a time-invariant way to make input[i] - * non-negative by borrowing from the next-larger limb. */ - const s32 mask = input[i] >> 31; - const s32 carry = -((input[i] & mask) >> 25); - input[i] = input[i] + (carry << 25); - input[i + 1] = input[i + 1] - carry; - } - else { - const s32 mask = input[i] >> 31; - const s32 carry = -((input[i] & mask) >> 26); - input[i] = input[i] + (carry << 26); - input[i + 1] = input[i + 1] - carry; - } - } - - /* There's no greater limb for input[9] to borrow from, but we can multiply - * by 19 and borrow from input[0], which is valid mod 2^255-19. */ - { - const s32 mask = input[9] >> 31; - const s32 carry = -((input[9] & mask) >> 25); - input[9] = input[9] + (carry << 25); - input[0] = input[0] - (carry * 19); - } - - /* After the first iteration, input[1..9] are non-negative and fit within - * 25 or 26 bits, depending on position. However, input[0] may be - * negative. */ - } - - /* The first borrow-propagation pass above ended with every limb - except (possibly) input[0] non-negative. - - If input[0] was negative after the first pass, then it was because of a - carry from input[9]. On entry, input[9] < 2^26 so the carry was, at most, - one, since (2**26-1) >> 25 = 1. Thus input[0] >= -19. - - In the second pass, each limb is decreased by at most one. Thus the second - borrow-propagation pass could only have wrapped around to decrease - input[0] again if the first pass left input[0] negative *and* input[1] - through input[9] were all zero. In that case, input[1] is now 2^25 - 1, - and this last borrow-propagation step will leave input[1] non-negative. 
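The canonicalisation step that follows leans on the branch-free comparisons s32_eq and s32_gte defined earlier in this file; note that they return an all-ones word (minus one as a signed value) rather than 1, precisely so the result can be used directly as a subtraction mask. Illustrative checks, assuming those helpers and <assert.h>:

static void check_ct_compares (void)
{
    assert (s32_eq (0x3ffffff, 0x3ffffff) == -1);   /* all-ones mask on equal */
    assert (s32_eq (1, 2) == 0);
    assert (s32_gte (5, 5) == -1);                  /* a >= b, both non-negative */
    assert (s32_gte (4, 5) == 0);
}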
*/ - { - const s32 mask = input[0] >> 31; - const s32 carry = -((input[0] & mask) >> 26); - input[0] = input[0] + (carry << 26); - input[1] = input[1] - carry; - } - - /* All input[i] are now non-negative. However, there might be values between - * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide. */ - for (j = 0; j < 2; j++) { - for (i = 0; i < 9; i++) { - if ((i & 1) == 1) { - const s32 carry = input[i] >> 25; - input[i] &= 0x1ffffff; - input[i + 1] += carry; - } - else { - const s32 carry = input[i] >> 26; - input[i] &= 0x3ffffff; - input[i + 1] += carry; - } - } - - { - const s32 carry = input[9] >> 25; - input[9] &= 0x1ffffff; - input[0] += 19 * carry; - } - } - - /* If the first carry-chain pass, just above, ended up with a carry from - * input[9], and that caused input[0] to be out-of-bounds, then input[0] was - * < 2^26 + 2*19, because the carry was, at most, two. - * - * If the second pass carried from input[9] again then input[0] is < 2*19 and - * the input[9] -> input[0] carry didn't push input[0] out of bounds. */ - - /* It still remains the case that input might be between 2^255-19 and 2^255. - * In this case, input[1..9] must take their maximum value and input[0] must - * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed. */ - mask = s32_gte (input[0], 0x3ffffed); - for (i = 1; i < 10; i++) { - if ((i & 1) == 1) { - mask &= s32_eq (input[i], 0x1ffffff); - } - else { - mask &= s32_eq (input[i], 0x3ffffff); - } - } - - /* mask is 0xffffffff if input >= 2^255-19 and zero otherwise. Thus - * this conditionally subtracts 2^255-19. */ - input[0] -= mask & 0x3ffffed; - - for (i = 1; i < 10; i++) { - if ((i & 1) == 1) { - input[i] -= mask & 0x1ffffff; - } - else { - input[i] -= mask & 0x3ffffff; - } - } - - input[1] <<= 2; - input[2] <<= 3; - input[3] <<= 5; - input[4] <<= 6; - input[6] <<= 1; - input[7] <<= 3; - input[8] <<= 4; - input[9] <<= 6; -#define F(i, s) \ - output[s+0] |= input[i] & 0xff; \ - output[s+1] = (input[i] >> 8) & 0xff; \ - output[s+2] = (input[i] >> 16) & 0xff; \ - output[s+3] = (input[i] >> 24) & 0xff; - output[0] = 0; - output[16] = 0; - F(0, 0); - F(1, 3); - F(2, 6); - F(3, 9); - F(4, 12); - F(5, 16); - F(6, 19); - F(7, 22); - F(8, 25); - F(9, 28); -#undef F -} - -/* Input: Q, Q', Q-Q' - * Output: 2Q, Q+Q' - * - * x2 z2: long form - * x3 z3: long form - * x z: short form, destroyed - * xprime zprime: short form, destroyed - * qmqp: short form, preserved - * - * On entry and exit, the absolute values of the limbs of all inputs and outputs - * are < 2^26. */ -static void fmonty (limb *x2, limb *z2, /* output 2Q */ -limb *x3, limb *z3, /* output Q + Q' */ -limb *x, limb *z, /* input Q */ -limb *xprime, limb *zprime, /* input Q' */ -const limb *qmqp /* input Q - Q' */) -{ - limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19], - zzprime[19], zzzprime[19], xxxprime[19]; - - memcpy (origx, x, 10 * sizeof(limb)); - fsum (x, z); - /* |x[i]| < 2^27 */ - fdifference (z, origx); /* does x - z */ - /* |z[i]| < 2^27 */ - - memcpy (origxprime, xprime, sizeof(limb) * 10); - fsum (xprime, zprime); - /* |xprime[i]| < 2^27 */ - fdifference (zprime, origxprime); - /* |zprime[i]| < 2^27 */ - fproduct (xxprime, xprime, z); - /* |xxprime[i]| < 14*2^54: the largest product of two limbs will be < - * 2^(27+27) and fproduct adds together, at most, 14 of those products. - * (Approximating that to 2^58 doesn't work out.)
*/ - fproduct (zzprime, x, zprime); - /* |zzprime[i]| < 14*2^54 */ - freduce_degree (xxprime); - freduce_coefficients (xxprime); - /* |xxprime[i]| < 2^26 */ - freduce_degree (zzprime); - freduce_coefficients (zzprime); - /* |zzprime[i]| < 2^26 */ - memcpy (origxprime, xxprime, sizeof(limb) * 10); - fsum (xxprime, zzprime); - /* |xxprime[i]| < 2^27 */ - fdifference (zzprime, origxprime); - /* |zzprime[i]| < 2^27 */ - fsquare (xxxprime, xxprime); - /* |xxxprime[i]| < 2^26 */ - fsquare (zzzprime, zzprime); - /* |zzzprime[i]| < 2^26 */ - fproduct (zzprime, zzzprime, qmqp); - /* |zzprime[i]| < 14*2^52 */ - freduce_degree (zzprime); - freduce_coefficients (zzprime); - /* |zzprime[i]| < 2^26 */ - memcpy (x3, xxxprime, sizeof(limb) * 10); - memcpy (z3, zzprime, sizeof(limb) * 10); - - fsquare (xx, x); - /* |xx[i]| < 2^26 */ - fsquare (zz, z); - /* |zz[i]| < 2^26 */ - fproduct (x2, xx, zz); - /* |x2[i]| < 14*2^52 */ - freduce_degree (x2); - freduce_coefficients (x2); - /* |x2[i]| < 2^26 */ - fdifference (zz, xx); // does zz = xx - zz - /* |zz[i]| < 2^27 */ - memset (zzz + 10, 0, sizeof(limb) * 9); - fscalar_product (zzz, zz, 121665); - /* |zzz[i]| < 2^(27+17) */ - /* No need to call freduce_degree here: - fscalar_product doesn't increase the degree of its input. */ - freduce_coefficients (zzz); - /* |zzz[i]| < 2^26 */ - fsum (zzz, xx); - /* |zzz[i]| < 2^27 */ - fproduct (z2, zz, zzz); - /* |z2[i]| < 14*2^(26+27) */ - freduce_degree (z2); - freduce_coefficients (z2); - /* |z2[i]| < 2^26 */ -} - -/* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave - * them unchanged if 'iswap' is 0. Runs in data-invariant time to avoid - * side-channel attacks. - * - * NOTE that this function requires that 'iswap' be 1 or 0; other values give - * wrong results. Also, the two limb arrays must be in reduced-coefficient, - * reduced-degree form: the values in a[10..19] or b[10..19] aren't swapped, - * and all values in a[0..9],b[0..9] must have magnitude less than - * INT32_MAX.
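cmult below is then a textbook Montgomery ladder: it maintains the pair (nQ, (n+1)Q), consumes scalar bits most-significant-first, and uses the conditional swap instead of a branch so that every bit costs exactly one fmonty call. A condensed structural sketch (point_t, cswap and ladder_step are hypothetical stand-ins for the x/z limb pairs, swap_conditional and fmonty):

static void ladder (point_t *R0, point_t *R1,
        const uint8_t n[32], const point_t *Q)
{
    unsigned i, j;

    for (i = 0; i < 32; ++i) {
        uint8_t byte = n[31 - i];            /* the scalar is little-endian */
        for (j = 0; j < 8; ++j) {
            const uint8_t bit = byte >> 7;   /* take bits MSB first */

            cswap (R0, R1, bit);             /* constant-time select */
            ladder_step (R0, R1, Q);         /* R0 := 2*R0, R1 := R0 + R1 */
            cswap (R0, R1, bit);
            byte <<= 1;
        }
    }
}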
*/ -static void swap_conditional (limb a[19], limb b[19], limb iswap) -{ - unsigned i; - const s32 swap = (s32) -iswap; - - for (i = 0; i < 10; ++i) { - const s32 x = swap & (((s32) a[i]) ^ ((s32) b[i])); - a[i] = ((s32) a[i]) ^ x; - b[i] = ((s32) b[i]) ^ x; - } -} - -/* Calculates nQ where Q is the x-coordinate of a point on the curve - * - * resultx/resultz: the x coordinate of the resulting curve point (short form) - * n: a little endian, 32-byte number - * q: a point of the curve (short form) */ -static void cmult (limb *resultx, limb *resultz, const u8 *n, const limb *q) -{ - limb a[19] = { 0 }, b[19] = { 1 }, c[19] = { 1 }, d[19] = { 0 }; - limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; - limb e[19] = { 0 }, f[19] = { 1 }, g[19] = { 0 }, h[19] = { 1 }; - limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; - - unsigned i, j; - - memcpy (nqpqx, q, sizeof(limb) * 10); - - for (i = 0; i < 32; ++i) { - u8 byte = n[31 - i]; - for (j = 0; j < 8; ++j) { - const limb bit = byte >> 7; - - swap_conditional (nqx, nqpqx, bit); - swap_conditional (nqz, nqpqz, bit); - fmonty (nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q); - swap_conditional (nqx2, nqpqx2, bit); - swap_conditional (nqz2, nqpqz2, bit); - - t = nqx; - nqx = nqx2; - nqx2 = t; - t = nqz; - nqz = nqz2; - nqz2 = t; - t = nqpqx; - nqpqx = nqpqx2; - nqpqx2 = t; - t = nqpqz; - nqpqz = nqpqz2; - nqpqz2 = t; - - byte <<= 1; - } - } - - memcpy (resultx, nqx, sizeof(limb) * 10); - memcpy (resultz, nqz, sizeof(limb) * 10); -} - -// ----------------------------------------------------------------------------- -// Shamelessly copied from djb's code -// ----------------------------------------------------------------------------- -static void crecip (limb *out, const limb *z) -{ - limb z2[10]; - limb z9[10]; - limb z11[10]; - limb z2_5_0[10]; - limb z2_10_0[10]; - limb z2_20_0[10]; - limb z2_50_0[10]; - limb z2_100_0[10]; - limb t0[10]; - limb t1[10]; - int i; - - /* 2 */fsquare (z2, z); - /* 4 */fsquare (t1, z2); - /* 8 */fsquare (t0, t1); - /* 9 */fmul (z9, t0, z); - /* 11 */fmul (z11, z9, z2); - /* 22 */fsquare (t0, z11); - /* 2^5 - 2^0 = 31 */fmul (z2_5_0, t0, z9); - - /* 2^6 - 2^1 */fsquare (t0, z2_5_0); - /* 2^7 - 2^2 */fsquare (t1, t0); - /* 2^8 - 2^3 */fsquare (t0, t1); - /* 2^9 - 2^4 */fsquare (t1, t0); - /* 2^10 - 2^5 */fsquare (t0, t1); - /* 2^10 - 2^0 */fmul (z2_10_0, t0, z2_5_0); - - /* 2^11 - 2^1 */fsquare (t0, z2_10_0); - /* 2^12 - 2^2 */fsquare (t1, t0); - /* 2^20 - 2^10 */for (i = 2; i < 10; i += 2) { - fsquare (t0, t1); - fsquare (t1, t0); - } - /* 2^20 - 2^0 */fmul (z2_20_0, t1, z2_10_0); - - /* 2^21 - 2^1 */fsquare (t0, z2_20_0); - /* 2^22 - 2^2 */fsquare (t1, t0); - /* 2^40 - 2^20 */for (i = 2; i < 20; i += 2) { - fsquare (t0, t1); - fsquare (t1, t0); - } - /* 2^40 - 2^0 */fmul (t0, t1, z2_20_0); - - /* 2^41 - 2^1 */fsquare (t1, t0); - /* 2^42 - 2^2 */fsquare (t0, t1); - /* 2^50 - 2^10 */for (i = 2; i < 10; i += 2) { - fsquare (t1, t0); - fsquare (t0, t1); - } - /* 2^50 - 2^0 */fmul (z2_50_0, t0, z2_10_0); - - /* 2^51 - 2^1 */fsquare (t0, z2_50_0); - /* 2^52 - 2^2 */fsquare (t1, t0); - /* 2^100 - 2^50 */for (i = 2; i < 50; i += 2) { - fsquare (t0, t1); - fsquare (t1, t0); - } - /* 2^100 - 2^0 */fmul (z2_100_0, t1, z2_50_0); - - /* 2^101 - 2^1 */fsquare (t1, z2_100_0); - /* 2^102 - 2^2 */fsquare (t0, t1); - /* 2^200 - 2^100 */for (i = 2; i < 100; i += 2) { - fsquare (t1, t0); - fsquare (t0, t1); - } - /* 2^200 - 2^0 */fmul (t1, t0, z2_100_0); - - /* 2^201 - 2^1 */fsquare (t0, t1); - /* 2^202 - 2^2 */fsquare (t1, 
t0); - /* 2^250 - 2^50 */for (i = 2; i < 50; i += 2) { - fsquare (t0, t1); - fsquare (t1, t0); - } - /* 2^250 - 2^0 */fmul (t0, t1, z2_50_0); - - /* 2^251 - 2^1 */fsquare (t1, t0); - /* 2^252 - 2^2 */fsquare (t0, t1); - /* 2^253 - 2^3 */fsquare (t1, t0); - /* 2^254 - 2^4 */fsquare (t0, t1); - /* 2^255 - 2^5 */fsquare (t1, t0); - /* 2^255 - 21 */fmul (out, t1, z11); -} - -int scalarmult_donna (u8 *mypublic, const u8 *secret, const u8 *basepoint) -{ - limb bp[10], x[10], z[11], zmone[10]; - unsigned char e[32]; - - memcpy (e, secret, 32); - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - - fexpand (bp, basepoint); - cmult (x, z, e, bp); - crecip (zmone, z); - fmul (z, x, zmone); - fcontract (mypublic, z); - - return 0; -} - -int -scalarmult_base_donna (u8 *mypublic, const u8 *secret) -{ - return scalarmult_donna (mypublic, secret, - curve25519_basepoint); -} diff --git a/src/libcryptobox/curve25519/curve25519.c b/src/libcryptobox/curve25519/curve25519.c deleted file mode 100644 index 2d090ed1d..000000000 --- a/src/libcryptobox/curve25519/curve25519.c +++ /dev/null @@ -1,148 +0,0 @@ -/*- - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "config.h" -#include "cryptobox.h" -#include "curve25519.h" -#include "platform_config.h" - -extern unsigned long cpu_config; - -typedef struct curve25519_impl_s { - unsigned long cpu_flags; - const char *desc; - - void (*scalarmult) (guint8 *mypublic, - const guint8 *secret, - const guint8 *basepoint); - void (*scalarmult_base) (guint8 *mypublic, - const guint8 *secret); -} curve25519_impl_t; - -#define CURVE25519_DECLARE(ext) \ - void scalarmult_##ext(guint8 *mypublic, const guint8 *secret, const guint8 *basepoint); \ - void scalarmult_base_##ext(guint8 *mypublic, const guint8 *secret) - -#define CURVE25519_IMPL(cpuflags, desc, ext) \ - {(cpuflags), desc, scalarmult_##ext, scalarmult_base_##ext} - -#if defined(__LP64__) -#if defined(HAVE_AVX) -CURVE25519_DECLARE(avx); -#define CURVE25519_AVX CURVE25519_IMPL(CPUID_AVX, "avx", avx) -#endif - -#endif - -CURVE25519_DECLARE(ref); -#define CURVE25519_REF CURVE25519_IMPL(0, "ref", ref) - -#if defined(CMAKE_ARCH_x86_64) || defined(CMAKE_ARCH_i386) -CURVE25519_DECLARE(donna); -#define CURVE25519_GENERIC CURVE25519_IMPL(0, "donna", donna) -#else -#define CURVE25519_GENERIC CURVE25519_REF -#endif - - -static const curve25519_impl_t curve25519_list[] = { - CURVE25519_GENERIC, -#if defined(CURVE25519_AVX) - CURVE25519_AVX, -#endif -}; - -const guchar secA[] = {0x5A, 0xC9, 0x9F, 0x33, 0x63, 0x2E, 0x5A, 0x76, 0x8D, - 0xE7, 0xE8, 0x1B, 0xF8, 0x54, 0xC2, 0x7C, 0x46, 0xE3, - 0xFB, 0xF2, 0xAB, 0xBA, 0xCD, 0x29, 0xEC, 0x4A, 0xFF, - 0x51, 0x73, 0x69, 0xC6, 0x60}; -const guchar secB[] = {0x47, 0xDC, 0x3D, 0x21, 0x41, 0x74, 0x82, 0x0E, 0x11, - 0x54, 0xB4, 0x9B, 0xC6, 0xCD, 0xB2, 0xAB, 0xD4, 0x5E, - 0xE9, 0x58, 0x17, 0x05, 0x5D, 0x25, 0x5A, 0xA3, 0x58, - 0x31, 0xB7, 0x0D, 0x32, 0x60}; - -static const curve25519_impl_t *curve25519_opt = &curve25519_list[0]; - -static gboolean 
-curve25519_test_impl (const curve25519_impl_t *impl) -{ - guchar sec_local[32], sec_ref[32], - pubA[32], pubB[32]; - - curve25519_impl_t ref_impl = CURVE25519_REF; - - ref_impl.scalarmult (pubA, secA, curve25519_basepoint); - ref_impl.scalarmult (pubB, secB, curve25519_basepoint); - - impl->scalarmult (sec_local, secA, pubB); - ref_impl.scalarmult (sec_ref, secA, pubB); - - if (memcmp (sec_local, sec_ref, sizeof (sec_ref)) != 0) { - return FALSE; - } - - impl->scalarmult (sec_local, secB, pubA); - ref_impl.scalarmult (sec_ref, secB, pubA); - - if (memcmp (sec_local, sec_ref, sizeof (sec_ref)) != 0) { - return FALSE; - } - - impl->scalarmult (sec_local, secB, pubA); - impl->scalarmult (sec_ref, secA, pubB); - - if (memcmp (sec_local, sec_ref, sizeof (sec_ref)) != 0) { - return FALSE; - } - - return TRUE; -} - -const char* -curve25519_load (void) -{ - guint i; - - if (cpu_config != 0) { - for (i = 0; i < G_N_ELEMENTS(curve25519_list); i++) { - if (curve25519_list[i].cpu_flags & cpu_config) { - curve25519_opt = &curve25519_list[i]; - break; - } - } - } - - g_assert (curve25519_test_impl (curve25519_opt)); - - return curve25519_opt->desc; -} - -int -curve25519 (guchar *mypublic, - const guchar *secret, - const guchar *basepoint) -{ - curve25519_opt->scalarmult (mypublic, secret, basepoint); - - return 0; -} - -int -curve25519_base (guchar *mypublic, const guchar *secret) -{ - curve25519_opt->scalarmult_base (mypublic, secret); - - return 0; -} diff --git a/src/libcryptobox/curve25519/curve25519.h b/src/libcryptobox/curve25519/curve25519.h deleted file mode 100644 index c75c355f8..000000000 --- a/src/libcryptobox/curve25519/curve25519.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef CURVE25519_H -#define CURVE25519_H - -#include "config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static const guchar curve25519_basepoint[32] = {9}; - -int curve25519 (guchar *mypublic, const guchar *secret, const guchar *basepoint); - -/* Call for optimized implementation of scalarmult if needed */ -int curve25519_base (guchar *mypublic, const guchar *secret); - -const char *curve25519_load (void); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/libcryptobox/curve25519/fe.h b/src/libcryptobox/curve25519/fe.h deleted file mode 100644 index 44e8b44a6..000000000 --- a/src/libcryptobox/curve25519/fe.h +++ /dev/null @@ -1,159 +0,0 @@ -/*- - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
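One pattern from the deleted curve25519.c above is worth recording, since it is what libsodium's own runtime detection now replaces: an implementation table is scanned for the first entry whose required CPU flags are present, and the winner is verified against the portable reference on known vectors (curve25519_test_impl) before it is ever used. A condensed sketch with hypothetical names:

typedef struct {
    unsigned long cpu_flags;   /* CPUID bits this entry requires */
    int (*scalarmult) (unsigned char *out, const unsigned char *n,
            const unsigned char *p);
} impl_t;

static const impl_t *select_impl (const impl_t *list, unsigned n,
        unsigned long cpu_config)
{
    unsigned i;

    for (i = 0; i < n; i++) {
        if (list[i].cpu_flags & cpu_config) {
            return &list[i];   /* first accelerated match wins */
        }
    }

    return &list[0];           /* entry 0 is the portable fallback */
}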
- */ -/* Imported from Public Domain djb code */ - -#ifndef SRC_LIBCRYPTOBOX_CURVE25519_FE_H_ -#define SRC_LIBCRYPTOBOX_CURVE25519_FE_H_ - - -#ifdef __cplusplus -extern "C" { -#endif - -typedef int32_t fe[10]; - -void fe_frombytes (fe, const unsigned char *); - -void fe_tobytes (unsigned char *, const fe); - -void fe_copy (fe, const fe); - -int fe_isnonzero (const fe); - -int fe_isnegative (const fe); - -void fe_0 (fe); - -void fe_1 (fe); - -void fe_cmov (fe, const fe, unsigned int); - -void fe_add (fe, const fe, const fe); - -void fe_sub (fe, const fe, const fe); - -void fe_neg (fe, const fe); - -void fe_mul (fe, const fe, const fe); - -void fe_sq (fe, const fe); - -void fe_sq2 (fe, const fe); - -void fe_invert (fe, const fe); - -void fe_pow22523 (fe, const fe); - -/* -ge means group element. -Here the group is the set of pairs (x,y) of field elements (see fe.h) -satisfying -x^2 + y^2 = 1 + d x^2y^2 -where d = -121665/121666. -Representations: - ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z - ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT - ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T - ge_precomp (Duif): (y+x,y-x,2dxy) -*/ - -typedef struct { - fe X; - fe Y; - fe Z; -} ge_p2; - -typedef struct { - fe X; - fe Y; - fe Z; - fe T; -} ge_p3; - -typedef struct { - fe X; - fe Y; - fe Z; - fe T; -} ge_p1p1; - -typedef struct { - fe yplusx; - fe yminusx; - fe xy2d; -} ge_precomp; - -typedef struct { - fe YplusX; - fe YminusX; - fe Z; - fe T2d; -} ge_cached; - - -void ge_tobytes (unsigned char *, const ge_p2 *); - -void ge_p3_tobytes (unsigned char *, const ge_p3 *); - -int ge_frombytes_negate_vartime (ge_p3 *, const unsigned char *); - -void ge_p2_0 (ge_p2 *); - -void ge_p3_0 (ge_p3 *); - -void ge_precomp_0 (ge_precomp *); - -void ge_p3_to_p2 (ge_p2 *, const ge_p3 *); - -void ge_p3_to_cached (ge_cached *, const ge_p3 *); - -void ge_p1p1_to_p2 (ge_p2 *, const ge_p1p1 *); - -void ge_p1p1_to_p3 (ge_p3 *, const ge_p1p1 *); - -void ge_p2_dbl (ge_p1p1 *, const ge_p2 *); - -void ge_p3_dbl (ge_p1p1 *, const ge_p3 *); - -void ge_madd (ge_p1p1 *, const ge_p3 *, const ge_precomp *); - -void ge_msub (ge_p1p1 *, const ge_p3 *, const ge_precomp *); - -void ge_add (ge_p1p1 *, const ge_p3 *, const ge_cached *); - -void ge_sub (ge_p1p1 *, const ge_p3 *, const ge_cached *); - -void ge_scalarmult_base (ge_p3 *, const unsigned char *); - -void ge_double_scalarmult_vartime (ge_p2 *, const unsigned char *, const ge_p3 *, const unsigned char *); - -void ge_scalarmult_vartime (ge_p3 *, const unsigned char *, const ge_p3 *); - -int verify_32 (const unsigned char *x, const unsigned char *y); - -/* -The set of scalars is \Z/l -where l = 2^252 + 27742317777372353535851937790883648493. -*/ - -void sc_reduce (unsigned char *); - -void sc_muladd (unsigned char *, const unsigned char *, const unsigned char *, const unsigned char *); - -#ifdef __cplusplus -} -#endif - -#endif /* SRC_LIBCRYPTOBOX_CURVE25519_FE_H_ */ diff --git a/src/libcryptobox/curve25519/ref.c b/src/libcryptobox/curve25519/ref.c deleted file mode 100644 index eb89b2cdc..000000000 --- a/src/libcryptobox/curve25519/ref.c +++ /dev/null @@ -1,2916 +0,0 @@ -/* - version 20081011 - Matthew Dempsky - Public domain. - Derived from public domain code by D. J. Bernstein. - 20140216 tweak: Mask top bit of point input. 
- */ - -#include <stddef.h> -#include <stdint.h> -#include <string.h> -#include "config.h" -#include "curve25519.h" -#include "fe.h" - - -static uint64_t load_3(const unsigned char *in) -{ - uint64_t result; - result = (uint64_t) in[0]; - result |= ((uint64_t) in[1]) << 8; - result |= ((uint64_t) in[2]) << 16; - return result; -} - -static uint64_t load_4(const unsigned char *in) -{ - uint64_t result; - result = (uint64_t) in[0]; - result |= ((uint64_t) in[1]) << 8; - result |= ((uint64_t) in[2]) << 16; - result |= ((uint64_t) in[3]) << 24; - return result; -} - -/* - h = 0 - */ - -void fe_0(fe h) -{ - memset(&h[0], 0, 10 * sizeof h[0]); -} - -/* - h = 1 - */ - -void fe_1(fe h) -{ - h[0] = 1; - h[1] = 0; - memset(&h[2], 0, 8 * sizeof h[0]); -} - -/* - h = f + g - Can overlap h with f or g. - - Preconditions: - |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. - |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. - - Postconditions: - |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. - */ - -void fe_add(fe h, const fe f, const fe g) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t g0 = g[0]; - int32_t g1 = g[1]; - int32_t g2 = g[2]; - int32_t g3 = g[3]; - int32_t g4 = g[4]; - int32_t g5 = g[5]; - int32_t g6 = g[6]; - int32_t g7 = g[7]; - int32_t g8 = g[8]; - int32_t g9 = g[9]; - int32_t h0 = f0 + g0; - int32_t h1 = f1 + g1; - int32_t h2 = f2 + g2; - int32_t h3 = f3 + g3; - int32_t h4 = f4 + g4; - int32_t h5 = f5 + g5; - int32_t h6 = f6 + g6; - int32_t h7 = f7 + g7; - int32_t h8 = f8 + g8; - int32_t h9 = f9 + g9; - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; - h[5] = h5; - h[6] = h6; - h[7] = h7; - h[8] = h8; - h[9] = h9; -} - -/* - Replace (f,g) with (g,g) if b == 1; - replace (f,g) with (f,g) if b == 0. - - Preconditions: b in {0,1}. - */ - -void fe_cmov(fe f, const fe g, unsigned int b) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t g0 = g[0]; - int32_t g1 = g[1]; - int32_t g2 = g[2]; - int32_t g3 = g[3]; - int32_t g4 = g[4]; - int32_t g5 = g[5]; - int32_t g6 = g[6]; - int32_t g7 = g[7]; - int32_t g8 = g[8]; - int32_t g9 = g[9]; - int32_t x0 = f0 ^ g0; - int32_t x1 = f1 ^ g1; - int32_t x2 = f2 ^ g2; - int32_t x3 = f3 ^ g3; - int32_t x4 = f4 ^ g4; - int32_t x5 = f5 ^ g5; - int32_t x6 = f6 ^ g6; - int32_t x7 = f7 ^ g7; - int32_t x8 = f8 ^ g8; - int32_t x9 = f9 ^ g9; - b = (unsigned int) (-(int) b); - x0 &= b; - x1 &= b; - x2 &= b; - x3 &= b; - x4 &= b; - x5 &= b; - x6 &= b; - x7 &= b; - x8 &= b; - x9 &= b; - f[0] = f0 ^ x0; - f[1] = f1 ^ x1; - f[2] = f2 ^ x2; - f[3] = f3 ^ x3; - f[4] = f4 ^ x4; - f[5] = f5 ^ x5; - f[6] = f6 ^ x6; - f[7] = f7 ^ x7; - f[8] = f8 ^ x8; - f[9] = f9 ^ x9; -} - -/* - h = f - */ - -void fe_copy(fe h, const fe f) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - h[0] = f0; - h[1] = f1; - h[2] = f2; - h[3] = f3; - h[4] = f4; - h[5] = f5; - h[6] = f6; - h[7] = f7; - h[8] = f8; - h[9] = f9; -} - -/* - Ignores top bit of h. 
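fe_frombytes below builds on the byte-wise loads at the top of this file; assembling bytes LSB-first makes them endian-independent, unlike a memcpy into an integer. A quick check, assuming load_4 and <assert.h>:

static void check_load4 (void)
{
    static const unsigned char buf[4] = { 0x78, 0x56, 0x34, 0x12 };

    assert (load_4 (buf) == 0x12345678);   /* bytes assembled little-endian */
}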
- */ - -void fe_frombytes(fe h, const unsigned char *s) -{ - int64_t h0 = load_4 (s); - int64_t h1 = load_3 (s + 4) << 6; - int64_t h2 = load_3 (s + 7) << 5; - int64_t h3 = load_3 (s + 10) << 3; - int64_t h4 = load_3 (s + 13) << 2; - int64_t h5 = load_4 (s + 16); - int64_t h6 = load_3 (s + 20) << 7; - int64_t h7 = load_3 (s + 23) << 5; - int64_t h8 = load_3 (s + 26) << 4; - int64_t h9 = (load_3 (s + 29) & 8388607) << 2; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - carry9 = (h9 + (int64_t) (1L << 24)) >> 25; - h0 += carry9 * 19; - h9 -= carry9 * ((uint64_t) 1L << 25); - carry1 = (h1 + (int64_t) (1L << 24)) >> 25; - h2 += carry1; - h1 -= carry1 * ((uint64_t) 1L << 25); - carry3 = (h3 + (int64_t) (1L << 24)) >> 25; - h4 += carry3; - h3 -= carry3 * ((uint64_t) 1L << 25); - carry5 = (h5 + (int64_t) (1L << 24)) >> 25; - h6 += carry5; - h5 -= carry5 * ((uint64_t) 1L << 25); - carry7 = (h7 + (int64_t) (1L << 24)) >> 25; - h8 += carry7; - h7 -= carry7 * ((uint64_t) 1L << 25); - - carry0 = (h0 + (int64_t) (1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - carry2 = (h2 + (int64_t) (1L << 25)) >> 26; - h3 += carry2; - h2 -= carry2 * ((uint64_t) 1L << 26); - carry4 = (h4 + (int64_t) (1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - carry6 = (h6 + (int64_t) (1L << 25)) >> 26; - h7 += carry6; - h6 -= carry6 * ((uint64_t) 1L << 26); - carry8 = (h8 + (int64_t) (1L << 25)) >> 26; - h9 += carry8; - h8 -= carry8 * ((uint64_t) 1L << 26); - - h[0] = (int32_t) h0; - h[1] = (int32_t) h1; - h[2] = (int32_t) h2; - h[3] = (int32_t) h3; - h[4] = (int32_t) h4; - h[5] = (int32_t) h5; - h[6] = (int32_t) h6; - h[7] = (int32_t) h7; - h[8] = (int32_t) h8; - h[9] = (int32_t) h9; -} - -/* - Preconditions: - |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. - - Write p=2^255-19; q=floor(h/p). - Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). - - Proof: - Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. - Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. - - Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). - Then 0<y<1. - - Write r=h-pq. - Have 0<=r<=p-1=2^255-20. - Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. - - Write x=r+19(2^-255)r+y. - Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. - - Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) - so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. - */ - -void fe_tobytes(unsigned char *s, const fe h) -{ - int32_t h0 = h[0]; - int32_t h1 = h[1]; - int32_t h2 = h[2]; - int32_t h3 = h[3]; - int32_t h4 = h[4]; - int32_t h5 = h[5]; - int32_t h6 = h[6]; - int32_t h7 = h[7]; - int32_t h8 = h[8]; - int32_t h9 = h[9]; - int32_t q; - int32_t carry0; - int32_t carry1; - int32_t carry2; - int32_t carry3; - int32_t carry4; - int32_t carry5; - int32_t carry6; - int32_t carry7; - int32_t carry8; - int32_t carry9; - - q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25; - q = (h0 + q) >> 26; - q = (h1 + q) >> 25; - q = (h2 + q) >> 26; - q = (h3 + q) >> 25; - q = (h4 + q) >> 26; - q = (h5 + q) >> 25; - q = (h6 + q) >> 26; - q = (h7 + q) >> 25; - q = (h8 + q) >> 26; - q = (h9 + q) >> 25; - - /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ - h0 += 19 * q; - /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. 
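Making the two Goal lines explicit: with p = 2^255 - 19 and q = floor(h/p) in {0, 1} as computed above, h - q*p = (h + 19q) - q*2^255. Adding 19q and then discarding the carry of weight 2^255 at the end of the chain (the "h10 = carry9" comment below) therefore subtracts exactly q*p, leaving the canonical value in [0, p - 1].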
*/ - - carry0 = h0 >> 26; - h1 += carry0; - h0 -= carry0 * ((uint32_t) 1L << 26); - carry1 = h1 >> 25; - h2 += carry1; - h1 -= carry1 * ((uint32_t) 1L << 25); - carry2 = h2 >> 26; - h3 += carry2; - h2 -= carry2 * ((uint32_t) 1L << 26); - carry3 = h3 >> 25; - h4 += carry3; - h3 -= carry3 * ((uint32_t) 1L << 25); - carry4 = h4 >> 26; - h5 += carry4; - h4 -= carry4 * ((uint32_t) 1L << 26); - carry5 = h5 >> 25; - h6 += carry5; - h5 -= carry5 * ((uint32_t) 1L << 25); - carry6 = h6 >> 26; - h7 += carry6; - h6 -= carry6 * ((uint32_t) 1L << 26); - carry7 = h7 >> 25; - h8 += carry7; - h7 -= carry7 * ((uint32_t) 1L << 25); - carry8 = h8 >> 26; - h9 += carry8; - h8 -= carry8 * ((uint32_t) 1L << 26); - carry9 = h9 >> 25; - h9 -= carry9 * ((uint32_t) 1L << 25); - /* h10 = carry9 */ - - /* - Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. - Have h0+...+2^230 h9 between 0 and 2^255-1; - evidently 2^255 h10-2^255 q = 0. - Goal: Output h0+...+2^230 h9. - */ - - s[0] = h0 >> 0; - s[1] = h0 >> 8; - s[2] = h0 >> 16; - s[3] = (h0 >> 24) | (h1 * ((uint32_t) 1 << 2)); - s[4] = h1 >> 6; - s[5] = h1 >> 14; - s[6] = (h1 >> 22) | (h2 * ((uint32_t) 1 << 3)); - s[7] = h2 >> 5; - s[8] = h2 >> 13; - s[9] = (h2 >> 21) | (h3 * ((uint32_t) 1 << 5)); - s[10] = h3 >> 3; - s[11] = h3 >> 11; - s[12] = (h3 >> 19) | (h4 * ((uint32_t) 1 << 6)); - s[13] = h4 >> 2; - s[14] = h4 >> 10; - s[15] = h4 >> 18; - s[16] = h5 >> 0; - s[17] = h5 >> 8; - s[18] = h5 >> 16; - s[19] = (h5 >> 24) | (h6 * ((uint32_t) 1 << 1)); - s[20] = h6 >> 7; - s[21] = h6 >> 15; - s[22] = (h6 >> 23) | (h7 * ((uint32_t) 1 << 3)); - s[23] = h7 >> 5; - s[24] = h7 >> 13; - s[25] = (h7 >> 21) | (h8 * ((uint32_t) 1 << 4)); - s[26] = h8 >> 4; - s[27] = h8 >> 12; - s[28] = (h8 >> 20) | (h9 * ((uint32_t) 1 << 6)); - s[29] = h9 >> 2; - s[30] = h9 >> 10; - s[31] = h9 >> 18; -} - -/* - return 1 if f is in {1,3,5,...,q-2} - return 0 if f is in {0,2,4,...,q-1} - - Preconditions: - |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. - */ - -int fe_isnegative(const fe f) -{ - unsigned char s[32]; - fe_tobytes (s, f); - return s[0] & 1; -} - -/* - return 1 if f == 0 - return 0 if f != 0 - - Preconditions: - |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. - */ - -static unsigned char zero[32]; - -int verify_32(const unsigned char *x, const unsigned char *y) -{ - volatile uint_fast16_t d = 0U; - int i; - - for (i = 0; i < 32; i++) { - d |= x[i] ^ y[i]; - } - return (1 & ((d - 1) >> 8)) - 1; -} - -int fe_isnonzero(const fe f) -{ - unsigned char s[32]; - fe_tobytes (s, f); - return verify_32 (s, zero); -} - -/* - h = f * g - Can overlap h with f or g. - - Preconditions: - |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. - |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. - - Postconditions: - |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. - */ - -/* - Notes on implementation strategy: - - Using schoolbook multiplication. - Karatsuba would save a little in some cost models. - - Most multiplications by 2 and 19 are 32-bit precomputations; - cheaper than 64-bit postcomputations. - - There is one remaining multiplication by 19 in the carry chain; - one *19 precomputation can be merged into this, - but the resulting data flow is considerably less clean. - - There are 12 carries below. - 10 of them are 2-way parallelizable and vectorizable. - Can get away with 11 carries, but then data flow is much deeper. - - With tighter constraints on inputs can squeeze carries into int32. 
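The precomputed g1_19 ... g9_19 and f1_2, f3_2, ... values in fe_mul below fall straight out of this radix. A product f[i]*g[j] has weight 2^ceil(25.5*(i+j)) when at least one index is even; any weight of 2^255 or more folds down by a factor of 19, since 2^255 == 19 (mod p), and when i and j are both odd the true weight sits one bit above the destination limb's, contributing an extra factor of 2. For example, f1*g9 has weight 2^(26+230) = 2*2^255 == 2*19 = 38 (mod p), hence the f1g9_38 term in h0, while f2*g8 has weight 2^255 == 19, hence f2g8_19.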
- */ - -void fe_mul(fe h, const fe f, const fe g) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t g0 = g[0]; - int32_t g1 = g[1]; - int32_t g2 = g[2]; - int32_t g3 = g[3]; - int32_t g4 = g[4]; - int32_t g5 = g[5]; - int32_t g6 = g[6]; - int32_t g7 = g[7]; - int32_t g8 = g[8]; - int32_t g9 = g[9]; - int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ - int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ - int32_t g3_19 = 19 * g3; - int32_t g4_19 = 19 * g4; - int32_t g5_19 = 19 * g5; - int32_t g6_19 = 19 * g6; - int32_t g7_19 = 19 * g7; - int32_t g8_19 = 19 * g8; - int32_t g9_19 = 19 * g9; - int32_t f1_2 = 2 * f1; - int32_t f3_2 = 2 * f3; - int32_t f5_2 = 2 * f5; - int32_t f7_2 = 2 * f7; - int32_t f9_2 = 2 * f9; - int64_t f0g0 = f0 * (int64_t) g0; - int64_t f0g1 = f0 * (int64_t) g1; - int64_t f0g2 = f0 * (int64_t) g2; - int64_t f0g3 = f0 * (int64_t) g3; - int64_t f0g4 = f0 * (int64_t) g4; - int64_t f0g5 = f0 * (int64_t) g5; - int64_t f0g6 = f0 * (int64_t) g6; - int64_t f0g7 = f0 * (int64_t) g7; - int64_t f0g8 = f0 * (int64_t) g8; - int64_t f0g9 = f0 * (int64_t) g9; - int64_t f1g0 = f1 * (int64_t) g0; - int64_t f1g1_2 = f1_2 * (int64_t) g1; - int64_t f1g2 = f1 * (int64_t) g2; - int64_t f1g3_2 = f1_2 * (int64_t) g3; - int64_t f1g4 = f1 * (int64_t) g4; - int64_t f1g5_2 = f1_2 * (int64_t) g5; - int64_t f1g6 = f1 * (int64_t) g6; - int64_t f1g7_2 = f1_2 * (int64_t) g7; - int64_t f1g8 = f1 * (int64_t) g8; - int64_t f1g9_38 = f1_2 * (int64_t) g9_19; - int64_t f2g0 = f2 * (int64_t) g0; - int64_t f2g1 = f2 * (int64_t) g1; - int64_t f2g2 = f2 * (int64_t) g2; - int64_t f2g3 = f2 * (int64_t) g3; - int64_t f2g4 = f2 * (int64_t) g4; - int64_t f2g5 = f2 * (int64_t) g5; - int64_t f2g6 = f2 * (int64_t) g6; - int64_t f2g7 = f2 * (int64_t) g7; - int64_t f2g8_19 = f2 * (int64_t) g8_19; - int64_t f2g9_19 = f2 * (int64_t) g9_19; - int64_t f3g0 = f3 * (int64_t) g0; - int64_t f3g1_2 = f3_2 * (int64_t) g1; - int64_t f3g2 = f3 * (int64_t) g2; - int64_t f3g3_2 = f3_2 * (int64_t) g3; - int64_t f3g4 = f3 * (int64_t) g4; - int64_t f3g5_2 = f3_2 * (int64_t) g5; - int64_t f3g6 = f3 * (int64_t) g6; - int64_t f3g7_38 = f3_2 * (int64_t) g7_19; - int64_t f3g8_19 = f3 * (int64_t) g8_19; - int64_t f3g9_38 = f3_2 * (int64_t) g9_19; - int64_t f4g0 = f4 * (int64_t) g0; - int64_t f4g1 = f4 * (int64_t) g1; - int64_t f4g2 = f4 * (int64_t) g2; - int64_t f4g3 = f4 * (int64_t) g3; - int64_t f4g4 = f4 * (int64_t) g4; - int64_t f4g5 = f4 * (int64_t) g5; - int64_t f4g6_19 = f4 * (int64_t) g6_19; - int64_t f4g7_19 = f4 * (int64_t) g7_19; - int64_t f4g8_19 = f4 * (int64_t) g8_19; - int64_t f4g9_19 = f4 * (int64_t) g9_19; - int64_t f5g0 = f5 * (int64_t) g0; - int64_t f5g1_2 = f5_2 * (int64_t) g1; - int64_t f5g2 = f5 * (int64_t) g2; - int64_t f5g3_2 = f5_2 * (int64_t) g3; - int64_t f5g4 = f5 * (int64_t) g4; - int64_t f5g5_38 = f5_2 * (int64_t) g5_19; - int64_t f5g6_19 = f5 * (int64_t) g6_19; - int64_t f5g7_38 = f5_2 * (int64_t) g7_19; - int64_t f5g8_19 = f5 * (int64_t) g8_19; - int64_t f5g9_38 = f5_2 * (int64_t) g9_19; - int64_t f6g0 = f6 * (int64_t) g0; - int64_t f6g1 = f6 * (int64_t) g1; - int64_t f6g2 = f6 * (int64_t) g2; - int64_t f6g3 = f6 * (int64_t) g3; - int64_t f6g4_19 = f6 * (int64_t) g4_19; - int64_t f6g5_19 = f6 * (int64_t) g5_19; - int64_t f6g6_19 = f6 * (int64_t) g6_19; - int64_t f6g7_19 = f6 * (int64_t) g7_19; - int64_t f6g8_19 = f6 * (int64_t) g8_19; - int64_t 
f6g9_19 = f6 * (int64_t) g9_19; - int64_t f7g0 = f7 * (int64_t) g0; - int64_t f7g1_2 = f7_2 * (int64_t) g1; - int64_t f7g2 = f7 * (int64_t) g2; - int64_t f7g3_38 = f7_2 * (int64_t) g3_19; - int64_t f7g4_19 = f7 * (int64_t) g4_19; - int64_t f7g5_38 = f7_2 * (int64_t) g5_19; - int64_t f7g6_19 = f7 * (int64_t) g6_19; - int64_t f7g7_38 = f7_2 * (int64_t) g7_19; - int64_t f7g8_19 = f7 * (int64_t) g8_19; - int64_t f7g9_38 = f7_2 * (int64_t) g9_19; - int64_t f8g0 = f8 * (int64_t) g0; - int64_t f8g1 = f8 * (int64_t) g1; - int64_t f8g2_19 = f8 * (int64_t) g2_19; - int64_t f8g3_19 = f8 * (int64_t) g3_19; - int64_t f8g4_19 = f8 * (int64_t) g4_19; - int64_t f8g5_19 = f8 * (int64_t) g5_19; - int64_t f8g6_19 = f8 * (int64_t) g6_19; - int64_t f8g7_19 = f8 * (int64_t) g7_19; - int64_t f8g8_19 = f8 * (int64_t) g8_19; - int64_t f8g9_19 = f8 * (int64_t) g9_19; - int64_t f9g0 = f9 * (int64_t) g0; - int64_t f9g1_38 = f9_2 * (int64_t) g1_19; - int64_t f9g2_19 = f9 * (int64_t) g2_19; - int64_t f9g3_38 = f9_2 * (int64_t) g3_19; - int64_t f9g4_19 = f9 * (int64_t) g4_19; - int64_t f9g5_38 = f9_2 * (int64_t) g5_19; - int64_t f9g6_19 = f9 * (int64_t) g6_19; - int64_t f9g7_38 = f9_2 * (int64_t) g7_19; - int64_t f9g8_19 = f9 * (int64_t) g8_19; - int64_t f9g9_38 = f9_2 * (int64_t) g9_19; - int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 - + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38; - int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 - + f7g4_19 + f8g3_19 + f9g2_19; - int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 - + f7g5_38 + f8g4_19 + f9g3_38; - int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 - + f7g6_19 + f8g5_19 + f9g4_19; - int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 - + f7g7_38 + f8g6_19 + f9g5_38; - int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 - + f8g7_19 + f9g6_19; - int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 - + f8g8_19 + f9g7_38; - int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 - + f9g8_19; - int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 - + f8g0 + f9g9_38; - int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 - + f9g0; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - /* - |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) - i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 - |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) - i.e. 
|h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 - */ - - carry0 = (h0 + (int64_t) (1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - carry4 = (h4 + (int64_t) (1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - /* |h0| <= 2^25 */ - /* |h4| <= 2^25 */ - /* |h1| <= 1.71*2^59 */ - /* |h5| <= 1.71*2^59 */ - - carry1 = (h1 + (int64_t) (1L << 24)) >> 25; - h2 += carry1; - h1 -= carry1 * ((uint64_t) 1L << 25); - carry5 = (h5 + (int64_t) (1L << 24)) >> 25; - h6 += carry5; - h5 -= carry5 * ((uint64_t) 1L << 25); - /* |h1| <= 2^24; from now on fits into int32 */ - /* |h5| <= 2^24; from now on fits into int32 */ - /* |h2| <= 1.41*2^60 */ - /* |h6| <= 1.41*2^60 */ - - carry2 = (h2 + (int64_t) (1L << 25)) >> 26; - h3 += carry2; - h2 -= carry2 * ((uint64_t) 1L << 26); - carry6 = (h6 + (int64_t) (1L << 25)) >> 26; - h7 += carry6; - h6 -= carry6 * ((uint64_t) 1L << 26); - /* |h2| <= 2^25; from now on fits into int32 unchanged */ - /* |h6| <= 2^25; from now on fits into int32 unchanged */ - /* |h3| <= 1.71*2^59 */ - /* |h7| <= 1.71*2^59 */ - - carry3 = (h3 + (int64_t) (1L << 24)) >> 25; - h4 += carry3; - h3 -= carry3 * ((uint64_t) 1L << 25); - carry7 = (h7 + (int64_t) (1L << 24)) >> 25; - h8 += carry7; - h7 -= carry7 * ((uint64_t) 1L << 25); - /* |h3| <= 2^24; from now on fits into int32 unchanged */ - /* |h7| <= 2^24; from now on fits into int32 unchanged */ - /* |h4| <= 1.72*2^34 */ - /* |h8| <= 1.41*2^60 */ - - carry4 = (h4 + (int64_t) (1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - carry8 = (h8 + (int64_t) (1L << 25)) >> 26; - h9 += carry8; - h8 -= carry8 * ((uint64_t) 1L << 26); - /* |h4| <= 2^25; from now on fits into int32 unchanged */ - /* |h8| <= 2^25; from now on fits into int32 unchanged */ - /* |h5| <= 1.01*2^24 */ - /* |h9| <= 1.71*2^59 */ - - carry9 = (h9 + (int64_t) (1L << 24)) >> 25; - h0 += carry9 * 19; - h9 -= carry9 * ((uint64_t) 1L << 25); - /* |h9| <= 2^24; from now on fits into int32 unchanged */ - /* |h0| <= 1.1*2^39 */ - - carry0 = (h0 + (int64_t) (1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - /* |h0| <= 2^25; from now on fits into int32 unchanged */ - /* |h1| <= 1.01*2^24 */ - - h[0] = (int32_t) h0; - h[1] = (int32_t) h1; - h[2] = (int32_t) h2; - h[3] = (int32_t) h3; - h[4] = (int32_t) h4; - h[5] = (int32_t) h5; - h[6] = (int32_t) h6; - h[7] = (int32_t) h7; - h[8] = (int32_t) h8; - h[9] = (int32_t) h9; -} - -/* - h = -f - - Preconditions: - |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. - - Postconditions: - |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. - */ - -void fe_neg(fe h, const fe f) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t h0 = -f0; - int32_t h1 = -f1; - int32_t h2 = -f2; - int32_t h3 = -f3; - int32_t h4 = -f4; - int32_t h5 = -f5; - int32_t h6 = -f6; - int32_t h7 = -f7; - int32_t h8 = -f8; - int32_t h9 = -f9; - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; - h[5] = h5; - h[6] = h6; - h[7] = h7; - h[8] = h8; - h[9] = h9; -} - -/* - h = f * f - Can overlap h with f. - - Preconditions: - |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. - - Postconditions: - |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. - */ - -/* - See fe_mul.c for discussion of implementation strategy. 
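One further editorial aside on the squaring that follows: each symmetric cross term f_i*f_j (i != j) occurs twice in the schoolbook expansion, which is why fe_sq precomputes the f*_2 doublings, and why the doubling and the mod-p fold combine into the *38 = 2*19 constants. A trivial standalone check of the underlying identity (editor's sketch, plain C):

#include <assert.h>
#include <stdint.h>

int main (void)
{
	/* (f0 + f1*B)^2 = f0^2 + (2*f0*f1)*B + f1^2*B^2: the middle term is
	 * doubled, matching the f*_2 precomputations in fe_sq below. */
	int64_t f0 = 7, f1 = 11, B = (int64_t) 1 << 26;
	int64_t lhs = (f0 + f1 * B) * (f0 + f1 * B);
	int64_t rhs = f0 * f0 + (2 * f0 * f1) * B + (f1 * B) * (f1 * B);

	assert (lhs == rhs);
	return 0;
}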
- */ - -void fe_sq(fe h, const fe f) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t f0_2 = 2 * f0; - int32_t f1_2 = 2 * f1; - int32_t f2_2 = 2 * f2; - int32_t f3_2 = 2 * f3; - int32_t f4_2 = 2 * f4; - int32_t f5_2 = 2 * f5; - int32_t f6_2 = 2 * f6; - int32_t f7_2 = 2 * f7; - int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ - int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ - int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ - int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ - int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ - int64_t f0f0 = f0 * (int64_t) f0; - int64_t f0f1_2 = f0_2 * (int64_t) f1; - int64_t f0f2_2 = f0_2 * (int64_t) f2; - int64_t f0f3_2 = f0_2 * (int64_t) f3; - int64_t f0f4_2 = f0_2 * (int64_t) f4; - int64_t f0f5_2 = f0_2 * (int64_t) f5; - int64_t f0f6_2 = f0_2 * (int64_t) f6; - int64_t f0f7_2 = f0_2 * (int64_t) f7; - int64_t f0f8_2 = f0_2 * (int64_t) f8; - int64_t f0f9_2 = f0_2 * (int64_t) f9; - int64_t f1f1_2 = f1_2 * (int64_t) f1; - int64_t f1f2_2 = f1_2 * (int64_t) f2; - int64_t f1f3_4 = f1_2 * (int64_t) f3_2; - int64_t f1f4_2 = f1_2 * (int64_t) f4; - int64_t f1f5_4 = f1_2 * (int64_t) f5_2; - int64_t f1f6_2 = f1_2 * (int64_t) f6; - int64_t f1f7_4 = f1_2 * (int64_t) f7_2; - int64_t f1f8_2 = f1_2 * (int64_t) f8; - int64_t f1f9_76 = f1_2 * (int64_t) f9_38; - int64_t f2f2 = f2 * (int64_t) f2; - int64_t f2f3_2 = f2_2 * (int64_t) f3; - int64_t f2f4_2 = f2_2 * (int64_t) f4; - int64_t f2f5_2 = f2_2 * (int64_t) f5; - int64_t f2f6_2 = f2_2 * (int64_t) f6; - int64_t f2f7_2 = f2_2 * (int64_t) f7; - int64_t f2f8_38 = f2_2 * (int64_t) f8_19; - int64_t f2f9_38 = f2 * (int64_t) f9_38; - int64_t f3f3_2 = f3_2 * (int64_t) f3; - int64_t f3f4_2 = f3_2 * (int64_t) f4; - int64_t f3f5_4 = f3_2 * (int64_t) f5_2; - int64_t f3f6_2 = f3_2 * (int64_t) f6; - int64_t f3f7_76 = f3_2 * (int64_t) f7_38; - int64_t f3f8_38 = f3_2 * (int64_t) f8_19; - int64_t f3f9_76 = f3_2 * (int64_t) f9_38; - int64_t f4f4 = f4 * (int64_t) f4; - int64_t f4f5_2 = f4_2 * (int64_t) f5; - int64_t f4f6_38 = f4_2 * (int64_t) f6_19; - int64_t f4f7_38 = f4 * (int64_t) f7_38; - int64_t f4f8_38 = f4_2 * (int64_t) f8_19; - int64_t f4f9_38 = f4 * (int64_t) f9_38; - int64_t f5f5_38 = f5 * (int64_t) f5_38; - int64_t f5f6_38 = f5_2 * (int64_t) f6_19; - int64_t f5f7_76 = f5_2 * (int64_t) f7_38; - int64_t f5f8_38 = f5_2 * (int64_t) f8_19; - int64_t f5f9_76 = f5_2 * (int64_t) f9_38; - int64_t f6f6_19 = f6 * (int64_t) f6_19; - int64_t f6f7_38 = f6 * (int64_t) f7_38; - int64_t f6f8_38 = f6_2 * (int64_t) f8_19; - int64_t f6f9_38 = f6 * (int64_t) f9_38; - int64_t f7f7_38 = f7 * (int64_t) f7_38; - int64_t f7f8_38 = f7_2 * (int64_t) f8_19; - int64_t f7f9_76 = f7_2 * (int64_t) f9_38; - int64_t f8f8_19 = f8 * (int64_t) f8_19; - int64_t f8f9_38 = f8 * (int64_t) f9_38; - int64_t f9f9_38 = f9 * (int64_t) f9_38; - int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; - int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; - int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; - int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; - int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; - int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; - int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; - int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; - int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; 
- int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - carry0 = (h0 + (int64_t) (1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - carry4 = (h4 + (int64_t) (1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - - carry1 = (h1 + (int64_t) (1L << 24)) >> 25; - h2 += carry1; - h1 -= carry1 * ((uint64_t) 1L << 25); - carry5 = (h5 + (int64_t) (1L << 24)) >> 25; - h6 += carry5; - h5 -= carry5 * ((uint64_t) 1L << 25); - - carry2 = (h2 + (int64_t) (1L << 25)) >> 26; - h3 += carry2; - h2 -= carry2 * ((uint64_t) 1L << 26); - carry6 = (h6 + (int64_t) (1L << 25)) >> 26; - h7 += carry6; - h6 -= carry6 * ((uint64_t) 1L << 26); - - carry3 = (h3 + (int64_t) (1L << 24)) >> 25; - h4 += carry3; - h3 -= carry3 * ((uint64_t) 1L << 25); - carry7 = (h7 + (int64_t) (1L << 24)) >> 25; - h8 += carry7; - h7 -= carry7 * ((uint64_t) 1L << 25); - - carry4 = (h4 + (int64_t) (1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - carry8 = (h8 + (int64_t) (1L << 25)) >> 26; - h9 += carry8; - h8 -= carry8 * ((uint64_t) 1L << 26); - - carry9 = (h9 + (int64_t) (1L << 24)) >> 25; - h0 += carry9 * 19; - h9 -= carry9 * ((uint64_t) 1L << 25); - - carry0 = (h0 + (int64_t) (1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - - h[0] = (int32_t) h0; - h[1] = (int32_t) h1; - h[2] = (int32_t) h2; - h[3] = (int32_t) h3; - h[4] = (int32_t) h4; - h[5] = (int32_t) h5; - h[6] = (int32_t) h6; - h[7] = (int32_t) h7; - h[8] = (int32_t) h8; - h[9] = (int32_t) h9; -} - -/* - h = 2 * f * f - Can overlap h with f. - - Preconditions: - |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. - - Postconditions: - |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. - */ - -/* - See fe_mul.c for discussion of implementation strategy. 
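An editorial gloss before the definition: fe_sq2 produces the same field element as a square followed by an in-field doubling (the limb representation may differ); it merely fuses the h_i += h_i pass ahead of the carry chain, saving a second carry sweep. In terms of the fe API in this file, a reference version would be:

/* Editor's sketch, not part of the original file. */
void fe_sq2_ref (fe h, const fe f)
{
	fe_sq (h, f);     /* h = f^2 */
	fe_add (h, h, h); /* h = 2*f^2, the value fe_sq2 computes directly */
}

ge_p2_dbl below relies on fe_sq2 for the 2*Z^2 term of the extended-coordinates doubling formulas.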
- */ - -void fe_sq2(fe h, const fe f) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t f0_2 = 2 * f0; - int32_t f1_2 = 2 * f1; - int32_t f2_2 = 2 * f2; - int32_t f3_2 = 2 * f3; - int32_t f4_2 = 2 * f4; - int32_t f5_2 = 2 * f5; - int32_t f6_2 = 2 * f6; - int32_t f7_2 = 2 * f7; - int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ - int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ - int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ - int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ - int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ - int64_t f0f0 = f0 * (int64_t) f0; - int64_t f0f1_2 = f0_2 * (int64_t) f1; - int64_t f0f2_2 = f0_2 * (int64_t) f2; - int64_t f0f3_2 = f0_2 * (int64_t) f3; - int64_t f0f4_2 = f0_2 * (int64_t) f4; - int64_t f0f5_2 = f0_2 * (int64_t) f5; - int64_t f0f6_2 = f0_2 * (int64_t) f6; - int64_t f0f7_2 = f0_2 * (int64_t) f7; - int64_t f0f8_2 = f0_2 * (int64_t) f8; - int64_t f0f9_2 = f0_2 * (int64_t) f9; - int64_t f1f1_2 = f1_2 * (int64_t) f1; - int64_t f1f2_2 = f1_2 * (int64_t) f2; - int64_t f1f3_4 = f1_2 * (int64_t) f3_2; - int64_t f1f4_2 = f1_2 * (int64_t) f4; - int64_t f1f5_4 = f1_2 * (int64_t) f5_2; - int64_t f1f6_2 = f1_2 * (int64_t) f6; - int64_t f1f7_4 = f1_2 * (int64_t) f7_2; - int64_t f1f8_2 = f1_2 * (int64_t) f8; - int64_t f1f9_76 = f1_2 * (int64_t) f9_38; - int64_t f2f2 = f2 * (int64_t) f2; - int64_t f2f3_2 = f2_2 * (int64_t) f3; - int64_t f2f4_2 = f2_2 * (int64_t) f4; - int64_t f2f5_2 = f2_2 * (int64_t) f5; - int64_t f2f6_2 = f2_2 * (int64_t) f6; - int64_t f2f7_2 = f2_2 * (int64_t) f7; - int64_t f2f8_38 = f2_2 * (int64_t) f8_19; - int64_t f2f9_38 = f2 * (int64_t) f9_38; - int64_t f3f3_2 = f3_2 * (int64_t) f3; - int64_t f3f4_2 = f3_2 * (int64_t) f4; - int64_t f3f5_4 = f3_2 * (int64_t) f5_2; - int64_t f3f6_2 = f3_2 * (int64_t) f6; - int64_t f3f7_76 = f3_2 * (int64_t) f7_38; - int64_t f3f8_38 = f3_2 * (int64_t) f8_19; - int64_t f3f9_76 = f3_2 * (int64_t) f9_38; - int64_t f4f4 = f4 * (int64_t) f4; - int64_t f4f5_2 = f4_2 * (int64_t) f5; - int64_t f4f6_38 = f4_2 * (int64_t) f6_19; - int64_t f4f7_38 = f4 * (int64_t) f7_38; - int64_t f4f8_38 = f4_2 * (int64_t) f8_19; - int64_t f4f9_38 = f4 * (int64_t) f9_38; - int64_t f5f5_38 = f5 * (int64_t) f5_38; - int64_t f5f6_38 = f5_2 * (int64_t) f6_19; - int64_t f5f7_76 = f5_2 * (int64_t) f7_38; - int64_t f5f8_38 = f5_2 * (int64_t) f8_19; - int64_t f5f9_76 = f5_2 * (int64_t) f9_38; - int64_t f6f6_19 = f6 * (int64_t) f6_19; - int64_t f6f7_38 = f6 * (int64_t) f7_38; - int64_t f6f8_38 = f6_2 * (int64_t) f8_19; - int64_t f6f9_38 = f6 * (int64_t) f9_38; - int64_t f7f7_38 = f7 * (int64_t) f7_38; - int64_t f7f8_38 = f7_2 * (int64_t) f8_19; - int64_t f7f9_76 = f7_2 * (int64_t) f9_38; - int64_t f8f8_19 = f8 * (int64_t) f8_19; - int64_t f8f9_38 = f8 * (int64_t) f9_38; - int64_t f9f9_38 = f9 * (int64_t) f9_38; - int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; - int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; - int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; - int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; - int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; - int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; - int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; - int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; - int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; 
- int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - h0 += h0; - h1 += h1; - h2 += h2; - h3 += h3; - h4 += h4; - h5 += h5; - h6 += h6; - h7 += h7; - h8 += h8; - h9 += h9; - - carry0 = (h0 + (int64_t) (1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - carry4 = (h4 + (int64_t) (1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - - carry1 = (h1 + (int64_t) (1L << 24)) >> 25; - h2 += carry1; - h1 -= carry1 * ((uint64_t) 1L << 25); - carry5 = (h5 + (int64_t) (1L << 24)) >> 25; - h6 += carry5; - h5 -= carry5 * ((uint64_t) 1L << 25); - - carry2 = (h2 + (int64_t) (1L << 25)) >> 26; - h3 += carry2; - h2 -= carry2 * ((uint64_t) 1L << 26); - carry6 = (h6 + (int64_t) (1L << 25)) >> 26; - h7 += carry6; - h6 -= carry6 * ((uint64_t) 1L << 26); - - carry3 = (h3 + (int64_t) (1L << 24)) >> 25; - h4 += carry3; - h3 -= carry3 * ((uint64_t) 1L << 25); - carry7 = (h7 + (int64_t) (1L << 24)) >> 25; - h8 += carry7; - h7 -= carry7 * ((uint64_t) 1L << 25); - - carry4 = (h4 + (int64_t) (1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - carry8 = (h8 + (int64_t) (1L << 25)) >> 26; - h9 += carry8; - h8 -= carry8 * ((uint64_t) 1L << 26); - - carry9 = (h9 + (int64_t) (1L << 24)) >> 25; - h0 += carry9 * 19; - h9 -= carry9 * ((uint64_t) 1L << 25); - - carry0 = (h0 + (int64_t) (1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - - h[0] = (int32_t) h0; - h[1] = (int32_t) h1; - h[2] = (int32_t) h2; - h[3] = (int32_t) h3; - h[4] = (int32_t) h4; - h[5] = (int32_t) h5; - h[6] = (int32_t) h6; - h[7] = (int32_t) h7; - h[8] = (int32_t) h8; - h[9] = (int32_t) h9; -} - -void fe_invert(fe out, const fe z) -{ - fe t0; - fe t1; - fe t2; - fe t3; - int i; - - fe_sq (t0, z); - fe_sq (t1, t0); - fe_sq (t1, t1); - fe_mul (t1, z, t1); - fe_mul (t0, t0, t1); - fe_sq (t2, t0); - fe_mul (t1, t1, t2); - fe_sq (t2, t1); - for (i = 1; i < 5; ++i) { - fe_sq (t2, t2); - } - fe_mul (t1, t2, t1); - fe_sq (t2, t1); - for (i = 1; i < 10; ++i) { - fe_sq (t2, t2); - } - fe_mul (t2, t2, t1); - fe_sq (t3, t2); - for (i = 1; i < 20; ++i) { - fe_sq (t3, t3); - } - fe_mul (t2, t3, t2); - fe_sq (t2, t2); - for (i = 1; i < 10; ++i) { - fe_sq (t2, t2); - } - fe_mul (t1, t2, t1); - fe_sq (t2, t1); - for (i = 1; i < 50; ++i) { - fe_sq (t2, t2); - } - fe_mul (t2, t2, t1); - fe_sq (t3, t2); - for (i = 1; i < 100; ++i) { - fe_sq (t3, t3); - } - fe_mul (t2, t3, t2); - fe_sq (t2, t2); - for (i = 1; i < 50; ++i) { - fe_sq (t2, t2); - } - fe_mul (t1, t2, t1); - fe_sq (t1, t1); - for (i = 1; i < 5; ++i) { - fe_sq (t1, t1); - } - fe_mul (out, t1, t0); -} - -void fe_pow22523(fe out, const fe z) -{ - fe t0; - fe t1; - fe t2; - int i; - - fe_sq (t0, z); - fe_sq (t1, t0); - fe_sq (t1, t1); - fe_mul (t1, z, t1); - fe_mul (t0, t0, t1); - fe_sq (t0, t0); - fe_mul (t0, t1, t0); - fe_sq (t1, t0); - for (i = 1; i < 5; ++i) { - fe_sq (t1, t1); - } - fe_mul (t0, t1, t0); - fe_sq (t1, t0); - for (i = 1; i < 10; ++i) { - fe_sq (t1, t1); - } - fe_mul (t1, t1, t0); - fe_sq (t2, t1); - for (i = 1; i < 20; ++i) { - fe_sq (t2, t2); - } - fe_mul (t1, t2, t1); - fe_sq (t1, t1); - for (i = 1; i < 10; ++i) { - fe_sq (t1, t1); - } - fe_mul (t0, t1, t0); - fe_sq (t1, t0); - for (i = 1; i < 50; ++i) { - fe_sq (t1, t1); - } - fe_mul (t1, t1, t0); - fe_sq (t2, t1); - for (i = 1; i < 100; ++i) { - fe_sq (t2, t2); - } 
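/* Editor's annotation: after the 100-squaring loop above, t2 = z^(2^200 - 2^100).
 * The remaining steps assemble z^(2^252 - 3) = z^((q-5)/8) for q = 2^255 - 19,
 * the exponent ge_frombytes_negate_vartime uses below to take square roots
 * during point decompression. */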
- fe_mul (t1, t2, t1); - fe_sq (t1, t1); - for (i = 1; i < 50; ++i) { - fe_sq (t1, t1); - } - fe_mul (t0, t1, t0); - fe_sq (t0, t0); - fe_sq (t0, t0); - fe_mul (out, t0, z); -} - -/* - h = f - g - Can overlap h with f or g. - - Preconditions: - |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. - |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. - - Postconditions: - |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. - */ - -void fe_sub(fe h, const fe f, const fe g) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t g0 = g[0]; - int32_t g1 = g[1]; - int32_t g2 = g[2]; - int32_t g3 = g[3]; - int32_t g4 = g[4]; - int32_t g5 = g[5]; - int32_t g6 = g[6]; - int32_t g7 = g[7]; - int32_t g8 = g[8]; - int32_t g9 = g[9]; - int32_t h0 = f0 - g0; - int32_t h1 = f1 - g1; - int32_t h2 = f2 - g2; - int32_t h3 = f3 - g3; - int32_t h4 = f4 - g4; - int32_t h5 = f5 - g5; - int32_t h6 = f6 - g6; - int32_t h7 = f7 - g7; - int32_t h8 = f8 - g8; - int32_t h9 = f9 - g9; - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; - h[5] = h5; - h[6] = h6; - h[7] = h7; - h[8] = h8; - h[9] = h9; -} - -/* - r = p + q - */ - -void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) -{ - fe t0; - - fe_add (r->X, p->Y, p->X); - fe_sub (r->Y, p->Y, p->X); - fe_mul (r->Z, r->X, q->YplusX); - fe_mul (r->Y, r->Y, q->YminusX); - fe_mul (r->T, q->T2d, p->T); - fe_mul (r->X, p->Z, q->Z); - fe_add (t0, r->X, r->X); - fe_sub (r->X, r->Z, r->Y); - fe_add (r->Y, r->Z, r->Y); - fe_add (r->Z, t0, r->T); - fe_sub (r->T, t0, r->T); -} - -static void slide(signed char *r, const unsigned char *a) -{ - int i; - int b; - int k; - - for (i = 0; i < 256; ++i) - r[i] = 1 & (a[i >> 3] >> (i & 7)); - - for (i = 0; i < 256; ++i) - if (r[i]) { - for (b = 1; b <= 6 && i + b < 256; ++b) { - if (r[i + b]) { - if (r[i] + (r[i + b] << b) <= 15) { - r[i] += r[i + b] << b; - r[i + b] = 0; - } - else if (r[i] - (r[i + b] << b) >= -15) { - r[i] -= r[i + b] << b; - for (k = i + b; k < 256; ++k) { - if (!r[k]) { - r[k] = 1; - break; - } - r[k] = 0; - } - } - else - break; - } - } - } - -} - -#include "base_constants2.h" - -/* 37095705934669439343138083508754565189542113879843219016388785533085940283555 */ -static const fe d = -{ -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, - -18696448, -12055116 }; - -/* sqrt(-1) */ -static const fe sqrtm1 = -{ -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, - 326686, 11406482 }; - -int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) -{ - fe u; - fe v; - fe v3; - fe vxx; - fe check; - - fe_frombytes (h->Y, s); - fe_1 (h->Z); - fe_sq (u, h->Y); - fe_mul (v, u, d); - fe_sub (u, u, h->Z); /* u = y^2-1 */ - fe_add (v, v, h->Z); /* v = dy^2+1 */ - - fe_sq (v3, v); - fe_mul (v3, v3, v); /* v3 = v^3 */ - fe_sq (h->X, v3); - fe_mul (h->X, h->X, v); - fe_mul (h->X, h->X, u); /* x = uv^7 */ - - fe_pow22523 (h->X, h->X); /* x = (uv^7)^((q-5)/8) */ - fe_mul (h->X, h->X, v3); - fe_mul (h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */ - - fe_sq (vxx, h->X); - fe_mul (vxx, vxx, v); - fe_sub (check, vxx, u); /* vx^2-u */ - if (fe_isnonzero (check)) { - fe_add (check, vxx, u); /* vx^2+u */ - if (fe_isnonzero (check)) - return -1; - fe_mul (h->X, h->X, sqrtm1); - } - - if (fe_isnegative (h->X) == (s[31] >> 7)) - fe_neg (h->X, h->X); - - fe_mul (h->T, h->X, h->Y); - return 
0; -} - -/* - r = p + q - */ - -void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) -{ - fe t0; - - fe_add (r->X, p->Y, p->X); - fe_sub (r->Y, p->Y, p->X); - fe_mul (r->Z, r->X, q->yplusx); - fe_mul (r->Y, r->Y, q->yminusx); - fe_mul (r->T, q->xy2d, p->T); - fe_add (t0, p->Z, p->Z); - fe_sub (r->X, r->Z, r->Y); - fe_add (r->Y, r->Z, r->Y); - fe_add (r->Z, t0, r->T); - fe_sub (r->T, t0, r->T); -} - -/* - r = p - q - */ - -void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) -{ - fe t0; - - fe_add (r->X, p->Y, p->X); - fe_sub (r->Y, p->Y, p->X); - fe_mul (r->Z, r->X, q->yminusx); - fe_mul (r->Y, r->Y, q->yplusx); - fe_mul (r->T, q->xy2d, p->T); - fe_add (t0, p->Z, p->Z); - fe_sub (r->X, r->Z, r->Y); - fe_add (r->Y, r->Z, r->Y); - fe_sub (r->Z, t0, r->T); - fe_add (r->T, t0, r->T); -} - -/* - r = p - */ - -extern void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) -{ - fe_mul (r->X, p->X, p->T); - fe_mul (r->Y, p->Y, p->Z); - fe_mul (r->Z, p->Z, p->T); -} - -/* - r = p - */ - -extern void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) -{ - fe_mul (r->X, p->X, p->T); - fe_mul (r->Y, p->Y, p->Z); - fe_mul (r->Z, p->Z, p->T); - fe_mul (r->T, p->X, p->Y); -} - -void ge_p2_0(ge_p2 *h) -{ - fe_0 (h->X); - fe_1 (h->Y); - fe_1 (h->Z); -} - -/* - r = 2 * p - */ - -void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) -{ - fe t0; - - fe_sq (r->X, p->X); - fe_sq (r->Z, p->Y); - fe_sq2 (r->T, p->Z); - fe_add (r->Y, p->X, p->Y); - fe_sq (t0, r->Y); - fe_add (r->Y, r->Z, r->X); - fe_sub (r->Z, r->Z, r->X); - fe_sub (r->X, t0, r->Y); - fe_sub (r->T, r->T, r->Z); -} - -void ge_p3_0(ge_p3 *h) -{ - fe_0 (h->X); - fe_1 (h->Y); - fe_1 (h->Z); - fe_0 (h->T); -} - -/* - r = p - */ - -/* 2 * d = 16295367250680780974490674513165176452449235426866156013048779062215315747161 */ -static const fe d2 = -{ -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, - -6495438, 29715968, 9444199 }; - -extern void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) -{ - fe_add (r->YplusX, p->Y, p->X); - fe_sub (r->YminusX, p->Y, p->X); - fe_copy (r->Z, p->Z); - fe_mul (r->T2d, p->T, d2); -} - -/* - r = p - */ - -extern void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) -{ - fe_copy (r->X, p->X); - fe_copy (r->Y, p->Y); - fe_copy (r->Z, p->Z); -} - -void ge_p3_tobytes(unsigned char *s, const ge_p3 *h) -{ - fe recip; - fe x; - fe y; - - fe_invert (recip, h->Z); - fe_mul (x, h->X, recip); - fe_mul (y, h->Y, recip); - fe_tobytes (s, y); - s[31] ^= fe_isnegative (x) << 7; -} - -/* - r = 2 * p - */ - -void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) -{ - ge_p2 q; - ge_p3_to_p2 (&q, p); - ge_p2_dbl (r, &q); -} - -void ge_precomp_0(ge_precomp *h) -{ - fe_1 (h->yplusx); - fe_1 (h->yminusx); - fe_0 (h->xy2d); -} - -static unsigned char equal(signed char b, signed char c) -{ - unsigned char ub = b; - unsigned char uc = c; - unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */ - uint32_t y = x; /* 0: yes; 1..255: no */ - y -= 1; /* 4294967295: yes; 0..254: no */ - y >>= 31; /* 1: yes; 0: no */ - return y; -} - -static unsigned char negative(signed char b) -{ - uint64_t x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */ - x >>= 63; /* 1: yes; 0: no */ - return x; -} - -static void cmov(ge_precomp *t, const ge_precomp *u, unsigned char b) -{ - fe_cmov (t->yplusx, u->yplusx, b); - fe_cmov (t->yminusx, u->yminusx, b); - fe_cmov (t->xy2d, u->xy2d, b); -} - -/* base[i][j] = (j+1)*256^i*B */ -#include "base_constants.h" - - -static void ge_select(ge_precomp *t, int pos, signed char b) -{ - ge_precomp minust; - unsigned char 
bnegative = negative (b);
- unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
-
- ge_precomp_0 (t);
- cmov (t, &base[pos][0], equal (babs, 1));
- cmov (t, &base[pos][1], equal (babs, 2));
- cmov (t, &base[pos][2], equal (babs, 3));
- cmov (t, &base[pos][3], equal (babs, 4));
- cmov (t, &base[pos][4], equal (babs, 5));
- cmov (t, &base[pos][5], equal (babs, 6));
- cmov (t, &base[pos][6], equal (babs, 7));
- cmov (t, &base[pos][7], equal (babs, 8));
- fe_copy (minust.yplusx, t->yminusx);
- fe_copy (minust.yminusx, t->yplusx);
- fe_neg (minust.xy2d, t->xy2d);
- cmov (t, &minust, bnegative);
-}
-
-/*
- r = p - q
- */
-
-void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q)
-{
- fe t0;
-
- fe_add (r->X, p->Y, p->X);
- fe_sub (r->Y, p->Y, p->X);
- fe_mul (r->Z, r->X, q->YminusX);
- fe_mul (r->Y, r->Y, q->YplusX);
- fe_mul (r->T, q->T2d, p->T);
- fe_mul (r->X, p->Z, q->Z);
- fe_add (t0, r->X, r->X);
- fe_sub (r->X, r->Z, r->Y);
- fe_add (r->Y, r->Z, r->Y);
- fe_sub (r->Z, t0, r->T);
- fe_add (r->T, t0, r->T);
-}
-
-void ge_tobytes(unsigned char *s, const ge_p2 *h)
-{
- fe recip;
- fe x;
- fe y;
-
- fe_invert (recip, h->Z);
- fe_mul (x, h->X, recip);
- fe_mul (y, h->Y, recip);
- fe_tobytes (s, y);
- s[31] ^= fe_isnegative (x) << 7;
-}
-
-/*
- h = a * B
- where a = a[0]+256*a[1]+...+256^31 a[31]
- B is the Ed25519 base point (x,4/5) with x positive.
-
- Preconditions:
- a[31] <= 127
- */
-
-/*
- r = a * A + b * B
- where a = a[0]+256*a[1]+...+256^31 a[31].
- and b = b[0]+256*b[1]+...+256^31 b[31].
- B is the Ed25519 base point (x,4/5) with x positive.
- */
-
-void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
- const ge_p3 *A, const unsigned char *b)
-{
- signed char aslide[256];
- signed char bslide[256];
- ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
- ge_p1p1 t;
- ge_p3 u;
- ge_p3 A2;
- int i;
-
- slide (aslide, a);
- slide (bslide, b);
-
- ge_p3_to_cached (&Ai[0], A);
- ge_p3_dbl (&t, A);
- ge_p1p1_to_p3 (&A2, &t);
- ge_add (&t, &A2, &Ai[0]);
- ge_p1p1_to_p3 (&u, &t);
- ge_p3_to_cached (&Ai[1], &u);
- ge_add (&t, &A2, &Ai[1]);
- ge_p1p1_to_p3 (&u, &t);
- ge_p3_to_cached (&Ai[2], &u);
- ge_add (&t, &A2, &Ai[2]);
- ge_p1p1_to_p3 (&u, &t);
- ge_p3_to_cached (&Ai[3], &u);
- ge_add (&t, &A2, &Ai[3]);
- ge_p1p1_to_p3 (&u, &t);
- ge_p3_to_cached (&Ai[4], &u);
- ge_add (&t, &A2, &Ai[4]);
- ge_p1p1_to_p3 (&u, &t);
- ge_p3_to_cached (&Ai[5], &u);
- ge_add (&t, &A2, &Ai[5]);
- ge_p1p1_to_p3 (&u, &t);
- ge_p3_to_cached (&Ai[6], &u);
- ge_add (&t, &A2, &Ai[6]);
- ge_p1p1_to_p3 (&u, &t);
- ge_p3_to_cached (&Ai[7], &u);
-
- ge_p2_0 (r);
-
- for (i = 255; i >= 0; --i) {
- if (aslide[i] || bslide[i])
- break;
- }
-
- for (; i >= 0; --i) {
- ge_p2_dbl (&t, r);
-
- if (aslide[i] > 0) {
- ge_p1p1_to_p3 (&u, &t);
- ge_add (&t, &u, &Ai[aslide[i] / 2]);
- }
- else if (aslide[i] < 0) {
- ge_p1p1_to_p3 (&u, &t);
- ge_sub (&t, &u, &Ai[(-aslide[i]) / 2]);
- }
-
- if (bslide[i] > 0) {
- ge_p1p1_to_p3 (&u, &t);
- ge_madd (&t, &u, &Bi[bslide[i] / 2]);
- }
- else if (bslide[i] < 0) {
- ge_p1p1_to_p3 (&u, &t);
- ge_msub (&t, &u, &Bi[(-bslide[i]) / 2]);
- }
-
- ge_p1p1_to_p2 (r, &t);
- }
-}
-
-void ge_scalarmult_vartime(ge_p3 *r, const unsigned char *a, const ge_p3 *A)
-{
- signed char aslide[256];
- ge_cached Ai[8];
- ge_p1p1 t;
- ge_p3 u;
- ge_p3 A2;
- int i;
-
- slide (aslide, a);
-
- ge_p3_to_cached (&Ai[0], A);
- ge_p3_dbl (&t, A);
- ge_p1p1_to_p3 (&A2, &t);
- ge_add (&t, &A2, &Ai[0]);
- ge_p1p1_to_p3 (&u, &t);
-
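/* Editor's annotation: as in ge_double_scalarmult_vartime above, this block
 * fills Ai[k] = (2k+1)*A for k = 0..7. slide() emits odd signed digits in
 * [-15, 15], so only these eight odd multiples are needed; negative digits
 * are handled with ge_sub in the main loop. */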
ge_p3_to_cached (&Ai[1], &u); - ge_add (&t, &A2, &Ai[1]); - ge_p1p1_to_p3 (&u, &t); - ge_p3_to_cached (&Ai[2], &u); - ge_add (&t, &A2, &Ai[2]); - ge_p1p1_to_p3 (&u, &t); - ge_p3_to_cached (&Ai[3], &u); - ge_add (&t, &A2, &Ai[3]); - ge_p1p1_to_p3 (&u, &t); - ge_p3_to_cached (&Ai[4], &u); - ge_add (&t, &A2, &Ai[4]); - ge_p1p1_to_p3 (&u, &t); - ge_p3_to_cached (&Ai[5], &u); - ge_add (&t, &A2, &Ai[5]); - ge_p1p1_to_p3 (&u, &t); - ge_p3_to_cached (&Ai[6], &u); - ge_add (&t, &A2, &Ai[6]); - ge_p1p1_to_p3 (&u, &t); - ge_p3_to_cached (&Ai[7], &u); - - ge_p3_0 (r); - - for (i = 255; i >= 0; --i) { - if (aslide[i]) - break; - } - - for (; i >= 0; --i) { - ge_p3_dbl (&t, r); - - if (aslide[i] > 0) { - ge_p1p1_to_p3 (&u, &t); - ge_add (&t, &u, &Ai[aslide[i] / 2]); - } - else if (aslide[i] < 0) { - ge_p1p1_to_p3 (&u, &t); - ge_sub (&t, &u, &Ai[(-aslide[i]) / 2]); - } - - ge_p1p1_to_p3 (r, &t); - } -} - -void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) -{ - signed char e[64]; - signed char carry; - ge_p1p1 r; - ge_p2 s; - ge_precomp t; - int i; - - for (i = 0; i < 32; ++i) { - e[2 * i + 0] = (a[i] >> 0) & 15; - e[2 * i + 1] = (a[i] >> 4) & 15; - } - /* each e[i] is between 0 and 15 */ - /* e[63] is between 0 and 7 */ - - carry = 0; - for (i = 0; i < 63; ++i) { - e[i] += carry; - carry = e[i] + 8; - carry >>= 4; - e[i] -= carry * ((signed char) 1 << 4); - } - e[63] += carry; - /* each e[i] is between -8 and 8 */ - - ge_p3_0 (h); - for (i = 1; i < 64; i += 2) { - ge_select (&t, i / 2, e[i]); - ge_madd (&r, h, &t); - ge_p1p1_to_p3 (h, &r); - } - - ge_p3_dbl (&r, h); - ge_p1p1_to_p2 (&s, &r); - ge_p2_dbl (&r, &s); - ge_p1p1_to_p2 (&s, &r); - ge_p2_dbl (&r, &s); - ge_p1p1_to_p2 (&s, &r); - ge_p2_dbl (&r, &s); - ge_p1p1_to_p3 (h, &r); - - for (i = 0; i < 64; i += 2) { - ge_select (&t, i / 2, e[i]); - ge_madd (&r, h, &t); - ge_p1p1_to_p3 (h, &r); - } -} - -/* - Input: - a[0]+256*a[1]+...+256^31*a[31] = a - b[0]+256*b[1]+...+256^31*b[31] = b - c[0]+256*c[1]+...+256^31*c[31] = c - - Output: - s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l - where l = 2^252 + 27742317777372353535851937790883648493. 
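For orientation (an editor's note, not text from the original): in ref10-style Ed25519 signing, which the ed25519 code removed by this same commit follows, sc_muladd computes the S half of a signature. A hedged sketch, where hram, az and nonce are hypothetical 32-byte little-endian scalars (hram = H(R||A||M) mod l, az = the secret scalar, nonce = r):

/* Editor's sketch; the buffer names are illustrative, sc_muladd is the
 * function defined below. */
void sign_finish (unsigned char S[32], const unsigned char hram[32],
		const unsigned char az[32], const unsigned char nonce[32])
{
	sc_muladd (S, hram, az, nonce); /* S = (hram * az + nonce) mod l */
}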
- */ - -void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, - const unsigned char *c) -{ - int64_t a0 = 2097151 & load_3 (a); - int64_t a1 = 2097151 & (load_4 (a + 2) >> 5); - int64_t a2 = 2097151 & (load_3 (a + 5) >> 2); - int64_t a3 = 2097151 & (load_4 (a + 7) >> 7); - int64_t a4 = 2097151 & (load_4 (a + 10) >> 4); - int64_t a5 = 2097151 & (load_3 (a + 13) >> 1); - int64_t a6 = 2097151 & (load_4 (a + 15) >> 6); - int64_t a7 = 2097151 & (load_3 (a + 18) >> 3); - int64_t a8 = 2097151 & load_3 (a + 21); - int64_t a9 = 2097151 & (load_4 (a + 23) >> 5); - int64_t a10 = 2097151 & (load_3 (a + 26) >> 2); - int64_t a11 = (load_4 (a + 28) >> 7); - int64_t b0 = 2097151 & load_3 (b); - int64_t b1 = 2097151 & (load_4 (b + 2) >> 5); - int64_t b2 = 2097151 & (load_3 (b + 5) >> 2); - int64_t b3 = 2097151 & (load_4 (b + 7) >> 7); - int64_t b4 = 2097151 & (load_4 (b + 10) >> 4); - int64_t b5 = 2097151 & (load_3 (b + 13) >> 1); - int64_t b6 = 2097151 & (load_4 (b + 15) >> 6); - int64_t b7 = 2097151 & (load_3 (b + 18) >> 3); - int64_t b8 = 2097151 & load_3 (b + 21); - int64_t b9 = 2097151 & (load_4 (b + 23) >> 5); - int64_t b10 = 2097151 & (load_3 (b + 26) >> 2); - int64_t b11 = (load_4 (b + 28) >> 7); - int64_t c0 = 2097151 & load_3 (c); - int64_t c1 = 2097151 & (load_4 (c + 2) >> 5); - int64_t c2 = 2097151 & (load_3 (c + 5) >> 2); - int64_t c3 = 2097151 & (load_4 (c + 7) >> 7); - int64_t c4 = 2097151 & (load_4 (c + 10) >> 4); - int64_t c5 = 2097151 & (load_3 (c + 13) >> 1); - int64_t c6 = 2097151 & (load_4 (c + 15) >> 6); - int64_t c7 = 2097151 & (load_3 (c + 18) >> 3); - int64_t c8 = 2097151 & load_3 (c + 21); - int64_t c9 = 2097151 & (load_4 (c + 23) >> 5); - int64_t c10 = 2097151 & (load_3 (c + 26) >> 2); - int64_t c11 = (load_4 (c + 28) >> 7); - int64_t s0; - int64_t s1; - int64_t s2; - int64_t s3; - int64_t s4; - int64_t s5; - int64_t s6; - int64_t s7; - int64_t s8; - int64_t s9; - int64_t s10; - int64_t s11; - int64_t s12; - int64_t s13; - int64_t s14; - int64_t s15; - int64_t s16; - int64_t s17; - int64_t s18; - int64_t s19; - int64_t s20; - int64_t s21; - int64_t s22; - int64_t s23; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - int64_t carry10; - int64_t carry11; - int64_t carry12; - int64_t carry13; - int64_t carry14; - int64_t carry15; - int64_t carry16; - int64_t carry17; - int64_t carry18; - int64_t carry19; - int64_t carry20; - int64_t carry21; - int64_t carry22; - - s0 = c0 + a0 * b0; - s1 = c1 + a0 * b1 + a1 * b0; - s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0; - s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; - s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0; - s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0; - s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 - + a6 * b0; - s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 - + a6 * b1 + a7 * b0; - s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 - + a6 * b2 + a7 * b1 + a8 * b0; - s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 - + a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0; - s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 - + a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0; - s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 - + a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0; - s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + 
a5 * b7 + a6 * b6 + a7 * b5 - + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1; - s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 - + a9 * b4 + a10 * b3 + a11 * b2; - s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 - + a10 * b4 + a11 * b3; - s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 - + a11 * b4; - s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 - + a11 * b5; - s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6; - s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7; - s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8; - s20 = a9 * b11 + a10 * b10 + a11 * b9; - s21 = a10 * b11 + a11 * b10; - s22 = a11 * b11; - s23 = 0; - - carry0 = (s0 + (int64_t) (1L << 20)) >> 21; - s1 += carry0; - s0 -= carry0 * ((uint64_t) 1L << 21); - carry2 = (s2 + (int64_t) (1L << 20)) >> 21; - s3 += carry2; - s2 -= carry2 * ((uint64_t) 1L << 21); - carry4 = (s4 + (int64_t) (1L << 20)) >> 21; - s5 += carry4; - s4 -= carry4 * ((uint64_t) 1L << 21); - carry6 = (s6 + (int64_t) (1L << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry8 = (s8 + (int64_t) (1L << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry10 = (s10 + (int64_t) (1L << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - carry12 = (s12 + (int64_t) (1L << 20)) >> 21; - s13 += carry12; - s12 -= carry12 * ((uint64_t) 1L << 21); - carry14 = (s14 + (int64_t) (1L << 20)) >> 21; - s15 += carry14; - s14 -= carry14 * ((uint64_t) 1L << 21); - carry16 = (s16 + (int64_t) (1L << 20)) >> 21; - s17 += carry16; - s16 -= carry16 * ((uint64_t) 1L << 21); - carry18 = (s18 + (int64_t) (1L << 20)) >> 21; - s19 += carry18; - s18 -= carry18 * ((uint64_t) 1L << 21); - carry20 = (s20 + (int64_t) (1L << 20)) >> 21; - s21 += carry20; - s20 -= carry20 * ((uint64_t) 1L << 21); - carry22 = (s22 + (int64_t) (1L << 20)) >> 21; - s23 += carry22; - s22 -= carry22 * ((uint64_t) 1L << 21); - - carry1 = (s1 + (int64_t) (1L << 20)) >> 21; - s2 += carry1; - s1 -= carry1 * ((uint64_t) 1L << 21); - carry3 = (s3 + (int64_t) (1L << 20)) >> 21; - s4 += carry3; - s3 -= carry3 * ((uint64_t) 1L << 21); - carry5 = (s5 + (int64_t) (1L << 20)) >> 21; - s6 += carry5; - s5 -= carry5 * ((uint64_t) 1L << 21); - carry7 = (s7 + (int64_t) (1L << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry9 = (s9 + (int64_t) (1L << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry11 = (s11 + (int64_t) (1L << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * ((uint64_t) 1L << 21); - carry13 = (s13 + (int64_t) (1L << 20)) >> 21; - s14 += carry13; - s13 -= carry13 * ((uint64_t) 1L << 21); - carry15 = (s15 + (int64_t) (1L << 20)) >> 21; - s16 += carry15; - s15 -= carry15 * ((uint64_t) 1L << 21); - carry17 = (s17 + (int64_t) (1L << 20)) >> 21; - s18 += carry17; - s17 -= carry17 * ((uint64_t) 1L << 21); - carry19 = (s19 + (int64_t) (1L << 20)) >> 21; - s20 += carry19; - s19 -= carry19 * ((uint64_t) 1L << 21); - carry21 = (s21 + (int64_t) (1L << 20)) >> 21; - s22 += carry21; - s21 -= carry21 * ((uint64_t) 1L << 21); - - s11 += s23 * 666643; - s12 += s23 * 470296; - s13 += s23 * 654183; - s14 -= s23 * 997805; - s15 += s23 * 136657; - s16 -= s23 * 683901; - - s10 += s22 * 666643; - s11 += s22 * 470296; - s12 += s22 * 654183; - s13 -= s22 * 997805; - s14 += s22 * 136657; - s15 -= s22 * 683901; - - s9 += s21 * 666643; - s10 += s21 * 470296; - s11 += s21 * 654183; - s12 -= s21 * 997805; - 
s13 += s21 * 136657; - s14 -= s21 * 683901; - - s8 += s20 * 666643; - s9 += s20 * 470296; - s10 += s20 * 654183; - s11 -= s20 * 997805; - s12 += s20 * 136657; - s13 -= s20 * 683901; - - s7 += s19 * 666643; - s8 += s19 * 470296; - s9 += s19 * 654183; - s10 -= s19 * 997805; - s11 += s19 * 136657; - s12 -= s19 * 683901; - - s6 += s18 * 666643; - s7 += s18 * 470296; - s8 += s18 * 654183; - s9 -= s18 * 997805; - s10 += s18 * 136657; - s11 -= s18 * 683901; - - carry6 = (s6 + (int64_t) (1L << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry8 = (s8 + (int64_t) (1L << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry10 = (s10 + (int64_t) (1L << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - carry12 = (s12 + (int64_t) (1L << 20)) >> 21; - s13 += carry12; - s12 -= carry12 * ((uint64_t) 1L << 21); - carry14 = (s14 + (int64_t) (1L << 20)) >> 21; - s15 += carry14; - s14 -= carry14 * ((uint64_t) 1L << 21); - carry16 = (s16 + (int64_t) (1L << 20)) >> 21; - s17 += carry16; - s16 -= carry16 * ((uint64_t) 1L << 21); - - carry7 = (s7 + (int64_t) (1L << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry9 = (s9 + (int64_t) (1L << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry11 = (s11 + (int64_t) (1L << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * ((uint64_t) 1L << 21); - carry13 = (s13 + (int64_t) (1L << 20)) >> 21; - s14 += carry13; - s13 -= carry13 * ((uint64_t) 1L << 21); - carry15 = (s15 + (int64_t) (1L << 20)) >> 21; - s16 += carry15; - s15 -= carry15 * ((uint64_t) 1L << 21); - - s5 += s17 * 666643; - s6 += s17 * 470296; - s7 += s17 * 654183; - s8 -= s17 * 997805; - s9 += s17 * 136657; - s10 -= s17 * 683901; - - s4 += s16 * 666643; - s5 += s16 * 470296; - s6 += s16 * 654183; - s7 -= s16 * 997805; - s8 += s16 * 136657; - s9 -= s16 * 683901; - - s3 += s15 * 666643; - s4 += s15 * 470296; - s5 += s15 * 654183; - s6 -= s15 * 997805; - s7 += s15 * 136657; - s8 -= s15 * 683901; - - s2 += s14 * 666643; - s3 += s14 * 470296; - s4 += s14 * 654183; - s5 -= s14 * 997805; - s6 += s14 * 136657; - s7 -= s14 * 683901; - - s1 += s13 * 666643; - s2 += s13 * 470296; - s3 += s13 * 654183; - s4 -= s13 * 997805; - s5 += s13 * 136657; - s6 -= s13 * 683901; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = (s0 + (int64_t) (1L << 20)) >> 21; - s1 += carry0; - s0 -= carry0 * ((uint64_t) 1L << 21); - carry2 = (s2 + (int64_t) (1L << 20)) >> 21; - s3 += carry2; - s2 -= carry2 * ((uint64_t) 1L << 21); - carry4 = (s4 + (int64_t) (1L << 20)) >> 21; - s5 += carry4; - s4 -= carry4 * ((uint64_t) 1L << 21); - carry6 = (s6 + (int64_t) (1L << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry8 = (s8 + (int64_t) (1L << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry10 = (s10 + (int64_t) (1L << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - - carry1 = (s1 + (int64_t) (1L << 20)) >> 21; - s2 += carry1; - s1 -= carry1 * ((uint64_t) 1L << 21); - carry3 = (s3 + (int64_t) (1L << 20)) >> 21; - s4 += carry3; - s3 -= carry3 * ((uint64_t) 1L << 21); - carry5 = (s5 + (int64_t) (1L << 20)) >> 21; - s6 += carry5; - s5 -= carry5 * ((uint64_t) 1L << 21); - carry7 = (s7 + (int64_t) (1L << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry9 = (s9 + (int64_t) (1L << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * 
((uint64_t) 1L << 21); - carry11 = (s11 + (int64_t) (1L << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * ((uint64_t) 1L << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * ((uint64_t) 1L << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * ((uint64_t) 1L << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * ((uint64_t) 1L << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * ((uint64_t) 1L << 21); - carry4 = s4 >> 21; - s5 += carry4; - s4 -= carry4 * ((uint64_t) 1L << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * ((uint64_t) 1L << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - carry11 = s11 >> 21; - s12 += carry11; - s11 -= carry11 * ((uint64_t) 1L << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * ((uint64_t) 1L << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * ((uint64_t) 1L << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * ((uint64_t) 1L << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * ((uint64_t) 1L << 21); - carry4 = s4 >> 21; - s5 += carry4; - s4 -= carry4 * ((uint64_t) 1L << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * ((uint64_t) 1L << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - - s[0] = s0 >> 0; - s[1] = s0 >> 8; - s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5)); - s[3] = s1 >> 3; - s[4] = s1 >> 11; - s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2)); - s[6] = s2 >> 6; - s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7)); - s[8] = s3 >> 1; - s[9] = s3 >> 9; - s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4)); - s[11] = s4 >> 4; - s[12] = s4 >> 12; - s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1)); - s[14] = s5 >> 7; - s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6)); - s[16] = s6 >> 2; - s[17] = s6 >> 10; - s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3)); - s[19] = s7 >> 5; - s[20] = s7 >> 13; - s[21] = s8 >> 0; - s[22] = s8 >> 8; - s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5)); - s[24] = s9 >> 3; - s[25] = s9 >> 11; - s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2)); - s[27] = s10 >> 6; - s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7)); - s[29] = s11 >> 1; - s[30] = s11 >> 9; - s[31] = s11 >> 17; -} - -/* - Input: - s[0]+256*s[1]+...+256^63*s[63] = s - - Output: - s[0]+256*s[1]+...+256^31*s[31] = s mod l - where l = 2^252 + 27742317777372353535851937790883648493. - Overwrites s in place. 
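The six fold constants used by sc_reduce below (and by sc_muladd above) -- 666643, 470296, 654183, -997805, 136657, -683901 -- are the signed radix-2^21 digits of -delta, where l = 2^252 + delta; adding k[i]*s23 into s[11+i] therefore replaces s23*2^483 = (s23*2^231)*2^252 by its residue mod l. A standalone check of that identity (editor's sketch; assumes a compiler with __int128, e.g. gcc or clang):

#include <stdio.h>

int main (void)
{
	const long long k[6] = { 666643, 470296, 654183, -997805, 136657, -683901 };
	/* delta = 27742317777372353535851937790883648493
	 *       = 0x14def9dea2f79cd65812631a5cf5d3ed */
	__int128 delta = ((__int128) 0x14def9dea2f79cd6ULL << 64) | 0x5812631a5cf5d3edULL;
	__int128 acc = 0;
	int i;

	for (i = 5; i >= 0; i--) {
		acc = acc * (1 << 21) + k[i]; /* Horner evaluation in radix 2^21 */
	}
	printf ("%s\n", acc == -delta ? "constants match -delta" : "mismatch");
	return 0;
}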
- */ - -void sc_reduce(unsigned char *s) -{ - int64_t s0 = 2097151 & load_3 (s); - int64_t s1 = 2097151 & (load_4 (s + 2) >> 5); - int64_t s2 = 2097151 & (load_3 (s + 5) >> 2); - int64_t s3 = 2097151 & (load_4 (s + 7) >> 7); - int64_t s4 = 2097151 & (load_4 (s + 10) >> 4); - int64_t s5 = 2097151 & (load_3 (s + 13) >> 1); - int64_t s6 = 2097151 & (load_4 (s + 15) >> 6); - int64_t s7 = 2097151 & (load_3 (s + 18) >> 3); - int64_t s8 = 2097151 & load_3 (s + 21); - int64_t s9 = 2097151 & (load_4 (s + 23) >> 5); - int64_t s10 = 2097151 & (load_3 (s + 26) >> 2); - int64_t s11 = 2097151 & (load_4 (s + 28) >> 7); - int64_t s12 = 2097151 & (load_4 (s + 31) >> 4); - int64_t s13 = 2097151 & (load_3 (s + 34) >> 1); - int64_t s14 = 2097151 & (load_4 (s + 36) >> 6); - int64_t s15 = 2097151 & (load_3 (s + 39) >> 3); - int64_t s16 = 2097151 & load_3 (s + 42); - int64_t s17 = 2097151 & (load_4 (s + 44) >> 5); - int64_t s18 = 2097151 & (load_3 (s + 47) >> 2); - int64_t s19 = 2097151 & (load_4 (s + 49) >> 7); - int64_t s20 = 2097151 & (load_4 (s + 52) >> 4); - int64_t s21 = 2097151 & (load_3 (s + 55) >> 1); - int64_t s22 = 2097151 & (load_4 (s + 57) >> 6); - int64_t s23 = (load_4 (s + 60) >> 3); - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - int64_t carry10; - int64_t carry11; - int64_t carry12; - int64_t carry13; - int64_t carry14; - int64_t carry15; - int64_t carry16; - - s11 += s23 * 666643; - s12 += s23 * 470296; - s13 += s23 * 654183; - s14 -= s23 * 997805; - s15 += s23 * 136657; - s16 -= s23 * 683901; - - s10 += s22 * 666643; - s11 += s22 * 470296; - s12 += s22 * 654183; - s13 -= s22 * 997805; - s14 += s22 * 136657; - s15 -= s22 * 683901; - - s9 += s21 * 666643; - s10 += s21 * 470296; - s11 += s21 * 654183; - s12 -= s21 * 997805; - s13 += s21 * 136657; - s14 -= s21 * 683901; - - s8 += s20 * 666643; - s9 += s20 * 470296; - s10 += s20 * 654183; - s11 -= s20 * 997805; - s12 += s20 * 136657; - s13 -= s20 * 683901; - - s7 += s19 * 666643; - s8 += s19 * 470296; - s9 += s19 * 654183; - s10 -= s19 * 997805; - s11 += s19 * 136657; - s12 -= s19 * 683901; - - s6 += s18 * 666643; - s7 += s18 * 470296; - s8 += s18 * 654183; - s9 -= s18 * 997805; - s10 += s18 * 136657; - s11 -= s18 * 683901; - - carry6 = (s6 + (int64_t) (1L << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry8 = (s8 + (int64_t) (1L << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry10 = (s10 + (int64_t) (1L << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - carry12 = (s12 + (int64_t) (1L << 20)) >> 21; - s13 += carry12; - s12 -= carry12 * ((uint64_t) 1L << 21); - carry14 = (s14 + (int64_t) (1L << 20)) >> 21; - s15 += carry14; - s14 -= carry14 * ((uint64_t) 1L << 21); - carry16 = (s16 + (int64_t) (1L << 20)) >> 21; - s17 += carry16; - s16 -= carry16 * ((uint64_t) 1L << 21); - - carry7 = (s7 + (int64_t) (1L << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry9 = (s9 + (int64_t) (1L << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry11 = (s11 + (int64_t) (1L << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * ((uint64_t) 1L << 21); - carry13 = (s13 + (int64_t) (1L << 20)) >> 21; - s14 += carry13; - s13 -= carry13 * ((uint64_t) 1L << 21); - carry15 = (s15 + (int64_t) (1L << 20)) >> 21; - s16 += carry15; - s15 -= carry15 * ((uint64_t) 1L << 21); - - s5 += s17 * 666643; - s6 += s17 * 470296; - 
s7 += s17 * 654183; - s8 -= s17 * 997805; - s9 += s17 * 136657; - s10 -= s17 * 683901; - - s4 += s16 * 666643; - s5 += s16 * 470296; - s6 += s16 * 654183; - s7 -= s16 * 997805; - s8 += s16 * 136657; - s9 -= s16 * 683901; - - s3 += s15 * 666643; - s4 += s15 * 470296; - s5 += s15 * 654183; - s6 -= s15 * 997805; - s7 += s15 * 136657; - s8 -= s15 * 683901; - - s2 += s14 * 666643; - s3 += s14 * 470296; - s4 += s14 * 654183; - s5 -= s14 * 997805; - s6 += s14 * 136657; - s7 -= s14 * 683901; - - s1 += s13 * 666643; - s2 += s13 * 470296; - s3 += s13 * 654183; - s4 -= s13 * 997805; - s5 += s13 * 136657; - s6 -= s13 * 683901; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = (s0 + (int64_t) (1L << 20)) >> 21; - s1 += carry0; - s0 -= carry0 * ((uint64_t) 1L << 21); - carry2 = (s2 + (int64_t) (1L << 20)) >> 21; - s3 += carry2; - s2 -= carry2 * ((uint64_t) 1L << 21); - carry4 = (s4 + (int64_t) (1L << 20)) >> 21; - s5 += carry4; - s4 -= carry4 * ((uint64_t) 1L << 21); - carry6 = (s6 + (int64_t) (1L << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry8 = (s8 + (int64_t) (1L << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry10 = (s10 + (int64_t) (1L << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - - carry1 = (s1 + (int64_t) (1L << 20)) >> 21; - s2 += carry1; - s1 -= carry1 * ((uint64_t) 1L << 21); - carry3 = (s3 + (int64_t) (1L << 20)) >> 21; - s4 += carry3; - s3 -= carry3 * ((uint64_t) 1L << 21); - carry5 = (s5 + (int64_t) (1L << 20)) >> 21; - s6 += carry5; - s5 -= carry5 * ((uint64_t) 1L << 21); - carry7 = (s7 + (int64_t) (1L << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry9 = (s9 + (int64_t) (1L << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry11 = (s11 + (int64_t) (1L << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * ((uint64_t) 1L << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * ((uint64_t) 1L << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * ((uint64_t) 1L << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * ((uint64_t) 1L << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * ((uint64_t) 1L << 21); - carry4 = s4 >> 21; - s5 += carry4; - s4 -= carry4 * ((uint64_t) 1L << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * ((uint64_t) 1L << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - carry11 = s11 >> 21; - s12 += carry11; - s11 -= carry11 * ((uint64_t) 1L << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * ((uint64_t) 1L << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * ((uint64_t) 1L << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * ((uint64_t) 1L << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * ((uint64_t) 1L << 21); - carry4 = s4 >> 
21; - s5 += carry4; - s4 -= carry4 * ((uint64_t) 1L << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * ((uint64_t) 1L << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * ((uint64_t) 1L << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * ((uint64_t) 1L << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * ((uint64_t) 1L << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * ((uint64_t) 1L << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * ((uint64_t) 1L << 21); - - s[0] = s0 >> 0; - s[1] = s0 >> 8; - s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5)); - s[3] = s1 >> 3; - s[4] = s1 >> 11; - s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2)); - s[6] = s2 >> 6; - s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7)); - s[8] = s3 >> 1; - s[9] = s3 >> 9; - s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4)); - s[11] = s4 >> 4; - s[12] = s4 >> 12; - s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1)); - s[14] = s5 >> 7; - s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6)); - s[16] = s6 >> 2; - s[17] = s6 >> 10; - s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3)); - s[19] = s7 >> 5; - s[20] = s7 >> 13; - s[21] = s8 >> 0; - s[22] = s8 >> 8; - s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5)); - s[24] = s9 >> 3; - s[25] = s9 >> 11; - s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2)); - s[27] = s10 >> 6; - s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7)); - s[29] = s11 >> 1; - s[30] = s11 >> 9; - s[31] = s11 >> 17; -} - -/* -Replace (f,g) with (g,f) if b == 1; -replace (f,g) with (f,g) if b == 0. - -Preconditions: b in {0,1}. -*/ - -static void fe_cswap(fe f, fe g, unsigned int b) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t g0 = g[0]; - int32_t g1 = g[1]; - int32_t g2 = g[2]; - int32_t g3 = g[3]; - int32_t g4 = g[4]; - int32_t g5 = g[5]; - int32_t g6 = g[6]; - int32_t g7 = g[7]; - int32_t g8 = g[8]; - int32_t g9 = g[9]; - int32_t x0 = f0 ^ g0; - int32_t x1 = f1 ^ g1; - int32_t x2 = f2 ^ g2; - int32_t x3 = f3 ^ g3; - int32_t x4 = f4 ^ g4; - int32_t x5 = f5 ^ g5; - int32_t x6 = f6 ^ g6; - int32_t x7 = f7 ^ g7; - int32_t x8 = f8 ^ g8; - int32_t x9 = f9 ^ g9; - b = (unsigned int) (-(int) b); - x0 &= b; - x1 &= b; - x2 &= b; - x3 &= b; - x4 &= b; - x5 &= b; - x6 &= b; - x7 &= b; - x8 &= b; - x9 &= b; - f[0] = f0 ^ x0; - f[1] = f1 ^ x1; - f[2] = f2 ^ x2; - f[3] = f3 ^ x3; - f[4] = f4 ^ x4; - f[5] = f5 ^ x5; - f[6] = f6 ^ x6; - f[7] = f7 ^ x7; - f[8] = f8 ^ x8; - f[9] = f9 ^ x9; - g[0] = g0 ^ x0; - g[1] = g1 ^ x1; - g[2] = g2 ^ x2; - g[3] = g3 ^ x3; - g[4] = g4 ^ x4; - g[5] = g5 ^ x5; - g[6] = g6 ^ x6; - g[7] = g7 ^ x7; - g[8] = g8 ^ x8; - g[9] = g9 ^ x9; -} - -/* - h = f * 121666 - Can overlap h with f. - - Preconditions: - |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. - - Postconditions: - |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 
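For context before the definition (editor's note): 121666 = (A + 2)/4, where A = 486662 is the Montgomery coefficient of Curve25519; in RFC 7748 terms it is the a24 constant of the ladder step, and scalarmult_ref below applies fe_mul121666 exactly there. A trivial standalone check:

#include <assert.h>

int main (void)
{
	assert ((486662 + 2) / 4 == 121666); /* a24 for Curve25519 */
	return 0;
}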
- */ - -static void fe_mul121666(fe h, const fe f) -{ - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int64_t h0 = f0 * (int64_t) 121666; - int64_t h1 = f1 * (int64_t) 121666; - int64_t h2 = f2 * (int64_t) 121666; - int64_t h3 = f3 * (int64_t) 121666; - int64_t h4 = f4 * (int64_t) 121666; - int64_t h5 = f5 * (int64_t) 121666; - int64_t h6 = f6 * (int64_t) 121666; - int64_t h7 = f7 * (int64_t) 121666; - int64_t h8 = f8 * (int64_t) 121666; - int64_t h9 = f9 * (int64_t) 121666; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - carry9 = (h9 + (int64_t) (1 << 24)) >> 25; - h0 += carry9 * 19; - h9 -= carry9 << 25; - carry1 = (h1 + (int64_t) (1 << 24)) >> 25; - h2 += carry1; - h1 -= carry1 << 25; - carry3 = (h3 + (int64_t) (1 << 24)) >> 25; - h4 += carry3; - h3 -= carry3 << 25; - carry5 = (h5 + (int64_t) (1 << 24)) >> 25; - h6 += carry5; - h5 -= carry5 << 25; - carry7 = (h7 + (int64_t) (1 << 24)) >> 25; - h8 += carry7; - h7 -= carry7 << 25; - - carry0 = (h0 + (int64_t) (1 << 25)) >> 26; - h1 += carry0; - h0 -= carry0 << 26; - carry2 = (h2 + (int64_t) (1 << 25)) >> 26; - h3 += carry2; - h2 -= carry2 << 26; - carry4 = (h4 + (int64_t) (1 << 25)) >> 26; - h5 += carry4; - h4 -= carry4 << 26; - carry6 = (h6 + (int64_t) (1 << 25)) >> 26; - h7 += carry6; - h6 -= carry6 << 26; - carry8 = (h8 + (int64_t) (1 << 25)) >> 26; - h9 += carry8; - h8 -= carry8 << 26; - - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; - h[5] = h5; - h[6] = h6; - h[7] = h7; - h[8] = h8; - h[9] = h9; -} - -int -scalarmult_ref(unsigned char *q, - const unsigned char *n, const unsigned char *p) -{ - unsigned char e[32]; - unsigned int i; - fe x1; - fe x2; - fe z2; - fe x3; - fe z3; - fe tmp0; - fe tmp1; - int pos; - unsigned int swap; - unsigned int b; - - for (i = 0; i < 32; ++i) - e[i] = n[i]; - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - fe_frombytes (x1, p); - fe_1 (x2); - fe_0 (z2); - fe_copy (x3, x1); - fe_1 (z3); - - swap = 0; - for (pos = 254; pos >= 0; --pos) { - b = e[pos / 8] >> (pos & 7); - b &= 1; - swap ^= b; - fe_cswap (x2, x3, swap); - fe_cswap (z2, z3, swap); - swap = b; - fe_sub (tmp0, x3, z3); - fe_sub (tmp1, x2, z2); - fe_add (x2, x2, z2); - fe_add (z2, x3, z3); - fe_mul (z3, tmp0, x2); - fe_mul (z2, z2, tmp1); - fe_sq (tmp0, tmp1); - fe_sq (tmp1, x2); - fe_add (x3, z3, z2); - fe_sub (z2, z3, z2); - fe_mul (x2, tmp1, tmp0); - fe_sub (tmp1, tmp1, tmp0); - fe_sq (z2, z2); - fe_mul121666 (z3, tmp1); - fe_sq (x3, x3); - fe_add (tmp0, tmp0, z3); - fe_mul (z3, x1, z2); - fe_mul (z2, tmp1, tmp0); - } - fe_cswap (x2, x3, swap); - fe_cswap (z2, z3, swap); - - fe_invert (z2, z2); - fe_mul (x2, x2, z2); - fe_tobytes (q, x2); - return 0; -} - -static void edwards_to_montgomery(fe montgomeryX, const fe edwardsY, - const fe edwardsZ) -{ - fe tempX; - fe tempZ; - - fe_add (tempX, edwardsZ, edwardsY); - fe_sub (tempZ, edwardsZ, edwardsY); - fe_invert (tempZ, tempZ); - fe_mul (montgomeryX, tempX, tempZ); -} - -int -scalarmult_base_ref(unsigned char *q, - const unsigned char *n) -{ - unsigned char e[32]; - ge_p3 A; - fe pk; - unsigned int i; - - for (i = 0; i < 32; ++i) - e[i] = n[i]; - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - ge_scalarmult_base (&A, e); - edwards_to_montgomery (pk, A.Y, A.Z); - fe_tobytes (q, pk); - 
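With this commit the hand-written Montgomery ladder above is gone and X25519 is served by libsodium. For orientation, a minimal sketch of the replacement calls (the libsodium functions are real; the wrapper names are hypothetical):

    #include <sodium.h>

    /* q = n*P on Curve25519. libsodium clamps n internally, matching the
       e[0] &= 248; e[31] &= 127; e[31] |= 64; sequence above. */
    static int x25519_shared(unsigned char q[32], const unsigned char n[32],
                             const unsigned char p[32])
    {
        return crypto_scalarmult_curve25519(q, n, p); /* -1 on degenerate input */
    }

    /* q = n*B, the public key for secret n (replaces scalarmult_base_ref) */
    static int x25519_public(unsigned char q[32], const unsigned char n[32])
    {
        return crypto_scalarmult_curve25519_base(q, n);
    }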
diff --git a/src/libcryptobox/ed25519/ed25519.c b/src/libcryptobox/ed25519/ed25519.c
deleted file mode 100644
index 1591b2590..000000000
--- a/src/libcryptobox/ed25519/ed25519.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "config.h"
-#include "cryptobox.h"
-#include "ed25519.h"
-#include "platform_config.h"
-#include "libutil/str_util.h"
-
-extern unsigned long cpu_config;
-
-typedef struct ed25519_impl_s {
-    unsigned long cpu_flags;
-    const char *desc;
-
-    void (*keypair) (unsigned char *pk, unsigned char *sk);
-    void (*seed_keypair) (unsigned char *pk, unsigned char *sk, unsigned char *seed);
-    void (*sign) (unsigned char *sig, size_t *siglen_p,
-        const unsigned char *m, size_t mlen,
-        const unsigned char *sk);
-    int (*verify) (const unsigned char *sig,
-        const unsigned char *m,
-        size_t mlen,
-        const unsigned char *pk);
-} ed25519_impl_t;
-
-#define ED25519_DECLARE(ext) \
-    void ed_keypair_##ext(unsigned char *pk, unsigned char *sk); \
-    void ed_seed_keypair_##ext(unsigned char *pk, unsigned char *sk, unsigned char *seed); \
-    void ed_sign_##ext(unsigned char *sig, size_t *siglen_p, \
-        const unsigned char *m, size_t mlen, \
-        const unsigned char *sk); \
-    int ed_verify_##ext(const unsigned char *sig, \
-        const unsigned char *m, \
-        size_t mlen, \
-        const unsigned char *pk)
-
-#define ED25519_IMPL(cpuflags, desc, ext) \
-    {(cpuflags), desc, ed_keypair_##ext, ed_seed_keypair_##ext, ed_sign_##ext, ed_verify_##ext}
-
-ED25519_DECLARE(ref);
-#define ED25519_REF ED25519_IMPL(0, "ref", ref)
-
-static const ed25519_impl_t ed25519_list[] = {
-    ED25519_REF,
-};
-
-static const ed25519_impl_t *ed25519_opt = &ed25519_list[0];
-static bool ed25519_test (const ed25519_impl_t *impl);
-
-const char*
-ed25519_load (void)
-{
-    guint i;
-
-    if (cpu_config != 0) {
-        for (i = 0; i < G_N_ELEMENTS(ed25519_list); i++) {
-            if (ed25519_list[i].cpu_flags & cpu_config) {
-                ed25519_opt = &ed25519_list[i];
-                g_assert (ed25519_test (ed25519_opt));
-                break;
-            }
-        }
-    }
-
-    g_assert (ed25519_test (ed25519_opt));
-
-    return ed25519_opt->desc;
-}
-
-void
-ed25519_seed_keypair (unsigned char *pk, unsigned char *sk, unsigned char *seed)
-{
-    ed25519_opt->seed_keypair (pk, sk, seed);
-}
-
-void
-ed25519_keypair (unsigned char *pk, unsigned char *sk)
-{
-    ed25519_opt->keypair (pk, sk);
-}
-
-void
-ed25519_sign (unsigned char *sig, size_t *siglen_p,
-    const unsigned char *m, size_t mlen,
-    const unsigned char *sk)
-{
-    ed25519_opt->sign (sig, siglen_p, m, mlen, sk);
-}
-
-bool
-ed25519_verify (const unsigned char *sig,
-    const unsigned char *m,
-    size_t mlen,
-    const unsigned char *pk)
-{
-    int ret = ed25519_opt->verify (sig, m, mlen, pk);
-
-    return (ret == 0 ? true : false);
-}
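The file above is rspamd's generic CPU-dispatch pattern: a table of implementations, each tagged with the CPUID flags it needs, from which ed25519_load() picks the first entry overlapping the detected cpu_config and then self-tests it. Distilled to its core, under illustrative (non-rspamd) names:

    #include <stddef.h>

    typedef struct {
        unsigned long cpu_flags;  /* CPUID bits this entry needs; 0 = portable */
        int (*verify)(const unsigned char *sig, const unsigned char *m,
                      size_t mlen, const unsigned char *pk);
    } impl_t;

    extern int verify_ref(const unsigned char *, const unsigned char *,
                          size_t, const unsigned char *);

    static const impl_t impls[] = {
        {0, verify_ref},          /* flags == 0 never matches, so it stays the default */
    };
    static const impl_t *chosen = &impls[0];

    static void impl_load(unsigned long cpu_config)
    {
        for (size_t i = 0; i < sizeof(impls) / sizeof(impls[0]); i++) {
            if (impls[i].cpu_flags & cpu_config) { /* any overlap, as above */
                chosen = &impls[i];
                break;
            }
        }
    }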
-
-struct ed25519_test_vector {
-    const char *message;
-    const char *pk;
-    const char *sk;
-    const char *sig;
-};
-
-static const struct ed25519_test_vector test_vectors[] = {
-    {
-        .sk = ""
-            "9d61b19deffd5a60ba844af492ec2cc4"
-            "4449c5697b326919703bac031cae7f60"
-            "",
-        .pk = ""
-            "d75a980182b10ab7d54bfed3c964073a"
-            "0ee172f3daa62325af021a68f707511a"
-            "",
-        .message = ""
-            "",
-        .sig = ""
-            "e5564300c360ac729086e2cc806e828a"
-            "84877f1eb8e5d974d873e06522490155"
-            "5fb8821590a33bacc61e39701cf9b46b"
-            "d25bf5f0595bbe24655141438e7a100b"
-            "",
-    },
-    {
-        .sk = ""
-            "4ccd089b28ff96da9db6c346ec114e0f"
-            "5b8a319f35aba624da8cf6ed4fb8a6fb"
-            "",
-        .pk = ""
-            "3d4017c3e843895a92b70aa74d1b7ebc"
-            "9c982ccf2ec4968cc0cd55f12af4660c"
-            "",
-        .message = ""
-            "72"
-            "",
-        .sig = ""
-            "92a009a9f0d4cab8720e820b5f642540"
-            "a2b27b5416503f8fb3762223ebdb69da"
-            "085ac1e43e15996e458f3613d0f11d8c"
-            "387b2eaeb4302aeeb00d291612bb0c00"
-            "",
-    },
-    {
-        .sk = ""
-            "c5aa8df43f9f837bedb7442f31dcb7b1"
-            "66d38535076f094b85ce3a2e0b4458f7"
-            "",
-        .pk = ""
-            "fc51cd8e6218a1a38da47ed00230f058"
-            "0816ed13ba3303ac5deb911548908025"
-            "",
-        .message = ""
-            "af82"
-            "",
-        .sig = ""
-            "6291d657deec24024827e69c3abe01a3"
-            "0ce548a284743a445e3680d7db5ac3ac"
-            "18ff9b538d16f290ae67f760984dc659"
-            "4a7c15e9716ed28dc027beceea1ec40a"
-            "",
-    },
-    {
-        .sk = ""
-            "f5e5767cf153319517630f226876b86c"
-            "8160cc583bc013744c6bf255f5cc0ee5"
-            "",
-        .pk = ""
-            "278117fc144c72340f67d0f2316e8386"
-            "ceffbf2b2428c9c51fef7c597f1d426e"
-            "",
-        .message = ""
-            "08b8b2b733424243760fe426a4b54908"
-            "632110a66c2f6591eabd3345e3e4eb98"
-            "fa6e264bf09efe12ee50f8f54e9f77b1"
-            "e355f6c50544e23fb1433ddf73be84d8"
-            "79de7c0046dc4996d9e773f4bc9efe57"
-            "38829adb26c81b37c93a1b270b20329d"
-            "658675fc6ea534e0810a4432826bf58c"
-            "941efb65d57a338bbd2e26640f89ffbc"
-            "1a858efcb8550ee3a5e1998bd177e93a"
-            "7363c344fe6b199ee5d02e82d522c4fe"
-            "ba15452f80288a821a579116ec6dad2b"
-            "3b310da903401aa62100ab5d1a36553e"
-            "06203b33890cc9b832f79ef80560ccb9"
-            "a39ce767967ed628c6ad573cb116dbef"
-            "efd75499da96bd68a8a97b928a8bbc10"
-            "3b6621fcde2beca1231d206be6cd9ec7"
-            "aff6f6c94fcd7204ed3455c68c83f4a4"
-            "1da4af2b74ef5c53f1d8ac70bdcb7ed1"
-            "85ce81bd84359d44254d95629e9855a9"
-            "4a7c1958d1f8ada5d0532ed8a5aa3fb2"
-            "d17ba70eb6248e594e1a2297acbbb39d"
-            "502f1a8c6eb6f1ce22b3de1a1f40cc24"
-            "554119a831a9aad6079cad88425de6bd"
-            "e1a9187ebb6092cf67bf2b13fd65f270"
-            "88d78b7e883c8759d2c4f5c65adb7553"
-            "878ad575f9fad878e80a0c9ba63bcbcc"
-            "2732e69485bbc9c90bfbd62481d9089b"
-            "eccf80cfe2df16a2cf65bd92dd597b07"
-            "07e0917af48bbb75fed413d238f5555a"
-            "7a569d80c3414a8d0859dc65a46128ba"
-            "b27af87a71314f318c782b23ebfe808b"
-            "82b0ce26401d2e22f04d83d1255dc51a"
-            "ddd3b75a2b1ae0784504df543af8969b"
-            "e3ea7082ff7fc9888c144da2af58429e"
-            "c96031dbcad3dad9af0dcbaaaf268cb8"
-            "fcffead94f3c7ca495e056a9b47acdb7"
-            "51fb73e666c6c655ade8297297d07ad1"
-            "ba5e43f1bca32301651339e22904cc8c"
-            "42f58c30c04aafdb038dda0847dd988d"
-            "cda6f3bfd15c4b4c4525004aa06eeff8"
-            "ca61783aacec57fb3d1f92b0fe2fd1a8"
-            "5f6724517b65e614ad6808d6f6ee34df"
-            "f7310fdc82aebfd904b01e1dc54b2927"
-            "094b2db68d6f903b68401adebf5a7e08"
-            "d78ff4ef5d63653a65040cf9bfd4aca7"
-            "984a74d37145986780fc0b16ac451649"
-            "de6188a7dbdf191f64b5fc5e2ab47b57"
-            "f7f7276cd419c17a3ca8e1b939ae49e4"
-            "88acba6b965610b5480109c8b17b80e1"
-            "b7b750dfc7598d5d5011fd2dcc5600a3"
-            "2ef5b52a1ecc820e308aa342721aac09"
-            "43bf6686b64b2579376504ccc493d97e"
-            "6aed3fb0f9cd71a43dd497f01f17c0e2"
-            "cb3797aa2a2f256656168e6c496afc5f"
-            "b93246f6b1116398a346f1a641f3b041"
-            "e989f7914f90cc2c7fff357876e506b5"
-            "0d334ba77c225bc307ba537152f3f161"
-            "0e4eafe595f6d9d90d11faa933a15ef1"
-            "369546868a7f3a45a96768d40fd9d034"
-            "12c091c6315cf4fde7cb68606937380d"
-            "b2eaaa707b4c4185c32eddcdd306705e"
-            "4dc1ffc872eeee475a64dfac86aba41c"
-            "0618983f8741c5ef68d3a101e8a3b8ca"
-            "c60c905c15fc910840b94c00a0b9d0"
-            "",
-        .sig = ""
-            "0aab4c900501b3e24d7cdf4663326a3a"
-            "87df5e4843b2cbdb67cbf6e460fec350"
-            "aa5371b1508f9f4528ecea23c436d94b"
-            "5e8fcd4f681e30a6ac00a9704a188a03"
-    }
-};
-
-static bool
-ed25519_test (const ed25519_impl_t *impl)
-{
-    guint i;
-    gchar sig[rspamd_cryptobox_MAX_SIGBYTES];
-    gchar joint_sk[rspamd_cryptobox_MAX_SIGSKBYTES];
-    guchar *sk, *pk, *expected, *msg;
-
-    for (i = 0; i < G_N_ELEMENTS (test_vectors); i ++) {
-        sk = rspamd_decode_hex (test_vectors[i].sk, strlen (test_vectors[i].sk));
-        pk = rspamd_decode_hex (test_vectors[i].pk, strlen (test_vectors[i].pk));
-        expected = rspamd_decode_hex (test_vectors[i].sig,
-            strlen (test_vectors[i].sig));
-        msg = rspamd_decode_hex (test_vectors[i].message,
-            strlen (test_vectors[i].message));
-
-        memcpy (joint_sk, sk, 32);
-        memcpy (joint_sk + 32, pk, 32);
-
-        impl->sign (sig, NULL, msg, strlen (test_vectors[i].message) / 2, joint_sk);
-
-        if (memcmp (sig, expected,
-            rspamd_cryptobox_signature_bytes (RSPAMD_CRYPTOBOX_MODE_25519)) != 0) {
-            return false;
-        }
-
-        if (impl->verify (sig, msg, strlen (test_vectors[i].message) / 2, pk) != 0) {
-            return false;
-        }
-
-        g_free (sk);
-        g_free (pk);
-        g_free (expected);
-        g_free (msg);
-    }
-
-    return true;
-}
diff --git a/src/libcryptobox/ed25519/ed25519.h b/src/libcryptobox/ed25519/ed25519.h
deleted file mode 100644
index a1f702c4a..000000000
--- a/src/libcryptobox/ed25519/ed25519.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef SRC_LIBCRYPTOBOX_ED25519_ED25519_H_
-#define SRC_LIBCRYPTOBOX_ED25519_ED25519_H_
-
-#include "config.h"
-#include <stdbool.h>
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-const char *ed25519_load (void);
-
-void ed25519_keypair (unsigned char *pk, unsigned char *sk);
-
-void ed25519_seed_keypair (unsigned char *pk, unsigned char *sk, unsigned char *seed);
-
-void ed25519_sign (unsigned char *sig, size_t *siglen_p,
-    const unsigned char *m, size_t mlen,
-    const unsigned char *sk);
-
-bool ed25519_verify (const unsigned char *sig,
-    const unsigned char *m,
-    size_t mlen,
-    const unsigned char *pk);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* SRC_LIBCRYPTOBOX_ED25519_ED25519_H_ */
diff --git a/src/libcryptobox/ed25519/ref.c b/src/libcryptobox/ed25519/ref.c
deleted file mode 100644
index cbcf87857..000000000
--- a/src/libcryptobox/ed25519/ref.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2013-2016
- * Frank Denis <j at pureftpd dot org>
- * Vsevolod Stakhov
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "config.h"
-#include "ed25519.h"
-#include "cryptobox.h"
-#include "../curve25519/fe.h"
-#include "ottery.h"
-#include <openssl/evp.h> /* SHA512 */
-
-int
-ed_seed_keypair_ref (unsigned char *pk, unsigned char *sk,
-    const unsigned char *seed)
-{
-    ge_p3 A;
-    EVP_MD_CTX *sha_ctx;
-
-    sha_ctx = EVP_MD_CTX_create ();
-    g_assert (sha_ctx && EVP_DigestInit (sha_ctx, EVP_sha512()) == 1);
-    EVP_DigestUpdate (sha_ctx, seed, 32);
-    EVP_DigestFinal (sha_ctx, sk, NULL);
-
-    sk[0] &= 248;
-    sk[31] &= 63;
-    sk[31] |= 64;
-
-    ge_scalarmult_base (&A, sk);
-    ge_p3_tobytes (pk, &A);
-
-    memmove (sk, seed, 32);
-    memmove (sk + 32, pk, 32);
-
-    EVP_MD_CTX_destroy (sha_ctx);
-
-    return 0;
-}
-
-int
-ed_keypair_ref (unsigned char *pk, unsigned char *sk)
-{
-    unsigned char seed[32];
-    int ret;
-
-    ottery_rand_bytes (seed, sizeof (seed));
-    ret = ed_seed_keypair_ref (pk, sk, seed);
-    rspamd_explicit_memzero (seed, sizeof (seed));
-
-    return ret;
-}
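ed_seed_keypair_ref() is the canonical Ed25519 derivation: hash the 32-byte seed with SHA-512, clamp the first half (clear the three low bits of byte 0, clear the two high bits of byte 31, then set bit 254), multiply the base point, and store seed || pk as the 64-byte secret key. libsodium uses the same 64-byte secret-key layout, so the replacement is direct; a minimal sketch:

    unsigned char pk[crypto_sign_PUBLICKEYBYTES];  /* 32 */
    unsigned char sk[crypto_sign_SECRETKEYBYTES];  /* 64: seed || pk */
    unsigned char seed[crypto_sign_SEEDBYTES];     /* 32 */

    randombytes_buf(seed, sizeof seed);
    crypto_sign_seed_keypair(pk, sk, seed);  /* deterministic: replaces ed_seed_keypair_ref */
    /* or let libsodium draw the seed itself, replacing ed_keypair_ref: */
    crypto_sign_keypair(pk, sk);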
-int
-ed_verify_ref(const unsigned char *sig, const unsigned char *m,
-    size_t mlen, const unsigned char *pk)
-{
-    EVP_MD_CTX *sha_ctx;
-    unsigned char h[64];
-    unsigned char rcheck[32];
-    unsigned int i;
-    unsigned char d = 0;
-    ge_p3 A;
-    ge_p2 R;
-
-    if (sig[63] & 224) {
-        return -1;
-    }
-    if (ge_frombytes_negate_vartime (&A, pk) != 0) {
-        return -1;
-    }
-    for (i = 0; i < 32; ++i) {
-        d |= pk[i];
-    }
-    if (d == 0) {
-        return -1;
-    }
-
-    sha_ctx = EVP_MD_CTX_create ();
-    g_assert (sha_ctx && EVP_DigestInit (sha_ctx, EVP_sha512()) == 1);
-    EVP_DigestUpdate (sha_ctx, sig, 32);
-    EVP_DigestUpdate (sha_ctx, pk, 32);
-    EVP_DigestUpdate (sha_ctx, m, mlen);
-    EVP_DigestFinal (sha_ctx, h, NULL);
-
-    sc_reduce (h);
-
-    EVP_MD_CTX_destroy (sha_ctx);
-
-    ge_double_scalarmult_vartime (&R, h, &A, sig + 32);
-    ge_tobytes (rcheck, &R);
-
-    return verify_32 (rcheck, sig) | (-(rcheck == sig));
-}
-
-void
-ed_sign_ref(unsigned char *sig, size_t *siglen_p,
-    const unsigned char *m, size_t mlen,
-    const unsigned char *sk)
-{
-    EVP_MD_CTX *sha_ctx;
-    unsigned char az[64];
-    unsigned char nonce[64];
-    unsigned char hram[64];
-    ge_p3 R;
-
-    sha_ctx = EVP_MD_CTX_create ();
-    g_assert (sha_ctx && EVP_DigestInit (sha_ctx, EVP_sha512()) == 1);
-    EVP_DigestUpdate (sha_ctx, sk, 32);
-    EVP_DigestFinal (sha_ctx, az, NULL);
-    az[0] &= 248;
-    az[31] &= 63;
-    az[31] |= 64;
-
-    g_assert (EVP_DigestInit (sha_ctx, EVP_sha512()) == 1);
-    EVP_DigestUpdate (sha_ctx, az + 32, 32);
-    EVP_DigestUpdate (sha_ctx, m, mlen);
-    EVP_DigestFinal (sha_ctx, nonce, NULL);
-
-    memmove (sig + 32, sk + 32, 32);
-
-    sc_reduce (nonce);
-    ge_scalarmult_base (&R, nonce);
-    ge_p3_tobytes (sig, &R);
-
-    g_assert (EVP_DigestInit (sha_ctx, EVP_sha512()) == 1);
-    EVP_DigestUpdate (sha_ctx, sig, 64);
-    EVP_DigestUpdate (sha_ctx, m, mlen);
-    EVP_DigestFinal (sha_ctx, hram, NULL);
-
-    sc_reduce (hram);
-    sc_muladd (sig + 32, hram, az, nonce);
-
-    rspamd_explicit_memzero (az, sizeof (az));
-    EVP_MD_CTX_destroy (sha_ctx);
-
-    if (siglen_p != NULL) {
-        *siglen_p = 64U;
-    }
-}
diff --git a/src/libcryptobox/poly1305/avx.S b/src/libcryptobox/poly1305/avx.S
deleted file mode 100644
index bf7390888..000000000
--- a/src/libcryptobox/poly1305/avx.S
+++ /dev/null
@@ -1,877 +0,0 @@
-#include "../macro.S"
-#include "constants.S"
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN_EXT poly1305_block_size_avx,0,0
-movl $32, %eax
-ret
-FN_END poly1305_block_size_avx
-
-GLOBAL_HIDDEN_FN_EXT poly1305_init_ext_avx,4,1
-poly1305_init_ext_avx_local:
-pushq %r15
-pushq %r14
-pushq %r13
-pushq %r12
-pushq %rbp
-pushq %rbx
-movq %rdi, %rbp
-testq %rdx, %rdx
-movq $-1, %rax
-cmovne %rdx, %rax
-movq %rax, -16(%rsp)
-vpxor %xmm0, %xmm0, %xmm0
-vmovdqu %xmm0, (%rdi)
-vmovdqu %xmm0, 16(%rdi)
-vmovdqu %xmm0, 32(%rdi)
-movq (%rsi), %r9
-movq 8(%rsi), %r8
-movabsq $17575274610687, %r10
-andq %r9, %r10
-shrq $44, %r9
-movq %r8, %rax
-salq $20, %rax
-orq %rax, %r9
-movabsq $17592181915647, %rax
-andq %rax, %r9
-shrq $24, %r8
-movabsq $68719475727, %rax
-andq %rax, %r8
-leaq 40(%rdi), %r15
-movl %r10d, %eax
-andl $67108863, %eax
-movl %eax, 40(%rdi)
-movl %r9d, %edx
-sall $18, %edx
-movq %r10, %rax
-shrq $26, %rax
-orl %edx, %eax
-andl $67108863, %eax
-movl %eax, 44(%rdi)
-movq %r9, %rax
-shrq $8, %rax
-andl $67108863, %eax
-movl %eax, 48(%rdi)
-movq %r9, %rax
-shrq $34, %rax
-movl %r8d, %edx
-sall $10, %edx
-orl %edx, %eax
-andl $67108863, %eax
-movl %eax, 52(%rdi)
-movq %r8, %rax
-shrq $16, %rax
-movl %eax, 56(%rdi)
-movq 16(%rsi), %rax
-movq %rax, 104(%rdi)
-movq 24(%rsi), %rax
-movq %rax, 112(%rdi)
-movl $0, %ebx
-.L7:
-testq %rbx, %rbx
-jne .L4
-leaq 60(%rbp), %r15
-cmpq $16, -16(%rsp)
-ja .L6
-jmp .L5
-.L4:
-cmpq $1, %rbx
-jne .L6
-leaq 80(%rbp), %r15
-cmpq $95, -16(%rsp)
-jbe .L5
-.L6:
-leaq (%r8,%r8,4), %rsi
-salq $2, %rsi
-leaq (%r9,%r9), %rdi
-movq %rdi, %rax
-mulq %rsi
-movq %rax, %r13
-movq %rdx, %r14
-movq %r10, %rax
-mulq %r10
-addq %r13, %rax
-adcq %r14, %rdx
-movabsq $17592186044415, %rcx
-movq %rax, -72(%rsp)
-movq %rdx, -64(%rsp)
-andq -72(%rsp), %rcx
-leaq (%r10,%r10), %r11
-movq %r11, %rax
-mulq %r9
-movq %rax, %r11
-movq %rdx, %r12
-movq %rsi, %rax
-mulq %r8
-movq %rax, %r13
-movq %rdx, %r14
-addq %r11, %r13
-adcq %r12, %r14
-movq -72(%rsp), %rax
-movq -64(%rsp), %rdx
-shrdq $44, %rdx, %rax
-movq %rax, -56(%rsp)
-movq $0, -48(%rsp)
-addq -56(%rsp), %r13
-adcq -48(%rsp), %r14
-movabsq
$17592186044415, %rsi -andq %r13, %rsi -leaq (%r8,%r8), %rdi -movq %rdi, %rax -mulq %r10 -movq %rax, %r11 -movq %rdx, %r12 -movq %r9, %rax -mulq %r9 -addq %r11, %rax -adcq %r12, %rdx -shrdq $44, %r14, %r13 -movq %r13, -40(%rsp) -movq $0, -32(%rsp) -addq -40(%rsp), %rax -adcq -32(%rsp), %rdx -movabsq $4398046511103, %rdi -andq %rax, %rdi -shrdq $42, %rdx, %rax -leaq (%rax,%rax,4), %r8 -addq %rcx, %r8 -movabsq $17592186044415, %r10 -andq %r8, %r10 -shrq $44, %r8 -addq %rsi, %r8 -movabsq $17592186044415, %r9 -andq %r8, %r9 -shrq $44, %r8 -addq %rdi, %r8 -movl %r10d, %eax -andl $67108863, %eax -movl %eax, (%r15) -movl %r9d, %edx -sall $18, %edx -movq %r10, %rax -shrq $26, %rax -orl %edx, %eax -andl $67108863, %eax -movl %eax, 4(%r15) -movq %r9, %rax -shrq $8, %rax -andl $67108863, %eax -movl %eax, 8(%r15) -movl %r8d, %edx -sall $10, %edx -movq %r9, %rax -shrq $34, %rax -orl %edx, %eax -andl $67108863, %eax -movl %eax, 12(%r15) -movq %r8, %rax -shrq $16, %rax -movl %eax, 16(%r15) -addq $1, %rbx -cmpq $2, %rbx -jne .L7 -.L5: -movq $0, 120(%rbp) -popq %rbx -popq %rbp -popq %r12 -popq %r13 -popq %r14 -popq %r15 -ret -FN_END poly1305_init_ext_avx - - - -GLOBAL_HIDDEN_FN poly1305_blocks_avx -poly1305_blocks_avx_local: -pushq %rbp -movq %rsp, %rbp -pushq %rbx -andq $-64, %rsp -subq $200, %rsp -movl $(1 << 24), %eax -movl $((1 << 26) - 1), %r8d -movl $(5), %r9d -vmovd %eax, %xmm1 -vmovd %r8d, %xmm0 -vmovd %r9d, %xmm2 -vpshufd $68, %xmm1, %xmm1 -vpshufd $68, %xmm0, %xmm0 -vpshufd $68, %xmm2, %xmm2 -vmovdqa %xmm1, 152(%rsp) -vmovdqa %xmm2, 184(%rsp) -movq 120(%rdi), %rax -testb $4, %al -je .L12 -vpsrldq $8, %xmm1, %xmm1 -vmovdqa %xmm1, 152(%rsp) -.L12: -testb $8, %al -je .L13 -vpxor %xmm1, %xmm1, %xmm1 -vmovdqa %xmm1, 152(%rsp) -.L13: -testb $1, %al -jne .L14 -vmovq (%rsi), %xmm1 -vpinsrq $1, 16(%rsi), %xmm1, %xmm1 -vmovq 8(%rsi), %xmm3 -vpinsrq $1, 24(%rsi), %xmm3, %xmm2 -vpand %xmm0, %xmm1, %xmm7 -vpsrlq $26, %xmm1, %xmm12 -vpand %xmm0, %xmm12, %xmm12 -vpsllq $12, %xmm2, %xmm3 -vpsrlq $52, %xmm1, %xmm1 -vpor %xmm3, %xmm1, %xmm6 -vpand %xmm0, %xmm6, %xmm3 -vpsrlq $26, %xmm6, %xmm6 -vpand %xmm0, %xmm6, %xmm6 -vpsrlq $40, %xmm2, %xmm2 -vpor 152(%rsp), %xmm2, %xmm2 -addq $32, %rsi -subq $32, %rdx -orq $1, %rax -movq %rax, 120(%rdi) -jmp .L15 -.L14: -vmovdqu (%rdi), %xmm12 -vmovdqu 16(%rdi), %xmm6 -vmovdqu 32(%rdi), %xmm2 -vpshufd $80, %xmm12, %xmm7 -vpshufd $250, %xmm12, %xmm12 -vpshufd $80, %xmm6, %xmm3 -vpshufd $250, %xmm6, %xmm6 -vpshufd $80, %xmm2, %xmm2 -.L15: -movq 120(%rdi), %rax -testb $48, %al -je .L16 -testb $16, %al -je .L17 -vmovdqu 40(%rdi), %xmm1 -vmovd 56(%rdi), %xmm4 -vmovdqu 60(%rdi), %xmm5 -vpunpckldq %xmm1, %xmm5, %xmm11 -vpunpckhdq %xmm1, %xmm5, %xmm5 -vmovd 76(%rdi), %xmm1 -vpunpcklqdq %xmm4, %xmm1, %xmm4 -jmp .L18 -.L17: -movl $(1), %r8d -vmovdqu 40(%rdi), %xmm5 -vmovd 56(%rdi), %xmm4 -vmovd %r8d, %xmm1 -vpunpckldq %xmm1, %xmm5, %xmm11 -vpunpckhdq %xmm1, %xmm5, %xmm5 -.L18: -vpshufd $80, %xmm11, %xmm1 -vpshufd $250, %xmm11, %xmm11 -vpshufd $80, %xmm5, %xmm10 -vpshufd $250, %xmm5, %xmm5 -jmp .L19 -.L16: -vmovdqu 60(%rdi), %xmm5 -vpshufd $0, %xmm5, %xmm1 -vpshufd $85, %xmm5, %xmm11 -vpshufd $170, %xmm5, %xmm10 -vpshufd $255, %xmm5, %xmm5 -vmovd 76(%rdi), %xmm4 -vpshufd $0, %xmm4, %xmm4 -.L19: -vmovdqa %xmm11, 136(%rsp) -vpmuludq 184(%rsp), %xmm11, %xmm13 -vmovdqa %xmm13, 120(%rsp) -vmovdqa %xmm10, 104(%rsp) -vpmuludq 184(%rsp), %xmm10, %xmm13 -vmovdqa %xmm13, 88(%rsp) -vmovdqa %xmm5, 72(%rsp) -vpmuludq 184(%rsp), %xmm5, %xmm5 -vmovdqa %xmm5, 56(%rsp) -vmovdqa %xmm4, 40(%rsp) -vpmuludq 
184(%rsp), %xmm4, %xmm4 -vmovdqa %xmm4, 24(%rsp) -cmpq $63, %rdx -jbe .L20 -vmovdqu 80(%rdi), %xmm4 -vpshufd $0, %xmm4, %xmm5 -vmovdqa %xmm5, 8(%rsp) -vpshufd $85, %xmm4, %xmm5 -vmovdqa %xmm5, -8(%rsp) -vpshufd $170, %xmm4, %xmm13 -vmovdqa %xmm13, -24(%rsp) -vpshufd $255, %xmm4, %xmm4 -vmovdqa %xmm4, %xmm10 -vmovdqa %xmm4, -40(%rsp) -vmovd 96(%rdi), %xmm4 -vpshufd $0, %xmm4, %xmm4 -vmovdqa %xmm4, %xmm8 -vmovdqa %xmm4, -56(%rsp) -vpmuludq 184(%rsp), %xmm5, %xmm4 -vmovdqa %xmm4, -72(%rsp) -vpmuludq 184(%rsp), %xmm13, %xmm4 -vmovdqa %xmm4, -88(%rsp) -vpmuludq 184(%rsp), %xmm10, %xmm4 -vmovdqa %xmm4, -104(%rsp) -vpmuludq 184(%rsp), %xmm8, %xmm4 -vmovdqa %xmm4, -120(%rsp) -leaq 32(%rsi), %rax -movq %rdx, %rcx -vmovdqa %xmm1, 168(%rsp) -jmp .L22 -.p2align 6 -nop -nop -nop -nop -.L22: -vpmuludq -72(%rsp), %xmm2, %xmm13 -vmovdqa -88(%rsp), %xmm5 -vpmuludq %xmm5, %xmm6, %xmm4 -vpmuludq %xmm5, %xmm2, %xmm11 -vmovdqa -104(%rsp), %xmm9 -vpmuludq %xmm9, %xmm6, %xmm5 -vpmuludq %xmm9, %xmm2, %xmm10 -vpaddq %xmm4, %xmm13, %xmm13 -vpmuludq %xmm9, %xmm3, %xmm4 -vmovdqa -120(%rsp), %xmm8 -vpmuludq %xmm8, %xmm2, %xmm9 -vpaddq %xmm5, %xmm11, %xmm11 -vmovdqa %xmm8, %xmm5 -vpmuludq %xmm8, %xmm12, %xmm8 -vpmuludq %xmm5, %xmm3, %xmm14 -vpaddq %xmm4, %xmm13, %xmm13 -vpmuludq %xmm5, %xmm6, %xmm4 -vmovdqa 8(%rsp), %xmm15 -vpmuludq %xmm15, %xmm6, %xmm5 -vpaddq %xmm8, %xmm13, %xmm13 -vpmuludq %xmm15, %xmm2, %xmm8 -vpaddq %xmm14, %xmm11, %xmm11 -vpmuludq %xmm15, %xmm7, %xmm14 -vpaddq %xmm4, %xmm10, %xmm10 -vpmuludq %xmm15, %xmm12, %xmm4 -vpaddq %xmm5, %xmm9, %xmm9 -vpmuludq %xmm15, %xmm3, %xmm5 -vmovdqa -8(%rsp), %xmm15 -vpmuludq %xmm15, %xmm3, %xmm2 -vpaddq %xmm14, %xmm13, %xmm13 -vpmuludq %xmm15, %xmm6, %xmm6 -vpaddq %xmm4, %xmm11, %xmm11 -vpmuludq %xmm15, %xmm7, %xmm4 -vpaddq %xmm5, %xmm10, %xmm10 -vmovq -32(%rax), %xmm5 -vpinsrq $1, -16(%rax), %xmm5, %xmm5 -vpmuludq %xmm15, %xmm12, %xmm14 -vpaddq %xmm2, %xmm9, %xmm9 -vmovdqa -24(%rsp), %xmm2 -vpmuludq %xmm2, %xmm12, %xmm15 -vpaddq %xmm6, %xmm8, %xmm8 -vpmuludq %xmm2, %xmm3, %xmm3 -vpaddq %xmm4, %xmm11, %xmm11 -vmovq -24(%rax), %xmm4 -vpinsrq $1, -8(%rax), %xmm4, %xmm6 -vpmuludq %xmm2, %xmm7, %xmm4 -vpaddq %xmm14, %xmm10, %xmm10 -vmovdqa -40(%rsp), %xmm1 -vpmuludq %xmm1, %xmm7, %xmm14 -vpaddq %xmm15, %xmm9, %xmm9 -vpand %xmm5, %xmm0, %xmm2 -vpmuludq %xmm1, %xmm12, %xmm12 -vpaddq %xmm3, %xmm8, %xmm8 -vpsrlq $26, %xmm5, %xmm3 -vpand %xmm3, %xmm0, %xmm3 -vpmuludq -56(%rsp), %xmm7, %xmm7 -vpaddq %xmm4, %xmm10, %xmm10 -vpsllq $12, %xmm6, %xmm15 -vpsrlq $52, %xmm5, %xmm4 -vpor %xmm15, %xmm4, %xmm4 -vpaddq %xmm14, %xmm9, %xmm9 -vpsrlq $14, %xmm6, %xmm5 -vpand %xmm5, %xmm0, %xmm5 -vpaddq %xmm12, %xmm8, %xmm8 -vpand %xmm4, %xmm0, %xmm4 -vpaddq %xmm7, %xmm8, %xmm8 -vpsrlq $40, %xmm6, %xmm6 -vpor 152(%rsp), %xmm6, %xmm6 -vmovdqu (%rax), %xmm12 -vmovdqu 16(%rax), %xmm7 -vpunpckldq %xmm7, %xmm12, %xmm15 -vpunpckhdq %xmm7, %xmm12, %xmm7 -vpxor %xmm14, %xmm14, %xmm14 -vpunpckldq %xmm14, %xmm15, %xmm12 -vpunpckhdq %xmm14, %xmm15, %xmm15 -vpunpckldq %xmm14, %xmm7, %xmm14 -vpxor %xmm1, %xmm1, %xmm1 -vpunpckhdq %xmm1, %xmm7, %xmm7 -vpsllq $6, %xmm15, %xmm15 -vpsllq $12, %xmm14, %xmm14 -vpsllq $18, %xmm7, %xmm7 -vpaddq %xmm12, %xmm13, %xmm12 -vpaddq %xmm15, %xmm11, %xmm15 -vpaddq %xmm14, %xmm10, %xmm14 -vpaddq %xmm7, %xmm9, %xmm7 -vpaddq 152(%rsp), %xmm8, %xmm8 -vpmuludq 120(%rsp), %xmm6, %xmm13 -vmovdqa 88(%rsp), %xmm10 -vpmuludq %xmm10, %xmm5, %xmm9 -vpmuludq %xmm10, %xmm6, %xmm11 -vmovdqa 56(%rsp), %xmm1 -vpmuludq %xmm1, %xmm5, %xmm10 -vpaddq %xmm13, %xmm12, %xmm12 -vpmuludq %xmm1, 
%xmm6, %xmm13 -vpaddq %xmm9, %xmm12, %xmm12 -vpmuludq %xmm1, %xmm4, %xmm9 -vpaddq %xmm11, %xmm15, %xmm15 -vmovdqa 24(%rsp), %xmm1 -vpmuludq %xmm1, %xmm6, %xmm11 -vpaddq %xmm10, %xmm15, %xmm10 -vpmuludq %xmm1, %xmm3, %xmm15 -vpaddq %xmm13, %xmm14, %xmm14 -vpmuludq %xmm1, %xmm4, %xmm13 -vpaddq %xmm9, %xmm12, %xmm9 -vpmuludq %xmm1, %xmm5, %xmm12 -vpaddq %xmm11, %xmm7, %xmm7 -vpmuludq 168(%rsp), %xmm5, %xmm11 -vpaddq %xmm15, %xmm9, %xmm9 -vpmuludq 168(%rsp), %xmm6, %xmm6 -vpaddq %xmm13, %xmm10, %xmm10 -vpmuludq 168(%rsp), %xmm2, %xmm15 -vpaddq %xmm12, %xmm14, %xmm14 -vpmuludq 168(%rsp), %xmm3, %xmm13 -vpaddq %xmm11, %xmm7, %xmm11 -vpmuludq 168(%rsp), %xmm4, %xmm12 -vpaddq %xmm6, %xmm8, %xmm6 -vmovdqa 136(%rsp), %xmm8 -vpmuludq %xmm8, %xmm4, %xmm7 -vpaddq %xmm15, %xmm9, %xmm9 -vpmuludq %xmm8, %xmm5, %xmm5 -vpaddq %xmm13, %xmm10, %xmm10 -vpmuludq %xmm8, %xmm2, %xmm15 -vpaddq %xmm12, %xmm14, %xmm14 -vpmuludq %xmm8, %xmm3, %xmm8 -vpaddq %xmm7, %xmm11, %xmm11 -vmovdqa 104(%rsp), %xmm7 -vpmuludq %xmm7, %xmm3, %xmm13 -vpaddq %xmm5, %xmm6, %xmm6 -vpmuludq %xmm7, %xmm4, %xmm4 -vpaddq %xmm15, %xmm10, %xmm10 -vpmuludq %xmm7, %xmm2, %xmm15 -vpaddq %xmm8, %xmm14, %xmm14 -vmovdqa 72(%rsp), %xmm5 -vpmuludq %xmm5, %xmm2, %xmm7 -vpaddq %xmm13, %xmm11, %xmm11 -vpmuludq %xmm5, %xmm3, %xmm3 -vpaddq %xmm4, %xmm6, %xmm6 -vpmuludq 40(%rsp), %xmm2, %xmm2 -vpaddq %xmm15, %xmm14, %xmm14 -vpaddq %xmm7, %xmm11, %xmm11 -vpaddq %xmm3, %xmm6, %xmm6 -vpaddq %xmm2, %xmm6, %xmm2 -vpsrlq $26, %xmm9, %xmm12 -vpsrlq $26, %xmm11, %xmm5 -vpand %xmm0, %xmm9, %xmm9 -vpand %xmm0, %xmm11, %xmm11 -vpaddq %xmm12, %xmm10, %xmm10 -vpaddq %xmm5, %xmm2, %xmm2 -vpsrlq $26, %xmm10, %xmm3 -vpsrlq $26, %xmm2, %xmm7 -vpand %xmm0, %xmm10, %xmm10 -vpand %xmm0, %xmm2, %xmm2 -vpaddq %xmm3, %xmm14, %xmm3 -vpmuludq 184(%rsp), %xmm7, %xmm7 -vpaddq %xmm7, %xmm9, %xmm9 -vpsrlq $26, %xmm3, %xmm6 -vpsrlq $26, %xmm9, %xmm12 -vpand %xmm0, %xmm3, %xmm3 -vpand %xmm0, %xmm9, %xmm7 -vpaddq %xmm6, %xmm11, %xmm6 -vpaddq %xmm12, %xmm10, %xmm12 -vpsrlq $26, %xmm6, %xmm8 -vpand %xmm0, %xmm6, %xmm6 -vpaddq %xmm8, %xmm2, %xmm2 -subq $64, %rcx -addq $64, %rax -cmpq $63, %rcx -ja .L22 -vmovdqa 168(%rsp), %xmm1 -leaq -64(%rdx), %rax -andq $-64, %rax -leaq 64(%rsi,%rax), %rsi -andl $63, %edx -.L20: -cmpq $31, %rdx -jbe .L23 -vpmuludq 120(%rsp), %xmm2, %xmm11 -vmovdqa 88(%rsp), %xmm4 -vpmuludq %xmm4, %xmm6, %xmm0 -vpmuludq %xmm4, %xmm2, %xmm10 -vmovdqa 56(%rsp), %xmm4 -vpmuludq %xmm4, %xmm6, %xmm8 -vpmuludq %xmm4, %xmm2, %xmm5 -vpaddq %xmm0, %xmm11, %xmm11 -vpmuludq %xmm4, %xmm3, %xmm0 -vmovdqa 24(%rsp), %xmm13 -vpmuludq %xmm13, %xmm2, %xmm4 -vpaddq %xmm8, %xmm10, %xmm10 -vpmuludq %xmm13, %xmm12, %xmm8 -vpmuludq %xmm13, %xmm3, %xmm9 -vpaddq %xmm0, %xmm11, %xmm11 -vpmuludq %xmm13, %xmm6, %xmm13 -vpmuludq %xmm1, %xmm6, %xmm0 -vpaddq %xmm8, %xmm11, %xmm8 -vpmuludq %xmm1, %xmm2, %xmm2 -vpaddq %xmm9, %xmm10, %xmm9 -vpmuludq %xmm1, %xmm7, %xmm11 -vpaddq %xmm13, %xmm5, %xmm5 -vpmuludq %xmm1, %xmm12, %xmm10 -vpaddq %xmm0, %xmm4, %xmm0 -vpmuludq %xmm1, %xmm3, %xmm1 -vmovdqa 136(%rsp), %xmm4 -vpmuludq %xmm4, %xmm3, %xmm14 -vpaddq %xmm11, %xmm8, %xmm11 -vpmuludq %xmm4, %xmm6, %xmm6 -vpaddq %xmm10, %xmm9, %xmm9 -vpmuludq %xmm4, %xmm7, %xmm15 -vpaddq %xmm1, %xmm5, %xmm5 -vpmuludq %xmm4, %xmm12, %xmm1 -vpaddq %xmm14, %xmm0, %xmm0 -vmovdqa 104(%rsp), %xmm4 -vpmuludq %xmm4, %xmm12, %xmm8 -vpaddq %xmm6, %xmm2, %xmm2 -vpmuludq %xmm4, %xmm3, %xmm3 -vpaddq %xmm15, %xmm9, %xmm9 -vpmuludq %xmm4, %xmm7, %xmm10 -vpaddq %xmm1, %xmm5, %xmm1 -vmovdqa 72(%rsp), %xmm4 -vpmuludq %xmm4, %xmm7, %xmm15 
-vpaddq %xmm8, %xmm0, %xmm0 -vpmuludq %xmm4, %xmm12, %xmm12 -vpaddq %xmm3, %xmm2, %xmm2 -vpmuludq 40(%rsp), %xmm7, %xmm7 -vpaddq %xmm10, %xmm1, %xmm1 -vpaddq %xmm15, %xmm0, %xmm0 -vpaddq %xmm12, %xmm2, %xmm2 -vpaddq %xmm7, %xmm2, %xmm2 -movl $((1 << 26) - 1), %r8d -testq %rsi, %rsi -vmovd %r8d, %xmm15 -je .L24 -vmovdqu (%rsi), %xmm4 -vmovdqu 16(%rsi), %xmm3 -vpunpckldq %xmm3, %xmm4, %xmm5 -vpunpckhdq %xmm3, %xmm4, %xmm3 -vpxor %xmm4, %xmm4, %xmm4 -vpunpckldq %xmm4, %xmm5, %xmm7 -vpunpckhdq %xmm4, %xmm5, %xmm5 -vpunpckldq %xmm4, %xmm3, %xmm6 -vpunpckhdq %xmm4, %xmm3, %xmm3 -vpsllq $6, %xmm5, %xmm5 -vpsllq $12, %xmm6, %xmm6 -vpsllq $18, %xmm3, %xmm3 -vpaddq %xmm7, %xmm11, %xmm11 -vpaddq %xmm5, %xmm9, %xmm9 -vpaddq %xmm6, %xmm1, %xmm1 -vpaddq %xmm3, %xmm0, %xmm0 -vpaddq 152(%rsp), %xmm2, %xmm2 -.L24: -vpshufd $68, %xmm15, %xmm15 -vpsrlq $26, %xmm11, %xmm12 -vpsrlq $26, %xmm0, %xmm3 -vpand %xmm15, %xmm11, %xmm11 -vpand %xmm15, %xmm0, %xmm6 -vpaddq %xmm12, %xmm9, %xmm9 -vpaddq %xmm3, %xmm2, %xmm2 -vpsrlq $26, %xmm9, %xmm3 -vpsrlq $26, %xmm2, %xmm7 -vpand %xmm15, %xmm9, %xmm9 -vpand %xmm15, %xmm2, %xmm2 -vpaddq %xmm3, %xmm1, %xmm3 -vpmuludq 184(%rsp), %xmm7, %xmm7 -vpaddq %xmm7, %xmm11, %xmm7 -vpsrlq $26, %xmm3, %xmm4 -vpsrlq $26, %xmm7, %xmm1 -vpand %xmm15, %xmm3, %xmm3 -vpand %xmm15, %xmm7, %xmm7 -vpaddq %xmm4, %xmm6, %xmm6 -vpaddq %xmm1, %xmm9, %xmm12 -vpsrlq $26, %xmm6, %xmm0 -vpand %xmm15, %xmm6, %xmm6 -vpaddq %xmm0, %xmm2, %xmm2 -.L23: -testq %rsi, %rsi -je .L25 -vpshufd $8, %xmm7, %xmm7 -vpshufd $8, %xmm12, %xmm12 -vpshufd $8, %xmm3, %xmm3 -vpshufd $8, %xmm6, %xmm6 -vpshufd $8, %xmm2, %xmm2 -vpunpcklqdq %xmm12, %xmm7, %xmm7 -vpunpcklqdq %xmm6, %xmm3, %xmm3 -vmovdqu %xmm7, (%rdi) -vmovdqu %xmm3, 16(%rdi) -vmovq %xmm2, 32(%rdi) -jmp .L11 -.L25: -vpsrldq $8, %xmm7, %xmm0 -vpaddq %xmm0, %xmm7, %xmm7 -vpsrldq $8, %xmm12, %xmm0 -vpaddq %xmm0, %xmm12, %xmm12 -vpsrldq $8, %xmm3, %xmm0 -vpaddq %xmm0, %xmm3, %xmm3 -vpsrldq $8, %xmm6, %xmm0 -vpaddq %xmm0, %xmm6, %xmm6 -vpsrldq $8, %xmm2, %xmm0 -vpaddq %xmm0, %xmm2, %xmm2 -vmovd %xmm7, %eax -vmovd %xmm12, %edx -movl %eax, %r9d -shrl $26, %r9d -addl %edx, %r9d -movl %r9d, %r8d -andl $67108863, %r8d -vmovd %xmm3, %edx -shrl $26, %r9d -addl %edx, %r9d -vmovd %xmm6, %edx -movl %r9d, %ecx -shrl $26, %ecx -addl %edx, %ecx -movl %ecx, %esi -andl $67108863, %esi -vmovd %xmm2, %r10d -movl %r8d, %r11d -salq $26, %r11 -andl $67108863, %eax -orq %rax, %r11 -movabsq $17592186044415, %rax -andq %rax, %r11 -andl $67108863, %r9d -salq $8, %r9 -shrl $18, %r8d -movl %r8d, %r8d -orq %r8, %r9 -movq %rsi, %rdx -salq $34, %rdx -orq %rdx, %r9 -andq %rax, %r9 -shrl $26, %ecx -addl %r10d, %ecx -salq $16, %rcx -shrl $10, %esi -movl %esi, %esi -orq %rsi, %rcx -movabsq $4398046511103, %r10 -movq %rcx, %r8 -andq %r10, %r8 -shrq $42, %rcx -leaq (%rcx,%rcx,4), %rdx -addq %r11, %rdx -movq %rdx, %rsi -andq %rax, %rsi -shrq $44, %rdx -addq %r9, %rdx -movq %rdx, %rcx -andq %rax, %rcx -shrq $44, %rdx -addq %r8, %rdx -andq %rdx, %r10 -shrq $42, %rdx -leaq (%rsi,%rdx,4), %rsi -leaq (%rsi,%rdx), %r11 -movq %r11, %rbx -andq %rax, %rbx -shrq $44, %r11 -addq %rcx, %r11 -leaq 5(%rbx), %r9 -movq %r9, %r8 -shrq $44, %r8 -addq %r11, %r8 -movabsq $-4398046511104, %rsi -addq %r10, %rsi -movq %r8, %rdx -shrq $44, %rdx -addq %rdx, %rsi -movq %rsi, %rdx -shrq $63, %rdx -subq $1, %rdx -movq %rdx, %rcx -notq %rcx -andq %rcx, %rbx -andq %rcx, %r11 -andq %r10, %rcx -andq %rax, %r9 -andq %rdx, %r9 -orq %r9, %rbx -movq %rbx, (%rdi) -andq %r8, %rax -andq %rdx, %rax -orq %rax, %r11 -movq %r11, 8(%rdi) -andq 
%rsi, %rdx -orq %rcx, %rdx -movq %rdx, 16(%rdi) -.L11: -movq -8(%rbp), %rbx -leave -ret -FN_END poly1305_blocks_avx - -GLOBAL_HIDDEN_FN poly1305_finish_ext_avx -poly1305_finish_ext_avx_local: -pushq %r12 -pushq %rbp -pushq %rbx -subq $32, %rsp -movq %rdi, %rbx -movq %rdx, %rbp -movq %rcx, %r12 -testq %rdx, %rdx -je .L30 -movq $0, (%rsp) -movq $0, 8(%rsp) -movq $0, 16(%rsp) -movq $0, 24(%rsp) -movq %rsp, %rax -subq %rsp, %rsi -testb $16, %dl -je .L31 -vmovdqu (%rsp,%rsi), %xmm0 -vmovdqa %xmm0, (%rsp) -addq $16, %rax -.L31: -testb $8, %bpl -je .L32 -movq (%rax,%rsi), %rdx -movq %rdx, (%rax) -addq $8, %rax -.L32: -testb $4, %bpl -je .L33 -movl (%rax,%rsi), %edx -movl %edx, (%rax) -addq $4, %rax -.L33: -testb $2, %bpl -je .L34 -movzwl (%rax,%rsi), %edx -movw %dx, (%rax) -addq $2, %rax -.L34: -testb $1, %bpl -je .L35 -movzbl (%rax,%rsi), %edx -movb %dl, (%rax) -.L35: -cmpq $16, %rbp -je .L36 -movb $1, (%rsp,%rbp) -movq 120(%rbx), %rdx -cmpq $16, %rbp -sbbq %rax, %rax -andl $4, %eax -addq $4, %rax -.L37: -orq %rdx, %rax -movq %rax, 120(%rbx) -movq %rsp, %rsi -movl $32, %edx -movq %rbx, %rdi -call poly1305_blocks_avx_local -.L30: -movq 120(%rbx), %rax -testb $1, %al -je .L38 -subq $1, %rbp -cmpq $15, %rbp -jbe .L39 -orq $16, %rax -movq %rax, 120(%rbx) -jmp .L40 -.L39: -orq $32, %rax -movq %rax, 120(%rbx) -.L40: -movl $32, %edx -movl $0, %esi -movq %rbx, %rdi -call poly1305_blocks_avx_local -.L38: -movq 8(%rbx), %rax -movq %rax, %rdx -salq $44, %rdx -orq (%rbx), %rdx -shrq $20, %rax -movq 16(%rbx), %rcx -salq $24, %rcx -orq %rcx, %rax -movq 104(%rbx), %rcx -movq 112(%rbx), %rsi -addq %rcx, %rdx -adcq %rsi, %rax -vpxor %xmm0, %xmm0, %xmm0 -vmovdqu %xmm0, (%rbx) -vmovdqu %xmm0, 16(%rbx) -vmovdqu %xmm0, 32(%rbx) -vmovdqu %xmm0, 48(%rbx) -vmovdqu %xmm0, 64(%rbx) -vmovdqu %xmm0, 80(%rbx) -vmovdqu %xmm0, 96(%rbx) -vmovdqu %xmm0, 112(%rbx) -movq %rdx, (%r12) -movq %rax, 8(%r12) -jmp .L43 -.L36: -movq 120(%rbx), %rdx -movl $4, %eax -jmp .L37 -.L43: -addq $32, %rsp -popq %rbx -popq %rbp -popq %r12 -ret -FN_END poly1305_finish_ext_avx - -GLOBAL_HIDDEN_FN poly1305_auth_avx -/* -cmp $128, %rdx -jb poly1305_auth_x86_local -*/ -pushq %rbp -movq %rsp, %rbp -pushq %r14 -pushq %r13 -pushq %r12 -pushq %rbx -andq $-64, %rsp -addq $-128, %rsp -movq %rdi, %r14 -movq %rsi, %r12 -movq %rdx, %rbx -movq %rsp, %rdi -movq %rcx, %rsi -call poly1305_init_ext_avx_local -movq %rbx, %r13 -andq $-32, %r13 -je .L46 -movq %rsp, %rdi -movq %r13, %rdx -movq %r12, %rsi -call poly1305_blocks_avx_local -addq %r13, %r12 -subq %r13, %rbx -.L46: -movq %rsp, %rdi -movq %r14, %rcx -movq %rbx, %rdx -movq %r12, %rsi -call poly1305_finish_ext_avx_local -leaq -32(%rbp), %rsp -popq %rbx -popq %r12 -popq %r13 -popq %r14 -popq %rbp -ret -FN_END poly1305_auth_avx diff --git a/src/libcryptobox/poly1305/avx2.S b/src/libcryptobox/poly1305/avx2.S deleted file mode 100644 index 5aa5851d6..000000000 --- a/src/libcryptobox/poly1305/avx2.S +++ /dev/null @@ -1,1095 +0,0 @@ -#include "../macro.S" -#include "constants.S" -SECTION_TEXT - -GLOBAL_HIDDEN_FN_EXT poly1305_block_size_avx2, 0, 0 -movl $64, %eax -ret -FN_END poly1305_block_size_avx2 - -GLOBAL_HIDDEN_FN poly1305_auth_avx2 -/* -cmp $128, %rdx -jb poly1305_auth_x86_local -*/ -pushq %rbp -movq %rsp, %rbp -andq $-64, %rsp -pushq %r12 -pushq %r14 -pushq %r15 -pushq %rbx -subq $224, %rsp -movq %rsi, %r14 -movq %rdi, %rbx -lea (%rsp), %rdi -movq %rcx, %rsi -movq %rdx, %r12 -call poly1305_init_ext_avx2_local -poly1305_auth_avx2_2: -movq %r12, %r15 -andq $-64, %r15 -je poly1305_auth_avx2_5 
-poly1305_auth_avx2_3: -movq %r14, %rsi -lea (%rsp), %rdi -movq %r15, %rdx -call poly1305_blocks_avx2_local -poly1305_auth_avx2_4: -addq %r15, %r14 -subq %r15, %r12 -poly1305_auth_avx2_5: -movq %r14, %rsi -lea (%rsp), %rdi -movq %r12, %rdx -movq %rbx, %rcx -call poly1305_finish_ext_avx2_local -poly1305_auth_avx2_6: -addq $224, %rsp -popq %rbx -popq %r15 -popq %r14 -popq %r12 -movq %rbp, %rsp -popq %rbp -ret -FN_END poly1305_auth_avx2 - - -GLOBAL_HIDDEN_FN poly1305_finish_ext_avx2 -poly1305_finish_ext_avx2_local: -pushq %rbp -movq %rsp, %rbp -andq $-64, %rsp -pushq %r12 -pushq %r13 -pushq %r14 -subq $104, %rsp -movq %rdx, %r13 -movq %rcx, %r14 -movq %rdi, %r12 -testq %r13, %r13 -je poly1305_finish_ext_avx2_29 -poly1305_finish_ext_avx2_2: -lea (%rsp), %rax -vpxor %ymm0, %ymm0, %ymm0 -subq %rax, %rsi -vmovdqu %ymm0, (%rsp) -vmovdqu %ymm0, 32(%rsp) -testq $32, %r13 -je poly1305_finish_ext_avx2_4 -poly1305_finish_ext_avx2_3: -vmovdqu (%rsp,%rsi), %ymm0 -lea 32(%rsp), %rax -vmovdqu %ymm0, (%rsp) -poly1305_finish_ext_avx2_4: -testq $16, %r13 -je poly1305_finish_ext_avx2_6 -poly1305_finish_ext_avx2_5: -vmovdqu (%rax,%rsi), %xmm0 -vmovdqu %xmm0, (%rax) -addq $16, %rax -poly1305_finish_ext_avx2_6: -testq $8, %r13 -je poly1305_finish_ext_avx2_8 -poly1305_finish_ext_avx2_7: -movq (%rax,%rsi), %rdx -movq %rdx, (%rax) -addq $8, %rax -poly1305_finish_ext_avx2_8: -testq $4, %r13 -je poly1305_finish_ext_avx2_10 -poly1305_finish_ext_avx2_9: -movl (%rax,%rsi), %edx -movl %edx, (%rax) -addq $4, %rax -poly1305_finish_ext_avx2_10: -testq $2, %r13 -je poly1305_finish_ext_avx2_12 -poly1305_finish_ext_avx2_11: -movzwl (%rax,%rsi), %edx -movw %dx, (%rax) -addq $2, %rax -poly1305_finish_ext_avx2_12: -testq $1, %r13 -je poly1305_finish_ext_avx2_14 -poly1305_finish_ext_avx2_13: -movb (%rax,%rsi), %dl -movb %dl, (%rax) -poly1305_finish_ext_avx2_14: -testq $15, %r13 -je poly1305_finish_ext_avx2_16 -poly1305_finish_ext_avx2_15: -movb $1, (%rsp,%r13) -poly1305_finish_ext_avx2_16: -movq 176(%r12), %rdx -andq $-8125, %rdx -cmpq $48, %r13 -jb poly1305_finish_ext_avx2_18 -poly1305_finish_ext_avx2_17: -orq $4, %rdx -jmp poly1305_finish_ext_avx2_21 -poly1305_finish_ext_avx2_18: -cmpq $32, %r13 -jb poly1305_finish_ext_avx2_20 -poly1305_finish_ext_avx2_19: -orq $8, %rdx -jmp poly1305_finish_ext_avx2_21 -poly1305_finish_ext_avx2_20: -movq %rdx, %rax -orq $32, %rdx -orq $16, %rax -cmpq $16, %r13 -cmovae %rax, %rdx -poly1305_finish_ext_avx2_21: -testq $1, %rdx -je poly1305_finish_ext_avx2_27 -poly1305_finish_ext_avx2_22: -cmpq $16, %r13 -ja poly1305_finish_ext_avx2_24 -poly1305_finish_ext_avx2_23: -orq $256, %rdx -movq %rdx, 176(%r12) -jmp poly1305_finish_ext_avx2_28 -poly1305_finish_ext_avx2_24: -cmpq $32, %r13 -ja poly1305_finish_ext_avx2_27 -poly1305_finish_ext_avx2_25: -orq $128, %rdx -movq %rdx, 176(%r12) -jmp poly1305_finish_ext_avx2_28 -poly1305_finish_ext_avx2_27: -movq %rdx, 176(%r12) -poly1305_finish_ext_avx2_28: -movq %r12, %rdi -lea (%rsp), %rsi -movl $64, %edx -vzeroupper -call poly1305_blocks_avx2_local -poly1305_finish_ext_avx2_29: -movq 176(%r12), %rdx -testq $1, %rdx -je poly1305_finish_ext_avx2_37 -poly1305_finish_ext_avx2_30: -andq $-8125, %rdx -testq %r13, %r13 -je poly1305_finish_ext_avx2_32 -poly1305_finish_ext_avx2_31: -cmpq $48, %r13 -jbe poly1305_finish_ext_avx2_33 -poly1305_finish_ext_avx2_32: -orq $512, %rdx -jmp poly1305_finish_ext_avx2_36 -poly1305_finish_ext_avx2_33: -cmpq $32, %r13 -jbe poly1305_finish_ext_avx2_35 -poly1305_finish_ext_avx2_34: -orq $1024, %rdx -jmp poly1305_finish_ext_avx2_36 
-poly1305_finish_ext_avx2_35: -movq %rdx, %rax -orq $4096, %rdx -orq $2048, %rax -cmpq $16, %r13 -cmova %rax, %rdx -poly1305_finish_ext_avx2_36: -orq $96, %rdx -movq %r12, %rdi -vpxor %ymm0, %ymm0, %ymm0 -lea (%rsp), %rsi -movq %rdx, 176(%r12) -movl $64, %edx -vmovdqu %ymm0, (%rsp) -vmovdqu %ymm0, 32(%rsp) -vzeroupper -call poly1305_blocks_avx2_local -poly1305_finish_ext_avx2_37: -movq 8(%r12), %r8 -movq %r8, %rsi -movq 16(%r12), %rax -vpxor %ymm0, %ymm0, %ymm0 -shlq $44, %rsi -shrq $20, %r8 -shlq $24, %rax -orq (%r12), %rsi -orq %rax, %r8 -movq 160(%r12), %rdx -movq 168(%r12), %rcx -addq %rdx, %rsi -adcq %rcx, %r8 -vmovdqu %ymm0, (%r12) -vmovdqu %ymm0, 32(%r12) -vmovdqu %ymm0, 64(%r12) -vmovdqu %ymm0, 96(%r12) -vmovdqu %ymm0, 128(%r12) -vmovdqu %ymm0, 160(%r12) -movq %rsi, (%r14) -movq %r8, 8(%r14) -vzeroupper -addq $104, %rsp -popq %r14 -popq %r13 -popq %r12 -movq %rbp, %rsp -popq %rbp -ret -FN_END poly1305_finish_ext_avx2 - -GLOBAL_HIDDEN_FN poly1305_blocks_avx2 -poly1305_blocks_avx2_local: -pushq %rbp -movq %rsp, %rbp -andq $-64, %rsp -subq $384, %rsp -movl $16777216, %eax -movl $67108863, %ecx -movl $5, %r8d -vmovd %eax, %xmm1 -vmovd %ecx, %xmm10 -vmovd %r8d, %xmm0 -movq 176(%rdi), %rax -vpbroadcastq %xmm1, %ymm1 -vpbroadcastq %xmm10, %ymm10 -vpbroadcastq %xmm0, %ymm11 -testq $60, %rax -je poly1305_blocks_avx2_11 -poly1305_blocks_avx2_2: -vpsrldq $8, %ymm1, %ymm15 -testq $4, %rax -je poly1305_blocks_avx2_4 -poly1305_blocks_avx2_3: -vpermq $192, %ymm15, %ymm15 -poly1305_blocks_avx2_4: -testq $8, %rax -je poly1305_blocks_avx2_6 -poly1305_blocks_avx2_5: -vpermq $240, %ymm15, %ymm15 -poly1305_blocks_avx2_6: -testq $16, %rax -je poly1305_blocks_avx2_8 -poly1305_blocks_avx2_7: -vpermq $252, %ymm15, %ymm15 -poly1305_blocks_avx2_8: -testq $32, %rax -je poly1305_blocks_avx2_10 -poly1305_blocks_avx2_9: -vpxor %ymm15, %ymm15, %ymm15 -poly1305_blocks_avx2_10: -vmovdqa %ymm15, %ymm1 -poly1305_blocks_avx2_11: -movq %rax, %rcx -btsq $0, %rcx -jc poly1305_blocks_avx2_13 -poly1305_blocks_avx2_12: -vmovdqu (%rsi), %ymm3 -movq %rcx, %rax -vmovdqu 32(%rsi), %ymm5 -vpunpcklqdq %ymm5, %ymm3, %ymm4 -addq $64, %rsi -vpunpckhqdq %ymm5, %ymm3, %ymm7 -vpermq $216, %ymm4, %ymm6 -addq $-64, %rdx -vpermq $216, %ymm7, %ymm0 -vpsrlq $52, %ymm6, %ymm8 -vpsllq $12, %ymm0, %ymm9 -vpsrlq $26, %ymm6, %ymm2 -vpsrlq $40, %ymm0, %ymm0 -vpand %ymm6, %ymm10, %ymm4 -vpor %ymm9, %ymm8, %ymm7 -vpand %ymm2, %ymm10, %ymm3 -vpor %ymm1, %ymm0, %ymm9 -vpsrlq $26, %ymm7, %ymm2 -vpand %ymm7, %ymm10, %ymm5 -vpand %ymm2, %ymm10, %ymm7 -movq %rax, 176(%rdi) -jmp poly1305_blocks_avx2_14 -poly1305_blocks_avx2_13: -vpermq $216, (%rdi), %ymm15 -vpxor %ymm0, %ymm0, %ymm0 -vpermq $216, 32(%rdi), %ymm14 -vpermq $216, 64(%rdi), %ymm13 -vpunpckldq %ymm0, %ymm15, %ymm4 -vpunpckhdq %ymm0, %ymm15, %ymm3 -vpunpckldq %ymm0, %ymm14, %ymm5 -vpunpckhdq %ymm0, %ymm14, %ymm7 -vpunpckldq %ymm0, %ymm13, %ymm9 -poly1305_blocks_avx2_14: -cmpq $64, %rdx -jb poly1305_blocks_avx2_34 -poly1305_blocks_avx2_15: -vmovdqu 140(%rdi), %ymm0 -testq $8064, %rax -je poly1305_blocks_avx2_29 -poly1305_blocks_avx2_16: -vpermq $216, 80(%rdi), %ymm6 -vpermq $216, 100(%rdi), %ymm2 -vpermq $216, 120(%rdi), %ymm8 -vpermq $216, %ymm0, %ymm0 -testq $128, %rax -je poly1305_blocks_avx2_18 -poly1305_blocks_avx2_17: -vmovdqa %ymm0, %ymm15 -vmovdqa %ymm0, %ymm14 -vmovdqa %ymm0, %ymm13 -vmovdqa %ymm8, %ymm12 -jmp poly1305_blocks_avx2_28 -poly1305_blocks_avx2_18: -testq $256, %rax -je poly1305_blocks_avx2_20 -poly1305_blocks_avx2_19: -vmovdqa %ymm0, %ymm15 -vmovdqa %ymm0, %ymm14 -vmovdqa 
%ymm8, %ymm13 -vmovdqa %ymm2, %ymm12 -jmp poly1305_blocks_avx2_28 -poly1305_blocks_avx2_20: -testq $512, %rax -je poly1305_blocks_avx2_22 -poly1305_blocks_avx2_21: -vmovdqa %ymm0, %ymm15 -vmovdqa %ymm8, %ymm14 -vmovdqa %ymm2, %ymm13 -vmovdqa %ymm6, %ymm12 -jmp poly1305_blocks_avx2_28 -poly1305_blocks_avx2_22: -testq $1024, %rax -je poly1305_blocks_avx2_24 -poly1305_blocks_avx2_23: -vpxor %ymm12, %ymm12, %ymm12 -movl $1, %r8d -vmovdqa %ymm8, %ymm15 -vmovdqa %ymm2, %ymm14 -vmovdqa %ymm6, %ymm13 -vmovd %r8d, %xmm12 -jmp poly1305_blocks_avx2_28 -poly1305_blocks_avx2_24: -testq $2048, %rax -je poly1305_blocks_avx2_26 -poly1305_blocks_avx2_25: -vpxor %ymm12, %ymm12, %ymm12 -movl $1, %r8d -vmovd %r8d, %xmm13 -vmovdqa %ymm2, %ymm15 -vmovdqa %ymm6, %ymm14 -vmovdqa %ymm13, %ymm12 -jmp poly1305_blocks_avx2_28 -poly1305_blocks_avx2_26: -testq $4096, %rax -je poly1305_blocks_avx2_28 -poly1305_blocks_avx2_27: -movl $1, %r8d -vmovd %r8d, %xmm14 -vmovdqa %ymm6, %ymm15 -vmovdqa %ymm14, %ymm13 -vmovdqa %ymm14, %ymm12 -poly1305_blocks_avx2_28: -vpunpcklqdq %ymm14, %ymm15, %ymm6 -vpunpcklqdq %ymm12, %ymm13, %ymm8 -vpunpckhqdq %ymm14, %ymm15, %ymm14 -vpunpckhqdq %ymm12, %ymm13, %ymm12 -vperm2i128 $32, %ymm8, %ymm6, %ymm2 -vperm2i128 $49, %ymm8, %ymm6, %ymm6 -vpsrlq $32, %ymm6, %ymm0 -vpsrlq $32, %ymm2, %ymm8 -vmovdqu %ymm0, 352(%rsp) -vperm2i128 $32, %ymm12, %ymm14, %ymm13 -vmovdqu %ymm13, 320(%rsp) -jmp poly1305_blocks_avx2_30 -poly1305_blocks_avx2_29: -vpsrlq $32, %ymm0, %ymm12 -vpermq $0, %ymm0, %ymm2 -vpermq $85, %ymm0, %ymm6 -vpermq $85, %ymm12, %ymm13 -vpermq $170, %ymm0, %ymm0 -vpermq $0, %ymm12, %ymm8 -vmovdqu %ymm13, 352(%rsp) -vmovdqu %ymm0, 320(%rsp) -poly1305_blocks_avx2_30: -vmovdqu (%rsi), %ymm12 -movq %rdx, %r9 -vmovdqu 352(%rsp), %ymm15 -vmovdqu %ymm1, 160(%rsp) -vmovdqu %ymm10, 192(%rsp) -vmovdqu %ymm11, 128(%rsp) -vperm2i128 $32, 32(%rsi), %ymm12, %ymm13 -xorl %r8d, %r8d -vperm2i128 $49, 32(%rsi), %ymm12, %ymm12 -xorl %ecx, %ecx -vpmuludq %ymm11, %ymm8, %ymm0 -vpmuludq %ymm11, %ymm6, %ymm1 -vmovdqu %ymm0, 224(%rsp) -vmovdqu %ymm1, 256(%rsp) -vpunpckldq %ymm12, %ymm13, %ymm14 -vpunpckhdq %ymm12, %ymm13, %ymm12 -vmovdqu %ymm14, 32(%rsp) -vpmuludq %ymm0, %ymm9, %ymm0 -vpmuludq %ymm1, %ymm7, %ymm13 -vpaddq %ymm13, %ymm0, %ymm0 -vpmuludq %ymm11, %ymm15, %ymm10 -vpmuludq %ymm10, %ymm5, %ymm13 -vpaddq %ymm13, %ymm0, %ymm0 -vmovdqu %ymm10, 288(%rsp) -vpmuludq 320(%rsp), %ymm11, %ymm11 -vpmuludq %ymm11, %ymm3, %ymm13 -vpaddq %ymm13, %ymm0, %ymm0 -vmovdqu %ymm11, (%rsp) -vpmuludq %ymm2, %ymm4, %ymm13 -vpaddq %ymm13, %ymm0, %ymm0 -vpxor %ymm13, %ymm13, %ymm13 -vpunpckldq %ymm13, %ymm14, %ymm14 -vpaddq %ymm14, %ymm0, %ymm0 -vmovdqu %ymm0, 64(%rsp) -vpmuludq %ymm11, %ymm9, %ymm14 -vpmuludq %ymm2, %ymm7, %ymm0 -vpaddq %ymm0, %ymm14, %ymm14 -vpmuludq %ymm8, %ymm5, %ymm0 -vpaddq %ymm0, %ymm14, %ymm14 -vpmuludq %ymm6, %ymm3, %ymm0 -vpaddq %ymm0, %ymm14, %ymm14 -vpmuludq %ymm15, %ymm4, %ymm0 -vpaddq %ymm0, %ymm14, %ymm0 -vpunpckhdq %ymm13, %ymm12, %ymm14 -vpsllq $18, %ymm14, %ymm14 -vpaddq %ymm14, %ymm0, %ymm14 -vpmuludq %ymm1, %ymm9, %ymm1 -vpmuludq %ymm10, %ymm7, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vpmuludq %ymm11, %ymm5, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vpmuludq %ymm2, %ymm3, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vpmuludq %ymm8, %ymm4, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vmovdqu 32(%rsp), %ymm0 -vpunpckhdq %ymm13, %ymm0, %ymm0 -vpsllq $6, %ymm0, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vmovdqu 64(%rsp), %ymm0 -vpsrlq $26, %ymm0, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vmovdqu %ymm1, 96(%rsp) -vpmuludq %ymm2, %ymm9, %ymm1 
-vpmuludq %ymm8, %ymm7, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vpmuludq %ymm10, %ymm9, %ymm10 -vpmuludq %ymm11, %ymm7, %ymm11 -vpaddq %ymm11, %ymm10, %ymm7 -vpmuludq %ymm6, %ymm5, %ymm0 -vpaddq %ymm0, %ymm1, %ymm1 -vpmuludq %ymm2, %ymm5, %ymm5 -vpaddq %ymm5, %ymm7, %ymm10 -vpmuludq %ymm15, %ymm3, %ymm15 -vpaddq %ymm15, %ymm1, %ymm1 -vpmuludq %ymm8, %ymm3, %ymm11 -vpaddq %ymm11, %ymm10, %ymm5 -vpunpckldq %ymm13, %ymm12, %ymm10 -vmovdqu 96(%rsp), %ymm12 -vpmuludq 320(%rsp), %ymm4, %ymm0 -vpaddq %ymm0, %ymm1, %ymm15 -vpsrlq $26, %ymm12, %ymm3 -vmovdqu 160(%rsp), %ymm1 -vpmuludq %ymm6, %ymm4, %ymm4 -vpaddq %ymm1, %ymm15, %ymm0 -vpsrlq $26, %ymm14, %ymm15 -vpaddq %ymm4, %ymm5, %ymm11 -vpsllq $12, %ymm10, %ymm4 -vmovdqu 192(%rsp), %ymm10 -vpaddq %ymm15, %ymm0, %ymm0 -vpaddq %ymm4, %ymm11, %ymm5 -vmovdqu 128(%rsp), %ymm11 -vpsrlq $26, %ymm0, %ymm9 -vpaddq %ymm3, %ymm5, %ymm7 -vpand 64(%rsp), %ymm10, %ymm13 -vpand %ymm10, %ymm12, %ymm12 -vpand %ymm10, %ymm7, %ymm5 -vpsrlq $26, %ymm7, %ymm7 -vpmuludq %ymm11, %ymm9, %ymm15 -vpand %ymm10, %ymm14, %ymm9 -vpaddq %ymm15, %ymm13, %ymm3 -vpand %ymm10, %ymm0, %ymm14 -vpaddq %ymm7, %ymm9, %ymm9 -vpand %ymm10, %ymm3, %ymm4 -vpsrlq $26, %ymm3, %ymm3 -vpsrlq $26, %ymm9, %ymm0 -vpand %ymm10, %ymm9, %ymm7 -vpaddq %ymm3, %ymm12, %ymm3 -vpaddq %ymm0, %ymm14, %ymm9 -sarq $5, %r9 -shrq $58, %r9 -addq %rdx, %r9 -sarq $6, %r9 -cmpq $2, %r9 -jl poly1305_blocks_avx2_34 -poly1305_blocks_avx2_31: -vmovdqu %ymm6, 32(%rsp) -lea -64(%rdx), %r9 -vmovdqu %ymm8, 64(%rsp) -vmovdqu %ymm11, 128(%rsp) -vmovdqu %ymm10, 192(%rsp) -vmovdqu %ymm1, 160(%rsp) -vmovdqu (%rsp), %ymm12 -sarq $5, %r9 -shrq $58, %r9 -lea -64(%rdx,%r9), %rdx -sarq $6, %rdx -poly1305_blocks_avx2_32: -vmovdqu 256(%rsp), %ymm15 -incq %r8 -vmovdqu 64(%rcx,%rsi), %ymm11 -vpmuludq 224(%rsp), %ymm9, %ymm8 -vpmuludq %ymm15, %ymm7, %ymm14 -vpaddq %ymm14, %ymm8, %ymm1 -vmovdqu 288(%rsp), %ymm8 -vperm2i128 $32, 96(%rcx,%rsi), %ymm11, %ymm10 -vperm2i128 $49, 96(%rcx,%rsi), %ymm11, %ymm6 -addq $64, %rcx -vpmuludq %ymm8, %ymm5, %ymm13 -vpunpckldq %ymm6, %ymm10, %ymm0 -vpunpckhdq %ymm6, %ymm10, %ymm11 -vpaddq %ymm13, %ymm1, %ymm10 -vpmuludq %ymm12, %ymm3, %ymm6 -vpaddq %ymm6, %ymm10, %ymm14 -vpxor %ymm10, %ymm10, %ymm10 -vpunpckldq %ymm10, %ymm0, %ymm6 -vpunpckhdq %ymm10, %ymm0, %ymm0 -vpmuludq %ymm2, %ymm4, %ymm1 -vpaddq %ymm1, %ymm14, %ymm13 -vpaddq %ymm6, %ymm13, %ymm1 -vmovdqu 64(%rsp), %ymm6 -vmovdqu %ymm1, (%rsp) -vpsrlq $26, %ymm1, %ymm1 -vpmuludq %ymm12, %ymm9, %ymm14 -vpmuludq %ymm2, %ymm7, %ymm13 -vpaddq %ymm13, %ymm14, %ymm14 -vpmuludq %ymm6, %ymm5, %ymm13 -vpaddq %ymm13, %ymm14, %ymm14 -vpmuludq 32(%rsp), %ymm3, %ymm13 -vpaddq %ymm13, %ymm14, %ymm14 -vpmuludq 352(%rsp), %ymm4, %ymm13 -vpaddq %ymm13, %ymm14, %ymm13 -vpunpckhdq %ymm10, %ymm11, %ymm14 -vpsllq $18, %ymm14, %ymm14 -vpaddq %ymm14, %ymm13, %ymm13 -vpmuludq %ymm15, %ymm9, %ymm15 -vpmuludq %ymm8, %ymm7, %ymm14 -vpaddq %ymm14, %ymm15, %ymm15 -vpmuludq %ymm12, %ymm5, %ymm14 -vpaddq %ymm14, %ymm15, %ymm15 -vpmuludq %ymm2, %ymm3, %ymm14 -vpaddq %ymm14, %ymm15, %ymm15 -vpmuludq %ymm6, %ymm4, %ymm14 -vpaddq %ymm14, %ymm15, %ymm14 -vpsllq $6, %ymm0, %ymm15 -vpaddq %ymm15, %ymm14, %ymm14 -vmovdqu 32(%rsp), %ymm15 -vpaddq %ymm1, %ymm14, %ymm1 -vpmuludq %ymm2, %ymm9, %ymm0 -vpmuludq %ymm6, %ymm7, %ymm14 -vpmuludq %ymm8, %ymm9, %ymm9 -vpmuludq %ymm12, %ymm7, %ymm7 -vpaddq %ymm7, %ymm9, %ymm7 -vpaddq %ymm14, %ymm0, %ymm0 -vpsrlq $26, %ymm1, %ymm9 -vpmuludq %ymm15, %ymm5, %ymm14 -vpmuludq %ymm2, %ymm5, %ymm5 -vpaddq %ymm5, %ymm7, %ymm5 -vpaddq %ymm14, %ymm0, %ymm0 
-vpmuludq 352(%rsp), %ymm3, %ymm14 -vpmuludq %ymm6, %ymm3, %ymm3 -vpaddq %ymm3, %ymm5, %ymm5 -vpaddq %ymm14, %ymm0, %ymm0 -vpmuludq 320(%rsp), %ymm4, %ymm14 -vpmuludq %ymm15, %ymm4, %ymm4 -vpaddq %ymm4, %ymm5, %ymm5 -vpaddq %ymm14, %ymm0, %ymm0 -vpunpckldq %ymm10, %ymm11, %ymm4 -vpaddq 160(%rsp), %ymm0, %ymm14 -vpsrlq $26, %ymm13, %ymm0 -vpsllq $12, %ymm4, %ymm3 -vpaddq %ymm0, %ymm14, %ymm14 -vpaddq %ymm3, %ymm5, %ymm7 -vpsrlq $26, %ymm14, %ymm0 -vpaddq %ymm9, %ymm7, %ymm10 -vmovdqu 192(%rsp), %ymm9 -vpsrlq $26, %ymm10, %ymm11 -vpand (%rsp), %ymm9, %ymm6 -vpand %ymm9, %ymm13, %ymm13 -vpand %ymm9, %ymm1, %ymm1 -vpand %ymm9, %ymm14, %ymm14 -vpand %ymm9, %ymm10, %ymm5 -vpmuludq 128(%rsp), %ymm0, %ymm8 -vpaddq %ymm8, %ymm6, %ymm15 -vpaddq %ymm11, %ymm13, %ymm0 -vpsrlq $26, %ymm15, %ymm3 -vpand %ymm9, %ymm0, %ymm7 -vpsrlq $26, %ymm0, %ymm0 -vpand %ymm9, %ymm15, %ymm4 -vpaddq %ymm3, %ymm1, %ymm3 -vpaddq %ymm0, %ymm14, %ymm9 -cmpq %rdx, %r8 -jb poly1305_blocks_avx2_32 -poly1305_blocks_avx2_34: -testq $64, %rax -jne poly1305_blocks_avx2_36 -poly1305_blocks_avx2_35: -vpshufd $8, %ymm4, %ymm0 -vpshufd $8, %ymm3, %ymm3 -vpshufd $8, %ymm5, %ymm5 -vpshufd $8, %ymm7, %ymm7 -vpshufd $8, %ymm9, %ymm9 -vpermq $8, %ymm0, %ymm1 -vpermq $8, %ymm3, %ymm2 -vpermq $8, %ymm5, %ymm4 -vpermq $8, %ymm7, %ymm6 -vpermq $8, %ymm9, %ymm11 -vperm2i128 $32, %ymm2, %ymm1, %ymm8 -vperm2i128 $32, %ymm6, %ymm4, %ymm10 -vmovdqu %ymm8, (%rdi) -vmovdqu %ymm10, 32(%rdi) -vmovdqu %xmm11, 64(%rdi) -jmp poly1305_blocks_avx2_37 -poly1305_blocks_avx2_36: -vpermq $245, %ymm4, %ymm0 -vpaddq %ymm0, %ymm4, %ymm4 -vpermq $245, %ymm3, %ymm1 -vpaddq %ymm1, %ymm3, %ymm10 -vpermq $245, %ymm5, %ymm3 -vpermq $170, %ymm4, %ymm6 -vpaddq %ymm3, %ymm5, %ymm13 -vpaddq %ymm6, %ymm4, %ymm8 -vpermq $170, %ymm10, %ymm11 -vpermq $245, %ymm7, %ymm5 -vpaddq %ymm11, %ymm10, %ymm12 -vpaddq %ymm5, %ymm7, %ymm7 -vpermq $170, %ymm13, %ymm14 -vpermq $245, %ymm9, %ymm2 -vpaddq %ymm14, %ymm13, %ymm15 -vpaddq %ymm2, %ymm9, %ymm9 -vpermq $170, %ymm7, %ymm0 -vpaddq %ymm0, %ymm7, %ymm1 -vpermq $170, %ymm9, %ymm2 -vpaddq %ymm2, %ymm9, %ymm3 -vmovd %xmm8, %r9d -movl %r9d, %r8d -shrl $26, %r8d -andq $67108863, %r9 -vmovd %xmm12, %esi -addl %r8d, %esi -movl %esi, %r11d -shrl $26, %esi -andq $67108863, %r11 -vmovd %xmm15, %ecx -addl %esi, %ecx -movl %ecx, %eax -shrl $26, %eax -andq $67108863, %rcx -shlq $8, %rcx -vmovd %xmm1, %r8d -addl %eax, %r8d -movl %r8d, %r10d -shrl $26, %r8d -andq $67108863, %r10 -movq %r10, %rax -shrq $10, %rax -shlq $34, %r10 -vmovd %xmm3, %edx -addl %r8d, %edx -shlq $16, %rdx -orq %rdx, %rax -movq %rax, %r8 -shrq $42, %r8 -lea (%r8,%r8,4), %rdx -movq %r11, %r8 -shlq $26, %r8 -orq %r8, %r9 -movq $0xfffffffffff, %r8 -shrq $18, %r11 -andq %r8, %r9 -addq %r9, %rdx -orq %rcx, %r11 -movq %rdx, %rsi -orq %r10, %r11 -shrq $44, %rsi -andq %r8, %r11 -addq %r11, %rsi -movq $0x3ffffffffff, %r9 -movq %rsi, %r10 -andq %r9, %rax -shrq $44, %r10 -andq %r8, %rdx -addq %r10, %rax -movq %r8, %rcx -andq %rax, %r9 -andq %r8, %rsi -shrq $42, %rax -movq $0xfffffc0000000000, %r10 -lea (%rax,%rax,4), %r11 -addq %r11, %rdx -andq %rdx, %rcx -shrq $44, %rdx -addq %rdx, %rsi -lea 5(%rcx), %rdx -movq %rdx, %r11 -andq %r8, %rdx -shrq $44, %r11 -addq %rsi, %r11 -movq %r11, %rax -andq %r11, %r8 -shrq $44, %rax -addq %r9, %rax -addq %r10, %rax -movq %rax, %r10 -shrq $63, %r10 -decq %r10 -andn %rcx, %r10, %rcx -andq %r10, %rdx -orq %rdx, %rcx -andq %r10, %r8 -andn %rsi, %r10, %rdx -andq %r10, %rax -andn %r9, %r10, %rsi -orq %r8, %rdx -orq %rax, %rsi -movq %rcx, (%rdi) -movq %rdx, 
8(%rdi) -movq %rsi, 16(%rdi) -poly1305_blocks_avx2_37: -vzeroupper -movq %rbp, %rsp -popq %rbp -ret -FN_END poly1305_blocks_avx2 - -GLOBAL_HIDDEN_FN poly1305_init_ext_avx2 -poly1305_init_ext_avx2_local: -pushq %r12 -pushq %r13 -pushq %r14 -pushq %r15 -pushq %rbx -movq %rdi, %r10 -vpxor %ymm0, %ymm0, %ymm0 -movq %rdx, %r12 -vpxor %xmm1, %xmm1, %xmm1 -vmovdqu %xmm1, 64(%r10) -vmovdqu %ymm0, (%r10) -vmovdqu %ymm0, 32(%r10) -movq $-1, %r8 -testq %r12, %r12 -movq 8(%rsi), %rdi -movq $0xffc0fffffff, %r9 -movq %rdi, %rcx -cmove %r8, %r12 -movq (%rsi), %r8 -andq %r8, %r9 -shrq $44, %r8 -movq $0xfffffc0ffff, %r11 -shlq $20, %rcx -shrq $24, %rdi -orq %rcx, %r8 -movq $0xffffffc0f, %rcx -andq %r11, %r8 -andq %rcx, %rdi -movq 16(%rsi), %rcx -movq %rcx, 160(%r10) -movq %r9, %rcx -movq 24(%rsi), %rdx -movq %rdx, 168(%r10) -movl %r9d, %edx -andl $67108863, %edx -movl %edx, 80(%r10) -movq %r8, %rdx -shrq $26, %rcx -shlq $18, %rdx -orq %rdx, %rcx -movq %r8, %rdx -shrq $8, %rdx -andl $67108863, %ecx -andl $67108863, %edx -movl %ecx, 84(%r10) -movq %r8, %rcx -movl %edx, 88(%r10) -movq %rdi, %rdx -shrq $34, %rcx -shlq $10, %rdx -orq %rdx, %rcx -movq %rdi, %rdx -shrq $16, %rdx -andl $67108863, %ecx -movl %ecx, 92(%r10) -movl %edx, 96(%r10) -cmpq $16, %r12 -jbe poly1305_init_ext_avx2_7 -poly1305_init_ext_avx2_2: -movq %r9, %rax -lea (%rdi,%rdi,4), %r14 -mulq %r9 -shlq $2, %r14 -movq %rax, %r11 -movq %rdx, %r15 -lea (%r8,%r8), %rax -mulq %r14 -addq %rax, %r11 -lea (%r9,%r9), %rax -movq %r11, %rsi -adcq %rdx, %r15 -mulq %r8 -movq %rax, %rbx -movq %r14, %rax -movq %rdx, %rcx -lea (%rdi,%rdi), %r14 -mulq %rdi -addq %rax, %rbx -movq %r8, %rax -adcq %rdx, %rcx -mulq %r8 -shlq $20, %r15 -movq %rax, %r13 -shrq $44, %rsi -movq %r9, %rax -orq %rsi, %r15 -movq %rdx, %rsi -mulq %r14 -addq %r15, %rbx -movq %rbx, %r15 -adcq $0, %rcx -addq %rax, %r13 -adcq %rdx, %rsi -shlq $20, %rcx -shrq $44, %r15 -orq %r15, %rcx -addq %rcx, %r13 -movq $0xfffffffffff, %rcx -movq %r13, %rdx -adcq $0, %rsi -andq %rcx, %r11 -shlq $22, %rsi -andq %rcx, %rbx -shrq $42, %rdx -orq %rdx, %rsi -lea (%rsi,%rsi,4), %rsi -addq %rsi, %r11 -movq %rcx, %rsi -andq %r11, %rsi -shrq $44, %r11 -addq %r11, %rbx -movq $0x3ffffffffff, %r11 -andq %rbx, %rcx -andq %r11, %r13 -shrq $44, %rbx -movq %rsi, %r11 -movq %rcx, %rdx -addq %r13, %rbx -shrq $26, %r11 -movq %rbx, %r15 -shlq $18, %rdx -movq %rcx, %r14 -orq %rdx, %r11 -movq %rcx, %rdx -shrq $34, %rdx -movl %esi, %r13d -shlq $10, %r15 -andl $67108863, %r13d -orq %r15, %rdx -andl $67108863, %r11d -shrq $8, %r14 -andl $67108863, %edx -movl %edx, 112(%r10) -movq %rbx, %rdx -shrq $16, %rdx -andl $67108863, %r14d -movl %r13d, 100(%r10) -movl %r11d, 104(%r10) -movl %r14d, 108(%r10) -movl %edx, 116(%r10) -cmpq $48, %r12 -jbe poly1305_init_ext_avx2_4 -poly1305_init_ext_avx2_3: -movq %rsi, %rax -lea (%rbx,%rbx,4), %r15 -mulq %rsi -shlq $2, %r15 -movq %rax, %r13 -movq %rdx, %r12 -lea (%rcx,%rcx), %rax -mulq %r15 -addq %rax, %r13 -lea (%rsi,%rsi), %rax -movq %r15, -16(%rsp) -adcq %rdx, %r12 -mulq %rcx -movq %rax, %r14 -movq %rbx, %rax -movq %rdx, %r11 -mulq %r15 -addq %rax, %r14 -movq %rcx, %rax -movq %r13, %r15 -adcq %rdx, %r11 -mulq %rcx -shlq $20, %r12 -shrq $44, %r15 -orq %r15, %r12 -movq %rax, %r15 -addq %r12, %r14 -movq %rdx, %r12 -movq %rsi, %rax -lea (%rbx,%rbx), %rdx -adcq $0, %r11 -mulq %rdx -addq %rax, %r15 -adcq %rdx, %r12 -movq %r14, %rdx -shlq $20, %r11 -shrq $44, %rdx -orq %rdx, %r11 -addq %r11, %r15 -movq $0xfffffffffff, %r11 -movq %r15, %rdx -adcq $0, %r12 -andq %r11, %r13 -shlq $22, %r12 -andq %r11, %r14 
-shrq $42, %rdx -orq %rdx, %r12 -lea (%r12,%r12,4), %r12 -addq %r12, %r13 -movq %r11, %r12 -andq %r13, %r12 -shrq $44, %r13 -addq %r13, %r14 -movq $0x3ffffffffff, %r13 -andq %r14, %r11 -andq %r13, %r15 -shrq $44, %r14 -movq %r11, %rdx -shlq $18, %rdx -addq %r14, %r15 -movl %r12d, %r14d -movq %r11, %r13 -shrq $26, %r12 -andl $67108863, %r14d -orq %rdx, %r12 -movq %r15, %rdx -shrq $34, %r11 -shlq $10, %rdx -andl $67108863, %r12d -orq %rdx, %r11 -shrq $8, %r13 -andl $67108863, %r11d -movl %r11d, 152(%r10) -andl $67108863, %r13d -shrq $16, %r15 -movl %r14d, 140(%r10) -movl %r12d, 144(%r10) -movl %r13d, 148(%r10) -movl %r15d, 156(%r10) -movq -16(%rsp), %r11 -jmp poly1305_init_ext_avx2_6 -poly1305_init_ext_avx2_4: -cmpq $32, %r12 -jbe poly1305_init_ext_avx2_7 -poly1305_init_ext_avx2_5: -lea (%rbx,%rbx,4), %r11 -shlq $2, %r11 -poly1305_init_ext_avx2_6: -movq %r9, %rax -lea (%rcx,%rcx,4), %r13 -mulq %rsi -shlq $2, %r13 -movq %rax, %r14 -movq %rdi, %rax -movq %rdx, %r12 -mulq %r13 -addq %rax, %r14 -movq %r8, %rax -adcq %rdx, %r12 -mulq %r11 -addq %rax, %r14 -movq %r8, %rax -adcq %rdx, %r12 -mulq %rsi -movq %rax, %r15 -movq %r9, %rax -movq %rdx, %r13 -mulq %rcx -addq %rax, %r15 -movq %r11, %rax -movq %r14, %r11 -adcq %rdx, %r13 -mulq %rdi -addq %rax, %r15 -movq %rdi, %rax -adcq %rdx, %r13 -mulq %rsi -shlq $20, %r12 -movq %rax, %rsi -shrq $44, %r11 -movq %r8, %rax -orq %r11, %r12 -movq %rdx, %rdi -mulq %rcx -addq %r12, %r15 -movq %r15, %rcx -adcq $0, %r13 -addq %rax, %rsi -movq %r9, %rax -movq $0xfffffffffff, %r9 -adcq %rdx, %rdi -andq %r9, %r14 -mulq %rbx -addq %rax, %rsi -adcq %rdx, %rdi -movq %r9, %rdx -shlq $20, %r13 -andq %r9, %r15 -shrq $44, %rcx -orq %rcx, %r13 -addq %r13, %rsi -movq %rsi, %rbx -adcq $0, %rdi -shlq $22, %rdi -shrq $42, %rbx -orq %rbx, %rdi -lea (%rdi,%rdi,4), %r8 -addq %r8, %r14 -andq %r14, %rdx -shrq $44, %r14 -addq %r14, %r15 -movq $0x3ffffffffff, %r14 -andq %r15, %r9 -andq %r14, %rsi -shrq $44, %r15 -movq %r9, %rax -addq %r15, %rsi -movl %edx, %r15d -movq %rsi, %rbx -movq %r9, %rcx -shrq $26, %rdx -andl $67108863, %r15d -shlq $18, %rax -shrq $34, %r9 -orq %rax, %rdx -shlq $10, %rbx -shrq $8, %rcx -orq %rbx, %r9 -shrq $16, %rsi -andl $67108863, %edx -andl $67108863, %ecx -andl $67108863, %r9d -movl %r15d, 120(%r10) -movl %edx, 124(%r10) -movl %ecx, 128(%r10) -movl %r9d, 132(%r10) -movl %esi, 136(%r10) -poly1305_init_ext_avx2_7: -movq $0, 176(%r10) -vzeroupper -popq %rbx -popq %r15 -popq %r14 -popq %r13 -popq %r12 -ret -FN_END poly1305_init_ext_avx2 - diff --git a/src/libcryptobox/poly1305/constants.S b/src/libcryptobox/poly1305/constants.S deleted file mode 100644 index a4797a2aa..000000000 --- a/src/libcryptobox/poly1305/constants.S +++ /dev/null @@ -1,21 +0,0 @@ -SECTION_RODATA - -.p2align 4 -poly1305_constants_x86: -/* 0 */ poly1305_x86_scale: .long 0x0,0x37f40000 -/* 8 */ poly1305_x86_two32: .long 0x0,0x41f00000 -/* 16 */ poly1305_x86_two64: .long 0x0,0x43f00000 -/* 24 */ poly1305_x86_two96: .long 0x0,0x45f00000 -/* 32 */ poly1305_x86_alpha32: .long 0x0,0x45e80000 -/* 40 */ poly1305_x86_alpha64: .long 0x0,0x47e80000 -/* 48 */ poly1305_x86_alpha96: .long 0x0,0x49e80000 -/* 56 */ poly1305_x86_alpha130: .long 0x0,0x4c080000 -/* 64 */ poly1305_x86_doffset0: .long 0x0,0x43300000 -/* 72 */ poly1305_x86_doffset1: .long 0x0,0x45300000 -/* 80 */ poly1305_x86_doffset2: .long 0x0,0x47300000 -/* 88 */ poly1305_x86_doffset3: .long 0x0,0x49300000 -/* 96 */ poly1305_x86_doffset3minustwo128: .long 0x0,0x492ffffe -/* 104 */ poly1305_x86_hoffset0: .long 0xfffffffb,0x43300001 -/* 112 */ 
poly1305_x86_hoffset1: .long 0xfffffffe,0x45300001 -/* 120 */ poly1305_x86_hoffset2: .long 0xfffffffe,0x47300001 -/* 124 */ poly1305_x86_hoffset3: .long 0xfffffffe,0x49300003 diff --git a/src/libcryptobox/poly1305/poly1305.c b/src/libcryptobox/poly1305/poly1305.c deleted file mode 100644 index 4adea30af..000000000 --- a/src/libcryptobox/poly1305/poly1305.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2015, Vsevolod Stakhov - * Copyright (c) 2015, Andrew Moon - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "cryptobox.h" -#include "poly1305.h" -#include "platform_config.h" - -extern unsigned long cpu_config; - -typedef struct poly1305_state_internal_t -{ - unsigned char opaque[192]; /* largest state required (AVX2) */ - size_t leftover, block_size; - unsigned char buffer[64]; /* largest blocksize (AVX2) */ -} poly1305_state_internal; - -typedef struct poly1305_impl_t -{ - unsigned long cpu_flags; - const char *desc; - - size_t (*block_size)(void); - void (*init_ext)(void *state, const poly1305_key *key, size_t bytes_hint); - void (*blocks)(void *state, const unsigned char *in, size_t inlen); - void (*finish_ext)(void *state, const unsigned char *in, size_t remaining, - unsigned char *mac); - void (*auth)(unsigned char *mac, const unsigned char *in, size_t inlen, - const poly1305_key *key); -} poly1305_impl_t; - -#define POLY1305_DECLARE(ext) \ - size_t poly1305_block_size_##ext(void); \ - void poly1305_init_ext_##ext(void *state, const poly1305_key *key, size_t bytes_hint); \ - void poly1305_blocks_##ext(void *state, const unsigned char *in, size_t inlen); \ - void poly1305_finish_ext_##ext(void *state, const unsigned char *in, size_t remaining, unsigned char *mac); \ - void poly1305_auth_##ext(unsigned char *mac, const unsigned char *m, size_t inlen, const poly1305_key *key); - -#define POLY1305_IMPL(cpuflags, desc, ext) \ - {(cpuflags), desc, poly1305_block_size_##ext, poly1305_init_ext_##ext, poly1305_blocks_##ext, poly1305_finish_ext_##ext, poly1305_auth_##ext} - -#if defined(HAVE_AVX2) -POLY1305_DECLARE(avx2) -#define POLY1305_AVX2 POLY1305_IMPL(CPUID_AVX2, "avx2", avx2) -#endif -#if defined(HAVE_AVX) -POLY1305_DECLARE(avx) -#define POLY1305_AVX POLY1305_IMPL(CPUID_AVX, "avx", avx) -#endif -#if defined(HAVE_SSE2) -POLY1305_DECLARE(sse2) 
-#define POLY1305_SSE2 POLY1305_IMPL(CPUID_SSE2, "sse2", sse2) -#endif - -POLY1305_DECLARE(ref) -#define POLY1305_GENERIC POLY1305_IMPL(0, "generic", ref) - -/* list implementations from most optimized to least, with generic as the last entry */ -static const poly1305_impl_t poly1305_list[] = -{ -POLY1305_GENERIC, - -#if defined(POLY1305_AVX2) - POLY1305_AVX2, -#endif -#if defined(POLY1305_AVX) - POLY1305_AVX, -#endif -#if defined(POLY1305_SSE2) - POLY1305_SSE2, -#endif -}; - -static const poly1305_impl_t *poly1305_opt = &poly1305_list[0]; - -/* is the pointer aligned on a word boundary? */ -static int poly1305_is_aligned(const void *p) -{ - return ((size_t) p & (sizeof(size_t) - 1)) == 0; -} - -const char* -poly1305_load(void) -{ - guint i; - - if (cpu_config != 0) { - for (i = 0; i < G_N_ELEMENTS(poly1305_list); i++) { - if (poly1305_list[i].cpu_flags & cpu_config) { - poly1305_opt = &poly1305_list[i]; - break; - } - } - } - - return poly1305_opt->desc; -} - -/* processes inlen bytes (full blocks only), handling input alignment */ -static void poly1305_consume(poly1305_state_internal *state, - const unsigned char *in, size_t inlen) -{ - int in_aligned; - - /* it's ok to call with 0 bytes */ - if (!inlen) - return; - - /* if everything is aligned, handle directly */ - in_aligned = poly1305_is_aligned (in); - if (in_aligned) { - poly1305_opt->blocks (state->opaque, in, inlen); - return; - } - - /* copy the unaligned data to an aligned buffer and process in chunks */ - while (inlen) { - unsigned char buffer[1024]; - const size_t bytes = (inlen > sizeof(buffer)) ? sizeof(buffer) : inlen; - memcpy (buffer, in, bytes); - poly1305_opt->blocks (state->opaque, buffer, bytes); - in += bytes; - inlen -= bytes; - } -} - -void poly1305_init(poly1305_state *S, const poly1305_key *key) -{ - poly1305_state_internal *state = (poly1305_state_internal *) S; - poly1305_opt->init_ext (state->opaque, key, 0); - state->leftover = 0; - state->block_size = poly1305_opt->block_size (); -} - -void poly1305_init_ext(poly1305_state *S, const poly1305_key *key, - size_t bytes_hint) -{ - poly1305_state_internal *state = (poly1305_state_internal *) S; - poly1305_opt->init_ext (state->opaque, key, bytes_hint); - state->leftover = 0; - state->block_size = poly1305_opt->block_size (); -} - -void poly1305_update(poly1305_state *S, const unsigned char *in, size_t inlen) -{ - poly1305_state_internal *state = (poly1305_state_internal *) S; - - /* handle leftover */ - if (state->leftover) { - size_t want = (state->block_size - state->leftover); - if (want > inlen) - want = inlen; - memcpy (state->buffer + state->leftover, in, want); - inlen -= want; - in += want; - state->leftover += want; - if (state->leftover < state->block_size) - return; - poly1305_opt->blocks (state->opaque, state->buffer, state->block_size); - state->leftover = 0; - } - - /* process full blocks */ - if (inlen >= state->block_size) { - size_t want = (inlen & ~(state->block_size - 1)); - poly1305_consume (state, in, want); - in += want; - inlen -= want; - } - - /* store leftover */ - if (inlen) { - memcpy (state->buffer + state->leftover, in, inlen); - state->leftover += inlen; - } -} - -void poly1305_finish(poly1305_state *S, unsigned char *mac) -{ - poly1305_state_internal *state = (poly1305_state_internal *) S; - poly1305_opt->finish_ext (state->opaque, state->buffer, state->leftover, - mac); -} - -void poly1305_auth(unsigned char *mac, const unsigned char *in, size_t inlen, - const poly1305_key *key) -{ - poly1305_opt->auth (mac, in, inlen, key); -} - 
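The one-shot poly1305_auth() above and poly1305_verify() just below are the entry points this dispatcher exposed; after this commit the same primitive comes from libsodium, whose crypto_onetimeauth API is also Poly1305. A minimal sketch of the replacement call, using only documented libsodium functions (the wrapper name and buffers are illustrative, not rspamd code):

#include <sodium.h>

/* One-shot Poly1305: compute a 16-byte tag, then check it in constant
 * time, mirroring the deleted poly1305_auth()/poly1305_verify() pair.
 * As with any one-time authenticator, the key must never be reused. */
static int
tag_and_verify(const unsigned char *msg, unsigned long long msglen,
               const unsigned char key[crypto_onetimeauth_KEYBYTES])
{
    unsigned char tag[crypto_onetimeauth_BYTES];

    if (sodium_init() < 0) {
        return -1; /* library could not be initialised */
    }

    crypto_onetimeauth(tag, msg, msglen, key);

    /* returns 0 on match, -1 otherwise; the comparison is constant time */
    return crypto_onetimeauth_verify(tag, msg, msglen, key);
}

libsodium also ships incremental crypto_onetimeauth_init()/_update()/_final() calls, so the streaming poly1305_init()/poly1305_update()/poly1305_finish() interface deleted here maps over directly.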
-int poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]) -{ - size_t i; - unsigned int dif = 0; - - for (i = 0; i < 16; i++) { - dif |= (mac1[i] ^ mac2[i]); - } - - dif = (dif - 1) >> ((sizeof(unsigned int) * 8) - 1); - return (dif & 1); -} diff --git a/src/libcryptobox/poly1305/poly1305.h b/src/libcryptobox/poly1305/poly1305.h deleted file mode 100644 index 902a9c288..000000000 --- a/src/libcryptobox/poly1305/poly1305.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef POLY1305_H -#define POLY1305_H - -#include <stddef.h> - -#if defined(__cplusplus) -extern "C" -{ -#endif - -typedef struct poly1305_state -{ - unsigned char opaque[320]; -} poly1305_state; - -typedef struct poly1305_key -{ - unsigned char b[32]; -} poly1305_key; - -void poly1305_init(poly1305_state *S, const poly1305_key *key); -void poly1305_init_ext(poly1305_state *S, const poly1305_key *key, - size_t bytes_hint); -void poly1305_update(poly1305_state *S, const unsigned char *in, size_t inlen); -void poly1305_finish(poly1305_state *S, unsigned char *mac); - -void poly1305_auth(unsigned char *mac, const unsigned char *in, size_t inlen, - const poly1305_key *key); -int poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]); - -const char* poly1305_load(void); - -#if defined(__cplusplus) -} -#endif - -#endif /* POLY1305_H */ - diff --git a/src/libcryptobox/poly1305/poly1305_internal.h b/src/libcryptobox/poly1305/poly1305_internal.h deleted file mode 100644 index 21b7aa7d2..000000000 --- a/src/libcryptobox/poly1305/poly1305_internal.h +++ /dev/null @@ -1,19 +0,0 @@ -#if defined(_MSC_VER) - #include <intrin.h> - - typedef struct uint128_t { - unsigned long long lo; - unsigned long long hi; - } uint128_t; - - #define POLY1305_NOINLINE __declspec(noinline) -#elif defined(__GNUC__) - #pragma GCC system_header - #if defined(__SIZEOF_INT128__) - typedef unsigned __int128 uint128_t; - #else - typedef unsigned uint128_t __attribute__((mode(TI))); - #endif - - #define POLY1305_NOINLINE __attribute__((noinline)) -#endif diff --git a/src/libcryptobox/poly1305/ref-32.c b/src/libcryptobox/poly1305/ref-32.c deleted file mode 100644 index 9f0ea998b..000000000 --- a/src/libcryptobox/poly1305/ref-32.c +++ /dev/null @@ -1,238 +0,0 @@ -/* - poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition - - assumes the existence of uint32_t and uint64_t -*/ - -#include "config.h" -#include "poly1305.h" - -enum { - POLY1305_BLOCK_SIZE = 16 -}; - -typedef struct poly1305_state_ref_t { - uint32_t r[5]; - uint32_t h[5]; - uint32_t pad[4]; - unsigned char final; -} poly1305_state_ref_t; - -/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ -static uint32_t -U8TO32(const unsigned char *p) { - return - (((uint32_t)(p[0] & 0xff) ) | - ((uint32_t)(p[1] & 0xff) << 8) | - ((uint32_t)(p[2] & 0xff) << 16) | - ((uint32_t)(p[3] & 0xff) << 24)); -} - -/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ -static void -U32TO8(unsigned char *p, uint32_t v) { - p[0] = (unsigned char)((v ) & 0xff); - p[1] = (unsigned char)((v >> 8) & 0xff); - p[2] = (unsigned char)((v >> 16) & 0xff); - p[3] = (unsigned char)((v >> 24) & 0xff); -} - -size_t -poly1305_block_size_ref(void) { - return POLY1305_BLOCK_SIZE; -} - -void -poly1305_init_ext_ref(void *state, const poly1305_key *key, size_t bytes_hint) { - poly1305_state_ref_t *st = (poly1305_state_ref_t *)state; - - /* bytes_hint not used */ - (void)bytes_hint; - - /* r &= 
0xffffffc0ffffffc0ffffffc0fffffff */ - st->r[0] = (U8TO32(&key->b[ 0]) ) & 0x3ffffff; - st->r[1] = (U8TO32(&key->b[ 3]) >> 2) & 0x3ffff03; - st->r[2] = (U8TO32(&key->b[ 6]) >> 4) & 0x3ffc0ff; - st->r[3] = (U8TO32(&key->b[ 9]) >> 6) & 0x3f03fff; - st->r[4] = (U8TO32(&key->b[12]) >> 8) & 0x00fffff; - - /* h = 0 */ - st->h[0] = 0; - st->h[1] = 0; - st->h[2] = 0; - st->h[3] = 0; - st->h[4] = 0; - - /* save pad for later */ - st->pad[0] = U8TO32(&key->b[16]); - st->pad[1] = U8TO32(&key->b[20]); - st->pad[2] = U8TO32(&key->b[24]); - st->pad[3] = U8TO32(&key->b[28]); - - st->final = 0; -} - -void -poly1305_blocks_ref(void *state, const unsigned char *in, size_t inlen) { - poly1305_state_ref_t *st = (poly1305_state_ref_t *)state; - const uint32_t hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */ - uint32_t r0,r1,r2,r3,r4; - uint32_t s1,s2,s3,s4; - uint32_t h0,h1,h2,h3,h4; - uint64_t d0,d1,d2,d3,d4; - uint32_t c; - - r0 = st->r[0]; - r1 = st->r[1]; - r2 = st->r[2]; - r3 = st->r[3]; - r4 = st->r[4]; - - s1 = r1 * 5; - s2 = r2 * 5; - s3 = r3 * 5; - s4 = r4 * 5; - - h0 = st->h[0]; - h1 = st->h[1]; - h2 = st->h[2]; - h3 = st->h[3]; - h4 = st->h[4]; - - while (inlen >= POLY1305_BLOCK_SIZE) { - /* h += m[i] */ - h0 += (U8TO32(in+ 0) ) & 0x3ffffff; - h1 += (U8TO32(in+ 3) >> 2) & 0x3ffffff; - h2 += (U8TO32(in+ 6) >> 4) & 0x3ffffff; - h3 += (U8TO32(in+ 9) >> 6) & 0x3ffffff; - h4 += (U8TO32(in+12) >> 8) | hibit; - - /* h *= r */ - d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1); - d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2); - d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + ((uint64_t)h4 * s3); - d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + ((uint64_t)h4 * s4); - d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + ((uint64_t)h4 * r0); - - /* (partial) h %= p */ - c = (uint32_t)(d0 >> 26); h0 = (uint32_t)d0 & 0x3ffffff; - d1 += c; c = (uint32_t)(d1 >> 26); h1 = (uint32_t)d1 & 0x3ffffff; - d2 += c; c = (uint32_t)(d2 >> 26); h2 = (uint32_t)d2 & 0x3ffffff; - d3 += c; c = (uint32_t)(d3 >> 26); h3 = (uint32_t)d3 & 0x3ffffff; - d4 += c; c = (uint32_t)(d4 >> 26); h4 = (uint32_t)d4 & 0x3ffffff; - h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; - h1 += c; - - in += POLY1305_BLOCK_SIZE; - inlen -= POLY1305_BLOCK_SIZE; - } - - st->h[0] = h0; - st->h[1] = h1; - st->h[2] = h2; - st->h[3] = h3; - st->h[4] = h4; -} - -void -poly1305_finish_ext_ref(void *state, const unsigned char *in, size_t remaining, unsigned char mac[16]) { - poly1305_state_ref_t *st = (poly1305_state_ref_t *)state; - uint32_t h0,h1,h2,h3,h4,c; - uint32_t g0,g1,g2,g3,g4; - uint64_t f; - uint32_t mask; - - /* process the remaining block */ - if (remaining) { - unsigned char final[POLY1305_BLOCK_SIZE] = {0}; - size_t i; - for (i = 0; i < remaining; i++) - final[i] = in[i]; - final[remaining] = 1; - st->final = 1; - poly1305_blocks_ref(st, final, POLY1305_BLOCK_SIZE); - } - - /* fully carry h */ - h0 = st->h[0]; - h1 = st->h[1]; - h2 = st->h[2]; - h3 = st->h[3]; - h4 = st->h[4]; - - c = h1 >> 26; h1 = h1 & 0x3ffffff; - h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff; - h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff; - h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff; - h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff; - h1 += c; - - /* compute h + -p */ - g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff; - g1 = 
h1 + c; c = g1 >> 26; g1 &= 0x3ffffff; - g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff; - g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff; - g4 = h4 + c - (1 << 26); - - /* select h if h < p, or h + -p if h >= p */ - mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1; - g0 &= mask; - g1 &= mask; - g2 &= mask; - g3 &= mask; - g4 &= mask; - mask = ~mask; - h0 = (h0 & mask) | g0; - h1 = (h1 & mask) | g1; - h2 = (h2 & mask) | g2; - h3 = (h3 & mask) | g3; - h4 = (h4 & mask) | g4; - - /* h = h % (2^128) */ - h0 = ((h0 ) | (h1 << 26)) & 0xffffffff; - h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; - h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; - h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; - - /* mac = (h + pad) % (2^128) */ - f = (uint64_t)h0 + st->pad[0] ; h0 = (uint32_t)f; - f = (uint64_t)h1 + st->pad[1] + (f >> 32); h1 = (uint32_t)f; - f = (uint64_t)h2 + st->pad[2] + (f >> 32); h2 = (uint32_t)f; - f = (uint64_t)h3 + st->pad[3] + (f >> 32); h3 = (uint32_t)f; - - U32TO8(mac + 0, h0); - U32TO8(mac + 4, h1); - U32TO8(mac + 8, h2); - U32TO8(mac + 12, h3); - - /* zero out the state */ - st->h[0] = 0; - st->h[1] = 0; - st->h[2] = 0; - st->h[3] = 0; - st->h[4] = 0; - st->r[0] = 0; - st->r[1] = 0; - st->r[2] = 0; - st->r[3] = 0; - st->r[4] = 0; - st->pad[0] = 0; - st->pad[1] = 0; - st->pad[2] = 0; - st->pad[3] = 0; -} - -void -poly1305_auth_ref(unsigned char mac[16], const unsigned char *in, size_t inlen, const poly1305_key *key) { - poly1305_state_ref_t st; - size_t blocks; - poly1305_init_ext_ref(&st, key, inlen); - blocks = (inlen & ~(POLY1305_BLOCK_SIZE - 1)); - if (blocks) { - poly1305_blocks_ref(&st, in, blocks); - in += blocks; - inlen -= blocks; - } - poly1305_finish_ext_ref(&st, in, inlen, mac); -} - diff --git a/src/libcryptobox/poly1305/ref-64.c b/src/libcryptobox/poly1305/ref-64.c deleted file mode 100644 index cceb1476d..000000000 --- a/src/libcryptobox/poly1305/ref-64.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition - - assumes the existence of uint64_t and uint128_t -*/ - -#include "config.h" -#include "poly1305.h" -#include "poly1305_internal.h" - -#define POLY1305_BLOCK_SIZE 16 - -typedef struct poly1305_state_ref_t { - uint64_t r[3]; - uint64_t h[3]; - uint64_t pad[2]; - unsigned char final; -} poly1305_state_ref_t; - -/* interpret eight 8 bit unsigned integers as a 64 bit unsigned integer in little endian */ -static uint64_t -U8TO64(const unsigned char *p) { - return - ((uint64_t)p[0] ) | - ((uint64_t)p[1] << 8) | - ((uint64_t)p[2] << 16) | - ((uint64_t)p[3] << 24) | - ((uint64_t)p[4] << 32) | - ((uint64_t)p[5] << 40) | - ((uint64_t)p[6] << 48) | - ((uint64_t)p[7] << 56); -} - -/* store a 64 bit unsigned integer as eight 8 bit unsigned integers in little endian */ -static void -U64TO8(unsigned char *p, uint64_t v) { - p[0] = (unsigned char)(v ) & 0xff; - p[1] = (unsigned char)(v >> 8) & 0xff; - p[2] = (unsigned char)(v >> 16) & 0xff; - p[3] = (unsigned char)(v >> 24) & 0xff; - p[4] = (unsigned char)(v >> 32) & 0xff; - p[5] = (unsigned char)(v >> 40) & 0xff; - p[6] = (unsigned char)(v >> 48) & 0xff; - p[7] = (unsigned char)(v >> 56) & 0xff; -} - -size_t -poly1305_block_size_ref(void) { - return POLY1305_BLOCK_SIZE; -} - -void -poly1305_init_ext_ref(void *state, const poly1305_key *key, size_t bytes_hint) { - poly1305_state_ref_t *st = (poly1305_state_ref_t *)state; - uint64_t t0, t1; - - /* bytes_hint not used */ - (void)bytes_hint; - - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - t0 = U8TO64(&key->b[0]); - t1 = 
U8TO64(&key->b[8]); - st->r[0] = ( t0 ) & 0xffc0fffffff; - st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; - st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f; - - /* h = 0 */ - st->h[0] = 0; - st->h[1] = 0; - st->h[2] = 0; - - /* save pad for later */ - st->pad[0] = U8TO64(&key->b[16]); - st->pad[1] = U8TO64(&key->b[24]); - - st->final = 0; -} - -void -poly1305_blocks_ref(void *state, const unsigned char *in, size_t inlen) { - poly1305_state_ref_t *st = (poly1305_state_ref_t *)state; - const uint64_t hibit = (st->final) ? 0 : ((uint64_t)1 << 40); /* 1 << 128 */ - uint64_t r0,r1,r2; - uint64_t s1,s2; - uint64_t h0,h1,h2; - uint64_t c; - uint128_t d0,d1,d2; - - r0 = st->r[0]; - r1 = st->r[1]; - r2 = st->r[2]; - - s1 = r1 * (5 << 2); - s2 = r2 * (5 << 2); - - h0 = st->h[0]; - h1 = st->h[1]; - h2 = st->h[2]; - - while (inlen >= POLY1305_BLOCK_SIZE) { - uint64_t t0, t1; - - /* h += in[i] */ - t0 = U8TO64(in + 0); - t1 = U8TO64(in + 8); - h0 += (( t0 ) & 0xfffffffffff); - h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); - h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit; - - /* h *= r */ - d0 = ((uint128_t)h0 * r0) + ((uint128_t)h1 * s2) + ((uint128_t)h2 * s1); - d1 = ((uint128_t)h0 * r1) + ((uint128_t)h1 * r0) + ((uint128_t)h2 * s2); - d2 = ((uint128_t)h0 * r2) + ((uint128_t)h1 * r1) + ((uint128_t)h2 * r0); - - /* (partial) h %= p */ - c = (uint64_t)(d0 >> 44); h0 = (uint64_t)d0 & 0xfffffffffff; - d1 += c; c = (uint64_t)(d1 >> 44); h1 = (uint64_t)d1 & 0xfffffffffff; - d2 += c; c = (uint64_t)(d2 >> 42); h2 = (uint64_t)d2 & 0x3ffffffffff; - h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff; - h1 += c; - - in += POLY1305_BLOCK_SIZE; - inlen -= POLY1305_BLOCK_SIZE; - } - - st->h[0] = h0; - st->h[1] = h1; - st->h[2] = h2; -} - -void -poly1305_finish_ext_ref(void *state, const unsigned char *in, size_t remaining, unsigned char mac[16]) { - poly1305_state_ref_t *st = (poly1305_state_ref_t *)state; - uint64_t h0, h1, h2, c; - uint64_t g0, g1, g2; - uint64_t t0, t1; - - /* process the remaining block */ - if (remaining) { - unsigned char final[POLY1305_BLOCK_SIZE] = {0}; - size_t i; - for (i = 0; i < remaining; i++) - final[i] = in[i]; - final[remaining] = 1; - st->final = 1; - poly1305_blocks_ref(st, final, POLY1305_BLOCK_SIZE); - } - - /* fully carry h */ - h0 = st->h[0]; - h1 = st->h[1]; - h2 = st->h[2]; - - c = (h1 >> 44); h1 &= 0xfffffffffff; - h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; - h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; - h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; - h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; - h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; - h1 += c; - - /* compute h + -p */ - g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; - g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; - g2 = h2 + c - ((uint64_t)1 << 42); - - /* select h if h < p, or h + -p if h >= p */ - c = (g2 >> 63) - 1; - h0 = (h0 & ~c) | (g0 & c); - h1 = (h1 & ~c) | (g1 & c); - h2 = (h2 & ~c) | (g2 & c); - - /* h = (h + pad) */ - t0 = st->pad[0]; - t1 = st->pad[1]; - - h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; - h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; - h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff; - - /* mac = h % (2^128) */ - h0 = ((h0 ) | (h1 << 44)); - h1 = ((h1 >> 20) | (h2 << 24)); - - U64TO8(&mac[0], h0); - U64TO8(&mac[8], h1); - - /* zero out the state */ - st->h[0] = 0; - st->h[1] = 0; - st->h[2] = 0; - st->r[0] = 0; - st->r[1] = 0; - st->r[2] = 0; - st->pad[0] = 0; - st->pad[1] = 0; 
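	/* the zeroing above is not cosmetic: r, h and pad are derived from the
	   one-time key, so wiping them keeps key material from lingering in
	   memory after the tag has been produced */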
-} - - -void -poly1305_auth_ref(unsigned char mac[16], const unsigned char *in, size_t inlen, const poly1305_key *key) { - poly1305_state_ref_t st; - size_t blocks; - poly1305_init_ext_ref(&st, key, inlen); - blocks = (inlen & ~(POLY1305_BLOCK_SIZE - 1)); - if (blocks) { - poly1305_blocks_ref(&st, in, blocks); - in += blocks; - inlen -= blocks; - } - poly1305_finish_ext_ref(&st, in, inlen, mac); -} - diff --git a/src/libcryptobox/poly1305/sse2.S b/src/libcryptobox/poly1305/sse2.S deleted file mode 100644 index 038961899..000000000 --- a/src/libcryptobox/poly1305/sse2.S +++ /dev/null @@ -1,969 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -SECTION_TEXT - -GLOBAL_HIDDEN_FN poly1305_block_size_sse2 -movl $32, %eax -ret -FN_END poly1305_block_size_sse2 - -GLOBAL_HIDDEN_FN poly1305_init_ext_sse2 -poly1305_init_ext_sse2_local: -pushq %r15 -xorps %xmm0, %xmm0 -testq %rdx, %rdx -pushq %r14 -movq %rdx, %r11 -movq $-1, %rax -cmove %rax, %r11 -pushq %r13 -movabsq $17575274610687, %r9 -pushq %r12 -pushq %rbp -movq %r11, %r13 -movabsq $17592186044415, %rbp -pushq %rbx -xorl %ebx, %ebx -movdqu %xmm0, 32(%rdi) -movdqu %xmm0, (%rdi) -movdqu %xmm0, 16(%rdi) -movq 8(%rsi), %rcx -movq (%rsi), %rax -movq %rcx, %rdx -shrq $24, %rcx -andq %rax, %r9 -salq $20, %rdx -shrq $44, %rax -movq %r9, %r8 -orq %rax, %rdx -shrq $26, %r8 -movabsq $17592181915647, %rax -andq %rax, %rdx -movabsq $68719475727, %rax -andq %rax, %rcx -movl %r9d, %eax -andl $67108863, %eax -movl %eax, 40(%rdi) -movl %edx, %eax -sall $18, %eax -orl %r8d, %eax -movq %rdx, %r8 -andl $67108863, %eax -shrq $34, %r8 -movl %eax, 44(%rdi) -movq %rdx, %rax -shrq $8, %rax -andl $67108863, %eax -movl %eax, 48(%rdi) -movl %ecx, %eax -sall $10, %eax -orl %r8d, %eax -movq %rdi, %r8 -andl $67108863, %eax -movl %eax, 52(%rdi) -movq %rcx, %rax -shrq $16, %rax -movl %eax, 56(%rdi) -movq 16(%rsi), %rax -movq %rax, 104(%rdi) -movq 24(%rsi), %rax -movq %rdx, %rsi -movq %rax, 112(%rdi) -poly1305_init_ext_sse2_7: -testq %rbx, %rbx -jne poly1305_init_ext_sse2_4 -cmpq $16, %r13 -jbe poly1305_init_ext_sse2_5 -leaq 60(%r8), %rdi -jmp poly1305_init_ext_sse2_6 -poly1305_init_ext_sse2_4: -cmpq $96, %r13 -jb poly1305_init_ext_sse2_5 -leaq 80(%r8), %rdi -poly1305_init_ext_sse2_6: -imulq $20, %rcx, %r10 -movq $0, -48(%rsp) -movq $0, -32(%rsp) -leaq (%rsi,%rsi), %r14 -leaq (%r9,%r9), %r11 -movq %r10, %rax -mulq %r14 -movq %rax, %r14 -movq %r9, %rax -movq %rdx, %r15 -mulq %r9 -addq %rax, %r14 -movq %r14, %rax -adcq %rdx, %r15 -leaq (%rcx,%rcx), %rdx -andq %rbp, %rax -movq %rax, -16(%rsp) -movq %r11, %rax -movq %rdx, -24(%rsp) -mulq %rsi -movq %rax, %r11 -movq %r10, %rax -movq %rdx, %r12 -mulq %rcx -movq -16(%rsp), %rcx -addq %rax, %r11 -movq %r14, %rax -adcq %rdx, %r12 -shrdq $44, %r15, %rax -movq %rax, -56(%rsp) -movq -24(%rsp), %rax -addq -56(%rsp), %r11 -adcq -48(%rsp), %r12 -mulq %r9 -movq %r11, %r14 -andq %rbp, %r14 -movq %rax, %r9 -movq %rsi, %rax -movq %rdx, %r10 -mulq %rsi -addq %rax, %r9 -movq %r11, %rax -adcq %rdx, %r10 -shrdq $44, %r12, %rax -movq %rax, -40(%rsp) -movabsq $4398046511103, %rax -addq -40(%rsp), %r9 -adcq -32(%rsp), %r10 -andq %r9, %rax -incq %rbx -shrdq $42, %r10, %r9 -leaq (%r9,%r9,4), %r9 -addq %r9, %rcx -movq %rcx, %r9 -shrq $44, %rcx -addq %r14, %rcx -andq %rbp, %r9 -movq %rcx, %rsi -shrq $44, %rcx -movq %r9, %rdx -addq %rax, %rcx -movl %r9d, %eax -andq %rbp, %rsi -andl $67108863, %eax -shrq $26, %rdx -movl %eax, (%rdi) -movl %esi, %eax -sall $18, %eax -orl %edx, %eax -movq %rsi, %rdx -andl $67108863, %eax -shrq $34, %rdx -movl %eax, 
4(%rdi) -movq %rsi, %rax -shrq $8, %rax -andl $67108863, %eax -movl %eax, 8(%rdi) -movl %ecx, %eax -sall $10, %eax -orl %edx, %eax -andl $67108863, %eax -movl %eax, 12(%rdi) -movq %rcx, %rax -shrq $16, %rax -cmpq $2, %rbx -movl %eax, 16(%rdi) -jne poly1305_init_ext_sse2_7 -poly1305_init_ext_sse2_5: -movq $0, 120(%r8) -popq %rbx -popq %rbp -popq %r12 -popq %r13 -popq %r14 -popq %r15 -ret -FN_END poly1305_init_ext_sse2 - - -GLOBAL_HIDDEN_FN poly1305_blocks_sse2 -poly1305_blocks_sse2_local: -pushq %rbp -movq %rsp, %rbp -pushq %rbx -andq $-64, %rsp -subq $328, %rsp -movq $(1 << 24), %rax -movd %rax, %xmm1 -movq $((1 << 26) - 1), %rax -movd %rax, %xmm0 -pshufd $68, %xmm1, %xmm1 -pshufd $68, %xmm0, %xmm0 -movq 120(%rdi), %rax -movaps %xmm1, 312(%rsp) -testb $4, %al -je poly1305_blocks_sse2_11 -movaps 312(%rsp), %xmm1 -psrldq $8, %xmm1 -movaps %xmm1, 312(%rsp) -poly1305_blocks_sse2_11: -testb $8, %al -je poly1305_blocks_sse2_12 -xorps %xmm1, %xmm1 -movaps %xmm1, 312(%rsp) -poly1305_blocks_sse2_12: -testb $1, %al -jne poly1305_blocks_sse2_13 -movq 16(%rsi), %xmm1 -movaps %xmm0, %xmm3 -movaps %xmm0, %xmm9 -movq (%rsi), %xmm15 -orq $1, %rax -subq $32, %rdx -movq 8(%rsi), %xmm12 -punpcklqdq %xmm1, %xmm15 -movq 24(%rsi), %xmm1 -movaps %xmm15, %xmm8 -pand %xmm15, %xmm3 -psrlq $52, %xmm15 -addq $32, %rsi -punpcklqdq %xmm1, %xmm12 -movaps %xmm12, %xmm1 -psrlq $26, %xmm8 -psllq $12, %xmm1 -pand %xmm0, %xmm8 -movq %rax, 120(%rdi) -por %xmm1, %xmm15 -psrlq $40, %xmm12 -pand %xmm15, %xmm9 -por 312(%rsp), %xmm12 -psrlq $26, %xmm15 -pand %xmm0, %xmm15 -jmp poly1305_blocks_sse2_14 -poly1305_blocks_sse2_13: -movdqu (%rdi), %xmm8 -movdqu 16(%rdi), %xmm15 -movdqu 32(%rdi), %xmm12 -pshufd $80, %xmm8, %xmm3 -pshufd $250, %xmm8, %xmm8 -pshufd $80, %xmm15, %xmm9 -pshufd $250, %xmm15, %xmm15 -pshufd $80, %xmm12, %xmm12 -poly1305_blocks_sse2_14: -movq 120(%rdi), %rax -testb $48, %al -je poly1305_blocks_sse2_15 -testb $16, %al -movd 56(%rdi), %xmm2 -leaq 40(%rdi), %rax -je poly1305_blocks_sse2_16 -movdqu 60(%rdi), %xmm1 -movdqu (%rax), %xmm4 -movd %xmm2, %eax -movd 76(%rdi), %xmm2 -movaps %xmm1, %xmm7 -movd %eax, %xmm5 -punpckldq %xmm4, %xmm7 -punpckhdq %xmm4, %xmm1 -punpcklqdq %xmm5, %xmm2 -jmp poly1305_blocks_sse2_17 -poly1305_blocks_sse2_16: -movdqu (%rax), %xmm1 -movl $1, %r8d -movd %r8d, %xmm4 -movaps %xmm1, %xmm7 -punpckldq %xmm4, %xmm7 -punpckhdq %xmm4, %xmm1 -poly1305_blocks_sse2_17: -pshufd $80, %xmm7, %xmm11 -pshufd $80, %xmm1, %xmm4 -pshufd $250, %xmm7, %xmm7 -movaps %xmm11, 168(%rsp) -pshufd $250, %xmm1, %xmm1 -jmp poly1305_blocks_sse2_18 -poly1305_blocks_sse2_15: -movdqu 60(%rdi), %xmm1 -movd 76(%rdi), %xmm2 -pshufd $0, %xmm2, %xmm2 -pshufd $0, %xmm1, %xmm11 -pshufd $85, %xmm1, %xmm7 -pshufd $170, %xmm1, %xmm4 -movaps %xmm11, 168(%rsp) -pshufd $255, %xmm1, %xmm1 -poly1305_blocks_sse2_18: -movaps %xmm1, %xmm14 -movaps %xmm7, %xmm5 -movaps %xmm4, %xmm13 -movaps %xmm1, 264(%rsp) -movaps %xmm2, %xmm1 -cmpq $63, %rdx -movq $(5), %r8 -movd %r8, %xmm6 -pshufd $68, %xmm6, %xmm6 -pmuludq %xmm6, %xmm5 -movaps %xmm4, 296(%rsp) -pmuludq %xmm6, %xmm13 -movaps %xmm2, 152(%rsp) -pmuludq %xmm6, %xmm14 -pmuludq %xmm6, %xmm1 -movaps %xmm5, 88(%rsp) -movaps %xmm13, 72(%rsp) -movaps %xmm14, 56(%rsp) -movaps %xmm1, 40(%rsp) -jbe poly1305_blocks_sse2_19 -movdqu 80(%rdi), %xmm1 -movd 96(%rdi), %xmm2 -movq %rdx, %rcx -pshufd $0, %xmm2, %xmm2 -movaps %xmm2, 24(%rsp) -pmuludq %xmm6, %xmm2 -pshufd $85, %xmm1, %xmm4 -movaps %xmm4, 280(%rsp) -pmuludq %xmm6, %xmm4 -pshufd $255, %xmm1, %xmm13 -pshufd $170, %xmm1, %xmm5 -movaps 72(%rsp), 
%xmm14 -movaps %xmm5, 216(%rsp) -pmuludq %xmm6, %xmm5 -movq %rsi, %rax -movaps %xmm4, -24(%rsp) -movaps %xmm13, %xmm4 -pshufd $0, %xmm1, %xmm1 -pmuludq %xmm6, %xmm4 -movaps %xmm14, -8(%rsp) -movaps %xmm5, 8(%rsp) -movaps 168(%rsp), %xmm5 -movaps %xmm1, 248(%rsp) -movaps 56(%rsp), %xmm1 -movaps %xmm4, 120(%rsp) -movaps 40(%rsp), %xmm4 -movaps %xmm13, 136(%rsp) -movaps %xmm2, 200(%rsp) -movaps %xmm1, 104(%rsp) -movaps %xmm4, 184(%rsp) -movaps %xmm5, 232(%rsp) -jmp poly1305_blocks_sse2_20 -.p2align 6 -poly1305_blocks_sse2_20: -movaps -24(%rsp), %xmm5 -movaps %xmm8, %xmm13 -subq $64, %rcx -movaps 8(%rsp), %xmm4 -movaps 120(%rsp), %xmm10 -pmuludq %xmm12, %xmm5 -pmuludq %xmm15, %xmm4 -movaps 8(%rsp), %xmm2 -pmuludq %xmm9, %xmm10 -movaps 120(%rsp), %xmm11 -movaps 200(%rsp), %xmm14 -pmuludq %xmm12, %xmm2 -paddq %xmm4, %xmm5 -pmuludq %xmm15, %xmm11 -movaps 120(%rsp), %xmm1 -paddq %xmm10, %xmm5 -pmuludq %xmm8, %xmm14 -movaps 200(%rsp), %xmm10 -movaps 200(%rsp), %xmm4 -pmuludq %xmm12, %xmm1 -movaps 248(%rsp), %xmm8 -pmuludq %xmm15, %xmm10 -paddq %xmm11, %xmm2 -pmuludq %xmm12, %xmm4 -paddq %xmm14, %xmm5 -movaps 200(%rsp), %xmm11 -movaps 248(%rsp), %xmm14 -pmuludq %xmm15, %xmm8 -pmuludq 248(%rsp), %xmm12 -pmuludq %xmm9, %xmm11 -paddq %xmm10, %xmm1 -movaps 248(%rsp), %xmm10 -pmuludq 280(%rsp), %xmm15 -pmuludq %xmm3, %xmm14 -paddq %xmm15, %xmm12 -paddq %xmm8, %xmm4 -pmuludq %xmm13, %xmm10 -movq 24(%rax), %xmm15 -movaps 248(%rsp), %xmm8 -paddq %xmm11, %xmm2 -movaps %xmm3, %xmm11 -movaps 280(%rsp), %xmm3 -paddq %xmm14, %xmm5 -pmuludq %xmm9, %xmm8 -paddq %xmm10, %xmm2 -movq 16(%rax), %xmm14 -movaps 280(%rsp), %xmm10 -pmuludq %xmm9, %xmm3 -pmuludq 216(%rsp), %xmm9 -paddq %xmm9, %xmm12 -paddq %xmm8, %xmm1 -movq (%rax), %xmm8 -pmuludq %xmm11, %xmm10 -paddq %xmm3, %xmm4 -movaps 216(%rsp), %xmm3 -punpcklqdq %xmm14, %xmm8 -movaps 280(%rsp), %xmm14 -pmuludq %xmm13, %xmm3 -paddq %xmm10, %xmm2 -movq 8(%rax), %xmm10 -pmuludq %xmm13, %xmm14 -pmuludq 136(%rsp), %xmm13 -paddq %xmm13, %xmm12 -punpcklqdq %xmm15, %xmm10 -movaps %xmm10, %xmm9 -movaps 216(%rsp), %xmm15 -paddq %xmm3, %xmm4 -psllq $12, %xmm9 -movaps %xmm0, %xmm3 -paddq %xmm14, %xmm1 -pmuludq %xmm11, %xmm15 -pand %xmm8, %xmm3 -movaps 136(%rsp), %xmm14 -movaps %xmm3, -40(%rsp) -movaps %xmm8, %xmm3 -movdqu 48(%rax), %xmm13 -psrlq $52, %xmm8 -pmuludq %xmm11, %xmm14 -paddq %xmm15, %xmm1 -por %xmm9, %xmm8 -pmuludq 24(%rsp), %xmm11 -paddq %xmm11, %xmm12 -movdqu 32(%rax), %xmm11 -movaps %xmm10, %xmm9 -psrlq $40, %xmm10 -pand %xmm0, %xmm8 -movaps %xmm11, %xmm15 -paddq %xmm14, %xmm4 -xorps %xmm14, %xmm14 -punpckldq %xmm13, %xmm15 -psrlq $14, %xmm9 -addq $64, %rax -pand %xmm0, %xmm9 -psrlq $26, %xmm3 -cmpq $63, %rcx -por 312(%rsp), %xmm10 -movaps %xmm13, -72(%rsp) -movaps %xmm15, %xmm13 -punpckldq %xmm14, %xmm13 -punpckhdq -72(%rsp), %xmm11 -movaps %xmm13, -56(%rsp) -movaps %xmm11, %xmm13 -punpckhdq %xmm14, %xmm11 -pand %xmm0, %xmm3 -psllq $18, %xmm11 -punpckhdq %xmm14, %xmm15 -punpckldq %xmm14, %xmm13 -paddq %xmm11, %xmm4 -movaps -8(%rsp), %xmm11 -psllq $6, %xmm15 -psllq $12, %xmm13 -movaps 88(%rsp), %xmm14 -paddq %xmm15, %xmm2 -pmuludq %xmm10, %xmm11 -paddq %xmm13, %xmm1 -movaps -8(%rsp), %xmm13 -pmuludq %xmm10, %xmm14 -paddq -56(%rsp), %xmm5 -paddq 312(%rsp), %xmm12 -pmuludq %xmm9, %xmm13 -movaps 104(%rsp), %xmm15 -paddq %xmm11, %xmm2 -movaps 184(%rsp), %xmm11 -paddq %xmm14, %xmm5 -movaps 104(%rsp), %xmm14 -pmuludq %xmm9, %xmm15 -pmuludq %xmm10, %xmm11 -paddq %xmm13, %xmm5 -movaps 104(%rsp), %xmm13 -pmuludq %xmm10, %xmm14 -pmuludq 232(%rsp), %xmm10 -paddq %xmm10, 
%xmm12 -pmuludq %xmm8, %xmm13 -paddq %xmm15, %xmm2 -movaps %xmm8, %xmm10 -paddq %xmm11, %xmm4 -pmuludq %xmm7, %xmm10 -movaps 232(%rsp), %xmm11 -movaps 184(%rsp), %xmm15 -paddq %xmm14, %xmm1 -pmuludq %xmm9, %xmm11 -paddq %xmm13, %xmm5 -movaps 184(%rsp), %xmm13 -movaps 184(%rsp), %xmm14 -pmuludq %xmm3, %xmm15 -pmuludq %xmm9, %xmm13 -paddq %xmm11, %xmm4 -pmuludq %xmm8, %xmm14 -movaps 232(%rsp), %xmm11 -paddq %xmm10, %xmm4 -paddq %xmm15, %xmm5 -pmuludq %xmm7, %xmm9 -pmuludq %xmm8, %xmm11 -paddq %xmm13, %xmm1 -movaps 232(%rsp), %xmm13 -movaps 296(%rsp), %xmm10 -paddq %xmm14, %xmm2 -pmuludq 296(%rsp), %xmm8 -movaps -40(%rsp), %xmm14 -pmuludq %xmm3, %xmm13 -paddq %xmm9, %xmm12 -paddq %xmm11, %xmm1 -movaps %xmm3, %xmm11 -paddq %xmm8, %xmm12 -movaps 232(%rsp), %xmm15 -pmuludq %xmm7, %xmm11 -pmuludq %xmm3, %xmm10 -paddq %xmm13, %xmm2 -movaps %xmm14, %xmm13 -movaps 296(%rsp), %xmm9 -pmuludq %xmm14, %xmm15 -pmuludq 264(%rsp), %xmm3 -paddq %xmm11, %xmm1 -pmuludq %xmm7, %xmm13 -paddq %xmm3, %xmm12 -movaps 264(%rsp), %xmm11 -paddq %xmm10, %xmm4 -pmuludq %xmm14, %xmm9 -paddq %xmm15, %xmm5 -pmuludq %xmm14, %xmm11 -movaps %xmm5, %xmm8 -paddq %xmm13, %xmm2 -psrlq $26, %xmm8 -paddq %xmm9, %xmm1 -pand %xmm0, %xmm5 -pmuludq 152(%rsp), %xmm14 -paddq %xmm14, %xmm12 -paddq %xmm8, %xmm2 -paddq %xmm11, %xmm4 -movaps %xmm2, %xmm9 -movaps %xmm2, %xmm8 -movaps %xmm4, %xmm3 -psrlq $26, %xmm9 -pand %xmm0, %xmm4 -psrlq $26, %xmm3 -paddq %xmm9, %xmm1 -pand %xmm0, %xmm8 -paddq %xmm3, %xmm12 -movaps %xmm1, %xmm10 -movaps %xmm1, %xmm9 -movaps %xmm12, %xmm3 -psrlq $26, %xmm10 -pand %xmm0, %xmm12 -psrlq $26, %xmm3 -paddq %xmm10, %xmm4 -pand %xmm0, %xmm9 -pmuludq %xmm6, %xmm3 -movaps %xmm4, %xmm1 -movaps %xmm4, %xmm15 -psrlq $26, %xmm1 -pand %xmm0, %xmm15 -paddq %xmm1, %xmm12 -paddq %xmm3, %xmm5 -movaps %xmm5, %xmm2 -movaps %xmm5, %xmm3 -psrlq $26, %xmm2 -pand %xmm0, %xmm3 -paddq %xmm2, %xmm8 -ja poly1305_blocks_sse2_20 -leaq -64(%rdx), %rax -andl $63, %edx -andq $-64, %rax -leaq 64(%rsi,%rax), %rsi -poly1305_blocks_sse2_19: -cmpq $31, %rdx -jbe poly1305_blocks_sse2_21 -movaps 56(%rsp), %xmm11 -movaps %xmm15, %xmm1 -movaps %xmm15, %xmm14 -movaps 72(%rsp), %xmm5 -movaps %xmm12, %xmm4 -movaps %xmm15, %xmm10 -movaps 88(%rsp), %xmm2 -pmuludq %xmm11, %xmm14 -movaps %xmm8, %xmm15 -pmuludq %xmm5, %xmm1 -movaps 40(%rsp), %xmm13 -testq %rsi, %rsi -pmuludq %xmm12, %xmm2 -pmuludq %xmm12, %xmm5 -pmuludq %xmm11, %xmm4 -paddq %xmm1, %xmm2 -pmuludq %xmm9, %xmm11 -movaps %xmm12, %xmm1 -paddq %xmm14, %xmm5 -pmuludq %xmm13, %xmm15 -movaps %xmm9, %xmm14 -pmuludq %xmm13, %xmm14 -pmuludq %xmm13, %xmm1 -paddq %xmm11, %xmm2 -movaps 168(%rsp), %xmm11 -pmuludq %xmm10, %xmm13 -paddq %xmm15, %xmm2 -movaps %xmm9, %xmm15 -paddq %xmm14, %xmm5 -pmuludq %xmm11, %xmm12 -movaps %xmm3, %xmm14 -pmuludq %xmm11, %xmm14 -movaps %xmm13, 248(%rsp) -movaps %xmm10, %xmm13 -pmuludq %xmm7, %xmm15 -paddq 248(%rsp), %xmm4 -pmuludq %xmm11, %xmm13 -pmuludq %xmm7, %xmm10 -paddq %xmm14, %xmm2 -movaps %xmm13, 280(%rsp) -movaps %xmm8, %xmm13 -pmuludq %xmm11, %xmm13 -paddq %xmm10, %xmm12 -movaps 296(%rsp), %xmm10 -paddq 280(%rsp), %xmm1 -pmuludq %xmm9, %xmm11 -pmuludq 296(%rsp), %xmm9 -pmuludq %xmm3, %xmm10 -paddq %xmm9, %xmm12 -paddq %xmm13, %xmm5 -movaps %xmm3, %xmm13 -paddq %xmm15, %xmm1 -pmuludq %xmm7, %xmm13 -paddq %xmm11, %xmm4 -movaps 296(%rsp), %xmm11 -pmuludq %xmm8, %xmm7 -pmuludq %xmm8, %xmm11 -pmuludq 264(%rsp), %xmm8 -paddq %xmm8, %xmm12 -paddq %xmm13, %xmm5 -paddq %xmm7, %xmm4 -movaps 264(%rsp), %xmm7 -paddq %xmm11, %xmm1 -paddq %xmm10, %xmm4 -pmuludq %xmm3, %xmm7 
-pmuludq 152(%rsp), %xmm3 -paddq %xmm3, %xmm12 -paddq %xmm7, %xmm1 -je poly1305_blocks_sse2_22 -movdqu (%rsi), %xmm7 -xorps %xmm3, %xmm3 -paddq 312(%rsp), %xmm12 -movdqu 16(%rsi), %xmm8 -movaps %xmm7, %xmm9 -punpckldq %xmm8, %xmm9 -punpckhdq %xmm8, %xmm7 -movaps %xmm9, %xmm10 -movaps %xmm7, %xmm8 -punpckldq %xmm3, %xmm10 -punpckhdq %xmm3, %xmm9 -punpckhdq %xmm3, %xmm7 -punpckldq %xmm3, %xmm8 -movaps %xmm8, %xmm3 -psllq $6, %xmm9 -paddq %xmm10, %xmm2 -psllq $12, %xmm3 -paddq %xmm9, %xmm5 -psllq $18, %xmm7 -paddq %xmm3, %xmm4 -paddq %xmm7, %xmm1 -poly1305_blocks_sse2_22: -movaps %xmm2, %xmm8 -movaps %xmm1, %xmm3 -movaps %xmm1, %xmm15 -psrlq $26, %xmm8 -pand %xmm0, %xmm2 -pand %xmm0, %xmm15 -psrlq $26, %xmm3 -paddq %xmm5, %xmm8 -paddq %xmm12, %xmm3 -movaps %xmm8, %xmm9 -pand %xmm0, %xmm8 -movaps %xmm3, %xmm1 -psrlq $26, %xmm9 -movaps %xmm3, %xmm12 -psrlq $26, %xmm1 -paddq %xmm4, %xmm9 -pand %xmm0, %xmm12 -pmuludq %xmm1, %xmm6 -movaps %xmm9, %xmm3 -pand %xmm0, %xmm9 -psrlq $26, %xmm3 -paddq %xmm3, %xmm15 -paddq %xmm6, %xmm2 -movaps %xmm15, %xmm3 -pand %xmm0, %xmm15 -movaps %xmm2, %xmm1 -psrlq $26, %xmm3 -psrlq $26, %xmm1 -paddq %xmm3, %xmm12 -movaps %xmm0, %xmm3 -paddq %xmm1, %xmm8 -pand %xmm2, %xmm3 -poly1305_blocks_sse2_21: -testq %rsi, %rsi -je poly1305_blocks_sse2_23 -pshufd $8, %xmm3, %xmm3 -pshufd $8, %xmm8, %xmm8 -pshufd $8, %xmm9, %xmm9 -pshufd $8, %xmm15, %xmm15 -pshufd $8, %xmm12, %xmm12 -punpcklqdq %xmm8, %xmm3 -punpcklqdq %xmm15, %xmm9 -movdqu %xmm3, (%rdi) -movdqu %xmm9, 16(%rdi) -movq %xmm12, 32(%rdi) -jmp poly1305_blocks_sse2_10 -poly1305_blocks_sse2_23: -movaps %xmm3, %xmm0 -movaps %xmm8, %xmm4 -movaps %xmm9, %xmm2 -psrldq $8, %xmm0 -movaps %xmm15, %xmm10 -paddq %xmm0, %xmm3 -psrldq $8, %xmm4 -movaps %xmm12, %xmm0 -movd %xmm3, %edx -paddq %xmm4, %xmm8 -psrldq $8, %xmm2 -movl %edx, %ecx -movd %xmm8, %eax -paddq %xmm2, %xmm9 -shrl $26, %ecx -psrldq $8, %xmm10 -andl $67108863, %edx -addl %ecx, %eax -movd %xmm9, %ecx -paddq %xmm10, %xmm15 -movl %eax, %r9d -shrl $26, %eax -psrldq $8, %xmm0 -addl %ecx, %eax -movd %xmm15, %ecx -paddq %xmm0, %xmm12 -movl %eax, %esi -andl $67108863, %r9d -movd %xmm12, %r10d -shrl $26, %esi -andl $67108863, %eax -addl %ecx, %esi -salq $8, %rax -movl %r9d, %ecx -shrl $18, %r9d -movl %esi, %r8d -shrl $26, %esi -andl $67108863, %r8d -addl %r10d, %esi -orq %r9, %rax -salq $16, %rsi -movq %r8, %r9 -shrl $10, %r8d -salq $26, %rcx -orq %r8, %rsi -salq $34, %r9 -orq %rdx, %rcx -movq %rsi, %r11 -shrq $42, %rsi -movabsq $17592186044415, %rdx -orq %r9, %rax -movabsq $4398046511103, %r8 -andq %rdx, %rcx -andq %rdx, %rax -andq %r8, %r11 -leaq (%rsi,%rsi,4), %rsi -addq %rsi, %rcx -movq %rcx, %r10 -shrq $44, %rcx -addq %rcx, %rax -andq %rdx, %r10 -movq %rax, %r9 -shrq $44, %rax -addq %r11, %rax -andq %rdx, %r9 -movabsq $-4398046511104, %r11 -movq %rax, %rcx -andq %r8, %rcx -shrq $42, %rax -leaq (%rax,%rax,4), %rsi -addq %rcx, %r11 -addq %r10, %rsi -movq %rsi, %r8 -shrq $44, %rsi -andq %rdx, %r8 -addq %r9, %rsi -leaq 5(%r8), %r9 -movq %r9, %rbx -andq %rdx, %r9 -shrq $44, %rbx -addq %rsi, %rbx -movq %rbx, %rax -andq %rbx, %rdx -shrq $44, %rax -addq %rax, %r11 -movq %r11, %rax -shrq $63, %rax -decq %rax -movq %rax, %r10 -andq %rax, %r9 -andq %rax, %rdx -notq %r10 -andq %r11, %rax -andq %r10, %r8 -andq %r10, %rsi -andq %r10, %rcx -orq %r9, %r8 -orq %rdx, %rsi -orq %rax, %rcx -movq %r8, (%rdi) -movq %rsi, 8(%rdi) -movq %rcx, 16(%rdi) -poly1305_blocks_sse2_10: -movq -8(%rbp), %rbx -leave -ret -FN_END poly1305_blocks_sse2 - -GLOBAL_HIDDEN_FN poly1305_finish_ext_sse2 
-poly1305_finish_ext_sse2_local: -pushq %r12 -movq %rcx, %r12 -pushq %rbp -movq %rdx, %rbp -pushq %rbx -movq %rdi, %rbx -subq $32, %rsp -testq %rdx, %rdx -je poly1305_finish_ext_sse2_27 -xorl %eax, %eax -movq %rsp, %rdi -movl $8, %ecx -rep stosl -subq %rsp, %rsi -testb $16, %dl -movq %rsp, %rax -je poly1305_finish_ext_sse2_28 -movdqu (%rsp,%rsi), %xmm0 -addq $16, %rax -movaps %xmm0, (%rsp) -poly1305_finish_ext_sse2_28: -testb $8, %bpl -je poly1305_finish_ext_sse2_29 -movq (%rax,%rsi), %rdx -movq %rdx, (%rax) -addq $8, %rax -poly1305_finish_ext_sse2_29: -testb $4, %bpl -je poly1305_finish_ext_sse2_30 -movl (%rax,%rsi), %edx -movl %edx, (%rax) -addq $4, %rax -poly1305_finish_ext_sse2_30: -testb $2, %bpl -je poly1305_finish_ext_sse2_31 -movw (%rax,%rsi), %dx -movw %dx, (%rax) -addq $2, %rax -poly1305_finish_ext_sse2_31: -testb $1, %bpl -je poly1305_finish_ext_sse2_32 -movb (%rax,%rsi), %dl -movb %dl, (%rax) -poly1305_finish_ext_sse2_32: -cmpq $16, %rbp -je poly1305_finish_ext_sse2_33 -movb $1, (%rsp,%rbp) -poly1305_finish_ext_sse2_33: -cmpq $16, %rbp -movl $32, %edx -movq %rsp, %rsi -sbbq %rax, %rax -movq %rbx, %rdi -andl $4, %eax -addq $4, %rax -orq %rax, 120(%rbx) -call poly1305_blocks_sse2_local -poly1305_finish_ext_sse2_27: -movq 120(%rbx), %rax -testb $1, %al -je poly1305_finish_ext_sse2_35 -decq %rbp -cmpq $15, %rbp -jbe poly1305_finish_ext_sse2_36 -orq $16, %rax -jmp poly1305_finish_ext_sse2_40 -poly1305_finish_ext_sse2_36: -orq $32, %rax -poly1305_finish_ext_sse2_40: -movq %rax, 120(%rbx) -movl $32, %edx -xorl %esi, %esi -movq %rbx, %rdi -call poly1305_blocks_sse2_local -poly1305_finish_ext_sse2_35: -movq 8(%rbx), %rax -movq 112(%rbx), %rsi -movq %rax, %rdx -movq %rax, %rcx -movq 16(%rbx), %rax -shrq $20, %rcx -salq $44, %rdx -orq (%rbx), %rdx -salq $24, %rax -orq %rcx, %rax -movq 104(%rbx), %rcx -addq %rcx, %rdx -adcq %rsi, %rax -xorps %xmm0, %xmm0 -movdqu %xmm0, (%rbx) -movdqu %xmm0, 16(%rbx) -movdqu %xmm0, 32(%rbx) -movdqu %xmm0, 48(%rbx) -movdqu %xmm0, 64(%rbx) -movdqu %xmm0, 80(%rbx) -movdqu %xmm0, 96(%rbx) -movdqu %xmm0, 112(%rbx) -movq %rdx, (%r12) -movq %rax, 8(%r12) -addq $32, %rsp -popq %rbx -popq %rbp -popq %r12 -ret -FN_END poly1305_finish_ext_sse2 - -GLOBAL_HIDDEN_FN poly1305_auth_sse2 -/* -cmpq $128, %rdx -jb poly1305_auth_x86_local -*/ -pushq %rbp -movq %rsp, %rbp -pushq %r14 -pushq %r13 -movq %rdi, %r13 -pushq %r12 -movq %rsi, %r12 -movq %rcx, %rsi -pushq %rbx -movq %rdx, %rbx -andq $-64, %rsp -movq %rbx, %r14 -addq $-128, %rsp -movq %rsp, %rdi -call poly1305_init_ext_sse2_local -andq $-32, %r14 -je poly1305_auth_sse2_42 -movq %r12, %rsi -movq %r14, %rdx -movq %rsp, %rdi -call poly1305_blocks_sse2_local -addq %r14, %r12 -subq %r14, %rbx -poly1305_auth_sse2_42: -movq %r13, %rcx -movq %rbx, %rdx -movq %r12, %rsi -movq %rsp, %rdi -call poly1305_finish_ext_sse2_local -leaq -32(%rbp), %rsp -popq %rbx -popq %r12 -popq %r13 -popq %r14 -popq %rbp -ret -FN_END poly1305_auth_sse2 - - - - - diff --git a/src/libcryptobox/siphash/avx2.S b/src/libcryptobox/siphash/avx2.S deleted file mode 100644 index 070419c60..000000000 --- a/src/libcryptobox/siphash/avx2.S +++ /dev/null @@ -1,206 +0,0 @@ -/*- - * Copyright 2015 Google Inc. All Rights Reserved. - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "../macro.S" -#include "constants.S" - -/* - * Generated by clang-3.8 from siphash avx2 implementation written by - * Jan Wassenberg and Jyrki Alakuijala - */ - -SECTION_TEXT - -GLOBAL_HIDDEN_FN siphash_avx2 -siphash_avx2_local: - .cfi_startproc -## BB#0: ## %entry - pushq %rbp -Ltmp0: - .cfi_def_cfa_offset 16 -Ltmp1: - .cfi_offset %rbp, -16 - movq %rsp, %rbp -Ltmp2: - .cfi_def_cfa_register %rbp - pushq %rbx - subq $40, %rsp -Ltmp3: - .cfi_offset %rbx, -24 - movq %rdx, %rbx - vmovdqu (%rdi), %xmm0 - vpxor LCPI0_0(%rip), %xmm0, %xmm1 - vpxor LCPI0_1(%rip), %xmm0, %xmm0 - vpunpcklqdq %xmm0, %xmm1, %xmm6 ## xmm6 = xmm1[0],xmm0[0] - vpunpckhqdq %xmm0, %xmm1, %xmm7 ## xmm7 = xmm1[1],xmm0[1] - movq %rbx, %rax - andq $-8, %rax - je LBB0_1 -## BB#2: ## %for.body.preheader - xorl %ecx, %ecx - vmovdqa LCPI0_2(%rip), %xmm0 ## xmm0 = [13,16] - vmovdqa LCPI0_3(%rip), %xmm1 ## xmm1 = [51,48] - vmovdqa LCPI0_4(%rip), %xmm2 ## xmm2 = [17,21] - vmovdqa LCPI0_5(%rip), %xmm3 ## xmm3 = [47,43] - .align 4, 0x90 -LBB0_3: ## %for.body - ## =>This Inner Loop Header: Depth=1 - vmovq (%rsi,%rcx), %xmm4 ## xmm4 = mem[0],zero - vpslldq $8, %xmm4, %xmm5 ## xmm5 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7] - vpxor %xmm5, %xmm7, %xmm5 - vpaddq %xmm6, %xmm5, %xmm6 - vpsllvq %xmm0, %xmm5, %xmm7 - vpsrlvq %xmm1, %xmm5, %xmm5 - vpor %xmm7, %xmm5, %xmm5 - vpxor %xmm6, %xmm5, %xmm5 - vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0] - vpaddq %xmm5, %xmm6, %xmm6 - vpsllvq %xmm2, %xmm5, %xmm7 - vpsrlvq %xmm3, %xmm5, %xmm5 - vpor %xmm7, %xmm5, %xmm5 - vpxor %xmm6, %xmm5, %xmm5 - vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0] - vpaddq %xmm5, %xmm6, %xmm6 - vpsllvq %xmm0, %xmm5, %xmm7 - vpsrlvq %xmm1, %xmm5, %xmm5 - vpor %xmm7, %xmm5, %xmm5 - vpxor %xmm6, %xmm5, %xmm5 - vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0] - vpaddq %xmm5, %xmm6, %xmm6 - vpsllvq %xmm2, %xmm5, %xmm7 - vpsrlvq %xmm3, %xmm5, %xmm5 - vpor %xmm7, %xmm5, %xmm5 - vpxor %xmm6, %xmm5, %xmm7 - vpshufd $30, %xmm6, %xmm5 ## xmm5 = xmm6[2,3,1,0] - vpxor %xmm5, %xmm4, %xmm6 - addq $8, %rcx - cmpq %rax, %rcx - jb LBB0_3 -## BB#4: ## %for.end.loopexit - vmovdqa %xmm7, -48(%rbp) ## 16-byte Spill - vmovdqa %xmm6, -32(%rbp) ## 16-byte Spill - addq %rax, %rsi - jmp LBB0_5 -LBB0_1: - vmovdqa %xmm7, -48(%rbp) ## 16-byte Spill - vmovdqa %xmm6, -32(%rbp) ## 16-byte Spill - xorl %eax, %eax -LBB0_5: ## %for.end - movq $0, -16(%rbp) - movq %rbx, %rdx - subq %rax, %rdx - leaq -16(%rbp), %rdi - movq %rdx, %rcx - shrq $2, %rcx - rep; movsl - movq %rdx, %rcx - andq $3, %rcx - rep; movsb - movb %bl, -9(%rbp) - vmovq -16(%rbp), %xmm4 ## xmm4 = mem[0],zero - vpslldq $8, %xmm4, %xmm0 ## xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7] - vpxor -48(%rbp), %xmm0, %xmm2 ## 16-byte Folded Reload - vpaddq -32(%rbp), %xmm2, %xmm3 ## 16-byte Folded Reload - vmovdqa LCPI0_2(%rip), %xmm0 ## xmm0 = [13,16] - vpsllvq %xmm0, %xmm2, %xmm5 - vmovdqa LCPI0_3(%rip), %xmm1 ## xmm1 = [51,48] - vpsrlvq %xmm1, %xmm2, %xmm2 - vpor %xmm5, %xmm2, %xmm2 - vpxor %xmm3, %xmm2, %xmm5 - vpshufd $30, %xmm3, %xmm2 ## xmm2 = xmm3[2,3,1,0] - 
vpaddq %xmm5, %xmm2, %xmm6 - vmovdqa LCPI0_4(%rip), %xmm2 ## xmm2 = [17,21] - vpsllvq %xmm2, %xmm5, %xmm7 - vmovdqa LCPI0_5(%rip), %xmm3 ## xmm3 = [47,43] - vpsrlvq %xmm3, %xmm5, %xmm5 - vpor %xmm7, %xmm5, %xmm5 - vpxor %xmm6, %xmm5, %xmm5 - vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0] - vpaddq %xmm5, %xmm6, %xmm6 - vpsllvq %xmm0, %xmm5, %xmm7 - vpsrlvq %xmm1, %xmm5, %xmm5 - vpor %xmm7, %xmm5, %xmm5 - vpxor %xmm6, %xmm5, %xmm5 - vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0] - vpaddq %xmm5, %xmm6, %xmm6 - vpsllvq %xmm2, %xmm5, %xmm7 - vpsrlvq %xmm3, %xmm5, %xmm5 - vpor %xmm7, %xmm5, %xmm5 - vpxor %xmm6, %xmm5, %xmm5 - vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0] - movl $255, %eax - vmovq %rax, %xmm7 - vpslldq $8, %xmm7, %xmm7 ## xmm7 = zero,zero,zero,zero,zero,zero,zero,zero,xmm7[0,1,2,3,4,5,6,7] - vpxor %xmm7, %xmm4, %xmm4 - vpxor %xmm4, %xmm6, %xmm4 - vpaddq %xmm5, %xmm4, %xmm4 - vpsllvq %xmm0, %xmm5, %xmm6 - vpsrlvq %xmm1, %xmm5, %xmm5 - vpor %xmm6, %xmm5, %xmm5 - vpxor %xmm4, %xmm5, %xmm5 - vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0] - vpaddq %xmm5, %xmm4, %xmm4 - vpsllvq %xmm2, %xmm5, %xmm6 - vpsrlvq %xmm3, %xmm5, %xmm5 - vpor %xmm6, %xmm5, %xmm5 - vpxor %xmm4, %xmm5, %xmm5 - vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0] - vpaddq %xmm5, %xmm4, %xmm4 - vpsllvq %xmm0, %xmm5, %xmm6 - vpsrlvq %xmm1, %xmm5, %xmm5 - vpor %xmm6, %xmm5, %xmm5 - vpxor %xmm4, %xmm5, %xmm5 - vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0] - vpaddq %xmm5, %xmm4, %xmm4 - vpsllvq %xmm2, %xmm5, %xmm6 - vpsrlvq %xmm3, %xmm5, %xmm5 - vpor %xmm6, %xmm5, %xmm5 - vpxor %xmm4, %xmm5, %xmm5 - vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0] - vpaddq %xmm5, %xmm4, %xmm4 - vpsllvq %xmm0, %xmm5, %xmm6 - vpsrlvq %xmm1, %xmm5, %xmm5 - vpor %xmm6, %xmm5, %xmm5 - vpxor %xmm4, %xmm5, %xmm5 - vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0] - vpaddq %xmm5, %xmm4, %xmm4 - vpsllvq %xmm2, %xmm5, %xmm6 - vpsrlvq %xmm3, %xmm5, %xmm5 - vpor %xmm6, %xmm5, %xmm5 - vpxor %xmm4, %xmm5, %xmm5 - vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0] - vpaddq %xmm5, %xmm4, %xmm4 - vpsllvq %xmm0, %xmm5, %xmm0 - vpsrlvq %xmm1, %xmm5, %xmm1 - vpor %xmm0, %xmm1, %xmm0 - vpxor %xmm4, %xmm0, %xmm0 - vpshufd $30, %xmm4, %xmm1 ## xmm1 = xmm4[2,3,1,0] - vpaddq %xmm0, %xmm1, %xmm1 - vpsllvq %xmm2, %xmm0, %xmm2 - vpsrlvq %xmm3, %xmm0, %xmm0 - vpor %xmm2, %xmm0, %xmm0 - vpshufd $30, %xmm1, %xmm2 ## xmm2 = xmm1[2,3,1,0] - vpxor %xmm2, %xmm1, %xmm1 - vpxor %xmm1, %xmm0, %xmm0 - vpshufd $78, %xmm0, %xmm1 ## xmm1 = xmm0[2,3,0,1] - vpxor %xmm1, %xmm0, %xmm0 - vmovq %xmm0, %rax - addq $40, %rsp - popq %rbx - popq %rbp - retq - .cfi_endproc -FN_END siphash_avx2 diff --git a/src/libcryptobox/siphash/constants.S b/src/libcryptobox/siphash/constants.S deleted file mode 100644 index 0fb27c75e..000000000 --- a/src/libcryptobox/siphash/constants.S +++ /dev/null @@ -1,43 +0,0 @@ -SECTION_RODATA - -.LC0: - .quad 8317987319222330741 - .quad 7237128888997146477 - .align 16 -.LC1: - .quad 7816392313619706465 - .quad 8387220255154660723 - .align 16 -.LC2: - .quad -1 - .quad 0 - .align 16 -.LC3: - .quad 0 - .quad 255 - .align 16 -/* For AVX 2 */ -LCPI0_0: - .quad 8317987319222330741 ## 0x736f6d6570736575 - .quad 7237128888997146477 ## 0x646f72616e646f6d - .align 16 -LCPI0_1: - .quad 7816392313619706465 ## 0x6c7967656e657261 - .quad 8387220255154660723 ## 0x7465646279746573 - .align 16 -LCPI0_2: - .quad 13 ## 0xd - .quad 16 ## 0x10 - .align 16 -LCPI0_3: - .quad 51 ## 0x33 - .quad 48 ## 0x30 - .align 16 -LCPI0_4: - .quad 17 ## 0x11 - .quad 21 ## 0x15 - 
.align 16 -LCPI0_5: - .quad 47 ## 0x2f - .quad 43 ## 0x2b - .align 16 diff --git a/src/libcryptobox/siphash/ref.c b/src/libcryptobox/siphash/ref.c deleted file mode 100644 index cbd244f5f..000000000 --- a/src/libcryptobox/siphash/ref.c +++ /dev/null @@ -1,144 +0,0 @@ -/* Copyright (c) 2015, Vsevolod Stakhov - * Copyright (c) 2012-2014 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> - * Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -/* default: SipHash-2-4 */ -#define cROUNDS 2 -#define dROUNDS 4 - -#define ROTL(x,b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) ) - -#if BYTE_ORDER != LITTLE_ENDIAN -#define U8TO64_LE(p) \ - (((uint64_t)((p)[0]) << 0) | \ - ((uint64_t)((p)[1]) << 8) | \ - ((uint64_t)((p)[2]) << 16) | \ - ((uint64_t)((p)[3]) << 24) | \ - ((uint64_t)((p)[4]) << 32) | \ - ((uint64_t)((p)[5]) << 40) | \ - ((uint64_t)((p)[6]) << 48) | \ - ((uint64_t)((p)[7]) << 56)) -#else -#define U8TO64_LE(p) (*(uint64_t*)(p)) -#endif - -#define SIPROUND \ - do { \ - v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ - v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ - v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ - v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ - } while(0) - - -uint64_t -siphash_ref (const unsigned char k[16], const unsigned char *in, const uint64_t inlen) -{ - /* "somepseudorandomlygeneratedbytes" */ - uint64_t v0 = 0x736f6d6570736575ULL; - uint64_t v1 = 0x646f72616e646f6dULL; - uint64_t v2 = 0x6c7967656e657261ULL; - uint64_t v3 = 0x7465646279746573ULL; - uint64_t b; - uint64_t k0 = U8TO64_LE(k); - uint64_t k1 = U8TO64_LE(k + 8); - uint64_t m; - int i; - const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); - const int left = inlen & 7; - b = ((uint64_t) inlen) << 56; - v3 ^= k1; - v2 ^= k0; - v1 ^= k1; - v0 ^= k0; - -#ifdef DOUBLE - v1 ^= 0xee; -#endif - - for (; in != end; in += 8) { - m = U8TO64_LE(in); - v3 ^= m; - - for (i = 0; i < cROUNDS; ++i) - SIPROUND - ; - - v0 ^= m; - } - - switch (left) { - case 7: - b |= ((uint64_t) in[6]) << 48; - case 6: - b |= ((uint64_t) in[5]) << 40; - case 5: - b |= ((uint64_t) in[4]) << 32; - case 4: - b |= ((uint64_t) in[3]) << 24; - case 3: - b |= ((uint64_t) in[2]) << 16; - case 2: - b |= ((uint64_t) in[1]) << 8; - case 1: - b |= ((uint64_t) 
-            in[0]);
-        break;
-    case 0:
-        break;
-    }
-
-    v3 ^= b;
-
-    for (i = 0; i < cROUNDS; ++i)
-        SIPROUND
-        ;
-
-    v0 ^= b;
-
-#ifndef DOUBLE
-    v2 ^= 0xff;
-#else
-    v2 ^= 0xee;
-#endif
-
-    for (i = 0; i < dROUNDS; ++i)
-        SIPROUND
-        ;
-
-    b = v0 ^ v1 ^ v2 ^ v3;
-    return b;
-
-#ifdef DOUBLE
-    v1 ^= 0xdd;
-
-    TRACE;
-    for( i=0; i<dROUNDS; ++i ) SIPROUND;
-
-    b = v0 ^ v1 ^ v2 ^ v3;
-
-    return b;
-#endif
-}
diff --git a/src/libcryptobox/siphash/siphash.c b/src/libcryptobox/siphash/siphash.c
deleted file mode 100644
index 0c91ff7ab..000000000
--- a/src/libcryptobox/siphash/siphash.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "config.h"
-#include "cryptobox.h"
-#include "siphash.h"
-#include "platform_config.h"
-#include "ottery.h"
-#include <stdbool.h>
-
-extern unsigned long cpu_config;
-
-typedef struct siphash_impl_t
-{
-    unsigned long cpu_flags;
-    const char *desc;
-
-    uint64_t (*siphash) (const unsigned char k[16], const unsigned char *in, const uint64_t inlen);
-} siphash_impl_t;
-
-#define SIPHASH_DECLARE(ext) \
-    uint64_t siphash_##ext(const unsigned char k[16], const unsigned char *in, const uint64_t inlen);
-
-#define SIPHASH_IMPL(cpuflags, desc, ext) \
-    {(cpuflags), desc, siphash_##ext}
-
-
-SIPHASH_DECLARE(ref)
-#define SIPHASH_GENERIC SIPHASH_IMPL(0, "generic", ref)
-#if defined(HAVE_SSE41) && defined(__i386__)
-SIPHASH_DECLARE(sse41)
-#define SIPHASH_SSE41 SIPHASH_IMPL(CPUID_SSE41, "sse41", sse41)
-#endif
-#if defined(HAVE_AVX2) && defined(__x86_64__)
-SIPHASH_DECLARE(avx2)
-#define SIPHASH_AVX2 SIPHASH_IMPL(CPUID_AVX2, "avx2", avx2)
-#endif
-
-/* list implementations from most optimized to least, with generic as the last entry */
-static const siphash_impl_t siphash_list[] = {
-    SIPHASH_GENERIC,
-#if defined(SIPHASH_AVX2)
-    SIPHASH_AVX2,
-#endif
-#if defined(SIPHASH_SSE41)
-    SIPHASH_SSE41,
-#endif
-};
-
-static const siphash_impl_t *siphash_opt = &siphash_list[0];
-
-static bool
-siphash_test_impl (const siphash_impl_t *impl)
-{
-    static const unsigned char vectors[64][8] = {
-        { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
-        { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
-        { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
-        { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
-        { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
-        { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
-        { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
-        { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
-        { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
-        { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
-        { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
-        { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
-        { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
-        { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
-        { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
-        { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
-        { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
-        { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
-        { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
-        { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
-        { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
-        { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
-        { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
-        { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
-        { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
-        { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
-        { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
-        { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
-        { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
-        { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
-        { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
-        { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
-        { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
-        { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
-        { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
-        { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
-        { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
-        { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
-        { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
-        { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
-        { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
-        { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
-        { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
-        { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
-        { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
-        { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
-        { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
-        { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
-        { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
-        { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
-        { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
-        { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
-        { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
-        { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
-        { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
-        { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
-        { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
-        { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
-        { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
-        { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
-        { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
-        { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
-        { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
-        { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, }
-    };
-    unsigned char in[64];
-    static const unsigned char k[] ={
-        '\000', '\001', '\002', '\003', '\004',
-        '\005', '\006', '\007', '\010', '\011',
-        '\012', '\013', '\014', '\015', '\016', '\017'
-    };
-    size_t i;
-    union {
-        guint64 m;
-        guchar c[sizeof (guint64)];
-    } r;
-
-    for (i = 0; i < sizeof in; ++i) {
-        in[i] = i;
-        r.m = impl->siphash (k, in, i);
-
-        if (memcmp (r.c, vectors[i], sizeof (r)) != 0) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-const char *
-siphash_load(void)
-{
-    guint i;
-
-    if (cpu_config != 0) {
-        for (i = 0; i < G_N_ELEMENTS(siphash_list); i++) {
-            if (siphash_list[i].cpu_flags & cpu_config) {
-                siphash_opt = &siphash_list[i];
-                g_assert (siphash_test_impl (siphash_opt));
-                break;
-            }
-        }
-    }
-
-    return siphash_opt->desc;
-}
-
-void siphash24 (unsigned char *out, const unsigned char *in,
-        unsigned long long inlen, const unsigned char *k)
-{
-    uint64_t r;
-
-    r = siphash_opt->siphash (k, in, inlen);
-    memcpy (out, &r, sizeof (r));
-}
-
-
-size_t
-siphash24_test (bool generic, size_t niters, size_t len)
-{
-    size_t cycles;
-    guchar *in, k[16];
-    const siphash_impl_t *impl;
-
-    g_assert (len > 0);
-    in = g_malloc (len);
-    ottery_rand_bytes (k, sizeof (k));
-    ottery_rand_bytes (in, len);
-
-    impl = generic ? &siphash_list[0] : siphash_opt;
-
-    for (cycles = 0; cycles < niters; cycles ++) {
-        impl->siphash (k, in, len);
-    }
-
-    return cycles;
-}
-
-bool
-siphash24_fuzz (size_t cycles)
-{
-    size_t i, len;
-    guint64 t, r;
-    guchar in[8192], k[16];
-
-    for (i = 0; i < cycles; i ++) {
-        ottery_rand_bytes (k, sizeof (k));
-        len = ottery_rand_range (sizeof (in) - 1);
-        ottery_rand_bytes (in, len);
-
-        t = siphash_list[0].siphash (k, in, len);
-        r = siphash_opt->siphash (k, in, len);
-
-        if (t != r) {
-            return false;
-        }
-    }
-
-    return true;
-}
diff --git a/src/libcryptobox/siphash/siphash.h b/src/libcryptobox/siphash/siphash.h
deleted file mode 100644
index c6ceb5fb3..000000000
--- a/src/libcryptobox/siphash/siphash.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef SIPHASH_H_
-#define SIPHASH_H_
-
-#include <stddef.h>
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-const char* siphash_load (void);
-void siphash24 (unsigned char *out,
-        const unsigned char *in,
-        unsigned long long inlen,
-        const unsigned char *k);
-#if defined(__cplusplus)
-}
-#endif
-
-#endif /* SIPHASH_H_ */
diff --git a/src/libcryptobox/siphash/sse41.S b/src/libcryptobox/siphash/sse41.S
deleted file mode 100644
index 92c15671a..000000000
--- a/src/libcryptobox/siphash/sse41.S
+++ /dev/null
@@ -1,188 +0,0 @@
-#include "../macro.S"
-#include "constants.S"
-
-/*
- * Generated by gcc-4.9 from siphash sse41 implementation written by
- * Samuel Neves and submitted to supercop competition
- */
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN siphash_sse41
-siphash_sse41_local:
-
-    movdqu (%rdi), %xmm1
-    movq %rdx, %rcx
-    xorl %eax, %eax
-    andq $-8, %rcx
-    movdqa .LC0(%rip), %xmm4
-    pxor %xmm1, %xmm4
-    pxor .LC1(%rip), %xmm1
-    movdqa %xmm4, %xmm0
-    punpckhqdq %xmm1, %xmm4
-    punpcklqdq %xmm1, %xmm0
-    je .L2
-    pxor %xmm3, %xmm3
-    .p2align 4,,10
-    .p2align 3
-.L3:
-    movdqa %xmm3, %xmm2
-    movdqa %xmm3, %xmm1
-    pinsrq $0, (%rsi,%rax), %xmm2
-    addq $8, %rax
-    cmpq %rcx, %rax
-    punpcklqdq %xmm2, %xmm1
-    pxor %xmm1, %xmm4
-    movdqa %xmm4, %xmm1
-    paddq %xmm4, %xmm0
-    movdqa %xmm4, %xmm5
-    pshufhw $147, %xmm4, %xmm4
-    psrlq $51, %xmm1
-    psllq $13, %xmm5
-    pxor %xmm5, %xmm1
-    pblendw $240, %xmm4, %xmm1
-    pxor %xmm0, %xmm1
-    pshufd $30, %xmm0, %xmm0
-    movdqa %xmm1, %xmm4
-    movdqa %xmm1, %xmm5
-    paddq %xmm1, %xmm0
-    psrlq $47, %xmm4
-    psllq $17, %xmm5
-    pxor %xmm5, %xmm4
-    movdqa %xmm1, %xmm5
-    psllq $21, %xmm1
-    psrlq $43, %xmm5
-    pxor %xmm5, %xmm1
-    pblendw $240, %xmm1, %xmm4
-    pxor %xmm0, %xmm4
-    pshufd $30, %xmm0, %xmm0
-    movdqa %xmm4, %xmm1
-    movdqa %xmm4, %xmm5
-    paddq %xmm4, %xmm0
-    pshufhw $147, %xmm4, %xmm4
-    psrlq $51, %xmm1
-    psllq $13, %xmm5
-    pxor %xmm5, %xmm1
-    pblendw $240, %xmm4, %xmm1
-    pxor %xmm0, %xmm1
-    pshufd $30, %xmm0, %xmm0
-    movdqa %xmm1, %xmm4
-    movdqa %xmm1, %xmm5
-    paddq %xmm1, %xmm0
-    psrlq $47, %xmm4
-    psllq $17, %xmm5
-    pxor %xmm5, %xmm4
-    movdqa %xmm1, %xmm5
-    psllq $21, %xmm1
-    psrlq $43, %xmm5
-    pxor %xmm5, %xmm1
-    pblendw $240, %xmm1, %xmm4
-    pxor %xmm0, %xmm4
-    pshufd $30, %xmm0, %xmm0
-    pxor %xmm2, %xmm0
-    jb .L3
-.L2:
-    movl %edx, %ecx
-    pxor %xmm1, %xmm1
-    andl $7, %ecx
-    movdqa .LC2(%rip), %xmm2
-    negl %ecx
-    sall $24, %edx
-    pinsrq $0, (%rsi,%rax), %xmm1
-    leal 64(,%rcx,8), %edi
-    movl $4, %eax
-    movl %edi, -24(%rsp)
-    movd -24(%rsp), %xmm7
-    psrlq %xmm7, %xmm2
-    pand %xmm1, %xmm2
-    pxor %xmm1, %xmm1
-    pinsrd $1, %edx, %xmm1
-    pxor %xmm1, %xmm2
-    pxor %xmm1, %xmm1
-    punpcklqdq %xmm2, %xmm1
-    pxor %xmm1, %xmm4
-    movdqa %xmm4, %xmm1
-    paddq %xmm4, %xmm0
-    movdqa %xmm4, %xmm3
-    pshufhw $147, %xmm4, %xmm4
-    psrlq $51, %xmm1
-    psllq $13, %xmm3
-    pxor %xmm3, %xmm1
-    pshufd $30, %xmm0, %xmm3
-    pblendw $240, %xmm4, %xmm1
-    pxor %xmm0, %xmm1
-    movdqa %xmm1, %xmm0
-    paddq %xmm1, %xmm3
-    movdqa %xmm1, %xmm4
-    psrlq $47, %xmm0
-    psllq $17, %xmm4
-    pxor %xmm4, %xmm0
-    movdqa %xmm1, %xmm4
-    psllq $21, %xmm1
-    psrlq $43, %xmm4
-    pxor %xmm4, %xmm1
-    movdqa %xmm0, %xmm6
-    pblendw $240, %xmm1, %xmm6
-    movdqa %xmm6, %xmm1
-    pxor %xmm3, %xmm1
-    pshufd $30, %xmm3, %xmm3
-    movdqa %xmm1, %xmm0
-    movdqa %xmm1, %xmm4
-    paddq %xmm1, %xmm3
-    pshufhw $147, %xmm1, %xmm1
-    psrlq $51, %xmm0
-    psllq $13, %xmm4
-    pxor %xmm0, %xmm4
-    pblendw $240, %xmm1, %xmm4
-    pxor %xmm3, %xmm4
-    pshufd $30, %xmm3, %xmm3
-    movdqa %xmm4, %xmm1
-    movdqa %xmm4, %xmm0
-    paddq %xmm4, %xmm3
-    psrlq $47, %xmm1
-    psllq $17, %xmm0
-    pxor %xmm1, %xmm0
-    movdqa %xmm4, %xmm1
-    psllq $21, %xmm4
-    psrlq $43, %xmm1
-    pxor %xmm1, %xmm4
-    pblendw $240, %xmm4, %xmm0
-    pxor %xmm3, %xmm0
-    pshufd $30, %xmm3, %xmm3
-    pxor %xmm3, %xmm2
-    pxor .LC3(%rip), %xmm2
-.L4:
-    movdqa %xmm0, %xmm1
-    paddq %xmm0, %xmm2
-    subq $1, %rax
-    movdqa %xmm0, %xmm3
-    pshufhw $147, %xmm0, %xmm0
-    psrlq $51, %xmm1
-    psllq $13, %xmm3
-    pxor %xmm3, %xmm1
-    pblendw $240, %xmm0, %xmm1
-    pxor %xmm2, %xmm1
-    pshufd $30, %xmm2, %xmm2
-    movdqa %xmm1, %xmm0
-    movdqa %xmm1, %xmm3
-    paddq %xmm1, %xmm2
-    psrlq $47, %xmm0
-    psllq $17, %xmm3
-    pxor %xmm3, %xmm0
-    movdqa %xmm1, %xmm3
-    psllq $21, %xmm1
-    psrlq $43, %xmm3
-    pxor %xmm3, %xmm1
-    pblendw $240, %xmm1, %xmm0
-    pxor %xmm2, %xmm0
-    pshufd $30, %xmm2, %xmm2
-    jne .L4
-    pxor %xmm2, %xmm0
-    pxor %xmm1, %xmm1
-    movhlps %xmm0, %xmm1
-    pxor %xmm1, %xmm0
-    movaps %xmm0, -24(%rsp)
-    movq -24(%rsp), %rax
-    ret
-
-FN_END siphash_sse41
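Note: the hand-written SipHash code removed above (the reference C implementation, the SSE4.1/AVX2 assembly, the cpu_config dispatch table and the self-test/fuzz helpers) is superseded by libsodium, whose crypto_shorthash() implements the same SipHash-2-4 construction with a 16-byte key (crypto_shorthash_KEYBYTES) and an 8-byte output (crypto_shorthash_BYTES). The following is a minimal illustrative sketch of the replacement call, not part of this diff; the message and key below are invented for the example.

/* Illustrative sketch only -- not part of this commit's diff. */
#include <sodium.h>
#include <stdio.h>

int
main (void)
{
    unsigned char k[crypto_shorthash_KEYBYTES];  /* 16-byte key */
    unsigned char out[crypto_shorthash_BYTES];   /* 8-byte hash */
    static const unsigned char in[] = "example message";
    size_t i;

    if (sodium_init () < 0) {
        return 1; /* libsodium failed to initialise */
    }

    /* random key, much as the removed test helpers used ottery_rand_bytes() */
    randombytes_buf (k, sizeof (k));

    /* direct counterpart of the removed siphash24 (out, in, inlen, k) */
    crypto_shorthash (out, in, sizeof (in) - 1, k);

    for (i = 0; i < sizeof (out); i++) {
        printf ("%02x", out[i]);
    }
    printf ("\n");

    return 0;
}

Since libsodium selects an optimised implementation internally, the runtime siphash_load()/siphash_test_impl() machinery and the generic-versus-optimised fuzz comparison deleted above have no equivalent left to port.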