Diffstat (limited to 'src/libcryptobox/blake2')
-rw-r--r--   src/libcryptobox/blake2/avx.S               |  689
-rw-r--r--   src/libcryptobox/blake2/blake2-internal.h   |   29
-rw-r--r--   src/libcryptobox/blake2/blake2.c            |  297
-rw-r--r--   src/libcryptobox/blake2/blake2.h            |   65
-rw-r--r--   src/libcryptobox/blake2/constants.S         |   30
-rw-r--r--   src/libcryptobox/blake2/ref.c               |  185
-rw-r--r--   src/libcryptobox/blake2/x86-32.S            | 1080
-rw-r--r--   src/libcryptobox/blake2/x86-64.S            | 1754
8 files changed, 0 insertions, 4129 deletions
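For context, this commit drops rspamd's bundled BLAKE2b code: a portable reference implementation (ref.c) plus x86, x86-64 and AVX assembly backends, with an accelerated backend selected at runtime by blake2b_load() when the CPU supports one. As a reading aid, below is a minimal sketch of how the public API declared in the deleted blake2.h was driven; it is illustrative only and not part of the removed sources.

/* Hypothetical caller of the removed blake2.h API; not from the rspamd tree. */
#include <stdio.h>
#include <string.h>
#include "blake2.h"

int main(void)
{
	unsigned char digest[BLAKE2B_OUTBYTES]; /* BLAKE2b emits a 64-byte digest here */
	const char *msg = "hello world";

	/* Select a backend (generic, x86 or avx) based on detected CPU flags. */
	printf("backend: %s\n", blake2b_load());

	/* One-shot hashing. */
	blake2b(digest, (const unsigned char *) msg, strlen(msg));

	/* The equivalent incremental interface. */
	blake2b_state S;
	blake2b_init(&S);
	blake2b_update(&S, (const unsigned char *) msg, strlen(msg));
	blake2b_final(&S, digest); /* also zeroes the state; re-init before reuse */

	for (size_t i = 0; i < BLAKE2B_OUTBYTES; i++) {
		printf("%02x", digest[i]);
	}
	printf("\n");
	return 0;
}

Note that the keyed variant in the deleted blake2.c (blake2b_keyed_init) handled keys longer than BLAKE2B_KEYBYTES by compressing them with an extra BLAKE2b pass, a behaviour the in-tree comment itself flags as not compatible with the original BLAKE2 keyed construction, though still safe.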
diff --git a/src/libcryptobox/blake2/avx.S b/src/libcryptobox/blake2/avx.S deleted file mode 100644 index e569f0ba7..000000000 --- a/src/libcryptobox/blake2/avx.S +++ /dev/null @@ -1,689 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -SECTION_TEXT - -GLOBAL_HIDDEN_FN_EXT blake2b_blocks_avx, 4, 16 -pushq %rbp -movq %rsp, %rbp -andq $-64, %rsp -pushq %r12 -pushq %r13 -pushq %r14 -pushq %r15 -pushq %rbx -subq $344, %rsp -LOAD_VAR_PIC 48+blake2b_constants, %r9 -LOAD_VAR_PIC blake2b_constants_ssse3, %rax -leaq 16(%rax), %r8 -vmovdqu 80(%rdi), %xmm0 -cmpq $128, %rdx -vpxor (%r9), %xmm0, %xmm0 -movl $128, %r9d -vmovdqu (%rax), %xmm12 -cmovbe %rdx, %r9 -vmovdqu (%r8), %xmm1 -movq 64(%rdi), %r8 -movq 72(%rdi), %rax -cmpq $0, 80(%rdi) -je blake2b_blocks_avx_L21 -blake2b_blocks_avx_L2: -cmpq $128, %rdx -je blake2b_blocks_avx_L21 -blake2b_blocks_avx_L3: -lea (%rsp), %r10 -testq $64, %rdx -je blake2b_blocks_avx_L5 -blake2b_blocks_avx_L4: -vmovdqu (%rsi), %xmm2 -vmovdqu %xmm2, (%rsp) -lea 64(%rsp), %r10 -vmovdqu 16(%rsi), %xmm3 -vmovdqu %xmm3, 16(%rsp) -vpxor %xmm2, %xmm2, %xmm2 -vmovdqu 32(%rsi), %xmm4 -vmovdqu %xmm4, 32(%rsp) -vmovdqu 48(%rsi), %xmm5 -vmovdqu %xmm5, 48(%rsp) -addq $64, %rsi -jmp blake2b_blocks_avx_L6 -blake2b_blocks_avx_L5: -vpxor %xmm2, %xmm2, %xmm2 -vmovdqu %xmm2, 64(%rsp) -vmovdqu %xmm2, 80(%rsp) -vmovdqu %xmm2, 96(%rsp) -vmovdqu %xmm2, 112(%rsp) -blake2b_blocks_avx_L6: -vmovdqu %xmm2, (%r10) -vmovdqu %xmm2, 16(%r10) -vmovdqu %xmm2, 32(%r10) -vmovdqu %xmm2, 48(%r10) -testq $32, %rdx -je blake2b_blocks_avx_L8 -blake2b_blocks_avx_L7: -vmovdqu (%rsi), %xmm2 -vmovdqu %xmm2, (%r10) -vmovdqu 16(%rsi), %xmm3 -vmovdqu %xmm3, 16(%r10) -addq $32, %rsi -addq $32, %r10 -blake2b_blocks_avx_L8: -testq $16, %rdx -je blake2b_blocks_avx_L10 -blake2b_blocks_avx_L9: -vmovdqu (%rsi), %xmm2 -vmovdqu %xmm2, (%r10) -addq $16, %rsi -addq $16, %r10 -blake2b_blocks_avx_L10: -testq $8, %rdx -je blake2b_blocks_avx_L12 -blake2b_blocks_avx_L11: -movq (%rsi), %r11 -addq $8, %rsi -movq %r11, (%r10) -addq $8, %r10 -blake2b_blocks_avx_L12: -testq $4, %rdx -je blake2b_blocks_avx_L14 -blake2b_blocks_avx_L13: -movl (%rsi), %r11d -addq $4, %rsi -movl %r11d, (%r10) -addq $4, %r10 -blake2b_blocks_avx_L14: -testq $2, %rdx -je blake2b_blocks_avx_L16 -blake2b_blocks_avx_L15: -movzwl (%rsi), %r11d -addq $2, %rsi -movw %r11w, (%r10) -addq $2, %r10 -blake2b_blocks_avx_L16: -testq $1, %rdx -je blake2b_blocks_avx_L18 -blake2b_blocks_avx_L17: -movb (%rsi), %sil -movb %sil, (%r10) -blake2b_blocks_avx_L18: -lea (%rsp), %rsi -blake2b_blocks_avx_L21: -LOAD_VAR_PIC 32+blake2b_constants, %r10 -LOAD_VAR_PIC blake2b_constants, %r11 -vmovdqu (%rdi), %xmm5 -vmovdqu 16(%rdi), %xmm6 -vmovdqu 32(%rdi), %xmm7 -vmovdqu (%r10), %xmm4 -LOAD_VAR_PIC 16+blake2b_constants, %r10 -vmovdqu 48(%rdi), %xmm8 -vmovdqu (%r11), %xmm3 -vmovdqu %xmm3, 176(%rsp) -vmovdqu (%r10), %xmm2 -vmovdqu %xmm2, 160(%rsp) -vmovdqu %xmm4, 144(%rsp) -vmovdqu %xmm8, 240(%rsp) -vmovdqu %xmm7, 256(%rsp) -vmovdqu %xmm6, 224(%rsp) -vmovdqu %xmm5, 208(%rsp) -vmovdqu %xmm0, 192(%rsp) -movq %r9, 272(%rsp) -movq %rdi, 128(%rsp) -movq %rcx, 136(%rsp) -jmp blake2b_blocks_avx_L22 -# align to 31 mod 64 -.p2align 6 -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -nop -blake2b_blocks_avx_L25: -addq 136(%rsp), %rsi -addq $-128, %rdx -blake2b_blocks_avx_L22: -movq 272(%rsp), %rcx -addq %rcx, %r8 -cmpq %rcx, %r8 -lea 1(%rax), %rbx -vmovdqu (%rsi), %xmm15 -vmovdqu 16(%rsi), %xmm5 
-vmovdqu 32(%rsi), %xmm3 -vmovdqu 48(%rsi), %xmm6 -cmovb %rbx, %rax -vmovd %r8, %xmm7 -vpunpcklqdq %xmm5, %xmm15, %xmm2 -LOAD_VAR_PIC 96+blake2b_constants, %rcx -vpunpcklqdq %xmm6, %xmm3, %xmm8 -LOAD_VAR_PIC 224+blake2b_constants, %rbx -vpaddq 208(%rsp), %xmm2, %xmm0 -vpaddq 224(%rsp), %xmm8, %xmm10 -vmovd %rax, %xmm14 -vmovdqu 256(%rsp), %xmm4 -vmovdqu 240(%rsp), %xmm11 -vpunpcklqdq %xmm14, %xmm7, %xmm9 -vpaddq %xmm4, %xmm0, %xmm13 -vpaddq %xmm11, %xmm10, %xmm2 -vpxor 144(%rsp), %xmm9, %xmm0 -vpxor 192(%rsp), %xmm2, %xmm10 -vpxor %xmm13, %xmm0, %xmm8 -vpshufd $177, %xmm8, %xmm8 -vpshufd $177, %xmm10, %xmm7 -vpaddq 176(%rsp), %xmm8, %xmm14 -vpaddq 160(%rsp), %xmm7, %xmm9 -vpxor %xmm14, %xmm4, %xmm4 -vpxor %xmm9, %xmm11, %xmm11 -vpshufb %xmm1, %xmm4, %xmm4 -vpshufb %xmm1, %xmm11, %xmm0 -vpunpckhqdq %xmm5, %xmm15, %xmm15 -vpunpckhqdq %xmm6, %xmm3, %xmm6 -vpaddq %xmm15, %xmm13, %xmm13 -vpaddq %xmm6, %xmm2, %xmm6 -vpaddq %xmm4, %xmm13, %xmm10 -vpaddq %xmm0, %xmm6, %xmm15 -vpxor %xmm10, %xmm8, %xmm2 -vpxor %xmm15, %xmm7, %xmm8 -vpshufb %xmm12, %xmm2, %xmm5 -vpshufb %xmm12, %xmm8, %xmm2 -vpaddq %xmm5, %xmm14, %xmm6 -vpaddq %xmm2, %xmm9, %xmm7 -vpxor %xmm6, %xmm4, %xmm4 -vpxor %xmm7, %xmm0, %xmm9 -vpaddq %xmm4, %xmm4, %xmm14 -vpaddq %xmm9, %xmm9, %xmm13 -vpsrlq $63, %xmm4, %xmm0 -vpsrlq $63, %xmm9, %xmm11 -vpor %xmm14, %xmm0, %xmm8 -vpor %xmm13, %xmm11, %xmm4 -vpalignr $8, %xmm8, %xmm4, %xmm0 -vpalignr $8, %xmm4, %xmm8, %xmm14 -vmovdqu 64(%rsi), %xmm9 -vmovdqu 80(%rsi), %xmm8 -vmovdqu 96(%rsi), %xmm4 -vpunpcklqdq %xmm8, %xmm9, %xmm11 -vpaddq %xmm11, %xmm10, %xmm10 -vmovdqu 112(%rsi), %xmm11 -vpaddq %xmm0, %xmm10, %xmm13 -vpunpcklqdq %xmm11, %xmm4, %xmm10 -vpaddq %xmm10, %xmm15, %xmm15 -vpaddq %xmm14, %xmm15, %xmm15 -vpalignr $8, %xmm2, %xmm5, %xmm10 -vpalignr $8, %xmm5, %xmm2, %xmm5 -vpxor %xmm13, %xmm10, %xmm10 -vpxor %xmm15, %xmm5, %xmm2 -vpshufd $177, %xmm10, %xmm10 -vpshufd $177, %xmm2, %xmm2 -vpaddq %xmm10, %xmm7, %xmm7 -vpaddq %xmm2, %xmm6, %xmm5 -vpxor %xmm7, %xmm0, %xmm6 -vpxor %xmm5, %xmm14, %xmm14 -vpshufb %xmm1, %xmm6, %xmm0 -vpshufb %xmm1, %xmm14, %xmm6 -vpunpckhqdq %xmm8, %xmm9, %xmm14 -vpaddq %xmm14, %xmm13, %xmm13 -vpaddq %xmm0, %xmm13, %xmm14 -vpunpckhqdq %xmm11, %xmm4, %xmm13 -vpxor %xmm14, %xmm10, %xmm10 -vpaddq %xmm13, %xmm15, %xmm15 -vpshufb %xmm12, %xmm10, %xmm13 -vpaddq %xmm6, %xmm15, %xmm15 -vpaddq %xmm13, %xmm7, %xmm10 -vpxor %xmm15, %xmm2, %xmm2 -vpxor %xmm10, %xmm0, %xmm0 -vpshufb %xmm12, %xmm2, %xmm2 -vpaddq %xmm2, %xmm5, %xmm5 -vpxor %xmm5, %xmm6, %xmm7 -vpsrlq $63, %xmm0, %xmm6 -vpaddq %xmm0, %xmm0, %xmm0 -vpor %xmm0, %xmm6, %xmm6 -vpsrlq $63, %xmm7, %xmm0 -vpaddq %xmm7, %xmm7, %xmm7 -vpor %xmm7, %xmm0, %xmm0 -vpalignr $8, %xmm0, %xmm6, %xmm7 -vpalignr $8, %xmm6, %xmm0, %xmm6 -vpunpcklqdq %xmm3, %xmm11, %xmm0 -vpaddq %xmm0, %xmm14, %xmm14 -vpaddq %xmm7, %xmm14, %xmm0 -vpunpckhqdq %xmm4, %xmm9, %xmm14 -vpaddq %xmm14, %xmm15, %xmm15 -vpaddq %xmm6, %xmm15, %xmm14 -vpalignr $8, %xmm13, %xmm2, %xmm15 -vpxor %xmm0, %xmm15, %xmm15 -vpshufd $177, %xmm15, %xmm15 -vpalignr $8, %xmm2, %xmm13, %xmm2 -vpxor %xmm14, %xmm2, %xmm13 -vpaddq %xmm15, %xmm5, %xmm2 -vpshufd $177, %xmm13, %xmm13 -vpxor %xmm2, %xmm7, %xmm5 -vpunpcklqdq %xmm9, %xmm8, %xmm7 -vpaddq %xmm13, %xmm10, %xmm10 -vpaddq %xmm7, %xmm0, %xmm9 -vmovdqu 48(%rsi), %xmm0 -vpshufb %xmm1, %xmm5, %xmm5 -vpxor %xmm10, %xmm6, %xmm6 -vpshufb %xmm1, %xmm6, %xmm6 -vpaddq %xmm5, %xmm9, %xmm9 -vpalignr $8, %xmm11, %xmm0, %xmm11 -vpxor %xmm9, %xmm15, %xmm15 -vpaddq %xmm11, %xmm14, %xmm7 -vpshufb %xmm12, %xmm15, %xmm11 -vpaddq %xmm6, 
%xmm7, %xmm14 -vpaddq %xmm11, %xmm2, %xmm2 -vpxor %xmm14, %xmm13, %xmm13 -vpxor %xmm2, %xmm5, %xmm5 -vpshufb %xmm12, %xmm13, %xmm13 -vpaddq %xmm13, %xmm10, %xmm10 -vpxor %xmm10, %xmm6, %xmm15 -vpsrlq $63, %xmm5, %xmm6 -vpaddq %xmm5, %xmm5, %xmm5 -vpsrlq $63, %xmm15, %xmm7 -vpor %xmm5, %xmm6, %xmm6 -vpaddq %xmm15, %xmm15, %xmm15 -vpor %xmm15, %xmm7, %xmm5 -vpalignr $8, %xmm6, %xmm5, %xmm15 -vpalignr $8, %xmm5, %xmm6, %xmm5 -vpshufd $78, (%rsi), %xmm6 -vpaddq %xmm6, %xmm9, %xmm9 -vpunpckhqdq %xmm3, %xmm8, %xmm3 -vpaddq %xmm3, %xmm14, %xmm6 -vpaddq %xmm15, %xmm9, %xmm9 -vpaddq %xmm5, %xmm6, %xmm8 -vpalignr $8, %xmm13, %xmm11, %xmm3 -vpalignr $8, %xmm11, %xmm13, %xmm11 -vpxor %xmm9, %xmm3, %xmm7 -vpshufd $177, %xmm7, %xmm14 -vpxor %xmm8, %xmm11, %xmm13 -vpshufd $177, %xmm13, %xmm3 -vpaddq %xmm14, %xmm10, %xmm6 -vpaddq %xmm3, %xmm2, %xmm10 -vpxor %xmm6, %xmm15, %xmm2 -vmovdqu 16(%rsi), %xmm15 -vpshufb %xmm1, %xmm2, %xmm7 -vpxor %xmm10, %xmm5, %xmm2 -vpshufb %xmm1, %xmm2, %xmm5 -vpunpcklqdq %xmm15, %xmm4, %xmm4 -vpunpckhqdq %xmm15, %xmm0, %xmm0 -vpaddq %xmm4, %xmm9, %xmm2 -vpaddq %xmm0, %xmm8, %xmm8 -vpaddq %xmm7, %xmm2, %xmm2 -vpaddq %xmm5, %xmm8, %xmm0 -vpxor %xmm2, %xmm14, %xmm15 -vpxor %xmm0, %xmm3, %xmm9 -vpshufb %xmm12, %xmm15, %xmm15 -vpshufb %xmm12, %xmm9, %xmm3 -vpaddq %xmm15, %xmm6, %xmm8 -vpaddq %xmm3, %xmm10, %xmm6 -vpxor %xmm8, %xmm7, %xmm10 -vpxor %xmm6, %xmm5, %xmm5 -vpaddq %xmm5, %xmm5, %xmm9 -vpsrlq $63, %xmm10, %xmm4 -vpsrlq $63, %xmm5, %xmm7 -vpaddq %xmm10, %xmm10, %xmm10 -vpor %xmm10, %xmm4, %xmm13 -vpor %xmm9, %xmm7, %xmm11 -vpalignr $8, %xmm11, %xmm13, %xmm4 -vpalignr $8, %xmm13, %xmm11, %xmm7 -vpalignr $8, %xmm15, %xmm3, %xmm9 -vpalignr $8, %xmm3, %xmm15, %xmm10 -blake2b_blocks_avx_L23: -movzbl (%rcx), %edi -movzbl 2(%rcx), %r9d -movzbl 4(%rcx), %r10d -movzbl 6(%rcx), %r11d -vmovq (%rdi,%rsi), %xmm5 -vpinsrq $1, (%r9,%rsi), %xmm5, %xmm14 -vmovq (%r10,%rsi), %xmm3 -vpinsrq $1, (%r11,%rsi), %xmm3, %xmm15 -vpaddq %xmm14, %xmm2, %xmm2 -vpaddq %xmm15, %xmm0, %xmm0 -vpaddq %xmm4, %xmm2, %xmm2 -vpaddq %xmm7, %xmm0, %xmm0 -vpxor %xmm2, %xmm9, %xmm11 -vpxor %xmm0, %xmm10, %xmm10 -vpshufd $177, %xmm11, %xmm3 -movzbl 1(%rcx), %r12d -movzbl 5(%rcx), %r14d -vpshufd $177, %xmm10, %xmm5 -vpaddq %xmm3, %xmm6, %xmm6 -vpaddq %xmm5, %xmm8, %xmm9 -movzbl 3(%rcx), %r13d -vpxor %xmm6, %xmm4, %xmm14 -movzbl 7(%rcx), %r15d -vpxor %xmm9, %xmm7, %xmm15 -vmovq (%r12,%rsi), %xmm4 -vmovq (%r14,%rsi), %xmm11 -vpinsrq $1, (%r13,%rsi), %xmm4, %xmm7 -vpinsrq $1, (%r15,%rsi), %xmm11, %xmm13 -vpshufb %xmm1, %xmm14, %xmm8 -vpshufb %xmm1, %xmm15, %xmm14 -vpaddq %xmm7, %xmm2, %xmm2 -vpaddq %xmm13, %xmm0, %xmm0 -vpaddq %xmm8, %xmm2, %xmm4 -vpaddq %xmm14, %xmm0, %xmm7 -vpxor %xmm4, %xmm3, %xmm10 -vpxor %xmm7, %xmm5, %xmm3 -vpshufb %xmm12, %xmm10, %xmm11 -vpshufb %xmm12, %xmm3, %xmm10 -vpaddq %xmm11, %xmm6, %xmm13 -vpaddq %xmm10, %xmm9, %xmm9 -movzbl 8(%rcx), %edi -vpxor %xmm13, %xmm8, %xmm8 -movzbl 12(%rcx), %r10d -vpxor %xmm9, %xmm14, %xmm2 -movzbl 10(%rcx), %r9d -vpsrlq $63, %xmm8, %xmm6 -movzbl 14(%rcx), %r11d -vpsrlq $63, %xmm2, %xmm0 -vpaddq %xmm8, %xmm8, %xmm5 -vpaddq %xmm2, %xmm2, %xmm14 -vmovq (%rdi,%rsi), %xmm15 -vpor %xmm5, %xmm6, %xmm8 -vmovq (%r10,%rsi), %xmm3 -vpor %xmm14, %xmm0, %xmm6 -vpinsrq $1, (%r9,%rsi), %xmm15, %xmm5 -vpinsrq $1, (%r11,%rsi), %xmm3, %xmm0 -vpalignr $8, %xmm8, %xmm6, %xmm2 -vpalignr $8, %xmm6, %xmm8, %xmm14 -vpalignr $8, %xmm10, %xmm11, %xmm8 -vpalignr $8, %xmm11, %xmm10, %xmm11 -vpaddq %xmm5, %xmm4, %xmm4 -vpaddq %xmm0, %xmm7, %xmm7 -vpaddq %xmm2, %xmm4, %xmm15 -vpaddq 
%xmm14, %xmm7, %xmm0 -vpxor %xmm15, %xmm8, %xmm6 -vpxor %xmm0, %xmm11, %xmm10 -vpshufd $177, %xmm6, %xmm6 -vpshufd $177, %xmm10, %xmm8 -movzbl 9(%rcx), %r12d -movzbl 13(%rcx), %r14d -vpaddq %xmm6, %xmm9, %xmm4 -vpaddq %xmm8, %xmm13, %xmm7 -movzbl 11(%rcx), %r13d -vpxor %xmm4, %xmm2, %xmm9 -movzbl 15(%rcx), %r15d -vpxor %xmm7, %xmm14, %xmm2 -vmovq (%r12,%rsi), %xmm14 -addq $16, %rcx -vmovq (%r14,%rsi), %xmm3 -vpshufb %xmm1, %xmm9, %xmm13 -vpinsrq $1, (%r13,%rsi), %xmm14, %xmm5 -vpinsrq $1, (%r15,%rsi), %xmm3, %xmm9 -vpshufb %xmm1, %xmm2, %xmm11 -vpaddq %xmm5, %xmm15, %xmm15 -vpaddq %xmm9, %xmm0, %xmm0 -vpaddq %xmm13, %xmm15, %xmm2 -vpaddq %xmm11, %xmm0, %xmm0 -vpxor %xmm2, %xmm6, %xmm6 -vpxor %xmm0, %xmm8, %xmm8 -vpshufb %xmm12, %xmm6, %xmm14 -vpshufb %xmm12, %xmm8, %xmm15 -vpaddq %xmm14, %xmm4, %xmm8 -vpaddq %xmm15, %xmm7, %xmm6 -vpxor %xmm8, %xmm13, %xmm4 -vpxor %xmm6, %xmm11, %xmm11 -vpaddq %xmm4, %xmm4, %xmm10 -vpsrlq $63, %xmm4, %xmm7 -vpsrlq $63, %xmm11, %xmm13 -vpaddq %xmm11, %xmm11, %xmm4 -vpor %xmm10, %xmm7, %xmm3 -vpor %xmm4, %xmm13, %xmm11 -vpalignr $8, %xmm11, %xmm3, %xmm4 -vpalignr $8, %xmm3, %xmm11, %xmm7 -vpalignr $8, %xmm15, %xmm14, %xmm10 -vpalignr $8, %xmm14, %xmm15, %xmm9 -cmpq %rbx, %rcx -jb blake2b_blocks_avx_L23 -blake2b_blocks_avx_L24: -movq 32(%rsi), %r13 -movq (%rsi), %r10 -movq 48(%rsi), %r9 -vmovd %r13, %xmm13 -vpinsrq $1, %r9, %xmm13, %xmm14 -vmovd %r10, %xmm3 -movq 16(%rsi), %rbx -vpinsrq $1, %rbx, %xmm3, %xmm15 -vpaddq %xmm14, %xmm0, %xmm0 -vpaddq %xmm7, %xmm0, %xmm3 -vpxor %xmm3, %xmm10, %xmm10 -vpaddq %xmm15, %xmm2, %xmm2 -vpaddq %xmm4, %xmm2, %xmm5 -vpshufd $177, %xmm10, %xmm15 -vpxor %xmm5, %xmm9, %xmm9 -vpshufd $177, %xmm9, %xmm9 -vpaddq %xmm15, %xmm8, %xmm14 -vpaddq %xmm9, %xmm6, %xmm0 -vpxor %xmm14, %xmm7, %xmm7 -vpxor %xmm0, %xmm4, %xmm8 -vpshufb %xmm1, %xmm7, %xmm4 -vpshufb %xmm1, %xmm8, %xmm2 -vmovq 8(%rsi), %xmm7 -movq %r8, 288(%rsp) -movq 24(%rsi), %r8 -vpinsrq $1, %r8, %xmm7, %xmm6 -vpinsrq $1, %r10, %xmm7, %xmm7 -vpaddq %xmm6, %xmm5, %xmm13 -movq 40(%rsi), %rcx -movq 56(%rsi), %rdi -vpaddq %xmm2, %xmm13, %xmm13 -vmovd %rcx, %xmm5 -vpxor %xmm13, %xmm9, %xmm9 -vpinsrq $1, %rdi, %xmm5, %xmm10 -vpshufb %xmm12, %xmm9, %xmm5 -vpaddq %xmm10, %xmm3, %xmm3 -vpaddq %xmm4, %xmm3, %xmm11 -vpaddq %xmm5, %xmm0, %xmm3 -vpxor %xmm11, %xmm15, %xmm8 -vpshufb %xmm12, %xmm8, %xmm10 -vpaddq %xmm10, %xmm14, %xmm8 -vpxor %xmm3, %xmm2, %xmm14 -vpxor %xmm8, %xmm4, %xmm9 -vpsrlq $63, %xmm14, %xmm4 -vpsrlq $63, %xmm9, %xmm0 -vpaddq %xmm14, %xmm14, %xmm14 -movq 64(%rsi), %r15 -vpor %xmm14, %xmm4, %xmm6 -vpaddq %xmm9, %xmm9, %xmm4 -vmovq 96(%rsi), %xmm9 -vpor %xmm4, %xmm0, %xmm2 -movq 112(%rsi), %r14 -vmovd %r15, %xmm15 -vpinsrq $1, %r14, %xmm9, %xmm0 -vpinsrq $1, %rbx, %xmm9, %xmm9 -vpalignr $8, %xmm6, %xmm2, %xmm4 -vpalignr $8, %xmm2, %xmm6, %xmm2 -vpaddq %xmm0, %xmm11, %xmm11 -movq 80(%rsi), %r11 -vpinsrq $1, %r11, %xmm15, %xmm14 -vpaddq %xmm2, %xmm11, %xmm11 -vpalignr $8, %xmm10, %xmm5, %xmm15 -vpalignr $8, %xmm5, %xmm10, %xmm5 -vpxor %xmm11, %xmm5, %xmm10 -vpaddq %xmm14, %xmm13, %xmm13 -vpaddq %xmm4, %xmm13, %xmm6 -vpshufd $177, %xmm10, %xmm14 -vpxor %xmm6, %xmm15, %xmm13 -vpaddq %xmm14, %xmm3, %xmm0 -vpshufd $177, %xmm13, %xmm13 -vpaddq %xmm13, %xmm8, %xmm15 -vpxor %xmm0, %xmm2, %xmm8 -vpxor %xmm15, %xmm4, %xmm3 -vpshufb %xmm1, %xmm8, %xmm5 -vpshufb %xmm1, %xmm3, %xmm4 -vmovq 72(%rsi), %xmm8 -movq %rax, 296(%rsp) -movq 88(%rsi), %rax -vpinsrq $1, %rax, %xmm8, %xmm2 -movq 104(%rsi), %r12 -vpaddq %xmm2, %xmm6, %xmm6 -vpinsrq $1, %r12, %xmm8, %xmm8 -vmovd %r12, %xmm3 
-vpaddq %xmm4, %xmm6, %xmm10 -vpxor %xmm10, %xmm13, %xmm13 -movq %rsi, 280(%rsp) -movq 120(%rsi), %rsi -vpinsrq $1, %rsi, %xmm3, %xmm6 -vpshufb %xmm12, %xmm13, %xmm3 -vpaddq %xmm6, %xmm11, %xmm11 -vpaddq %xmm5, %xmm11, %xmm6 -vpxor %xmm6, %xmm14, %xmm14 -vpshufb %xmm12, %xmm14, %xmm2 -vpaddq %xmm3, %xmm15, %xmm14 -vpaddq %xmm2, %xmm0, %xmm0 -vpaddq %xmm8, %xmm6, %xmm6 -vpxor %xmm14, %xmm4, %xmm4 -vpxor %xmm0, %xmm5, %xmm13 -vpsrlq $63, %xmm4, %xmm5 -vpsrlq $63, %xmm13, %xmm15 -vpaddq %xmm4, %xmm4, %xmm4 -vpaddq %xmm13, %xmm13, %xmm13 -vpor %xmm4, %xmm5, %xmm11 -vpor %xmm13, %xmm15, %xmm5 -vpalignr $8, %xmm5, %xmm11, %xmm15 -vmovd %r11, %xmm4 -vpalignr $8, %xmm11, %xmm5, %xmm5 -vmovd %r14, %xmm11 -vpinsrq $1, %r13, %xmm11, %xmm13 -vpinsrq $1, %r15, %xmm4, %xmm11 -vpaddq %xmm5, %xmm6, %xmm6 -vpaddq %xmm13, %xmm10, %xmm10 -vpaddq %xmm15, %xmm10, %xmm10 -vpalignr $8, %xmm3, %xmm2, %xmm13 -vpxor %xmm10, %xmm13, %xmm8 -vmovd %rsi, %xmm13 -vpshufd $177, %xmm8, %xmm8 -vpalignr $8, %xmm2, %xmm3, %xmm3 -vpxor %xmm6, %xmm3, %xmm2 -vpaddq %xmm8, %xmm0, %xmm3 -vpaddq %xmm11, %xmm10, %xmm10 -vpxor %xmm3, %xmm15, %xmm0 -vpshufd $177, %xmm2, %xmm2 -vpshufb %xmm1, %xmm0, %xmm0 -vpaddq %xmm2, %xmm14, %xmm14 -vpxor %xmm14, %xmm5, %xmm5 -vpshufb %xmm1, %xmm5, %xmm15 -vpaddq %xmm0, %xmm10, %xmm5 -vpinsrq $1, %r9, %xmm13, %xmm10 -vpaddq %xmm10, %xmm6, %xmm6 -vpaddq %xmm15, %xmm6, %xmm13 -vpxor %xmm5, %xmm8, %xmm10 -vpxor %xmm13, %xmm2, %xmm8 -vpshufb %xmm12, %xmm10, %xmm4 -vpshufb %xmm12, %xmm8, %xmm6 -vpaddq %xmm4, %xmm3, %xmm8 -vpaddq %xmm6, %xmm14, %xmm2 -vpxor %xmm8, %xmm0, %xmm14 -vpxor %xmm2, %xmm15, %xmm15 -vpaddq %xmm14, %xmm14, %xmm0 -vpsrlq $63, %xmm14, %xmm3 -vpsrlq $63, %xmm15, %xmm14 -vpor %xmm0, %xmm3, %xmm10 -vpaddq %xmm15, %xmm15, %xmm3 -vpor %xmm3, %xmm14, %xmm0 -vpaddq %xmm7, %xmm5, %xmm14 -vpalignr $8, %xmm10, %xmm0, %xmm11 -vmovd %rax, %xmm5 -vpaddq %xmm11, %xmm14, %xmm7 -vpinsrq $1, %rcx, %xmm5, %xmm14 -vpalignr $8, %xmm0, %xmm10, %xmm15 -vpaddq %xmm9, %xmm7, %xmm3 -vmovd %rdi, %xmm9 -vpinsrq $1, %r8, %xmm9, %xmm10 -vpaddq %xmm14, %xmm13, %xmm13 -vpaddq %xmm15, %xmm13, %xmm5 -vpalignr $8, %xmm6, %xmm4, %xmm13 -vpalignr $8, %xmm4, %xmm6, %xmm4 -vpxor %xmm7, %xmm13, %xmm14 -vpxor %xmm5, %xmm4, %xmm6 -vpshufd $177, %xmm14, %xmm13 -vpshufd $177, %xmm6, %xmm14 -vpaddq %xmm13, %xmm2, %xmm6 -vpaddq %xmm14, %xmm8, %xmm4 -vpaddq %xmm10, %xmm5, %xmm5 -vpxor %xmm6, %xmm11, %xmm2 -vpxor %xmm4, %xmm15, %xmm8 -vpshufb %xmm1, %xmm2, %xmm2 -vpshufb %xmm1, %xmm8, %xmm8 -vpaddq %xmm2, %xmm3, %xmm7 -vpaddq %xmm8, %xmm5, %xmm5 -vpxor %xmm7, %xmm13, %xmm13 -vpxor %xmm5, %xmm14, %xmm14 -vpshufb %xmm12, %xmm13, %xmm13 -vpshufb %xmm12, %xmm14, %xmm14 -vpaddq %xmm13, %xmm6, %xmm10 -vpaddq %xmm14, %xmm4, %xmm0 -vpxor %xmm10, %xmm2, %xmm2 -vpxor %xmm0, %xmm8, %xmm8 -vpaddq %xmm2, %xmm2, %xmm6 -vpaddq %xmm8, %xmm8, %xmm15 -vpsrlq $63, %xmm2, %xmm4 -vpsrlq $63, %xmm8, %xmm11 -vpor %xmm6, %xmm4, %xmm3 -vpor %xmm15, %xmm11, %xmm9 -vpxor %xmm0, %xmm7, %xmm0 -vpxor 208(%rsp), %xmm0, %xmm7 -vpxor %xmm10, %xmm5, %xmm0 -vpalignr $8, %xmm9, %xmm3, %xmm4 -vpalignr $8, %xmm13, %xmm14, %xmm5 -vpalignr $8, %xmm3, %xmm9, %xmm3 -vpxor %xmm5, %xmm4, %xmm6 -vpalignr $8, %xmm14, %xmm13, %xmm8 -vpxor %xmm8, %xmm3, %xmm9 -vmovdqu %xmm7, 208(%rsp) -vpxor 224(%rsp), %xmm0, %xmm2 -vpxor 256(%rsp), %xmm6, %xmm7 -vpxor 240(%rsp), %xmm9, %xmm10 -movq 296(%rsp), %rax -movq 288(%rsp), %r8 -movq 280(%rsp), %rsi -vmovdqu %xmm2, 224(%rsp) -vmovdqu %xmm7, 256(%rsp) -vmovdqu %xmm10, 240(%rsp) -cmpq $128, %rdx -ja blake2b_blocks_avx_L25 
-blake2b_blocks_avx_L26: -vmovdqu 240(%rsp), %xmm8 -vmovdqu 256(%rsp), %xmm7 -vmovdqu 224(%rsp), %xmm6 -vmovdqu 208(%rsp), %xmm5 -movq 128(%rsp), %rdi -vmovdqu %xmm5, (%rdi) -vmovdqu %xmm6, 16(%rdi) -vmovdqu %xmm7, 32(%rdi) -vmovdqu %xmm8, 48(%rdi) -movq %r8, 64(%rdi) -movq %rax, 72(%rdi) -addq $344, %rsp -popq %rbx -popq %r15 -popq %r14 -popq %r13 -popq %r12 -movq %rbp, %rsp -popq %rbp -ret -FN_END blake2b_blocks_avx
\ No newline at end of file diff --git a/src/libcryptobox/blake2/blake2-internal.h b/src/libcryptobox/blake2/blake2-internal.h deleted file mode 100644 index 18b825900..000000000 --- a/src/libcryptobox/blake2/blake2-internal.h +++ /dev/null @@ -1,29 +0,0 @@ -/*- - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef RSPAMD_BLAKE2_INTERNAL_H -#define RSPAMD_BLAKE2_INTERNAL_H - -#include "blake2.h" - -typedef struct blake2b_state_internal_t { - unsigned char h[64]; - unsigned char t[16]; - unsigned char f[16]; - size_t leftover; - unsigned char buffer[BLAKE2B_BLOCKBYTES]; -} blake2b_state_internal; - -#endif diff --git a/src/libcryptobox/blake2/blake2.c b/src/libcryptobox/blake2/blake2.c deleted file mode 100644 index bb681b5d3..000000000 --- a/src/libcryptobox/blake2/blake2.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Copyright (c) 2015, Vsevolod Stakhov - * Copyright (c) 2015, Andrew Moon - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "cryptobox.h" -#include "platform_config.h" -#include "blake2.h" -#include "blake2-internal.h" - -extern unsigned long cpu_config; - -typedef struct blake2b_impl_t { - unsigned long cpu_flags; - const char *desc; - - void (*blake2b_blocks) (blake2b_state_internal *state, - const unsigned char *in, - size_t bytes, - size_t stride); -} blake2b_impl_t; - -#define BLAKE2B_STRIDE BLAKE2B_BLOCKBYTES -#define BLAKE2B_STRIDE_NONE 0 - -#define BLAKE2B_DECLARE(ext) \ - void blake2b_blocks_##ext(blake2b_state_internal *state, const unsigned char *in, size_t bytes, size_t stride); - -#define BLAKE2B_IMPL(cpuflags, desc, ext) \ - {(cpuflags), desc, blake2b_blocks_##ext} - -#if defined(HAVE_AVX) -BLAKE2B_DECLARE(avx) -#define BLAKE2B_AVX BLAKE2B_IMPL(CPUID_AVX, "avx", avx) -#endif - -#if defined(CMAKE_ARCH_x86_64) || defined(CMAKE_ARCH_i386) -BLAKE2B_DECLARE(x86) -#define BLAKE2B_X86 BLAKE2B_IMPL(CPUID_SSE2, "x86", x86) -#endif - -/* the "always runs" version */ -BLAKE2B_DECLARE(ref) -#define BLAKE2B_GENERIC BLAKE2B_IMPL(0, "generic", ref) - -/* list implementations from most optimized to least, with generic as the last entry */ -static const blake2b_impl_t blake2b_list[] = { - BLAKE2B_GENERIC, -#if defined(BLAKE2B_AVX) - BLAKE2B_AVX, -#endif -#if defined(BLAKE2B_X86) - BLAKE2B_X86, -#endif -}; - -static const blake2b_impl_t *blake2b_opt = &blake2b_list[0]; - - -/* is the pointer not aligned on a word boundary? */ -static int -blake2b_not_aligned (const void *p) -{ -#if !defined(CPU_8BITS) - return ((size_t) p & (sizeof (size_t) - 1)) != 0; -#else - return 0; -#endif -} - -static const union endian_test_t { - unsigned char b[2]; - unsigned short s; -} blake2b_endian_test = {{1, 0}}; - -/* copy the hash from the internal state */ -static void -blake2b_store_hash (blake2b_state_internal *state, unsigned char *hash) -{ - if (blake2b_endian_test.s == 0x0001) { - memcpy (hash, state->h, 64); - } - else { - size_t i, j; - for (i = 0; i < 8; i++, hash += 8) { - for (j = 0; j < 8; j++) - hash[7 - j] = state->h[(i * 8) + j]; - } - } -} - -static const unsigned char blake2b_init_le[64] = { - 0x08 ^ 0x40, 0xc9 ^ 0x00, 0xbc ^ 0x01, 0xf3 ^ 0x01, 0x67 ^ 0x00, - 0xe6 ^ 0x00, 0x09 ^ 0x00, 0x6a ^ 0x00, - 0x3b, 0xa7, 0xca, 0x84, 0x85, 0xae, 0x67, 0xbb, - 0x2b, 0xf8, 0x94, 0xfe, 0x72, 0xf3, 0x6e, 0x3c, - 0xf1, 0x36, 0x1d, 0x5f, 0x3a, 0xf5, 0x4f, 0xa5, - 0xd1, 0x82, 0xe6, 0xad, 0x7f, 0x52, 0x0e, 0x51, - 0x1f, 0x6c, 0x3e, 0x2b, 0x8c, 0x68, 0x05, 0x9b, - 0x6b, 0xbd, 0x41, 0xfb, 0xab, 0xd9, 0x83, 0x1f, - 0x79, 0x21, 0x7e, 0x13, 0x19, 0xcd, 0xe0, 0x5b, -}; - -/* initialize the state in serial mode */ -void -blake2b_init (blake2b_state *S) -{ - blake2b_state_internal *state = (blake2b_state_internal *) S; - /* assume state is fully little endian for now */ - memcpy (state, blake2b_init_le, 64); - /*memcpy(state, (blake2b_endian_test.s == 1) ? 
blake2b_init_le : blake2b_init_be, 64);*/ - memset (state->t, - 0, - sizeof (state->t) + sizeof (state->f) + sizeof (state->leftover)); -} - -/* initialized the state in serial-key'd mode */ -void -blake2b_keyed_init (blake2b_state *S, const unsigned char *key, size_t keylen) -{ - unsigned char k[BLAKE2B_BLOCKBYTES]; - blake2b_state _ks; - blake2b_state_internal *state = (blake2b_state_internal *)S; - - memset (k, 0, sizeof (k)); - - if (keylen <= BLAKE2B_KEYBYTES) { - memcpy (k, key, keylen); - blake2b_init (S); - state->h[1] ^= keylen; - blake2b_update (S, k, sizeof (k)); - } - else { - blake2b_init (S); - /* - * We use additional blake2 iteration to store large key - * XXX: it is not compatible with the original implementation but safe - */ - blake2b_init (&_ks); - blake2b_update (&_ks, key, keylen); - blake2b_final (&_ks, k); - blake2b_keyed_init (S, k, BLAKE2B_KEYBYTES); - } - - rspamd_explicit_memzero (k, sizeof (k)); -} - -/* hash inlen bytes from in, which may or may not be word aligned, returns the number of bytes used */ -static size_t -blake2b_consume_blocks (blake2b_state_internal *state, - const unsigned char *in, - size_t inlen) -{ - /* always need to leave at least BLAKE2B_BLOCKBYTES in case this is the final block */ - if (inlen <= BLAKE2B_BLOCKBYTES) - return 0; - - inlen = ((inlen - 1) & ~(BLAKE2B_BLOCKBYTES - 1)); - if (blake2b_not_aligned (in)) { - /* copy the unaligned data to an aligned buffer and process in chunks */ - unsigned char buffer[16 * BLAKE2B_BLOCKBYTES]; - size_t left = inlen; - while (left) { - const size_t bytes = (left > sizeof (buffer)) ? sizeof (buffer) - : left; - memcpy (buffer, in, bytes); - blake2b_opt->blake2b_blocks (state, buffer, bytes, BLAKE2B_STRIDE); - in += bytes; - left -= bytes; - } - } - else { - /* word aligned, handle directly */ - blake2b_opt->blake2b_blocks (state, in, inlen, BLAKE2B_STRIDE); - } - - return inlen; -} - -/* update the hash state with inlen bytes from in */ -void -blake2b_update (blake2b_state *S, const unsigned char *in, size_t inlen) -{ - blake2b_state_internal *state = (blake2b_state_internal *) S; - size_t bytes; - - /* blake2b processes the final <=BLOCKBYTES bytes raw, so we can only update if there are at least BLOCKBYTES+1 bytes available */ - if ((state->leftover + inlen) > BLAKE2B_BLOCKBYTES) { - /* handle the previous data, we know there is enough for at least one block */ - if (state->leftover) { - bytes = (BLAKE2B_BLOCKBYTES - state->leftover); - memcpy (state->buffer + state->leftover, in, bytes); - in += bytes; - inlen -= bytes; - state->leftover = 0; - blake2b_opt->blake2b_blocks (state, - state->buffer, - BLAKE2B_BLOCKBYTES, - BLAKE2B_STRIDE_NONE); - } - - /* handle the direct data (if any) */ - bytes = blake2b_consume_blocks (state, in, inlen); - inlen -= bytes; - in += bytes; - } - - /* handle leftover data */ - memcpy (state->buffer + state->leftover, in, inlen); - state->leftover += inlen; -} - -/* finalize the hash */ -void -blake2b_final (blake2b_state *S, unsigned char *hash) -{ - blake2b_state_internal *state = (blake2b_state_internal *) S; - memset (&state->f[0], 0xff, 8); - blake2b_opt->blake2b_blocks (state, - state->buffer, - state->leftover, - BLAKE2B_STRIDE_NONE); - blake2b_store_hash (state, hash); - rspamd_explicit_memzero (state, sizeof (*state)); -} - -/* one-shot hash inlen bytes from in */ -void -blake2b (unsigned char *hash, const unsigned char *in, size_t inlen) -{ - blake2b_state S; - blake2b_state_internal *state = (blake2b_state_internal *) &S; - size_t bytes; - - blake2b_init 
(&S); - - /* hash until <= 128 bytes left */ - bytes = blake2b_consume_blocks (state, in, inlen); - in += bytes; - inlen -= bytes; - - /* final block */ - memset (&state->f[0], 0xff, 8); - blake2b_opt->blake2b_blocks (state, in, inlen, BLAKE2B_STRIDE_NONE); - blake2b_store_hash (state, hash); -} - -void -blake2b_keyed (unsigned char *hash, - const unsigned char *in, - size_t inlen, - const unsigned char *key, - size_t keylen) -{ - blake2b_state S; - blake2b_keyed_init (&S, key, keylen); - blake2b_update (&S, in, inlen); - blake2b_final (&S, hash); -} - -const char* -blake2b_load (void) -{ - guint i; - - if (cpu_config != 0) { - for (i = 0; i < G_N_ELEMENTS (blake2b_list); i++) { - if (blake2b_list[i].cpu_flags & cpu_config) { - blake2b_opt = &blake2b_list[i]; - break; - } - } - } - - return blake2b_opt->desc; -} diff --git a/src/libcryptobox/blake2/blake2.h b/src/libcryptobox/blake2/blake2.h deleted file mode 100644 index 3da1958ae..000000000 --- a/src/libcryptobox/blake2/blake2.h +++ /dev/null @@ -1,65 +0,0 @@ -/*- - * Copyright 2016 Vsevolod Stakhov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef RSPAMD_BLAKE2_H -#define RSPAMD_BLAKE2_H - -#if defined(__cplusplus) -extern "C" { -#endif - - -#define BLAKE2B_BLOCKBYTES 128 -#define BLAKE2B_OUTBYTES 64 -#define BLAKE2B_KEYBYTES 64 -#define BLAKE2B_SALTBYTES 16 -#define BLAKE2B_PERSONALBYTES 16 - - -typedef struct blake2b_state_t { - unsigned char opaque[256]; -} blake2b_state; - -/* incremental */ -void blake2b_init (blake2b_state *S); - -void blake2b_keyed_init (blake2b_state *S, - const unsigned char *key, - size_t keylen); - -void blake2b_update (blake2b_state *S, - const unsigned char *in, - size_t inlen); - -void blake2b_final (blake2b_state *S, unsigned char *hash); - -/* one-shot */ -void blake2b (unsigned char *hash, - const unsigned char *in, - size_t inlen); - -void blake2b_keyed (unsigned char *hash, - const unsigned char *in, - size_t inlen, - const unsigned char *key, - size_t keylen); - -const char* blake2b_load (void); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/src/libcryptobox/blake2/constants.S b/src/libcryptobox/blake2/constants.S deleted file mode 100644 index c5c5b9a61..000000000 --- a/src/libcryptobox/blake2/constants.S +++ /dev/null @@ -1,30 +0,0 @@ -SECTION_RODATA -.p2align 6 -blake2b_constants: -.quad 0x6a09e667f3bcc908 -.quad 0xbb67ae8584caa73b -.quad 0x3c6ef372fe94f82b -.quad 0xa54ff53a5f1d36f1 -.quad 0x510e527fade682d1 -.quad 0x9b05688c2b3e6c1f -.quad 0x1f83d9abfb41bd6b -.quad 0x5be0cd19137e2179 - -blake2b_sigma: -.byte 0,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120 -.byte 112,80,32,64,72,120,104,48,8,96,0,16,88,56,40,24 -.byte 88,64,96,0,40,16,120,104,80,112,24,48,56,8,72,32 -.byte 56,72,24,8,104,96,88,112,16,48,40,80,32,0,120,64 -.byte 72,0,40,56,16,32,80,120,112,8,88,96,48,64,24,104 -.byte 16,96,48,80,0,88,64,24,32,104,56,40,120,112,8,72 -.byte 96,40,8,120,112,104,32,80,0,56,48,24,72,16,64,88 -.byte 104,88,56,112,96,8,24,72,40,0,120,32,64,48,16,80 -.byte 
48,120,112,72,88,24,0,64,96,16,104,56,8,32,80,40 -.byte 80,16,64,32,56,48,8,40,120,88,72,112,24,96,104,0 -.byte 0,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120 -.byte 112,80,32,64,72,120,104,48,8,96,0,16,88,56,40,24 - -.p2align 4 -blake2b_constants_ssse3: -.byte 2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9 /* 64 bit rotate right by 16 */ -.byte 3,4,5,6,7,0,1,2,11,12,13,14,15,8,9,10 /* 64 bit rotate right by 24 */
\ No newline at end of file diff --git a/src/libcryptobox/blake2/ref.c b/src/libcryptobox/blake2/ref.c deleted file mode 100644 index ed6f395fc..000000000 --- a/src/libcryptobox/blake2/ref.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2015, Vsevolod Stakhov - * Copyright (c) 2015, Andrew Moon - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "blake2.h" -#include "blake2-internal.h" - -typedef uint64_t blake2b_uint64; - -static const unsigned char blake2b_sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3} -}; - -static blake2b_uint64 -ROTR64 (blake2b_uint64 x, int k) -{ - return ((x >> k) | (x << (64 - k))); -} - -static blake2b_uint64 -U8TO64 (const unsigned char *p) -{ - return - ((blake2b_uint64) p[0]) | - ((blake2b_uint64) p[1] << 8) | - ((blake2b_uint64) p[2] << 16) | - ((blake2b_uint64) p[3] << 24) | - ((blake2b_uint64) p[4] << 32) | - ((blake2b_uint64) p[5] << 40) | - ((blake2b_uint64) p[6] << 48) | - ((blake2b_uint64) p[7] << 56); -} - -static void -U64TO8 (unsigned char *p, blake2b_uint64 v) -{ - p[0] = (v) & 0xff; - p[1] = (v >> 8) & 0xff; - p[2] = (v >> 16) & 0xff; - p[3] = (v >> 24) & 0xff; - p[4] = (v >> 32) & 0xff; - p[5] = (v >> 40) & 0xff; - p[6] = (v >> 48) & 0xff; - p[7] = (v >> 56) & 0xff; -} - -void -blake2b_blocks_ref (blake2b_state_internal *S, - const unsigned char *in, - size_t bytes, - size_t stride) -{ - const blake2b_uint64 f0 = U8TO64 (&S->f[0]); - const blake2b_uint64 f1 = U8TO64 (&S->f[8]); - - const blake2b_uint64 w8 = 0x6a09e667f3bcc908ull; - const blake2b_uint64 w9 = 0xbb67ae8584caa73bull; - const blake2b_uint64 w10 = 0x3c6ef372fe94f82bull; - const blake2b_uint64 
w11 = 0xa54ff53a5f1d36f1ull; - const blake2b_uint64 w12 = 0x510e527fade682d1ull; - const blake2b_uint64 w13 = 0x9b05688c2b3e6c1full; - const blake2b_uint64 w14 = 0x1f83d9abfb41bd6bull ^f0; - const blake2b_uint64 w15 = 0x5be0cd19137e2179ull ^f1; - - const size_t inc = (bytes >= 128) ? 128 : bytes; - - blake2b_uint64 t0 = U8TO64 (&S->t[0]); - blake2b_uint64 t1 = U8TO64 (&S->t[8]); - - blake2b_uint64 h[8]; - blake2b_uint64 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - unsigned char buffer[128]; - - size_t i; - - if (f0) { - memset (buffer, 0, sizeof (buffer)); - memcpy (buffer, in, bytes); - in = buffer; - } - - for (i = 0; i < 8; i++) - h[i] = U8TO64 (&S->h[i * 8]); - - while (1) { - blake2b_uint64 m[16]; - - t0 += inc; - if (t0 < inc) - t1 += 1; - - for (i = 0; i < 16; i++) - m[i] = U8TO64 (in + (i * 8)); - - v0 = h[0]; - v1 = h[1]; - v2 = h[2]; - v3 = h[3]; - v4 = h[4]; - v5 = h[5]; - v6 = h[6]; - v7 = h[7]; - v8 = w8; - v9 = w9; - v10 = w10; - v11 = w11; - v12 = w12 ^ t0; - v13 = w13 ^ t1; - v14 = w14; - v15 = w15; - -#define G(r, x, a, b, c, d) \ - a += b + m[blake2b_sigma[r][2*x+0]]; \ - d = ROTR64(d ^ a, 32); \ - c += d; \ - b = ROTR64(b ^ c, 24); \ - a += b + m[blake2b_sigma[r][2*x+1]]; \ - d = ROTR64(d ^ a, 16); \ - c += d; \ - b = ROTR64(b ^ c, 63); - - for (i = 0; i < 12; i++) { - G(i, 0, v0, v4, v8, v12); - G(i, 1, v1, v5, v9, v13); - G(i, 2, v2, v6, v10, v14); - G(i, 3, v3, v7, v11, v15); - G(i, 4, v0, v5, v10, v15); - G(i, 5, v1, v6, v11, v12); - G(i, 6, v2, v7, v8, v13); - G(i, 7, v3, v4, v9, v14); - } - - h[0] ^= (v0 ^ v8); - h[1] ^= (v1 ^ v9); - h[2] ^= (v2 ^ v10); - h[3] ^= (v3 ^ v11); - h[4] ^= (v4 ^ v12); - h[5] ^= (v5 ^ v13); - h[6] ^= (v6 ^ v14); - h[7] ^= (v7 ^ v15); - - if (bytes <= 128) - break; - in += stride; - bytes -= 128; - } - - for (i = 0; i < 8; i++) - U64TO8 (&S->h[i * 8], h[i]); - U64TO8 (&S->t[0], t0); - U64TO8 (&S->t[8], t1); -} diff --git a/src/libcryptobox/blake2/x86-32.S b/src/libcryptobox/blake2/x86-32.S deleted file mode 100644 index 12030e57b..000000000 --- a/src/libcryptobox/blake2/x86-32.S +++ /dev/null @@ -1,1080 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -SECTION_TEXT - -GLOBAL_HIDDEN_FN blake2b_blocks_x86 -pushl %esi -pushl %edi -pushl %ebx -pushl %ebp -subl $492, %esp -movl 512(%esp), %eax -movl 80(%eax), %ebp -movl %ebp, %edi -movl 84(%eax), %ebx -xorl $-79577749, %edi -movl %edi, 144(%esp) -movl %ebx, %edi -xorl $528734635, %edi -movl %edi, 148(%esp) -movl 88(%eax), %edi -movl 92(%eax), %eax -xorl $327033209, %edi -xorl $1541459225, %eax -movl %edi, 152(%esp) -LOAD_VAR_PIC blake2b_sigma, %ecx -lea 192(%ecx), %edi -movl 516(%esp), %esi -orl %ebx, %ebp -movl 520(%esp), %edx -movl %edi, 360(%esp) -jne blake2b_blocks_x86_L2 -blake2b_blocks_x86_L32: -cmpl $128, %edx -jmp blake2b_blocks_x86_L21 -blake2b_blocks_x86_L2: -cmpl $128, %edx -je blake2b_blocks_x86_L21 -blake2b_blocks_x86_L3: -testb $64, %dl -lea (%esp), %ebp -je blake2b_blocks_x86_L5 -blake2b_blocks_x86_L4: -movl (%esi), %ebx -movl 4(%esi), %ebp -movl %ebx, (%esp) -movl %ebp, 4(%esp) -movl 8(%esi), %edi -movl 12(%esi), %ebx -movl %edi, 8(%esp) -movl %ebx, 12(%esp) -movl 16(%esi), %ebp -movl 20(%esi), %edi -movl %ebp, 16(%esp) -movl %edi, 20(%esp) -movl 24(%esi), %ebx -movl 28(%esi), %ebp -movl %ebx, 24(%esp) -movl %ebp, 28(%esp) -movl 32(%esi), %edi -movl 36(%esi), %ebx -movl %edi, 32(%esp) -movl %ebx, 36(%esp) -movl 40(%esi), %ebp -movl 44(%esi), %edi -movl %ebp, 40(%esp) -movl %edi, 44(%esp) -movl 48(%esi), %ebx -movl 52(%esi), %ebp -movl %ebx, 
48(%esp) -movl %ebp, 52(%esp) -lea 64(%esp), %ebp -movl 56(%esi), %edi -movl 60(%esi), %ebx -addl $64, %esi -movl %edi, 56(%esp) -movl %ebx, 60(%esp) -jmp blake2b_blocks_x86_L6 -blake2b_blocks_x86_L5: -xorl %ebx, %ebx -movl %ebx, 64(%esp) -movl %ebx, 68(%esp) -movl %ebx, 72(%esp) -movl %ebx, 76(%esp) -movl %ebx, 80(%esp) -movl %ebx, 84(%esp) -movl %ebx, 88(%esp) -movl %ebx, 92(%esp) -movl %ebx, 96(%esp) -movl %ebx, 100(%esp) -movl %ebx, 104(%esp) -movl %ebx, 108(%esp) -movl %ebx, 112(%esp) -movl %ebx, 116(%esp) -movl %ebx, 120(%esp) -movl %ebx, 124(%esp) -blake2b_blocks_x86_L6: -xorl %ebx, %ebx -testb $32, %dl -movl %ebx, (%ebp) -movl %ebx, 4(%ebp) -movl %ebx, 8(%ebp) -movl %ebx, 12(%ebp) -movl %ebx, 16(%ebp) -movl %ebx, 20(%ebp) -movl %ebx, 24(%ebp) -movl %ebx, 28(%ebp) -movl %ebx, 32(%ebp) -movl %ebx, 36(%ebp) -movl %ebx, 40(%ebp) -movl %ebx, 44(%ebp) -movl %ebx, 48(%ebp) -movl %ebx, 52(%ebp) -movl %ebx, 56(%ebp) -movl %ebx, 60(%ebp) -je blake2b_blocks_x86_L8 -blake2b_blocks_x86_L7: -movl (%esi), %ebx -movl 4(%esi), %edi -movl %ebx, (%ebp) -movl %edi, 4(%ebp) -movl 8(%esi), %ebx -movl 12(%esi), %edi -movl %ebx, 8(%ebp) -movl %edi, 12(%ebp) -movl 16(%esi), %ebx -movl 20(%esi), %edi -movl %ebx, 16(%ebp) -movl %edi, 20(%ebp) -movl 24(%esi), %ebx -movl 28(%esi), %edi -addl $32, %esi -movl %ebx, 24(%ebp) -movl %edi, 28(%ebp) -addl $32, %ebp -blake2b_blocks_x86_L8: -testb $16, %dl -je blake2b_blocks_x86_L10 -blake2b_blocks_x86_L9: -movl (%esi), %ebx -movl 4(%esi), %edi -movl %ebx, (%ebp) -movl %edi, 4(%ebp) -movl 8(%esi), %ebx -movl 12(%esi), %edi -addl $16, %esi -movl %ebx, 8(%ebp) -movl %edi, 12(%ebp) -addl $16, %ebp -blake2b_blocks_x86_L10: -testb $8, %dl -je blake2b_blocks_x86_L12 -blake2b_blocks_x86_L11: -movl (%esi), %ebx -movl 4(%esi), %edi -addl $8, %esi -movl %ebx, (%ebp) -movl %edi, 4(%ebp) -addl $8, %ebp -blake2b_blocks_x86_L12: -testb $4, %dl -je blake2b_blocks_x86_L14 -blake2b_blocks_x86_L13: -movl (%esi), %ebx -addl $4, %esi -movl %ebx, (%ebp) -addl $4, %ebp -blake2b_blocks_x86_L14: -testb $2, %dl -je blake2b_blocks_x86_L16 -blake2b_blocks_x86_L15: -movzwl (%esi), %ebx -addl $2, %esi -movw %bx, (%ebp) -addl $2, %ebp -blake2b_blocks_x86_L16: -testb $1, %dl -je blake2b_blocks_x86_L18 -blake2b_blocks_x86_L17: -movzbl (%esi), %ebx -movb %bl, (%ebp) -blake2b_blocks_x86_L18: -cmpl $128, %edx -lea (%esp), %esi -blake2b_blocks_x86_L21: -movl 512(%esp), %ebp -lea (%ecx), %ecx -movl %esi, 236(%esp) -movl %ecx, 128(%esp) -movl 68(%ebp), %edi -movl %edi, 228(%esp) -movl 60(%ebp), %edi -movl %edi, 196(%esp) -movl 72(%ebp), %edi -movl %edi, 164(%esp) -movl 76(%ebp), %edi -movl %edi, 200(%esp) -movl 24(%ebp), %edi -movl %edi, 176(%esp) -movl 28(%ebp), %edi -movl %edi, 208(%esp) -movl 16(%ebp), %edi -movl %edi, 184(%esp) -movl 20(%ebp), %edi -movl %edi, 216(%esp) -movl 48(%ebp), %edi -movl %edi, 168(%esp) -movl 52(%ebp), %edi -movl %edi, 204(%esp) -movl 8(%ebp), %edi -movl 64(%ebp), %ebx -movl %edi, 156(%esp) -movl 12(%ebp), %edi -movl %ebx, 192(%esp) -movl 56(%ebp), %ebx -movl %edi, 224(%esp) -movl 40(%ebp), %edi -movl %ebx, 172(%esp) -movl %edx, %ebx -movl %edi, 160(%esp) -movl 44(%ebp), %edi -jbe blake2b_blocks_x86_LL3 -movl $128, %ebx -blake2b_blocks_x86_LL3: -movl %edi, 212(%esp) -movl (%ebp), %edi -movl %edi, 180(%esp) -movl 4(%ebp), %edi -movl %edi, 232(%esp) -movl 32(%ebp), %edi -movl 36(%ebp), %ebp -movl %edi, 188(%esp) -movl %ebp, 220(%esp) -movl %eax, 132(%esp) -movl %ebx, 136(%esp) -movl %edx, 140(%esp) -movl 512(%esp), %esi -jmp blake2b_blocks_x86_L22 -blake2b_blocks_x86_L28: -movl 
524(%esp), %eax -movl 140(%esp), %edx -addl $-128, %edx -addl %eax, 236(%esp) -movl %edx, 140(%esp) -blake2b_blocks_x86_L22: -movl 136(%esp), %edx -xorl %ebx, %ebx -movl 192(%esp), %eax -addl %edx, %eax -movl 228(%esp), %ecx -adcl $0, %ecx -movl %eax, 192(%esp) -movl %eax, 64(%esi) -subl %edx, %eax -movl %ecx, 228(%esp) -movl %ecx, 68(%esi) -sbbl %ebx, %ecx -jae blake2b_blocks_x86_L25 -blake2b_blocks_x86_L23: -movl 164(%esp), %eax -addl $1, %eax -movl 200(%esp), %edx -adcl $0, %edx -movl %eax, 164(%esp) -movl %edx, 200(%esp) -movl %eax, 72(%esi) -movl %edx, 76(%esi) -blake2b_blocks_x86_L25: -movl 152(%esp), %eax -movl %eax, 312(%esp) -movl 172(%esp), %ebp -movl 196(%esp), %ebx -movl 144(%esp), %eax -movl 184(%esp), %edi -movl %ebp, 284(%esp) -movl %ebx, 288(%esp) -movl %eax, 296(%esp) -movl 168(%esp), %ebp -movl 204(%esp), %ebx -movl 212(%esp), %eax -movl %edi, 332(%esp) -movl %ebp, 276(%esp) -movl %ebx, 280(%esp) -movl 148(%esp), %edi -movl %eax, 272(%esp) -movl 224(%esp), %ebp -movl 160(%esp), %ebx -movl 188(%esp), %eax -movl 208(%esp), %ecx -movl %edi, 300(%esp) -movl %ebp, 248(%esp) -movl %ebx, 268(%esp) -movl 180(%esp), %edi -movl %eax, 260(%esp) -movl 176(%esp), %edx -movl 164(%esp), %ebp -movl 232(%esp), %ebx -xorl $725511199, %ebp -movl 128(%esp), %eax -movl %ebp, 348(%esp) -movl %ecx, 256(%esp) -movl 200(%esp), %ebp -movl 216(%esp), %ecx -xorl $-1694144372, %ebp -movl %edi, 240(%esp) -movl %edx, 316(%esp) -movl %ebx, 244(%esp) -movl 220(%esp), %edi -movl %eax, 292(%esp) -movl 192(%esp), %ebx -xorl $-1377402159, %ebx -movl %ebx, 352(%esp) -movl %ecx, 252(%esp) -movl 228(%esp), %ebx -movl %ebp, 356(%esp) -xorl $1359893119, %ebx -movl 132(%esp), %edx -movl 156(%esp), %ecx -movl 332(%esp), %ebp -movl 316(%esp), %esi -movl %edi, 264(%esp) -movl $1595750129, 308(%esp) -movl $-1521486534, 304(%esp) -movl $-23791573, 324(%esp) -movl $1013904242, 320(%esp) -movl $-2067093701, 340(%esp) -movl $-1150833019, 336(%esp) -movl $-205731576, 328(%esp) -movl $1779033703, 344(%esp) -blake2b_blocks_x86_L26: -movl %esi, 316(%esp) -movl %edx, 368(%esp) -movzbl (%eax), %esi -movl 236(%esp), %edx -movl %ecx, 364(%esp) -movl 240(%esp), %ecx -addl (%esi,%edx), %ecx -movl %ebp, 332(%esp) -movl 244(%esp), %ebp -adcl 4(%esi,%edx), %ebp -movl 260(%esp), %edx -addl %edx, %ecx -movl 264(%esp), %esi -adcl %esi, %ebp -xorl %ebp, %ebx -movl 352(%esp), %edi -movl %ecx, 240(%esp) -xorl %ecx, %edi -movl 328(%esp), %ecx -addl %ebx, %ecx -movl %ebx, 372(%esp) -movl 344(%esp), %ebx -adcl %edi, %ebx -xorl %ecx, %edx -xorl %ebx, %esi -movl %edi, 352(%esp) -movl %edx, %edi -movl %ecx, 328(%esp) -movl %esi, %ecx -shrl $24, %esi -shll $8, %edx -orl %edx, %esi -movl %esi, 264(%esp) -movzbl 2(%eax), %edx -movl 236(%esp), %esi -shll $8, %ecx -shrl $24, %edi -orl %edi, %ecx -movl %ecx, 376(%esp) -movl 364(%esp), %ecx -addl (%edx,%esi), %ecx -movl 248(%esp), %edi -movl %ebp, 244(%esp) -movl 268(%esp), %ebp -adcl 4(%edx,%esi), %edi -addl %ebp, %ecx -movl 272(%esp), %edx -adcl %edx, %edi -movl %ebx, 344(%esp) -movl %ecx, 364(%esp) -movl 348(%esp), %ebx -xorl %ecx, %ebx -movl 356(%esp), %ecx -xorl %edi, %ecx -movl %edi, 248(%esp) -movl 340(%esp), %edi -addl %ecx, %edi -movl %ecx, 356(%esp) -movl 336(%esp), %ecx -adcl %ebx, %ecx -xorl %edi, %ebp -xorl %ecx, %edx -movl %ebx, 348(%esp) -movl %edx, %ebx -movl %edi, 340(%esp) -movl %ebp, %edi -shrl $24, %edx -shll $8, %ebp -orl %ebp, %edx -movzbl 4(%eax), %ebp -movl %ecx, 336(%esp) -shll $8, %ebx -shrl $24, %edi -movl 332(%esp), %ecx -orl %edi, %ebx -addl (%ebp,%esi), %ecx -movl 
252(%esp), %edi -adcl 4(%ebp,%esi), %edi -movl 276(%esp), %ebp -addl %ebp, %ecx -movl %edx, 272(%esp) -movl 280(%esp), %edx -adcl %edx, %edi -movl %ebx, 380(%esp) -movl %ecx, 332(%esp) -movl 296(%esp), %ebx -xorl %ecx, %ebx -movl 300(%esp), %ecx -xorl %edi, %ecx -movl %edi, 252(%esp) -movl 324(%esp), %edi -addl %ecx, %edi -movl %ecx, 300(%esp) -movl 320(%esp), %ecx -adcl %ebx, %ecx -xorl %edi, %ebp -xorl %ecx, %edx -movl %ebx, 296(%esp) -movl %edx, %ebx -movl %edi, 324(%esp) -movl %ebp, %edi -shrl $24, %edx -shll $8, %ebp -orl %ebp, %edx -movl %edx, 280(%esp) -movzbl 6(%eax), %edx -movl %ecx, 320(%esp) -shll $8, %ebx -shrl $24, %edi -movl 316(%esp), %ecx -orl %edi, %ebx -addl (%edx,%esi), %ecx -movl 256(%esp), %edi -movl 284(%esp), %ebp -adcl 4(%edx,%esi), %edi -addl %ebp, %ecx -movl 288(%esp), %edx -adcl %edx, %edi -movl %ebx, 384(%esp) -movl %ecx, 316(%esp) -movl 312(%esp), %ebx -xorl %ecx, %ebx -movl 368(%esp), %ecx -xorl %edi, %ecx -movl %edi, 256(%esp) -movl 308(%esp), %edi -addl %ecx, %edi -movl %ecx, 368(%esp) -movl 304(%esp), %ecx -adcl %ebx, %ecx -xorl %edi, %ebp -xorl %ecx, %edx -movl %ebx, 312(%esp) -movl %edx, %ebx -movl %edi, 308(%esp) -movl %ebp, %edi -shrl $24, %edx -shll $8, %ebp -orl %ebp, %edx -movzbl 5(%eax), %ebp -movl %ecx, 304(%esp) -shll $8, %ebx -movl (%ebp,%esi), %ecx -addl 332(%esp), %ecx -movl 4(%ebp,%esi), %esi -adcl 252(%esp), %esi -shrl $24, %edi -orl %edi, %ebx -movl %ebx, 388(%esp) -movl 384(%esp), %ebx -addl %ebx, %ecx -movl %edx, 288(%esp) -movl 280(%esp), %edx -adcl %edx, %esi -movl 300(%esp), %ebp -movl 296(%esp), %edi -xorl %ecx, %ebp -xorl %esi, %edi -movl %ecx, 392(%esp) -movl %ebp, %ecx -movl %esi, 396(%esp) -movl %edi, %esi -shll $16, %esi -shrl $16, %ecx -shrl $16, %edi -orl %ecx, %esi -shll $16, %ebp -orl %ebp, %edi -movl 324(%esp), %ebp -addl %esi, %ebp -movl %esi, 400(%esp) -movl 320(%esp), %esi -adcl %edi, %esi -xorl %ebp, %ebx -xorl %esi, %edx -movl %esi, 320(%esp) -movl %edx, %esi -movl %edi, 296(%esp) -movl %ebx, %edi -shrl $31, %esi -addl %ebx, %ebx -shrl $31, %edi -addl %edx, %edx -orl %ebx, %esi -orl %edx, %edi -movl %esi, 408(%esp) -movzbl 7(%eax), %edx -movl 236(%esp), %esi -movl %edi, 404(%esp) -movl 288(%esp), %edi -movl (%edx,%esi), %ebx -addl 316(%esp), %ebx -movl 4(%edx,%esi), %ecx -movl 388(%esp), %edx -adcl 256(%esp), %ecx -addl %edx, %ebx -movl %ebp, 324(%esp) -adcl %edi, %ecx -movl 368(%esp), %ebp -movl 312(%esp), %esi -xorl %ebx, %ebp -xorl %ecx, %esi -movl %ebx, 412(%esp) -movl %ebp, %ebx -movl %ecx, 416(%esp) -movl %esi, %ecx -shll $16, %ecx -shrl $16, %ebx -shrl $16, %esi -orl %ebx, %ecx -shll $16, %ebp -orl %ebp, %esi -movl 308(%esp), %ebp -addl %ecx, %ebp -movl %ecx, 420(%esp) -movl 304(%esp), %ecx -adcl %esi, %ecx -xorl %ebp, %edx -movl %esi, 312(%esp) -xorl %ecx, %edi -movl %edx, %esi -movl %edi, %ebx -shrl $31, %esi -addl %edi, %edi -orl %edi, %esi -addl %edx, %edx -movl %esi, 424(%esp) -movzbl 3(%eax), %edi -movl 236(%esp), %esi -shrl $31, %ebx -orl %edx, %ebx -movl (%edi,%esi), %edx -addl 364(%esp), %edx -movl %ecx, 304(%esp) -movl 4(%edi,%esi), %ecx -movl 380(%esp), %edi -adcl 248(%esp), %ecx -addl %edi, %edx -movl 272(%esp), %esi -adcl %esi, %ecx -movl %ebp, 308(%esp) -movl %ebx, 428(%esp) -movl 356(%esp), %ebx -movl 348(%esp), %ebp -xorl %edx, %ebx -xorl %ecx, %ebp -movl %edx, 432(%esp) -movl %ebp, %edx -movl %ecx, 436(%esp) -movl %ebx, %ecx -shll $16, %edx -shrl $16, %ecx -shrl $16, %ebp -orl %ecx, %edx -shll $16, %ebx -orl %ebx, %ebp -movl 340(%esp), %ebx -addl %edx, %ebx -movl %edx, 440(%esp) -movl 336(%esp), 
%edx -adcl %ebp, %edx -xorl %ebx, %edi -movl %ebx, 340(%esp) -xorl %edx, %esi -movl %edi, %ebx -movl %esi, %ecx -shrl $31, %ebx -addl %esi, %esi -movl %edx, 336(%esp) -orl %esi, %ebx -movzbl 1(%eax), %esi -addl %edi, %edi -movl 236(%esp), %edx -shrl $31, %ecx -orl %edi, %ecx -movl (%esi,%edx), %edi -addl 240(%esp), %edi -movl %ebp, 348(%esp) -movl 4(%esi,%edx), %ebp -movl 376(%esp), %esi -adcl 244(%esp), %ebp -addl %esi, %edi -movl %ecx, 448(%esp) -movl 264(%esp), %ecx -adcl %ecx, %ebp -movl %ebx, 444(%esp) -movl 372(%esp), %ebx -movl 352(%esp), %edx -xorl %edi, %ebx -xorl %ebp, %edx -movl %edi, 452(%esp) -movl %edx, %edi -movl %ebp, 456(%esp) -movl %ebx, %ebp -shll $16, %edi -shrl $16, %ebp -shrl $16, %edx -orl %ebp, %edi -shll $16, %ebx -orl %ebx, %edx -movl 328(%esp), %ebx -addl %edi, %ebx -movl %edi, 460(%esp) -movl 344(%esp), %edi -adcl %edx, %edi -xorl %ebx, %esi -movl %edx, 352(%esp) -xorl %edi, %ecx -movl %esi, %edx -addl %esi, %esi -movl %ebx, 328(%esp) -movl %ecx, %ebx -shrl $31, %edx -addl %ecx, %ecx -movl %edi, 344(%esp) -orl %ecx, %edx -movzbl 8(%eax), %edi -movl 236(%esp), %ecx -shrl $31, %ebx -orl %esi, %ebx -movl %ebx, 468(%esp) -movl 452(%esp), %ebx -addl (%edi,%ecx), %ebx -movl 456(%esp), %esi -movl %edx, 464(%esp) -movl 448(%esp), %edx -adcl 4(%edi,%ecx), %esi -addl %edx, %ebx -movl 444(%esp), %edi -adcl %edi, %esi -movl 420(%esp), %ebp -movl %ebx, 452(%esp) -xorl %ebx, %ebp -movl 312(%esp), %ebx -xorl %esi, %ebx -movl %esi, 456(%esp) -movl 324(%esp), %esi -addl %ebx, %esi -movl %ebx, 312(%esp) -movl 320(%esp), %ebx -adcl %ebp, %ebx -xorl %esi, %edx -xorl %ebx, %edi -movl %ebp, 420(%esp) -movzbl 10(%eax), %ebp -movl %esi, 324(%esp) -movl %edx, %esi -movl %ebx, 320(%esp) -movl %edi, %ebx -shll $8, %ebx -shrl $24, %esi -orl %esi, %ebx -movl %ebx, 472(%esp) -movl (%ebp,%ecx), %ebx -addl 432(%esp), %ebx -movl 4(%ebp,%ecx), %esi -adcl 436(%esp), %esi -shrl $24, %edi -shll $8, %edx -orl %edx, %edi -movl 408(%esp), %edx -addl %edx, %ebx -movl %edi, 444(%esp) -movl 404(%esp), %edi -adcl %edi, %esi -movl 460(%esp), %ebp -movl %ebx, 364(%esp) -xorl %ebx, %ebp -movl 352(%esp), %ebx -xorl %esi, %ebx -movl %esi, 248(%esp) -movl 308(%esp), %esi -addl %ebx, %esi -movl %ebx, 352(%esp) -movl 304(%esp), %ebx -adcl %ebp, %ebx -xorl %esi, %edx -xorl %ebx, %edi -movl %esi, 308(%esp) -movl %edx, %esi -movl %ebx, 304(%esp) -movl %edi, %ebx -shrl $24, %edi -shll $8, %edx -orl %edx, %edi -movl %edi, 404(%esp) -movzbl 12(%eax), %edi -movl %ebp, 460(%esp) -shll $8, %ebx -shrl $24, %esi -movl (%edi,%ecx), %ebp -orl %esi, %ebx -addl 392(%esp), %ebp -movl 4(%edi,%ecx), %esi -movl 428(%esp), %edx -adcl 396(%esp), %esi -addl %edx, %ebp -movl %ebx, 476(%esp) -movl 424(%esp), %ebx -adcl %ebx, %esi -movl 440(%esp), %edi -movl %ebp, 332(%esp) -xorl %ebp, %edi -movl 348(%esp), %ebp -xorl %esi, %ebp -movl %esi, 252(%esp) -movl 328(%esp), %esi -addl %ebp, %esi -movl %ebp, 348(%esp) -movl 344(%esp), %ebp -adcl %edi, %ebp -xorl %esi, %edx -xorl %ebp, %ebx -movl %esi, 328(%esp) -movl %edx, %esi -movl %ebp, 344(%esp) -movl %ebx, %ebp -shrl $24, %ebx -shll $8, %edx -orl %edx, %ebx -movzbl 14(%eax), %edx -movl %eax, 292(%esp) -shll $8, %ebp -shrl $24, %esi -movl (%edx,%ecx), %eax -orl %esi, %ebp -addl 412(%esp), %eax -movl 4(%edx,%ecx), %esi -movl 468(%esp), %ecx -adcl 416(%esp), %esi -addl %ecx, %eax -movl 464(%esp), %edx -adcl %edx, %esi -movl %edi, 440(%esp) -movl %eax, 316(%esp) -movl 400(%esp), %edi -xorl %eax, %edi -movl 296(%esp), %eax -xorl %esi, %eax -movl %esi, 256(%esp) -movl 340(%esp), %esi -addl %eax, 
%esi -movl %eax, 296(%esp) -movl 336(%esp), %eax -adcl %edi, %eax -xorl %esi, %ecx -xorl %eax, %edx -movl %edi, 400(%esp) -movl %ecx, %edi -movl %esi, 340(%esp) -movl %edx, %esi -shrl $24, %edx -shll $8, %ecx -orl %ecx, %edx -movl %edx, 464(%esp) -movl 292(%esp), %edx -shll $8, %esi -shrl $24, %edi -orl %edi, %esi -movzbl 13(%edx), %edi -movl 236(%esp), %edx -movl 332(%esp), %ecx -addl %ebp, %ecx -movl %eax, 336(%esp) -movl 252(%esp), %eax -adcl %ebx, %eax -addl (%edi,%edx), %ecx -movl %ecx, 332(%esp) -adcl 4(%edi,%edx), %eax -movl 348(%esp), %edi -movl 440(%esp), %edx -xorl %ecx, %edi -xorl %eax, %edx -movl %edi, %ecx -movl %eax, 252(%esp) -movl %edx, %eax -shll $16, %eax -shrl $16, %ecx -shrl $16, %edx -orl %ecx, %eax -shll $16, %edi -orl %edx, %edi -movl 328(%esp), %edx -addl %eax, %edx -movl %eax, 348(%esp) -movl 344(%esp), %eax -adcl %edi, %eax -xorl %edx, %ebp -xorl %eax, %ebx -movl %eax, 344(%esp) -movl %ebx, %eax -movl %edi, 356(%esp) -movl %ebp, %edi -shrl $31, %eax -addl %ebp, %ebp -orl %ebp, %eax -addl %ebx, %ebx -movl %eax, 284(%esp) -movl 292(%esp), %eax -shrl $31, %edi -orl %ebx, %edi -movl %edi, 288(%esp) -movzbl 15(%eax), %ebx -movl 236(%esp), %edi -movl 316(%esp), %ebp -addl %esi, %ebp -movl %edx, 328(%esp) -movl 256(%esp), %edx -movl 464(%esp), %ecx -adcl %ecx, %edx -addl (%ebx,%edi), %ebp -movl %ebp, 316(%esp) -adcl 4(%ebx,%edi), %edx -movl 296(%esp), %edi -movl 400(%esp), %ebx -xorl %ebp, %edi -xorl %edx, %ebx -movl %edi, %ebp -movl %edx, 256(%esp) -movl %ebx, %edx -shll $16, %edx -shrl $16, %ebp -shrl $16, %ebx -orl %ebp, %edx -shll $16, %edi -orl %ebx, %edi -movl 340(%esp), %ebx -addl %edx, %ebx -movl %edx, 296(%esp) -movl 336(%esp), %edx -adcl %edi, %edx -xorl %ebx, %esi -xorl %edx, %ecx -movl %edx, 336(%esp) -movl %ecx, %edx -movl %edi, 300(%esp) -movl %esi, %edi -shrl $31, %edx -addl %esi, %esi -shrl $31, %edi -addl %ecx, %ecx -movl %ebx, 340(%esp) -orl %esi, %edx -movzbl 11(%eax), %ebp -orl %ecx, %edi -movl 236(%esp), %ebx -movl %edx, 260(%esp) -movl 364(%esp), %ecx -movl 476(%esp), %edx -addl %edx, %ecx -movl %edi, 264(%esp) -movl 248(%esp), %edi -movl 404(%esp), %esi -adcl %esi, %edi -addl (%ebp,%ebx), %ecx -movl %ecx, 364(%esp) -adcl 4(%ebp,%ebx), %edi -movl 352(%esp), %ebp -movl 460(%esp), %ebx -xorl %ecx, %ebp -xorl %edi, %ebx -movl %ebp, %ecx -movl %edi, 248(%esp) -movl %ebx, %edi -shll $16, %edi -shrl $16, %ecx -shrl $16, %ebx -orl %ecx, %edi -shll $16, %ebp -orl %ebx, %ebp -movl 308(%esp), %ebx -addl %edi, %ebx -movl %edi, 352(%esp) -movl 304(%esp), %edi -adcl %ebp, %edi -xorl %ebx, %edx -xorl %edi, %esi -movl %edi, 304(%esp) -movl %esi, %edi -movl %ebp, 372(%esp) -movl %edx, %ebp -shrl $31, %edi -addl %edx, %edx -shrl $31, %ebp -addl %esi, %esi -movzbl 9(%eax), %ecx -orl %edx, %edi -movl 236(%esp), %edx -orl %esi, %ebp -movl %ebx, 308(%esp) -addl $16, %eax -movl %edi, 276(%esp) -movl 452(%esp), %ebx -movl 472(%esp), %edi -addl %edi, %ebx -movl %ebp, 280(%esp) -movl 456(%esp), %ebp -movl 444(%esp), %esi -adcl %esi, %ebp -addl (%ecx,%edx), %ebx -movl %ebx, 240(%esp) -adcl 4(%ecx,%edx), %ebp -movl 312(%esp), %edx -movl 420(%esp), %ecx -xorl %ebx, %edx -xorl %ebp, %ecx -movl %ebp, 244(%esp) -movl %ecx, %ebx -movl %edx, %ebp -shll $16, %ebx -shrl $16, %ebp -shrl $16, %ecx -orl %ebp, %ebx -shll $16, %edx -orl %ecx, %edx -movl 324(%esp), %ecx -addl %ebx, %ecx -movl %ebx, 312(%esp) -movl 320(%esp), %ebx -adcl %edx, %ebx -xorl %ecx, %edi -xorl %ebx, %esi -movl %edi, %ebp -movl %ecx, 324(%esp) -movl %esi, %ecx -shrl $31, %ecx -addl %edi, %edi -shrl $31, %ebp -addl 
%esi, %esi -orl %esi, %ebp -orl %edi, %ecx -movl %ebx, 320(%esp) -movl %ebp, 272(%esp) -movl %ecx, 268(%esp) -movl 332(%esp), %ebp -movl 316(%esp), %esi -movl 364(%esp), %ecx -movl 372(%esp), %ebx -cmpl 360(%esp), %eax -jb blake2b_blocks_x86_L26 -blake2b_blocks_x86_L27: -movl 328(%esp), %edi -xorl 240(%esp), %edi -movl %esi, 316(%esp) -movl 512(%esp), %esi -movl 180(%esp), %eax -movl %edx, 368(%esp) -xorl %edi, %eax -movl 344(%esp), %edx -movl %eax, 180(%esp) -movl %eax, (%esi) -movl 340(%esp), %eax -xorl %ecx, %eax -movl 336(%esp), %ecx -xorl 244(%esp), %edx -xorl 248(%esp), %ecx -movl 232(%esp), %edi -xorl %edx, %edi -movl 156(%esp), %edx -xorl %eax, %edx -movl 224(%esp), %eax -movl %edi, 232(%esp) -xorl %ecx, %eax -movl %edi, 4(%esi) -movl %ebp, 332(%esp) -movl %eax, 224(%esp) -movl %eax, 12(%esi) -movl 324(%esp), %edi -movl 320(%esp), %eax -xorl 332(%esp), %edi -xorl 252(%esp), %eax -movl %edx, 156(%esp) -movl %edx, 8(%esi) -movl 184(%esp), %edx -movl 216(%esp), %ecx -xorl %edi, %edx -movl %edx, 184(%esp) -xorl %eax, %ecx -movl %edx, 16(%esi) -movl 308(%esp), %eax -movl 304(%esp), %edx -xorl 316(%esp), %eax -xorl 256(%esp), %edx -movl 176(%esp), %edi -xorl 264(%esp), %ebx -xorl %eax, %edi -movl 208(%esp), %eax -xorl %edx, %eax -movl %eax, 208(%esp) -movl %eax, 28(%esi) -movl 352(%esp), %edx -movl 220(%esp), %eax -movl 356(%esp), %ebp -xorl %ebx, %eax -movl 348(%esp), %ebx -xorl 260(%esp), %edx -xorl 268(%esp), %ebx -xorl 272(%esp), %ebp -movl %ecx, 216(%esp) -movl %ecx, 20(%esi) -movl 188(%esp), %ecx -movl %eax, 220(%esp) -xorl %edx, %ecx -movl %eax, 36(%esi) -movl 160(%esp), %eax -movl 212(%esp), %edx -xorl %ebx, %eax -xorl %ebp, %edx -movl 296(%esp), %ebp -movl %eax, 160(%esp) -movl %eax, 40(%esi) -movl %edi, 176(%esp) -movl %edi, 24(%esi) -movl 300(%esp), %eax -movl 312(%esp), %ebx -movl 368(%esp), %edi -xorl 276(%esp), %ebp -xorl 280(%esp), %eax -xorl 284(%esp), %ebx -xorl 288(%esp), %edi -movl %edx, 212(%esp) -movl %edx, 44(%esi) -movl 168(%esp), %edx -movl %ecx, 188(%esp) -xorl %ebp, %edx -movl %ecx, 32(%esi) -movl %edx, 168(%esp) -movl 204(%esp), %ecx -movl %edx, 48(%esi) -xorl %eax, %ecx -movl 172(%esp), %eax -movl 196(%esp), %edx -xorl %ebx, %eax -xorl %edi, %edx -movl %ecx, 204(%esp) -movl %ecx, 52(%esi) -movl %eax, 172(%esp) -movl %edx, 196(%esp) -movl %eax, 56(%esi) -movl %edx, 60(%esi) -cmpl $128, 140(%esp) -ja blake2b_blocks_x86_L28 -blake2b_blocks_x86_L29: -addl $492, %esp -popl %ebp -popl %ebx -popl %edi -popl %esi -ret -FN_END blake2b_blocks_x86 diff --git a/src/libcryptobox/blake2/x86-64.S b/src/libcryptobox/blake2/x86-64.S deleted file mode 100644 index f0de795fb..000000000 --- a/src/libcryptobox/blake2/x86-64.S +++ /dev/null @@ -1,1754 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -SECTION_TEXT - -GLOBAL_HIDDEN_FN_EXT blake2b_blocks_x86, 4, 8 -pushq %rbx -pushq %rbp -pushq %r12 -pushq %r13 -pushq %r14 -pushq %r15 -movq %rsp, %r9 -subq $320, %rsp -andq $~63, %rsp -cmpq $128, %rdx -movq %rdx, %rax -jb blake2b_blocks_x86_usebytesinc -movq $128, %rax -blake2b_blocks_x86_usebytesinc: -movq %rdx, 136(%rsp) -movq %rcx, 144(%rsp) -movq %rax, 152(%rsp) -movq %rdi, 160(%rsp) -movq %r9, 168(%rsp) -movq 80(%rdi), %rcx -andq %rcx, %rcx -jz blake2b_blocks_x86_not_final_call -cmpq $128, %rdx -je blake2b_blocks_x86_not_final_call -leaq 0(%rsp), %rcx -pxor %xmm0, %xmm0 -movdqa %xmm0, 0(%rcx) -movdqa %xmm0, 16(%rcx) -movdqa %xmm0, 32(%rcx) -movdqa %xmm0, 48(%rcx) -movdqa %xmm0, 64(%rcx) -movdqa %xmm0, 80(%rcx) -movdqa %xmm0, 96(%rcx) -movdqa %xmm0, 112(%rcx) -testq $0x40, 
%rdx -jz blake2b_blocks_x86_skip64 -movdqu 0(%rsi), %xmm0 -movdqu 16(%rsi), %xmm1 -movdqu 32(%rsi), %xmm2 -movdqu 48(%rsi), %xmm3 -movdqa %xmm0, 0(%rcx) -movdqa %xmm1, 16(%rcx) -movdqa %xmm2, 32(%rcx) -movdqa %xmm3, 48(%rcx) -addq $64, %rsi -addq $64, %rcx -blake2b_blocks_x86_skip64: -testq $0x20, %rdx -jz blake2b_blocks_x86_skip32 -movdqu 0(%rsi), %xmm0 -movdqu 16(%rsi), %xmm1 -movdqa %xmm0, 0(%rcx) -movdqa %xmm1, 16(%rcx) -addq $32, %rsi -addq $32, %rcx -blake2b_blocks_x86_skip32: -testq $0x10, %rdx -jz blake2b_blocks_x86_skip16 -movdqu 0(%rsi), %xmm0 -movdqa %xmm0, 0(%rcx) -addq $16, %rsi -addq $16, %rcx -blake2b_blocks_x86_skip16: -testq $0x8, %rdx -jz blake2b_blocks_x86_skip8 -movq 0(%rsi), %rax -movq %rax, 0(%rcx) -addq $8, %rsi -addq $8, %rcx -blake2b_blocks_x86_skip8: -testq $0x4, %rdx -jz blake2b_blocks_x86_skip4 -movl 0(%rsi), %eax -movl %eax, 0(%rcx) -addq $4, %rsi -addq $4, %rcx -blake2b_blocks_x86_skip4: -testq $0x2, %rdx -jz blake2b_blocks_x86_skip2 -movw 0(%rsi), %ax -movw %ax, 0(%rcx) -addq $2, %rsi -addq $2, %rcx -blake2b_blocks_x86_skip2: -testq $0x1, %rdx -jz blake2b_blocks_x86_skip1 -movb 0(%rsi), %al -movb %al, 0(%rcx) -blake2b_blocks_x86_skip1: -leaq 0(%rsp), %rsi -blake2b_blocks_x86_not_final_call: -movq %rsi, 128(%rsp) -movq 64(%rdi), %r12 -movq 72(%rdi), %r13 -movq 80(%rdi), %r14 -movq 88(%rdi), %r15 -movabsq $0x1f83d9abfb41bd6b, %rax -movabsq $0x5be0cd19137e2179, %rbx -xorq %rax, %r14 -xorq %rbx, %r15 -movq %r12, 256(%rsp) -movq %r13, 264(%rsp) -movq %r14, 272(%rsp) -movq %r15, 280(%rsp) -movq 0(%rdi), %rax -movq 8(%rdi), %rdx -movq 16(%rdi), %r8 -movq 24(%rdi), %r12 -movq 32(%rdi), %rbx -movq 40(%rdi), %rsi -movq 48(%rdi), %r9 -movq 56(%rdi), %r13 -.p2align 6,,63 -blake2b_blocks_x86_mainloop: -movq 128(%rsp), %r10 -cmpq %r10, %rsp -je blake2b_blocks_x86_nocopy -movdqu 0(%r10), %xmm0 -movdqu 16(%r10), %xmm1 -movdqu 32(%r10), %xmm2 -movdqu 48(%r10), %xmm3 -movdqu 64(%r10), %xmm4 -movdqu 80(%r10), %xmm5 -movdqu 96(%r10), %xmm6 -movdqu 112(%r10), %xmm7 -movdqa %xmm0, 0(%rsp) -movdqa %xmm1, 16(%rsp) -movdqa %xmm2, 32(%rsp) -movdqa %xmm3, 48(%rsp) -movdqa %xmm4, 64(%rsp) -movdqa %xmm5, 80(%rsp) -movdqa %xmm6, 96(%rsp) -movdqa %xmm7, 112(%rsp) -blake2b_blocks_x86_nocopy: -movq 152(%rsp), %r10 -movq 256(%rsp), %rcx -movq 264(%rsp), %rbp -movabsq $0x510e527fade682d1, %r11 -movabsq $0x9b05688c2b3e6c1f, %r15 -addq %r10, %rcx -cmpq %r10, %rcx -jae blake2b_blocks_x86_nocountercarry -addq $1, %rbp -blake2b_blocks_x86_nocountercarry: -movq %rcx, 256(%rsp) -movq %rbp, 264(%rsp) -xorq %r11, %rcx -xorq %r15, %rbp -movabsq $0x6a09e667f3bcc908, %r11 -movabsq $0xbb67ae8584caa73b, %rdi -movabsq $0x3c6ef372fe94f82b, %r10 -movabsq $0xa54ff53a5f1d36f1, %r14 -movq %r11, 296(%rsp) -movq 272(%rsp), %r11 -movq 280(%rsp), %r15 -movq %rax, 192(%rsp) -movq %rdx, 200(%rsp) -movq %r8, 208(%rsp) -movq %r12, 216(%rsp) -movq %rbx, 224(%rsp) -movq %rsi, 232(%rsp) -movq %r9, 240(%rsp) -movq %r13, 248(%rsp) -addq 0(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 16(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 32(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 48(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 8(%rsp), %rax -addq %rbx, %rax 
-xorq %rax, %rcx -rolq $48, %rcx -addq 24(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 40(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 56(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 64(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 80(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 96(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 112(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 72(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 88(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 104(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 120(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 112(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 32(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 72(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 104(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 80(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 64(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 120(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 48(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 8(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 0(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 88(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 40(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 96(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 16(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 56(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq 
%r8, %rbp -rolq $48, %rbp -addq 24(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 88(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 96(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 40(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 120(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 64(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 0(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 16(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 104(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 80(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 24(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 56(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 72(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 112(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 48(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 8(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 32(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 56(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 24(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 104(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 88(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 72(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 8(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 96(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 112(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 
296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 16(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 40(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 32(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 120(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 48(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 80(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 0(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 64(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 72(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 40(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 16(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 80(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 0(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 56(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 32(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 120(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 112(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 88(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 48(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 24(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 8(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 96(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 64(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 104(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq 
%rdi, %rbx -rolq $1, %rbx -addq 16(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 48(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 0(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 64(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 96(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 80(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 88(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 24(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 32(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 56(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 120(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 8(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 104(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 40(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 112(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 72(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 96(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 8(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 112(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 32(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 40(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 120(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 104(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 80(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 0(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 48(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx 
-addq 72(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 64(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 56(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 24(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 16(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 88(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 104(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 56(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 96(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 24(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 88(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 112(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 8(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 72(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 40(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 120(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 64(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 16(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 0(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 32(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 48(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 80(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 48(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 112(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 88(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 0(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 
296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 120(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 72(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 24(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 64(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 96(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 104(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 8(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 80(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 16(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 56(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 32(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 40(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 80(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 64(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 56(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 8(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 16(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 32(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 48(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 40(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 120(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 72(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 24(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 104(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 
296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 88(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 112(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 96(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 0(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 0(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 16(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 32(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 48(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 8(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, %rcx -addq 24(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 40(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 56(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 64(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 80(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 96(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 112(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 72(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 88(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 104(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 120(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -addq 112(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $32, %rcx -addq 32(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $32, %rbp -addq 72(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $32, %r11 -addq 104(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $32, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $40, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $40, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $40, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $40, %r13 -addq 80(%rsp), %rax -addq %rbx, %rax -xorq %rax, %rcx -rolq $48, 
%rcx -addq 64(%rsp), %rdx -addq %rsi, %rdx -xorq %rdx, %rbp -rolq $48, %rbp -addq 120(%rsp), %r8 -addq %r9, %r8 -movq %rax, 288(%rsp) -xorq %r8, %r11 -rolq $48, %r11 -addq 48(%rsp), %r12 -addq %r13, %r12 -xorq %r12, %r15 -rolq $48, %r15 -movq 296(%rsp), %rax -addq %rcx, %rax -xorq %rax, %rbx -movq %rax, 296(%rsp) -rolq $1, %rbx -addq %rbp, %rdi -xorq %rdi, %rsi -rolq $1, %rsi -addq %r11, %r10 -xorq %r10, %r9 -rolq $1, %r9 -movq 288(%rsp), %rax -addq %r15, %r14 -xorq %r14, %r13 -rolq $1, %r13 -addq 8(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $32, %r15 -addq 0(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $32, %rcx -addq 88(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $32, %rbp -addq 40(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $32, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $40, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $40, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $40, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $40, %rbx -addq 96(%rsp), %rax -addq %rsi, %rax -xorq %rax, %r15 -rolq $48, %r15 -addq 16(%rsp), %rdx -addq %r9, %rdx -xorq %rdx, %rcx -rolq $48, %rcx -addq 56(%rsp), %r8 -addq %r13, %r8 -movq %rax, 288(%rsp) -xorq %r8, %rbp -rolq $48, %rbp -addq 24(%rsp), %r12 -addq %rbx, %r12 -xorq %r12, %r11 -rolq $48, %r11 -addq %r15, %r10 -xorq %r10, %rsi -rolq $1, %rsi -addq %rcx, %r14 -xorq %r14, %r9 -rolq $1, %r9 -movq 296(%rsp), %rax -addq %rbp, %rax -xorq %rax, %r13 -movq %rax, 296(%rsp) -rolq $1, %r13 -movq 288(%rsp), %rax -addq %r11, %rdi -xorq %rdi, %rbx -rolq $1, %rbx -xorq 296(%rsp), %rax -xorq %rdi, %rdx -xorq %r10, %r8 -xorq %r14, %r12 -xorq %rcx, %rbx -xorq %rbp, %rsi -xorq %r11, %r9 -xorq %r15, %r13 -xorq 192(%rsp), %rax -xorq 200(%rsp), %rdx -xorq 208(%rsp), %r8 -xorq 216(%rsp), %r12 -xorq 224(%rsp), %rbx -xorq 232(%rsp), %rsi -xorq 240(%rsp), %r9 -xorq 248(%rsp), %r13 -movq 128(%rsp), %rcx -movq 136(%rsp), %rbp -movq 144(%rsp), %r11 -cmpq $128, %rbp -jbe blake2b_blocks_x86_done -addq %r11, %rcx -subq $128, %rbp -movq %rcx, 128(%rsp) -movq %rbp, 136(%rsp) -jmp blake2b_blocks_x86_mainloop -blake2b_blocks_x86_done: -movq 160(%rsp), %rcx -movq 256(%rsp), %rbp -movq 264(%rsp), %r11 -movq %rax, 0(%rcx) -movq %rdx, 8(%rcx) -movq %r8, 16(%rcx) -movq %r12, 24(%rcx) -movq %rbx, 32(%rcx) -movq %rsi, 40(%rcx) -movq %r9, 48(%rcx) -movq %r13, 56(%rcx) -movq %rbp, 64(%rcx) -movq %r11, 72(%rcx) -movq 168(%rsp), %rsp -popq %r15 -popq %r14 -popq %r13 -popq %r12 -popq %rbp -popq %rbx -ret -FN_END blake2b_blocks_x86
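
For reference: the deleted scalar loop above is a fully unrolled BLAKE2b compression function. Its rolq $32, $40, $48 and $1 left rotations are the specification's right rotations by 32, 24, 16 and 63, and the varying 0(%rsp)..120(%rsp) operands select the sixteen 64-bit message words per the BLAKE2 sigma permutation schedule. A minimal C sketch of one G mixing step follows; the helper names rotr64 and blake2b_g are illustrative, not taken from the removed sources.

#include <stdint.h>

static inline uint64_t rotr64(uint64_t x, unsigned n)
{
	return (x >> n) | (x << (64 - n));
}

/* One G step on the 16-word work vector v; x and y are message words. */
static void blake2b_g(uint64_t v[16], int a, int b, int c, int d,
                      uint64_t x, uint64_t y)
{
	v[a] += v[b] + x;
	v[d] = rotr64(v[d] ^ v[a], 32);	/* rolq $32 in the assembly */
	v[c] += v[d];
	v[b] = rotr64(v[b] ^ v[c], 24);	/* rolq $40 */
	v[a] += v[b] + y;
	v[d] = rotr64(v[d] ^ v[a], 16);	/* rolq $48 */
	v[c] += v[d];
	v[b] = rotr64(v[b] ^ v[c], 63);	/* rolq $1 */
}

Each of BLAKE2b's twelve rounds applies this step eight times (four column steps, then four diagonal steps). After the last round, the run of xorq instructions in the deleted code folds the two halves of the work vector into the saved chaining values, i.e. h[i] ^= v[i] ^ v[i + 8], and the blake2b_blocks_x86_done epilogue stores the updated state and byte counter back into the context.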