summaryrefslogtreecommitdiffstats
path: root/src/libcryptobox/poly1305
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2019-07-08 17:41:31 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2019-07-08 17:41:31 +0100
commit9026da71bb262886a275a3e24b1db51ab3395240 (patch)
tree6050f1a73472b3251b9fd18db7e8c96a7cca276c /src/libcryptobox/poly1305
parentc271eb36656a4ff88a9c8c1d59934949260275a3 (diff)
downloadrspamd-9026da71bb262886a275a3e24b1db51ab3395240.tar.gz
rspamd-9026da71bb262886a275a3e24b1db51ab3395240.zip
[Rework] Use libsodium instead of hand crafted crypto implementations
Diffstat (limited to 'src/libcryptobox/poly1305')
-rw-r--r--src/libcryptobox/poly1305/avx.S877
-rw-r--r--src/libcryptobox/poly1305/avx2.S1095
-rw-r--r--src/libcryptobox/poly1305/constants.S21
-rw-r--r--src/libcryptobox/poly1305/poly1305.c224
-rw-r--r--src/libcryptobox/poly1305/poly1305.h38
-rw-r--r--src/libcryptobox/poly1305/poly1305_internal.h19
-rw-r--r--src/libcryptobox/poly1305/ref-32.c238
-rw-r--r--src/libcryptobox/poly1305/ref-64.c213
-rw-r--r--src/libcryptobox/poly1305/sse2.S969
9 files changed, 0 insertions, 3694 deletions
diff --git a/src/libcryptobox/poly1305/avx.S b/src/libcryptobox/poly1305/avx.S
deleted file mode 100644
index bf7390888..000000000
--- a/src/libcryptobox/poly1305/avx.S
+++ /dev/null
@@ -1,877 +0,0 @@
-#include "../macro.S"
-#include "constants.S"
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN_EXT poly1305_block_size_avx,0,0
-movl $32, %eax
-ret
-FN_END poly1305_block_size_avx
-
-GLOBAL_HIDDEN_FN_EXT poly1305_init_ext_avx,4,1
-poly1305_init_ext_avx_local:
-pushq %r15
-pushq %r14
-pushq %r13
-pushq %r12
-pushq %rbp
-pushq %rbx
-movq %rdi, %rbp
-testq %rdx, %rdx
-movq $-1, %rax
-cmovne %rdx, %rax
-movq %rax, -16(%rsp)
-vpxor %xmm0, %xmm0, %xmm0
-vmovdqu %xmm0, (%rdi)
-vmovdqu %xmm0, 16(%rdi)
-vmovdqu %xmm0, 32(%rdi)
-movq (%rsi), %r9
-movq 8(%rsi), %r8
-movabsq $17575274610687, %r10
-andq %r9, %r10
-shrq $44, %r9
-movq %r8, %rax
-salq $20, %rax
-orq %rax, %r9
-movabsq $17592181915647, %rax
-andq %rax, %r9
-shrq $24, %r8
-movabsq $68719475727, %rax
-andq %rax, %r8
-leaq 40(%rdi), %r15
-movl %r10d, %eax
-andl $67108863, %eax
-movl %eax, 40(%rdi)
-movl %r9d, %edx
-sall $18, %edx
-movq %r10, %rax
-shrq $26, %rax
-orl %edx, %eax
-andl $67108863, %eax
-movl %eax, 44(%rdi)
-movq %r9, %rax
-shrq $8, %rax
-andl $67108863, %eax
-movl %eax, 48(%rdi)
-movq %r9, %rax
-shrq $34, %rax
-movl %r8d, %edx
-sall $10, %edx
-orl %edx, %eax
-andl $67108863, %eax
-movl %eax, 52(%rdi)
-movq %r8, %rax
-shrq $16, %rax
-movl %eax, 56(%rdi)
-movq 16(%rsi), %rax
-movq %rax, 104(%rdi)
-movq 24(%rsi), %rax
-movq %rax, 112(%rdi)
-movl $0, %ebx
-.L7:
-testq %rbx, %rbx
-jne .L4
-leaq 60(%rbp), %r15
-cmpq $16, -16(%rsp)
-ja .L6
-jmp .L5
-.L4:
-cmpq $1, %rbx
-jne .L6
-leaq 80(%rbp), %r15
-cmpq $95, -16(%rsp)
-jbe .L5
-.L6:
-leaq (%r8,%r8,4), %rsi
-salq $2, %rsi
-leaq (%r9,%r9), %rdi
-movq %rdi, %rax
-mulq %rsi
-movq %rax, %r13
-movq %rdx, %r14
-movq %r10, %rax
-mulq %r10
-addq %r13, %rax
-adcq %r14, %rdx
-movabsq $17592186044415, %rcx
-movq %rax, -72(%rsp)
-movq %rdx, -64(%rsp)
-andq -72(%rsp), %rcx
-leaq (%r10,%r10), %r11
-movq %r11, %rax
-mulq %r9
-movq %rax, %r11
-movq %rdx, %r12
-movq %rsi, %rax
-mulq %r8
-movq %rax, %r13
-movq %rdx, %r14
-addq %r11, %r13
-adcq %r12, %r14
-movq -72(%rsp), %rax
-movq -64(%rsp), %rdx
-shrdq $44, %rdx, %rax
-movq %rax, -56(%rsp)
-movq $0, -48(%rsp)
-addq -56(%rsp), %r13
-adcq -48(%rsp), %r14
-movabsq $17592186044415, %rsi
-andq %r13, %rsi
-leaq (%r8,%r8), %rdi
-movq %rdi, %rax
-mulq %r10
-movq %rax, %r11
-movq %rdx, %r12
-movq %r9, %rax
-mulq %r9
-addq %r11, %rax
-adcq %r12, %rdx
-shrdq $44, %r14, %r13
-movq %r13, -40(%rsp)
-movq $0, -32(%rsp)
-addq -40(%rsp), %rax
-adcq -32(%rsp), %rdx
-movabsq $4398046511103, %rdi
-andq %rax, %rdi
-shrdq $42, %rdx, %rax
-leaq (%rax,%rax,4), %r8
-addq %rcx, %r8
-movabsq $17592186044415, %r10
-andq %r8, %r10
-shrq $44, %r8
-addq %rsi, %r8
-movabsq $17592186044415, %r9
-andq %r8, %r9
-shrq $44, %r8
-addq %rdi, %r8
-movl %r10d, %eax
-andl $67108863, %eax
-movl %eax, (%r15)
-movl %r9d, %edx
-sall $18, %edx
-movq %r10, %rax
-shrq $26, %rax
-orl %edx, %eax
-andl $67108863, %eax
-movl %eax, 4(%r15)
-movq %r9, %rax
-shrq $8, %rax
-andl $67108863, %eax
-movl %eax, 8(%r15)
-movl %r8d, %edx
-sall $10, %edx
-movq %r9, %rax
-shrq $34, %rax
-orl %edx, %eax
-andl $67108863, %eax
-movl %eax, 12(%r15)
-movq %r8, %rax
-shrq $16, %rax
-movl %eax, 16(%r15)
-addq $1, %rbx
-cmpq $2, %rbx
-jne .L7
-.L5:
-movq $0, 120(%rbp)
-popq %rbx
-popq %rbp
-popq %r12
-popq %r13
-popq %r14
-popq %r15
-ret
-FN_END poly1305_init_ext_avx
-
-
-
-GLOBAL_HIDDEN_FN poly1305_blocks_avx
-poly1305_blocks_avx_local:
-pushq %rbp
-movq %rsp, %rbp
-pushq %rbx
-andq $-64, %rsp
-subq $200, %rsp
-movl $(1 << 24), %eax
-movl $((1 << 26) - 1), %r8d
-movl $(5), %r9d
-vmovd %eax, %xmm1
-vmovd %r8d, %xmm0
-vmovd %r9d, %xmm2
-vpshufd $68, %xmm1, %xmm1
-vpshufd $68, %xmm0, %xmm0
-vpshufd $68, %xmm2, %xmm2
-vmovdqa %xmm1, 152(%rsp)
-vmovdqa %xmm2, 184(%rsp)
-movq 120(%rdi), %rax
-testb $4, %al
-je .L12
-vpsrldq $8, %xmm1, %xmm1
-vmovdqa %xmm1, 152(%rsp)
-.L12:
-testb $8, %al
-je .L13
-vpxor %xmm1, %xmm1, %xmm1
-vmovdqa %xmm1, 152(%rsp)
-.L13:
-testb $1, %al
-jne .L14
-vmovq (%rsi), %xmm1
-vpinsrq $1, 16(%rsi), %xmm1, %xmm1
-vmovq 8(%rsi), %xmm3
-vpinsrq $1, 24(%rsi), %xmm3, %xmm2
-vpand %xmm0, %xmm1, %xmm7
-vpsrlq $26, %xmm1, %xmm12
-vpand %xmm0, %xmm12, %xmm12
-vpsllq $12, %xmm2, %xmm3
-vpsrlq $52, %xmm1, %xmm1
-vpor %xmm3, %xmm1, %xmm6
-vpand %xmm0, %xmm6, %xmm3
-vpsrlq $26, %xmm6, %xmm6
-vpand %xmm0, %xmm6, %xmm6
-vpsrlq $40, %xmm2, %xmm2
-vpor 152(%rsp), %xmm2, %xmm2
-addq $32, %rsi
-subq $32, %rdx
-orq $1, %rax
-movq %rax, 120(%rdi)
-jmp .L15
-.L14:
-vmovdqu (%rdi), %xmm12
-vmovdqu 16(%rdi), %xmm6
-vmovdqu 32(%rdi), %xmm2
-vpshufd $80, %xmm12, %xmm7
-vpshufd $250, %xmm12, %xmm12
-vpshufd $80, %xmm6, %xmm3
-vpshufd $250, %xmm6, %xmm6
-vpshufd $80, %xmm2, %xmm2
-.L15:
-movq 120(%rdi), %rax
-testb $48, %al
-je .L16
-testb $16, %al
-je .L17
-vmovdqu 40(%rdi), %xmm1
-vmovd 56(%rdi), %xmm4
-vmovdqu 60(%rdi), %xmm5
-vpunpckldq %xmm1, %xmm5, %xmm11
-vpunpckhdq %xmm1, %xmm5, %xmm5
-vmovd 76(%rdi), %xmm1
-vpunpcklqdq %xmm4, %xmm1, %xmm4
-jmp .L18
-.L17:
-movl $(1), %r8d
-vmovdqu 40(%rdi), %xmm5
-vmovd 56(%rdi), %xmm4
-vmovd %r8d, %xmm1
-vpunpckldq %xmm1, %xmm5, %xmm11
-vpunpckhdq %xmm1, %xmm5, %xmm5
-.L18:
-vpshufd $80, %xmm11, %xmm1
-vpshufd $250, %xmm11, %xmm11
-vpshufd $80, %xmm5, %xmm10
-vpshufd $250, %xmm5, %xmm5
-jmp .L19
-.L16:
-vmovdqu 60(%rdi), %xmm5
-vpshufd $0, %xmm5, %xmm1
-vpshufd $85, %xmm5, %xmm11
-vpshufd $170, %xmm5, %xmm10
-vpshufd $255, %xmm5, %xmm5
-vmovd 76(%rdi), %xmm4
-vpshufd $0, %xmm4, %xmm4
-.L19:
-vmovdqa %xmm11, 136(%rsp)
-vpmuludq 184(%rsp), %xmm11, %xmm13
-vmovdqa %xmm13, 120(%rsp)
-vmovdqa %xmm10, 104(%rsp)
-vpmuludq 184(%rsp), %xmm10, %xmm13
-vmovdqa %xmm13, 88(%rsp)
-vmovdqa %xmm5, 72(%rsp)
-vpmuludq 184(%rsp), %xmm5, %xmm5
-vmovdqa %xmm5, 56(%rsp)
-vmovdqa %xmm4, 40(%rsp)
-vpmuludq 184(%rsp), %xmm4, %xmm4
-vmovdqa %xmm4, 24(%rsp)
-cmpq $63, %rdx
-jbe .L20
-vmovdqu 80(%rdi), %xmm4
-vpshufd $0, %xmm4, %xmm5
-vmovdqa %xmm5, 8(%rsp)
-vpshufd $85, %xmm4, %xmm5
-vmovdqa %xmm5, -8(%rsp)
-vpshufd $170, %xmm4, %xmm13
-vmovdqa %xmm13, -24(%rsp)
-vpshufd $255, %xmm4, %xmm4
-vmovdqa %xmm4, %xmm10
-vmovdqa %xmm4, -40(%rsp)
-vmovd 96(%rdi), %xmm4
-vpshufd $0, %xmm4, %xmm4
-vmovdqa %xmm4, %xmm8
-vmovdqa %xmm4, -56(%rsp)
-vpmuludq 184(%rsp), %xmm5, %xmm4
-vmovdqa %xmm4, -72(%rsp)
-vpmuludq 184(%rsp), %xmm13, %xmm4
-vmovdqa %xmm4, -88(%rsp)
-vpmuludq 184(%rsp), %xmm10, %xmm4
-vmovdqa %xmm4, -104(%rsp)
-vpmuludq 184(%rsp), %xmm8, %xmm4
-vmovdqa %xmm4, -120(%rsp)
-leaq 32(%rsi), %rax
-movq %rdx, %rcx
-vmovdqa %xmm1, 168(%rsp)
-jmp .L22
-.p2align 6
-nop
-nop
-nop
-nop
-.L22:
-vpmuludq -72(%rsp), %xmm2, %xmm13
-vmovdqa -88(%rsp), %xmm5
-vpmuludq %xmm5, %xmm6, %xmm4
-vpmuludq %xmm5, %xmm2, %xmm11
-vmovdqa -104(%rsp), %xmm9
-vpmuludq %xmm9, %xmm6, %xmm5
-vpmuludq %xmm9, %xmm2, %xmm10
-vpaddq %xmm4, %xmm13, %xmm13
-vpmuludq %xmm9, %xmm3, %xmm4
-vmovdqa -120(%rsp), %xmm8
-vpmuludq %xmm8, %xmm2, %xmm9
-vpaddq %xmm5, %xmm11, %xmm11
-vmovdqa %xmm8, %xmm5
-vpmuludq %xmm8, %xmm12, %xmm8
-vpmuludq %xmm5, %xmm3, %xmm14
-vpaddq %xmm4, %xmm13, %xmm13
-vpmuludq %xmm5, %xmm6, %xmm4
-vmovdqa 8(%rsp), %xmm15
-vpmuludq %xmm15, %xmm6, %xmm5
-vpaddq %xmm8, %xmm13, %xmm13
-vpmuludq %xmm15, %xmm2, %xmm8
-vpaddq %xmm14, %xmm11, %xmm11
-vpmuludq %xmm15, %xmm7, %xmm14
-vpaddq %xmm4, %xmm10, %xmm10
-vpmuludq %xmm15, %xmm12, %xmm4
-vpaddq %xmm5, %xmm9, %xmm9
-vpmuludq %xmm15, %xmm3, %xmm5
-vmovdqa -8(%rsp), %xmm15
-vpmuludq %xmm15, %xmm3, %xmm2
-vpaddq %xmm14, %xmm13, %xmm13
-vpmuludq %xmm15, %xmm6, %xmm6
-vpaddq %xmm4, %xmm11, %xmm11
-vpmuludq %xmm15, %xmm7, %xmm4
-vpaddq %xmm5, %xmm10, %xmm10
-vmovq -32(%rax), %xmm5
-vpinsrq $1, -16(%rax), %xmm5, %xmm5
-vpmuludq %xmm15, %xmm12, %xmm14
-vpaddq %xmm2, %xmm9, %xmm9
-vmovdqa -24(%rsp), %xmm2
-vpmuludq %xmm2, %xmm12, %xmm15
-vpaddq %xmm6, %xmm8, %xmm8
-vpmuludq %xmm2, %xmm3, %xmm3
-vpaddq %xmm4, %xmm11, %xmm11
-vmovq -24(%rax), %xmm4
-vpinsrq $1, -8(%rax), %xmm4, %xmm6
-vpmuludq %xmm2, %xmm7, %xmm4
-vpaddq %xmm14, %xmm10, %xmm10
-vmovdqa -40(%rsp), %xmm1
-vpmuludq %xmm1, %xmm7, %xmm14
-vpaddq %xmm15, %xmm9, %xmm9
-vpand %xmm5, %xmm0, %xmm2
-vpmuludq %xmm1, %xmm12, %xmm12
-vpaddq %xmm3, %xmm8, %xmm8
-vpsrlq $26, %xmm5, %xmm3
-vpand %xmm3, %xmm0, %xmm3
-vpmuludq -56(%rsp), %xmm7, %xmm7
-vpaddq %xmm4, %xmm10, %xmm10
-vpsllq $12, %xmm6, %xmm15
-vpsrlq $52, %xmm5, %xmm4
-vpor %xmm15, %xmm4, %xmm4
-vpaddq %xmm14, %xmm9, %xmm9
-vpsrlq $14, %xmm6, %xmm5
-vpand %xmm5, %xmm0, %xmm5
-vpaddq %xmm12, %xmm8, %xmm8
-vpand %xmm4, %xmm0, %xmm4
-vpaddq %xmm7, %xmm8, %xmm8
-vpsrlq $40, %xmm6, %xmm6
-vpor 152(%rsp), %xmm6, %xmm6
-vmovdqu (%rax), %xmm12
-vmovdqu 16(%rax), %xmm7
-vpunpckldq %xmm7, %xmm12, %xmm15
-vpunpckhdq %xmm7, %xmm12, %xmm7
-vpxor %xmm14, %xmm14, %xmm14
-vpunpckldq %xmm14, %xmm15, %xmm12
-vpunpckhdq %xmm14, %xmm15, %xmm15
-vpunpckldq %xmm14, %xmm7, %xmm14
-vpxor %xmm1, %xmm1, %xmm1
-vpunpckhdq %xmm1, %xmm7, %xmm7
-vpsllq $6, %xmm15, %xmm15
-vpsllq $12, %xmm14, %xmm14
-vpsllq $18, %xmm7, %xmm7
-vpaddq %xmm12, %xmm13, %xmm12
-vpaddq %xmm15, %xmm11, %xmm15
-vpaddq %xmm14, %xmm10, %xmm14
-vpaddq %xmm7, %xmm9, %xmm7
-vpaddq 152(%rsp), %xmm8, %xmm8
-vpmuludq 120(%rsp), %xmm6, %xmm13
-vmovdqa 88(%rsp), %xmm10
-vpmuludq %xmm10, %xmm5, %xmm9
-vpmuludq %xmm10, %xmm6, %xmm11
-vmovdqa 56(%rsp), %xmm1
-vpmuludq %xmm1, %xmm5, %xmm10
-vpaddq %xmm13, %xmm12, %xmm12
-vpmuludq %xmm1, %xmm6, %xmm13
-vpaddq %xmm9, %xmm12, %xmm12
-vpmuludq %xmm1, %xmm4, %xmm9
-vpaddq %xmm11, %xmm15, %xmm15
-vmovdqa 24(%rsp), %xmm1
-vpmuludq %xmm1, %xmm6, %xmm11
-vpaddq %xmm10, %xmm15, %xmm10
-vpmuludq %xmm1, %xmm3, %xmm15
-vpaddq %xmm13, %xmm14, %xmm14
-vpmuludq %xmm1, %xmm4, %xmm13
-vpaddq %xmm9, %xmm12, %xmm9
-vpmuludq %xmm1, %xmm5, %xmm12
-vpaddq %xmm11, %xmm7, %xmm7
-vpmuludq 168(%rsp), %xmm5, %xmm11
-vpaddq %xmm15, %xmm9, %xmm9
-vpmuludq 168(%rsp), %xmm6, %xmm6
-vpaddq %xmm13, %xmm10, %xmm10
-vpmuludq 168(%rsp), %xmm2, %xmm15
-vpaddq %xmm12, %xmm14, %xmm14
-vpmuludq 168(%rsp), %xmm3, %xmm13
-vpaddq %xmm11, %xmm7, %xmm11
-vpmuludq 168(%rsp), %xmm4, %xmm12
-vpaddq %xmm6, %xmm8, %xmm6
-vmovdqa 136(%rsp), %xmm8
-vpmuludq %xmm8, %xmm4, %xmm7
-vpaddq %xmm15, %xmm9, %xmm9
-vpmuludq %xmm8, %xmm5, %xmm5
-vpaddq %xmm13, %xmm10, %xmm10
-vpmuludq %xmm8, %xmm2, %xmm15
-vpaddq %xmm12, %xmm14, %xmm14
-vpmuludq %xmm8, %xmm3, %xmm8
-vpaddq %xmm7, %xmm11, %xmm11
-vmovdqa 104(%rsp), %xmm7
-vpmuludq %xmm7, %xmm3, %xmm13
-vpaddq %xmm5, %xmm6, %xmm6
-vpmuludq %xmm7, %xmm4, %xmm4
-vpaddq %xmm15, %xmm10, %xmm10
-vpmuludq %xmm7, %xmm2, %xmm15
-vpaddq %xmm8, %xmm14, %xmm14
-vmovdqa 72(%rsp), %xmm5
-vpmuludq %xmm5, %xmm2, %xmm7
-vpaddq %xmm13, %xmm11, %xmm11
-vpmuludq %xmm5, %xmm3, %xmm3
-vpaddq %xmm4, %xmm6, %xmm6
-vpmuludq 40(%rsp), %xmm2, %xmm2
-vpaddq %xmm15, %xmm14, %xmm14
-vpaddq %xmm7, %xmm11, %xmm11
-vpaddq %xmm3, %xmm6, %xmm6
-vpaddq %xmm2, %xmm6, %xmm2
-vpsrlq $26, %xmm9, %xmm12
-vpsrlq $26, %xmm11, %xmm5
-vpand %xmm0, %xmm9, %xmm9
-vpand %xmm0, %xmm11, %xmm11
-vpaddq %xmm12, %xmm10, %xmm10
-vpaddq %xmm5, %xmm2, %xmm2
-vpsrlq $26, %xmm10, %xmm3
-vpsrlq $26, %xmm2, %xmm7
-vpand %xmm0, %xmm10, %xmm10
-vpand %xmm0, %xmm2, %xmm2
-vpaddq %xmm3, %xmm14, %xmm3
-vpmuludq 184(%rsp), %xmm7, %xmm7
-vpaddq %xmm7, %xmm9, %xmm9
-vpsrlq $26, %xmm3, %xmm6
-vpsrlq $26, %xmm9, %xmm12
-vpand %xmm0, %xmm3, %xmm3
-vpand %xmm0, %xmm9, %xmm7
-vpaddq %xmm6, %xmm11, %xmm6
-vpaddq %xmm12, %xmm10, %xmm12
-vpsrlq $26, %xmm6, %xmm8
-vpand %xmm0, %xmm6, %xmm6
-vpaddq %xmm8, %xmm2, %xmm2
-subq $64, %rcx
-addq $64, %rax
-cmpq $63, %rcx
-ja .L22
-vmovdqa 168(%rsp), %xmm1
-leaq -64(%rdx), %rax
-andq $-64, %rax
-leaq 64(%rsi,%rax), %rsi
-andl $63, %edx
-.L20:
-cmpq $31, %rdx
-jbe .L23
-vpmuludq 120(%rsp), %xmm2, %xmm11
-vmovdqa 88(%rsp), %xmm4
-vpmuludq %xmm4, %xmm6, %xmm0
-vpmuludq %xmm4, %xmm2, %xmm10
-vmovdqa 56(%rsp), %xmm4
-vpmuludq %xmm4, %xmm6, %xmm8
-vpmuludq %xmm4, %xmm2, %xmm5
-vpaddq %xmm0, %xmm11, %xmm11
-vpmuludq %xmm4, %xmm3, %xmm0
-vmovdqa 24(%rsp), %xmm13
-vpmuludq %xmm13, %xmm2, %xmm4
-vpaddq %xmm8, %xmm10, %xmm10
-vpmuludq %xmm13, %xmm12, %xmm8
-vpmuludq %xmm13, %xmm3, %xmm9
-vpaddq %xmm0, %xmm11, %xmm11
-vpmuludq %xmm13, %xmm6, %xmm13
-vpmuludq %xmm1, %xmm6, %xmm0
-vpaddq %xmm8, %xmm11, %xmm8
-vpmuludq %xmm1, %xmm2, %xmm2
-vpaddq %xmm9, %xmm10, %xmm9
-vpmuludq %xmm1, %xmm7, %xmm11
-vpaddq %xmm13, %xmm5, %xmm5
-vpmuludq %xmm1, %xmm12, %xmm10
-vpaddq %xmm0, %xmm4, %xmm0
-vpmuludq %xmm1, %xmm3, %xmm1
-vmovdqa 136(%rsp), %xmm4
-vpmuludq %xmm4, %xmm3, %xmm14
-vpaddq %xmm11, %xmm8, %xmm11
-vpmuludq %xmm4, %xmm6, %xmm6
-vpaddq %xmm10, %xmm9, %xmm9
-vpmuludq %xmm4, %xmm7, %xmm15
-vpaddq %xmm1, %xmm5, %xmm5
-vpmuludq %xmm4, %xmm12, %xmm1
-vpaddq %xmm14, %xmm0, %xmm0
-vmovdqa 104(%rsp), %xmm4
-vpmuludq %xmm4, %xmm12, %xmm8
-vpaddq %xmm6, %xmm2, %xmm2
-vpmuludq %xmm4, %xmm3, %xmm3
-vpaddq %xmm15, %xmm9, %xmm9
-vpmuludq %xmm4, %xmm7, %xmm10
-vpaddq %xmm1, %xmm5, %xmm1
-vmovdqa 72(%rsp), %xmm4
-vpmuludq %xmm4, %xmm7, %xmm15
-vpaddq %xmm8, %xmm0, %xmm0
-vpmuludq %xmm4, %xmm12, %xmm12
-vpaddq %xmm3, %xmm2, %xmm2
-vpmuludq 40(%rsp), %xmm7, %xmm7
-vpaddq %xmm10, %xmm1, %xmm1
-vpaddq %xmm15, %xmm0, %xmm0
-vpaddq %xmm12, %xmm2, %xmm2
-vpaddq %xmm7, %xmm2, %xmm2
-movl $((1 << 26) - 1), %r8d
-testq %rsi, %rsi
-vmovd %r8d, %xmm15
-je .L24
-vmovdqu (%rsi), %xmm4
-vmovdqu 16(%rsi), %xmm3
-vpunpckldq %xmm3, %xmm4, %xmm5
-vpunpckhdq %xmm3, %xmm4, %xmm3
-vpxor %xmm4, %xmm4, %xmm4
-vpunpckldq %xmm4, %xmm5, %xmm7
-vpunpckhdq %xmm4, %xmm5, %xmm5
-vpunpckldq %xmm4, %xmm3, %xmm6
-vpunpckhdq %xmm4, %xmm3, %xmm3
-vpsllq $6, %xmm5, %xmm5
-vpsllq $12, %xmm6, %xmm6
-vpsllq $18, %xmm3, %xmm3
-vpaddq %xmm7, %xmm11, %xmm11
-vpaddq %xmm5, %xmm9, %xmm9
-vpaddq %xmm6, %xmm1, %xmm1
-vpaddq %xmm3, %xmm0, %xmm0
-vpaddq 152(%rsp), %xmm2, %xmm2
-.L24:
-vpshufd $68, %xmm15, %xmm15
-vpsrlq $26, %xmm11, %xmm12
-vpsrlq $26, %xmm0, %xmm3
-vpand %xmm15, %xmm11, %xmm11
-vpand %xmm15, %xmm0, %xmm6
-vpaddq %xmm12, %xmm9, %xmm9
-vpaddq %xmm3, %xmm2, %xmm2
-vpsrlq $26, %xmm9, %xmm3
-vpsrlq $26, %xmm2, %xmm7
-vpand %xmm15, %xmm9, %xmm9
-vpand %xmm15, %xmm2, %xmm2
-vpaddq %xmm3, %xmm1, %xmm3
-vpmuludq 184(%rsp), %xmm7, %xmm7
-vpaddq %xmm7, %xmm11, %xmm7
-vpsrlq $26, %xmm3, %xmm4
-vpsrlq $26, %xmm7, %xmm1
-vpand %xmm15, %xmm3, %xmm3
-vpand %xmm15, %xmm7, %xmm7
-vpaddq %xmm4, %xmm6, %xmm6
-vpaddq %xmm1, %xmm9, %xmm12
-vpsrlq $26, %xmm6, %xmm0
-vpand %xmm15, %xmm6, %xmm6
-vpaddq %xmm0, %xmm2, %xmm2
-.L23:
-testq %rsi, %rsi
-je .L25
-vpshufd $8, %xmm7, %xmm7
-vpshufd $8, %xmm12, %xmm12
-vpshufd $8, %xmm3, %xmm3
-vpshufd $8, %xmm6, %xmm6
-vpshufd $8, %xmm2, %xmm2
-vpunpcklqdq %xmm12, %xmm7, %xmm7
-vpunpcklqdq %xmm6, %xmm3, %xmm3
-vmovdqu %xmm7, (%rdi)
-vmovdqu %xmm3, 16(%rdi)
-vmovq %xmm2, 32(%rdi)
-jmp .L11
-.L25:
-vpsrldq $8, %xmm7, %xmm0
-vpaddq %xmm0, %xmm7, %xmm7
-vpsrldq $8, %xmm12, %xmm0
-vpaddq %xmm0, %xmm12, %xmm12
-vpsrldq $8, %xmm3, %xmm0
-vpaddq %xmm0, %xmm3, %xmm3
-vpsrldq $8, %xmm6, %xmm0
-vpaddq %xmm0, %xmm6, %xmm6
-vpsrldq $8, %xmm2, %xmm0
-vpaddq %xmm0, %xmm2, %xmm2
-vmovd %xmm7, %eax
-vmovd %xmm12, %edx
-movl %eax, %r9d
-shrl $26, %r9d
-addl %edx, %r9d
-movl %r9d, %r8d
-andl $67108863, %r8d
-vmovd %xmm3, %edx
-shrl $26, %r9d
-addl %edx, %r9d
-vmovd %xmm6, %edx
-movl %r9d, %ecx
-shrl $26, %ecx
-addl %edx, %ecx
-movl %ecx, %esi
-andl $67108863, %esi
-vmovd %xmm2, %r10d
-movl %r8d, %r11d
-salq $26, %r11
-andl $67108863, %eax
-orq %rax, %r11
-movabsq $17592186044415, %rax
-andq %rax, %r11
-andl $67108863, %r9d
-salq $8, %r9
-shrl $18, %r8d
-movl %r8d, %r8d
-orq %r8, %r9
-movq %rsi, %rdx
-salq $34, %rdx
-orq %rdx, %r9
-andq %rax, %r9
-shrl $26, %ecx
-addl %r10d, %ecx
-salq $16, %rcx
-shrl $10, %esi
-movl %esi, %esi
-orq %rsi, %rcx
-movabsq $4398046511103, %r10
-movq %rcx, %r8
-andq %r10, %r8
-shrq $42, %rcx
-leaq (%rcx,%rcx,4), %rdx
-addq %r11, %rdx
-movq %rdx, %rsi
-andq %rax, %rsi
-shrq $44, %rdx
-addq %r9, %rdx
-movq %rdx, %rcx
-andq %rax, %rcx
-shrq $44, %rdx
-addq %r8, %rdx
-andq %rdx, %r10
-shrq $42, %rdx
-leaq (%rsi,%rdx,4), %rsi
-leaq (%rsi,%rdx), %r11
-movq %r11, %rbx
-andq %rax, %rbx
-shrq $44, %r11
-addq %rcx, %r11
-leaq 5(%rbx), %r9
-movq %r9, %r8
-shrq $44, %r8
-addq %r11, %r8
-movabsq $-4398046511104, %rsi
-addq %r10, %rsi
-movq %r8, %rdx
-shrq $44, %rdx
-addq %rdx, %rsi
-movq %rsi, %rdx
-shrq $63, %rdx
-subq $1, %rdx
-movq %rdx, %rcx
-notq %rcx
-andq %rcx, %rbx
-andq %rcx, %r11
-andq %r10, %rcx
-andq %rax, %r9
-andq %rdx, %r9
-orq %r9, %rbx
-movq %rbx, (%rdi)
-andq %r8, %rax
-andq %rdx, %rax
-orq %rax, %r11
-movq %r11, 8(%rdi)
-andq %rsi, %rdx
-orq %rcx, %rdx
-movq %rdx, 16(%rdi)
-.L11:
-movq -8(%rbp), %rbx
-leave
-ret
-FN_END poly1305_blocks_avx
-
-GLOBAL_HIDDEN_FN poly1305_finish_ext_avx
-poly1305_finish_ext_avx_local:
-pushq %r12
-pushq %rbp
-pushq %rbx
-subq $32, %rsp
-movq %rdi, %rbx
-movq %rdx, %rbp
-movq %rcx, %r12
-testq %rdx, %rdx
-je .L30
-movq $0, (%rsp)
-movq $0, 8(%rsp)
-movq $0, 16(%rsp)
-movq $0, 24(%rsp)
-movq %rsp, %rax
-subq %rsp, %rsi
-testb $16, %dl
-je .L31
-vmovdqu (%rsp,%rsi), %xmm0
-vmovdqa %xmm0, (%rsp)
-addq $16, %rax
-.L31:
-testb $8, %bpl
-je .L32
-movq (%rax,%rsi), %rdx
-movq %rdx, (%rax)
-addq $8, %rax
-.L32:
-testb $4, %bpl
-je .L33
-movl (%rax,%rsi), %edx
-movl %edx, (%rax)
-addq $4, %rax
-.L33:
-testb $2, %bpl
-je .L34
-movzwl (%rax,%rsi), %edx
-movw %dx, (%rax)
-addq $2, %rax
-.L34:
-testb $1, %bpl
-je .L35
-movzbl (%rax,%rsi), %edx
-movb %dl, (%rax)
-.L35:
-cmpq $16, %rbp
-je .L36
-movb $1, (%rsp,%rbp)
-movq 120(%rbx), %rdx
-cmpq $16, %rbp
-sbbq %rax, %rax
-andl $4, %eax
-addq $4, %rax
-.L37:
-orq %rdx, %rax
-movq %rax, 120(%rbx)
-movq %rsp, %rsi
-movl $32, %edx
-movq %rbx, %rdi
-call poly1305_blocks_avx_local
-.L30:
-movq 120(%rbx), %rax
-testb $1, %al
-je .L38
-subq $1, %rbp
-cmpq $15, %rbp
-jbe .L39
-orq $16, %rax
-movq %rax, 120(%rbx)
-jmp .L40
-.L39:
-orq $32, %rax
-movq %rax, 120(%rbx)
-.L40:
-movl $32, %edx
-movl $0, %esi
-movq %rbx, %rdi
-call poly1305_blocks_avx_local
-.L38:
-movq 8(%rbx), %rax
-movq %rax, %rdx
-salq $44, %rdx
-orq (%rbx), %rdx
-shrq $20, %rax
-movq 16(%rbx), %rcx
-salq $24, %rcx
-orq %rcx, %rax
-movq 104(%rbx), %rcx
-movq 112(%rbx), %rsi
-addq %rcx, %rdx
-adcq %rsi, %rax
-vpxor %xmm0, %xmm0, %xmm0
-vmovdqu %xmm0, (%rbx)
-vmovdqu %xmm0, 16(%rbx)
-vmovdqu %xmm0, 32(%rbx)
-vmovdqu %xmm0, 48(%rbx)
-vmovdqu %xmm0, 64(%rbx)
-vmovdqu %xmm0, 80(%rbx)
-vmovdqu %xmm0, 96(%rbx)
-vmovdqu %xmm0, 112(%rbx)
-movq %rdx, (%r12)
-movq %rax, 8(%r12)
-jmp .L43
-.L36:
-movq 120(%rbx), %rdx
-movl $4, %eax
-jmp .L37
-.L43:
-addq $32, %rsp
-popq %rbx
-popq %rbp
-popq %r12
-ret
-FN_END poly1305_finish_ext_avx
-
-GLOBAL_HIDDEN_FN poly1305_auth_avx
-/*
-cmp $128, %rdx
-jb poly1305_auth_x86_local
-*/
-pushq %rbp
-movq %rsp, %rbp
-pushq %r14
-pushq %r13
-pushq %r12
-pushq %rbx
-andq $-64, %rsp
-addq $-128, %rsp
-movq %rdi, %r14
-movq %rsi, %r12
-movq %rdx, %rbx
-movq %rsp, %rdi
-movq %rcx, %rsi
-call poly1305_init_ext_avx_local
-movq %rbx, %r13
-andq $-32, %r13
-je .L46
-movq %rsp, %rdi
-movq %r13, %rdx
-movq %r12, %rsi
-call poly1305_blocks_avx_local
-addq %r13, %r12
-subq %r13, %rbx
-.L46:
-movq %rsp, %rdi
-movq %r14, %rcx
-movq %rbx, %rdx
-movq %r12, %rsi
-call poly1305_finish_ext_avx_local
-leaq -32(%rbp), %rsp
-popq %rbx
-popq %r12
-popq %r13
-popq %r14
-popq %rbp
-ret
-FN_END poly1305_auth_avx
diff --git a/src/libcryptobox/poly1305/avx2.S b/src/libcryptobox/poly1305/avx2.S
deleted file mode 100644
index 5aa5851d6..000000000
--- a/src/libcryptobox/poly1305/avx2.S
+++ /dev/null
@@ -1,1095 +0,0 @@
-#include "../macro.S"
-#include "constants.S"
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN_EXT poly1305_block_size_avx2, 0, 0
-movl $64, %eax
-ret
-FN_END poly1305_block_size_avx2
-
-GLOBAL_HIDDEN_FN poly1305_auth_avx2
-/*
-cmp $128, %rdx
-jb poly1305_auth_x86_local
-*/
-pushq %rbp
-movq %rsp, %rbp
-andq $-64, %rsp
-pushq %r12
-pushq %r14
-pushq %r15
-pushq %rbx
-subq $224, %rsp
-movq %rsi, %r14
-movq %rdi, %rbx
-lea (%rsp), %rdi
-movq %rcx, %rsi
-movq %rdx, %r12
-call poly1305_init_ext_avx2_local
-poly1305_auth_avx2_2:
-movq %r12, %r15
-andq $-64, %r15
-je poly1305_auth_avx2_5
-poly1305_auth_avx2_3:
-movq %r14, %rsi
-lea (%rsp), %rdi
-movq %r15, %rdx
-call poly1305_blocks_avx2_local
-poly1305_auth_avx2_4:
-addq %r15, %r14
-subq %r15, %r12
-poly1305_auth_avx2_5:
-movq %r14, %rsi
-lea (%rsp), %rdi
-movq %r12, %rdx
-movq %rbx, %rcx
-call poly1305_finish_ext_avx2_local
-poly1305_auth_avx2_6:
-addq $224, %rsp
-popq %rbx
-popq %r15
-popq %r14
-popq %r12
-movq %rbp, %rsp
-popq %rbp
-ret
-FN_END poly1305_auth_avx2
-
-
-GLOBAL_HIDDEN_FN poly1305_finish_ext_avx2
-poly1305_finish_ext_avx2_local:
-pushq %rbp
-movq %rsp, %rbp
-andq $-64, %rsp
-pushq %r12
-pushq %r13
-pushq %r14
-subq $104, %rsp
-movq %rdx, %r13
-movq %rcx, %r14
-movq %rdi, %r12
-testq %r13, %r13
-je poly1305_finish_ext_avx2_29
-poly1305_finish_ext_avx2_2:
-lea (%rsp), %rax
-vpxor %ymm0, %ymm0, %ymm0
-subq %rax, %rsi
-vmovdqu %ymm0, (%rsp)
-vmovdqu %ymm0, 32(%rsp)
-testq $32, %r13
-je poly1305_finish_ext_avx2_4
-poly1305_finish_ext_avx2_3:
-vmovdqu (%rsp,%rsi), %ymm0
-lea 32(%rsp), %rax
-vmovdqu %ymm0, (%rsp)
-poly1305_finish_ext_avx2_4:
-testq $16, %r13
-je poly1305_finish_ext_avx2_6
-poly1305_finish_ext_avx2_5:
-vmovdqu (%rax,%rsi), %xmm0
-vmovdqu %xmm0, (%rax)
-addq $16, %rax
-poly1305_finish_ext_avx2_6:
-testq $8, %r13
-je poly1305_finish_ext_avx2_8
-poly1305_finish_ext_avx2_7:
-movq (%rax,%rsi), %rdx
-movq %rdx, (%rax)
-addq $8, %rax
-poly1305_finish_ext_avx2_8:
-testq $4, %r13
-je poly1305_finish_ext_avx2_10
-poly1305_finish_ext_avx2_9:
-movl (%rax,%rsi), %edx
-movl %edx, (%rax)
-addq $4, %rax
-poly1305_finish_ext_avx2_10:
-testq $2, %r13
-je poly1305_finish_ext_avx2_12
-poly1305_finish_ext_avx2_11:
-movzwl (%rax,%rsi), %edx
-movw %dx, (%rax)
-addq $2, %rax
-poly1305_finish_ext_avx2_12:
-testq $1, %r13
-je poly1305_finish_ext_avx2_14
-poly1305_finish_ext_avx2_13:
-movb (%rax,%rsi), %dl
-movb %dl, (%rax)
-poly1305_finish_ext_avx2_14:
-testq $15, %r13
-je poly1305_finish_ext_avx2_16
-poly1305_finish_ext_avx2_15:
-movb $1, (%rsp,%r13)
-poly1305_finish_ext_avx2_16:
-movq 176(%r12), %rdx
-andq $-8125, %rdx
-cmpq $48, %r13
-jb poly1305_finish_ext_avx2_18
-poly1305_finish_ext_avx2_17:
-orq $4, %rdx
-jmp poly1305_finish_ext_avx2_21
-poly1305_finish_ext_avx2_18:
-cmpq $32, %r13
-jb poly1305_finish_ext_avx2_20
-poly1305_finish_ext_avx2_19:
-orq $8, %rdx
-jmp poly1305_finish_ext_avx2_21
-poly1305_finish_ext_avx2_20:
-movq %rdx, %rax
-orq $32, %rdx
-orq $16, %rax
-cmpq $16, %r13
-cmovae %rax, %rdx
-poly1305_finish_ext_avx2_21:
-testq $1, %rdx
-je poly1305_finish_ext_avx2_27
-poly1305_finish_ext_avx2_22:
-cmpq $16, %r13
-ja poly1305_finish_ext_avx2_24
-poly1305_finish_ext_avx2_23:
-orq $256, %rdx
-movq %rdx, 176(%r12)
-jmp poly1305_finish_ext_avx2_28
-poly1305_finish_ext_avx2_24:
-cmpq $32, %r13
-ja poly1305_finish_ext_avx2_27
-poly1305_finish_ext_avx2_25:
-orq $128, %rdx
-movq %rdx, 176(%r12)
-jmp poly1305_finish_ext_avx2_28
-poly1305_finish_ext_avx2_27:
-movq %rdx, 176(%r12)
-poly1305_finish_ext_avx2_28:
-movq %r12, %rdi
-lea (%rsp), %rsi
-movl $64, %edx
-vzeroupper
-call poly1305_blocks_avx2_local
-poly1305_finish_ext_avx2_29:
-movq 176(%r12), %rdx
-testq $1, %rdx
-je poly1305_finish_ext_avx2_37
-poly1305_finish_ext_avx2_30:
-andq $-8125, %rdx
-testq %r13, %r13
-je poly1305_finish_ext_avx2_32
-poly1305_finish_ext_avx2_31:
-cmpq $48, %r13
-jbe poly1305_finish_ext_avx2_33
-poly1305_finish_ext_avx2_32:
-orq $512, %rdx
-jmp poly1305_finish_ext_avx2_36
-poly1305_finish_ext_avx2_33:
-cmpq $32, %r13
-jbe poly1305_finish_ext_avx2_35
-poly1305_finish_ext_avx2_34:
-orq $1024, %rdx
-jmp poly1305_finish_ext_avx2_36
-poly1305_finish_ext_avx2_35:
-movq %rdx, %rax
-orq $4096, %rdx
-orq $2048, %rax
-cmpq $16, %r13
-cmova %rax, %rdx
-poly1305_finish_ext_avx2_36:
-orq $96, %rdx
-movq %r12, %rdi
-vpxor %ymm0, %ymm0, %ymm0
-lea (%rsp), %rsi
-movq %rdx, 176(%r12)
-movl $64, %edx
-vmovdqu %ymm0, (%rsp)
-vmovdqu %ymm0, 32(%rsp)
-vzeroupper
-call poly1305_blocks_avx2_local
-poly1305_finish_ext_avx2_37:
-movq 8(%r12), %r8
-movq %r8, %rsi
-movq 16(%r12), %rax
-vpxor %ymm0, %ymm0, %ymm0
-shlq $44, %rsi
-shrq $20, %r8
-shlq $24, %rax
-orq (%r12), %rsi
-orq %rax, %r8
-movq 160(%r12), %rdx
-movq 168(%r12), %rcx
-addq %rdx, %rsi
-adcq %rcx, %r8
-vmovdqu %ymm0, (%r12)
-vmovdqu %ymm0, 32(%r12)
-vmovdqu %ymm0, 64(%r12)
-vmovdqu %ymm0, 96(%r12)
-vmovdqu %ymm0, 128(%r12)
-vmovdqu %ymm0, 160(%r12)
-movq %rsi, (%r14)
-movq %r8, 8(%r14)
-vzeroupper
-addq $104, %rsp
-popq %r14
-popq %r13
-popq %r12
-movq %rbp, %rsp
-popq %rbp
-ret
-FN_END poly1305_finish_ext_avx2
-
-GLOBAL_HIDDEN_FN poly1305_blocks_avx2
-poly1305_blocks_avx2_local:
-pushq %rbp
-movq %rsp, %rbp
-andq $-64, %rsp
-subq $384, %rsp
-movl $16777216, %eax
-movl $67108863, %ecx
-movl $5, %r8d
-vmovd %eax, %xmm1
-vmovd %ecx, %xmm10
-vmovd %r8d, %xmm0
-movq 176(%rdi), %rax
-vpbroadcastq %xmm1, %ymm1
-vpbroadcastq %xmm10, %ymm10
-vpbroadcastq %xmm0, %ymm11
-testq $60, %rax
-je poly1305_blocks_avx2_11
-poly1305_blocks_avx2_2:
-vpsrldq $8, %ymm1, %ymm15
-testq $4, %rax
-je poly1305_blocks_avx2_4
-poly1305_blocks_avx2_3:
-vpermq $192, %ymm15, %ymm15
-poly1305_blocks_avx2_4:
-testq $8, %rax
-je poly1305_blocks_avx2_6
-poly1305_blocks_avx2_5:
-vpermq $240, %ymm15, %ymm15
-poly1305_blocks_avx2_6:
-testq $16, %rax
-je poly1305_blocks_avx2_8
-poly1305_blocks_avx2_7:
-vpermq $252, %ymm15, %ymm15
-poly1305_blocks_avx2_8:
-testq $32, %rax
-je poly1305_blocks_avx2_10
-poly1305_blocks_avx2_9:
-vpxor %ymm15, %ymm15, %ymm15
-poly1305_blocks_avx2_10:
-vmovdqa %ymm15, %ymm1
-poly1305_blocks_avx2_11:
-movq %rax, %rcx
-btsq $0, %rcx
-jc poly1305_blocks_avx2_13
-poly1305_blocks_avx2_12:
-vmovdqu (%rsi), %ymm3
-movq %rcx, %rax
-vmovdqu 32(%rsi), %ymm5
-vpunpcklqdq %ymm5, %ymm3, %ymm4
-addq $64, %rsi
-vpunpckhqdq %ymm5, %ymm3, %ymm7
-vpermq $216, %ymm4, %ymm6
-addq $-64, %rdx
-vpermq $216, %ymm7, %ymm0
-vpsrlq $52, %ymm6, %ymm8
-vpsllq $12, %ymm0, %ymm9
-vpsrlq $26, %ymm6, %ymm2
-vpsrlq $40, %ymm0, %ymm0
-vpand %ymm6, %ymm10, %ymm4
-vpor %ymm9, %ymm8, %ymm7
-vpand %ymm2, %ymm10, %ymm3
-vpor %ymm1, %ymm0, %ymm9
-vpsrlq $26, %ymm7, %ymm2
-vpand %ymm7, %ymm10, %ymm5
-vpand %ymm2, %ymm10, %ymm7
-movq %rax, 176(%rdi)
-jmp poly1305_blocks_avx2_14
-poly1305_blocks_avx2_13:
-vpermq $216, (%rdi), %ymm15
-vpxor %ymm0, %ymm0, %ymm0
-vpermq $216, 32(%rdi), %ymm14
-vpermq $216, 64(%rdi), %ymm13
-vpunpckldq %ymm0, %ymm15, %ymm4
-vpunpckhdq %ymm0, %ymm15, %ymm3
-vpunpckldq %ymm0, %ymm14, %ymm5
-vpunpckhdq %ymm0, %ymm14, %ymm7
-vpunpckldq %ymm0, %ymm13, %ymm9
-poly1305_blocks_avx2_14:
-cmpq $64, %rdx
-jb poly1305_blocks_avx2_34
-poly1305_blocks_avx2_15:
-vmovdqu 140(%rdi), %ymm0
-testq $8064, %rax
-je poly1305_blocks_avx2_29
-poly1305_blocks_avx2_16:
-vpermq $216, 80(%rdi), %ymm6
-vpermq $216, 100(%rdi), %ymm2
-vpermq $216, 120(%rdi), %ymm8
-vpermq $216, %ymm0, %ymm0
-testq $128, %rax
-je poly1305_blocks_avx2_18
-poly1305_blocks_avx2_17:
-vmovdqa %ymm0, %ymm15
-vmovdqa %ymm0, %ymm14
-vmovdqa %ymm0, %ymm13
-vmovdqa %ymm8, %ymm12
-jmp poly1305_blocks_avx2_28
-poly1305_blocks_avx2_18:
-testq $256, %rax
-je poly1305_blocks_avx2_20
-poly1305_blocks_avx2_19:
-vmovdqa %ymm0, %ymm15
-vmovdqa %ymm0, %ymm14
-vmovdqa %ymm8, %ymm13
-vmovdqa %ymm2, %ymm12
-jmp poly1305_blocks_avx2_28
-poly1305_blocks_avx2_20:
-testq $512, %rax
-je poly1305_blocks_avx2_22
-poly1305_blocks_avx2_21:
-vmovdqa %ymm0, %ymm15
-vmovdqa %ymm8, %ymm14
-vmovdqa %ymm2, %ymm13
-vmovdqa %ymm6, %ymm12
-jmp poly1305_blocks_avx2_28
-poly1305_blocks_avx2_22:
-testq $1024, %rax
-je poly1305_blocks_avx2_24
-poly1305_blocks_avx2_23:
-vpxor %ymm12, %ymm12, %ymm12
-movl $1, %r8d
-vmovdqa %ymm8, %ymm15
-vmovdqa %ymm2, %ymm14
-vmovdqa %ymm6, %ymm13
-vmovd %r8d, %xmm12
-jmp poly1305_blocks_avx2_28
-poly1305_blocks_avx2_24:
-testq $2048, %rax
-je poly1305_blocks_avx2_26
-poly1305_blocks_avx2_25:
-vpxor %ymm12, %ymm12, %ymm12
-movl $1, %r8d
-vmovd %r8d, %xmm13
-vmovdqa %ymm2, %ymm15
-vmovdqa %ymm6, %ymm14
-vmovdqa %ymm13, %ymm12
-jmp poly1305_blocks_avx2_28
-poly1305_blocks_avx2_26:
-testq $4096, %rax
-je poly1305_blocks_avx2_28
-poly1305_blocks_avx2_27:
-movl $1, %r8d
-vmovd %r8d, %xmm14
-vmovdqa %ymm6, %ymm15
-vmovdqa %ymm14, %ymm13
-vmovdqa %ymm14, %ymm12
-poly1305_blocks_avx2_28:
-vpunpcklqdq %ymm14, %ymm15, %ymm6
-vpunpcklqdq %ymm12, %ymm13, %ymm8
-vpunpckhqdq %ymm14, %ymm15, %ymm14
-vpunpckhqdq %ymm12, %ymm13, %ymm12
-vperm2i128 $32, %ymm8, %ymm6, %ymm2
-vperm2i128 $49, %ymm8, %ymm6, %ymm6
-vpsrlq $32, %ymm6, %ymm0
-vpsrlq $32, %ymm2, %ymm8
-vmovdqu %ymm0, 352(%rsp)
-vperm2i128 $32, %ymm12, %ymm14, %ymm13
-vmovdqu %ymm13, 320(%rsp)
-jmp poly1305_blocks_avx2_30
-poly1305_blocks_avx2_29:
-vpsrlq $32, %ymm0, %ymm12
-vpermq $0, %ymm0, %ymm2
-vpermq $85, %ymm0, %ymm6
-vpermq $85, %ymm12, %ymm13
-vpermq $170, %ymm0, %ymm0
-vpermq $0, %ymm12, %ymm8
-vmovdqu %ymm13, 352(%rsp)
-vmovdqu %ymm0, 320(%rsp)
-poly1305_blocks_avx2_30:
-vmovdqu (%rsi), %ymm12
-movq %rdx, %r9
-vmovdqu 352(%rsp), %ymm15
-vmovdqu %ymm1, 160(%rsp)
-vmovdqu %ymm10, 192(%rsp)
-vmovdqu %ymm11, 128(%rsp)
-vperm2i128 $32, 32(%rsi), %ymm12, %ymm13
-xorl %r8d, %r8d
-vperm2i128 $49, 32(%rsi), %ymm12, %ymm12
-xorl %ecx, %ecx
-vpmuludq %ymm11, %ymm8, %ymm0
-vpmuludq %ymm11, %ymm6, %ymm1
-vmovdqu %ymm0, 224(%rsp)
-vmovdqu %ymm1, 256(%rsp)
-vpunpckldq %ymm12, %ymm13, %ymm14
-vpunpckhdq %ymm12, %ymm13, %ymm12
-vmovdqu %ymm14, 32(%rsp)
-vpmuludq %ymm0, %ymm9, %ymm0
-vpmuludq %ymm1, %ymm7, %ymm13
-vpaddq %ymm13, %ymm0, %ymm0
-vpmuludq %ymm11, %ymm15, %ymm10
-vpmuludq %ymm10, %ymm5, %ymm13
-vpaddq %ymm13, %ymm0, %ymm0
-vmovdqu %ymm10, 288(%rsp)
-vpmuludq 320(%rsp), %ymm11, %ymm11
-vpmuludq %ymm11, %ymm3, %ymm13
-vpaddq %ymm13, %ymm0, %ymm0
-vmovdqu %ymm11, (%rsp)
-vpmuludq %ymm2, %ymm4, %ymm13
-vpaddq %ymm13, %ymm0, %ymm0
-vpxor %ymm13, %ymm13, %ymm13
-vpunpckldq %ymm13, %ymm14, %ymm14
-vpaddq %ymm14, %ymm0, %ymm0
-vmovdqu %ymm0, 64(%rsp)
-vpmuludq %ymm11, %ymm9, %ymm14
-vpmuludq %ymm2, %ymm7, %ymm0
-vpaddq %ymm0, %ymm14, %ymm14
-vpmuludq %ymm8, %ymm5, %ymm0
-vpaddq %ymm0, %ymm14, %ymm14
-vpmuludq %ymm6, %ymm3, %ymm0
-vpaddq %ymm0, %ymm14, %ymm14
-vpmuludq %ymm15, %ymm4, %ymm0
-vpaddq %ymm0, %ymm14, %ymm0
-vpunpckhdq %ymm13, %ymm12, %ymm14
-vpsllq $18, %ymm14, %ymm14
-vpaddq %ymm14, %ymm0, %ymm14
-vpmuludq %ymm1, %ymm9, %ymm1
-vpmuludq %ymm10, %ymm7, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vpmuludq %ymm11, %ymm5, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vpmuludq %ymm2, %ymm3, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vpmuludq %ymm8, %ymm4, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vmovdqu 32(%rsp), %ymm0
-vpunpckhdq %ymm13, %ymm0, %ymm0
-vpsllq $6, %ymm0, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vmovdqu 64(%rsp), %ymm0
-vpsrlq $26, %ymm0, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vmovdqu %ymm1, 96(%rsp)
-vpmuludq %ymm2, %ymm9, %ymm1
-vpmuludq %ymm8, %ymm7, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vpmuludq %ymm10, %ymm9, %ymm10
-vpmuludq %ymm11, %ymm7, %ymm11
-vpaddq %ymm11, %ymm10, %ymm7
-vpmuludq %ymm6, %ymm5, %ymm0
-vpaddq %ymm0, %ymm1, %ymm1
-vpmuludq %ymm2, %ymm5, %ymm5
-vpaddq %ymm5, %ymm7, %ymm10
-vpmuludq %ymm15, %ymm3, %ymm15
-vpaddq %ymm15, %ymm1, %ymm1
-vpmuludq %ymm8, %ymm3, %ymm11
-vpaddq %ymm11, %ymm10, %ymm5
-vpunpckldq %ymm13, %ymm12, %ymm10
-vmovdqu 96(%rsp), %ymm12
-vpmuludq 320(%rsp), %ymm4, %ymm0
-vpaddq %ymm0, %ymm1, %ymm15
-vpsrlq $26, %ymm12, %ymm3
-vmovdqu 160(%rsp), %ymm1
-vpmuludq %ymm6, %ymm4, %ymm4
-vpaddq %ymm1, %ymm15, %ymm0
-vpsrlq $26, %ymm14, %ymm15
-vpaddq %ymm4, %ymm5, %ymm11
-vpsllq $12, %ymm10, %ymm4
-vmovdqu 192(%rsp), %ymm10
-vpaddq %ymm15, %ymm0, %ymm0
-vpaddq %ymm4, %ymm11, %ymm5
-vmovdqu 128(%rsp), %ymm11
-vpsrlq $26, %ymm0, %ymm9
-vpaddq %ymm3, %ymm5, %ymm7
-vpand 64(%rsp), %ymm10, %ymm13
-vpand %ymm10, %ymm12, %ymm12
-vpand %ymm10, %ymm7, %ymm5
-vpsrlq $26, %ymm7, %ymm7
-vpmuludq %ymm11, %ymm9, %ymm15
-vpand %ymm10, %ymm14, %ymm9
-vpaddq %ymm15, %ymm13, %ymm3
-vpand %ymm10, %ymm0, %ymm14
-vpaddq %ymm7, %ymm9, %ymm9
-vpand %ymm10, %ymm3, %ymm4
-vpsrlq $26, %ymm3, %ymm3
-vpsrlq $26, %ymm9, %ymm0
-vpand %ymm10, %ymm9, %ymm7
-vpaddq %ymm3, %ymm12, %ymm3
-vpaddq %ymm0, %ymm14, %ymm9
-sarq $5, %r9
-shrq $58, %r9
-addq %rdx, %r9
-sarq $6, %r9
-cmpq $2, %r9
-jl poly1305_blocks_avx2_34
-poly1305_blocks_avx2_31:
-vmovdqu %ymm6, 32(%rsp)
-lea -64(%rdx), %r9
-vmovdqu %ymm8, 64(%rsp)
-vmovdqu %ymm11, 128(%rsp)
-vmovdqu %ymm10, 192(%rsp)
-vmovdqu %ymm1, 160(%rsp)
-vmovdqu (%rsp), %ymm12
-sarq $5, %r9
-shrq $58, %r9
-lea -64(%rdx,%r9), %rdx
-sarq $6, %rdx
-poly1305_blocks_avx2_32:
-vmovdqu 256(%rsp), %ymm15
-incq %r8
-vmovdqu 64(%rcx,%rsi), %ymm11
-vpmuludq 224(%rsp), %ymm9, %ymm8
-vpmuludq %ymm15, %ymm7, %ymm14
-vpaddq %ymm14, %ymm8, %ymm1
-vmovdqu 288(%rsp), %ymm8
-vperm2i128 $32, 96(%rcx,%rsi), %ymm11, %ymm10
-vperm2i128 $49, 96(%rcx,%rsi), %ymm11, %ymm6
-addq $64, %rcx
-vpmuludq %ymm8, %ymm5, %ymm13
-vpunpckldq %ymm6, %ymm10, %ymm0
-vpunpckhdq %ymm6, %ymm10, %ymm11
-vpaddq %ymm13, %ymm1, %ymm10
-vpmuludq %ymm12, %ymm3, %ymm6
-vpaddq %ymm6, %ymm10, %ymm14
-vpxor %ymm10, %ymm10, %ymm10
-vpunpckldq %ymm10, %ymm0, %ymm6
-vpunpckhdq %ymm10, %ymm0, %ymm0
-vpmuludq %ymm2, %ymm4, %ymm1
-vpaddq %ymm1, %ymm14, %ymm13
-vpaddq %ymm6, %ymm13, %ymm1
-vmovdqu 64(%rsp), %ymm6
-vmovdqu %ymm1, (%rsp)
-vpsrlq $26, %ymm1, %ymm1
-vpmuludq %ymm12, %ymm9, %ymm14
-vpmuludq %ymm2, %ymm7, %ymm13
-vpaddq %ymm13, %ymm14, %ymm14
-vpmuludq %ymm6, %ymm5, %ymm13
-vpaddq %ymm13, %ymm14, %ymm14
-vpmuludq 32(%rsp), %ymm3, %ymm13
-vpaddq %ymm13, %ymm14, %ymm14
-vpmuludq 352(%rsp), %ymm4, %ymm13
-vpaddq %ymm13, %ymm14, %ymm13
-vpunpckhdq %ymm10, %ymm11, %ymm14
-vpsllq $18, %ymm14, %ymm14
-vpaddq %ymm14, %ymm13, %ymm13
-vpmuludq %ymm15, %ymm9, %ymm15
-vpmuludq %ymm8, %ymm7, %ymm14
-vpaddq %ymm14, %ymm15, %ymm15
-vpmuludq %ymm12, %ymm5, %ymm14
-vpaddq %ymm14, %ymm15, %ymm15
-vpmuludq %ymm2, %ymm3, %ymm14
-vpaddq %ymm14, %ymm15, %ymm15
-vpmuludq %ymm6, %ymm4, %ymm14
-vpaddq %ymm14, %ymm15, %ymm14
-vpsllq $6, %ymm0, %ymm15
-vpaddq %ymm15, %ymm14, %ymm14
-vmovdqu 32(%rsp), %ymm15
-vpaddq %ymm1, %ymm14, %ymm1
-vpmuludq %ymm2, %ymm9, %ymm0
-vpmuludq %ymm6, %ymm7, %ymm14
-vpmuludq %ymm8, %ymm9, %ymm9
-vpmuludq %ymm12, %ymm7, %ymm7
-vpaddq %ymm7, %ymm9, %ymm7
-vpaddq %ymm14, %ymm0, %ymm0
-vpsrlq $26, %ymm1, %ymm9
-vpmuludq %ymm15, %ymm5, %ymm14
-vpmuludq %ymm2, %ymm5, %ymm5
-vpaddq %ymm5, %ymm7, %ymm5
-vpaddq %ymm14, %ymm0, %ymm0
-vpmuludq 352(%rsp), %ymm3, %ymm14
-vpmuludq %ymm6, %ymm3, %ymm3
-vpaddq %ymm3, %ymm5, %ymm5
-vpaddq %ymm14, %ymm0, %ymm0
-vpmuludq 320(%rsp), %ymm4, %ymm14
-vpmuludq %ymm15, %ymm4, %ymm4
-vpaddq %ymm4, %ymm5, %ymm5
-vpaddq %ymm14, %ymm0, %ymm0
-vpunpckldq %ymm10, %ymm11, %ymm4
-vpaddq 160(%rsp), %ymm0, %ymm14
-vpsrlq $26, %ymm13, %ymm0
-vpsllq $12, %ymm4, %ymm3
-vpaddq %ymm0, %ymm14, %ymm14
-vpaddq %ymm3, %ymm5, %ymm7
-vpsrlq $26, %ymm14, %ymm0
-vpaddq %ymm9, %ymm7, %ymm10
-vmovdqu 192(%rsp), %ymm9
-vpsrlq $26, %ymm10, %ymm11
-vpand (%rsp), %ymm9, %ymm6
-vpand %ymm9, %ymm13, %ymm13
-vpand %ymm9, %ymm1, %ymm1
-vpand %ymm9, %ymm14, %ymm14
-vpand %ymm9, %ymm10, %ymm5
-vpmuludq 128(%rsp), %ymm0, %ymm8
-vpaddq %ymm8, %ymm6, %ymm15
-vpaddq %ymm11, %ymm13, %ymm0
-vpsrlq $26, %ymm15, %ymm3
-vpand %ymm9, %ymm0, %ymm7
-vpsrlq $26, %ymm0, %ymm0
-vpand %ymm9, %ymm15, %ymm4
-vpaddq %ymm3, %ymm1, %ymm3
-vpaddq %ymm0, %ymm14, %ymm9
-cmpq %rdx, %r8
-jb poly1305_blocks_avx2_32
-poly1305_blocks_avx2_34:
-testq $64, %rax
-jne poly1305_blocks_avx2_36
-poly1305_blocks_avx2_35:
-vpshufd $8, %ymm4, %ymm0
-vpshufd $8, %ymm3, %ymm3
-vpshufd $8, %ymm5, %ymm5
-vpshufd $8, %ymm7, %ymm7
-vpshufd $8, %ymm9, %ymm9
-vpermq $8, %ymm0, %ymm1
-vpermq $8, %ymm3, %ymm2
-vpermq $8, %ymm5, %ymm4
-vpermq $8, %ymm7, %ymm6
-vpermq $8, %ymm9, %ymm11
-vperm2i128 $32, %ymm2, %ymm1, %ymm8
-vperm2i128 $32, %ymm6, %ymm4, %ymm10
-vmovdqu %ymm8, (%rdi)
-vmovdqu %ymm10, 32(%rdi)
-vmovdqu %xmm11, 64(%rdi)
-jmp poly1305_blocks_avx2_37
-poly1305_blocks_avx2_36:
-vpermq $245, %ymm4, %ymm0
-vpaddq %ymm0, %ymm4, %ymm4
-vpermq $245, %ymm3, %ymm1
-vpaddq %ymm1, %ymm3, %ymm10
-vpermq $245, %ymm5, %ymm3
-vpermq $170, %ymm4, %ymm6
-vpaddq %ymm3, %ymm5, %ymm13
-vpaddq %ymm6, %ymm4, %ymm8
-vpermq $170, %ymm10, %ymm11
-vpermq $245, %ymm7, %ymm5
-vpaddq %ymm11, %ymm10, %ymm12
-vpaddq %ymm5, %ymm7, %ymm7
-vpermq $170, %ymm13, %ymm14
-vpermq $245, %ymm9, %ymm2
-vpaddq %ymm14, %ymm13, %ymm15
-vpaddq %ymm2, %ymm9, %ymm9
-vpermq $170, %ymm7, %ymm0
-vpaddq %ymm0, %ymm7, %ymm1
-vpermq $170, %ymm9, %ymm2
-vpaddq %ymm2, %ymm9, %ymm3
-vmovd %xmm8, %r9d
-movl %r9d, %r8d
-shrl $26, %r8d
-andq $67108863, %r9
-vmovd %xmm12, %esi
-addl %r8d, %esi
-movl %esi, %r11d
-shrl $26, %esi
-andq $67108863, %r11
-vmovd %xmm15, %ecx
-addl %esi, %ecx
-movl %ecx, %eax
-shrl $26, %eax
-andq $67108863, %rcx
-shlq $8, %rcx
-vmovd %xmm1, %r8d
-addl %eax, %r8d
-movl %r8d, %r10d
-shrl $26, %r8d
-andq $67108863, %r10
-movq %r10, %rax
-shrq $10, %rax
-shlq $34, %r10
-vmovd %xmm3, %edx
-addl %r8d, %edx
-shlq $16, %rdx
-orq %rdx, %rax
-movq %rax, %r8
-shrq $42, %r8
-lea (%r8,%r8,4), %rdx
-movq %r11, %r8
-shlq $26, %r8
-orq %r8, %r9
-movq $0xfffffffffff, %r8
-shrq $18, %r11
-andq %r8, %r9
-addq %r9, %rdx
-orq %rcx, %r11
-movq %rdx, %rsi
-orq %r10, %r11
-shrq $44, %rsi
-andq %r8, %r11
-addq %r11, %rsi
-movq $0x3ffffffffff, %r9
-movq %rsi, %r10
-andq %r9, %rax
-shrq $44, %r10
-andq %r8, %rdx
-addq %r10, %rax
-movq %r8, %rcx
-andq %rax, %r9
-andq %r8, %rsi
-shrq $42, %rax
-movq $0xfffffc0000000000, %r10
-lea (%rax,%rax,4), %r11
-addq %r11, %rdx
-andq %rdx, %rcx
-shrq $44, %rdx
-addq %rdx, %rsi
-lea 5(%rcx), %rdx
-movq %rdx, %r11
-andq %r8, %rdx
-shrq $44, %r11
-addq %rsi, %r11
-movq %r11, %rax
-andq %r11, %r8
-shrq $44, %rax
-addq %r9, %rax
-addq %r10, %rax
-movq %rax, %r10
-shrq $63, %r10
-decq %r10
-andn %rcx, %r10, %rcx
-andq %r10, %rdx
-orq %rdx, %rcx
-andq %r10, %r8
-andn %rsi, %r10, %rdx
-andq %r10, %rax
-andn %r9, %r10, %rsi
-orq %r8, %rdx
-orq %rax, %rsi
-movq %rcx, (%rdi)
-movq %rdx, 8(%rdi)
-movq %rsi, 16(%rdi)
-poly1305_blocks_avx2_37:
-vzeroupper
-movq %rbp, %rsp
-popq %rbp
-ret
-FN_END poly1305_blocks_avx2
-/* void poly1305_init_ext_avx2(void *state, const poly1305_key *key, size_t bytes_hint) -- SysV AMD64: state=%rdi, key=%rsi, bytes_hint=%rdx. Zeroes state[0..79], clamps key bytes 0..15 into r, stores r (and, depending on bytes_hint, r^2, r^3, r^4) as 26-bit limbs, copies key bytes 16..31 to state+160 and clears the qword at state+176. Pushes/pops %rbx and %r12-%r15; makes no calls. */
-GLOBAL_HIDDEN_FN poly1305_init_ext_avx2
-poly1305_init_ext_avx2_local:
-pushq %r12
-pushq %r13
-pushq %r14
-pushq %r15
-pushq %rbx
-movq %rdi, %r10 /* r10 = state for the rest of the function */
-vpxor %ymm0, %ymm0, %ymm0
-movq %rdx, %r12 /* r12 = bytes_hint */
-vpxor %xmm1, %xmm1, %xmm1
-vmovdqu %xmm1, 64(%r10)
-vmovdqu %ymm0, (%r10)
-vmovdqu %ymm0, 32(%r10) /* state[0..79] is now zero */
-movq $-1, %r8
-testq %r12, %r12
-movq 8(%rsi), %rdi
-movq $0xffc0fffffff, %r9
-movq %rdi, %rcx
-cmove %r8, %r12 /* bytes_hint == 0 is replaced by -1 (largest unsigned value) */
-movq (%rsi), %r8
-andq %r8, %r9 /* r9 = clamped r, limb 0 */
-shrq $44, %r8
-movq $0xfffffc0ffff, %r11
-shlq $20, %rcx
-shrq $24, %rdi
-orq %rcx, %r8
-movq $0xffffffc0f, %rcx
-andq %r11, %r8 /* r8 = clamped r, limb 1 */
-andq %rcx, %rdi /* rdi = clamped r, limb 2 */
-movq 16(%rsi), %rcx
-movq %rcx, 160(%r10)
-movq %r9, %rcx
-movq 24(%rsi), %rdx
-movq %rdx, 168(%r10) /* key bytes 16..31 copied verbatim to state+160..175 */
-movl %r9d, %edx
-andl $67108863, %edx
-movl %edx, 80(%r10)
-movq %r8, %rdx
-shrq $26, %rcx
-shlq $18, %rdx
-orq %rdx, %rcx
-movq %r8, %rdx
-shrq $8, %rdx
-andl $67108863, %ecx
-andl $67108863, %edx
-movl %ecx, 84(%r10)
-movq %r8, %rcx
-movl %edx, 88(%r10)
-movq %rdi, %rdx
-shrq $34, %rcx
-shlq $10, %rdx
-orq %rdx, %rcx
-movq %rdi, %rdx
-shrq $16, %rdx
-andl $67108863, %ecx
-movl %ecx, 92(%r10)
-movl %edx, 96(%r10) /* r stored as five 26-bit limbs (mask 67108863 = 2^26-1) at state+80..99 */
-cmpq $16, %r12
-jbe poly1305_init_ext_avx2_7 /* unsigned: hint <= 16, no powers of r are stored */
-poly1305_init_ext_avx2_2: /* square r (limbs r9, r8, rdi); reduced result ends up in rsi, rcx, rbx */
-movq %r9, %rax
-lea (%rdi,%rdi,4), %r14
-mulq %r9
-shlq $2, %r14 /* r14 = 20 * limb 2 */
-movq %rax, %r11
-movq %rdx, %r15
-lea (%r8,%r8), %rax
-mulq %r14
-addq %rax, %r11
-lea (%r9,%r9), %rax
-movq %r11, %rsi
-adcq %rdx, %r15
-mulq %r8
-movq %rax, %rbx
-movq %r14, %rax
-movq %rdx, %rcx
-lea (%rdi,%rdi), %r14
-mulq %rdi
-addq %rax, %rbx
-movq %r8, %rax
-adcq %rdx, %rcx
-mulq %r8
-shlq $20, %r15
-movq %rax, %r13
-shrq $44, %rsi
-movq %r9, %rax
-orq %rsi, %r15
-movq %rdx, %rsi
-mulq %r14
-addq %r15, %rbx
-movq %rbx, %r15
-adcq $0, %rcx
-addq %rax, %r13
-adcq %rdx, %rsi
-shlq $20, %rcx
-shrq $44, %r15
-orq %r15, %rcx
-addq %rcx, %r13
-movq $0xfffffffffff, %rcx
-movq %r13, %rdx
-adcq $0, %rsi
-andq %rcx, %r11
-shlq $22, %rsi
-andq %rcx, %rbx
-shrq $42, %rdx
-orq %rdx, %rsi
-lea (%rsi,%rsi,4), %rsi /* bits carried out above bit 42 of the top limb are multiplied by 5 and added to limb 0 */
-addq %rsi, %r11
-movq %rcx, %rsi
-andq %r11, %rsi
-shrq $44, %r11
-addq %r11, %rbx
-movq $0x3ffffffffff, %r11
-andq %rbx, %rcx
-andq %r11, %r13
-shrq $44, %rbx
-movq %rsi, %r11
-movq %rcx, %rdx
-addq %r13, %rbx
-shrq $26, %r11
-movq %rbx, %r15
-shlq $18, %rdx
-movq %rcx, %r14
-orq %rdx, %r11
-movq %rcx, %rdx
-shrq $34, %rdx
-movl %esi, %r13d
-shlq $10, %r15
-andl $67108863, %r13d
-orq %r15, %rdx
-andl $67108863, %r11d
-shrq $8, %r14
-andl $67108863, %edx
-movl %edx, 112(%r10)
-movq %rbx, %rdx
-shrq $16, %rdx
-andl $67108863, %r14d
-movl %r13d, 100(%r10)
-movl %r11d, 104(%r10)
-movl %r14d, 108(%r10)
-movl %edx, 116(%r10) /* r^2 stored as five 26-bit limbs at state+100..119 */
-cmpq $48, %r12
-jbe poly1305_init_ext_avx2_4
-poly1305_init_ext_avx2_3: /* hint > 48: square r^2 (rsi, rcx, rbx) to get r^4; rsi, rcx, rbx are left unchanged */
-movq %rsi, %rax
-lea (%rbx,%rbx,4), %r15
-mulq %rsi
-shlq $2, %r15
-movq %rax, %r13
-movq %rdx, %r12
-lea (%rcx,%rcx), %rax
-mulq %r15
-addq %rax, %r13
-lea (%rsi,%rsi), %rax
-movq %r15, -16(%rsp) /* spill 20 * rbx below %rsp; reloaded into r11 before the r^3 step */
-adcq %rdx, %r12
-mulq %rcx
-movq %rax, %r14
-movq %rbx, %rax
-movq %rdx, %r11
-mulq %r15
-addq %rax, %r14
-movq %rcx, %rax
-movq %r13, %r15
-adcq %rdx, %r11
-mulq %rcx
-shlq $20, %r12
-shrq $44, %r15
-orq %r15, %r12
-movq %rax, %r15
-addq %r12, %r14
-movq %rdx, %r12
-movq %rsi, %rax
-lea (%rbx,%rbx), %rdx
-adcq $0, %r11
-mulq %rdx
-addq %rax, %r15
-adcq %rdx, %r12
-movq %r14, %rdx
-shlq $20, %r11
-shrq $44, %rdx
-orq %rdx, %r11
-addq %r11, %r15
-movq $0xfffffffffff, %r11
-movq %r15, %rdx
-adcq $0, %r12
-andq %r11, %r13
-shlq $22, %r12
-andq %r11, %r14
-shrq $42, %rdx
-orq %rdx, %r12
-lea (%r12,%r12,4), %r12
-addq %r12, %r13
-movq %r11, %r12
-andq %r13, %r12
-shrq $44, %r13
-addq %r13, %r14
-movq $0x3ffffffffff, %r13
-andq %r14, %r11
-andq %r13, %r15
-shrq $44, %r14
-movq %r11, %rdx
-shlq $18, %rdx
-addq %r14, %r15
-movl %r12d, %r14d
-movq %r11, %r13
-shrq $26, %r12
-andl $67108863, %r14d
-orq %rdx, %r12
-movq %r15, %rdx
-shrq $34, %r11
-shlq $10, %rdx
-andl $67108863, %r12d
-orq %rdx, %r11
-shrq $8, %r13
-andl $67108863, %r11d
-movl %r11d, 152(%r10)
-andl $67108863, %r13d
-shrq $16, %r15
-movl %r14d, 140(%r10)
-movl %r12d, 144(%r10)
-movl %r13d, 148(%r10)
-movl %r15d, 156(%r10) /* r^4 stored as five 26-bit limbs at state+140..159 */
-movq -16(%rsp), %r11 /* r11 = 20 * rbx, same value the _5 path computes */
-jmp poly1305_init_ext_avx2_6
-poly1305_init_ext_avx2_4:
-cmpq $32, %r12
-jbe poly1305_init_ext_avx2_7 /* 16 < hint <= 32: only r and r^2 are stored */
-poly1305_init_ext_avx2_5:
-lea (%rbx,%rbx,4), %r11
-shlq $2, %r11 /* r11 = 20 * rbx (top limb of r^2) */
-poly1305_init_ext_avx2_6: /* multiply r (r9, r8, rdi) by r^2 (rsi, rcx, rbx) to get r^3 */
-movq %r9, %rax
-lea (%rcx,%rcx,4), %r13
-mulq %rsi
-shlq $2, %r13
-movq %rax, %r14
-movq %rdi, %rax
-movq %rdx, %r12
-mulq %r13
-addq %rax, %r14
-movq %r8, %rax
-adcq %rdx, %r12
-mulq %r11
-addq %rax, %r14
-movq %r8, %rax
-adcq %rdx, %r12
-mulq %rsi
-movq %rax, %r15
-movq %r9, %rax
-movq %rdx, %r13
-mulq %rcx
-addq %rax, %r15
-movq %r11, %rax
-movq %r14, %r11
-adcq %rdx, %r13
-mulq %rdi
-addq %rax, %r15
-movq %rdi, %rax
-adcq %rdx, %r13
-mulq %rsi
-shlq $20, %r12
-movq %rax, %rsi
-shrq $44, %r11
-movq %r8, %rax
-orq %r11, %r12
-movq %rdx, %rdi
-mulq %rcx
-addq %r12, %r15
-movq %r15, %rcx
-adcq $0, %r13
-addq %rax, %rsi
-movq %r9, %rax
-movq $0xfffffffffff, %r9
-adcq %rdx, %rdi
-andq %r9, %r14
-mulq %rbx
-addq %rax, %rsi
-adcq %rdx, %rdi
-movq %r9, %rdx
-shlq $20, %r13
-andq %r9, %r15
-shrq $44, %rcx
-orq %rcx, %r13
-addq %r13, %rsi
-movq %rsi, %rbx
-adcq $0, %rdi
-shlq $22, %rdi
-shrq $42, %rbx
-orq %rbx, %rdi
-lea (%rdi,%rdi,4), %r8
-addq %r8, %r14
-andq %r14, %rdx
-shrq $44, %r14
-addq %r14, %r15
-movq $0x3ffffffffff, %r14
-andq %r15, %r9
-andq %r14, %rsi
-shrq $44, %r15
-movq %r9, %rax
-addq %r15, %rsi
-movl %edx, %r15d
-movq %rsi, %rbx
-movq %r9, %rcx
-shrq $26, %rdx
-andl $67108863, %r15d
-shlq $18, %rax
-shrq $34, %r9
-orq %rax, %rdx
-shlq $10, %rbx
-shrq $8, %rcx
-orq %rbx, %r9
-shrq $16, %rsi
-andl $67108863, %edx
-andl $67108863, %ecx
-andl $67108863, %r9d
-movl %r15d, 120(%r10)
-movl %edx, 124(%r10)
-movl %ecx, 128(%r10)
-movl %r9d, 132(%r10)
-movl %esi, 136(%r10) /* r^3 stored as five 26-bit limbs at state+120..139 */
-poly1305_init_ext_avx2_7:
-movq $0, 176(%r10) /* clear the qword at state+176; NOTE(review): presumably the flags word read by the blocks routine -- confirm */
-vzeroupper
-popq %rbx
-popq %r15
-popq %r14
-popq %r13
-popq %r12
-ret
-FN_END poly1305_init_ext_avx2
-
diff --git a/src/libcryptobox/poly1305/constants.S b/src/libcryptobox/poly1305/constants.S
deleted file mode 100644
index a4797a2aa..000000000
--- a/src/libcryptobox/poly1305/constants.S
+++ /dev/null
@@ -1,21 +0,0 @@
-SECTION_RODATA
-/* Table of 8-byte constants, two .long words per entry; the leading number on each line is the entry's byte offset from poly1305_constants_x86. NOTE(review): the word pairs look like IEEE-754 double bit patterns (low word first) -- confirm against the code that loads them. */
-.p2align 4
-poly1305_constants_x86:
-/* 0 */ poly1305_x86_scale: .long 0x0,0x37f40000
-/* 8 */ poly1305_x86_two32: .long 0x0,0x41f00000
-/* 16 */ poly1305_x86_two64: .long 0x0,0x43f00000
-/* 24 */ poly1305_x86_two96: .long 0x0,0x45f00000
-/* 32 */ poly1305_x86_alpha32: .long 0x0,0x45e80000
-/* 40 */ poly1305_x86_alpha64: .long 0x0,0x47e80000
-/* 48 */ poly1305_x86_alpha96: .long 0x0,0x49e80000
-/* 56 */ poly1305_x86_alpha130: .long 0x0,0x4c080000
-/* 64 */ poly1305_x86_doffset0: .long 0x0,0x43300000
-/* 72 */ poly1305_x86_doffset1: .long 0x0,0x45300000
-/* 80 */ poly1305_x86_doffset2: .long 0x0,0x47300000
-/* 88 */ poly1305_x86_doffset3: .long 0x0,0x49300000
-/* 96 */ poly1305_x86_doffset3minustwo128: .long 0x0,0x492ffffe
-/* 104 */ poly1305_x86_hoffset0: .long 0xfffffffb,0x43300001
-/* 112 */ poly1305_x86_hoffset1: .long 0xfffffffe,0x45300001
-/* 120 */ poly1305_x86_hoffset2: .long 0xfffffffe,0x47300001
-/* 128 */ poly1305_x86_hoffset3: .long 0xfffffffe,0x49300003
diff --git a/src/libcryptobox/poly1305/poly1305.c b/src/libcryptobox/poly1305/poly1305.c
deleted file mode 100644
index 4adea30af..000000000
--- a/src/libcryptobox/poly1305/poly1305.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2015, Vsevolod Stakhov
- * Copyright (c) 2015, Andrew Moon
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "cryptobox.h"
-#include "poly1305.h"
-#include "platform_config.h"
-
-extern unsigned long cpu_config;
-/* Internal view of the caller's poly1305_state: the functions in this file cast poly1305_state* to this type. */
-typedef struct poly1305_state_internal_t
-{
- unsigned char opaque[192]; /* backend-private state; sized for the largest state required (AVX2) */
- size_t leftover, block_size; /* leftover: bytes currently held in buffer; block_size: value returned by the backend's block_size() */
- unsigned char buffer[64]; /* partial-block staging area; sized for the largest blocksize (AVX2) */
-} poly1305_state_internal;
-/* One backend: the CPU feature bits it needs, a short name, and its five entry points. */
-typedef struct poly1305_impl_t
-{
- unsigned long cpu_flags; /* tested against cpu_config in poly1305_load(); 0 for the generic backend */
- const char *desc; /* name returned by poly1305_load() */
-
- size_t (*block_size)(void); /* bytes per block for this backend */
- void (*init_ext)(void *state, const poly1305_key *key, size_t bytes_hint); /* set up state from key */
- void (*blocks)(void *state, const unsigned char *in, size_t inlen); /* absorb full blocks */
- void (*finish_ext)(void *state, const unsigned char *in, size_t remaining, /* absorb the tail and write the 16-byte mac */
- unsigned char *mac);
- void (*auth)(unsigned char *mac, const unsigned char *in, size_t inlen, /* one-shot MAC of a whole message */
- const poly1305_key *key);
-} poly1305_impl_t;
-/* POLY1305_DECLARE(ext): prototypes for the five poly1305_*_<ext> entry points of one backend. */
-#define POLY1305_DECLARE(ext) \
- size_t poly1305_block_size_##ext(void); \
- void poly1305_init_ext_##ext(void *state, const poly1305_key *key, size_t bytes_hint); \
- void poly1305_blocks_##ext(void *state, const unsigned char *in, size_t inlen); \
- void poly1305_finish_ext_##ext(void *state, const unsigned char *in, size_t remaining, unsigned char *mac); \
- void poly1305_auth_##ext(unsigned char *mac, const unsigned char *m, size_t inlen, const poly1305_key *key);
-/* POLY1305_IMPL(cpuflags, desc, ext): brace initializer for a poly1305_impl_t, in the struct's field order. */
-#define POLY1305_IMPL(cpuflags, desc, ext) \
- {(cpuflags), desc, poly1305_block_size_##ext, poly1305_init_ext_##ext, poly1305_blocks_##ext, poly1305_finish_ext_##ext, poly1305_auth_##ext}
-/* Declare each SIMD backend enabled by its HAVE_* macro and define the matching table initializer. */
-#if defined(HAVE_AVX2)
-POLY1305_DECLARE(avx2)
-#define POLY1305_AVX2 POLY1305_IMPL(CPUID_AVX2, "avx2", avx2)
-#endif
-#if defined(HAVE_AVX)
-POLY1305_DECLARE(avx)
-#define POLY1305_AVX POLY1305_IMPL(CPUID_AVX, "avx", avx)
-#endif
-#if defined(HAVE_SSE2)
-POLY1305_DECLARE(sse2)
-#define POLY1305_SSE2 POLY1305_IMPL(CPUID_SSE2, "sse2", sse2)
-#endif
-/* The reference backend is declared unconditionally; its cpu_flags value is 0. */
-POLY1305_DECLARE(ref)
-#define POLY1305_GENERIC POLY1305_IMPL(0, "generic", ref)
-
-/* implementation table: generic is the FIRST entry (index 0 is the default poly1305_opt), followed by the SIMD entries from most optimized to least */
-static const poly1305_impl_t poly1305_list[] =
-{
-POLY1305_GENERIC,
-
-#if defined(POLY1305_AVX2)
- POLY1305_AVX2,
-#endif
-#if defined(POLY1305_AVX)
- POLY1305_AVX,
-#endif
-#if defined(POLY1305_SSE2)
- POLY1305_SSE2,
-#endif
-};
-
-static const poly1305_impl_t *poly1305_opt = &poly1305_list[0]; /* active backend; stays generic unless poly1305_load() selects another */
-
-/* is the pointer aligned on a word boundary? returns non-zero when the address is a multiple of sizeof(size_t) */
-static int poly1305_is_aligned(const void *p)
-{
- return ((size_t) p & (sizeof(size_t) - 1)) == 0;
-}
-/* Select the backend: when cpu_config is non-zero, take the first table entry whose cpu_flags shares a bit with it; otherwise leave poly1305_opt unchanged. Returns the selected backend's desc string. */
-const char*
-poly1305_load(void)
-{
- guint i;
-
- if (cpu_config != 0) {
- for (i = 0; i < G_N_ELEMENTS(poly1305_list); i++) {
- if (poly1305_list[i].cpu_flags & cpu_config) { /* the generic entry has cpu_flags 0, so it never matches here */
- poly1305_opt = &poly1305_list[i];
- break; /* first match wins, so table order sets the preference */
- }
- }
- }
-
- return poly1305_opt->desc;
-}
-
-/* processes inlen bytes (full blocks only), handling input alignment: word-aligned input goes straight to the backend, anything else is copied through a stack buffer in chunks of at most 1024 bytes */
-static void poly1305_consume(poly1305_state_internal *state,
- const unsigned char *in, size_t inlen)
-{
- int in_aligned;
-
- /* it's ok to call with 0 bytes */
- if (!inlen)
- return;
-
- /* if everything is aligned, handle directly */
- in_aligned = poly1305_is_aligned (in);
- if (in_aligned) {
- poly1305_opt->blocks (state->opaque, in, inlen);
- return;
- }
-
- /* copy the unaligned data to an aligned buffer and process in chunks */
- while (inlen) {
- unsigned char buffer[1024]; /* NOTE(review): declared without an alignment specifier, so word alignment relies on the compiler's stack layout -- confirm */
- const size_t bytes = (inlen > sizeof(buffer)) ? sizeof(buffer) : inlen; /* 1024 is a multiple of 16, 32 and 64, so full chunks keep block boundaries */
- memcpy (buffer, in, bytes);
- poly1305_opt->blocks (state->opaque, buffer, bytes);
- in += bytes;
- inlen -= bytes;
- }
-}
-/* Start a streaming MAC in S with the given key; same as poly1305_init_ext() with bytes_hint 0. */
-void poly1305_init(poly1305_state *S, const poly1305_key *key)
-{
- poly1305_state_internal *state = (poly1305_state_internal *) S;
- poly1305_opt->init_ext (state->opaque, key, 0);
- state->leftover = 0; /* nothing buffered yet */
- state->block_size = poly1305_opt->block_size (); /* cache the active backend's block size */
-}
-/* Start a streaming MAC in S with the given key; bytes_hint is passed unchanged to the backend's init_ext. */
-void poly1305_init_ext(poly1305_state *S, const poly1305_key *key,
- size_t bytes_hint)
-{
- poly1305_state_internal *state = (poly1305_state_internal *) S;
- poly1305_opt->init_ext (state->opaque, key, bytes_hint);
- state->leftover = 0; /* nothing buffered yet */
- state->block_size = poly1305_opt->block_size (); /* cache the active backend's block size */
-}
-/* Absorb inlen bytes from in: top up and flush a pending partial block, feed whole blocks to the backend, and buffer any tail shorter than block_size. */
-void poly1305_update(poly1305_state *S, const unsigned char *in, size_t inlen)
-{
- poly1305_state_internal *state = (poly1305_state_internal *) S;
-
- /* handle leftover */
- if (state->leftover) {
- size_t want = (state->block_size - state->leftover); /* bytes needed to complete the buffered block */
- if (want > inlen)
- want = inlen;
- memcpy (state->buffer + state->leftover, in, want);
- inlen -= want;
- in += want;
- state->leftover += want;
- if (state->leftover < state->block_size)
- return; /* still not a full block: all input was consumed into the buffer */
- poly1305_opt->blocks (state->opaque, state->buffer, state->block_size);
- state->leftover = 0;
- }
-
- /* process full blocks */
- if (inlen >= state->block_size) {
- size_t want = (inlen & ~(state->block_size - 1)); /* round down to a multiple of block_size; assumes block_size is a power of two */
- poly1305_consume (state, in, want);
- in += want;
- inlen -= want;
- }
-
- /* store leftover */
- if (inlen) {
- memcpy (state->buffer + state->leftover, in, inlen); /* leftover is 0 whenever this line is reached */
- state->leftover += inlen;
- }
-}
-/* Finish the streaming MAC: hand the buffered tail (leftover bytes) to the backend's finish_ext, which writes the tag to mac. */
-void poly1305_finish(poly1305_state *S, unsigned char *mac)
-{
- poly1305_state_internal *state = (poly1305_state_internal *) S;
- poly1305_opt->finish_ext (state->opaque, state->buffer, state->leftover,
- mac);
-}
-/* One-shot MAC: forwards all arguments to the active backend's auth entry point. */
-void poly1305_auth(unsigned char *mac, const unsigned char *in, size_t inlen,
- const poly1305_key *key)
-{
- poly1305_opt->auth (mac, in, inlen, key);
-}
-/* Compare two 16-byte tags: returns 1 when all bytes match, 0 otherwise. Every byte is examined; there is no early exit. */
-int poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16])
-{
- size_t i;
- unsigned int dif = 0;
-
- for (i = 0; i < 16; i++) {
- dif |= (mac1[i] ^ mac2[i]); /* accumulate any differing bits; dif stays within 0..255 */
- }
-
- dif = (dif - 1) >> ((sizeof(unsigned int) * 8) - 1); /* dif == 0 wraps to all ones, so the top bit is 1; for 1..255 the top bit is 0 */
- return (dif & 1);
-}
diff --git a/src/libcryptobox/poly1305/poly1305.h b/src/libcryptobox/poly1305/poly1305.h
deleted file mode 100644
index 902a9c288..000000000
--- a/src/libcryptobox/poly1305/poly1305.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef POLY1305_H
-#define POLY1305_H
-
-#include <stddef.h>
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-/* Opaque streaming context; the library casts it to its own internal layout, so callers only provide the storage. */
-typedef struct poly1305_state
-{
- unsigned char opaque[320];
-} poly1305_state;
-/* 32-byte one-time key. */
-typedef struct poly1305_key
-{
- unsigned char b[32];
-} poly1305_key;
-/* Streaming interface: init (or init_ext with a size hint), any number of update calls, then finish writes the tag to mac. */
-void poly1305_init(poly1305_state *S, const poly1305_key *key);
-void poly1305_init_ext(poly1305_state *S, const poly1305_key *key,
- size_t bytes_hint);
-void poly1305_update(poly1305_state *S, const unsigned char *in, size_t inlen);
-void poly1305_finish(poly1305_state *S, unsigned char *mac);
-/* One-shot MAC over in[0..inlen), and comparison of two 16-byte tags. */
-void poly1305_auth(unsigned char *mac, const unsigned char *in, size_t inlen,
- const poly1305_key *key);
-int poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]);
-/* Choose the implementation for the running CPU and return its name. */
-const char* poly1305_load(void);
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif /* POLY1305_H */
-
diff --git a/src/libcryptobox/poly1305/poly1305_internal.h b/src/libcryptobox/poly1305/poly1305_internal.h
deleted file mode 100644
index 21b7aa7d2..000000000
--- a/src/libcryptobox/poly1305/poly1305_internal.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#if defined(_MSC_VER) /* MSVC branch; compilers matching neither branch get no uint128_t or POLY1305_NOINLINE from this header */
- #include <intrin.h>
-/* Under MSVC uint128_t is a plain two-word struct, not an arithmetic type. */
- typedef struct uint128_t {
- unsigned long long lo;
- unsigned long long hi;
- } uint128_t;
-
- #define POLY1305_NOINLINE __declspec(noinline)
-#elif defined(__GNUC__) /* GCC-compatible branch */
- #pragma GCC system_header
- #if defined(__SIZEOF_INT128__)
- typedef unsigned __int128 uint128_t; /* native 128-bit integer */
- #else
- typedef unsigned uint128_t __attribute__((mode(TI))); /* fallback: request a TImode (128-bit) integer */
- #endif
-
- #define POLY1305_NOINLINE __attribute__((noinline))
-#endif
diff --git a/src/libcryptobox/poly1305/ref-32.c b/src/libcryptobox/poly1305/ref-32.c
deleted file mode 100644
index 9f0ea998b..000000000
--- a/src/libcryptobox/poly1305/ref-32.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition
-
- assumes the existence of uint32_t and uint64_t
-*/
-
-#include "config.h"
-#include "poly1305.h"
-/* The reference implementation works on 16-byte blocks. */
-enum {
- POLY1305_BLOCK_SIZE = 16
-};
-/* State of the 32-bit reference implementation; r and h are held as five 26-bit limbs. */
-typedef struct poly1305_state_ref_t {
- uint32_t r[5]; /* clamped multiplier taken from key bytes 0..15 */
- uint32_t h[5]; /* running accumulator */
- uint32_t pad[4]; /* key bytes 16..31, added to h when the tag is produced */
- unsigned char final; /* set by finish_ext; when set, blocks() does not add the 2^128 bit */
-} poly1305_state_ref_t;
-
-/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian; reads p[0..3] byte by byte, so p needs no particular alignment */
-static uint32_t
-U8TO32(const unsigned char *p) {
- return
- (((uint32_t)(p[0] & 0xff) ) |
- ((uint32_t)(p[1] & 0xff) << 8) |
- ((uint32_t)(p[2] & 0xff) << 16) |
- ((uint32_t)(p[3] & 0xff) << 24));
-}
-
-/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian; writes p[0..3], least significant byte first */
-static void
-U32TO8(unsigned char *p, uint32_t v) {
- p[0] = (unsigned char)((v ) & 0xff);
- p[1] = (unsigned char)((v >> 8) & 0xff);
- p[2] = (unsigned char)((v >> 16) & 0xff);
- p[3] = (unsigned char)((v >> 24) & 0xff);
-}
-/* Block size of the reference backend, in bytes (POLY1305_BLOCK_SIZE). */
-size_t
-poly1305_block_size_ref(void) {
- return POLY1305_BLOCK_SIZE;
-}
-/* Initialise the reference state from a 32-byte key: r = clamped key bytes 0..15, h = 0, pad = key bytes 16..31, final = 0. bytes_hint is ignored. */
-void
-poly1305_init_ext_ref(void *state, const poly1305_key *key, size_t bytes_hint) {
- poly1305_state_ref_t *st = (poly1305_state_ref_t *)state;
-
- /* bytes_hint not used */
- (void)bytes_hint;
-
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff; overlapping 32-bit reads at byte offsets 0,3,6,9,12 shifted by 0,2,4,6,8 split r into 26-bit limbs */
- st->r[0] = (U8TO32(&key->b[ 0]) ) & 0x3ffffff;
- st->r[1] = (U8TO32(&key->b[ 3]) >> 2) & 0x3ffff03;
- st->r[2] = (U8TO32(&key->b[ 6]) >> 4) & 0x3ffc0ff;
- st->r[3] = (U8TO32(&key->b[ 9]) >> 6) & 0x3f03fff;
- st->r[4] = (U8TO32(&key->b[12]) >> 8) & 0x00fffff;
-
- /* h = 0 */
- st->h[0] = 0;
- st->h[1] = 0;
- st->h[2] = 0;
- st->h[3] = 0;
- st->h[4] = 0;
-
- /* save pad for later */
- st->pad[0] = U8TO32(&key->b[16]);
- st->pad[1] = U8TO32(&key->b[20]);
- st->pad[2] = U8TO32(&key->b[24]);
- st->pad[3] = U8TO32(&key->b[28]);
-
- st->final = 0;
-}
-/* Absorb whole 16-byte blocks from in: for each block h = (h + block) * r, partially reduced mod 2^130 - 5. Trailing bytes beyond the last full block are not read. */
-void
-poly1305_blocks_ref(void *state, const unsigned char *in, size_t inlen) {
- poly1305_state_ref_t *st = (poly1305_state_ref_t *)state;
- const uint32_t hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128; omitted for the padded final block, which carries its own 1 byte */
- uint32_t r0,r1,r2,r3,r4;
- uint32_t s1,s2,s3,s4;
- uint32_t h0,h1,h2,h3,h4;
- uint64_t d0,d1,d2,d3,d4;
- uint32_t c;
-
- r0 = st->r[0];
- r1 = st->r[1];
- r2 = st->r[2];
- r3 = st->r[3];
- r4 = st->r[4];
-
- s1 = r1 * 5; /* s_i = 5 * r_i: product terms that wrap past 2^130 are folded back with a factor of 5 */
- s2 = r2 * 5;
- s3 = r3 * 5;
- s4 = r4 * 5;
-
- h0 = st->h[0];
- h1 = st->h[1];
- h2 = st->h[2];
- h3 = st->h[3];
- h4 = st->h[4];
-
- while (inlen >= POLY1305_BLOCK_SIZE) {
- /* h += m[i] */
- h0 += (U8TO32(in+ 0) ) & 0x3ffffff;
- h1 += (U8TO32(in+ 3) >> 2) & 0x3ffffff;
- h2 += (U8TO32(in+ 6) >> 4) & 0x3ffffff;
- h3 += (U8TO32(in+ 9) >> 6) & 0x3ffffff;
- h4 += (U8TO32(in+12) >> 8) | hibit;
-
- /* h *= r */
- d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1);
- d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2);
- d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + ((uint64_t)h4 * s3);
- d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + ((uint64_t)h4 * s4);
- d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + ((uint64_t)h4 * r0);
-
- /* (partial) h %= p; carries ripple d0 -> d4, the carry out of d4 re-enters h0 times 5, and h1 is left unmasked */
- c = (uint32_t)(d0 >> 26); h0 = (uint32_t)d0 & 0x3ffffff;
- d1 += c; c = (uint32_t)(d1 >> 26); h1 = (uint32_t)d1 & 0x3ffffff;
- d2 += c; c = (uint32_t)(d2 >> 26); h2 = (uint32_t)d2 & 0x3ffffff;
- d3 += c; c = (uint32_t)(d3 >> 26); h3 = (uint32_t)d3 & 0x3ffffff;
- d4 += c; c = (uint32_t)(d4 >> 26); h4 = (uint32_t)d4 & 0x3ffffff;
- h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff;
- h1 += c;
-
- in += POLY1305_BLOCK_SIZE;
- inlen -= POLY1305_BLOCK_SIZE;
- }
-
- st->h[0] = h0;
- st->h[1] = h1;
- st->h[2] = h2;
- st->h[3] = h3;
- st->h[4] = h4;
-}
-/* Absorb the last `remaining` bytes of in (must be < POLY1305_BLOCK_SIZE), fully reduce h, add pad and write the 16-byte tag to mac; h, r and pad are then zeroed (final is left as is). */
-void
-poly1305_finish_ext_ref(void *state, const unsigned char *in, size_t remaining, unsigned char mac[16]) {
- poly1305_state_ref_t *st = (poly1305_state_ref_t *)state;
- uint32_t h0,h1,h2,h3,h4,c;
- uint32_t g0,g1,g2,g3,g4;
- uint64_t f;
- uint32_t mask;
-
- /* process the remaining block */
- if (remaining) {
- unsigned char final[POLY1305_BLOCK_SIZE] = {0};
- size_t i;
- for (i = 0; i < remaining; i++)
- final[i] = in[i];
- final[remaining] = 1; /* append the 1 byte after the data; remaining >= 16 would index past the array */
- st->final = 1;
- poly1305_blocks_ref(st, final, POLY1305_BLOCK_SIZE);
- }
-
- /* fully carry h; starts at h1 because blocks() leaves h0 masked and h1 unmasked */
- h0 = st->h[0];
- h1 = st->h[1];
- h2 = st->h[2];
- h3 = st->h[3];
- h4 = st->h[4];
-
- c = h1 >> 26; h1 = h1 & 0x3ffffff;
- h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
- h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
- h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
- h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
- h1 += c;
-
- /* compute h + -p, i.e. g = h + 5 - 2^130 */
- g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
- g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
- g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
- g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
- g4 = h4 + c - (1 << 26);
-
- /* select h if h < p, or h + -p if h >= p; branch-free: mask is all ones when g4 did not wrap (top bit clear), else 0 */
- mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
- g0 &= mask;
- g1 &= mask;
- g2 &= mask;
- g3 &= mask;
- g4 &= mask;
- mask = ~mask;
- h0 = (h0 & mask) | g0;
- h1 = (h1 & mask) | g1;
- h2 = (h2 & mask) | g2;
- h3 = (h3 & mask) | g3;
- h4 = (h4 & mask) | g4;
-
- /* h = h % (2^128); repack the five 26-bit limbs into four 32-bit words */
- h0 = ((h0 ) | (h1 << 26)) & 0xffffffff;
- h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
- h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
- h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
-
- /* mac = (h + pad) % (2^128) */
- f = (uint64_t)h0 + st->pad[0] ; h0 = (uint32_t)f;
- f = (uint64_t)h1 + st->pad[1] + (f >> 32); h1 = (uint32_t)f;
- f = (uint64_t)h2 + st->pad[2] + (f >> 32); h2 = (uint32_t)f;
- f = (uint64_t)h3 + st->pad[3] + (f >> 32); h3 = (uint32_t)f;
-
- U32TO8(mac + 0, h0);
- U32TO8(mac + 4, h1);
- U32TO8(mac + 8, h2);
- U32TO8(mac + 12, h3);
-
- /* zero out the state (h, r and pad; st->final is not reset here) */
- st->h[0] = 0;
- st->h[1] = 0;
- st->h[2] = 0;
- st->h[3] = 0;
- st->h[4] = 0;
- st->r[0] = 0;
- st->r[1] = 0;
- st->r[2] = 0;
- st->r[3] = 0;
- st->r[4] = 0;
- st->pad[0] = 0;
- st->pad[1] = 0;
- st->pad[2] = 0;
- st->pad[3] = 0;
-}
-/* One-shot MAC with the reference backend: init with key, absorb all full 16-byte blocks of in, then finish with the remaining tail and write the tag to mac. */
-void
-poly1305_auth_ref(unsigned char mac[16], const unsigned char *in, size_t inlen, const poly1305_key *key) {
- poly1305_state_ref_t st;
- size_t blocks;
- poly1305_init_ext_ref(&st, key, inlen);
- blocks = (inlen & ~(POLY1305_BLOCK_SIZE - 1)); /* inlen rounded down to a multiple of 16 */
- if (blocks) {
- poly1305_blocks_ref(&st, in, blocks);
- in += blocks;
- inlen -= blocks; /* 0..15 bytes left for finish */
- }
- poly1305_finish_ext_ref(&st, in, inlen, mac);
-}
-
diff --git a/src/libcryptobox/poly1305/ref-64.c b/src/libcryptobox/poly1305/ref-64.c
deleted file mode 100644
index cceb1476d..000000000
--- a/src/libcryptobox/poly1305/ref-64.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition
-
- assumes the existence of uint64_t and uint128_t
-*/
-
-#include "config.h"
-#include "poly1305.h"
-#include "poly1305_internal.h"
-
-#define POLY1305_BLOCK_SIZE 16
-
-typedef struct poly1305_state_ref_t { /* working state of the 64-bit reference implementation */
- uint64_t r[3]; /* clamped multiplier r, split into 44/44/40-bit limbs by poly1305_init_ext_ref */
- uint64_t h[3]; /* accumulator h, kept as 44/44/42-bit limbs (masks used in poly1305_blocks_ref) */
- uint64_t pad[2]; /* key bytes 16..31 as two little-endian 64-bit words, added to h at finish */
- unsigned char final; /* set to 1 by poly1305_finish_ext_ref for the padded tail block; makes poly1305_blocks_ref drop the extra high bit */
-} poly1305_state_ref_t;
-
-/* interpret eight 8 bit unsigned integers as a 64 bit unsigned integer in little endian (p[0] is the least significant byte); reads exactly p[0..7] */
-static uint64_t
-U8TO64(const unsigned char *p) {
- return
- ((uint64_t)p[0] ) |
- ((uint64_t)p[1] << 8) |
- ((uint64_t)p[2] << 16) |
- ((uint64_t)p[3] << 24) |
- ((uint64_t)p[4] << 32) |
- ((uint64_t)p[5] << 40) |
- ((uint64_t)p[6] << 48) |
- ((uint64_t)p[7] << 56);
-}
-
-/* store a 64 bit unsigned integer as eight 8 bit unsigned integers in little endian (p[0] receives the least significant byte); writes exactly p[0..7] */
-static void
-U64TO8(unsigned char *p, uint64_t v) {
- p[0] = (unsigned char)(v ) & 0xff; /* the cast already truncates to one byte; the & 0xff is redundant but harmless */
- p[1] = (unsigned char)(v >> 8) & 0xff;
- p[2] = (unsigned char)(v >> 16) & 0xff;
- p[3] = (unsigned char)(v >> 24) & 0xff;
- p[4] = (unsigned char)(v >> 32) & 0xff;
- p[5] = (unsigned char)(v >> 40) & 0xff;
- p[6] = (unsigned char)(v >> 48) & 0xff;
- p[7] = (unsigned char)(v >> 56) & 0xff;
-}
-
-size_t
-poly1305_block_size_ref(void) { /* reports the block granularity of this implementation */
- return POLY1305_BLOCK_SIZE; /* 16, per the #define at the top of this file */
-}
-
-void
-poly1305_init_ext_ref(void *state, const poly1305_key *key, size_t bytes_hint) { /* initialise state from key: r from key->b[0..15], pad from key->b[16..31], h = 0 */
- poly1305_state_ref_t *st = (poly1305_state_ref_t *)state;
- uint64_t t0, t1;
-
- /* bytes_hint not used */
- (void)bytes_hint;
-
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff (the clamp is applied limb by limb below) */
- t0 = U8TO64(&key->b[0]);
- t1 = U8TO64(&key->b[8]);
- st->r[0] = ( t0 ) & 0xffc0fffffff; /* bits 0..43 of r */
- st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; /* bits 44..87 of r */
- st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f; /* bits 88..127 of r */
-
- /* h = 0 */
- st->h[0] = 0;
- st->h[1] = 0;
- st->h[2] = 0;
-
- /* save pad for later */
- st->pad[0] = U8TO64(&key->b[16]);
- st->pad[1] = U8TO64(&key->b[24]);
-
- st->final = 0; /* normal blocks get the extra high bit until finish sets this flag */
-}
-
-void
-poly1305_blocks_ref(void *state, const unsigned char *in, size_t inlen) { /* absorb every whole 16-byte block of in[0..inlen) into st->h; a trailing partial block is ignored */
- poly1305_state_ref_t *st = (poly1305_state_ref_t *)state;
- const uint64_t hibit = (st->final) ? 0 : ((uint64_t)1 << 40); /* 1 << 128 (h2 holds bits 88 and up, so bit 40 of h2 is bit 128 overall); omitted for the final padded block */
- uint64_t r0,r1,r2;
- uint64_t s1,s2;
- uint64_t h0,h1,h2;
- uint64_t c;
- uint128_t d0,d1,d2;
-
- r0 = st->r[0];
- r1 = st->r[1];
- r2 = st->r[2];
-
- s1 = r1 * (5 << 2); /* r1 * 20: multiplier for the partial products that wrap around the top limb (used in d0) */
- s2 = r2 * (5 << 2); /* r2 * 20: same role, used in d0 and d1 */
-
- h0 = st->h[0];
- h1 = st->h[1];
- h2 = st->h[2];
-
- while (inlen >= POLY1305_BLOCK_SIZE) {
- uint64_t t0, t1;
-
- /* h += in[i] (the 16 input bytes are split into 44/44/40-bit pieces, plus hibit) */
- t0 = U8TO64(in + 0);
- t1 = U8TO64(in + 8);
- h0 += (( t0 ) & 0xfffffffffff);
- h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
- h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit;
-
- /* h *= r (each d is a 128-bit sum of three 64x64 products) */
- d0 = ((uint128_t)h0 * r0) + ((uint128_t)h1 * s2) + ((uint128_t)h2 * s1);
- d1 = ((uint128_t)h0 * r1) + ((uint128_t)h1 * r0) + ((uint128_t)h2 * s2);
- d2 = ((uint128_t)h0 * r2) + ((uint128_t)h1 * r1) + ((uint128_t)h2 * r0);
-
- /* (partial) h %= p: carry d0 -> d1 -> d2, then fold the carry out of the 42-bit top limb back into h0 times 5 */
- c = (uint64_t)(d0 >> 44); h0 = (uint64_t)d0 & 0xfffffffffff;
- d1 += c; c = (uint64_t)(d1 >> 44); h1 = (uint64_t)d1 & 0xfffffffffff;
- d2 += c; c = (uint64_t)(d2 >> 42); h2 = (uint64_t)d2 & 0x3ffffffffff;
- h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff;
- h1 += c; /* h1 is left unmasked here; the full carry is done in poly1305_finish_ext_ref */
-
- in += POLY1305_BLOCK_SIZE;
- inlen -= POLY1305_BLOCK_SIZE;
- }
-
- st->h[0] = h0;
- st->h[1] = h1;
- st->h[2] = h2;
-}
-
-void
-poly1305_finish_ext_ref(void *state, const unsigned char *in, size_t remaining, unsigned char mac[16]) { /* absorb the tail in[0..remaining), reduce h, add pad, write the 16-byte tag to mac, then clear the state */
- poly1305_state_ref_t *st = (poly1305_state_ref_t *)state;
- uint64_t h0, h1, h2, c;
- uint64_t g0, g1, g2;
- uint64_t t0, t1;
-
- /* process the remaining block; NOTE: requires remaining < POLY1305_BLOCK_SIZE, otherwise final[remaining] below is out of bounds */
- if (remaining) {
- unsigned char final[POLY1305_BLOCK_SIZE] = {0};
- size_t i;
- for (i = 0; i < remaining; i++)
- final[i] = in[i];
- final[remaining] = 1; /* a single 1 byte follows the data; the rest of the block stays zero */
- st->final = 1; /* tells poly1305_blocks_ref not to add its own high bit */
- poly1305_blocks_ref(st, final, POLY1305_BLOCK_SIZE);
- }
-
- /* fully carry h */
- h0 = st->h[0];
- h1 = st->h[1];
- h2 = st->h[2];
-
- c = (h1 >> 44); h1 &= 0xfffffffffff;
- h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
- h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
- h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
- h1 += c;
-
- /* compute h + -p (g = h + 5 - 2^130, evaluated limb by limb) */
- g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
- g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
- g2 = h2 + c - ((uint64_t)1 << 42);
-
- /* select h if h < p, or h + -p if h >= p; branch-free: c is all ones when the top bit of g2 is clear, else 0 */
- c = (g2 >> 63) - 1;
- h0 = (h0 & ~c) | (g0 & c);
- h1 = (h1 & ~c) | (g1 & c);
- h2 = (h2 & ~c) | (g2 & c);
-
- /* h = (h + pad) */
- t0 = st->pad[0];
- t1 = st->pad[1];
-
- h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff;
- h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff;
- h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff;
-
- /* mac = h % (2^128): repack the 44/44/42-bit limbs into two 64-bit words */
- h0 = ((h0 ) | (h1 << 44));
- h1 = ((h1 >> 20) | (h2 << 24));
-
- U64TO8(&mac[0], h0);
- U64TO8(&mac[8], h1);
-
- /* zero out the state (note: st->final is not reset here) */
- st->h[0] = 0;
- st->h[1] = 0;
- st->h[2] = 0;
- st->r[0] = 0;
- st->r[1] = 0;
- st->r[2] = 0;
- st->pad[0] = 0;
- st->pad[1] = 0;
-}
-
-
-void
-poly1305_auth_ref(unsigned char mac[16], const unsigned char *in, size_t inlen, const poly1305_key *key) { /* one-shot MAC of in[0..inlen) under key; the 16-byte tag is written to mac */
- poly1305_state_ref_t st;
- size_t blocks;
- poly1305_init_ext_ref(&st, key, inlen); /* the hint argument is ignored by this implementation */
- blocks = (inlen & ~(POLY1305_BLOCK_SIZE - 1)); /* inlen rounded down to a multiple of 16 */
- if (blocks) {
- poly1305_blocks_ref(&st, in, blocks);
- in += blocks;
- inlen -= blocks;
- }
- poly1305_finish_ext_ref(&st, in, inlen, mac); /* inlen is now 0..15, which satisfies finish's tail-length requirement */
-}
-
diff --git a/src/libcryptobox/poly1305/sse2.S b/src/libcryptobox/poly1305/sse2.S
deleted file mode 100644
index 038961899..000000000
--- a/src/libcryptobox/poly1305/sse2.S
+++ /dev/null
@@ -1,969 +0,0 @@
-#include "../macro.S"
-#include "constants.S"
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN poly1305_block_size_sse2 /* takes no arguments; returns the constant 32 in eax */
-movl $32, %eax /* twice the 16-byte block size reported by the C reference code */
-ret
-FN_END poly1305_block_size_sse2
-
-GLOBAL_HIDDEN_FN poly1305_init_ext_sse2 /* rdi = state, rsi = key, rdx = bytes hint (presumably the same argument order as poly1305_init_ext_ref; confirm against callers) */
-poly1305_init_ext_sse2_local:
-pushq %r15
-xorps %xmm0, %xmm0
-testq %rdx, %rdx
-pushq %r14
-movq %rdx, %r11
-movq $-1, %rax
-cmove %rax, %r11 /* a hint of 0 is replaced by -1, i.e. treated as the largest possible length */
-pushq %r13
-movabsq $17575274610687, %r9 /* 0xffc0fffffff: clamp mask for the low 44-bit limb of r */
-pushq %r12
-pushq %rbp
-movq %r11, %r13 /* r13 = effective hint, consulted in the loop below */
-movabsq $17592186044415, %rbp /* 2^44 - 1 */
-pushq %rbx
-xorl %ebx, %ebx /* rbx = loop counter for the squaring loop */
-movdqu %xmm0, 32(%rdi) /* bytes 0..47 of the state are cleared */
-movdqu %xmm0, (%rdi)
-movdqu %xmm0, 16(%rdi)
-movq 8(%rsi), %rcx
-movq (%rsi), %rax
-movq %rcx, %rdx
-shrq $24, %rcx
-andq %rax, %r9 /* r9 = limb 0 of r */
-salq $20, %rdx
-shrq $44, %rax
-movq %r9, %r8
-orq %rax, %rdx
-shrq $26, %r8
-movabsq $17592181915647, %rax /* 0xfffffc0ffff: clamp mask for the middle limb */
-andq %rax, %rdx /* rdx = limb 1 of r */
-movabsq $68719475727, %rax /* 0x00ffffffc0f: clamp mask for the top limb */
-andq %rax, %rcx /* rcx = limb 2 of r */
-movl %r9d, %eax
-andl $67108863, %eax /* 67108863 = 2^26 - 1: r is re-split into five 26-bit limbs stored at 40..59(%rdi) */
-movl %eax, 40(%rdi)
-movl %edx, %eax
-sall $18, %eax
-orl %r8d, %eax
-movq %rdx, %r8
-andl $67108863, %eax
-shrq $34, %r8
-movl %eax, 44(%rdi)
-movq %rdx, %rax
-shrq $8, %rax
-andl $67108863, %eax
-movl %eax, 48(%rdi)
-movl %ecx, %eax
-sall $10, %eax
-orl %r8d, %eax
-movq %rdi, %r8 /* r8 = state pointer; rdi is reused as the output cursor below */
-andl $67108863, %eax
-movl %eax, 52(%rdi)
-movq %rcx, %rax
-shrq $16, %rax
-movl %eax, 56(%rdi)
-movq 16(%rsi), %rax
-movq %rax, 104(%rdi) /* key bytes 16..23 saved at state+104 */
-movq 24(%rsi), %rax
-movq %rdx, %rsi /* rsi = limb 1; the key pointer is no longer needed */
-movq %rax, 112(%rdi) /* key bytes 24..31 saved at state+112 */
-poly1305_init_ext_sse2_7: /* loop head: each pass squares the value held in (r9, rsi, rcx) and stores the result as 26-bit limbs */
-testq %rbx, %rbx
-jne poly1305_init_ext_sse2_4
-cmpq $16, %r13
-jbe poly1305_init_ext_sse2_5 /* hint <= 16: no powers are computed */
-leaq 60(%r8), %rdi /* first pass writes to state+60 */
-jmp poly1305_init_ext_sse2_6
-poly1305_init_ext_sse2_4:
-cmpq $96, %r13
-jb poly1305_init_ext_sse2_5 /* hint < 96: the second power is skipped */
-leaq 80(%r8), %rdi /* second pass writes to state+80 */
-poly1305_init_ext_sse2_6:
-imulq $20, %rcx, %r10 /* r10 = 20 * limb 2 (same factor as s2 in the C reference) */
-movq $0, -48(%rsp) /* scratch below rsp: this function makes no calls */
-movq $0, -32(%rsp)
-leaq (%rsi,%rsi), %r14
-leaq (%r9,%r9), %r11
-movq %r10, %rax
-mulq %r14
-movq %rax, %r14
-movq %r9, %rax
-movq %rdx, %r15
-mulq %r9
-addq %rax, %r14
-movq %r14, %rax
-adcq %rdx, %r15
-leaq (%rcx,%rcx), %rdx
-andq %rbp, %rax
-movq %rax, -16(%rsp)
-movq %r11, %rax
-movq %rdx, -24(%rsp)
-mulq %rsi
-movq %rax, %r11
-movq %r10, %rax
-movq %rdx, %r12
-mulq %rcx
-movq -16(%rsp), %rcx
-addq %rax, %r11
-movq %r14, %rax
-adcq %rdx, %r12
-shrdq $44, %r15, %rax
-movq %rax, -56(%rsp)
-movq -24(%rsp), %rax
-addq -56(%rsp), %r11
-adcq -48(%rsp), %r12
-mulq %r9
-movq %r11, %r14
-andq %rbp, %r14
-movq %rax, %r9
-movq %rsi, %rax
-movq %rdx, %r10
-mulq %rsi
-addq %rax, %r9
-movq %r11, %rax
-adcq %rdx, %r10
-shrdq $44, %r12, %rax
-movq %rax, -40(%rsp)
-movabsq $4398046511103, %rax /* 2^42 - 1 */
-addq -40(%rsp), %r9
-adcq -32(%rsp), %r10
-andq %r9, %rax
-incq %rbx
-shrdq $42, %r10, %r9
-leaq (%r9,%r9,4), %r9 /* carry out of the 42-bit top limb, times 5 */
-addq %r9, %rcx
-movq %rcx, %r9
-shrq $44, %rcx
-addq %r14, %rcx
-andq %rbp, %r9 /* r9 = new limb 0 */
-movq %rcx, %rsi
-shrq $44, %rcx
-movq %r9, %rdx
-addq %rax, %rcx /* rcx = new limb 2 */
-movl %r9d, %eax
-andq %rbp, %rsi /* rsi = new limb 1 */
-andl $67108863, %eax
-shrq $26, %rdx
-movl %eax, (%rdi)
-movl %esi, %eax
-sall $18, %eax
-orl %edx, %eax
-movq %rsi, %rdx
-andl $67108863, %eax
-shrq $34, %rdx
-movl %eax, 4(%rdi)
-movq %rsi, %rax
-shrq $8, %rax
-andl $67108863, %eax
-movl %eax, 8(%rdi)
-movl %ecx, %eax
-sall $10, %eax
-orl %edx, %eax
-andl $67108863, %eax
-movl %eax, 12(%rdi)
-movq %rcx, %rax
-shrq $16, %rax
-cmpq $2, %rbx /* flags survive the movl store below and feed the jne */
-movl %eax, 16(%rdi)
-jne poly1305_init_ext_sse2_7
-poly1305_init_ext_sse2_5:
-movq $0, 120(%r8) /* the flags word at state+120 starts at 0 */
-popq %rbx
-popq %rbp
-popq %r12
-popq %r13
-popq %r14
-popq %r15
-ret
-FN_END poly1305_init_ext_sse2
-
-
-GLOBAL_HIDDEN_FN poly1305_blocks_sse2 /* rdi = state, rsi = input (a NULL pointer selects the final-reduction path), rdx = byte count */
-poly1305_blocks_sse2_local:
-pushq %rbp
-movq %rsp, %rbp
-pushq %rbx
-andq $-64, %rsp /* 64-byte align the frame so the movaps spills below are aligned */
-subq $328, %rsp
-movq $(1 << 24), %rax
-movd %rax, %xmm1
-movq $((1 << 26) - 1), %rax
-movd %rax, %xmm0
-pshufd $68, %xmm1, %xmm1 /* xmm1 = 1 << 24 in both 64-bit lanes */
-pshufd $68, %xmm0, %xmm0 /* xmm0 = 26-bit limb mask in both 64-bit lanes; kept for the whole function */
-movq 120(%rdi), %rax /* rax = flags word of the state */
-movaps %xmm1, 312(%rsp) /* 312(%rsp) holds the high-bit vector OR-ed/added into the top limb of each block */
-testb $4, %al
-je poly1305_blocks_sse2_11
-movaps 312(%rsp), %xmm1
-psrldq $8, %xmm1 /* flag 4: only the low lane keeps the high bit */
-movaps %xmm1, 312(%rsp)
-poly1305_blocks_sse2_11:
-testb $8, %al
-je poly1305_blocks_sse2_12
-xorps %xmm1, %xmm1 /* flag 8: no high bit in either lane */
-movaps %xmm1, 312(%rsp)
-poly1305_blocks_sse2_12:
-testb $1, %al
-jne poly1305_blocks_sse2_13
-movq 16(%rsi), %xmm1 /* flag 1 clear: the first 32 input bytes become the initial two-lane accumulator */
-movaps %xmm0, %xmm3
-movaps %xmm0, %xmm9
-movq (%rsi), %xmm15
-orq $1, %rax
-subq $32, %rdx
-movq 8(%rsi), %xmm12
-punpcklqdq %xmm1, %xmm15
-movq 24(%rsi), %xmm1
-movaps %xmm15, %xmm8
-pand %xmm15, %xmm3
-psrlq $52, %xmm15
-addq $32, %rsi
-punpcklqdq %xmm1, %xmm12
-movaps %xmm12, %xmm1
-psrlq $26, %xmm8
-psllq $12, %xmm1
-pand %xmm0, %xmm8
-movq %rax, 120(%rdi) /* flag 1 is recorded in the state */
-por %xmm1, %xmm15
-psrlq $40, %xmm12
-pand %xmm15, %xmm9
-por 312(%rsp), %xmm12
-psrlq $26, %xmm15
-pand %xmm0, %xmm15
-jmp poly1305_blocks_sse2_14
-poly1305_blocks_sse2_13: /* flag 1 set: reload the accumulator limbs saved at state+0..39 and spread them over two lanes */
-movdqu (%rdi), %xmm8
-movdqu 16(%rdi), %xmm15
-movdqu 32(%rdi), %xmm12
-pshufd $80, %xmm8, %xmm3
-pshufd $250, %xmm8, %xmm8
-pshufd $80, %xmm15, %xmm9
-pshufd $250, %xmm15, %xmm15
-pshufd $80, %xmm12, %xmm12
-poly1305_blocks_sse2_14: /* accumulator limbs are now in xmm3, xmm8, xmm9, xmm15, xmm12; next, pick the multiplier limbs */
-movq 120(%rdi), %rax
-testb $48, %al
-je poly1305_blocks_sse2_15
-testb $16, %al /* flags from this test feed the je two instructions below (movd and leaq leave flags alone) */
-movd 56(%rdi), %xmm2
-leaq 40(%rdi), %rax
-je poly1305_blocks_sse2_16
-movdqu 60(%rdi), %xmm1 /* flag 16: interleave the limbs at state+60 with those at state+40 */
-movdqu (%rax), %xmm4
-movd %xmm2, %eax
-movd 76(%rdi), %xmm2
-movaps %xmm1, %xmm7
-movd %eax, %xmm5
-punpckldq %xmm4, %xmm7
-punpckhdq %xmm4, %xmm1
-punpcklqdq %xmm5, %xmm2
-jmp poly1305_blocks_sse2_17
-poly1305_blocks_sse2_16: /* flag 32 without 16: interleave the limbs at state+40 with the constant 1 */
-movdqu (%rax), %xmm1
-movl $1, %r8d
-movd %r8d, %xmm4
-movaps %xmm1, %xmm7
-punpckldq %xmm4, %xmm7
-punpckhdq %xmm4, %xmm1
-poly1305_blocks_sse2_17:
-pshufd $80, %xmm7, %xmm11
-pshufd $80, %xmm1, %xmm4
-pshufd $250, %xmm7, %xmm7
-movaps %xmm11, 168(%rsp)
-pshufd $250, %xmm1, %xmm1
-jmp poly1305_blocks_sse2_18
-poly1305_blocks_sse2_15: /* neither flag 16 nor 32: broadcast the five limbs stored at state+60..79 into both lanes */
-movdqu 60(%rdi), %xmm1
-movd 76(%rdi), %xmm2
-pshufd $0, %xmm2, %xmm2
-pshufd $0, %xmm1, %xmm11
-pshufd $85, %xmm1, %xmm7
-pshufd $170, %xmm1, %xmm4
-movaps %xmm11, 168(%rsp)
-pshufd $255, %xmm1, %xmm1
-poly1305_blocks_sse2_18: /* multiplier limbs: 168(%rsp), xmm7, xmm4, xmm1, xmm2; their 5x multiples are computed and spilled next */
-movaps %xmm1, %xmm14
-movaps %xmm7, %xmm5
-movaps %xmm4, %xmm13
-movaps %xmm1, 264(%rsp)
-movaps %xmm2, %xmm1
-cmpq $63, %rdx /* consumed by the jbe at the end of this setup; only mov/SSE instructions (which do not write EFLAGS) lie in between */
-movq $(5), %r8
-movd %r8, %xmm6
-pshufd $68, %xmm6, %xmm6 /* xmm6 = 5 in both lanes */
-pmuludq %xmm6, %xmm5
-movaps %xmm4, 296(%rsp)
-pmuludq %xmm6, %xmm13
-movaps %xmm2, 152(%rsp)
-pmuludq %xmm6, %xmm14
-pmuludq %xmm6, %xmm1
-movaps %xmm5, 88(%rsp)
-movaps %xmm13, 72(%rsp)
-movaps %xmm14, 56(%rsp)
-movaps %xmm1, 40(%rsp)
-jbe poly1305_blocks_sse2_19 /* fewer than 64 bytes left: skip the 64-byte loop */
-movdqu 80(%rdi), %xmm1 /* the 64-byte loop additionally uses the limbs stored at state+80..99 */
-movd 96(%rdi), %xmm2
-movq %rdx, %rcx /* rcx = bytes remaining, used as the loop counter */
-pshufd $0, %xmm2, %xmm2
-movaps %xmm2, 24(%rsp)
-pmuludq %xmm6, %xmm2
-pshufd $85, %xmm1, %xmm4
-movaps %xmm4, 280(%rsp)
-pmuludq %xmm6, %xmm4
-pshufd $255, %xmm1, %xmm13
-pshufd $170, %xmm1, %xmm5
-movaps 72(%rsp), %xmm14
-movaps %xmm5, 216(%rsp)
-pmuludq %xmm6, %xmm5
-movq %rsi, %rax /* rax = input cursor inside the loop */
-movaps %xmm4, -24(%rsp) /* negative offsets from rsp are used as scratch; this function makes no calls */
-movaps %xmm13, %xmm4
-pshufd $0, %xmm1, %xmm1
-pmuludq %xmm6, %xmm4
-movaps %xmm14, -8(%rsp)
-movaps %xmm5, 8(%rsp)
-movaps 168(%rsp), %xmm5
-movaps %xmm1, 248(%rsp)
-movaps 56(%rsp), %xmm1
-movaps %xmm4, 120(%rsp)
-movaps 40(%rsp), %xmm4
-movaps %xmm13, 136(%rsp)
-movaps %xmm2, 200(%rsp)
-movaps %xmm1, 104(%rsp)
-movaps %xmm4, 184(%rsp)
-movaps %xmm5, 232(%rsp)
-jmp poly1305_blocks_sse2_20
-.p2align 6
-poly1305_blocks_sse2_20: /* main loop: consumes 64 input bytes per iteration */
-movaps -24(%rsp), %xmm5
-movaps %xmm8, %xmm13
-subq $64, %rcx
-movaps 8(%rsp), %xmm4
-movaps 120(%rsp), %xmm10
-pmuludq %xmm12, %xmm5
-pmuludq %xmm15, %xmm4
-movaps 8(%rsp), %xmm2
-pmuludq %xmm9, %xmm10
-movaps 120(%rsp), %xmm11
-movaps 200(%rsp), %xmm14
-pmuludq %xmm12, %xmm2
-paddq %xmm4, %xmm5
-pmuludq %xmm15, %xmm11
-movaps 120(%rsp), %xmm1
-paddq %xmm10, %xmm5
-pmuludq %xmm8, %xmm14
-movaps 200(%rsp), %xmm10
-movaps 200(%rsp), %xmm4
-pmuludq %xmm12, %xmm1
-movaps 248(%rsp), %xmm8
-pmuludq %xmm15, %xmm10
-paddq %xmm11, %xmm2
-pmuludq %xmm12, %xmm4
-paddq %xmm14, %xmm5
-movaps 200(%rsp), %xmm11
-movaps 248(%rsp), %xmm14
-pmuludq %xmm15, %xmm8
-pmuludq 248(%rsp), %xmm12
-pmuludq %xmm9, %xmm11
-paddq %xmm10, %xmm1
-movaps 248(%rsp), %xmm10
-pmuludq 280(%rsp), %xmm15
-pmuludq %xmm3, %xmm14
-paddq %xmm15, %xmm12
-paddq %xmm8, %xmm4
-pmuludq %xmm13, %xmm10
-movq 24(%rax), %xmm15
-movaps 248(%rsp), %xmm8
-paddq %xmm11, %xmm2
-movaps %xmm3, %xmm11
-movaps 280(%rsp), %xmm3
-paddq %xmm14, %xmm5
-pmuludq %xmm9, %xmm8
-paddq %xmm10, %xmm2
-movq 16(%rax), %xmm14
-movaps 280(%rsp), %xmm10
-pmuludq %xmm9, %xmm3
-pmuludq 216(%rsp), %xmm9
-paddq %xmm9, %xmm12
-paddq %xmm8, %xmm1
-movq (%rax), %xmm8
-pmuludq %xmm11, %xmm10
-paddq %xmm3, %xmm4
-movaps 216(%rsp), %xmm3
-punpcklqdq %xmm14, %xmm8
-movaps 280(%rsp), %xmm14
-pmuludq %xmm13, %xmm3
-paddq %xmm10, %xmm2
-movq 8(%rax), %xmm10
-pmuludq %xmm13, %xmm14
-pmuludq 136(%rsp), %xmm13
-paddq %xmm13, %xmm12
-punpcklqdq %xmm15, %xmm10
-movaps %xmm10, %xmm9
-movaps 216(%rsp), %xmm15
-paddq %xmm3, %xmm4
-psllq $12, %xmm9
-movaps %xmm0, %xmm3
-paddq %xmm14, %xmm1
-pmuludq %xmm11, %xmm15
-pand %xmm8, %xmm3
-movaps 136(%rsp), %xmm14
-movaps %xmm3, -40(%rsp)
-movaps %xmm8, %xmm3
-movdqu 48(%rax), %xmm13
-psrlq $52, %xmm8
-pmuludq %xmm11, %xmm14
-paddq %xmm15, %xmm1
-por %xmm9, %xmm8
-pmuludq 24(%rsp), %xmm11
-paddq %xmm11, %xmm12
-movdqu 32(%rax), %xmm11
-movaps %xmm10, %xmm9
-psrlq $40, %xmm10
-pand %xmm0, %xmm8
-movaps %xmm11, %xmm15
-paddq %xmm14, %xmm4
-xorps %xmm14, %xmm14
-punpckldq %xmm13, %xmm15
-psrlq $14, %xmm9
-addq $64, %rax
-pand %xmm0, %xmm9
-psrlq $26, %xmm3
-cmpq $63, %rcx /* loop condition; consumed by the ja at the bottom of the loop (only SSE instructions follow, which do not write EFLAGS) */
-por 312(%rsp), %xmm10
-movaps %xmm13, -72(%rsp)
-movaps %xmm15, %xmm13
-punpckldq %xmm14, %xmm13
-punpckhdq -72(%rsp), %xmm11
-movaps %xmm13, -56(%rsp)
-movaps %xmm11, %xmm13
-punpckhdq %xmm14, %xmm11
-pand %xmm0, %xmm3
-psllq $18, %xmm11
-punpckhdq %xmm14, %xmm15
-punpckldq %xmm14, %xmm13
-paddq %xmm11, %xmm4
-movaps -8(%rsp), %xmm11
-psllq $6, %xmm15
-psllq $12, %xmm13
-movaps 88(%rsp), %xmm14
-paddq %xmm15, %xmm2
-pmuludq %xmm10, %xmm11
-paddq %xmm13, %xmm1
-movaps -8(%rsp), %xmm13
-pmuludq %xmm10, %xmm14
-paddq -56(%rsp), %xmm5
-paddq 312(%rsp), %xmm12
-pmuludq %xmm9, %xmm13
-movaps 104(%rsp), %xmm15
-paddq %xmm11, %xmm2
-movaps 184(%rsp), %xmm11
-paddq %xmm14, %xmm5
-movaps 104(%rsp), %xmm14
-pmuludq %xmm9, %xmm15
-pmuludq %xmm10, %xmm11
-paddq %xmm13, %xmm5
-movaps 104(%rsp), %xmm13
-pmuludq %xmm10, %xmm14
-pmuludq 232(%rsp), %xmm10
-paddq %xmm10, %xmm12
-pmuludq %xmm8, %xmm13
-paddq %xmm15, %xmm2
-movaps %xmm8, %xmm10
-paddq %xmm11, %xmm4
-pmuludq %xmm7, %xmm10
-movaps 232(%rsp), %xmm11
-movaps 184(%rsp), %xmm15
-paddq %xmm14, %xmm1
-pmuludq %xmm9, %xmm11
-paddq %xmm13, %xmm5
-movaps 184(%rsp), %xmm13
-movaps 184(%rsp), %xmm14
-pmuludq %xmm3, %xmm15
-pmuludq %xmm9, %xmm13
-paddq %xmm11, %xmm4
-pmuludq %xmm8, %xmm14
-movaps 232(%rsp), %xmm11
-paddq %xmm10, %xmm4
-paddq %xmm15, %xmm5
-pmuludq %xmm7, %xmm9
-pmuludq %xmm8, %xmm11
-paddq %xmm13, %xmm1
-movaps 232(%rsp), %xmm13
-movaps 296(%rsp), %xmm10
-paddq %xmm14, %xmm2
-pmuludq 296(%rsp), %xmm8
-movaps -40(%rsp), %xmm14
-pmuludq %xmm3, %xmm13
-paddq %xmm9, %xmm12
-paddq %xmm11, %xmm1
-movaps %xmm3, %xmm11
-paddq %xmm8, %xmm12
-movaps 232(%rsp), %xmm15
-pmuludq %xmm7, %xmm11
-pmuludq %xmm3, %xmm10
-paddq %xmm13, %xmm2
-movaps %xmm14, %xmm13
-movaps 296(%rsp), %xmm9
-pmuludq %xmm14, %xmm15
-pmuludq 264(%rsp), %xmm3
-paddq %xmm11, %xmm1
-pmuludq %xmm7, %xmm13
-paddq %xmm3, %xmm12
-movaps 264(%rsp), %xmm11
-paddq %xmm10, %xmm4
-pmuludq %xmm14, %xmm9
-paddq %xmm15, %xmm5
-pmuludq %xmm14, %xmm11
-movaps %xmm5, %xmm8
-paddq %xmm13, %xmm2
-psrlq $26, %xmm8 /* carry propagation between the 26-bit limbs starts here */
-paddq %xmm9, %xmm1
-pand %xmm0, %xmm5
-pmuludq 152(%rsp), %xmm14
-paddq %xmm14, %xmm12
-paddq %xmm8, %xmm2
-paddq %xmm11, %xmm4
-movaps %xmm2, %xmm9
-movaps %xmm2, %xmm8
-movaps %xmm4, %xmm3
-psrlq $26, %xmm9
-pand %xmm0, %xmm4
-psrlq $26, %xmm3
-paddq %xmm9, %xmm1
-pand %xmm0, %xmm8
-paddq %xmm3, %xmm12
-movaps %xmm1, %xmm10
-movaps %xmm1, %xmm9
-movaps %xmm12, %xmm3
-psrlq $26, %xmm10
-pand %xmm0, %xmm12
-psrlq $26, %xmm3
-paddq %xmm10, %xmm4
-pand %xmm0, %xmm9
-pmuludq %xmm6, %xmm3 /* the carry out of the top limb is multiplied by 5 before being added to limb 0 */
-movaps %xmm4, %xmm1
-movaps %xmm4, %xmm15
-psrlq $26, %xmm1
-pand %xmm0, %xmm15
-paddq %xmm1, %xmm12
-paddq %xmm3, %xmm5
-movaps %xmm5, %xmm2
-movaps %xmm5, %xmm3
-psrlq $26, %xmm2
-pand %xmm0, %xmm3
-paddq %xmm2, %xmm8
-ja poly1305_blocks_sse2_20 /* repeat while more than 63 bytes remain in rcx */
-leaq -64(%rdx), %rax
-andl $63, %edx /* rdx = bytes left after the 64-byte loop */
-andq $-64, %rax
-leaq 64(%rsi,%rax), %rsi /* advance rsi past everything the loop consumed */
-poly1305_blocks_sse2_19:
-cmpq $31, %rdx
-jbe poly1305_blocks_sse2_21 /* fewer than 32 bytes left: no further multiply */
-movaps 56(%rsp), %xmm11
-movaps %xmm15, %xmm1
-movaps %xmm15, %xmm14
-movaps 72(%rsp), %xmm5
-movaps %xmm12, %xmm4
-movaps %xmm15, %xmm10
-movaps 88(%rsp), %xmm2
-pmuludq %xmm11, %xmm14
-movaps %xmm8, %xmm15
-pmuludq %xmm5, %xmm1
-movaps 40(%rsp), %xmm13
-testq %rsi, %rsi /* consumed by the je before the input load below; the SSE instructions in between do not write EFLAGS */
-pmuludq %xmm12, %xmm2
-pmuludq %xmm12, %xmm5
-pmuludq %xmm11, %xmm4
-paddq %xmm1, %xmm2
-pmuludq %xmm9, %xmm11
-movaps %xmm12, %xmm1
-paddq %xmm14, %xmm5
-pmuludq %xmm13, %xmm15
-movaps %xmm9, %xmm14
-pmuludq %xmm13, %xmm14
-pmuludq %xmm13, %xmm1
-paddq %xmm11, %xmm2
-movaps 168(%rsp), %xmm11
-pmuludq %xmm10, %xmm13
-paddq %xmm15, %xmm2
-movaps %xmm9, %xmm15
-paddq %xmm14, %xmm5
-pmuludq %xmm11, %xmm12
-movaps %xmm3, %xmm14
-pmuludq %xmm11, %xmm14
-movaps %xmm13, 248(%rsp)
-movaps %xmm10, %xmm13
-pmuludq %xmm7, %xmm15
-paddq 248(%rsp), %xmm4
-pmuludq %xmm11, %xmm13
-pmuludq %xmm7, %xmm10
-paddq %xmm14, %xmm2
-movaps %xmm13, 280(%rsp)
-movaps %xmm8, %xmm13
-pmuludq %xmm11, %xmm13
-paddq %xmm10, %xmm12
-movaps 296(%rsp), %xmm10
-paddq 280(%rsp), %xmm1
-pmuludq %xmm9, %xmm11
-pmuludq 296(%rsp), %xmm9
-pmuludq %xmm3, %xmm10
-paddq %xmm9, %xmm12
-paddq %xmm13, %xmm5
-movaps %xmm3, %xmm13
-paddq %xmm15, %xmm1
-pmuludq %xmm7, %xmm13
-paddq %xmm11, %xmm4
-movaps 296(%rsp), %xmm11
-pmuludq %xmm8, %xmm7
-pmuludq %xmm8, %xmm11
-pmuludq 264(%rsp), %xmm8
-paddq %xmm8, %xmm12
-paddq %xmm13, %xmm5
-paddq %xmm7, %xmm4
-movaps 264(%rsp), %xmm7
-paddq %xmm11, %xmm1
-paddq %xmm10, %xmm4
-pmuludq %xmm3, %xmm7
-pmuludq 152(%rsp), %xmm3
-paddq %xmm3, %xmm12
-paddq %xmm7, %xmm1
-je poly1305_blocks_sse2_22 /* rsi == NULL: there is no input to add */
-movdqu (%rsi), %xmm7 /* add the next 32 input bytes (two blocks) to the product */
-xorps %xmm3, %xmm3
-paddq 312(%rsp), %xmm12
-movdqu 16(%rsi), %xmm8
-movaps %xmm7, %xmm9
-punpckldq %xmm8, %xmm9
-punpckhdq %xmm8, %xmm7
-movaps %xmm9, %xmm10
-movaps %xmm7, %xmm8
-punpckldq %xmm3, %xmm10
-punpckhdq %xmm3, %xmm9
-punpckhdq %xmm3, %xmm7
-punpckldq %xmm3, %xmm8
-movaps %xmm8, %xmm3
-psllq $6, %xmm9
-paddq %xmm10, %xmm2
-psllq $12, %xmm3
-paddq %xmm9, %xmm5
-psllq $18, %xmm7
-paddq %xmm3, %xmm4
-paddq %xmm7, %xmm1
-poly1305_blocks_sse2_22: /* carry propagation across the five limbs */
-movaps %xmm2, %xmm8
-movaps %xmm1, %xmm3
-movaps %xmm1, %xmm15
-psrlq $26, %xmm8
-pand %xmm0, %xmm2
-pand %xmm0, %xmm15
-psrlq $26, %xmm3
-paddq %xmm5, %xmm8
-paddq %xmm12, %xmm3
-movaps %xmm8, %xmm9
-pand %xmm0, %xmm8
-movaps %xmm3, %xmm1
-psrlq $26, %xmm9
-movaps %xmm3, %xmm12
-psrlq $26, %xmm1
-paddq %xmm4, %xmm9
-pand %xmm0, %xmm12
-pmuludq %xmm1, %xmm6 /* top-limb carry times 5 (xmm6 held the constant 5; it is not needed afterwards) */
-movaps %xmm9, %xmm3
-pand %xmm0, %xmm9
-psrlq $26, %xmm3
-paddq %xmm3, %xmm15
-paddq %xmm6, %xmm2
-movaps %xmm15, %xmm3
-pand %xmm0, %xmm15
-movaps %xmm2, %xmm1
-psrlq $26, %xmm3
-psrlq $26, %xmm1
-paddq %xmm3, %xmm12
-movaps %xmm0, %xmm3
-paddq %xmm1, %xmm8
-pand %xmm2, %xmm3
-poly1305_blocks_sse2_21: /* accumulator limbs are in xmm3, xmm8, xmm9, xmm15, xmm12 */
-testq %rsi, %rsi
-je poly1305_blocks_sse2_23
-pshufd $8, %xmm3, %xmm3 /* input pointer non-NULL: pack the two-lane limbs and save them at state+0..39 */
-pshufd $8, %xmm8, %xmm8
-pshufd $8, %xmm9, %xmm9
-pshufd $8, %xmm15, %xmm15
-pshufd $8, %xmm12, %xmm12
-punpcklqdq %xmm8, %xmm3
-punpcklqdq %xmm15, %xmm9
-movdqu %xmm3, (%rdi)
-movdqu %xmm9, 16(%rdi)
-movq %xmm12, 32(%rdi)
-jmp poly1305_blocks_sse2_10
-poly1305_blocks_sse2_23: /* input pointer NULL: add the two lanes together, convert to 44/44/42-bit limbs and fully reduce */
-movaps %xmm3, %xmm0
-movaps %xmm8, %xmm4
-movaps %xmm9, %xmm2
-psrldq $8, %xmm0
-movaps %xmm15, %xmm10
-paddq %xmm0, %xmm3
-psrldq $8, %xmm4
-movaps %xmm12, %xmm0
-movd %xmm3, %edx
-paddq %xmm4, %xmm8
-psrldq $8, %xmm2
-movl %edx, %ecx
-movd %xmm8, %eax
-paddq %xmm2, %xmm9
-shrl $26, %ecx
-psrldq $8, %xmm10
-andl $67108863, %edx
-addl %ecx, %eax
-movd %xmm9, %ecx
-paddq %xmm10, %xmm15
-movl %eax, %r9d
-shrl $26, %eax
-psrldq $8, %xmm0
-addl %ecx, %eax
-movd %xmm15, %ecx
-paddq %xmm0, %xmm12
-movl %eax, %esi
-andl $67108863, %r9d
-movd %xmm12, %r10d
-shrl $26, %esi
-andl $67108863, %eax
-addl %ecx, %esi
-salq $8, %rax
-movl %r9d, %ecx
-shrl $18, %r9d
-movl %esi, %r8d
-shrl $26, %esi
-andl $67108863, %r8d
-addl %r10d, %esi
-orq %r9, %rax
-salq $16, %rsi
-movq %r8, %r9
-shrl $10, %r8d
-salq $26, %rcx
-orq %r8, %rsi
-salq $34, %r9
-orq %rdx, %rcx
-movq %rsi, %r11
-shrq $42, %rsi
-movabsq $17592186044415, %rdx /* 2^44 - 1 */
-orq %r9, %rax
-movabsq $4398046511103, %r8 /* 2^42 - 1 */
-andq %rdx, %rcx
-andq %rdx, %rax
-andq %r8, %r11
-leaq (%rsi,%rsi,4), %rsi /* carry out of the top limb, times 5 */
-addq %rsi, %rcx
-movq %rcx, %r10
-shrq $44, %rcx
-addq %rcx, %rax
-andq %rdx, %r10
-movq %rax, %r9
-shrq $44, %rax
-addq %r11, %rax
-andq %rdx, %r9
-movabsq $-4398046511104, %r11 /* -(2^42): the subtraction used by the "h + -p" step of the C reference */
-movq %rax, %rcx
-andq %r8, %rcx
-shrq $42, %rax
-leaq (%rax,%rax,4), %rsi
-addq %rcx, %r11
-addq %r10, %rsi
-movq %rsi, %r8
-shrq $44, %rsi
-andq %rdx, %r8
-addq %r9, %rsi
-leaq 5(%r8), %r9 /* g0 = h0 + 5 */
-movq %r9, %rbx
-andq %rdx, %r9
-shrq $44, %rbx
-addq %rsi, %rbx
-movq %rbx, %rax
-andq %rbx, %rdx
-shrq $44, %rax
-addq %rax, %r11
-movq %r11, %rax
-shrq $63, %rax
-decq %rax /* rax = all ones when the top bit of r11 is clear, else 0: branch-free select mask */
-movq %rax, %r10
-andq %rax, %r9
-andq %rax, %rdx
-notq %r10
-andq %r11, %rax
-andq %r10, %r8
-andq %r10, %rsi
-andq %r10, %rcx
-orq %r9, %r8
-orq %rdx, %rsi
-orq %rax, %rcx
-movq %r8, (%rdi) /* the reduced value is stored as three 64-bit limbs at state+0, +8, +16 */
-movq %rsi, 8(%rdi)
-movq %rcx, 16(%rdi)
-poly1305_blocks_sse2_10:
-movq -8(%rbp), %rbx /* restore rbx from the slot pushed right after the frame pointer */
-leave
-ret
-FN_END poly1305_blocks_sse2
-
-GLOBAL_HIDDEN_FN poly1305_finish_ext_sse2 /* rdi = state, rsi = tail input, rdx = tail length, rcx = 16-byte output buffer */
-poly1305_finish_ext_sse2_local:
-pushq %r12
-movq %rcx, %r12 /* r12 = output pointer */
-pushq %rbp
-movq %rdx, %rbp /* rbp = tail length */
-pushq %rbx
-movq %rdi, %rbx /* rbx = state pointer */
-subq $32, %rsp /* 32-byte scratch block at (%rsp) */
-testq %rdx, %rdx
-je poly1305_finish_ext_sse2_27 /* empty tail: nothing to pad */
-xorl %eax, %eax
-movq %rsp, %rdi
-movl $8, %ecx
-rep stosl /* zero the 32-byte scratch block (8 dwords) */
-subq %rsp, %rsi /* rsi = source minus destination, so (%rax,%rsi) below addresses the source byte matching destination rax */
-testb $16, %dl
-movq %rsp, %rax
-je poly1305_finish_ext_sse2_28
-movdqu (%rsp,%rsi), %xmm0 /* copy 16 bytes when bit 4 of the length is set */
-addq $16, %rax
-movaps %xmm0, (%rsp)
-poly1305_finish_ext_sse2_28:
-testb $8, %bpl
-je poly1305_finish_ext_sse2_29
-movq (%rax,%rsi), %rdx /* copy 8 bytes */
-movq %rdx, (%rax)
-addq $8, %rax
-poly1305_finish_ext_sse2_29:
-testb $4, %bpl
-je poly1305_finish_ext_sse2_30
-movl (%rax,%rsi), %edx /* copy 4 bytes */
-movl %edx, (%rax)
-addq $4, %rax
-poly1305_finish_ext_sse2_30:
-testb $2, %bpl
-je poly1305_finish_ext_sse2_31
-movw (%rax,%rsi), %dx /* copy 2 bytes */
-movw %dx, (%rax)
-addq $2, %rax
-poly1305_finish_ext_sse2_31:
-testb $1, %bpl
-je poly1305_finish_ext_sse2_32
-movb (%rax,%rsi), %dl /* copy the last byte */
-movb %dl, (%rax)
-poly1305_finish_ext_sse2_32:
-cmpq $16, %rbp
-je poly1305_finish_ext_sse2_33 /* a tail of exactly 16 bytes gets no marker byte */
-movb $1, (%rsp,%rbp) /* otherwise a 1 byte is written right after the copied data */
-poly1305_finish_ext_sse2_33:
-cmpq $16, %rbp /* sets CF when the tail is shorter than 16; the two mov instructions below leave flags untouched */
-movl $32, %edx
-movq %rsp, %rsi
-sbbq %rax, %rax /* rax = -1 if tail < 16, else 0 */
-movq %rbx, %rdi
-andl $4, %eax
-addq $4, %rax /* rax = 8 if tail < 16, else 4 */
-orq %rax, 120(%rbx) /* recorded in the state flags; poly1305_blocks_sse2 uses these bits to adjust its high-bit vector */
-call poly1305_blocks_sse2_local /* process the padded 32-byte scratch block */
-poly1305_finish_ext_sse2_27:
-movq 120(%rbx), %rax
-testb $1, %al
-je poly1305_finish_ext_sse2_35 /* flag 1 clear: the blocks routine never ran, skip the final pass */
-decq %rbp
-cmpq $15, %rbp /* unsigned compare: true for original tail lengths 1..16 (length 0 wraps around to a huge value) */
-jbe poly1305_finish_ext_sse2_36
-orq $16, %rax
-jmp poly1305_finish_ext_sse2_40
-poly1305_finish_ext_sse2_36:
-orq $32, %rax
-poly1305_finish_ext_sse2_40:
-movq %rax, 120(%rbx)
-movl $32, %edx
-xorl %esi, %esi /* NULL input pointer selects the final-reduction path of poly1305_blocks_sse2 */
-movq %rbx, %rdi
-call poly1305_blocks_sse2_local
-poly1305_finish_ext_sse2_35: /* repack three limbs from state+0/+8/+16 into 128 bits, add the two words saved at state+104/+112 */
-movq 8(%rbx), %rax
-movq 112(%rbx), %rsi
-movq %rax, %rdx
-movq %rax, %rcx
-movq 16(%rbx), %rax
-shrq $20, %rcx
-salq $44, %rdx
-orq (%rbx), %rdx
-salq $24, %rax
-orq %rcx, %rax
-movq 104(%rbx), %rcx
-addq %rcx, %rdx
-adcq %rsi, %rax /* 128-bit add with carry; the carry out of the high word is discarded */
-xorps %xmm0, %xmm0
-movdqu %xmm0, (%rbx) /* clear the first 128 bytes of the state */
-movdqu %xmm0, 16(%rbx)
-movdqu %xmm0, 32(%rbx)
-movdqu %xmm0, 48(%rbx)
-movdqu %xmm0, 64(%rbx)
-movdqu %xmm0, 80(%rbx)
-movdqu %xmm0, 96(%rbx)
-movdqu %xmm0, 112(%rbx)
-movq %rdx, (%r12) /* write the 16-byte result */
-movq %rax, 8(%r12)
-addq $32, %rsp
-popq %rbx
-popq %rbp
-popq %r12
-ret
-FN_END poly1305_finish_ext_sse2
-
-GLOBAL_HIDDEN_FN poly1305_auth_sse2 /* one-shot MAC: rdi = 16-byte output, rsi = input, rdx = input length, rcx = key */
-/* disabled fallback to a non-SSE2 routine for inputs shorter than 128 bytes:
-cmpq $128, %rdx
-jb poly1305_auth_x86_local
-*/
-pushq %rbp
-movq %rsp, %rbp
-pushq %r14
-pushq %r13
-movq %rdi, %r13 /* r13 = output pointer */
-pushq %r12
-movq %rsi, %r12 /* r12 = input cursor */
-movq %rcx, %rsi /* second argument of init = key */
-pushq %rbx
-movq %rdx, %rbx /* rbx = bytes remaining */
-andq $-64, %rsp /* 64-byte align the on-stack state */
-movq %rbx, %r14
-addq $-128, %rsp /* reserve 128 bytes for the state */
-movq %rsp, %rdi
-call poly1305_init_ext_sse2_local /* rdx still holds the input length and serves as the bytes hint */
-andq $-32, %r14 /* r14 = length rounded down to a multiple of 32 */
-je poly1305_auth_sse2_42 /* fewer than 32 bytes: go straight to finish */
-movq %r12, %rsi
-movq %r14, %rdx
-movq %rsp, %rdi
-call poly1305_blocks_sse2_local
-addq %r14, %r12 /* step over the bytes just processed */
-subq %r14, %rbx
-poly1305_auth_sse2_42:
-movq %r13, %rcx
-movq %rbx, %rdx /* tail length, 0..31 */
-movq %r12, %rsi
-movq %rsp, %rdi
-call poly1305_finish_ext_sse2_local
-leaq -32(%rbp), %rsp /* drop the aligned state area; rsp now points at the four saved registers */
-popq %rbx
-popq %r12
-popq %r13
-popq %r14
-popq %rbp
-ret
-FN_END poly1305_auth_sse2
-
-
-
-
-