Diffstat (limited to 'src/libcryptobox/siphash/avx2.S')
-rw-r--r-- | src/libcryptobox/siphash/avx2.S | 206
1 file changed, 0 insertions, 206 deletions
diff --git a/src/libcryptobox/siphash/avx2.S b/src/libcryptobox/siphash/avx2.S
deleted file mode 100644
index 070419c60..000000000
--- a/src/libcryptobox/siphash/avx2.S
+++ /dev/null
@@ -1,206 +0,0 @@
-/*-
- * Copyright 2015 Google Inc. All Rights Reserved.
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "../macro.S"
-#include "constants.S"
-
-/*
- * Generated by clang-3.8 from siphash avx2 implementation written by
- * Jan Wassenberg and Jyrki Alakuijala
- */
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN siphash_avx2
-siphash_avx2_local:
-	.cfi_startproc
-## BB#0: ## %entry
-	pushq %rbp
-Ltmp0:
-	.cfi_def_cfa_offset 16
-Ltmp1:
-	.cfi_offset %rbp, -16
-	movq %rsp, %rbp
-Ltmp2:
-	.cfi_def_cfa_register %rbp
-	pushq %rbx
-	subq $40, %rsp
-Ltmp3:
-	.cfi_offset %rbx, -24
-	movq %rdx, %rbx
-	vmovdqu (%rdi), %xmm0
-	vpxor LCPI0_0(%rip), %xmm0, %xmm1
-	vpxor LCPI0_1(%rip), %xmm0, %xmm0
-	vpunpcklqdq %xmm0, %xmm1, %xmm6 ## xmm6 = xmm1[0],xmm0[0]
-	vpunpckhqdq %xmm0, %xmm1, %xmm7 ## xmm7 = xmm1[1],xmm0[1]
-	movq %rbx, %rax
-	andq $-8, %rax
-	je LBB0_1
-## BB#2: ## %for.body.preheader
-	xorl %ecx, %ecx
-	vmovdqa LCPI0_2(%rip), %xmm0 ## xmm0 = [13,16]
-	vmovdqa LCPI0_3(%rip), %xmm1 ## xmm1 = [51,48]
-	vmovdqa LCPI0_4(%rip), %xmm2 ## xmm2 = [17,21]
-	vmovdqa LCPI0_5(%rip), %xmm3 ## xmm3 = [47,43]
-	.align 4, 0x90
-LBB0_3: ## %for.body
-	## =>This Inner Loop Header: Depth=1
-	vmovq (%rsi,%rcx), %xmm4 ## xmm4 = mem[0],zero
-	vpslldq $8, %xmm4, %xmm5 ## xmm5 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
-	vpxor %xmm5, %xmm7, %xmm5
-	vpaddq %xmm6, %xmm5, %xmm6
-	vpsllvq %xmm0, %xmm5, %xmm7
-	vpsrlvq %xmm1, %xmm5, %xmm5
-	vpor %xmm7, %xmm5, %xmm5
-	vpxor %xmm6, %xmm5, %xmm5
-	vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0]
-	vpaddq %xmm5, %xmm6, %xmm6
-	vpsllvq %xmm2, %xmm5, %xmm7
-	vpsrlvq %xmm3, %xmm5, %xmm5
-	vpor %xmm7, %xmm5, %xmm5
-	vpxor %xmm6, %xmm5, %xmm5
-	vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0]
-	vpaddq %xmm5, %xmm6, %xmm6
-	vpsllvq %xmm0, %xmm5, %xmm7
-	vpsrlvq %xmm1, %xmm5, %xmm5
-	vpor %xmm7, %xmm5, %xmm5
-	vpxor %xmm6, %xmm5, %xmm5
-	vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0]
-	vpaddq %xmm5, %xmm6, %xmm6
-	vpsllvq %xmm2, %xmm5, %xmm7
-	vpsrlvq %xmm3, %xmm5, %xmm5
-	vpor %xmm7, %xmm5, %xmm5
-	vpxor %xmm6, %xmm5, %xmm7
-	vpshufd $30, %xmm6, %xmm5 ## xmm5 = xmm6[2,3,1,0]
-	vpxor %xmm5, %xmm4, %xmm6
-	addq $8, %rcx
-	cmpq %rax, %rcx
-	jb LBB0_3
-## BB#4: ## %for.end.loopexit
-	vmovdqa %xmm7, -48(%rbp) ## 16-byte Spill
-	vmovdqa %xmm6, -32(%rbp) ## 16-byte Spill
-	addq %rax, %rsi
-	jmp LBB0_5
-LBB0_1:
-	vmovdqa %xmm7, -48(%rbp) ## 16-byte Spill
-	vmovdqa %xmm6, -32(%rbp) ## 16-byte Spill
-	xorl %eax, %eax
-LBB0_5: ## %for.end
-	movq $0, -16(%rbp)
-	movq %rbx, %rdx
-	subq %rax, %rdx
-	leaq -16(%rbp), %rdi
-	movq %rdx, %rcx
-	shrq $2, %rcx
-	rep; movsl
-	movq %rdx, %rcx
-	andq $3, %rcx
-	rep; movsb
-	movb %bl, -9(%rbp)
-	vmovq -16(%rbp), %xmm4 ## xmm4 = mem[0],zero
-	vpslldq $8, %xmm4, %xmm0 ## xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
-	vpxor -48(%rbp), %xmm0, %xmm2 ## 16-byte Folded Reload
-	vpaddq -32(%rbp), %xmm2, %xmm3 ## 16-byte Folded Reload
-	vmovdqa LCPI0_2(%rip), %xmm0 ## xmm0 = [13,16]
-	vpsllvq %xmm0, %xmm2, %xmm5
-	vmovdqa LCPI0_3(%rip), %xmm1 ## xmm1 = [51,48]
-	vpsrlvq %xmm1, %xmm2, %xmm2
-	vpor %xmm5, %xmm2, %xmm2
-	vpxor %xmm3, %xmm2, %xmm5
-	vpshufd $30, %xmm3, %xmm2 ## xmm2 = xmm3[2,3,1,0]
-	vpaddq %xmm5, %xmm2, %xmm6
-	vmovdqa LCPI0_4(%rip), %xmm2 ## xmm2 = [17,21]
-	vpsllvq %xmm2, %xmm5, %xmm7
-	vmovdqa LCPI0_5(%rip), %xmm3 ## xmm3 = [47,43]
-	vpsrlvq %xmm3, %xmm5, %xmm5
-	vpor %xmm7, %xmm5, %xmm5
-	vpxor %xmm6, %xmm5, %xmm5
-	vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0]
-	vpaddq %xmm5, %xmm6, %xmm6
-	vpsllvq %xmm0, %xmm5, %xmm7
-	vpsrlvq %xmm1, %xmm5, %xmm5
-	vpor %xmm7, %xmm5, %xmm5
-	vpxor %xmm6, %xmm5, %xmm5
-	vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0]
-	vpaddq %xmm5, %xmm6, %xmm6
-	vpsllvq %xmm2, %xmm5, %xmm7
-	vpsrlvq %xmm3, %xmm5, %xmm5
-	vpor %xmm7, %xmm5, %xmm5
-	vpxor %xmm6, %xmm5, %xmm5
-	vpshufd $30, %xmm6, %xmm6 ## xmm6 = xmm6[2,3,1,0]
-	movl $255, %eax
-	vmovq %rax, %xmm7
-	vpslldq $8, %xmm7, %xmm7 ## xmm7 = zero,zero,zero,zero,zero,zero,zero,zero,xmm7[0,1,2,3,4,5,6,7]
-	vpxor %xmm7, %xmm4, %xmm4
-	vpxor %xmm4, %xmm6, %xmm4
-	vpaddq %xmm5, %xmm4, %xmm4
-	vpsllvq %xmm0, %xmm5, %xmm6
-	vpsrlvq %xmm1, %xmm5, %xmm5
-	vpor %xmm6, %xmm5, %xmm5
-	vpxor %xmm4, %xmm5, %xmm5
-	vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0]
-	vpaddq %xmm5, %xmm4, %xmm4
-	vpsllvq %xmm2, %xmm5, %xmm6
-	vpsrlvq %xmm3, %xmm5, %xmm5
-	vpor %xmm6, %xmm5, %xmm5
-	vpxor %xmm4, %xmm5, %xmm5
-	vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0]
-	vpaddq %xmm5, %xmm4, %xmm4
-	vpsllvq %xmm0, %xmm5, %xmm6
-	vpsrlvq %xmm1, %xmm5, %xmm5
-	vpor %xmm6, %xmm5, %xmm5
-	vpxor %xmm4, %xmm5, %xmm5
-	vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0]
-	vpaddq %xmm5, %xmm4, %xmm4
-	vpsllvq %xmm2, %xmm5, %xmm6
-	vpsrlvq %xmm3, %xmm5, %xmm5
-	vpor %xmm6, %xmm5, %xmm5
-	vpxor %xmm4, %xmm5, %xmm5
-	vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0]
-	vpaddq %xmm5, %xmm4, %xmm4
-	vpsllvq %xmm0, %xmm5, %xmm6
-	vpsrlvq %xmm1, %xmm5, %xmm5
-	vpor %xmm6, %xmm5, %xmm5
-	vpxor %xmm4, %xmm5, %xmm5
-	vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0]
-	vpaddq %xmm5, %xmm4, %xmm4
-	vpsllvq %xmm2, %xmm5, %xmm6
-	vpsrlvq %xmm3, %xmm5, %xmm5
-	vpor %xmm6, %xmm5, %xmm5
-	vpxor %xmm4, %xmm5, %xmm5
-	vpshufd $30, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,1,0]
-	vpaddq %xmm5, %xmm4, %xmm4
-	vpsllvq %xmm0, %xmm5, %xmm0
-	vpsrlvq %xmm1, %xmm5, %xmm1
-	vpor %xmm0, %xmm1, %xmm0
-	vpxor %xmm4, %xmm0, %xmm0
-	vpshufd $30, %xmm4, %xmm1 ## xmm1 = xmm4[2,3,1,0]
-	vpaddq %xmm0, %xmm1, %xmm1
-	vpsllvq %xmm2, %xmm0, %xmm2
-	vpsrlvq %xmm3, %xmm0, %xmm0
-	vpor %xmm2, %xmm0, %xmm0
-	vpshufd $30, %xmm1, %xmm2 ## xmm2 = xmm1[2,3,1,0]
-	vpxor %xmm2, %xmm1, %xmm1
-	vpxor %xmm1, %xmm0, %xmm0
-	vpshufd $78, %xmm0, %xmm1 ## xmm1 = xmm0[2,3,0,1]
-	vpxor %xmm1, %xmm0, %xmm0
-	vmovq %xmm0, %rax
-	addq $40, %rsp
-	popq %rbx
-	popq %rbp
-	retq
-	.cfi_endproc
-FN_END siphash_avx2
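
For reference, the deleted file is clang-3.8 output for a SipHash implementation that appears to pack the four 64-bit state words into two XMM registers and rotate both lanes of a register at once with AVX2 variable shifts: vpsllvq by [13,16] combined with vpsrlvq by [51,48] is a packed rotate-left by 13 and 16 bits, [17,21]/[47,43] covers the other two rotations, and vpshufd $30 swaps the two 64-bit lanes while rotating one of them by 32 bits. Below is a minimal scalar sketch of the round being vectorized; it uses the standard SipHash reference round (Aumasson/Bernstein), not rspamd's own C fallback, and is shown only to explain the constants above.

/*
 * Scalar sketch of one SipHash round (reference SIPROUND, for illustration).
 * rotl(x,n) = (x << n) | (x >> (64 - n)), which is what the packed
 * vpsllvq/vpsrlvq pairs in the deleted assembly compute lane-wise.
 */
#include <stdint.h>

#define ROTL64(x, b) ((uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))))

static inline void sipround(uint64_t v[4])
{
	/* first half: rotations by 13 and 16, then v0 rotated by 32 */
	v[0] += v[1]; v[1] = ROTL64(v[1], 13); v[1] ^= v[0]; v[0] = ROTL64(v[0], 32);
	v[2] += v[3]; v[3] = ROTL64(v[3], 16); v[3] ^= v[2];
	/* second half: rotations by 21 and 17, then v2 rotated by 32 */
	v[0] += v[3]; v[3] = ROTL64(v[3], 21); v[3] ^= v[0];
	v[2] += v[1]; v[1] = ROTL64(v[1], 17); v[1] ^= v[2]; v[2] = ROTL64(v[2], 32);
}

In the assembly above, the main loop applies two such rounds per 8-byte message block (after xoring the block into v3), and the tail after the "movl $255, %eax" marker byte applies four finalization rounds before folding the state into the 64-bit result returned in %rax.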