# For each instruction-set feature flag (HAVE_AVX, HAVE_SSE2, HAVE_SSSE3,
# HAVE_SSE41) that is set, append the matching assembly (.S) file from the
# chacha20/, poly1305/ or siphash/ directory to the corresponding source list
# (CHACHASRC, POLYSRC, SIPHASHSRC).
IF(HAVE_AVX)
SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx.S)
SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/avx.S)
- SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/avx.S)
ENDIF(HAVE_AVX)
IF(HAVE_SSE2)
SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S)
SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/sse2.S)
ENDIF(HAVE_SSE2)
-IF(HAVE_SSSE3)
- SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/ssse3.S)
-ENDIF(HAVE_SSSE3)
IF(HAVE_SSE41)
SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/sse41.S)
ENDIF(HAVE_SSE41)
+++ /dev/null
-#include "../macro.S"
-#include "constants.S"
-
-/*
- * Generated by clang-3.7 with -mavx -Ofast from reference implementation
- */
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN siphash_avx
-siphash_avx_local: # SipHash with two rounds per 8-byte word and four final rounds; reads two 64-bit words at rdi, input bytes at rsi, byte count in rdx, result left in rax (argument roles presumed from usage - confirm against the C prototype)
- .cfi_startproc
- pushq %r15 # r15, r14, r12 and rbx are saved here and restored before retq
-.Ltmp0:
- .cfi_def_cfa_offset 16
- pushq %r14
-.Ltmp1:
- .cfi_def_cfa_offset 24
- pushq %r12
-.Ltmp2:
- .cfi_def_cfa_offset 32
- pushq %rbx
-.Ltmp3:
- .cfi_def_cfa_offset 40
-.Ltmp4:
- .cfi_offset %rbx, -40
-.Ltmp5:
- .cfi_offset %r12, -32
-.Ltmp6:
- .cfi_offset %r14, -24
-.Ltmp7:
- .cfi_offset %r15, -16
- movq (%rdi), %rcx # rcx = first 64-bit word at rdi (presumably key word k0)
- movq 8(%rdi), %rbx # rbx = second 64-bit word at rdi (presumably key word k1)
- movq %rdx, %r9
- shlq $56, %r9 # r9 = rdx << 56, i.e. the low byte of the count moved to the top byte
- movq %r9, -8(%rsp) # keep it in a stack slot; tail bytes are later copied over its low bytes
- movabsq $8317987319222330741, %r12 # imm = 0x736F6D6570736575
- xorq %rcx, %r12 # r12 = state word v0
- movabsq $7237128888997146477, %rax # imm = 0x646F72616E646F6D
- xorq %rbx, %rax # rax = state word v1
- movabsq $7816392313619706465, %r8 # imm = 0x6C7967656E657261
- xorq %rcx, %r8 # r8 = state word v2
- movabsq $8387220255154660723, %rdi # imm = 0x7465646279746573
- xorq %rbx, %rdi # rdi = state word v3
- cmpq $8, %rdx
- jb .LBB0_4 # fewer than 8 bytes: skip the 8-byte word loop
-# BB#1: # %.lr.ph104
- leaq -8(%rdx), %r10
- movq %r10, %r11
- andq $-8, %r11
- leaq 8(%r11), %r14 # r14 = number of bytes the word loop will consume
- movq %rsi, %rbx
- .align 16, 0x90
-.LBB0_2: # =>This Inner Loop Header: Depth=1
- movq (%rbx), %r15 # r15 = next 8 input bytes
- addq $8, %rbx
- xorq %r15, %rdi # v3 ^= word
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax # shldq with identical operands is a rotate left
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12 # second round for this word starts here
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- xorq %r15, %r12 # v0 ^= word
- addq $-8, %rdx
- cmpq $7, %rdx
- ja .LBB0_2 # loop while more than 7 bytes remain
-# BB#3: # %..preheader_crit_edge
- subq %r11, %r10
- addq %r14, %rsi # advance rsi past the bytes consumed by the word loop
- movq %r10, %rdx # rdx = number of leftover tail bytes
-.LBB0_4: # %.preheader
- testq %rdx, %rdx
- je .LBB0_13 # no tail bytes: r9 still holds the shifted count
-# BB#5: # %overflow.checked
- xorl %ebx, %ebx
- movq %rdx, %r9
- andq $-128, %r9
- je .LBB0_9 # NOTE(review): rdx is below 8 on both paths into .LBB0_4, so the 128-byte copy below looks unreachable - confirm
-# BB#6: # %vector.body.preheader
- leaq 88(%rsp), %rbx
- leaq 96(%rsi), %rcx
- movq %rdx, %r10
- andq $-128, %r10
- .align 16, 0x90
-.LBB0_7: # %vector.body
- # =>This Inner Loop Header: Depth=1
- vmovups -96(%rcx), %ymm0 # copy 128 bytes per iteration from the input to the stack area starting at -8(%rsp)
- vmovups -64(%rcx), %ymm1
- vmovups -32(%rcx), %ymm2
- vmovups (%rcx), %ymm3
- vmovups %ymm0, -96(%rbx)
- vmovups %ymm1, -64(%rbx)
- vmovups %ymm2, -32(%rbx)
- vmovups %ymm3, (%rbx)
- subq $-128, %rbx
- subq $-128, %rcx
- addq $-128, %r10
- jne .LBB0_7
-# BB#8:
- movq %r9, %rbx
-.LBB0_9: # %middle.block
- subq %rbx, %rdx
- je .LBB0_12
-# BB#10: # %.lr.ph.preheader
- leaq -8(%rsp,%rbx), %rcx
- addq %rbx, %rsi
- .align 16, 0x90
-.LBB0_11: # %.lr.ph
- # =>This Inner Loop Header: Depth=1
- movb (%rsi), %bl # byte-wise copy of the remaining tail bytes into the stack slot
- movb %bl, (%rcx)
- incq %rcx
- incq %rsi
- decq %rdx
- jne .LBB0_11
-.LBB0_12: # %._crit_edge
- movq -8(%rsp), %r9 # r9 = last word: copied tail bytes plus the shifted count stored earlier
-.LBB0_13:
- xorq %r9, %rdi # v3 ^= last word, followed by two rounds
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- xorq %r9, %r12 # v0 ^= last word
- xorq $255, %r8 # v2 ^= 0xff, followed by the four final rounds
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12 # last round: r12 is not updated again after this add
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- addq %rax, %r8
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- xorq %rdi, %rax
- xorq %r8, %rax # rax now holds the folded state that is returned
- popq %rbx
- popq %r12
- popq %r14
- popq %r15
- vzeroupper # zero the upper halves of the ymm registers before returning
- retq
-.Lfunc_end0:
- .size siphash_avx_local, .Lfunc_end0-siphash_avx_local
- .cfi_endproc
-FN_END siphash_avx
/* Reference implementation, registered under the name "generic"; its first
 * SIPHASH_IMPL argument is 0 where the other entries pass a CPUID_* flag. */
SIPHASH_DECLARE(ref)
#define SIPHASH_GENERIC SIPHASH_IMPL(0, "generic", ref)
-#if defined(HAVE_SSE41)
+#if defined(HAVE_SSE41) && defined(__i386__)
/* SSE4.1 variant: the added condition also requires __i386__ to be defined. */
SIPHASH_DECLARE(sse41)
#define SIPHASH_SSE41 SIPHASH_IMPL(CPUID_SSE41, "sse41", sse41)
#endif
-#if defined(HAVE_SSSE3)
-SIPHASH_DECLARE(ssse3)
-#define SIPHASH_SSSE3 SIPHASH_IMPL(CPUID_SSSE3, "ssse3", ssse3)
-#endif
-#if defined(HAVE_AVX)
-SIPHASH_DECLARE(avx)
-#define SIPHASH_AVX SIPHASH_IMPL(CPUID_AVX, "avx", avx)
-#endif
/* list implementations from most optimized to least, with generic as the last entry */
static const siphash_impl_t siphash_list[] = {
SIPHASH_GENERIC,
-#if defined(SIPHASH_AVX)
- SIPHASH_AVX,
-#endif
-#if defined(SIPHASH_SSSE3)
- SIPHASH_SSSE3,
-#endif
#if defined(SIPHASH_SSE41)
SIPHASH_SSE41,
#endif
+++ /dev/null
-#include "../macro.S"
-#include "constants.S"
-
-/*
- * Generated by clang-3.7 with -mssse3 -Ofast from reference implementation
- */
-
-SECTION_TEXT
-
-GLOBAL_HIDDEN_FN siphash_ssse3
-siphash_ssse3_local: # SipHash with two rounds per 8-byte word and four final rounds; reads two 64-bit words at rdi, input bytes at rsi, byte count in rdx, result left in rax (argument roles presumed from usage - confirm against the C prototype)
- .cfi_startproc
-# BB#0:
- pushq %r15 # r15, r14, r12 and rbx are saved here and restored before retq
-.Ltmp0:
- .cfi_def_cfa_offset 16
- pushq %r14
-.Ltmp1:
- .cfi_def_cfa_offset 24
- pushq %r12
-.Ltmp2:
- .cfi_def_cfa_offset 32
- pushq %rbx
-.Ltmp3:
- .cfi_def_cfa_offset 40
-.Ltmp4:
- .cfi_offset %rbx, -40
-.Ltmp5:
- .cfi_offset %r12, -32
-.Ltmp6:
- .cfi_offset %r14, -24
-.Ltmp7:
- .cfi_offset %r15, -16
- movq (%rdi), %rcx # rcx = first 64-bit word at rdi (presumably key word k0)
- movq 8(%rdi), %rbx # rbx = second 64-bit word at rdi (presumably key word k1)
- movq %rdx, %r9
- shlq $56, %r9 # r9 = rdx << 56, i.e. the low byte of the count moved to the top byte
- movq %r9, -8(%rsp) # keep it in a stack slot; tail bytes are later copied over its low bytes
- movabsq $8317987319222330741, %r12 # imm = 0x736F6D6570736575
- xorq %rcx, %r12 # r12 = state word v0
- movabsq $7237128888997146477, %rax # imm = 0x646F72616E646F6D
- xorq %rbx, %rax # rax = state word v1
- movabsq $7816392313619706465, %r8 # imm = 0x6C7967656E657261
- xorq %rcx, %r8 # r8 = state word v2
- movabsq $8387220255154660723, %rdi # imm = 0x7465646279746573
- xorq %rbx, %rdi # rdi = state word v3
- cmpq $8, %rdx
- jb .LBB0_4 # fewer than 8 bytes: skip the 8-byte word loop
-# BB#1: # %.lr.ph104
- leaq -8(%rdx), %r10
- movq %r10, %r11
- andq $-8, %r11
- leaq 8(%r11), %r14 # r14 = number of bytes the word loop will consume
- movq %rsi, %rbx
- .align 16, 0x90
-.LBB0_2: # =>This Inner Loop Header: Depth=1
- movq (%rbx), %r15 # r15 = next 8 input bytes
- addq $8, %rbx
- xorq %r15, %rdi # v3 ^= word
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax # shldq with identical operands is a rotate left
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12 # second round for this word starts here
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- xorq %r15, %r12 # v0 ^= word
- addq $-8, %rdx
- cmpq $7, %rdx
- ja .LBB0_2 # loop while more than 7 bytes remain
-# BB#3: # %..preheader_crit_edge
- subq %r11, %r10
- addq %r14, %rsi # advance rsi past the bytes consumed by the word loop
- movq %r10, %rdx # rdx = number of leftover tail bytes
-.LBB0_4: # %.preheader
- testq %rdx, %rdx
- je .LBB0_13 # no tail bytes: r9 still holds the shifted count
-# BB#5: # %overflow.checked
- xorl %ebx, %ebx
- movq %rdx, %r9
- andq $-32, %r9
- je .LBB0_9 # NOTE(review): rdx is below 8 on both paths into .LBB0_4, so the 32-byte copy below looks unreachable - confirm
-# BB#6: # %vector.body.preheader
- leaq 8(%rsp), %rbx
- leaq 16(%rsi), %rcx
- movq %rdx, %r10
- andq $-32, %r10
- .align 16, 0x90
-.LBB0_7: # %vector.body
- # =>This Inner Loop Header: Depth=1
- movups -16(%rcx), %xmm0 # copy 32 bytes per iteration from the input to the stack area starting at -8(%rsp)
- movups (%rcx), %xmm1
- movups %xmm0, -16(%rbx)
- movups %xmm1, (%rbx)
- addq $32, %rbx
- addq $32, %rcx
- addq $-32, %r10
- jne .LBB0_7
-# BB#8:
- movq %r9, %rbx
-.LBB0_9: # %middle.block
- subq %rbx, %rdx
- je .LBB0_12
-# BB#10: # %.lr.ph.preheader
- leaq -8(%rsp,%rbx), %rcx
- addq %rbx, %rsi
- .align 16, 0x90
-.LBB0_11: # %.lr.ph
- # =>This Inner Loop Header: Depth=1
- movb (%rsi), %bl # byte-wise copy of the remaining tail bytes into the stack slot
- movb %bl, (%rcx)
- incq %rcx
- incq %rsi
- decq %rdx
- jne .LBB0_11
-.LBB0_12: # %._crit_edge
- movq -8(%rsp), %r9 # r9 = last word: copied tail bytes plus the shifted count stored earlier
-.LBB0_13:
- xorq %r9, %rdi # v3 ^= last word, followed by two rounds
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- xorq %r9, %r12 # v0 ^= last word
- xorq $255, %r8 # v2 ^= 0xff, followed by the four final rounds
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- #APP
- shldq $32, %r12, %r12
- #NO_APP
- addq %rax, %r8
- addq %rdi, %r12
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- xorq %r12, %rdi
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- addq %rax, %r12 # last round: r12 is not updated again after this add
- addq %rdi, %r8
- #APP
- shldq $13, %rax, %rax
- #NO_APP
- #APP
- shldq $16, %rdi, %rdi
- #NO_APP
- xorq %r12, %rax
- xorq %r8, %rdi
- addq %rax, %r8
- #APP
- shldq $17, %rax, %rax
- #NO_APP
- #APP
- shldq $21, %rdi, %rdi
- #NO_APP
- xorq %r8, %rax
- #APP
- shldq $32, %r8, %r8
- #NO_APP
- xorq %rdi, %rax
- xorq %r8, %rax # rax now holds the folded state that is returned
- popq %rbx
- popq %r12
- popq %r14
- popq %r15
- retq
-.Lfunc_end0:
- .size siphash_ssse3_local, .Lfunc_end0-siphash_ssse3_local
- .cfi_endproc
-FN_END siphash_ssse3