From 4478caa98eb946b58a1d1b4e4e1a7ef8b984baac Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 9 Apr 2015 11:21:33 +0100 Subject: [PATCH] Disable some siphash optimizations since they are too machine dependant. --- src/libcryptobox/CMakeLists.txt | 4 - src/libcryptobox/siphash/avx.S | 332 ----------------------------- src/libcryptobox/siphash/siphash.c | 16 +- src/libcryptobox/siphash/ssse3.S | 328 ---------------------------- 4 files changed, 1 insertion(+), 679 deletions(-) delete mode 100644 src/libcryptobox/siphash/avx.S delete mode 100644 src/libcryptobox/siphash/ssse3.S diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt index f59ec8146..ecd729d6a 100644 --- a/src/libcryptobox/CMakeLists.txt +++ b/src/libcryptobox/CMakeLists.txt @@ -49,15 +49,11 @@ ENDIF(HAVE_AVX2) IF(HAVE_AVX) SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx.S) SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/avx.S) - SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/avx.S) ENDIF(HAVE_AVX) IF(HAVE_SSE2) SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S) SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/sse2.S) ENDIF(HAVE_SSE2) -IF(HAVE_SSSE3) - SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/ssse3.S) -ENDIF(HAVE_SSSE3) IF(HAVE_SSE41) SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/sse41.S) ENDIF(HAVE_SSE41) diff --git a/src/libcryptobox/siphash/avx.S b/src/libcryptobox/siphash/avx.S deleted file mode 100644 index 72e18c7c1..000000000 --- a/src/libcryptobox/siphash/avx.S +++ /dev/null @@ -1,332 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -/* - * Generated by clang-3.7 with -mavx -Ofast from reference implementation - */ - -SECTION_TEXT - -GLOBAL_HIDDEN_FN siphash_avx -siphash_avx_local: - .cfi_startproc - pushq %r15 -.Ltmp0: - .cfi_def_cfa_offset 16 - pushq %r14 -.Ltmp1: - .cfi_def_cfa_offset 24 - pushq %r12 -.Ltmp2: - .cfi_def_cfa_offset 32 - pushq %rbx -.Ltmp3: - .cfi_def_cfa_offset 40 -.Ltmp4: - .cfi_offset %rbx, -40 -.Ltmp5: - .cfi_offset %r12, -32 -.Ltmp6: - .cfi_offset %r14, -24 -.Ltmp7: - .cfi_offset %r15, -16 - movq (%rdi), %rcx - movq 8(%rdi), %rbx - movq %rdx, %r9 - shlq $56, %r9 - movq %r9, -8(%rsp) - movabsq $8317987319222330741, %r12 # imm = 0x736F6D6570736575 - xorq %rcx, %r12 - movabsq $7237128888997146477, %rax # imm = 0x646F72616E646F6D - xorq %rbx, %rax - movabsq $7816392313619706465, %r8 # imm = 0x6C7967656E657261 - xorq %rcx, %r8 - movabsq $8387220255154660723, %rdi # imm = 0x7465646279746573 - xorq %rbx, %rdi - cmpq $8, %rdx - jb .LBB0_4 -# BB#1: # %.lr.ph104 - leaq -8(%rdx), %r10 - movq %r10, %r11 - andq $-8, %r11 - leaq 8(%r11), %r14 - movq %rsi, %rbx - .align 16, 0x90 -.LBB0_2: # =>This Inner Loop Header: Depth=1 - movq (%rbx), %r15 - addq $8, %rbx - xorq %r15, %rdi - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - xorq %r15, %r12 - addq $-8, %rdx - cmpq $7, %rdx - ja .LBB0_2 -# BB#3: # %..preheader_crit_edge - subq %r11, %r10 - addq %r14, %rsi - movq %r10, %rdx -.LBB0_4: # %.preheader - testq %rdx, %rdx - je .LBB0_13 -# BB#5: # %overflow.checked - xorl %ebx, %ebx - movq %rdx, %r9 - andq $-128, %r9 - je .LBB0_9 -# BB#6: # %vector.body.preheader - leaq 88(%rsp), %rbx - leaq 96(%rsi), %rcx - movq %rdx, %r10 - andq $-128, %r10 - .align 16, 0x90 -.LBB0_7: # %vector.body - # =>This Inner Loop Header: Depth=1 - vmovups -96(%rcx), %ymm0 - vmovups -64(%rcx), %ymm1 - vmovups -32(%rcx), %ymm2 - vmovups (%rcx), %ymm3 - vmovups %ymm0, -96(%rbx) - vmovups %ymm1, -64(%rbx) - vmovups %ymm2, -32(%rbx) - vmovups %ymm3, (%rbx) - subq $-128, %rbx - subq $-128, %rcx - addq $-128, %r10 - jne .LBB0_7 -# BB#8: - movq %r9, %rbx -.LBB0_9: # %middle.block - subq %rbx, %rdx - je .LBB0_12 -# BB#10: # %.lr.ph.preheader - leaq -8(%rsp,%rbx), %rcx - addq %rbx, %rsi - .align 16, 0x90 -.LBB0_11: # %.lr.ph - # =>This Inner Loop Header: Depth=1 - movb (%rsi), %bl - movb %bl, (%rcx) - incq %rcx - incq %rsi - decq %rdx - jne .LBB0_11 -.LBB0_12: # %._crit_edge - movq -8(%rsp), %r9 -.LBB0_13: - xorq %r9, %rdi - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - xorq %r9, %r12 - xorq $255, %r8 - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - addq %rax, %r8 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - #APP - shldq $32, %r8, %r8 - #NO_APP - xorq %rdi, %rax - xorq %r8, %rax - popq %rbx - popq %r12 - popq %r14 - popq %r15 - vzeroupper - retq -.Lfunc_end0: - .size siphash_avx_local, .Lfunc_end0-siphash_avx_local - .cfi_endproc -FN_END siphash_avx diff --git a/src/libcryptobox/siphash/siphash.c b/src/libcryptobox/siphash/siphash.c index c2c61b3e9..14e679113 100644 --- a/src/libcryptobox/siphash/siphash.c +++ b/src/libcryptobox/siphash/siphash.c @@ -45,28 +45,14 @@ typedef struct siphash_impl_t SIPHASH_DECLARE(ref) #define SIPHASH_GENERIC SIPHASH_IMPL(0, "generic", ref) -#if defined(HAVE_SSE41) +#if defined(HAVE_SSE41) && defined(__i386__) SIPHASH_DECLARE(sse41) #define SIPHASH_SSE41 SIPHASH_IMPL(CPUID_SSE41, "sse41", sse41) #endif -#if defined(HAVE_SSSE3) -SIPHASH_DECLARE(ssse3) -#define SIPHASH_SSSE3 SIPHASH_IMPL(CPUID_SSSE3, "ssse3", ssse3) -#endif -#if defined(HAVE_AVX) -SIPHASH_DECLARE(avx) -#define SIPHASH_AVX SIPHASH_IMPL(CPUID_AVX, "avx", avx) -#endif /* list implemenations from most optimized to least, with generic as the last entry */ static const siphash_impl_t siphash_list[] = { SIPHASH_GENERIC, -#if defined(SIPHASH_AVX) - SIPHASH_AVX, -#endif -#if defined(SIPHASH_SSSE3) - SIPHASH_SSSE3, -#endif #if defined(SIPHASH_SSE41) SIPHASH_SSE41, #endif diff --git a/src/libcryptobox/siphash/ssse3.S b/src/libcryptobox/siphash/ssse3.S deleted file mode 100644 index bed181ccb..000000000 --- a/src/libcryptobox/siphash/ssse3.S +++ /dev/null @@ -1,328 +0,0 @@ -#include "../macro.S" -#include "constants.S" - -/* - * Generated by clang-3.7 with -mssse3 -Ofast from reference implementation - */ - -SECTION_TEXT - -GLOBAL_HIDDEN_FN siphash_ssse3 -siphash_ssse3_local: - .cfi_startproc -# BB#0: - pushq %r15 -.Ltmp0: - .cfi_def_cfa_offset 16 - pushq %r14 -.Ltmp1: - .cfi_def_cfa_offset 24 - pushq %r12 -.Ltmp2: - .cfi_def_cfa_offset 32 - pushq %rbx -.Ltmp3: - .cfi_def_cfa_offset 40 -.Ltmp4: - .cfi_offset %rbx, -40 -.Ltmp5: - .cfi_offset %r12, -32 -.Ltmp6: - .cfi_offset %r14, -24 -.Ltmp7: - .cfi_offset %r15, -16 - movq (%rdi), %rcx - movq 8(%rdi), %rbx - movq %rdx, %r9 - shlq $56, %r9 - movq %r9, -8(%rsp) - movabsq $8317987319222330741, %r12 # imm = 0x736F6D6570736575 - xorq %rcx, %r12 - movabsq $7237128888997146477, %rax # imm = 0x646F72616E646F6D - xorq %rbx, %rax - movabsq $7816392313619706465, %r8 # imm = 0x6C7967656E657261 - xorq %rcx, %r8 - movabsq $8387220255154660723, %rdi # imm = 0x7465646279746573 - xorq %rbx, %rdi - cmpq $8, %rdx - jb .LBB0_4 -# BB#1: # %.lr.ph104 - leaq -8(%rdx), %r10 - movq %r10, %r11 - andq $-8, %r11 - leaq 8(%r11), %r14 - movq %rsi, %rbx - .align 16, 0x90 -.LBB0_2: # =>This Inner Loop Header: Depth=1 - movq (%rbx), %r15 - addq $8, %rbx - xorq %r15, %rdi - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - xorq %r15, %r12 - addq $-8, %rdx - cmpq $7, %rdx - ja .LBB0_2 -# BB#3: # %..preheader_crit_edge - subq %r11, %r10 - addq %r14, %rsi - movq %r10, %rdx -.LBB0_4: # %.preheader - testq %rdx, %rdx - je .LBB0_13 -# BB#5: # %overflow.checked - xorl %ebx, %ebx - movq %rdx, %r9 - andq $-32, %r9 - je .LBB0_9 -# BB#6: # %vector.body.preheader - leaq 8(%rsp), %rbx - leaq 16(%rsi), %rcx - movq %rdx, %r10 - andq $-32, %r10 - .align 16, 0x90 -.LBB0_7: # %vector.body - # =>This Inner Loop Header: Depth=1 - movups -16(%rcx), %xmm0 - movups (%rcx), %xmm1 - movups %xmm0, -16(%rbx) - movups %xmm1, (%rbx) - addq $32, %rbx - addq $32, %rcx - addq $-32, %r10 - jne .LBB0_7 -# BB#8: - movq %r9, %rbx -.LBB0_9: # %middle.block - subq %rbx, %rdx - je .LBB0_12 -# BB#10: # %.lr.ph.preheader - leaq -8(%rsp,%rbx), %rcx - addq %rbx, %rsi - .align 16, 0x90 -.LBB0_11: # %.lr.ph - # =>This Inner Loop Header: Depth=1 - movb (%rsi), %bl - movb %bl, (%rcx) - incq %rcx - incq %rsi - decq %rdx - jne .LBB0_11 -.LBB0_12: # %._crit_edge - movq -8(%rsp), %r9 -.LBB0_13: - xorq %r9, %rdi - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - xorq %r9, %r12 - xorq $255, %r8 - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - #APP - shldq $32, %r12, %r12 - #NO_APP - addq %rax, %r8 - addq %rdi, %r12 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - xorq %r12, %rdi - #APP - shldq $32, %r8, %r8 - #NO_APP - addq %rax, %r12 - addq %rdi, %r8 - #APP - shldq $13, %rax, %rax - #NO_APP - #APP - shldq $16, %rdi, %rdi - #NO_APP - xorq %r12, %rax - xorq %r8, %rdi - addq %rax, %r8 - #APP - shldq $17, %rax, %rax - #NO_APP - #APP - shldq $21, %rdi, %rdi - #NO_APP - xorq %r8, %rax - #APP - shldq $32, %r8, %r8 - #NO_APP - xorq %rdi, %rax - xorq %r8, %rax - popq %rbx - popq %r12 - popq %r14 - popq %r15 - retq -.Lfunc_end0: - .size siphash_ssse3_local, .Lfunc_end0-siphash_ssse3_local - .cfi_endproc -FN_END siphash_ssse3 -- 2.39.5