aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/libcryptobox/CMakeLists.txt1
-rw-r--r--src/libcryptobox/siphash/avx.S332
-rw-r--r--src/libcryptobox/siphash/siphash.c8
-rw-r--r--src/libcryptobox/siphash/sse41.S5
-rw-r--r--test/lua/unit/siphash.lua23
5 files changed, 364 insertions, 5 deletions
diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt
index ecd729d6a..84b6db96e 100644
--- a/src/libcryptobox/CMakeLists.txt
+++ b/src/libcryptobox/CMakeLists.txt
@@ -49,6 +49,7 @@ ENDIF(HAVE_AVX2)
IF(HAVE_AVX)
SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx.S)
SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/avx.S)
+ SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/avx.S) # append the AVX siphash assembly to SIPHASHSRC, only inside the HAVE_AVX guard
ENDIF(HAVE_AVX)
IF(HAVE_SSE2)
SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S)
diff --git a/src/libcryptobox/siphash/avx.S b/src/libcryptobox/siphash/avx.S
new file mode 100644
index 000000000..72e18c7c1
--- /dev/null
+++ b/src/libcryptobox/siphash/avx.S
@@ -0,0 +1,332 @@
+#include "../macro.S"
+#include "constants.S"
+
+/*
+ * Generated by clang-3.7 with -mavx -Ofast from reference implementation
+ */
+
+SECTION_TEXT
+
+GLOBAL_HIDDEN_FN siphash_avx
+siphash_avx_local: /* in: rdi = 16 bytes of key material, rsi = input bytes, rdx = input length; result is returned in rax */
+ .cfi_startproc
+ pushq %r15
+.Ltmp0:
+ .cfi_def_cfa_offset 16
+ pushq %r14
+.Ltmp1:
+ .cfi_def_cfa_offset 24
+ pushq %r12
+.Ltmp2:
+ .cfi_def_cfa_offset 32
+ pushq %rbx
+.Ltmp3:
+ .cfi_def_cfa_offset 40
+.Ltmp4:
+ .cfi_offset %rbx, -40
+.Ltmp5:
+ .cfi_offset %r12, -32
+.Ltmp6:
+ .cfi_offset %r14, -24
+.Ltmp7:
+ .cfi_offset %r15, -16
+ movq (%rdi), %rcx /* rcx = first 64-bit key word */
+ movq 8(%rdi), %rbx /* rbx = second 64-bit key word */
+ movq %rdx, %r9
+ shlq $56, %r9 /* r9 = length << 56: low length byte placed in the top byte of the final word */
+ movq %r9, -8(%rsp) /* spill the final word below rsp; tail bytes are copied over its low bytes later */
+ movabsq $8317987319222330741, %r12 # imm = 0x736F6D6570736575
+ xorq %rcx, %r12 /* r12 = state word 0 (first key word ^ constant) */
+ movabsq $7237128888997146477, %rax # imm = 0x646F72616E646F6D
+ xorq %rbx, %rax /* rax = state word 1 (second key word ^ constant) */
+ movabsq $7816392313619706465, %r8 # imm = 0x6C7967656E657261
+ xorq %rcx, %r8 /* r8 = state word 2 (first key word ^ constant) */
+ movabsq $8387220255154660723, %rdi # imm = 0x7465646279746573
+ xorq %rbx, %rdi /* rdi = state word 3 (second key word ^ constant); the key pointer is no longer needed */
+ cmpq $8, %rdx
+ jb .LBB0_4 /* fewer than 8 input bytes: skip the full-word loop */
+# BB#1: # %.lr.ph104
+ leaq -8(%rdx), %r10
+ movq %r10, %r11
+ andq $-8, %r11
+ leaq 8(%r11), %r14 /* r14 = number of bytes the word loop consumes (length rounded down to a multiple of 8) */
+ movq %rsi, %rbx
+ .align 16, 0x90
+.LBB0_2: # =>This Inner Loop Header: Depth=1
+ movq (%rbx), %r15 /* r15 = next 8 input bytes (message word) */
+ addq $8, %rbx
+ xorq %r15, %rdi /* state word 3 ^= message word */
+ addq %rax, %r12 /* compression round 1 of 2 */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax /* shld with identical source and destination is a rotate left (here by 13) */
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ #APP
+ shldq $32, %r12, %r12
+ #NO_APP
+ addq %rax, %r8
+ addq %rdi, %r12
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ xorq %r12, %rdi
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ addq %rax, %r12 /* compression round 2 of 2 */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ #APP
+ shldq $32, %r12, %r12
+ #NO_APP
+ addq %rax, %r8
+ addq %rdi, %r12
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ xorq %r12, %rdi
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ xorq %r15, %r12 /* state word 0 ^= message word */
+ addq $-8, %rdx
+ cmpq $7, %rdx
+ ja .LBB0_2 /* loop while at least 8 bytes remain */
+# BB#3: # %..preheader_crit_edge
+ subq %r11, %r10 /* r10 = leftover byte count (length mod 8) */
+ addq %r14, %rsi /* advance the input pointer past the words consumed above */
+ movq %r10, %rdx
+.LBB0_4: # %.preheader
+ testq %rdx, %rdx
+ je .LBB0_13 /* no tail bytes: r9 still holds length << 56 */
+# BB#5: # %overflow.checked
+ xorl %ebx, %ebx
+ movq %rdx, %r9
+ andq $-128, %r9
+ je .LBB0_9 /* rdx is below 8 on both paths into .LBB0_4, so this branch appears to be always taken */
+# BB#6: # %vector.body.preheader
+ leaq 88(%rsp), %rbx
+ leaq 96(%rsi), %rcx
+ movq %rdx, %r10
+ andq $-128, %r10
+ .align 16, 0x90
+.LBB0_7: # %vector.body
+ # =>This Inner Loop Header: Depth=1
+ vmovups -96(%rcx), %ymm0 /* 128-byte block copy using four 32-byte unaligned loads and stores */
+ vmovups -64(%rcx), %ymm1
+ vmovups -32(%rcx), %ymm2
+ vmovups (%rcx), %ymm3
+ vmovups %ymm0, -96(%rbx)
+ vmovups %ymm1, -64(%rbx)
+ vmovups %ymm2, -32(%rbx)
+ vmovups %ymm3, (%rbx)
+ subq $-128, %rbx
+ subq $-128, %rcx
+ addq $-128, %r10
+ jne .LBB0_7
+# BB#8:
+ movq %r9, %rbx
+.LBB0_9: # %middle.block
+ subq %rbx, %rdx
+ je .LBB0_12
+# BB#10: # %.lr.ph.preheader
+ leaq -8(%rsp,%rbx), %rcx /* destination: the spilled final word at -8(%rsp) */
+ addq %rbx, %rsi
+ .align 16, 0x90
+.LBB0_11: # %.lr.ph
+ # =>This Inner Loop Header: Depth=1
+ movb (%rsi), %bl /* copy the tail one byte at a time */
+ movb %bl, (%rcx)
+ incq %rcx
+ incq %rsi
+ decq %rdx
+ jne .LBB0_11
+.LBB0_12: # %._crit_edge
+ movq -8(%rsp), %r9 /* reload the final word: tail bytes in the low bytes, length byte on top */
+.LBB0_13:
+ xorq %r9, %rdi /* state word 3 ^= final word */
+ addq %rax, %r12 /* compression round 1 of 2 for the final word */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ #APP
+ shldq $32, %r12, %r12
+ #NO_APP
+ addq %rax, %r8
+ addq %rdi, %r12
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ xorq %r12, %rdi
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ addq %rax, %r12 /* compression round 2 of 2 for the final word */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ #APP
+ shldq $32, %r12, %r12
+ #NO_APP
+ addq %rax, %r8
+ addq %rdi, %r12
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ xorq %r12, %rdi
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ xorq %r9, %r12 /* state word 0 ^= final word */
+ xorq $255, %r8 /* state word 2 ^= 0xff before the finalization rounds */
+ addq %rax, %r12 /* finalization round 1 of 4 */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ #APP
+ shldq $32, %r12, %r12
+ #NO_APP
+ addq %rax, %r8
+ addq %rdi, %r12
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ xorq %r12, %rdi
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ addq %rax, %r12 /* finalization round 2 of 4 */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ #APP
+ shldq $32, %r12, %r12
+ #NO_APP
+ addq %rax, %r8
+ addq %rdi, %r12
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ xorq %r12, %rdi
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ addq %rax, %r12 /* finalization round 3 of 4 */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ #APP
+ shldq $32, %r12, %r12
+ #NO_APP
+ addq %rax, %r8
+ addq %rdi, %r12
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ xorq %r12, %rdi
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ addq %rax, %r12 /* finalization round 4 of 4, abbreviated: r12 is not updated again after this add */
+ addq %rdi, %r8
+ #APP
+ shldq $13, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $16, %rdi, %rdi
+ #NO_APP
+ xorq %r12, %rax
+ xorq %r8, %rdi
+ addq %rax, %r8
+ #APP
+ shldq $17, %rax, %rax
+ #NO_APP
+ #APP
+ shldq $21, %rdi, %rdi
+ #NO_APP
+ xorq %r8, %rax
+ #APP
+ shldq $32, %r8, %r8
+ #NO_APP
+ xorq %rdi, %rax /* fold the remaining state words into rax, the return register */
+ xorq %r8, %rax
+ popq %rbx /* restore callee-saved registers in reverse push order */
+ popq %r12
+ popq %r14
+ popq %r15
+ vzeroupper /* zero the upper halves of the ymm registers before returning */
+ retq
+.Lfunc_end0:
+ .size siphash_avx_local, .Lfunc_end0-siphash_avx_local
+ .cfi_endproc
+FN_END siphash_avx
diff --git a/src/libcryptobox/siphash/siphash.c b/src/libcryptobox/siphash/siphash.c
index f42456b7f..498609fc5 100644
--- a/src/libcryptobox/siphash/siphash.c
+++ b/src/libcryptobox/siphash/siphash.c
@@ -49,10 +49,17 @@ SIPHASH_DECLARE(ref)
SIPHASH_DECLARE(sse41)
#define SIPHASH_SSE41 SIPHASH_IMPL(CPUID_SSE41, "sse41", sse41)
#endif
+#if defined(HAVE_AVX)
+SIPHASH_DECLARE(avx)
+#define SIPHASH_AVX SIPHASH_IMPL(CPUID_AVX, "avx", avx)
+#endif
/* list implementations from most optimized to least, with generic as the last entry */
static const siphash_impl_t siphash_list[] = {
SIPHASH_GENERIC,
+#if defined(SIPHASH_AVX)
+ SIPHASH_AVX,
+#endif
#if defined(SIPHASH_SSE41)
SIPHASH_SSE41,
#endif
@@ -73,7 +80,6 @@ siphash_load(void)
}
}
}
- fprintf(stderr, "selected %s\n", siphash_opt->desc);
}
void siphash24 (unsigned char *out, const unsigned char *in,
diff --git a/src/libcryptobox/siphash/sse41.S b/src/libcryptobox/siphash/sse41.S
index 58acfee8f..92c15671a 100644
--- a/src/libcryptobox/siphash/sse41.S
+++ b/src/libcryptobox/siphash/sse41.S
@@ -1,6 +1,11 @@
#include "../macro.S"
#include "constants.S"
+/*
+ * Generated by gcc-4.9 from siphash sse41 implementation written by
+ * Samuel Neves and submitted to supercop competition
+ */
+
SECTION_TEXT
GLOBAL_HIDDEN_FN siphash_sse41
diff --git a/test/lua/unit/siphash.lua b/test/lua/unit/siphash.lua
index 62a30b01a..1c773b45e 100644
--- a/test/lua/unit/siphash.lua
+++ b/test/lua/unit/siphash.lua
@@ -3,12 +3,27 @@
context("Siphash check functions", function()
local ffi = require("ffi")
ffi.cdef[[
- size_t siphash24_test(void);
+ void rspamd_cryptobox_init (void);
+ size_t siphash24_test(bool generic);
+ double rspamd_get_ticks (void);
]]
-
- test("Siphash test vectors", function()
- local res = ffi.C.siphash24_test()
+
+ ffi.C.rspamd_cryptobox_init()
+
+ test("Siphash test reference vectors", function() -- runs siphash24_test with generic = true and prints the elapsed time
+   local t1 = ffi.C.rspamd_get_ticks()
+   local res = ffi.C.siphash24_test(true)
+   local t2 = ffi.C.rspamd_get_ticks()
+
+   print("Reference siphash: " .. tostring(t2 - t1) .. " sec") -- label spelling fixed (was "Refrence")
+   assert_not_equal(res, 0) -- a zero result from siphash24_test fails the test
+ end)
+ test("Siphash test optimized vectors", function() -- same check with generic = false; presumably the CPU-selected implementation (confirm in siphash24_test)
+   local started = ffi.C.rspamd_get_ticks()
+   local res = ffi.C.siphash24_test(false)
+   local elapsed = ffi.C.rspamd_get_ticks() - started
+   print("Optimized siphash: " .. tostring(elapsed) .. " sec")
assert_not_equal(res, 0)
end)
end) \ No newline at end of file