From: Vsevolod Stakhov Date: Tue, 13 Dec 2016 17:15:58 +0000 (+0000) Subject: [Feature] Add ssse3 and avx2 base64 decoders X-Git-Tag: 1.5.0~595 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=fe3062454ca9f40448d2b8fb1b08268441b229dd;p=rspamd.git [Feature] Add ssse3 and avx2 base64 decoders --- diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt index 165f9b40f..c031c1d96 100644 --- a/src/libcryptobox/CMakeLists.txt +++ b/src/libcryptobox/CMakeLists.txt @@ -75,6 +75,7 @@ ENDIF() IF(HAVE_AVX2) SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx2.S) + SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/avx2.S) SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/avx2.S) SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/avx2.S) ENDIF(HAVE_AVX2) @@ -89,6 +90,9 @@ IF(HAVE_SSE2) SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S) SET(POLYSRC ${POLYSRC} ${CMAKE_CURRENT_SOURCE_DIR}/poly1305/sse2.S) ENDIF(HAVE_SSE2) +IF(HAVE_SSE3) + SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/ssse3.S) +ENDIF() IF(HAVE_SSE41) SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/sse41.S) ENDIF(HAVE_SSE41) diff --git a/src/libcryptobox/base64/avx2.S b/src/libcryptobox/base64/avx2.S new file mode 100644 index 000000000..ccc0e0d6c --- /dev/null +++ b/src/libcryptobox/base64/avx2.S @@ -0,0 +1,558 @@ +/* + * Generated from https://github.com/aklomp/base64/blob/master/lib/arch/avx2/ + * using gcc6 -march=core-avx2 -mtune=core-avx2 -O3 -S + */ + +#include "../macro.S" + +#ifdef LINUX +#define PROGBITS @progbits +#else +#define PROGBITS +#endif +.comm _base64_table_dec, 256, 5 + .file "avx2.c" + .text + .p2align 4,,15 +GLOBAL_HIDDEN_FN_EXT base64_decode_avx2,4,1 +base64_decode_avx2_local: +.LFB4659: + .cfi_startproc + vmovdqa .LC0(%rip), %ymm15 + leaq 8(%rsp), %r10 + .cfi_def_cfa 10, 0 + andq $-32, %rsp + vmovdqa .LC1(%rip), %ymm6 + vmovdqa .LC2(%rip), %ymm14 + pushq 
-8(%r10) + vmovdqa .LC3(%rip), %ymm13 + pushq %rbp + vmovdqa .LC4(%rip), %ymm12 + vmovdqa .LC5(%rip), %ymm11 + vmovdqa .LC6(%rip), %ymm10 + vmovdqa .LC7(%rip), %ymm9 + vmovdqa .LC8(%rip), %ymm8 + .cfi_escape 0x10,0x6,0x2,0x76,0 + movq %rsp, %rbp + pushq %r12 + vmovdqa .LC9(%rip), %ymm7 + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x70,0x6 + .cfi_escape 0x10,0xc,0x2,0x76,0x78 + xorl %r10d, %r10d + pushq %rbx + .cfi_escape 0x10,0x3,0x2,0x76,0x68 +.L2: + cmpq $44, %rsi + ja .L4 +.L26: + leaq -1(%rsi), %rbx + testq %rsi, %rsi + je .L22 + movzbl (%rdi), %eax + leaq _base64_table_dec(%rip), %r11 + movzbl (%r11,%rax), %eax + cmpb $-3, %al + ja .L11 + leal 0(,%rax,4), %r8d + testq %rbx, %rbx + je .L22 + movzbl 1(%rdi), %eax + movzbl (%r11,%rax), %eax + cmpb $-3, %al + ja .L15 +.L28: + movl %eax, %r9d + leaq -3(%rsi), %r12 + sall $4, %eax + shrb $4, %r9b + orl %r9d, %r8d + leaq 1(%r10), %r9 + movb %r8b, (%rdx) + cmpq $1, %rbx + je .L16 + movzbl 2(%rdi), %r8d + movzbl (%r11,%r8), %r8d + cmpb $-3, %r8b + ja .L24 + movl %r8d, %r9d + leaq 2(%r10), %rbx + sall $6, %r8d + subq $4, %rsi + shrb $2, %r9b + orl %r9d, %eax + movb %al, 1(%rdx) + testq %r12, %r12 + je .L19 + movzbl 3(%rdi), %eax + leaq 4(%rdi), %r9 + movzbl (%r11,%rax), %eax + cmpb $-3, %al + ja .L25 + orl %eax, %r8d + addq $3, %r10 + addq $3, %rdx + movq %r9, %rdi + movb %r8b, -1(%rdx) + cmpq $44, %rsi + jbe .L26 +.L4: + vmovdqu (%rdi), %ymm0 + vpcmpgtb %ymm6, %ymm0, %ymm1 + vpcmpgtb %ymm15, %ymm0, %ymm2 + vpcmpgtb %ymm14, %ymm0, %ymm4 + vpcmpgtb %ymm12, %ymm0, %ymm3 + vpandn %ymm1, %ymm2, %ymm2 + vpcmpgtb %ymm13, %ymm0, %ymm1 + vpand %ymm7, %ymm2, %ymm2 + vpandn %ymm1, %ymm4, %ymm4 + vpcmpgtb %ymm11, %ymm0, %ymm1 + vpand .LC10(%rip), %ymm4, %ymm4 + vpor %ymm4, %ymm2, %ymm2 + vpandn %ymm1, %ymm3, %ymm3 + vpcmpeqb %ymm6, %ymm0, %ymm1 + vpand .LC11(%rip), %ymm3, %ymm3 + vpand %ymm10, %ymm1, %ymm5 + vpcmpeqb %ymm9, %ymm0, %ymm1 + vpand %ymm8, %ymm1, %ymm1 + vpor %ymm1, %ymm5, %ymm1 + vpor %ymm2, %ymm1, %ymm1 + vpxor %xmm2, 
%xmm2, %xmm2 + vpor %ymm3, %ymm1, %ymm1 + vpcmpeqb %ymm2, %ymm1, %ymm2 + vpmovmskb %ymm2, %eax + testl %eax, %eax + je .L27 + movzbl (%rdi), %eax + leaq _base64_table_dec(%rip), %r11 + leaq -1(%rsi), %rbx + movzbl (%r11,%rax), %eax + cmpb $-3, %al + ja .L11 + leal 0(,%rax,4), %r8d + movzbl 1(%rdi), %eax + movzbl (%r11,%rax), %eax + cmpb $-3, %al + jbe .L28 +.L15: + movl $-1, %eax + vzeroupper + popq %rbx + popq %r10 + .cfi_remember_state + .cfi_def_cfa 10, 0 + popq %r12 + popq %rbp + leaq -8(%r10), %rsp + .cfi_def_cfa 7, 8 + ret + .p2align 4,,10 + .p2align 3 +.L27: + .cfi_restore_state + vpaddb %ymm1, %ymm0, %ymm0 + addq $32, %rdi + addq $24, %rdx + addq $24, %r10 + vpand .LC12(%rip), %ymm0, %ymm3 + vpsrld $16, %ymm0, %ymm2 + subq $32, %rsi + vpand .LC13(%rip), %ymm0, %ymm1 + vpslld $26, %ymm0, %ymm0 + vpsrld $2, %ymm3, %ymm3 + vpslld $12, %ymm1, %ymm1 + vpor %ymm3, %ymm2, %ymm2 + vpor %ymm0, %ymm1, %ymm0 + vmovdqa .LC15(%rip), %ymm1 + vpor %ymm2, %ymm0, %ymm0 + vpshufb .LC14(%rip), %ymm0, %ymm0 + vpermd %ymm0, %ymm1, %ymm0 + vmovdqu %ymm0, -24(%rdx) + jmp .L2 + .p2align 4,,10 + .p2align 3 +.L19: + movq %rbx, %r10 +.L22: + movl $1, %eax +.L5: + movq %r10, (%rcx) + vzeroupper + popq %rbx + popq %r10 + .cfi_remember_state + .cfi_def_cfa 10, 0 + popq %r12 + popq %rbp + leaq -8(%r10), %rsp + .cfi_def_cfa 7, 8 + ret + .p2align 4,,10 + .p2align 3 +.L11: + .cfi_restore_state + xorl %eax, %eax + jmp .L5 + .p2align 4,,10 + .p2align 3 +.L16: + movq %r9, %r10 + movl $1, %eax + jmp .L5 + .p2align 4,,10 + .p2align 3 +.L24: + movq %r9, %r10 + xorl %eax, %eax + cmpb $-2, %r8b + jne .L5 + movl $1, %eax + testq %r12, %r12 + je .L5 + movzbl 3(%rdi), %eax + cmpb $-2, (%r11,%rax) + sete %dl + xorl %eax, %eax + cmpq $4, %rsi + sete %al + andl %edx, %eax + jmp .L5 + .p2align 4,,10 + .p2align 3 +.L25: + cmpb $-2, %al + movq %rbx, %r10 + sete %dl + xorl %eax, %eax + testq %rsi, %rsi + sete %al + andl %edx, %eax + jmp .L5 + .cfi_endproc +.LFE4659: + .section .rodata.cst32,"aM",PROGBITS,32 
+ .align 16 +.LC0: + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .align 16 +.LC1: + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .align 16 +.LC2: + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .align 16 +.LC3: + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .align 16 +.LC4: + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .align 16 +.LC5: + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + 
.byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .align 16 +.LC6: + .quad 1157442765409226768 + .quad 1157442765409226768 + .quad 1157442765409226768 + .quad 1157442765409226768 + .align 16 +.LC7: + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .align 16 +.LC8: + .quad 1374463283923456787 + .quad 1374463283923456787 + .quad 1374463283923456787 + .quad 1374463283923456787 + .align 16 +.LC9: + .quad 289360691352306692 + .quad 289360691352306692 + .quad 289360691352306692 + .quad 289360691352306692 + .align 16 +.LC10: + .quad -4629771061636907073 + .quad -4629771061636907073 + .quad -4629771061636907073 + .quad -4629771061636907073 + .align 16 +.LC11: + .quad -5063812098665367111 + .quad -5063812098665367111 + .quad -5063812098665367111 + .quad -5063812098665367111 + .align 16 +.LC12: + .quad 17732923536900096 + .quad 17732923536900096 + .quad 17732923536900096 + .quad 17732923536900096 + .align 16 +.LC13: + .quad 69269232566016 + .quad 69269232566016 + .quad 69269232566016 + .quad 69269232566016 + .align 16 +.LC14: + .byte 3 + .byte 2 + .byte 1 + .byte 7 + .byte 6 + .byte 5 + .byte 11 + .byte 10 + .byte 9 + .byte 15 + .byte 14 + .byte 13 + .byte -1 + .byte -1 + .byte -1 + .byte -1 + .byte 3 + .byte 2 + .byte 1 + .byte 7 + .byte 6 + .byte 5 + .byte 11 + .byte 10 + .byte 9 + .byte 15 + .byte 14 + .byte 13 + .byte -1 + .byte -1 + .byte -1 + .byte -1 + .align 16 +.LC15: + .long 0 + .long 1 + .long 2 + .long 4 + .long 5 + .long 6 + .long -1 + .long -1 + .ident 
"GCC: (Debian 6.2.1-5) 6.2.1 20161124" diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c index c280b59fa..3a1e74ac7 100644 --- a/src/libcryptobox/base64/base64.c +++ b/src/libcryptobox/base64/base64.c @@ -20,6 +20,26 @@ #include "platform_config.h" extern unsigned long cpu_config; +const uint8_t +base64_table_dec[256] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; typedef struct base64_impl { unsigned long cpu_flags; @@ -36,15 +56,16 @@ typedef struct base64_impl { BASE64_DECLARE(ref); #define BASE64_REF BASE64_IMPL(0, "ref", ref) +BASE64_DECLARE(avx2); +#define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2,"avx2", avx2) +BASE64_DECLARE(ssse3); +#define BASE64_SSSE3 BASE64_IMPL(CPUID_SSSE3, "ssse3", ssse3) /* label must match the backend (presumably the name base64_load reports) */ static const base64_impl_t base64_list[] = { BASE64_REF, #if
defined(BASE64_AVX2) BASE64_AVX2, #endif -#if defined(BASE64_AVX) - BASE64_AVX, -#endif #if defined(BASE64_SSSE3) BASE64_SSSE3, #endif diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c index b262fa5e5..22cedee6a 100644 --- a/src/libcryptobox/base64/ref.c +++ b/src/libcryptobox/base64/ref.c @@ -29,26 +29,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "config.h" -const uint8_t -base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -}; +extern const uint8_t base64_table_dec[256]; #define INNER_LOOP_64 do { \ while (inlen >= 13) { \ diff --git a/src/libcryptobox/base64/ssse3.S b/src/libcryptobox/base64/ssse3.S new file mode 100644 index 000000000..5cf6da52f --- /dev/null +++ b/src/libcryptobox/base64/ssse3.S @@ -0,0 +1,404 @@ +/* + 
* Generated from https://github.com/aklomp/base64/blob/master/lib/arch/ssse3/ + * using gcc6 -march=core2 -mtune=core2 -O3 -S + */ + + +#include "../macro.S" + + +#ifdef LINUX +#define PROGBITS @progbits +#else +#define PROGBITS +#endif + +.comm _base64_table_dec, 256, 5 + + .file "ssse3.c" + .text + .p2align 4,,15 +GLOBAL_HIDDEN_FN_EXT base64_decode_ssse3,4,1 +base64_decode_ssse3_local: +.LFB4658: + .cfi_startproc + movdqa .LC0(%rip), %xmm11 + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + xorl %r10d, %r10d + movdqa .LC1(%rip), %xmm2 + pushq %rbx + .cfi_def_cfa_offset 24 + .cfi_offset 3, -24 + movdqa .LC2(%rip), %xmm10 + movdqa .LC3(%rip), %xmm9 + movdqa .LC4(%rip), %xmm8 + movdqa .LC5(%rip), %xmm7 + movdqa .LC6(%rip), %xmm6 + movdqa .LC7(%rip), %xmm5 + movdqa .LC8(%rip), %xmm4 + movdqa .LC9(%rip), %xmm3 +.L2: + cmpq $23, %rsi + ja .L4 +.L25: + leaq -1(%rsi), %rbx + testq %rsi, %rsi + je .L21 + movzbl (%rdi), %eax + leaq _base64_table_dec(%rip), %r9 + movzbl (%r9,%rax), %eax + cmpb $-3, %al + ja .L11 + leal 0(,%rax,4), %r8d + testq %rbx, %rbx + je .L21 + movzbl 1(%rdi), %eax + movzbl (%r9,%rax), %eax + cmpb $-3, %al + ja .L15 +.L27: + leaq -3(%rsi), %rbp + movl %eax, %r11d + sall $4, %eax + shrb $4, %r11b + orl %r11d, %r8d + cmpq $1, %rbx + movb %r8b, (%rdx) + leaq 1(%r10), %r11 + je .L16 + movzbl 2(%rdi), %r8d + movzbl (%r9,%r8), %r8d + cmpb $-3, %r8b + ja .L23 + leaq 2(%r10), %rbx + movl %r8d, %r11d + subq $4, %rsi + shrb $2, %r11b + sall $6, %r8d + orl %r11d, %eax + testq %rbp, %rbp + movb %al, 1(%rdx) + je .L19 + movzbl 3(%rdi), %eax + leaq 4(%rdi), %r11 + movzbl (%r9,%rax), %eax + cmpb $-3, %al + ja .L24 + orl %eax, %r8d + addq $3, %r10 + addq $3, %rdx + movb %r8b, -1(%rdx) + cmpq $23, %rsi + movq %r11, %rdi + jbe .L25 +.L4: + movdqu (%rdi), %xmm1 + movdqa %xmm1, %xmm0 + movdqa %xmm1, %xmm12 + movdqa %xmm1, %xmm13 + pcmpgtb %xmm2, %xmm12 + movdqa %xmm1, %xmm14 + pcmpgtb %xmm11, %xmm0 + pcmpgtb %xmm10, %xmm13 + pcmpgtb %xmm7, %xmm14 + pandn %xmm12, 
%xmm0 + movdqa %xmm1, %xmm12 + pand %xmm6, %xmm0 + pcmpgtb %xmm9, %xmm12 + pandn %xmm12, %xmm13 + movdqa %xmm1, %xmm12 + pand %xmm5, %xmm13 + por %xmm13, %xmm0 + pcmpgtb %xmm8, %xmm12 + pandn %xmm14, %xmm12 + pand %xmm4, %xmm12 + por %xmm12, %xmm0 + movdqa %xmm1, %xmm12 + pcmpeqb %xmm3, %xmm12 + pand .LC10(%rip), %xmm12 + por %xmm12, %xmm0 + movdqa %xmm1, %xmm12 + pcmpeqb %xmm2, %xmm12 + pand .LC11(%rip), %xmm12 + por %xmm12, %xmm0 + pxor %xmm12, %xmm12 + pcmpeqb %xmm0, %xmm12 + pmovmskb %xmm12, %eax + testl %eax, %eax + je .L26 + movzbl (%rdi), %eax + leaq _base64_table_dec(%rip), %r9 + leaq -1(%rsi), %rbx + movzbl (%r9,%rax), %eax + cmpb $-3, %al + ja .L11 + leal 0(,%rax,4), %r8d + movzbl 1(%rdi), %eax + movzbl (%r9,%rax), %eax + cmpb $-3, %al + jbe .L27 +.L15: + movl $-1, %eax + popq %rbx + .cfi_remember_state + .cfi_def_cfa_offset 16 + popq %rbp + .cfi_def_cfa_offset 8 + ret + .p2align 4,,10 + .p2align 3 +.L26: + .cfi_restore_state + paddb %xmm1, %xmm0 + addq $16, %rdi + addq $12, %rdx + movdqa .LC12(%rip), %xmm13 + movdqa %xmm0, %xmm1 + addq $12, %r10 + subq $16, %rsi + movdqa .LC13(%rip), %xmm12 + pand %xmm0, %xmm13 + psrld $16, %xmm1 + pand %xmm0, %xmm12 + psrld $2, %xmm13 + pslld $12, %xmm12 + por %xmm13, %xmm1 + pslld $26, %xmm0 + por %xmm12, %xmm1 + por %xmm1, %xmm0 + pshufb .LC14(%rip), %xmm0 + movups %xmm0, -12(%rdx) + jmp .L2 + .p2align 4,,10 + .p2align 3 +.L19: + movq %rbx, %r10 +.L21: + movl $1, %eax +.L5: + popq %rbx + .cfi_remember_state + .cfi_def_cfa_offset 16 + movq %r10, (%rcx) + popq %rbp + .cfi_def_cfa_offset 8 + ret + .p2align 4,,10 + .p2align 3 +.L11: + .cfi_restore_state + xorl %eax, %eax + jmp .L5 + .p2align 4,,10 + .p2align 3 +.L16: + movq %r11, %r10 + movl $1, %eax + jmp .L5 + .p2align 4,,10 + .p2align 3 +.L23: + xorl %eax, %eax + cmpb $-2, %r8b + movq %r11, %r10 + jne .L5 + testq %rbp, %rbp + movl $1, %eax + je .L5 + movzbl 3(%rdi), %eax + cmpb $-2, (%r9,%rax) + sete %dl + xorl %eax, %eax + cmpq $4, %rsi + sete %al + andl %edx, %eax + 
jmp .L5 + .p2align 4,,10 + .p2align 3 +.L24: + cmpb $-2, %al + movq %rbx, %r10 + sete %dl + xorl %eax, %eax + testq %rsi, %rsi + sete %al + andl %edx, %eax + jmp .L5 + .cfi_endproc +.LFE4658: + .section .rodata.cst16,"aM",PROGBITS,16 + .align 16 +.LC0: + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .byte 57 + .align 16 +.LC1: + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .byte 47 + .align 16 +.LC2: + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .byte 90 + .align 16 +.LC3: + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .byte 64 + .align 16 +.LC4: + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .byte 122 + .align 16 +.LC5: + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .byte 96 + .align 16 +.LC6: + .quad 289360691352306692 + .quad 289360691352306692 + .align 16 +.LC7: + .quad -4629771061636907073 + .quad -4629771061636907073 + .align 16 +.LC8: + .quad -5063812098665367111 + .quad -5063812098665367111 + .align 16 +.LC9: + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .byte 43 + .align 16 +.LC10: + .quad 1374463283923456787 + .quad 1374463283923456787 + .align 16 +.LC11: + .quad 1157442765409226768 + .quad 1157442765409226768 + 
.align 16 +.LC12: + .quad 17732923536900096 + .quad 17732923536900096 + .align 16 +.LC13: + .quad 69269232566016 + .quad 69269232566016 + .align 16 +.LC14: + .byte 3 + .byte 2 + .byte 1 + .byte 7 + .byte 6 + .byte 5 + .byte 11 + .byte 10 + .byte 9 + .byte 15 + .byte 14 + .byte 13 + .byte -1 + .byte -1 + .byte -1 + .byte -1 + .ident "GCC: (Debian 6.2.1-5) 6.2.1 20161124" diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c index 3098cd966..c0f0eb65c 100644 --- a/src/libcryptobox/cryptobox.c +++ b/src/libcryptobox/cryptobox.c @@ -29,6 +29,7 @@ #include "blake2/blake2.h" #include "siphash/siphash.h" #include "catena/catena.h" +#include "base64/base64.h" #include "ottery.h" #include "printf.h" #include "xxhash.h" @@ -314,6 +315,7 @@ rspamd_cryptobox_init (void) ctx->curve25519_impl = curve25519_load (); ctx->blake2_impl = blake2b_load (); ctx->ed25519_impl = ed25519_load (); + ctx->base64_impl = base64_load (); #ifdef HAVE_USABLE_OPENSSL ERR_load_EC_strings (); ERR_load_RAND_strings (); diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h index 07cc5adb9..d2fcbf2d2 100644 --- a/src/libcryptobox/cryptobox.h +++ b/src/libcryptobox/cryptobox.h @@ -67,6 +67,7 @@ struct rspamd_cryptobox_library_ctx { const gchar *poly1305_impl; const gchar *siphash_impl; const gchar *blake2_impl; + const gchar *base64_impl; unsigned long cpu_config; }; diff --git a/src/libcryptobox/curve25519/constants.S b/src/libcryptobox/curve25519/constants.S index 1c68955b4..1f39c2dc7 100644 --- a/src/libcryptobox/curve25519/constants.S +++ b/src/libcryptobox/curve25519/constants.S @@ -13,7 +13,7 @@ SECTION_RODATA .globl subc2 .globl v9_0 .globl v9_9 - +.globl base64_table_dec .globl REDMASK51 .p2align 4 diff --git a/src/rspamd.c b/src/rspamd.c index d9f2b9a76..60fa243f8 100644 --- a/src/rspamd.c +++ b/src/rspamd.c @@ -1233,12 +1233,13 @@ main (gint argc, gchar **argv, gchar **env) msg_info_main ("cpu features: %s", 
rspamd_main->cfg->libs_ctx->crypto_ctx->cpu_extensions); msg_info_main ("cryptobox configuration: curve25519(%s), " - "chacha20(%s), poly1305(%s), siphash(%s), blake2(%s)", + "chacha20(%s), poly1305(%s), siphash(%s), blake2(%s), base64(%s)", rspamd_main->cfg->libs_ctx->crypto_ctx->curve25519_impl, rspamd_main->cfg->libs_ctx->crypto_ctx->chacha20_impl, rspamd_main->cfg->libs_ctx->crypto_ctx->poly1305_impl, rspamd_main->cfg->libs_ctx->crypto_ctx->siphash_impl, - rspamd_main->cfg->libs_ctx->crypto_ctx->blake2_impl); + rspamd_main->cfg->libs_ctx->crypto_ctx->blake2_impl, + rspamd_main->cfg->libs_ctx->crypto_ctx->base64_impl); /* Daemonize */ if (!no_fork && daemon (0, 0) == -1) {