From 10888831dd7706ce58de3124a0502f5d0cd244e7 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Mon, 6 Feb 2017 17:28:28 +0000
Subject: [PATCH] [Feature] Add sse42 version of base64 decoding

---
 src/libcryptobox/CMakeLists.txt  |   3 +
 src/libcryptobox/base64/base64.c |  13 ++
 src/libcryptobox/base64/sse42.c  | 258 +++++++++++++++++++++++++++++++
 src/libcryptobox/cryptobox.h     |   4 +
 4 files changed, 278 insertions(+)
 create mode 100644 src/libcryptobox/base64/sse42.c

diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt
index 9cca1b694..6e0344bbf 100644
--- a/src/libcryptobox/CMakeLists.txt
+++ b/src/libcryptobox/CMakeLists.txt
@@ -94,6 +94,9 @@ ENDIF(HAVE_SSE2)
 IF(HAVE_SSE41)
 	SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/sse41.S)
 ENDIF(HAVE_SSE41)
+IF(HAVE_SSE42)
+	SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/sse42.c)
+ENDIF(HAVE_SSE42)
 
 CONFIGURE_FILE(platform_config.h.in platform_config.h)
 INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c
index 21675ff32..361bfff64 100644
--- a/src/libcryptobox/base64/base64.c
+++ b/src/libcryptobox/base64/base64.c
@@ -57,8 +57,21 @@ typedef struct base64_impl {
 BASE64_DECLARE(ref);
 #define BASE64_REF BASE64_IMPL(0, "ref", ref)
 
+#ifdef RSPAMD_HAS_TARGET_ATTR
+# if defined(HAVE_SSE42)
+int base64_decode_sse42 (const char *in, size_t inlen,
+		unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+
+BASE64_DECLARE(sse42);
+# define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, "sse42", sse42)
+# endif
+#endif
+
 static const base64_impl_t base64_list[] = {
 		BASE64_REF,
+#ifdef BASE64_SSE42
+		BASE64_SSE42,
+#endif
 };
 
 static const base64_impl_t *base64_opt = &base64_list[0];
diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c
new file mode 100644
index 000000000..53af47342
--- /dev/null
+++ b/src/libcryptobox/base64/sse42.c
@@ -0,0 +1,258 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2016, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#include <xmmintrin.h>
+#include <nmmintrin.h>
+
+
+/*
+ * Packs the sixteen 6-bit values held in the bytes of `in` into twelve
+ * contiguous output bytes; the last four bytes of the result are zero.
+ */
+static inline __m128i
+dec_reshuffle (__m128i in) __attribute__((__target__("sse4.2")));
+
+static inline __m128i dec_reshuffle (__m128i in)
+{
+	// Mask in a single byte per shift:
+	const __m128i maskB2 = _mm_set1_epi32(0x003F0000);
+	const __m128i maskB1 = _mm_set1_epi32(0x00003F00);
+
+	// Pack bytes together:
+	__m128i out = _mm_srli_epi32(in, 16);
+
+	out = _mm_or_si128(out, _mm_srli_epi32(_mm_and_si128(in, maskB2), 2));
+
+	out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12));
+
+	out = _mm_or_si128(out, _mm_slli_epi32(in, 26));
+
+	// Reshuffle and repack into 12-byte output format:
+	return _mm_shuffle_epi8(out, _mm_setr_epi8(
+		 3,  2,  1,
+		 7,  6,  5,
+		11, 10,  9,
+		15, 14, 13,
+		-1, -1, -1, -1));
+}
+
+#define CMPGT(s,n)	_mm_cmpgt_epi8((s), _mm_set1_epi8(n))
+
+/*
+ * Bulk path: while at least 24 input bytes remain, translate 16 base64
+ * characters into 12 output bytes per iteration.  The loop is left as soon
+ * as a 16-byte block contains anything outside [+/0-9A-Za-z]; the scalar
+ * decoder then handles the rest.  The store writes a full 16 bytes at `o`
+ * although only 12 are kept.  Operates on the caller's `c`, `o`, `outl`
+ * and `inlen`.
+ */
+#define INNER_LOOP_SSE42 do { \
+	while (inlen >= 24) { \
+		__m128i str = _mm_loadu_si128((__m128i *)c); \
+		const __m128i lut = _mm_setr_epi8( \
+			19, 16,   4,   4, \
+			 4,  4,   4,   4, \
+			 4,  4,   4,   4, \
+			 0,  0, -71, -65 \
+		); \
+		const __m128i range = _mm_setr_epi8( \
+			'+','+', \
+			'+','+', \
+			'+','+', \
+			'+','+', \
+			'/','/', \
+			'0','9', \
+			'A','Z', \
+			'a','z'); \
+		if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \
+			break; \
+		} \
+		__m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \
+		__m128i mask45 = CMPGT(str, 64); \
+		__m128i mask5 = CMPGT(str, 96); \
+		indices = _mm_andnot_si128(mask45, indices); \
+		mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \
+		indices = _mm_add_epi8(indices, mask45); \
+		indices = _mm_add_epi8(indices, mask5); \
+		__m128i delta = _mm_shuffle_epi8(lut, indices); \
+		str = _mm_add_epi8(str, delta); \
+		str = dec_reshuffle(str); \
+		_mm_storeu_si128((__m128i *)o, str); \
+		c += 16; \
+		o += 12; \
+		outl += 12; \
+		inlen -= 16; \
+	} \
+} while (0)
+
+int
+base64_decode_sse42 (const char *in, size_t inlen,
+		unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+
+/*
+ * Decodes `inlen` bytes of base64 text at `in` into `out` and stores the
+ * number of bytes produced in `*outlen`.  Bytes outside the alphabet are
+ * skipped and decoding resumes at the next valid character.
+ * Returns 1 when the input ended cleanly (end of data or trailing '='
+ * padding) and 0 when it ended on an invalid byte or malformed padding.
+ */
+int
+base64_decode_sse42 (const char *in, size_t inlen,
+		unsigned char *out, size_t *outlen)
+{
+	int ret = 0;
+	const uint8_t *c = (const uint8_t *)in;
+	uint8_t *o = (uint8_t *)out;
+	uint8_t q, carry;
+	size_t outl = 0;
+	size_t leftover = 0;
+
+repeat:
+	switch (leftover) {
+		for (;;) {
+		case 0:
+			INNER_LOOP_SSE42;
+
+			if (inlen-- == 0) {
+				ret = 1;
+				break;
+			}
+			if ((q = base64_table_dec[*c++]) >= 254) {
+				ret = 0;
+				break;
+			}
+			carry = q << 2;
+			leftover++;
+
+		case 1:
+			if (inlen-- == 0) {
+				ret = 1;
+				break;
+			}
+			if ((q = base64_table_dec[*c++]) >= 254) {
+				ret = 0;
+				break;
+			}
+			*o++ = carry | (q >> 4);
+			carry = q << 4;
+			leftover++;
+			outl++;
+
+		case 2:
+			if (inlen-- == 0) {
+				ret = 1;
+				break;
+			}
+			if ((q = base64_table_dec[*c++]) >= 254) {
+				leftover++;
+
+				if (q == 254) {
+					if (inlen-- != 0) {
+						leftover = 0;
+						q = base64_table_dec[*c++];
+						ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+						break;
+					}
+					else {
+						ret = 1;
+						break;
+					}
+				}
+				/* If we get here, there was an error: */
+				break;
+			}
+			*o++ = carry | (q >> 2);
+			carry = q << 6;
+			leftover++;
+			outl++;
+
+		case 3:
+			if (inlen-- == 0) {
+				ret = 1;
+				break;
+			}
+			if ((q = base64_table_dec[*c++]) >= 254) {
+				leftover = 0;
+				/*
+				 * When q == 254, the input char is '='. Return 1 and EOF.
+				 * When q == 255, the input char is invalid. Return 0 and EOF.
+				 */
+				ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+				break;
+			}
+
+			*o++ = carry | q;
+			carry = 0;
+			leftover = 0;
+			outl++;
+		}
+	}
+
+	if (!ret && inlen > 0) {
+		/*
+		 * Skip to the next valid character in input; test inlen first so
+		 * that *c is never read past the end of the buffer.
+		 */
+		while (inlen > 0 && base64_table_dec[*c] >= 254) {
+			c ++;
+			inlen --;
+		}
+
+		if (inlen > 0) {
+			goto repeat;
+		}
+	}
+
+	*outlen = outl;
+
+	return ret;
+}
+#endif
diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index 601b2e505..a5a27f403 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -23,6 +23,10 @@ struct rspamd_cryptobox_segment {
 	gsize len;
 };
 
+#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8) || (__GNUC__ > 4))
+#define RSPAMD_HAS_TARGET_ATTR
+#endif
+
 #define rspamd_cryptobox_MAX_NONCEBYTES 24
 #define rspamd_cryptobox_MAX_PKBYTES 65
 #define rspamd_cryptobox_MAX_SKBYTES 32
-- 
2.39.5