]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add sse42 version of base64 decoding
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 6 Feb 2017 17:28:28 +0000 (17:28 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 6 Feb 2017 17:28:28 +0000 (17:28 +0000)
src/libcryptobox/CMakeLists.txt
src/libcryptobox/base64/base64.c
src/libcryptobox/base64/sse42.c [new file with mode: 0644]
src/libcryptobox/cryptobox.h

index 9cca1b69416c7e190981c860bfcd6695d87efdf9..6e0344bbfe1f30816dc69ca1a78e5d9658e4f8a7 100644 (file)
@@ -94,6 +94,9 @@ ENDIF(HAVE_SSE2)
 IF(HAVE_SSE41)
        SET(SIPHASHSRC ${SIPHASHSRC} ${CMAKE_CURRENT_SOURCE_DIR}/siphash/sse41.S)
 ENDIF(HAVE_SSE41)
+IF(HAVE_SSE42) # add the SSE4.2 base64 decoder source to BASE64SRC only when HAVE_SSE42 is set
+       SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/sse42.c)
+ENDIF(HAVE_SSE42)
 
 CONFIGURE_FILE(platform_config.h.in platform_config.h)
 INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
index 21675ff324c6d3b10f676c96a5be7b62841e15e4..361bfff64505fb8d8a0ad33fb995ce3b5758f8c2 100644 (file)
@@ -57,8 +57,21 @@ typedef struct base64_impl {
 BASE64_DECLARE(ref);
 #define BASE64_REF BASE64_IMPL(0, "ref", ref)
 
+#ifdef RSPAMD_HAS_TARGET_ATTR /* only when __attribute__((__target__(...))) is usable */
+# if defined(HAVE_SSE42)
+int base64_decode_sse42 (const char *in, size_t inlen,
+               unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+/*
+ * NOTE(review): BASE64_DECLARE and BASE64_IMPL are defined outside this hunk.
+ * The first BASE64_IMPL argument is presumably the CPU feature flag the
+ * implementation requires (0 for the reference one) - confirm.
+ */
+BASE64_DECLARE(sse42);
+#  define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, "sse42", sse42)
+# endif /* HAVE_SSE42 */
+#endif /* RSPAMD_HAS_TARGET_ATTR */
+
 static const base64_impl_t base64_list[] = {
                 BASE64_REF, /* must stay at index 0: base64_opt starts out pointing at base64_list[0] */
+#ifdef BASE64_SSE42
+               BASE64_SSE42, /* listed only when the BASE64_SSE42 macro was defined */
+#endif
 };
 
 static const base64_impl_t *base64_opt = &base64_list[0];
diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c
new file mode 100644 (file)
index 0000000..53af473
--- /dev/null
@@ -0,0 +1,235 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2016, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#include <xmmintrin.h>
+#include <nmmintrin.h>
+
+
+static inline __m128i
+dec_reshuffle (__m128i in) __attribute__((__target__("sse4.2")));
+/*
+ * Pack sixteen 6-bit values (one per byte of `in`) into twelve output bytes.
+ *
+ * Every 32-bit lane is processed independently. Naming the lane's bytes in
+ * memory order a, b, c, d, the shifts below assemble the word
+ *
+ *     a << 26 | b << 20 | c << 14 | d << 8      (low byte holds leftovers)
+ *
+ * so that bytes 3, 2 and 1 of the lane are the three decoded octets in output
+ * order. b and c are masked to 6 bits explicitly and the upper bits of a are
+ * shifted out; d is not masked, so it is assumed to be in 0..63 already.
+ *
+ * The final shuffle gathers bytes 3, 2, 1 of each lane into result bytes
+ * 0..11; the -1 selectors make _mm_shuffle_epi8 zero result bytes 12..15.
+ */
+static inline __m128i dec_reshuffle (__m128i in)
+{
+       // Mask in a single byte per shift:
+       const __m128i maskB2 = _mm_set1_epi32(0x003F0000);
+       const __m128i maskB1 = _mm_set1_epi32(0x00003F00);
+
+       // Pack bytes together:
+       __m128i out = _mm_srli_epi32(in, 16); // d -> bits 8..13 (c lands in the unused low byte)
+
+       out = _mm_or_si128(out, _mm_srli_epi32(_mm_and_si128(in, maskB2), 2)); // c -> bits 14..19
+
+       out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12)); // b -> bits 20..25
+
+       out = _mm_or_si128(out, _mm_slli_epi32(in, 26)); // a -> bits 26..31
+
+       // Reshuffle and repack into 12-byte output format:
+       return _mm_shuffle_epi8(out, _mm_setr_epi8(
+                3,  2,  1,
+                7,  6,  5,
+               11, 10,  9,
+               15, 14, 13,
+               -1, -1, -1, -1));
+}
+
+#define CMPGT(s,n)     _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) /* signed per-byte test s > n */
+/*
+ * Bulk decode: while at least 24 input bytes remain, translate 16 base64
+ * characters into 12 output bytes per iteration.
+ *
+ * The macro expands in place and reads/updates `c`, `o`, `outl` and `inlen`
+ * of the enclosing function. The loop is left, without consuming the current
+ * 16 bytes, as soon as a chunk contains a byte outside the ranges
+ * '+', '/', '0'-'9', 'A'-'Z', 'a'-'z'.
+ *
+ * Translation works by computing a per-byte index into `lut`, whose entry is
+ * the delta that turns the ASCII code into its 6-bit value:
+ *
+ *     '+'      index 0       delta  19
+ *     '/'      index 1       delta  16
+ *     '0'-'9'  index 2..11   delta   4
+ *     'a'-'z'  index 14      delta -71
+ *     'A'-'Z'  index 15      delta -65
+ *
+ * Bytes above 64 (letters) get index 15, and bytes above 96 (lowercase) are
+ * lowered by one more to 14; all other bytes use (byte - 46) saturated at 0.
+ *
+ * Each iteration stores a full 16 bytes at `o` but advances `o` by only 12.
+ * NOTE(review): the 24-byte threshold presumably keeps those 4 extra stored
+ * bytes inside an output buffer sized for the whole input - confirm against
+ * the callers.
+ */
+#define INNER_LOOP_SSE42 do { \
+       while (inlen >= 24) { \
+               __m128i str = _mm_loadu_si128((__m128i *)c); \
+               const __m128i lut = _mm_setr_epi8( \
+                       19, 16,   4,   4, \
+                        4,  4,   4,   4, \
+                        4,  4,   4,   4, \
+                        0,  0, -71, -65 \
+               ); \
+               const __m128i range = _mm_setr_epi8( \
+                       '+','+', \
+                       '+','+', \
+                       '+','+', \
+                       '+','+', \
+                       '/','/', \
+                       '0','9', \
+                       'A','Z', \
+                       'a','z'); \
+               if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \
+                       break; \
+               } \
+               __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \
+               __m128i mask45 = CMPGT(str, 64); \
+               __m128i mask5  = CMPGT(str, 96); \
+               indices = _mm_andnot_si128(mask45, indices); \
+               mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \
+               indices = _mm_add_epi8(indices, mask45); \
+               indices = _mm_add_epi8(indices, mask5); \
+               __m128i delta = _mm_shuffle_epi8(lut, indices); \
+               str = _mm_add_epi8(str, delta); \
+               str = dec_reshuffle(str); \
+               _mm_storeu_si128((__m128i *)o, str); \
+               c += 16; \
+               o += 12; \
+               outl += 12; \
+               inlen -= 16; \
+       } \
+} while (0)
+
+/*
+ * Decode base64 text. Long runs of valid characters go through the SSE4.2
+ * bulk loop (INNER_LOOP_SSE42); everything else is handled one character at
+ * a time by the state machine below.
+ *
+ * in, inlen - input characters and their count
+ * out       - destination for the decoded bytes (the bulk loop stores 16
+ *             bytes per step while advancing by 12)
+ * outlen    - receives the number of decoded bytes written
+ *
+ * Characters that base64_table_dec maps to 254 ('=') or 255 (invalid) and
+ * that do not form valid trailing padding are skipped, and decoding resumes
+ * at the next valid character.
+ *
+ * Returns 1 when decoding stopped at the end of the input or on '=' padding
+ * that ends the input, and 0 when the input ended right after an invalid
+ * character or misplaced padding.
+ */
+int
+base64_decode_sse42 (const char *in, size_t inlen,
+               unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+int
+base64_decode_sse42 (const char *in, size_t inlen,
+               unsigned char *out, size_t *outlen)
+{
+       int ret = 0;
+       const uint8_t *c = (const uint8_t *)in;
+       uint8_t *o = (uint8_t *)out;
+       uint8_t q, carry = 0;
+       size_t outl = 0;
+       /* Number of characters already consumed from the current group of four */
+       size_t leftover = 0;
+
+repeat:
+       /*
+        * The switch jumps into the middle of the loop so that, after invalid
+        * input has been skipped, decoding resumes at the right position within
+        * the current group. Every `break` below leaves the for loop.
+        */
+       switch (leftover) {
+               for (;;) {
+               case 0:
+                       INNER_LOOP_SSE42;
+
+                       if (inlen-- == 0) {
+                               ret = 1;
+                               break;
+                       }
+                       if ((q = base64_table_dec[*c++]) >= 254) {
+                               ret = 0;
+                               break;
+                       }
+                       carry = q << 2;
+                       leftover++;
+                       /* fallthrough */
+
+               case 1:
+                       if (inlen-- == 0) {
+                               ret = 1;
+                               break;
+                       }
+                       if ((q = base64_table_dec[*c++]) >= 254) {
+                               ret = 0;
+                               break;
+                       }
+                       *o++ = carry | (q >> 4);
+                       carry = q << 4;
+                       leftover++;
+                       outl++;
+                       /* fallthrough */
+
+               case 2:
+                       if (inlen-- == 0) {
+                               ret = 1;
+                               break;
+                       }
+                       if ((q = base64_table_dec[*c++]) >= 254) {
+                               leftover++;
+
+                               if (q == 254) {
+                                       /* '=' here must be followed by a second '=' that ends the input */
+                                       if (inlen-- != 0) {
+                                               leftover = 0;
+                                               q = base64_table_dec[*c++];
+                                               ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+                                               break;
+                                       }
+                                       else {
+                                               ret = 1;
+                                               break;
+                                       }
+                               }
+                               /* If we get here, there was an error: */
+                               break;
+                       }
+                       *o++ = carry | (q >> 2);
+                       carry = q << 6;
+                       leftover++;
+                       outl++;
+                       /* fallthrough */
+
+               case 3:
+                       if (inlen-- == 0) {
+                               ret = 1;
+                               break;
+                       }
+                       if ((q = base64_table_dec[*c++]) >= 254) {
+                               leftover = 0;
+                               /*
+                                * When q == 254, the input char is '='. Return 1 and EOF.
+                                * When q == 255, the input char is invalid. Return 0 and EOF.
+                                */
+                               ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+                               break;
+                       }
+
+                       *o++ = carry | q;
+                       carry = 0;
+                       leftover = 0;
+                       outl++;
+               }
+       }
+
+       if (!ret && inlen > 0) {
+               /*
+                * Skip to the next valid character in input. The remaining length
+                * is tested before the byte is read, so the byte one past the end
+                * of the input is never dereferenced.
+                */
+               while (inlen > 0 && base64_table_dec[*c] >= 254) {
+                       c ++;
+                       inlen --;
+               }
+
+               if (inlen > 0) {
+                       goto repeat;
+               }
+       }
+
+       *outlen = outl;
+
+       return ret;
+}
+#endif
index 601b2e5052bf2318cec2683bc17030bf5ef39851..a5a27f40387b76db6f7b7026b37aa8e834d4c57b 100644 (file)
@@ -23,6 +23,10 @@ struct rspamd_cryptobox_segment {
        gsize len;
 };
 
+#if defined(__GNUC__) && ((__GNUC__ == 4) &&  (__GNUC_MINOR__ >= 8) || (__GNUC__ > 4)) /* && binds tighter than ||: true for 4.x with x >= 8, or any major version above 4 */
+#define RSPAMD_HAS_TARGET_ATTR /* defined with an empty body: it is only ever tested with #ifdef */
+#endif
+
 #define rspamd_cryptobox_MAX_NONCEBYTES 24
 #define rspamd_cryptobox_MAX_PKBYTES 65
 #define rspamd_cryptobox_MAX_SKBYTES 32