From fbddd1b83f1e21068e1b287af1861fb40649eab8 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 13 Dec 2016 16:19:02 +0000
Subject: [PATCH] [Feature] Start import of the optimized base64 decode

---
 src/libcryptobox/CMakeLists.txt  |   4 +-
 src/libcryptobox/base64/base64.c |  96 +++++++++++++
 src/libcryptobox/base64/base64.h |  23 ++++
 src/libcryptobox/base64/ref.c    | 227 +++++++++++++++++++++++++++++++
 src/libcryptobox/cryptobox.h     |  10 ++
 src/libmime/message.h            |  14 ++
 6 files changed, 373 insertions(+), 1 deletion(-)
 create mode 100644 src/libcryptobox/base64/base64.c
 create mode 100644 src/libcryptobox/base64/base64.h
 create mode 100644 src/libcryptobox/base64/ref.c

diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt
index aef9c9dbd..165f9b40f 100644
--- a/src/libcryptobox/CMakeLists.txt
+++ b/src/libcryptobox/CMakeLists.txt
@@ -15,6 +15,8 @@ SET(CURVESRC ${CMAKE_CURRENT_SOURCE_DIR}/curve25519/ref.c
 SET(EDSRC ${CMAKE_CURRENT_SOURCE_DIR}/ed25519/ref.c
 		${CMAKE_CURRENT_SOURCE_DIR}/ed25519/ed25519.c)
 
+SET(BASE64SRC ${CMAKE_CURRENT_SOURCE_DIR}/base64/ref.c
+		${CMAKE_CURRENT_SOURCE_DIR}/base64/base64.c)
 
 SET(ASM_CODE "
 	.macro TEST1 op
@@ -99,4 +101,4 @@ SET(LIBCRYPTOBOXSRC ${CMAKE_CURRENT_SOURCE_DIR}/cryptobox.c
 		${CMAKE_CURRENT_SOURCE_DIR}/catena/catena.c)
 
 SET(RSPAMD_CRYPTOBOX ${LIBCRYPTOBOXSRC} ${CHACHASRC} ${POLYSRC} ${SIPHASHSRC}
-	${CURVESRC} ${BLAKE2SRC} ${EDSRC} PARENT_SCOPE)
+	${CURVESRC} ${BLAKE2SRC} ${EDSRC} ${BASE64SRC} PARENT_SCOPE)
diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c
new file mode 100644
index 000000000..c280b59fa
--- /dev/null
+++ b/src/libcryptobox/base64/base64.c
@@ -0,0 +1,96 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+#include "base64.h"
+#include "platform_config.h"
+
+extern unsigned long cpu_config;
+
+/* One selectable decoder implementation */
+typedef struct base64_impl {
+	/* cpu_config bits that enable this entry */
+	unsigned long cpu_flags;
+	/* Name returned by base64_load () */
+	const char *desc;
+
+	/* Only a positive return value is treated as success */
+	int (*decode) (const char *in, size_t inlen,
+			unsigned char *out, size_t *outlen);
+} base64_impl_t;
+
+/* Expands without a trailing ';', the use site provides it */
+#define BASE64_DECLARE(ext) \
+	int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen)
+#define BASE64_IMPL(cpuflags, desc, ext) \
+	{(cpuflags), desc, base64_decode_##ext}
+
+BASE64_DECLARE(ref);
+#define BASE64_REF BASE64_IMPL(0, "ref", ref)
+
+/* The flagless reference entry comes first and acts as the default */
+static const base64_impl_t base64_list[] = {
+	BASE64_REF,
+#if defined(BASE64_AVX2)
+	BASE64_AVX2,
+#endif
+#if defined(BASE64_AVX)
+	BASE64_AVX,
+#endif
+#if defined(BASE64_SSSE3)
+	BASE64_SSSE3,
+#endif
+};
+
+static const base64_impl_t *base64_opt = &base64_list[0];
+
+/**
+ * Switch to the first implementation whose cpu_flags intersect cpu_config;
+ * the reference decoder stays selected when nothing matches.
+ * @return description of the implementation in use
+ */
+const char *
+base64_load (void)
+{
+	guint i;
+
+	if (cpu_config != 0) {
+		for (i = 0; i < G_N_ELEMENTS (base64_list); i++) {
+			if (base64_list[i].cpu_flags & cpu_config) {
+				base64_opt = &base64_list[i];
+				break;
+			}
+		}
+	}
+
+	return base64_opt->desc;
+}
+
+/**
+ * Decode base64 with the implementation chosen by base64_load ()
+ * @return TRUE on success, FALSE on any decoder failure
+ */
+gboolean
+rspamd_cryptobox_base64_decode (const gchar *in, gsize inlen,
+		guchar *out, gsize *outlen)
+{
+	/*
+	 * Decoders return int and may report errors as -1, which a plain
+	 * conversion to gboolean would turn into a true value
+	 */
+	return base64_opt->decode (in, inlen, out, outlen) > 0;
+}
diff --git a/src/libcryptobox/base64/base64.h b/src/libcryptobox/base64/base64.h
new file mode 100644
index 000000000..e7c639d58
--- /dev/null
+++ b/src/libcryptobox/base64/base64.h
@@ -0,0 +1,23 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under
the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBCRYPTOBOX_BASE64_BASE64_H_
+#define SRC_LIBCRYPTOBOX_BASE64_BASE64_H_
+
+#include "config.h"
+
+const char* base64_load (void);
+
+#endif /* SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ */
diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c
new file mode 100644
index 000000000..b262fa5e5
--- /dev/null
+++ b/src/libcryptobox/base64/ref.c
@@ -0,0 +1,180 @@
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2016, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+/*
+ * Decoding table indexed by the input byte: 0..63 is the symbol value,
+ * 254 marks the '=' padding character and 255 any byte outside the alphabet.
+ */
+const uint8_t
+base64_table_dec[] =
+{
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
+	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255,
+	255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
+	255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+};
+
+/*
+ * Decode four alphabet symbols at 'c' into three bytes at 'o'.
+ * Returns 1 on success; returns 0 and leaves 'o' untouched when any of the
+ * four bytes is padding or lies outside the alphabet.
+ */
+static inline int
+base64_decode_quad (const uint8_t *c, uint8_t *o)
+{
+	const uint8_t s0 = base64_table_dec[c[0]];
+	const uint8_t s1 = base64_table_dec[c[1]];
+	const uint8_t s2 = base64_table_dec[c[2]];
+	const uint8_t s3 = base64_table_dec[c[3]];
+
+	/* Symbols use six bits only, 254 and 255 both set the two high bits */
+	if ((s0 | s1 | s2 | s3) > 63) {
+		return 0;
+	}
+
+	o[0] = (uint8_t)((s0 << 2) | (s1 >> 4));
+	o[1] = (uint8_t)((s1 << 4) | (s2 >> 2));
+	o[2] = (uint8_t)((s2 << 6) | s3);
+
+	return 1;
+}
+
+/**
+ * Portable base64 decoder.
+ *
+ * Input is read and output is written one byte at a time, so neither buffer
+ * needs any particular alignment and nothing is stored past the decoded data.
+ *
+ * @param in encoded input
+ * @param inlen number of bytes in 'in'
+ * @param out destination for the decoded bytes
+ * @param outlen set to the number of bytes stored in 'out' on every return
+ * @return 1 if the input was consumed up to its end or to valid trailing
+ * padding, 0 if a byte outside of the alphabet or misplaced padding was met
+ */
+int
+base64_decode_ref (const char *in, size_t inlen,
+		unsigned char *out, size_t *outlen)
+{
+	const uint8_t *c = (const uint8_t *)in;
+	uint8_t *o = (uint8_t *)out;
+	uint8_t q, carry;
+	size_t outl = 0;
+	int ret = 0;
+
+	for (;;) {
+		/* Fast path: whole groups made of alphabet symbols only */
+		while (inlen >= 4 && base64_decode_quad (c, o)) {
+			c += 4;
+			o += 3;
+			outl += 3;
+			inlen -= 4;
+		}
+
+		/* First symbol of a group */
+		if (inlen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec[*c++]) >= 254) {
+			break;
+		}
+		carry = q << 2;
+
+		/* Second symbol completes the first output byte */
+		if (inlen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec[*c++]) >= 254) {
+			/*
+			 * Leave through the common exit: a negative return would read as
+			 * TRUE once converted to gboolean and would skip setting *outlen
+			 */
+			break;
+		}
+		*o++ = carry | (q >> 4);
+		carry = q << 4;
+		outl++;
+
+		/* Third symbol, or the first padding character */
+		if (inlen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec[*c++]) >= 254) {
+			if (q == 254) {
+				if (inlen-- != 0) {
+					/* "xx=" must be followed by a closing '=' and nothing else */
+					q = base64_table_dec[*c++];
+					ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+				}
+				else {
+					ret = 1;
+				}
+			}
+			/* q == 255 is an invalid byte: ret is still 0 */
+			break;
+		}
+		*o++ = carry | (q >> 2);
+		carry = q << 6;
+		outl++;
+
+		/* Fourth symbol, or the single trailing padding character */
+		if (inlen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec[*c++]) >= 254) {
+			/* '=' is accepted only as the very last byte of the input */
+			ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+			break;
+		}
+		*o++ = carry | q;
+		outl++;
+	}
+
+	*outlen = outl;
+
+	return ret;
+}
diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index 13a3b9121..07cc5adb9 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -370,4 +370,14 @@ guint64 rspamd_cryptobox_fast_hash_specific (
 		const void *data,
 		gsize len, guint64 seed);
 
+/**
+ * Decode base64 using platform optimized code
+ * @param in base64 encoded input
+ * @param inlen length of the input in bytes
+ * @param out buffer that receives the decoded bytes
+ * @param outlen receives the number of bytes written to the output buffer
+ * @return TRUE if the decoder accepted the whole input, FALSE otherwise
+ */
+gboolean rspamd_cryptobox_base64_decode (const gchar *in, gsize inlen,
+		guchar *out, gsize *outlen);
 #endif /* CRYPTOBOX_H_ */
diff --git a/src/libmime/message.h b/src/libmime/message.h
index ff7ebafc9..3dd32c4db 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -11,6 +11,7 @@
 #include "addr.h"
 #include "cryptobox.h"
 #include "mime_headers.h"
+#include "content_type.h"
 #include
 
 struct rspamd_task;
@@ -24,13 +25,26 @@ enum
rspamd_mime_part_flags { RSPAMD_MIME_PART_ARCHIVE = (1 << 3) }; +enum rspamd_cte { /* NOTE(review): presumably the MIME Content-Transfer-Encoding of a part - confirm against the code that fills `cte` */ + RSPAMD_CTE_UNKNOWN = 0, /* zero value of the enum */ + RSPAMD_CTE_7BIT = 1, + RSPAMD_CTE_8BIT = 2, + RSPAMD_CTE_QP = 3, /* presumably quoted-printable - TODO confirm */ + RSPAMD_CTE_B64 = 4, /* presumably base64 - TODO confirm */ +}; + struct rspamd_mime_part { GMimeContentType *type; + struct rspamd_content_type *ct; + rspamd_ftok_t raw_data; + rspamd_ftok_t parsed_data; + enum rspamd_cte cte; /* holds one of the enum rspamd_cte values declared above */ GByteArray *content; GMimeObject *parent; GMimeObject *mime; GHashTable *raw_headers; gchar *raw_headers_str; + gsize raw_headers_len; guchar digest[rspamd_cryptobox_HASHBYTES]; const gchar *filename; const gchar *boundary; -- 2.39.5