diff options
Diffstat (limited to 'src/libcryptobox/base64')
-rw-r--r-- | src/libcryptobox/base64/avx2.c | 110 | ||||
-rw-r--r-- | src/libcryptobox/base64/base64.c | 368 | ||||
-rw-r--r-- | src/libcryptobox/base64/base64.h | 6 | ||||
-rw-r--r-- | src/libcryptobox/base64/ref.c | 187 | ||||
-rw-r--r-- | src/libcryptobox/base64/sse42.c | 111 |
5 files changed, 513 insertions, 269 deletions
diff --git a/src/libcryptobox/base64/avx2.c b/src/libcryptobox/base64/avx2.c index c44f8edf1..38abffc32 100644 --- a/src/libcryptobox/base64/avx2.c +++ b/src/libcryptobox/base64/avx2.c @@ -77,16 +77,16 @@ extern const uint8_t base64_table_dec[256]; #include <immintrin.h> -#define CMPGT(s,n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) -#define CMPEQ(s,n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) -#define REPLACE(s,n) _mm256_and_si256((s), _mm256_set1_epi8(n)) -#define RANGE(s,a,b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1)) +#define CMPGT(s, n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) +#define CMPEQ(s, n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) +#define REPLACE(s, n) _mm256_and_si256((s), _mm256_set1_epi8(n)) +#define RANGE(s, a, b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) -1)) static inline __m256i -dec_reshuffle (__m256i in) __attribute__((__target__("avx2"))); +dec_reshuffle(__m256i in) __attribute__((__target__("avx2"))); static inline __m256i -dec_reshuffle (__m256i in) +dec_reshuffle(__m256i in) { // in, lower lane, bits, upper case are most significant bits, lower case are least significant bits: // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ @@ -108,8 +108,8 @@ dec_reshuffle (__m256i in) // Pack bytes together in each lane: out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( - 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, - 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); // 00000000 00000000 00000000 00000000 // LLllllll KKKKkkkk JJJJJJjj IIiiiiii // HHHHhhhh GGGGGGgg FFffffff EEEEeeee @@ -120,54 +120,52 @@ dec_reshuffle (__m256i in) } -#define INNER_LOOP_AVX2 \ - while (inlen >= 45) { \ - __m256i str = _mm256_loadu_si256((__m256i *)c); \ - const __m256i lut_lo = _mm256_setr_epi8( \ - 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ - 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \ - 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ - 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \ - const __m256i lut_hi = _mm256_setr_epi8( \ - 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \ - 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \ - const __m256i lut_roll = _mm256_setr_epi8( \ - 0, 16, 19, 4, -65, -65, -71, -71, \ - 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 16, 19, 4, -65, -65, -71, -71, \ - 0, 0, 0, 0, 0, 0, 0, 0); \ - const __m256i mask_2F = _mm256_set1_epi8(0x2f); \ - const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \ - const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \ - const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \ - const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \ - const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \ - const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \ - if (!_mm256_testz_si256(lo, hi)) { \ - seen_error = true; \ - break; \ - } \ - str = _mm256_add_epi8(str, roll); \ - str = dec_reshuffle(str); \ - _mm256_storeu_si256((__m256i *)o, str); \ - c += 32; \ - o += 24; \ - outl += 24; \ - inlen -= 32; \ +#define INNER_LOOP_AVX2 \ + while (inlen >= 45) { \ + __m256i str = _mm256_loadu_si256((__m256i *) c); \ + const __m256i lut_lo = _mm256_setr_epi8( \ + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \ + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \ + const __m256i lut_hi = _mm256_setr_epi8( \ + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \ + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \ + const __m256i lut_roll = _mm256_setr_epi8( \ + 0, 16, 19, 4, -65, -65, -71, -71, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 16, 19, 4, -65, -65, -71, -71, \ + 0, 0, 0, 0, 0, 0, 0, 0); \ + const __m256i mask_2F = _mm256_set1_epi8(0x2f); \ + const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \ + const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \ + const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \ + const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \ + const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \ + const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \ + if (!_mm256_testz_si256(lo, hi)) { \ + seen_error = true; \ + break; \ + } \ + str = _mm256_add_epi8(str, roll); \ + str = dec_reshuffle(str); \ + _mm256_storeu_si256((__m256i *) o, str); \ + c += 32; \ + o += 24; \ + outl += 24; \ + inlen -= 32; \ } -int -base64_decode_avx2 (const char *in, size_t inlen, - unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); -int -base64_decode_avx2 (const char *in, size_t inlen, - unsigned char *out, size_t *outlen) +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) { ssize_t ret = 0; - const uint8_t *c = (const uint8_t *)in; - uint8_t *o = (uint8_t *)out; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; uint8_t q, carry; size_t outl = 0; size_t leftover = 0; @@ -177,7 +175,7 @@ repeat: switch (leftover) { for (;;) { case 0: - if (G_LIKELY (!seen_error)) { + if (G_LIKELY(!seen_error)) { INNER_LOOP_AVX2 } @@ -227,7 +225,7 @@ repeat: } } else { - leftover --; + leftover--; } /* If we get here, there was an error: */ break; @@ -268,8 +266,8 @@ repeat: if (!ret && inlen > 0) { /* Skip to the next valid character in input */ while (inlen > 0 && base64_table_dec[*c] >= 254) { - c ++; - inlen --; + c++; + inlen--; } if (inlen > 0) { diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c index efa356252..0dc93b3d2 100644 --- a/src/libcryptobox/base64/base64.c +++ b/src/libcryptobox/base64/base64.c @@ -24,24 +24,264 @@ extern unsigned cpu_config; const uint8_t -base64_table_dec[256] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + base64_table_dec[256] = + { + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 62, + 255, + 255, + 255, + 63, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 255, + 255, + 255, + 254, + 255, + 255, + 255, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 255, + 255, + 255, + 255, + 255, + 255, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, }; static const char base64_alphabet[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; @@ -51,52 +291,54 @@ typedef struct base64_impl { unsigned short min_len; unsigned int cpu_flags; const char *desc; - int (*decode) (const char *in, size_t inlen, - unsigned char *out, size_t *outlen); + int (*decode)(const char *in, size_t inlen, + unsigned char *out, size_t *outlen); } base64_impl_t; #define BASE64_DECLARE(ext) \ - int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen); -#define BASE64_IMPL(cpuflags, min_len, desc, ext) \ - {0, (min_len), (cpuflags), desc, base64_decode_##ext} + int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen); +#define BASE64_IMPL(cpuflags, min_len, desc, ext) \ + { \ + 0, (min_len), (cpuflags), desc, base64_decode_##ext \ + } BASE64_DECLARE(ref); #define BASE64_REF BASE64_IMPL(0, 0, "ref", ref) #ifdef RSPAMD_HAS_TARGET_ATTR -# if defined(HAVE_SSE42) -int base64_decode_sse42 (const char *in, size_t inlen, - unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); +#if defined(HAVE_SSE42) +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); BASE64_DECLARE(sse42); -# define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42) -# endif +#define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42) +#endif #endif #ifdef RSPAMD_HAS_TARGET_ATTR -# if defined(HAVE_AVX2) -int base64_decode_avx2 (const char *in, size_t inlen, - unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); +#if defined(HAVE_AVX2) +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); BASE64_DECLARE(avx2); -# define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2) -# endif +#define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2) +#endif #endif static base64_impl_t base64_list[] = { - BASE64_REF, + BASE64_REF, #ifdef BASE64_SSE42 - BASE64_SSE42, + BASE64_SSE42, #endif #ifdef BASE64_AVX2 - BASE64_AVX2, + BASE64_AVX2, #endif }; static const base64_impl_t *base64_ref = &base64_list[0]; const char * -base64_load (void) +base64_load(void) { guint i; const base64_impl_t *opt_impl = base64_ref; @@ -105,7 +347,7 @@ base64_load (void) base64_list[0].enabled = true; if (cpu_config != 0) { - for (i = 1; i < G_N_ELEMENTS (base64_list); i++) { + for (i = 1; i < G_N_ELEMENTS(base64_list); i++) { if (base64_list[i].cpu_flags & cpu_config) { base64_list[i].enabled = true; opt_impl = &base64_list[i]; @@ -118,68 +360,68 @@ base64_load (void) } gboolean -rspamd_cryptobox_base64_decode (const gchar *in, gsize inlen, - guchar *out, gsize *outlen) +rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen, + guchar *out, gsize *outlen) { const base64_impl_t *opt_impl = base64_ref; - for (gint i = G_N_ELEMENTS (base64_list) - 1; i > 0; i --) { + for (gint i = G_N_ELEMENTS(base64_list) - 1; i > 0; i--) { if (base64_list[i].enabled && base64_list[i].min_len <= inlen) { opt_impl = &base64_list[i]; break; } } - return opt_impl->decode (in, inlen, out, outlen); + return opt_impl->decode(in, inlen, out, outlen); } double -base64_test (bool generic, size_t niters, size_t len, size_t str_len) +base64_test(bool generic, size_t niters, size_t len, size_t str_len) { size_t cycles; guchar *in, *out, *tmp; gdouble t1, t2, total = 0; gsize outlen; - g_assert (len > 0); - in = g_malloc (len); - tmp = g_malloc (len); - ottery_rand_bytes (in, len); + g_assert(len > 0); + in = g_malloc(len); + tmp = g_malloc(len); + ottery_rand_bytes(in, len); - out = rspamd_encode_base64_fold (in, len, str_len, &outlen, - RSPAMD_TASK_NEWLINES_CRLF); + out = rspamd_encode_base64_fold(in, len, str_len, &outlen, + RSPAMD_TASK_NEWLINES_CRLF); if (generic) { - base64_list[0].decode (out, outlen, tmp, &len); + base64_list[0].decode(out, outlen, tmp, &len); } else { - rspamd_cryptobox_base64_decode (out, outlen, tmp, &len); + rspamd_cryptobox_base64_decode(out, outlen, tmp, &len); } - g_assert (memcmp (in, tmp, len) == 0); + g_assert(memcmp(in, tmp, len) == 0); - for (cycles = 0; cycles < niters; cycles ++) { - t1 = rspamd_get_ticks (TRUE); + for (cycles = 0; cycles < niters; cycles++) { + t1 = rspamd_get_ticks(TRUE); if (generic) { - base64_list[0].decode (out, outlen, tmp, &len); + base64_list[0].decode(out, outlen, tmp, &len); } else { - rspamd_cryptobox_base64_decode (out, outlen, tmp, &len); + rspamd_cryptobox_base64_decode(out, outlen, tmp, &len); } - t2 = rspamd_get_ticks (TRUE); + t2 = rspamd_get_ticks(TRUE); total += t2 - t1; } - g_free (in); - g_free (tmp); - g_free (out); + g_free(in); + g_free(tmp); + g_free(out); return total; } gboolean -rspamd_cryptobox_base64_is_valid (const gchar *in, gsize inlen) +rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen) { const guchar *p, *end; @@ -191,12 +433,12 @@ rspamd_cryptobox_base64_is_valid (const gchar *in, gsize inlen) end = in + inlen; while (p < end && *p != '=') { - if (!g_ascii_isspace (*p)) { + if (!g_ascii_isspace(*p)) { if (base64_table_dec[*p] == 255) { return FALSE; } } - p ++; + p++; } return TRUE; diff --git a/src/libcryptobox/base64/base64.h b/src/libcryptobox/base64/base64.h index e2be379b5..f53c80afd 100644 --- a/src/libcryptobox/base64/base64.h +++ b/src/libcryptobox/base64/base64.h @@ -18,13 +18,13 @@ #include "config.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif -const char *base64_load (void); +const char *base64_load(void); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c index e01a4dc5e..61df68e35 100644 --- a/src/libcryptobox/base64/ref.c +++ b/src/libcryptobox/base64/ref.c @@ -32,92 +32,99 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern const uint8_t base64_table_dec[256]; -#define INNER_LOOP_64 do { \ - uint64_t str, res, dec; \ - bool aligned = rspamd_is_aligned_as(c, str); \ - while (inlen >= 13) { \ - if (aligned) { str = *(uint64_t *)c; } else {memcpy(&str, c, sizeof(str)); } \ - str = GUINT64_TO_BE(str); \ - if ((dec = base64_table_dec[str >> 56]) > 63) { \ - break; \ - } \ - res = dec << 58; \ - if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 52; \ - if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 46; \ - if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 40; \ - if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 34; \ - if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 28; \ - if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 22; \ - if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 16; \ - res = GUINT64_FROM_BE(res); \ - memcpy(o, &res, sizeof(res)); \ - c += 8; \ - o += 6; \ - outl += 6; \ - inlen -= 8; \ - } \ -} while (0) - -#define INNER_LOOP_32 do { \ - uint32_t str, res, dec; \ - bool aligned = rspamd_is_aligned_as(c, str); \ - while (inlen >= 8) { \ - if (aligned) { str = *(uint32_t *)c; } else {memcpy(&str, c, sizeof(str)); } \ - str = GUINT32_TO_BE(str); \ - if ((dec = base64_table_dec[str >> 24]) > 63) { \ - break; \ - } \ - res = dec << 26; \ - if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 20; \ - if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 14; \ - if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ - break; \ - } \ - res |= dec << 8; \ - res = GUINT32_FROM_BE(res); \ - memcpy(o, &res, sizeof(res)); \ - c += 4; \ - o += 3; \ - outl += 3; \ - inlen -= 4; \ - } \ -} while (0) - - -int -base64_decode_ref (const char *in, size_t inlen, - unsigned char *out, size_t *outlen) +#define INNER_LOOP_64 \ + do { \ + uint64_t str, res, dec; \ + bool aligned = rspamd_is_aligned_as(c, str); \ + while (inlen >= 13) { \ + if (aligned) { str = *(uint64_t *) c; } \ + else { \ + memcpy(&str, c, sizeof(str)); \ + } \ + str = GUINT64_TO_BE(str); \ + if ((dec = base64_table_dec[str >> 56]) > 63) { \ + break; \ + } \ + res = dec << 58; \ + if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 52; \ + if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 46; \ + if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 40; \ + if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 34; \ + if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 28; \ + if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 22; \ + if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 16; \ + res = GUINT64_FROM_BE(res); \ + memcpy(o, &res, sizeof(res)); \ + c += 8; \ + o += 6; \ + outl += 6; \ + inlen -= 8; \ + } \ + } while (0) + +#define INNER_LOOP_32 \ + do { \ + uint32_t str, res, dec; \ + bool aligned = rspamd_is_aligned_as(c, str); \ + while (inlen >= 8) { \ + if (aligned) { str = *(uint32_t *) c; } \ + else { \ + memcpy(&str, c, sizeof(str)); \ + } \ + str = GUINT32_TO_BE(str); \ + if ((dec = base64_table_dec[str >> 24]) > 63) { \ + break; \ + } \ + res = dec << 26; \ + if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 20; \ + if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 14; \ + if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 8; \ + res = GUINT32_FROM_BE(res); \ + memcpy(o, &res, sizeof(res)); \ + c += 4; \ + o += 3; \ + outl += 3; \ + inlen -= 4; \ + } \ + } while (0) + + +int base64_decode_ref(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) { ssize_t ret = 0; - const uint8_t *c = (const uint8_t *)in; - uint8_t *o = (uint8_t *)out; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; uint8_t q, carry; size_t outl = 0; size_t leftover = 0; @@ -140,7 +147,7 @@ repeat: ret = 0; break; } - carry = (uint8_t)(q << 2); + carry = (uint8_t) (q << 2); leftover++; case 1: @@ -153,7 +160,7 @@ repeat: break; } *o++ = carry | (q >> 4); - carry = (uint8_t)(q << 4); + carry = (uint8_t) (q << 4); leftover++; outl++; @@ -178,13 +185,13 @@ repeat: } } else { - leftover --; + leftover--; } /* If we get here, there was an error: */ break; } *o++ = carry | (q >> 2); - carry = (uint8_t)(q << 6); + carry = (uint8_t) (q << 6); leftover++; outl++; @@ -219,8 +226,8 @@ repeat: if (!ret && inlen > 0) { /* Skip to the next valid character in input */ while (inlen > 0 && base64_table_dec[*c] >= 254) { - c ++; - inlen --; + c++; + inlen--; } if (inlen > 0) { diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c index f3b812dc9..36070abe8 100644 --- a/src/libcryptobox/base64/sse42.c +++ b/src/libcryptobox/base64/sse42.c @@ -73,9 +73,9 @@ extern const uint8_t base64_table_dec[256]; static inline __m128i -dec_reshuffle (__m128i in) __attribute__((__target__("sse4.2"))); +dec_reshuffle(__m128i in) __attribute__((__target__("sse4.2"))); -static inline __m128i dec_reshuffle (__m128i in) +static inline __m128i dec_reshuffle(__m128i in) { // Mask in a single byte per shift: const __m128i maskB2 = _mm_set1_epi32(0x003F0000); @@ -92,64 +92,61 @@ static inline __m128i dec_reshuffle (__m128i in) // Reshuffle and repack into 12-byte output format: return _mm_shuffle_epi8(out, _mm_setr_epi8( - 3, 2, 1, - 7, 6, 5, - 11, 10, 9, - 15, 14, 13, - -1, -1, -1, -1)); + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1)); } -#define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) - -#define INNER_LOOP_SSE42 \ - while (inlen >= 24) { \ - __m128i str = _mm_loadu_si128((__m128i *)c); \ - const __m128i lut = _mm_setr_epi8( \ - 19, 16, 4, 4, \ - 4, 4, 4, 4, \ - 4, 4, 4, 4, \ - 0, 0, -71, -65 \ - ); \ - const __m128i range = _mm_setr_epi8( \ - '+','+', \ - '+','+', \ - '+','+', \ - '+','+', \ - '/','/', \ - '0','9', \ - 'A','Z', \ - 'a','z'); \ +#define CMPGT(s, n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) + +#define INNER_LOOP_SSE42 \ + while (inlen >= 24) { \ + __m128i str = _mm_loadu_si128((__m128i *) c); \ + const __m128i lut = _mm_setr_epi8( \ + 19, 16, 4, 4, \ + 4, 4, 4, 4, \ + 4, 4, 4, 4, \ + 0, 0, -71, -65); \ + const __m128i range = _mm_setr_epi8( \ + '+', '+', \ + '+', '+', \ + '+', '+', \ + '+', '+', \ + '/', '/', \ + '0', '9', \ + 'A', 'Z', \ + 'a', 'z'); \ if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \ - seen_error = true; \ - break; \ - } \ - __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \ - __m128i mask45 = CMPGT(str, 64); \ - __m128i mask5 = CMPGT(str, 96); \ - indices = _mm_andnot_si128(mask45, indices); \ - mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \ - indices = _mm_add_epi8(indices, mask45); \ - indices = _mm_add_epi8(indices, mask5); \ - __m128i delta = _mm_shuffle_epi8(lut, indices); \ - str = _mm_add_epi8(str, delta); \ - str = dec_reshuffle(str); \ - _mm_storeu_si128((__m128i *)o, str); \ - c += 16; \ - o += 12; \ - outl += 12; \ - inlen -= 16; \ + seen_error = true; \ + break; \ + } \ + __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \ + __m128i mask45 = CMPGT(str, 64); \ + __m128i mask5 = CMPGT(str, 96); \ + indices = _mm_andnot_si128(mask45, indices); \ + mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \ + indices = _mm_add_epi8(indices, mask45); \ + indices = _mm_add_epi8(indices, mask5); \ + __m128i delta = _mm_shuffle_epi8(lut, indices); \ + str = _mm_add_epi8(str, delta); \ + str = dec_reshuffle(str); \ + _mm_storeu_si128((__m128i *) o, str); \ + c += 16; \ + o += 12; \ + outl += 12; \ + inlen -= 16; \ } -int -base64_decode_sse42 (const char *in, size_t inlen, - unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); -int -base64_decode_sse42 (const char *in, size_t inlen, - unsigned char *out, size_t *outlen) +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) { ssize_t ret = 0; - const uint8_t *c = (const uint8_t *)in; - uint8_t *o = (uint8_t *)out; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; uint8_t q, carry; size_t outl = 0; size_t leftover = 0; @@ -159,7 +156,7 @@ repeat: switch (leftover) { for (;;) { case 0: - if (G_LIKELY (!seen_error)) { + if (G_LIKELY(!seen_error)) { INNER_LOOP_SSE42 } @@ -209,7 +206,7 @@ repeat: } } else { - leftover --; + leftover--; } /* If we get here, there was an error: */ break; @@ -250,8 +247,8 @@ repeat: if (!ret && inlen > 0) { /* Skip to the next valid character in input */ while (inlen > 0 && base64_table_dec[*c] >= 254) { - c ++; - inlen --; + c++; + inlen--; } if (inlen > 0) { |