about summary refs log tree commit diff stats
path: root/src/libcryptobox/base64
diff options
context:
space:
mode:
Diffstat (limited to 'src/libcryptobox/base64')
-rw-r--r--  src/libcryptobox/base64/avx2.c    110
-rw-r--r--  src/libcryptobox/base64/base64.c  368
-rw-r--r--  src/libcryptobox/base64/base64.h    6
-rw-r--r--  src/libcryptobox/base64/ref.c     187
-rw-r--r--  src/libcryptobox/base64/sse42.c   111
5 files changed, 513 insertions, 269 deletions
diff --git a/src/libcryptobox/base64/avx2.c b/src/libcryptobox/base64/avx2.c
index c44f8edf1..38abffc32 100644
--- a/src/libcryptobox/base64/avx2.c
+++ b/src/libcryptobox/base64/avx2.c
@@ -77,16 +77,16 @@ extern const uint8_t base64_table_dec[256];
#include <immintrin.h>
-#define CMPGT(s,n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n))
-#define CMPEQ(s,n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n))
-#define REPLACE(s,n) _mm256_and_si256((s), _mm256_set1_epi8(n))
-#define RANGE(s,a,b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1))
+#define CMPGT(s, n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n))
+#define CMPEQ(s, n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n))
+#define REPLACE(s, n) _mm256_and_si256((s), _mm256_set1_epi8(n))
+#define RANGE(s, a, b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) -1))
static inline __m256i
-dec_reshuffle (__m256i in) __attribute__((__target__("avx2")));
+dec_reshuffle(__m256i in) __attribute__((__target__("avx2")));
static inline __m256i
-dec_reshuffle (__m256i in)
+dec_reshuffle(__m256i in)
{
// in, lower lane, bits, upper case are most significant bits, lower case are least significant bits:
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
@@ -108,8 +108,8 @@ dec_reshuffle (__m256i in)
// Pack bytes together in each lane:
out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
- 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
- 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
// 00000000 00000000 00000000 00000000
// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
@@ -120,54 +120,52 @@ dec_reshuffle (__m256i in)
}
-#define INNER_LOOP_AVX2 \
- while (inlen >= 45) { \
- __m256i str = _mm256_loadu_si256((__m256i *)c); \
- const __m256i lut_lo = _mm256_setr_epi8( \
- 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
- 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \
- 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
- 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
- const __m256i lut_hi = _mm256_setr_epi8( \
- 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \
- 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
- const __m256i lut_roll = _mm256_setr_epi8( \
- 0, 16, 19, 4, -65, -65, -71, -71, \
- 0, 0, 0, 0, 0, 0, 0, 0, \
- 0, 16, 19, 4, -65, -65, -71, -71, \
- 0, 0, 0, 0, 0, 0, 0, 0); \
- const __m256i mask_2F = _mm256_set1_epi8(0x2f); \
- const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \
- const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \
- const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \
- const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \
- const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \
- const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \
- if (!_mm256_testz_si256(lo, hi)) { \
- seen_error = true; \
- break; \
- } \
- str = _mm256_add_epi8(str, roll); \
- str = dec_reshuffle(str); \
- _mm256_storeu_si256((__m256i *)o, str); \
- c += 32; \
- o += 24; \
- outl += 24; \
- inlen -= 32; \
+#define INNER_LOOP_AVX2 \
+ while (inlen >= 45) { \
+ __m256i str = _mm256_loadu_si256((__m256i *) c); \
+ const __m256i lut_lo = _mm256_setr_epi8( \
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
+ const __m256i lut_hi = _mm256_setr_epi8( \
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
+ const __m256i lut_roll = _mm256_setr_epi8( \
+ 0, 16, 19, 4, -65, -65, -71, -71, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 16, 19, 4, -65, -65, -71, -71, \
+ 0, 0, 0, 0, 0, 0, 0, 0); \
+ const __m256i mask_2F = _mm256_set1_epi8(0x2f); \
+ const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \
+ const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \
+ const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \
+ const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \
+ const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \
+ const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \
+ if (!_mm256_testz_si256(lo, hi)) { \
+ seen_error = true; \
+ break; \
+ } \
+ str = _mm256_add_epi8(str, roll); \
+ str = dec_reshuffle(str); \
+ _mm256_storeu_si256((__m256i *) o, str); \
+ c += 32; \
+ o += 24; \
+ outl += 24; \
+ inlen -= 32; \
}
-int
-base64_decode_avx2 (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
-int
-base64_decode_avx2 (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen)
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
{
ssize_t ret = 0;
- const uint8_t *c = (const uint8_t *)in;
- uint8_t *o = (uint8_t *)out;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
uint8_t q, carry;
size_t outl = 0;
size_t leftover = 0;
@@ -177,7 +175,7 @@ repeat:
switch (leftover) {
for (;;) {
case 0:
- if (G_LIKELY (!seen_error)) {
+ if (G_LIKELY(!seen_error)) {
INNER_LOOP_AVX2
}
@@ -227,7 +225,7 @@ repeat:
}
}
else {
- leftover --;
+ leftover--;
}
/* If we get here, there was an error: */
break;
@@ -268,8 +266,8 @@ repeat:
if (!ret && inlen > 0) {
/* Skip to the next valid character in input */
while (inlen > 0 && base64_table_dec[*c] >= 254) {
- c ++;
- inlen --;
+ c++;
+ inlen--;
}
if (inlen > 0) {
diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c
index efa356252..0dc93b3d2 100644
--- a/src/libcryptobox/base64/base64.c
+++ b/src/libcryptobox/base64/base64.c
@@ -24,24 +24,264 @@
extern unsigned cpu_config;
const uint8_t
-base64_table_dec[256] =
-{
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
- 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255,
- 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
- 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ base64_table_dec[256] =
+ {
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 62,
+ 255,
+ 255,
+ 255,
+ 63,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 255,
+ 255,
+ 255,
+ 254,
+ 255,
+ 255,
+ 255,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
};
static const char base64_alphabet[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
@@ -51,52 +291,54 @@ typedef struct base64_impl {
unsigned short min_len;
unsigned int cpu_flags;
const char *desc;
- int (*decode) (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen);
+ int (*decode)(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen);
} base64_impl_t;
#define BASE64_DECLARE(ext) \
- int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen);
-#define BASE64_IMPL(cpuflags, min_len, desc, ext) \
- {0, (min_len), (cpuflags), desc, base64_decode_##ext}
+ int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen);
+#define BASE64_IMPL(cpuflags, min_len, desc, ext) \
+ { \
+ 0, (min_len), (cpuflags), desc, base64_decode_##ext \
+ }
BASE64_DECLARE(ref);
#define BASE64_REF BASE64_IMPL(0, 0, "ref", ref)
#ifdef RSPAMD_HAS_TARGET_ATTR
-# if defined(HAVE_SSE42)
-int base64_decode_sse42 (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+#if defined(HAVE_SSE42)
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
BASE64_DECLARE(sse42);
-# define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42)
-# endif
+#define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42)
+#endif
#endif
#ifdef RSPAMD_HAS_TARGET_ATTR
-# if defined(HAVE_AVX2)
-int base64_decode_avx2 (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
+#if defined(HAVE_AVX2)
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
BASE64_DECLARE(avx2);
-# define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2)
-# endif
+#define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2)
+#endif
#endif
static base64_impl_t base64_list[] = {
- BASE64_REF,
+ BASE64_REF,
#ifdef BASE64_SSE42
- BASE64_SSE42,
+ BASE64_SSE42,
#endif
#ifdef BASE64_AVX2
- BASE64_AVX2,
+ BASE64_AVX2,
#endif
};
static const base64_impl_t *base64_ref = &base64_list[0];
const char *
-base64_load (void)
+base64_load(void)
{
guint i;
const base64_impl_t *opt_impl = base64_ref;
@@ -105,7 +347,7 @@ base64_load (void)
base64_list[0].enabled = true;
if (cpu_config != 0) {
- for (i = 1; i < G_N_ELEMENTS (base64_list); i++) {
+ for (i = 1; i < G_N_ELEMENTS(base64_list); i++) {
if (base64_list[i].cpu_flags & cpu_config) {
base64_list[i].enabled = true;
opt_impl = &base64_list[i];
@@ -118,68 +360,68 @@ base64_load (void)
}
gboolean
-rspamd_cryptobox_base64_decode (const gchar *in, gsize inlen,
- guchar *out, gsize *outlen)
+rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen,
+ guchar *out, gsize *outlen)
{
const base64_impl_t *opt_impl = base64_ref;
- for (gint i = G_N_ELEMENTS (base64_list) - 1; i > 0; i --) {
+ for (gint i = G_N_ELEMENTS(base64_list) - 1; i > 0; i--) {
if (base64_list[i].enabled && base64_list[i].min_len <= inlen) {
opt_impl = &base64_list[i];
break;
}
}
- return opt_impl->decode (in, inlen, out, outlen);
+ return opt_impl->decode(in, inlen, out, outlen);
}
double
-base64_test (bool generic, size_t niters, size_t len, size_t str_len)
+base64_test(bool generic, size_t niters, size_t len, size_t str_len)
{
size_t cycles;
guchar *in, *out, *tmp;
gdouble t1, t2, total = 0;
gsize outlen;
- g_assert (len > 0);
- in = g_malloc (len);
- tmp = g_malloc (len);
- ottery_rand_bytes (in, len);
+ g_assert(len > 0);
+ in = g_malloc(len);
+ tmp = g_malloc(len);
+ ottery_rand_bytes(in, len);
- out = rspamd_encode_base64_fold (in, len, str_len, &outlen,
- RSPAMD_TASK_NEWLINES_CRLF);
+ out = rspamd_encode_base64_fold(in, len, str_len, &outlen,
+ RSPAMD_TASK_NEWLINES_CRLF);
if (generic) {
- base64_list[0].decode (out, outlen, tmp, &len);
+ base64_list[0].decode(out, outlen, tmp, &len);
}
else {
- rspamd_cryptobox_base64_decode (out, outlen, tmp, &len);
+ rspamd_cryptobox_base64_decode(out, outlen, tmp, &len);
}
- g_assert (memcmp (in, tmp, len) == 0);
+ g_assert(memcmp(in, tmp, len) == 0);
- for (cycles = 0; cycles < niters; cycles ++) {
- t1 = rspamd_get_ticks (TRUE);
+ for (cycles = 0; cycles < niters; cycles++) {
+ t1 = rspamd_get_ticks(TRUE);
if (generic) {
- base64_list[0].decode (out, outlen, tmp, &len);
+ base64_list[0].decode(out, outlen, tmp, &len);
}
else {
- rspamd_cryptobox_base64_decode (out, outlen, tmp, &len);
+ rspamd_cryptobox_base64_decode(out, outlen, tmp, &len);
}
- t2 = rspamd_get_ticks (TRUE);
+ t2 = rspamd_get_ticks(TRUE);
total += t2 - t1;
}
- g_free (in);
- g_free (tmp);
- g_free (out);
+ g_free(in);
+ g_free(tmp);
+ g_free(out);
return total;
}
gboolean
-rspamd_cryptobox_base64_is_valid (const gchar *in, gsize inlen)
+rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen)
{
const guchar *p, *end;
@@ -191,12 +433,12 @@ rspamd_cryptobox_base64_is_valid (const gchar *in, gsize inlen)
end = in + inlen;
while (p < end && *p != '=') {
- if (!g_ascii_isspace (*p)) {
+ if (!g_ascii_isspace(*p)) {
if (base64_table_dec[*p] == 255) {
return FALSE;
}
}
- p ++;
+ p++;
}
return TRUE;
diff --git a/src/libcryptobox/base64/base64.h b/src/libcryptobox/base64/base64.h
index e2be379b5..f53c80afd 100644
--- a/src/libcryptobox/base64/base64.h
+++ b/src/libcryptobox/base64/base64.h
@@ -18,13 +18,13 @@
#include "config.h"
-#ifdef __cplusplus
+#ifdef __cplusplus
extern "C" {
#endif
-const char *base64_load (void);
+const char *base64_load(void);
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c
index e01a4dc5e..61df68e35 100644
--- a/src/libcryptobox/base64/ref.c
+++ b/src/libcryptobox/base64/ref.c
@@ -32,92 +32,99 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern const uint8_t base64_table_dec[256];
-#define INNER_LOOP_64 do { \
- uint64_t str, res, dec; \
- bool aligned = rspamd_is_aligned_as(c, str); \
- while (inlen >= 13) { \
- if (aligned) { str = *(uint64_t *)c; } else {memcpy(&str, c, sizeof(str)); } \
- str = GUINT64_TO_BE(str); \
- if ((dec = base64_table_dec[str >> 56]) > 63) { \
- break; \
- } \
- res = dec << 58; \
- if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 52; \
- if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 46; \
- if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 40; \
- if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 34; \
- if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 28; \
- if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 22; \
- if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 16; \
- res = GUINT64_FROM_BE(res); \
- memcpy(o, &res, sizeof(res)); \
- c += 8; \
- o += 6; \
- outl += 6; \
- inlen -= 8; \
- } \
-} while (0)
-
-#define INNER_LOOP_32 do { \
- uint32_t str, res, dec; \
- bool aligned = rspamd_is_aligned_as(c, str); \
- while (inlen >= 8) { \
- if (aligned) { str = *(uint32_t *)c; } else {memcpy(&str, c, sizeof(str)); } \
- str = GUINT32_TO_BE(str); \
- if ((dec = base64_table_dec[str >> 24]) > 63) { \
- break; \
- } \
- res = dec << 26; \
- if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 20; \
- if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 14; \
- if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
- break; \
- } \
- res |= dec << 8; \
- res = GUINT32_FROM_BE(res); \
- memcpy(o, &res, sizeof(res)); \
- c += 4; \
- o += 3; \
- outl += 3; \
- inlen -= 4; \
- } \
-} while (0)
-
-
-int
-base64_decode_ref (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen)
+#define INNER_LOOP_64 \
+ do { \
+ uint64_t str, res, dec; \
+ bool aligned = rspamd_is_aligned_as(c, str); \
+ while (inlen >= 13) { \
+ if (aligned) { str = *(uint64_t *) c; } \
+ else { \
+ memcpy(&str, c, sizeof(str)); \
+ } \
+ str = GUINT64_TO_BE(str); \
+ if ((dec = base64_table_dec[str >> 56]) > 63) { \
+ break; \
+ } \
+ res = dec << 58; \
+ if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 52; \
+ if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 46; \
+ if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 40; \
+ if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 34; \
+ if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 28; \
+ if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 22; \
+ if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 16; \
+ res = GUINT64_FROM_BE(res); \
+ memcpy(o, &res, sizeof(res)); \
+ c += 8; \
+ o += 6; \
+ outl += 6; \
+ inlen -= 8; \
+ } \
+ } while (0)
+
+#define INNER_LOOP_32 \
+ do { \
+ uint32_t str, res, dec; \
+ bool aligned = rspamd_is_aligned_as(c, str); \
+ while (inlen >= 8) { \
+ if (aligned) { str = *(uint32_t *) c; } \
+ else { \
+ memcpy(&str, c, sizeof(str)); \
+ } \
+ str = GUINT32_TO_BE(str); \
+ if ((dec = base64_table_dec[str >> 24]) > 63) { \
+ break; \
+ } \
+ res = dec << 26; \
+ if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 20; \
+ if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 14; \
+ if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 8; \
+ res = GUINT32_FROM_BE(res); \
+ memcpy(o, &res, sizeof(res)); \
+ c += 4; \
+ o += 3; \
+ outl += 3; \
+ inlen -= 4; \
+ } \
+ } while (0)
+
+
+int base64_decode_ref(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
{
ssize_t ret = 0;
- const uint8_t *c = (const uint8_t *)in;
- uint8_t *o = (uint8_t *)out;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
uint8_t q, carry;
size_t outl = 0;
size_t leftover = 0;
@@ -140,7 +147,7 @@ repeat:
ret = 0;
break;
}
- carry = (uint8_t)(q << 2);
+ carry = (uint8_t) (q << 2);
leftover++;
case 1:
@@ -153,7 +160,7 @@ repeat:
break;
}
*o++ = carry | (q >> 4);
- carry = (uint8_t)(q << 4);
+ carry = (uint8_t) (q << 4);
leftover++;
outl++;
@@ -178,13 +185,13 @@ repeat:
}
}
else {
- leftover --;
+ leftover--;
}
/* If we get here, there was an error: */
break;
}
*o++ = carry | (q >> 2);
- carry = (uint8_t)(q << 6);
+ carry = (uint8_t) (q << 6);
leftover++;
outl++;
@@ -219,8 +226,8 @@ repeat:
if (!ret && inlen > 0) {
/* Skip to the next valid character in input */
while (inlen > 0 && base64_table_dec[*c] >= 254) {
- c ++;
- inlen --;
+ c++;
+ inlen--;
}
if (inlen > 0) {
diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c
index f3b812dc9..36070abe8 100644
--- a/src/libcryptobox/base64/sse42.c
+++ b/src/libcryptobox/base64/sse42.c
@@ -73,9 +73,9 @@ extern const uint8_t base64_table_dec[256];
static inline __m128i
-dec_reshuffle (__m128i in) __attribute__((__target__("sse4.2")));
+dec_reshuffle(__m128i in) __attribute__((__target__("sse4.2")));
-static inline __m128i dec_reshuffle (__m128i in)
+static inline __m128i dec_reshuffle(__m128i in)
{
// Mask in a single byte per shift:
const __m128i maskB2 = _mm_set1_epi32(0x003F0000);
@@ -92,64 +92,61 @@ static inline __m128i dec_reshuffle (__m128i in)
// Reshuffle and repack into 12-byte output format:
return _mm_shuffle_epi8(out, _mm_setr_epi8(
- 3, 2, 1,
- 7, 6, 5,
- 11, 10, 9,
- 15, 14, 13,
- -1, -1, -1, -1));
+ 3, 2, 1,
+ 7, 6, 5,
+ 11, 10, 9,
+ 15, 14, 13,
+ -1, -1, -1, -1));
}
-#define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n))
-
-#define INNER_LOOP_SSE42 \
- while (inlen >= 24) { \
- __m128i str = _mm_loadu_si128((__m128i *)c); \
- const __m128i lut = _mm_setr_epi8( \
- 19, 16, 4, 4, \
- 4, 4, 4, 4, \
- 4, 4, 4, 4, \
- 0, 0, -71, -65 \
- ); \
- const __m128i range = _mm_setr_epi8( \
- '+','+', \
- '+','+', \
- '+','+', \
- '+','+', \
- '/','/', \
- '0','9', \
- 'A','Z', \
- 'a','z'); \
+#define CMPGT(s, n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n))
+
+#define INNER_LOOP_SSE42 \
+ while (inlen >= 24) { \
+ __m128i str = _mm_loadu_si128((__m128i *) c); \
+ const __m128i lut = _mm_setr_epi8( \
+ 19, 16, 4, 4, \
+ 4, 4, 4, 4, \
+ 4, 4, 4, 4, \
+ 0, 0, -71, -65); \
+ const __m128i range = _mm_setr_epi8( \
+ '+', '+', \
+ '+', '+', \
+ '+', '+', \
+ '+', '+', \
+ '/', '/', \
+ '0', '9', \
+ 'A', 'Z', \
+ 'a', 'z'); \
if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \
- seen_error = true; \
- break; \
- } \
- __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \
- __m128i mask45 = CMPGT(str, 64); \
- __m128i mask5 = CMPGT(str, 96); \
- indices = _mm_andnot_si128(mask45, indices); \
- mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \
- indices = _mm_add_epi8(indices, mask45); \
- indices = _mm_add_epi8(indices, mask5); \
- __m128i delta = _mm_shuffle_epi8(lut, indices); \
- str = _mm_add_epi8(str, delta); \
- str = dec_reshuffle(str); \
- _mm_storeu_si128((__m128i *)o, str); \
- c += 16; \
- o += 12; \
- outl += 12; \
- inlen -= 16; \
+ seen_error = true; \
+ break; \
+ } \
+ __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \
+ __m128i mask45 = CMPGT(str, 64); \
+ __m128i mask5 = CMPGT(str, 96); \
+ indices = _mm_andnot_si128(mask45, indices); \
+ mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \
+ indices = _mm_add_epi8(indices, mask45); \
+ indices = _mm_add_epi8(indices, mask5); \
+ __m128i delta = _mm_shuffle_epi8(lut, indices); \
+ str = _mm_add_epi8(str, delta); \
+ str = dec_reshuffle(str); \
+ _mm_storeu_si128((__m128i *) o, str); \
+ c += 16; \
+ o += 12; \
+ outl += 12; \
+ inlen -= 16; \
}
-int
-base64_decode_sse42 (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
-int
-base64_decode_sse42 (const char *in, size_t inlen,
- unsigned char *out, size_t *outlen)
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
{
ssize_t ret = 0;
- const uint8_t *c = (const uint8_t *)in;
- uint8_t *o = (uint8_t *)out;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
uint8_t q, carry;
size_t outl = 0;
size_t leftover = 0;
@@ -159,7 +156,7 @@ repeat:
switch (leftover) {
for (;;) {
case 0:
- if (G_LIKELY (!seen_error)) {
+ if (G_LIKELY(!seen_error)) {
INNER_LOOP_SSE42
}
@@ -209,7 +206,7 @@ repeat:
}
}
else {
- leftover --;
+ leftover--;
}
/* If we get here, there was an error: */
break;
@@ -250,8 +247,8 @@ repeat:
if (!ret && inlen > 0) {
/* Skip to the next valid character in input */
while (inlen > 0 && base64_table_dec[*c] >= 254) {
- c ++;
- inlen --;
+ c++;
+ inlen--;
}
if (inlen > 0) {