author     Vsevolod Stakhov <vsevolod@highsecure.ru>   2018-05-22 14:15:47 +0100
committer  Vsevolod Stakhov <vsevolod@highsecure.ru>   2018-05-22 14:15:47 +0100
commit     b6156175b6e0e9a1d8063571f81c428f4aac570f (patch)
tree       6608e0f98d0807497ec9e6abb6e14e447b574231 /contrib/t1ha
parent     061d3d2e1da9acf6f17014cefab9c66af7afa1d5 (diff)
[Minor] Backport fixes from t1ha

Backport the upstream t1ha changes that replace the UNALIGNED_OK /
copy-to-aligned-buffer scheme with a three-level
T1HA_CONFIG_UNALIGNED_ACCESS setting, add prefetch hints to the main
loops, and reorder the fetch helpers so the 16- and 32-bit variants are
defined before the 64-bit ones that reuse them in the byte-by-byte
fallback.
Diffstat (limited to 'contrib/t1ha')
-rw-r--r--  contrib/t1ha/t1ha1.c       51
-rw-r--r--  contrib/t1ha/t1ha2.c       95
-rw-r--r--  contrib/t1ha/t1ha_bits.h  226
3 files changed, 221 insertions, 151 deletions
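
The core of the backport, visible in the t1ha1.c and t1ha2.c hunks below, is that the old UNALIGNED_OK check plus memcpy() into an aligned stack buffer is replaced by choosing aligned or unaligned fetchers: at compile time when unaligned loads are known to be efficient, otherwise at run time based on the pointer's actual alignment. The following stand-alone sketch only illustrates that dispatch pattern; the macro and function names (UA_ACCESS, body_unaligned, hash_dispatch) are hypothetical, not t1ha identifiers.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical sketch of the dispatch pattern introduced by this patch:
 * no copying into an aligned buffer, just pick the right fetch variant. */
#define UA_UNABLE 0
#define UA_SLOW 1
#define UA_EFFICIENT 2

#ifndef UA_ACCESS
#define UA_ACCESS UA_EFFICIENT /* assumed platform default for this sketch */
#endif

/* Stand-ins for the macro-generated hash bodies. */
uint64_t body_unaligned(const void *data, size_t len) { (void)data; return len; }
uint64_t body_aligned(const void *data, size_t len) { (void)data; return len; }

uint64_t hash_dispatch(const void *data, size_t len) {
#if UA_ACCESS == UA_EFFICIENT
  return body_unaligned(data, len); /* unaligned loads are cheap here */
#else
  if (((uintptr_t)data & (sizeof(uint64_t) - 1)) != 0)
    return body_unaligned(data, len); /* misaligned input: byte-safe path */
  return body_aligned(data, len);     /* aligned input: direct loads */
#endif
}
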
diff --git a/contrib/t1ha/t1ha1.c b/contrib/t1ha/t1ha1.c
index 956f7e24e..6e25d37f4 100644
--- a/contrib/t1ha/t1ha1.c
+++ b/contrib/t1ha/t1ha1.c
@@ -58,21 +58,21 @@ static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) {
return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0);
}
-/* TODO C++ template in the next version */
-#define T1HA1_BODY(ENDIANNES, ALIGNESS, DOCOPY) \
+/* TODO: C++ template in the next version */
+#define T1HA1_BODY(ENDIANNES, ALIGNESS) \
+ const uint64_t *v = (const uint64_t *)data; \
if (unlikely(len > 32)) { \
uint64_t c = rot64(len, 17) + seed; \
uint64_t d = len ^ rot64(seed, 17); \
- const void *detent = (const uint8_t *)data + len - 31; \
+ const uint64_t *detent = \
+ (const uint64_t *)((const uint8_t *)data + len - 31); \
do { \
- const uint64_t *v = (const uint64_t *)data; \
- if (DOCOPY) \
- memcpy((void *)(v = align), data, 32); \
- \
const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \
const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \
const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \
const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \
+ v += 4; \
+ prefetch(v); \
\
const uint64_t d02 = w0 ^ rot64(w2 + d, 17); \
const uint64_t c13 = w1 ^ rot64(w3 + c, 17); \
@@ -80,18 +80,13 @@ static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) {
d -= b ^ rot64(w1, 31); \
a ^= prime_1 * (d02 + w3); \
b ^= prime_0 * (c13 + w2); \
- data = (const uint64_t *)data + 4; \
- } while (likely(data < detent)); \
+ } while (likely(v < detent)); \
\
a ^= prime_6 * (rot64(c, 17) + d); \
b ^= prime_5 * (c + rot64(d, 17)); \
len &= 31; \
} \
\
- const uint64_t *v = (const uint64_t *)data; \
- if (unlikely(need_copy4align) && len > 8) \
- memcpy((void *)(v = align), data, len); \
- \
switch (len) { \
default: \
b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_4); \
@@ -134,26 +129,30 @@ uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) {
uint64_t a = seed;
uint64_t b = len;
- const bool need_copy4align =
- (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
- uint64_t align[4];
- if (need_copy4align) {
- T1HA1_BODY(le, aligned, true);
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+ T1HA1_BODY(le, unaligned);
+#else
+ const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+ if (misaligned) {
+ T1HA1_BODY(le, unaligned);
} else {
- T1HA1_BODY(le, unaligned, false);
+ T1HA1_BODY(le, aligned);
}
+#endif
}
uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) {
uint64_t a = seed;
uint64_t b = len;
- const bool need_copy4align =
- (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
- uint64_t align[4];
- if (need_copy4align) {
- T1HA1_BODY(be, aligned, true);
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+ T1HA1_BODY(be, unaligned);
+#else
+ const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+ if (misaligned) {
+ T1HA1_BODY(be, unaligned);
} else {
- T1HA1_BODY(be, unaligned, false);
+ T1HA1_BODY(be, aligned);
}
-}
\ No newline at end of file
+#endif
+}
diff --git a/contrib/t1ha/t1ha2.c b/contrib/t1ha/t1ha2.c
index 95f646da4..4cb5281ee 100644
--- a/contrib/t1ha/t1ha2.c
+++ b/contrib/t1ha/t1ha2.c
@@ -56,7 +56,7 @@ static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
s->n.d = ~y + rot64(x, 19);
}
-/* TODO C++ template in the next version */
+/* TODO: C++ template in the next version */
#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \
do { \
t1ha_state256_t *const s = state; \
@@ -67,8 +67,8 @@ static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
\
const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \
const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \
- s->n.d ^= s->n.b + rot64(w1, 38); \
s->n.c ^= s->n.a + rot64(w0, 57); \
+ s->n.d ^= s->n.b + rot64(w1, 38); \
s->n.b ^= prime_6 * (c13 + w2); \
s->n.a ^= prime_5 * (d02 + w3); \
} while (0)
@@ -78,26 +78,23 @@ static __always_inline void squash(t1ha_state256_t *s) {
s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d);
}
-/* TODO C++ template in the next version */
-#define T1HA2_LOOP(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
+/* TODO: C++ template in the next version */
+#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \
do { \
const void *detent = (const uint8_t *)data + len - 31; \
do { \
const uint64_t *v = (const uint64_t *)data; \
- if (BUFFER4COPY != NULL) \
- memcpy((void *)(v = BUFFER4COPY), data, 32); \
- T1HA2_UPDATE(le, unaligned, state, v); \
data = (const uint64_t *)data + 4; \
+ prefetch(data); \
+ T1HA2_UPDATE(le, ALIGNESS, state, v); \
} while (likely(data < detent)); \
} while (0)
-/* TODO C++ template in the next version */
-#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
+/* TODO: C++ template in the next version */
+#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \
do { \
t1ha_state256_t *const s = state; \
const uint64_t *v = (const uint64_t *)data; \
- if (BUFFER4COPY != NULL) \
- memcpy((void *)(v = BUFFER4COPY), data, len); \
switch (len) { \
default: \
mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
@@ -141,13 +138,11 @@ static __always_inline void squash(t1ha_state256_t *s) {
} \
} while (0)
-/* TODO C++ template in the next version */
-#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
+/* TODO: C++ template in the next version */
+#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len) \
do { \
t1ha_state256_t *const s = state; \
const uint64_t *v = (const uint64_t *)data; \
- if (BUFFER4COPY != NULL) \
- memcpy((void *)(v = BUFFER4COPY), data, len); \
switch (len) { \
default: \
mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
@@ -207,26 +202,34 @@ uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
t1ha_state256_t state;
init_ab(&state, seed, length);
- const bool need_copy4align =
- (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
- if (need_copy4align) {
- uint64_t buffer4align[4];
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+ if (unlikely(length > 32)) {
+ init_cd(&state, seed, length);
+ T1HA2_LOOP(le, unaligned, &state, data, length);
+ squash(&state);
+ length &= 31;
+ }
+ T1HA2_TAIL_AB(le, unaligned, &state, data, length);
+#else
+ const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+ if (misaligned) {
if (unlikely(length > 32)) {
init_cd(&state, seed, length);
- T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
+ T1HA2_LOOP(le, unaligned, &state, data, length);
squash(&state);
length &= 31;
}
- T1HA2_TAIL_AB(le, aligned, buffer4align, &state, data, length);
+ T1HA2_TAIL_AB(le, unaligned, &state, data, length);
} else {
if (unlikely(length > 32)) {
init_cd(&state, seed, length);
- T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
+ T1HA2_LOOP(le, aligned, &state, data, length);
squash(&state);
length &= 31;
}
- T1HA2_TAIL_AB(le, unaligned, NULL, &state, data, length);
+ T1HA2_TAIL_AB(le, aligned, &state, data, length);
}
+#endif
}
uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
@@ -236,22 +239,28 @@ uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
init_ab(&state, seed, length);
init_cd(&state, seed, length);
- const bool need_copy4align =
- (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
- if (need_copy4align) {
- uint64_t buffer4align[4];
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+ if (unlikely(length > 32)) {
+ T1HA2_LOOP(le, unaligned, &state, data, length);
+ length &= 31;
+ }
+ T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
+#else
+ const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+ if (misaligned) {
if (unlikely(length > 32)) {
- T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
+ T1HA2_LOOP(le, unaligned, &state, data, length);
length &= 31;
}
- T1HA2_TAIL_ABCD(le, aligned, buffer4align, &state, data, length);
+ T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
} else {
if (unlikely(length > 32)) {
- T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
+ T1HA2_LOOP(le, aligned, &state, data, length);
length &= 31;
}
- T1HA2_TAIL_ABCD(le, unaligned, NULL, &state, data, length);
+ T1HA2_TAIL_ABCD(le, aligned, &state, data, length);
}
+#endif
}
//------------------------------------------------------------------------------
@@ -283,13 +292,16 @@ void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
}
if (length >= 32) {
- const bool need_copy4align =
- (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
- if (need_copy4align) {
- T1HA2_LOOP(le, aligned, ctx->buffer.u64, &ctx->state, data, length);
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+ T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
+#else
+ const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+ if (misaligned) {
+ T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
} else {
- T1HA2_LOOP(le, unaligned, NULL, &ctx->state, data, length);
+ T1HA2_LOOP(le, aligned, &ctx->state, data, length);
}
+#endif
length &= 31;
}
@@ -307,13 +319,8 @@ uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
if (likely(!extra_result)) {
squash(&ctx->state);
- T1HA2_TAIL_AB(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
- ctx->partial);
- return final64(ctx->state.n.a, ctx->state.n.b);
+ T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
}
- T1HA2_TAIL_ABCD(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
- ctx->partial);
- return final128(ctx->state.n.a, ctx->state.n.b, ctx->state.n.c,
- ctx->state.n.d, extra_result);
-}
\ No newline at end of file
+ T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
+}
diff --git a/contrib/t1ha/t1ha_bits.h b/contrib/t1ha/t1ha_bits.h
index e3815a4e7..454e43aed 100644
--- a/contrib/t1ha/t1ha_bits.h
+++ b/contrib/t1ha/t1ha_bits.h
@@ -72,19 +72,23 @@
#error Unsupported byte order.
#endif
-#if !defined(UNALIGNED_OK)
-#if (defined(__ia32__) || defined(__e2k__) || \
- defined(__ARM_FEATURE_UNALIGNED)) && \
- !defined(__ALIGNED__)
-#define UNALIGNED_OK 1
+#define T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE 0
+#define T1HA_CONFIG_UNALIGNED_ACCESS__SLOW 1
+#define T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT 2
+
+#ifndef T1HA_CONFIG_UNALIGNED_ACCESS
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+#elif defined(__ia32__)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+#elif defined(__e2k__)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__SLOW
+#elif defined(__ARM_FEATURE_UNALIGNED)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
#else
-#define UNALIGNED_OK 0
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
#endif
-#endif /* UNALIGNED_OK */
-
-#if UNALIGNED_OK && !defined(PAGESIZE)
-#define PAGESIZE 4096
-#endif /* PAGESIZE */
+#endif /* T1HA_CONFIG_UNALIGNED_ACCESS */
#define ALIGNMENT_16 2
#define ALIGNMENT_32 4
@@ -94,6 +98,10 @@
#define ALIGNMENT_64 4
#endif
+#ifndef PAGESIZE
+#define PAGESIZE 4096
+#endif /* PAGESIZE */
+
/***************************************************************************/
#ifndef __has_builtin
@@ -118,6 +126,13 @@
#if __GNUC_PREREQ(4, 4) || defined(__clang__)
+#if defined(__ia32__) || defined(__e2k__)
+#include <x86intrin.h>
+#endif
+
+#if defined(__ia32__) && !defined(__cpuid_count)
+#include <cpuid.h>
+#endif
#if defined(__e2k__)
#include <e2kbuiltin.h>
@@ -393,10 +408,10 @@ typedef struct {
#endif /* read_unaligned */
#ifndef read_aligned
-#if __has_builtin(assume_aligned)
+#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned)
#define read_aligned(ptr, bits) \
(*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits))
-#elif __has_attribute(aligned) && !defined(__clang__)
+#elif (__GNUC_PREREQ(3, 3) || __has_attribute(aligned)) && !defined(__clang__)
#define read_aligned(ptr, bits) \
(*(const uint##bits##_t __attribute__((aligned(ALIGNMENT_##bits))) *)(ptr))
#elif __has_attribute(assume_aligned)
@@ -427,6 +442,20 @@ static __always_inline const
#endif
#endif /* read_aligned */
+#ifndef prefetch
+#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) && \
+ !defined(__ia32__)
+#define prefetch(ptr) __builtin_prefetch(ptr)
+#elif defined(_M_ARM64) || defined(_M_ARM)
+#define prefetch(ptr) __prefetch(ptr)
+#else
+#define prefetch(ptr) \
+ do { \
+ (void)(ptr); \
+ } while (0)
+#endif
+#endif /* prefetch */
+
#if __has_warning("-Wconstant-logical-operand")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
@@ -451,28 +480,33 @@ static __always_inline const
/*---------------------------------------------------------- Little Endian */
-#ifndef fetch64_le_aligned
-static __always_inline uint64_t fetch64_le_aligned(const void *v) {
+#ifndef fetch16_le_aligned
+static __always_inline uint16_t fetch16_le_aligned(const void *v) {
+ assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return read_aligned(v, 64);
+ return read_aligned(v, 16);
#else
- return bswap64(read_aligned(v, 64));
+ return bswap16(read_aligned(v, 16));
#endif
}
-#endif /* fetch64_le_aligned */
+#endif /* fetch16_le_aligned */
-#ifndef fetch64_le_unaligned
-static __always_inline uint64_t fetch64_le_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return read_unaligned(v, 64);
+#ifndef fetch16_le_unaligned
+static __always_inline uint16_t fetch16_le_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+ const uint8_t *p = (const uint8_t *)v;
+ return p[0] | (uint16_t)p[1] << 8;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return read_unaligned(v, 16);
#else
- return bswap64(read_unaligned(v, 64));
+ return bswap16(read_unaligned(v, 16));
#endif
}
-#endif /* fetch64_le_unaligned */
+#endif /* fetch16_le_unaligned */
#ifndef fetch32_le_aligned
static __always_inline uint32_t fetch32_le_aligned(const void *v) {
+ assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return read_aligned(v, 32);
#else
@@ -483,7 +517,10 @@ static __always_inline uint32_t fetch32_le_aligned(const void *v) {
#ifndef fetch32_le_unaligned
static __always_inline uint32_t fetch32_le_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+ return fetch16_le_unaligned(v) |
+ (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return read_unaligned(v, 32);
#else
return bswap32(read_unaligned(v, 32));
@@ -491,25 +528,29 @@ static __always_inline uint32_t fetch32_le_unaligned(const void *v) {
}
#endif /* fetch32_le_unaligned */
-#ifndef fetch16_le_aligned
-static __always_inline uint16_t fetch16_le_aligned(const void *v) {
+#ifndef fetch64_le_aligned
+static __always_inline uint64_t fetch64_le_aligned(const void *v) {
+ assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return read_aligned(v, 16);
+ return read_aligned(v, 64);
#else
- return bswap16(read_aligned(v, 16));
+ return bswap64(read_aligned(v, 64));
#endif
}
-#endif /* fetch16_le_aligned */
+#endif /* fetch64_le_aligned */
-#ifndef fetch16_le_unaligned
-static __always_inline uint16_t fetch16_le_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return read_unaligned(v, 16);
+#ifndef fetch64_le_unaligned
+static __always_inline uint64_t fetch64_le_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+ return fetch32_le_unaligned(v) |
+ (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return read_unaligned(v, 64);
#else
- return bswap16(read_unaligned(v, 16));
+ return bswap64(read_unaligned(v, 64));
#endif
}
-#endif /* fetch16_le_unaligned */
+#endif /* fetch64_le_unaligned */
static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
const uint8_t *const p = (const uint8_t *)v;
@@ -517,10 +558,12 @@ static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
/* We can perform a 'oneshot' read, which is little bit faster. */
const unsigned shift = ((8 - tail) & 7) << 3;
return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift);
-#endif /* 'oneshot' read */
-
+#else
uint64_t r = 0;
switch (tail & 7) {
+ default:
+ unreachable();
+/* fall through */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* For most CPUs this code is better when not needed byte reordering. */
case 0:
@@ -577,14 +620,15 @@ static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
return r + p[0];
#endif
}
- unreachable();
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
}
-#if T1HA_USE_FAST_ONESHOT_READ && UNALIGNED_OK && defined(PAGESIZE) && \
- !defined(__SANITIZE_ADDRESS__) && !defined(__sun)
+#if T1HA_USE_FAST_ONESHOT_READ && \
+ T1HA_CONFIG_UNALIGNED_ACCESS != T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE && \
+ defined(PAGESIZE) && !defined(__sun) && !defined(__SANITIZE_ADDRESS__)
#define can_read_underside(ptr, size) \
((size) <= sizeof(uintptr_t) && ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0)
-#endif /* can_fast_read */
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
static __always_inline uint64_t tail64_le_unaligned(const void *v,
size_t tail) {
@@ -600,11 +644,14 @@ static __always_inline uint64_t tail64_le_unaligned(const void *v,
return fetch64_le_unaligned(p) >> shift;
}
return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift);
-#endif /* 'oneshot' read */
-
+#else
uint64_t r = 0;
switch (tail & 7) {
-#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ default:
+ unreachable();
+/* fall through */
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* For most CPUs this code is better when not needed
* copying for alignment or byte reordering. */
case 0:
@@ -663,36 +710,41 @@ static __always_inline uint64_t tail64_le_unaligned(const void *v,
return r + p[0];
#endif
}
- unreachable();
+#endif /* can_read_underside */
}
/*------------------------------------------------------------- Big Endian */
-#ifndef fetch64_be_aligned
-static __maybe_unused __always_inline uint64_t
-fetch64_be_aligned(const void *v) {
+#ifndef fetch16_be_aligned
+static __maybe_unused __always_inline uint16_t
+fetch16_be_aligned(const void *v) {
+ assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return read_aligned(v, 64);
+ return read_aligned(v, 16);
#else
- return bswap64(read_aligned(v, 64));
+ return bswap16(read_aligned(v, 16));
#endif
}
-#endif /* fetch64_be_aligned */
+#endif /* fetch16_be_aligned */
-#ifndef fetch64_be_unaligned
-static __maybe_unused __always_inline uint64_t
-fetch64_be_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return read_unaligned(v, 64);
+#ifndef fetch16_be_unaligned
+static __maybe_unused __always_inline uint16_t
+fetch16_be_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+ const uint8_t *p = (const uint8_t *)v;
+ return (uint16_t)p[0] << 8 | p[1];
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return read_unaligned(v, 16);
#else
- return bswap64(read_unaligned(v, 64));
+ return bswap16(read_unaligned(v, 16));
#endif
}
-#endif /* fetch64_be_unaligned */
+#endif /* fetch16_be_unaligned */
#ifndef fetch32_be_aligned
static __maybe_unused __always_inline uint32_t
fetch32_be_aligned(const void *v) {
+ assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return read_aligned(v, 32);
#else
@@ -704,7 +756,10 @@ fetch32_be_aligned(const void *v) {
#ifndef fetch32_be_unaligned
static __maybe_unused __always_inline uint32_t
fetch32_be_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+ return (uint32_t)fetch16_be_unaligned(v) << 16 |
+ fetch16_be_unaligned((const uint8_t *)v + 2);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return read_unaligned(v, 32);
#else
return bswap32(read_unaligned(v, 32));
@@ -712,27 +767,31 @@ fetch32_be_unaligned(const void *v) {
}
#endif /* fetch32_be_unaligned */
-#ifndef fetch16_be_aligned
-static __maybe_unused __always_inline uint16_t
-fetch16_be_aligned(const void *v) {
+#ifndef fetch64_be_aligned
+static __maybe_unused __always_inline uint64_t
+fetch64_be_aligned(const void *v) {
+ assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return read_aligned(v, 16);
+ return read_aligned(v, 64);
#else
- return bswap16(read_aligned(v, 16));
+ return bswap64(read_aligned(v, 64));
#endif
}
-#endif /* fetch16_be_aligned */
+#endif /* fetch64_be_aligned */
-#ifndef fetch16_be_unaligned
-static __maybe_unused __always_inline uint16_t
-fetch16_be_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return read_unaligned(v, 16);
+#ifndef fetch64_be_unaligned
+static __maybe_unused __always_inline uint64_t
+fetch64_be_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+ return (uint64_t)fetch32_be_unaligned(v) << 32 |
+ fetch32_be_unaligned((const uint8_t *)v + 4);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return read_unaligned(v, 64);
#else
- return bswap16(read_unaligned(v, 16));
+ return bswap64(read_unaligned(v, 64));
#endif
}
-#endif /* fetch16_be_unaligned */
+#endif /* fetch64_be_unaligned */
static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
size_t tail) {
@@ -741,9 +800,11 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
/* We can perform a 'oneshot' read, which is little bit faster. */
const unsigned shift = ((8 - tail) & 7) << 3;
return fetch64_be_aligned(p) >> shift;
-#endif /* 'oneshot' read */
-
+#else
switch (tail & 7) {
+ default:
+ unreachable();
+/* fall through */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* For most CPUs this code is better when not byte reordering. */
case 1:
@@ -762,7 +823,7 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
return (uint64_t)fetch32_be_aligned(p) << 24 |
(uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6];
case 0:
- return fetch64_be(p);
+ return fetch64_be_aligned(p);
#else
case 1:
return p[0];
@@ -789,7 +850,7 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
(uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
#endif
}
- unreachable();
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
}
static __maybe_unused __always_inline uint64_t
@@ -806,10 +867,13 @@ tail64_be_unaligned(const void *v, size_t tail) {
return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift);
}
return fetch64_be_unaligned(p) >> shift;
-#endif /* 'oneshot' read */
-
+#else
switch (tail & 7) {
-#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ default:
+ unreachable();
+/* fall through */
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* For most CPUs this code is better when not needed
* copying for alignment or byte reordering. */
case 1:
@@ -858,7 +922,7 @@ tail64_be_unaligned(const void *v, size_t tail) {
(uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
#endif
}
- unreachable();
+#endif /* can_read_underside */
}
/***************************************************************************/
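
For targets classified as T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE, the t1ha_bits.h hunks above compose wider loads from narrower ones rather than dereferencing a misaligned pointer. A self-contained sketch of that fallback for the little-endian case, with illustrative helper names (le16_bytes and friends) rather than the t1ha fetchers, could look like this:

#include <stdint.h>

/* Build wider little-endian loads from byte reads so that no unaligned
 * memory access is ever issued; mirrors the __UNABLE branches above. */
static inline uint16_t le16_bytes(const void *v) {
  const uint8_t *p = (const uint8_t *)v;
  return (uint16_t)(p[0] | (uint16_t)p[1] << 8);
}

static inline uint32_t le32_bytes(const void *v) {
  return le16_bytes(v) | (uint32_t)le16_bytes((const uint8_t *)v + 2) << 16;
}

static inline uint64_t le64_bytes(const void *v) {
  return le32_bytes(v) | (uint64_t)le32_bytes((const uint8_t *)v + 4) << 32;
}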