mirror of https://github.com/rspamd/rspamd.git (synced 2024-07-29 20:17:47 +02:00)
parent 1a21511cf9, commit 974d1dbcab
@@ -58,81 +58,89 @@ static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) {
  return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0);
}

/* TODO C++ template in the next version */
#define T1HA1_BODY(ENDIANNES, ALIGNESS, DOCOPY) \
  if (unlikely(len > 32)) { \
    uint64_t c = rot64(len, 17) + seed; \
    uint64_t d = len ^ rot64(seed, 17); \
    const void *detent = (const uint8_t *)data + len - 31; \
    do { \
      const uint64_t *v = (const uint64_t *)data; \
      if (DOCOPY) \
        memcpy((void *)(v = align), data, 32); \
\
      const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \
      const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \
      const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \
      const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \
\
      const uint64_t d02 = w0 ^ rot64(w2 + d, 17); \
      const uint64_t c13 = w1 ^ rot64(w3 + c, 17); \
      c += a ^ rot64(w0, 41); \
      d -= b ^ rot64(w1, 31); \
      a ^= prime_1 * (d02 + w3); \
      b ^= prime_0 * (c13 + w2); \
      data = (const uint64_t *)data + 4; \
    } while (likely(data < detent)); \
\
    a ^= prime_6 * (rot64(c, 17) + d); \
    b ^= prime_5 * (c + rot64(d, 17)); \
    len &= 31; \
  } \
\
  const uint64_t *v = (const uint64_t *)data; \
  if (unlikely(need_copy4align) && len > 8) \
    memcpy((void *)(v = align), data, len); \
\
  switch (len) { \
  default: \
    b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_4); \
  /* fall through */ \
  case 24: \
  case 23: \
  case 22: \
  case 21: \
  case 20: \
  case 19: \
  case 18: \
  case 17: \
    a += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_3); \
  /* fall through */ \
  case 16: \
  case 15: \
  case 14: \
  case 13: \
  case 12: \
  case 11: \
  case 10: \
  case 9: \
    b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_2); \
  /* fall through */ \
  case 8: \
  case 7: \
  case 6: \
  case 5: \
  case 4: \
  case 3: \
  case 2: \
  case 1: \
    a += mux64(tail64_##ENDIANNES##_##ALIGNESS(v, len), prime_1); \
  /* fall through */ \
  case 0: \
    return final_weak_avalanche(a, b); \
  }

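A minimal sketch of how the macro's token pasting resolves, assuming the
fetch64_*_aligned/_unaligned helpers this commit introduces in t1ha_bits.h
(see further down in the diff):

    /* T1HA1_BODY(le, unaligned, false) expands its fetches to: */
    const uint64_t w0 = fetch64_le_unaligned(v + 0);
    /* while T1HA1_BODY(be, aligned, true) yields: */
    const uint64_t w0 = fetch64_be_aligned(v + 0);
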
uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) {
  uint64_t a = seed;
  uint64_t b = len;

  const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
  const bool need_copy4align =
      (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
  uint64_t align[4];

  if (unlikely(len > 32)) {
    uint64_t c = rot64(len, 17) + seed;
    uint64_t d = len ^ rot64(seed, 17);
    const void *detent = (const uint8_t *)data + len - 31;
    do {
      const uint64_t *v = (const uint64_t *)data;
      if (unlikely(need_align))
        v = (const uint64_t *)memcpy(&align, unaligned(v), 32);

      uint64_t w0 = fetch64_le(v + 0);
      uint64_t w1 = fetch64_le(v + 1);
      uint64_t w2 = fetch64_le(v + 2);
      uint64_t w3 = fetch64_le(v + 3);

      uint64_t d02 = w0 ^ rot64(w2 + d, 17);
      uint64_t c13 = w1 ^ rot64(w3 + c, 17);
      c += a ^ rot64(w0, 41);
      d -= b ^ rot64(w1, 31);
      a ^= prime_1 * (d02 + w3);
      b ^= prime_0 * (c13 + w2);
      data = (const uint64_t *)data + 4;
    } while (likely(data < detent));

    a ^= prime_6 * (rot64(c, 17) + d);
    b ^= prime_5 * (c + rot64(d, 17));
    len &= 31;
  }

  const uint64_t *v = (const uint64_t *)data;
  if (unlikely(need_align) && len > 8)
    v = (const uint64_t *)memcpy(&align, unaligned(v), len);

  switch (len) {
  default:
    b += mux64(fetch64_le(v++), prime_4);
  /* fall through */
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    a += mux64(fetch64_le(v++), prime_3);
  /* fall through */
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    b += mux64(fetch64_le(v++), prime_2);
  /* fall through */
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    a += mux64(tail64_le(v, len), prime_1);
  /* fall through */
  case 0:
    return final_weak_avalanche(a, b);
  if (need_copy4align) {
    T1HA1_BODY(le, aligned, true);
  } else {
    T1HA1_BODY(le, unaligned, false);
  }
}

@@ -140,76 +148,12 @@ uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) {
  uint64_t a = seed;
  uint64_t b = len;

  const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
  const bool need_copy4align =
      (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
  uint64_t align[4];

  if (unlikely(len > 32)) {
    uint64_t c = rot64(len, 17) + seed;
    uint64_t d = len ^ rot64(seed, 17);
    const void *detent = (const uint8_t *)data + len - 31;
    do {
      const uint64_t *v = (const uint64_t *)data;
      if (unlikely(need_align))
        v = (const uint64_t *)memcpy(&align, unaligned(v), 32);

      uint64_t w0 = fetch64_be(v + 0);
      uint64_t w1 = fetch64_be(v + 1);
      uint64_t w2 = fetch64_be(v + 2);
      uint64_t w3 = fetch64_be(v + 3);

      uint64_t d02 = w0 ^ rot64(w2 + d, 17);
      uint64_t c13 = w1 ^ rot64(w3 + c, 17);
      c += a ^ rot64(w0, 41);
      d -= b ^ rot64(w1, 31);
      a ^= prime_1 * (d02 + w3);
      b ^= prime_0 * (c13 + w2);
      data = (const uint64_t *)data + 4;
    } while (likely(data < detent));

    a ^= prime_6 * (rot64(c, 17) + d);
    b ^= prime_5 * (c + rot64(d, 17));
    len &= 31;
  if (need_copy4align) {
    T1HA1_BODY(be, aligned, true);
  } else {
    T1HA1_BODY(be, unaligned, false);
  }

  const uint64_t *v = (const uint64_t *)data;
  if (unlikely(need_align) && len > 8)
    v = (const uint64_t *)memcpy(&align, unaligned(v), len);

  switch (len) {
  default:
    b += mux64(fetch64_be(v++), prime_4);
  /* fall through */
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    a += mux64(fetch64_be(v++), prime_3);
  /* fall through */
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    b += mux64(fetch64_be(v++), prime_2);
  /* fall through */
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    a += mux64(tail64_be(v, len), prime_1);
  /* fall through */
  case 0:
    return final_weak_avalanche(a, b);
  }
}
}

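Both entry points share the same signature, as visible above; a minimal
caller looks like (buf and buf_len are placeholder names):

    uint64_t h = t1ha1_le(buf, buf_len, 42); /* little-endian variant */
    uint64_t g = t1ha1_be(buf, buf_len, 42); /* big-endian variant */
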
@@ -56,128 +56,140 @@ static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
  s->n.d = ~y + rot64(x, 19);
}

static __always_inline void update(t1ha_state256_t *__restrict s,
                                   const uint64_t *__restrict v) {
  uint64_t w0 = fetch64_le(v + 0);
  uint64_t w1 = fetch64_le(v + 1);
  uint64_t w2 = fetch64_le(v + 2);
  uint64_t w3 = fetch64_le(v + 3);

  uint64_t d02 = w0 + rot64(w2 + s->n.d, 56);
  uint64_t c13 = w1 + rot64(w3 + s->n.c, 19);
#ifdef __e2k__
  /* FIXME: temporary workaround for lcc's ELBRUS scheduling bug (LY) */
  s->n.c ^= s->n.a + rot64(w0, 57);
  s->n.d ^= s->n.b + rot64(w1, 38);
#else
  s->n.d ^= s->n.b + rot64(w1, 38);
  s->n.c ^= s->n.a + rot64(w0, 57);
#endif
  s->n.b ^= prime_6 * (c13 + w2);
  s->n.a ^= prime_5 * (d02 + w3);
}
/* TODO C++ template in the next version */
#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \
  do { \
    t1ha_state256_t *const s = state; \
    const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \
    const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \
    const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \
    const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \
\
    const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \
    const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \
    s->n.d ^= s->n.b + rot64(w1, 38); \
    s->n.c ^= s->n.a + rot64(w0, 57); \
    s->n.b ^= prime_6 * (c13 + w2); \
    s->n.a ^= prime_5 * (d02 + w3); \
  } while (0)

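The do { ... } while (0) wrapper is the standard idiom for multi-statement
macros: each T1HA2_* expansion behaves as a single statement, so an
instantiation stays safe even in unbraced contexts. A sketch:

    if (need_copy4align)
      T1HA2_UPDATE(le, aligned, &state, v); /* expands to one statement */
    else
      T1HA2_UPDATE(le, unaligned, &state, v);
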
static __always_inline void squash(t1ha_state256_t *s) {
  s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23));
  s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d);
}

static __always_inline const void *
loop(bool need_copy4align, uint64_t *__restrict buffer4align,
     t1ha_state256_t *__restrict s, const void *__restrict data, size_t len) {
  const void *detent = (const uint8_t *)data + len - 31;
  do {
    const uint64_t *v = (const uint64_t *)data;
    if (unlikely(need_copy4align))
      v = (const uint64_t *)memcpy(buffer4align, unaligned(v), 32);
    update(s, v);
    data = (const uint64_t *)data + 4;
  } while (likely(data < detent));
  return data;
}
/* TODO C++ template in the next version */
#define T1HA2_LOOP(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
  do { \
    const void *detent = (const uint8_t *)data + len - 31; \
    do { \
      const uint64_t *v = (const uint64_t *)data; \
      if (BUFFER4COPY != NULL) \
        memcpy((void *)(v = BUFFER4COPY), data, 32); \
      T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v); \
      data = (const uint64_t *)data + 4; \
    } while (likely(data < detent)); \
  } while (0)

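Because BUFFER4COPY is a compile-time constant at every instantiation
(either NULL or the address of a local buffer), the if (BUFFER4COPY != NULL)
test folds away after constant propagation, so the unaligned instantiations
carry no copy at all. A sketch of the two shapes the compiler is left with:

    T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
    /* -> plain fetch loop, no memcpy */
    T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
    /* -> memcpy into buffer4align, then aligned fetches */
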
static __always_inline void tail_ab(t1ha_state256_t *__restrict s,
                                    const uint64_t *__restrict v, size_t len) {
  switch (len) {
  default:
    mixup64(&s->n.a, &s->n.b, fetch64_le(v++), prime_4);
  /* fall through */
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    mixup64(&s->n.b, &s->n.a, fetch64_le(v++), prime_3);
  /* fall through */
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    mixup64(&s->n.a, &s->n.b, fetch64_le(v++), prime_2);
  /* fall through */
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    mixup64(&s->n.b, &s->n.a, tail64_le(v, len), prime_1);
  /* fall through */
  case 0:
    return;
  }
}
/* TODO C++ template in the next version */
#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
  do { \
    t1ha_state256_t *const s = state; \
    const uint64_t *v = (const uint64_t *)data; \
    if (BUFFER4COPY != NULL) \
      memcpy((void *)(v = BUFFER4COPY), data, len); \
    switch (len) { \
    default: \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_4); \
    /* fall through */ \
    case 24: \
    case 23: \
    case 22: \
    case 21: \
    case 20: \
    case 19: \
    case 18: \
    case 17: \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_3); \
    /* fall through */ \
    case 16: \
    case 15: \
    case 14: \
    case 13: \
    case 12: \
    case 11: \
    case 10: \
    case 9: \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_2); \
    /* fall through */ \
    case 8: \
    case 7: \
    case 6: \
    case 5: \
    case 4: \
    case 3: \
    case 2: \
    case 1: \
      mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
              prime_1); \
    /* fall through */ \
    case 0: \
      return final64(s->n.a, s->n.b); \
    } \
  } while (0)

static __always_inline void tail_abcd(t1ha_state256_t *__restrict s,
                                      const uint64_t *__restrict v,
                                      size_t len) {
  switch (len) {
  default:
    mixup64(&s->n.a, &s->n.d, fetch64_le(v++), prime_4);
  /* fall through */
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    mixup64(&s->n.b, &s->n.a, fetch64_le(v++), prime_3);
  /* fall through */
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    mixup64(&s->n.c, &s->n.b, fetch64_le(v++), prime_2);
  /* fall through */
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    mixup64(&s->n.d, &s->n.c, tail64_le(v, len), prime_1);
  /* fall through */
  case 0:
    return;
  }
}
/* TODO C++ template in the next version */
#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
  do { \
    t1ha_state256_t *const s = state; \
    const uint64_t *v = (const uint64_t *)data; \
    if (BUFFER4COPY != NULL) \
      memcpy((void *)(v = BUFFER4COPY), data, len); \
    switch (len) { \
    default: \
      mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_4); \
    /* fall through */ \
    case 24: \
    case 23: \
    case 22: \
    case 21: \
    case 20: \
    case 19: \
    case 18: \
    case 17: \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_3); \
    /* fall through */ \
    case 16: \
    case 15: \
    case 14: \
    case 13: \
    case 12: \
    case 11: \
    case 10: \
    case 9: \
      mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_2); \
    /* fall through */ \
    case 8: \
    case 7: \
    case 6: \
    case 5: \
    case 4: \
    case 3: \
    case 2: \
    case 1: \
      mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
              prime_1); \
    /* fall through */ \
    case 0: \
      return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \
    } \
  } while (0)

static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c,
                                         uint64_t d, uint64_t *h) {
@@ -195,22 +207,26 @@ uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
  t1ha_state256_t state;
  init_ab(&state, seed, length);

  const int need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
  uint64_t buffer4align[4];

  if (unlikely(length > 32)) {
    init_cd(&state, seed, length);
    data = loop(need_copy4align, buffer4align, &state, data, length);
    squash(&state);
    length &= 31;
  const bool need_copy4align =
      (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
  if (need_copy4align) {
    uint64_t buffer4align[4];
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
      T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, aligned, buffer4align, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
      T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, unaligned, NULL, &state, data, length);
  }

  const uint64_t *v = (const uint64_t *)data;
  if (unlikely(need_copy4align) && length > 8)
    v = (const uint64_t *)memcpy(&buffer4align, unaligned(v), length);

  tail_ab(&state, v, length);
  return final64(state.n.a, state.n.b);
}

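A minimal one-shot caller of the 64-bit variant (buf and buf_len are
placeholder names):

    uint64_t h = t1ha2_atonce(buf, buf_len, 42);
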
uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
@@ -220,20 +236,22 @@ uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
  init_ab(&state, seed, length);
  init_cd(&state, seed, length);

  const int need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
  uint64_t buffer4align[4];

  if (unlikely(length > 32)) {
    data = loop(need_copy4align, buffer4align, &state, data, length);
    length &= 31;
  const bool need_copy4align =
      (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
  if (need_copy4align) {
    uint64_t buffer4align[4];
    if (unlikely(length > 32)) {
      T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, aligned, buffer4align, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
      T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, unaligned, NULL, &state, data, length);
  }

  const uint64_t *v = (const uint64_t *)data;
  if (unlikely(need_copy4align) && length > 8)
    v = (const uint64_t *)memcpy(&buffer4align, unaligned(v), length);

  tail_abcd(&state, v, length);
  return final128(state.n.a, state.n.b, state.n.c, state.n.d, extra_result);
}

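The 128-bit variant returns one 64-bit half directly and stores the other
half through extra_result, so a caller collects both (hi/lo naming here is a
placeholder; see final128() for the exact split):

    uint64_t other_half, h;
    h = t1ha2_atonce128(&other_half, buf, buf_len, 42);
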
//------------------------------------------------------------------------------
@@ -252,7 +270,7 @@ void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
  if (ctx->partial) {
    const size_t left = 32 - ctx->partial;
    const size_t chunk = (length >= left) ? left : length;
    memcpy(ctx->buffer.bytes + ctx->partial, unaligned(data), chunk);
    memcpy(ctx->buffer.bytes + ctx->partial, data, chunk);
    ctx->partial += chunk;
    if (ctx->partial < 32) {
      assert(left >= length);
@@ -261,37 +279,41 @@ void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
    ctx->partial = 0;
    data = (const uint8_t *)data + chunk;
    length -= chunk;
    update(&ctx->state, ctx->buffer.u64);
    T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64);
  }

  if (length >= 32) {
    const bool need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
    if (need_copy4align)
      data = loop(true, ctx->buffer.u64, &ctx->state, data, length);
    else
      data = loop(false, NULL, &ctx->state, data, length);
    const bool need_copy4align =
        (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
    if (need_copy4align) {
      T1HA2_LOOP(le, aligned, ctx->buffer.u64, &ctx->state, data, length);
    } else {
      T1HA2_LOOP(le, unaligned, NULL, &ctx->state, data, length);
    }
    length &= 31;
  }

  if (length)
    memcpy(ctx->buffer.bytes, unaligned(data), ctx->partial = length);
    memcpy(ctx->buffer.bytes, data, ctx->partial = length);
}

uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
                     uint64_t *__restrict extra_result) {
  uint64_t bytes = (ctx->total << 3) ^ (UINT64_C(1) << 63);
  uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63);
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  bytes = bswap64(bytes);
  bits = bswap64(bits);
#endif
  t1ha2_update(ctx, &bytes, 8);
  t1ha2_update(ctx, &bits, 8);

  if (likely(!extra_result)) {
    squash(&ctx->state);
    tail_ab(&ctx->state, ctx->buffer.u64, ctx->partial);
    T1HA2_TAIL_AB(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
                  ctx->partial);
    return final64(ctx->state.n.a, ctx->state.n.b);
  }

  tail_abcd(&ctx->state, ctx->buffer.u64, ctx->partial);
  T1HA2_TAIL_ABCD(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
                  ctx->partial);
  return final128(ctx->state.n.a, ctx->state.n.b, ctx->state.n.c,
                  ctx->state.n.d, extra_result);
}
}
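Putting the streaming API together (t1ha2_init/t1ha2_update/t1ha2_final are
the public entry points; seed_x/seed_y and the chunk names are placeholders,
and passing NULL to t1ha2_final selects the 64-bit result):

    t1ha_context_t ctx;
    t1ha2_init(&ctx, seed_x, seed_y);
    t1ha2_update(&ctx, chunk1, chunk1_len);
    t1ha2_update(&ctx, chunk2, chunk2_len);
    uint64_t h = t1ha2_final(&ctx, NULL);
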
@@ -86,6 +86,14 @@
#define PAGESIZE 4096
#endif /* PAGESIZE */

#define ALIGMENT_16 2
#define ALIGMENT_32 4
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
#define ALIGMENT_64 8
#else
#define ALIGMENT_64 4
#endif

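These constants (ALIGMENT_* is the upstream spelling) feed the
pointer-alignment tests used throughout the diff; since ALIGMENT_64 is a
power of two, the check reduces to a mask:

    /* true when p is not suitably aligned for 64-bit loads */
    bool misaligned = (((uintptr_t)p) & (ALIGMENT_64 - 1)) != 0;
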
/***************************************************************************/

#ifndef __has_builtin
@@ -160,10 +168,8 @@ static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry,
  e2k_add64carry_last(carry, base, addend, sum)
#endif /* __iset__ >= 5 */

#if 0 /* LY: unreasonable, because alignment is required :( */
#define fetch64_be(ptr) ((uint64_t)__builtin_e2k_ld_64s_be(ptr))
#define fetch32_be(ptr) ((uint32_t)__builtin_e2k_ld_32u_be(ptr))
#endif
#define fetch64_be_aligned(ptr) ((uint64_t)__builtin_e2k_ld_64s_be(ptr))
#define fetch32_be_aligned(ptr) ((uint32_t)__builtin_e2k_ld_32u_be(ptr))

#endif /* __e2k__ Elbrus */

@@ -337,86 +343,174 @@ static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; }
#endif
#endif /* bswap16 */

#ifndef unaligned
#if defined(__LCC__)
#pragma diag_suppress wrong_entity_for_attribute
#define unaligned(ptr) ((const char __attribute__((packed, aligned(1))) *)(ptr))
#elif defined(__clang__)
#pragma clang diagnostic ignored "-Wignored-attributes"
#define unaligned(ptr) ((const char __attribute__((packed, aligned(1))) *)(ptr))
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wpacked"
#define unaligned(ptr) ((const char __attribute__((packed, aligned(1))) *)(ptr))
#if defined(__GNUC__) || (__has_attribute(packed) && __has_attribute(aligned))
typedef struct {
  uint8_t unaligned_8;
  uint16_t unaligned_16;
  uint32_t unaligned_32;
  uint64_t unaligned_64;
} __attribute__((packed, aligned(1))) t1ha_unaligned_proxy;
#define read_unaligned(ptr, bits) \
  (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \
        t1ha_unaligned_proxy, unaligned_##bits))) \
       ->unaligned_##bits)
#define read_aligned(ptr, bits) \
  (*(const __attribute__((aligned(ALIGMENT_##bits))) uint##bits##_t *)(ptr))
#elif defined(_MSC_VER)
#pragma warning( \
    disable : 4235) /* nonstandard extension used: '__unaligned' \
                     * keyword not supported on this architecture */
#define unaligned(ptr) ((const char __unaligned *)(ptr))
#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr))
#define read_aligned(ptr, bits) \
  (*(const __declspec(align(ALIGMENT_##bits)) uint##bits##_t *)(ptr))
#else
#define unaligned(ptr) ((const char *)(ptr))
#error FIXME
#define read_unaligned(ptr, bits) \
  (*(const uint##bits##_t *)((const char *)(ptr)))
#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr))
#endif /* read_unaligned */

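The packed-proxy trick above is what makes read_unaligned() well defined
under GCC/Clang: accessing a member of a struct declared packed+aligned(1)
tells the compiler the load may sit at any address, so it emits a safe
unaligned access instead of relying on an undefined pointer cast. Roughly:

    /* read_unaligned(p, 32) behaves like: */
    uint32_t x;
    memcpy(&x, p, sizeof(x)); /* what the compiler effectively generates */
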
#if 0
#ifndef DECLARE_UNALIGNED_PTR
#if defined(__LCC__)
#pragma diag_suppress wrong_entity_for_attribute
#define DECLARE_UNALIGNED_PTR(ptr) char __attribute__((packed)) * ptr
#elif defined(__clang__)
#pragma clang diagnostic ignored "-Wignored-attributes"
#define DECLARE_UNALIGNED_PTR(ptr) char __attribute__((packed)) * ptr
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wpacked"
#define DECLARE_UNALIGNED_PTR(ptr) char __attribute__((packed)) * ptr
#elif defined(_MSC_VER)
#pragma warning( \
    disable : 4235) /* nonstandard extension used: '__unaligned' \
                     * keyword not supported on this architecture */
#define DECLARE_UNALIGNED_PTR(ptr) char __unaligned *ptr
#else
#define DECLARE_UNALIGNED_PTR(ptr) char *ptr
#endif
#endif /* unaligned */
#endif /* cast_unaligned */

#ifndef cast_unaligned
#if defined(__LCC__)
#pragma diag_suppress wrong_entity_for_attribute
#define cast_unaligned(ptr) ((const char __attribute__((packed)) *)(ptr))
#elif defined(__clang__)
#pragma clang diagnostic ignored "-Wignored-attributes"
#define cast_unaligned(ptr) ((const char __attribute__((packed)) *)(ptr))
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wpacked"
#define cast_unaligned(ptr) ((const char __attribute__((packed)) *)(ptr))
#elif defined(_MSC_VER)
#pragma warning( \
    disable : 4235) /* nonstandard extension used: '__unaligned' \
                     * keyword not supported on this architecture */
#define cast_unaligned(ptr) ((const char __unaligned *)(ptr))
#else
#define cast_unaligned(ptr) ((const char *)(ptr))
#endif
#endif /* cast_unaligned */

#ifndef cast_aligned
#if defined(__LCC__)
#pragma diag_suppress wrong_entity_for_attribute
#define cast_aligned(type, ptr) \
  ((const type __attribute__((aligned(sizeof(type)))) *)(ptr))
#elif defined(__clang__)
#pragma clang diagnostic ignored "-Wignored-attributes"
#define cast_aligned(type, ptr) \
  ((const type __attribute__((aligned(sizeof(type)))) *)(ptr))
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wpacked"
#define cast_aligned(type, ptr) \
  ((const type __attribute__((aligned(sizeof(type)))) *)(ptr))
#elif defined(_MSC_VER)
#define cast_aligned(type, ptr) \
  ((const type __declspec((align(sizeof(type)))) *)(ptr))
#else
#define cast_aligned(type, ptr) ((const type *)(ptr))
#endif
#endif /* cast_aligned */
#endif /* 0 */

/***************************************************************************/

#ifndef fetch64_le
static __always_inline uint64_t fetch64_le(const void *v) {
/*---------------------------------------------------------- Little Endian */

#ifndef fetch64_le_aligned
static __always_inline uint64_t fetch64_le_aligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint64_t *)v;
  return read_aligned(v, 64);
#else
  return bswap64(*(const uint64_t *)v);
  return bswap64(read_aligned(v, 64));
#endif
}
#endif /* fetch64_le */
#endif /* fetch64_le_aligned */

#ifndef fetch32_le
static __always_inline uint32_t fetch32_le(const void *v) {
#ifndef fetch64_le_unaligned
static __always_inline uint64_t fetch64_le_unaligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint32_t *)v;
  return read_unaligned(v, 64);
#else
  return bswap32(*(const uint32_t *)v);
  return bswap64(read_unaligned(v, 64));
#endif
}
#endif /* fetch32_le */
#endif /* fetch64_le_unaligned */

#ifndef fetch16_le
static __always_inline uint16_t fetch16_le(const void *v) {
#ifndef fetch32_le_aligned
static __always_inline uint32_t fetch32_le_aligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint16_t *)v;
  return read_aligned(v, 32);
#else
  return bswap16(*(const uint16_t *)v);
  return bswap32(read_aligned(v, 32));
#endif
}
#endif /* fetch16_le */
#endif /* fetch32_le_aligned */

#if T1HA_USE_FAST_ONESHOT_READ && UNALIGNED_OK && defined(PAGESIZE) && \
    !defined(__SANITIZE_ADDRESS__) && !defined(__sun)
#define can_read_underside(ptr, size) \
  ((size) <= sizeof(uintptr_t) && ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0)
#endif /* can_fast_read */
#ifndef fetch32_le_unaligned
static __always_inline uint32_t fetch32_le_unaligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return read_unaligned(v, 32);
#else
  return bswap32(read_unaligned(v, 32));
#endif
}
#endif /* fetch32_le_unaligned */

static __always_inline uint64_t tail64_le(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
#ifdef can_read_underside
  /* On some systems (e.g. x86) we can perform a 'oneshot' read, which
   * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com>
   * for the reminder. */
  const unsigned offset = (8 - tail) & 7;
  const unsigned shift = offset << 3;
  if (likely(can_read_underside(p, 8))) {
    p -= offset;
    return fetch64_le(p) >> shift;
  }
  return fetch64_le(p) & ((~UINT64_C(0)) >> shift);
#ifndef fetch16_le_aligned
static __always_inline uint16_t fetch16_le_aligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return read_aligned(v, 16);
#else
  return bswap16(read_aligned(v, 16));
#endif
}
#endif /* fetch16_le_aligned */

#ifndef fetch16_le_unaligned
static __always_inline uint16_t fetch16_le_unaligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return read_unaligned(v, 16);
#else
  return bswap16(read_unaligned(v, 16));
#endif
}
#endif /* fetch16_le_unaligned */

static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
  const uint8_t *const p = (const uint8_t *)v;
#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__)
  /* We can perform a 'oneshot' read, which is little bit faster. */
  const unsigned shift = ((8 - tail) & 7) << 3;
  return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift);
#endif /* 'oneshot' read */

  uint64_t r = 0;
  switch (tail & 7) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* For most CPUs this code is better when not needed
   * copying for alignment or byte reordering. */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* For most CPUs this code is better when not needed byte reordering. */
  case 0:
    return fetch64_le(p);
    return fetch64_le_aligned(p);
  case 7:
    r = (uint64_t)p[6] << 8;
  /* fall through */
@@ -429,12 +523,96 @@ static __always_inline uint64_t tail64_le(const void *v, size_t tail) {
    r <<= 32;
  /* fall through */
  case 4:
    return r + fetch32_le(p);
    return r + fetch32_le_aligned(p);
  case 3:
    r = (uint64_t)p[2] << 16;
  /* fall through */
  case 2:
    return r + fetch16_le(p);
    return r + fetch16_le_aligned(p);
  case 1:
    return p[0];
#else
  case 0:
    r = p[7] << 8;
  /* fall through */
  case 7:
    r += p[6];
    r <<= 8;
  /* fall through */
  case 6:
    r += p[5];
    r <<= 8;
  /* fall through */
  case 5:
    r += p[4];
    r <<= 8;
  /* fall through */
  case 4:
    r += p[3];
    r <<= 8;
  /* fall through */
  case 3:
    r += p[2];
    r <<= 8;
  /* fall through */
  case 2:
    r += p[1];
    r <<= 8;
  /* fall through */
  case 1:
    return r + p[0];
#endif
  }
  unreachable();
}

#if T1HA_USE_FAST_ONESHOT_READ && UNALIGNED_OK && defined(PAGESIZE) && \
    !defined(__SANITIZE_ADDRESS__) && !defined(__sun)
#define can_read_underside(ptr, size) \
  ((size) <= sizeof(uintptr_t) && ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0)
#endif /* can_fast_read */

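can_read_underside() guards the 'oneshot' trick used below: for a tail of
1..7 bytes the code reads a full 8-byte word starting up to 7 bytes before p
and shifts the junk out. That is safe whenever the early bytes cannot fall
into a previous (possibly unmapped) page, i.e. whenever p's offset within its
page is at least 8, which is what ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0
tests for size == 8. A worked sketch for tail == 3:

    /* read the word ending at p + 2, then drop the 5 low junk bytes */
    const unsigned offset = (8 - 3) & 7; /* 5 */
    uint64_t r = fetch64_le_unaligned(p - offset) >> (offset << 3);
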
static __always_inline uint64_t tail64_le_unaligned(const void *v,
                                                    size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
#ifdef can_read_underside
  /* On some systems (e.g. x86) we can perform a 'oneshot' read, which
   * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com>
   * for the reminder. */
  const unsigned offset = (8 - tail) & 7;
  const unsigned shift = offset << 3;
  if (likely(can_read_underside(p, 8))) {
    p -= offset;
    return fetch64_le_unaligned(p) >> shift;
  }
  return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift);
#endif /* 'oneshot' read */

  uint64_t r = 0;
  switch (tail & 7) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* For most CPUs this code is better when not needed
   * copying for alignment or byte reordering. */
  case 0:
    return fetch64_le_unaligned(p);
  case 7:
    r = (uint64_t)p[6] << 8;
  /* fall through */
  case 6:
    r += p[5];
    r <<= 8;
  /* fall through */
  case 5:
    r += p[4];
    r <<= 32;
  /* fall through */
  case 4:
    return r + fetch32_le_unaligned(p);
  case 3:
    r = (uint64_t)p[2] << 16;
  /* fall through */
  case 2:
    return r + fetch16_le_unaligned(p);
  case 1:
    return p[0];
#else
@@ -474,38 +652,134 @@ static __always_inline uint64_t tail64_le(const void *v, size_t tail) {
  unreachable();
}

#ifndef fetch64_be
static __maybe_unused __always_inline uint64_t fetch64_be(const void *v) {
/*------------------------------------------------------------- Big Endian */

#ifndef fetch64_be_aligned
static __maybe_unused __always_inline uint64_t
fetch64_be_aligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return *(const uint64_t *)v;
  return read_aligned(v, 64);
#else
  return bswap64(*(const uint64_t *)v);
  return bswap64(read_aligned(v, 64));
#endif
}
#endif /* fetch64_be */
#endif /* fetch64_be_aligned */

#ifndef fetch32_be
static __maybe_unused __always_inline uint32_t fetch32_be(const void *v) {
#ifndef fetch64_be_unaligned
static __maybe_unused __always_inline uint64_t
fetch64_be_unaligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return *(const uint32_t *)v;
  return read_unaligned(v, 64);
#else
  return bswap32(*(const uint32_t *)v);
  return bswap64(read_unaligned(v, 64));
#endif
}
#endif /* fetch32_be */
#endif /* fetch64_be_unaligned */

#ifndef fetch16_be
static __maybe_unused __always_inline uint16_t fetch16_be(const void *v) {
#ifndef fetch32_be_aligned
static __maybe_unused __always_inline uint32_t
fetch32_be_aligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return *(const uint16_t *)v;
  return read_aligned(v, 32);
#else
  return bswap16(*(const uint16_t *)v);
  return bswap32(read_aligned(v, 32));
#endif
}
#endif /* fetch16_be */
#endif /* fetch32_be_aligned */

static __maybe_unused __always_inline uint64_t tail64_be(const void *v,
                                                         size_t tail) {
#ifndef fetch32_be_unaligned
static __maybe_unused __always_inline uint32_t
fetch32_be_unaligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return read_unaligned(v, 32);
#else
  return bswap32(read_unaligned(v, 32));
#endif
}
#endif /* fetch32_be_unaligned */

#ifndef fetch16_be_aligned
static __maybe_unused __always_inline uint16_t
fetch16_be_aligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return read_aligned(v, 16);
#else
  return bswap16(read_aligned(v, 16));
#endif
}
#endif /* fetch16_be_aligned */

#ifndef fetch16_be_unaligned
static __maybe_unused __always_inline uint16_t
fetch16_be_unaligned(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return read_unaligned(v, 16);
#else
  return bswap16(read_unaligned(v, 16));
#endif
}
#endif /* fetch16_be_unaligned */

static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
                                                                 size_t tail) {
  const uint8_t *const p = (const uint8_t *)v;
#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__)
  /* We can perform a 'oneshot' read, which is little bit faster. */
  const unsigned shift = ((8 - tail) & 7) << 3;
  return fetch64_be_aligned(p) >> shift;
#endif /* 'oneshot' read */

  switch (tail & 7) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* For most CPUs this code is better when not byte reordering. */
  case 1:
    return p[0];
  case 2:
    return fetch16_be_aligned(p);
  case 3:
    return (uint32_t)fetch16_be_aligned(p) << 8 | p[2];
  case 4:
    return fetch32_be_aligned(p);
  case 5:
    return (uint64_t)fetch32_be_aligned(p) << 8 | p[4];
  case 6:
    return (uint64_t)fetch32_be_aligned(p) << 16 | fetch16_be_aligned(p + 4);
  case 7:
    return (uint64_t)fetch32_be_aligned(p) << 24 |
           (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6];
  case 0:
    return fetch64_be_aligned(p);
#else
  case 1:
    return p[0];
  case 2:
    return p[1] | (uint32_t)p[0] << 8;
  case 3:
    return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16;
  case 4:
    return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 |
           (uint32_t)p[0] << 24;
  case 5:
    return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 |
           (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32;
  case 6:
    return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 |
           (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40;
  case 7:
    return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 |
           (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 |
           (uint64_t)p[0] << 48;
  case 0:
    return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 |
           (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 |
           (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
#endif
  }
  unreachable();
}

static __maybe_unused __always_inline uint64_t
tail64_be_unaligned(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
#ifdef can_read_underside
  /* On some systems we can perform a 'oneshot' read, which is little bit
@@ -515,9 +789,9 @@ static __maybe_unused __always_inline uint64_t tail64_be(const void *v,
  const unsigned shift = offset << 3;
  if (likely(can_read_underside(p, 8))) {
    p -= offset;
    return fetch64_be(p) & ((~UINT64_C(0)) >> shift);
    return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift);
  }
  return fetch64_be(p) >> shift;
  return fetch64_be_unaligned(p) >> shift;
#endif /* 'oneshot' read */

  switch (tail & 7) {
@@ -527,20 +801,21 @@ static __maybe_unused __always_inline uint64_t tail64_be(const void *v,
  case 1:
    return p[0];
  case 2:
    return fetch16_be(p);
    return fetch16_be_unaligned(p);
  case 3:
    return (uint32_t)fetch16_be(p) << 8 | p[2];
    return (uint32_t)fetch16_be_unaligned(p) << 8 | p[2];
  case 4:
    return fetch32_be(p);
  case 5:
    return (uint64_t)fetch32_be(p) << 8 | p[4];
    return (uint64_t)fetch32_be_unaligned(p) << 8 | p[4];
  case 6:
    return (uint64_t)fetch32_be(p) << 16 | fetch16_be(p + 4);
    return (uint64_t)fetch32_be_unaligned(p) << 16 |
           fetch16_be_unaligned(p + 4);
  case 7:
    return (uint64_t)fetch32_be(p) << 24 | (uint32_t)fetch16_be(p + 4) << 8 |
           p[6];
    return (uint64_t)fetch32_be_unaligned(p) << 24 |
           (uint32_t)fetch16_be_unaligned(p + 4) << 8 | p[6];
  case 0:
    return fetch64_be(p);
    return fetch64_be_unaligned(p);
#else
  /* For most CPUs this code is better than a
   * copying for alignment and/or byte reordering. */