Diffstat (limited to 'contrib/t1ha/t1ha2.c')
 contrib/t1ha/t1ha2.c | 334 ++++++++++++++++++++++++++----------------------
 1 file changed, 178 insertions(+), 156 deletions(-)
diff --git a/contrib/t1ha/t1ha2.c b/contrib/t1ha/t1ha2.c
index f87e8bb82..1f5a9d905 100644
--- a/contrib/t1ha/t1ha2.c
+++ b/contrib/t1ha/t1ha2.c
@@ -56,128 +56,140 @@ static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
s->n.d = ~y + rot64(x, 19);
}
-static __always_inline void update(t1ha_state256_t *__restrict s,
- const uint64_t *__restrict v) {
- uint64_t w0 = fetch64_le(v + 0);
- uint64_t w1 = fetch64_le(v + 1);
- uint64_t w2 = fetch64_le(v + 2);
- uint64_t w3 = fetch64_le(v + 3);
-
- uint64_t d02 = w0 + rot64(w2 + s->n.d, 56);
- uint64_t c13 = w1 + rot64(w3 + s->n.c, 19);
-#ifdef __e2k__
- /* FIXME: temporary workaround for lcc's ELBRUS scheduling bug (LY) */
- s->n.c ^= s->n.a + rot64(w0, 57);
- s->n.d ^= s->n.b + rot64(w1, 38);
-#else
- s->n.d ^= s->n.b + rot64(w1, 38);
- s->n.c ^= s->n.a + rot64(w0, 57);
-#endif
- s->n.b ^= prime_6 * (c13 + w2);
- s->n.a ^= prime_5 * (d02 + w3);
-}
+/* TODO C++ template in the next version */
+#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \
+ do { \
+ t1ha_state256_t *const s = state; \
+ const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \
+ const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \
+ const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \
+ const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \
+ \
+ const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \
+ const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \
+ s->n.d ^= s->n.b + rot64(w1, 38); \
+ s->n.c ^= s->n.a + rot64(w0, 57); \
+ s->n.b ^= prime_6 * (c13 + w2); \
+ s->n.a ^= prime_5 * (d02 + w3); \
+ } while (0)
static __always_inline void squash(t1ha_state256_t *s) {
s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23));
s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d);
}
-static __always_inline const void *
-loop(bool need_copy4align, uint64_t *__restrict buffer4align,
- t1ha_state256_t *__restrict s, const void *__restrict data, size_t len) {
- const void *detent = (const uint8_t *)data + len - 31;
- do {
- const uint64_t *v = (const uint64_t *)data;
- if (unlikely(need_copy4align))
- v = (const uint64_t *)memcpy(buffer4align, unaligned(v), 32);
- update(s, v);
- data = (const uint64_t *)data + 4;
- } while (likely(data < detent));
- return data;
-}
+/* TODO C++ template in the next version */
+#define T1HA2_LOOP(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
+ do { \
+ const void *detent = (const uint8_t *)data + len - 31; \
+ do { \
+ const uint64_t *v = (const uint64_t *)data; \
+ if (BUFFER4COPY != NULL) \
+ memcpy((void *)(v = BUFFER4COPY), data, 32); \
+ T1HA2_UPDATE(le, unaligned, state, v); \
+ data = (const uint64_t *)data + 4; \
+ } while (likely(data < detent)); \
+ } while (0)
-static __always_inline void tail_ab(t1ha_state256_t *__restrict s,
- const uint64_t *__restrict v, size_t len) {
- switch (len) {
- default:
- mixup64(&s->n.a, &s->n.b, fetch64_le(v++), prime_4);
- /* fall through */
- case 24:
- case 23:
- case 22:
- case 21:
- case 20:
- case 19:
- case 18:
- case 17:
- mixup64(&s->n.b, &s->n.a, fetch64_le(v++), prime_3);
- /* fall through */
- case 16:
- case 15:
- case 14:
- case 13:
- case 12:
- case 11:
- case 10:
- case 9:
- mixup64(&s->n.a, &s->n.b, fetch64_le(v++), prime_2);
- /* fall through */
- case 8:
- case 7:
- case 6:
- case 5:
- case 4:
- case 3:
- case 2:
- case 1:
- mixup64(&s->n.b, &s->n.a, tail64_le(v, len), prime_1);
- /* fall through */
- case 0:
- return;
- }
-}
+/* TODO C++ template in the next version */
+#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
+ do { \
+ t1ha_state256_t *const s = state; \
+ const uint64_t *v = (const uint64_t *)data; \
+ if (BUFFER4COPY != NULL) \
+ memcpy((void *)(v = BUFFER4COPY), data, len); \
+ switch (len) { \
+ default: \
+ mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_4); \
+ /* fall through */ \
+ case 24: \
+ case 23: \
+ case 22: \
+ case 21: \
+ case 20: \
+ case 19: \
+ case 18: \
+ case 17: \
+ mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_3); \
+ /* fall through */ \
+ case 16: \
+ case 15: \
+ case 14: \
+ case 13: \
+ case 12: \
+ case 11: \
+ case 10: \
+ case 9: \
+ mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_2); \
+ /* fall through */ \
+ case 8: \
+ case 7: \
+ case 6: \
+ case 5: \
+ case 4: \
+ case 3: \
+ case 2: \
+ case 1: \
+ mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
+ prime_1); \
+ /* fall through */ \
+ case 0: \
+ return final64(s->n.a, s->n.b); \
+ } \
+ } while (0)
-static __always_inline void tail_abcd(t1ha_state256_t *__restrict s,
- const uint64_t *__restrict v,
- size_t len) {
- switch (len) {
- default:
- mixup64(&s->n.a, &s->n.d, fetch64_le(v++), prime_4);
- /* fall through */
- case 24:
- case 23:
- case 22:
- case 21:
- case 20:
- case 19:
- case 18:
- case 17:
- mixup64(&s->n.b, &s->n.a, fetch64_le(v++), prime_3);
- /* fall through */
- case 16:
- case 15:
- case 14:
- case 13:
- case 12:
- case 11:
- case 10:
- case 9:
- mixup64(&s->n.c, &s->n.b, fetch64_le(v++), prime_2);
- /* fall through */
- case 8:
- case 7:
- case 6:
- case 5:
- case 4:
- case 3:
- case 2:
- case 1:
- mixup64(&s->n.d, &s->n.c, tail64_le(v, len), prime_1);
- /* fall through */
- case 0:
- return;
- }
-}
+/* TODO C++ template in the next version */
+#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len) \
+ do { \
+ t1ha_state256_t *const s = state; \
+ const uint64_t *v = (const uint64_t *)data; \
+ if (BUFFER4COPY != NULL) \
+ memcpy((void *)(v = BUFFER4COPY), data, len); \
+ switch (len) { \
+ default: \
+ mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_4); \
+ /* fall through */ \
+ case 24: \
+ case 23: \
+ case 22: \
+ case 21: \
+ case 20: \
+ case 19: \
+ case 18: \
+ case 17: \
+ mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_3); \
+ /* fall through */ \
+ case 16: \
+ case 15: \
+ case 14: \
+ case 13: \
+ case 12: \
+ case 11: \
+ case 10: \
+ case 9: \
+ mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_2); \
+ /* fall through */ \
+ case 8: \
+ case 7: \
+ case 6: \
+ case 5: \
+ case 4: \
+ case 3: \
+ case 2: \
+ case 1: \
+ mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
+ prime_1); \
+ /* fall through */ \
+ case 0: \
+ return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \
+ } \
+ } while (0)
static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c,
uint64_t d, uint64_t *h) {
@@ -195,22 +207,26 @@ uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
t1ha_state256_t state;
init_ab(&state, seed, length);
- const int need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
- uint64_t buffer4align[4];
-
- if (unlikely(length > 32)) {
- init_cd(&state, seed, length);
- data = loop(need_copy4align, buffer4align, &state, data, length);
- squash(&state);
- length &= 31;
+ const bool need_copy4align =
+ (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
+ if (need_copy4align) {
+ uint64_t buffer4align[4];
+ if (unlikely(length > 32)) {
+ init_cd(&state, seed, length);
+ T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
+ squash(&state);
+ length &= 31;
+ }
+ T1HA2_TAIL_AB(le, aligned, buffer4align, &state, data, length);
+ } else {
+ if (unlikely(length > 32)) {
+ init_cd(&state, seed, length);
+ T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
+ squash(&state);
+ length &= 31;
+ }
+ T1HA2_TAIL_AB(le, unaligned, NULL, &state, data, length);
}
-
- const uint64_t *v = (const uint64_t *)data;
- if (unlikely(need_copy4align) && length > 8)
- v = (const uint64_t *)memcpy(&buffer4align, unaligned(v), length);
-
- tail_ab(&state, v, length);
- return final64(state.n.a, state.n.b);
}
uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
@@ -220,20 +236,22 @@ uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
init_ab(&state, seed, length);
init_cd(&state, seed, length);
- const int need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
- uint64_t buffer4align[4];
-
- if (unlikely(length > 32)) {
- data = loop(need_copy4align, buffer4align, &state, data, length);
- length &= 31;
+ const bool need_copy4align =
+ (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
+ if (need_copy4align) {
+ uint64_t buffer4align[4];
+ if (unlikely(length > 32)) {
+ T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
+ length &= 31;
+ }
+ T1HA2_TAIL_ABCD(le, aligned, buffer4align, &state, data, length);
+ } else {
+ if (unlikely(length > 32)) {
+ T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
+ length &= 31;
+ }
+ T1HA2_TAIL_ABCD(le, unaligned, NULL, &state, data, length);
}
-
- const uint64_t *v = (const uint64_t *)data;
- if (unlikely(need_copy4align) && length > 8)
- v = (const uint64_t *)memcpy(&buffer4align, unaligned(v), length);
-
- tail_abcd(&state, v, length);
- return final128(state.n.a, state.n.b, state.n.c, state.n.d, extra_result);
}
//------------------------------------------------------------------------------
@@ -252,7 +270,7 @@ void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
if (ctx->partial) {
const size_t left = 32 - ctx->partial;
const size_t chunk = (length >= left) ? left : length;
- memcpy(ctx->buffer.bytes + ctx->partial, unaligned(data), chunk);
+ memcpy(ctx->buffer.bytes + ctx->partial, data, chunk);
ctx->partial += chunk;
if (ctx->partial < 32) {
assert(left >= length);
@@ -261,37 +279,41 @@ void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
ctx->partial = 0;
data = (const uint8_t *)data + chunk;
length -= chunk;
- update(&ctx->state, ctx->buffer.u64);
+ T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64);
}
if (length >= 32) {
- const bool need_copy4align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
- if (need_copy4align)
- data = loop(true, ctx->buffer.u64, &ctx->state, data, length);
- else
- data = loop(false, NULL, &ctx->state, data, length);
+ const bool need_copy4align =
+ (((uintptr_t)data) & (ALIGMENT_64 - 1)) != 0 && !UNALIGNED_OK;
+ if (need_copy4align) {
+ T1HA2_LOOP(le, aligned, ctx->buffer.u64, &ctx->state, data, length);
+ } else {
+ T1HA2_LOOP(le, unaligned, NULL, &ctx->state, data, length);
+ }
length &= 31;
}
if (length)
- memcpy(ctx->buffer.bytes, unaligned(data), ctx->partial = length);
+ memcpy(ctx->buffer.bytes, data, ctx->partial = length);
}
uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
uint64_t *__restrict extra_result) {
- uint64_t bytes = (ctx->total << 3) ^ (UINT64_C(1) << 63);
+ uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63);
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
- bytes = bswap64(bytes);
+ bits = bswap64(bits);
#endif
- t1ha2_update(ctx, &bytes, 8);
+ t1ha2_update(ctx, &bits, 8);
if (likely(!extra_result)) {
squash(&ctx->state);
- tail_ab(&ctx->state, ctx->buffer.u64, ctx->partial);
+ T1HA2_TAIL_AB(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
+ ctx->partial);
return final64(ctx->state.n.a, ctx->state.n.b);
}
- tail_abcd(&ctx->state, ctx->buffer.u64, ctx->partial);
+ T1HA2_TAIL_ABCD(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
+ ctx->partial);
return final128(ctx->state.n.a, ctx->state.n.b, ctx->state.n.c,
ctx->state.n.d, extra_result);
-}
+}
\ No newline at end of file
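
For context, here is a minimal usage sketch of the entry points touched by this diff. The one-shot and streaming signatures for t1ha2_atonce, t1ha2_atonce128, t1ha2_update and t1ha2_final are taken from the hunks above; the t1ha2_init(ctx, seed_x, seed_y) initializer and the "t1ha.h" include path are assumptions based on the upstream t1ha headers and may differ in this contrib tree. Note that the streaming variant mixes the total length into the final block, so its result is not expected to match the one-shot hash.

```c
/* Minimal usage sketch; assumes the t1ha.h header shipped alongside t1ha2.c. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "t1ha.h"

int main(void) {
  const char msg[] = "hello, t1ha2";
  const uint64_t seed = 42;

  /* One-shot 64-bit hash: internally drives T1HA2_LOOP / T1HA2_TAIL_AB. */
  uint64_t h64 = t1ha2_atonce(msg, strlen(msg), seed);

  /* One-shot 128-bit hash: the upper 64 bits come back via extra_result. */
  uint64_t hi;
  uint64_t lo = t1ha2_atonce128(&hi, msg, strlen(msg), seed);

  /* Streaming variant: t1ha2_init's (seed_x, seed_y) parameters are an
   * assumption from the upstream t1ha.h, not shown in this diff. */
  t1ha_context_t ctx;
  t1ha2_init(&ctx, seed, 0);
  t1ha2_update(&ctx, msg, strlen(msg));
  uint64_t h64_stream = t1ha2_final(&ctx, NULL);

  printf("atonce=%016llx atonce128=%016llx%016llx stream=%016llx\n",
         (unsigned long long)h64, (unsigned long long)hi,
         (unsigned long long)lo, (unsigned long long)h64_stream);
  return 0;
}
```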