You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

aes_cryptobox.c 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. /*
  2. * Copyright (c) 2017, Vsevolod Stakhov
  3. * Copyright (c) 2017, Frank Denis
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. * * Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * * Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
  15. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  16. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  18. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  19. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  20. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  21. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  23. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "config.h"
  26. #include "ottery-internal.h"
  27. #include "cryptobox.h"
  28. #if defined(__x86_64__) && defined(RSPAMD_HAS_TARGET_ATTR)
  29. #if defined(__GNUC__) && !defined(__clang__)
  30. #pragma GCC push_options
  31. #pragma GCC target("aes")
  32. #endif
  33. #ifndef __SSE2__
  34. #define __SSE2__
  35. #endif
  36. #ifndef __SSE__
  37. #define __SSE__
  38. #endif
  39. #ifndef __AES__
  40. #define __AES__
  41. #endif
  42. #include <immintrin.h>
  /* AES-128 round count; the key schedule below holds ROUNDS + 1 round keys. */
  #define ROUNDS 10

  /*
   * Generator state: the expanded round-key schedule followed by the block
   * counter that aes_round() reads on entry and writes back on exit.
   */
  struct RSPAMD_ALIGNED(16) aes_rng_state {
      __m128i round_keys[ROUNDS + 1];
      __m128i counter;
  };
  typedef struct aes_rng_state aes_stream_state;

  /* Size of the whole state structure, as placed in the PRF descriptor. */
  #define STATE_LEN sizeof(aes_stream_state)
  /* Number of seed bytes loaded by aes_cryptobox_state_setup(). */
  #define STATE_BYTES 16
  /* Number of bytes written to the output buffer by one aes_round() call. */
  #define OUTPUT_LEN 1024
  51. static void
  52. aes_key_expand (__m128i round_keys[ROUNDS + 1], __m128i t) __attribute__((target("aes")));
  53. static void
  54. aes_key_expand (__m128i round_keys[ROUNDS + 1], __m128i t)
  55. {
  56. __m128i t1;
  57. #define DO_ROUND_KEY(ROUND, RC) \
  58. do { \
  59. t1 = _mm_aeskeygenassist_si128(t, (RC)); \
  60. round_keys[ROUND] = t; \
  61. t = _mm_xor_si128(t, _mm_slli_si128(t, 4)); \
  62. t = _mm_xor_si128(t, _mm_slli_si128(t, 8)); \
  63. t = _mm_xor_si128(t, _mm_shuffle_epi32(t1, 0xff)); \
  64. } while (0)
  65. DO_ROUND_KEY(0, 1);
  66. DO_ROUND_KEY(1, 2);
  67. DO_ROUND_KEY(2, 4);
  68. DO_ROUND_KEY(3, 8);
  69. DO_ROUND_KEY(4, 16);
  70. DO_ROUND_KEY(5, 32);
  71. DO_ROUND_KEY(6, 64);
  72. DO_ROUND_KEY(7, 128);
  73. DO_ROUND_KEY(8, 27);
  74. DO_ROUND_KEY(9, 54);
  75. round_keys[10] = t;
  76. }
  77. /*
  78. * Computes one 128 bytes block and refresh keys
  79. */
  80. static void
  81. aes_round(unsigned char *buf, struct aes_rng_state *st) __attribute__((target("aes")));
  82. static void
  83. aes_round(unsigned char *buf, struct aes_rng_state *st)
  84. {
  85. const __m128i one = _mm_set_epi64x(0, 1);
  86. __m128i *round_keys = st->round_keys;
  87. __m128i c0, c1, c2, c3, c4, c5, c6, c7;
  88. __m128i r0, r1, r2, r3, r4, r5, r6, r7;
  89. __m128i s0, s1, s2, s3, s4, s5, s6, s7;
  90. size_t i;
  91. #define COMPUTE_ROUNDS(N) \
  92. do { \
  93. r##N = _mm_aesenc_si128( _mm_xor_si128(c##N, round_keys[0]), round_keys[1]); \
  94. r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[2]), round_keys[3]); \
  95. r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[4]), round_keys[5]); \
  96. s##N = r##N; \
  97. r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[6]), round_keys[7]); \
  98. r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[8]), round_keys[9]); \
  99. r##N = _mm_xor_si128(s##N, _mm_aesenclast_si128(r##N, round_keys[10])); \
  100. } while (0)
  101. c0 = st->counter;
  102. for (i = 0; i < OUTPUT_LEN / 128; i ++) {
  103. c1 = _mm_add_epi64 (c0, one);
  104. c2 = _mm_add_epi64 (c1, one);
  105. c3 = _mm_add_epi64 (c2, one);
  106. c4 = _mm_add_epi64 (c3, one);
  107. c5 = _mm_add_epi64 (c4, one);
  108. c6 = _mm_add_epi64 (c5, one);
  109. c7 = _mm_add_epi64 (c6, one);
  110. COMPUTE_ROUNDS(0);
  111. COMPUTE_ROUNDS(1);
  112. COMPUTE_ROUNDS(2);
  113. COMPUTE_ROUNDS(3);
  114. COMPUTE_ROUNDS(4);
  115. COMPUTE_ROUNDS(5);
  116. COMPUTE_ROUNDS(6);
  117. COMPUTE_ROUNDS(7);
  118. c0 = _mm_add_epi64 (c7, one);
  119. _mm_storeu_si128 ((__m128i *) (void *) (buf + 0), r0);
  120. _mm_storeu_si128 ((__m128i *) (void *) (buf + 16), r1);
  121. _mm_storeu_si128 ((__m128i *) (void *) (buf + 32), r2);
  122. _mm_storeu_si128 ((__m128i *) (void *) (buf + 48), r3);
  123. _mm_storeu_si128 ((__m128i *) (void *) (buf + 64), r4);
  124. _mm_storeu_si128 ((__m128i *) (void *) (buf + 80), r5);
  125. _mm_storeu_si128 ((__m128i *) (void *) (buf + 96), r6);
  126. _mm_storeu_si128 ((__m128i *) (void *) (buf + 112), r7);
  127. buf += 128;
  128. }
  129. st->counter = c0;
  130. c0 = _mm_setzero_si128();
  131. COMPUTE_ROUNDS(0);
  132. aes_key_expand(round_keys, r0);
  133. }
  134. static void
  135. aes_cryptobox_state_setup (void *state_, const uint8_t *bytes)
  136. {
  137. struct aes_rng_state *x = state_;
  138. aes_key_expand (x->round_keys,
  139. _mm_loadu_si128((const __m128i *) (const void *)bytes));
  140. }
  /*
   * Generate callback: writes one OUTPUT_LEN-byte block of output.
   *
   * state_  points to the struct aes_rng_state to draw from (and advance).
   * output  destination buffer handed straight to aes_round().
   * idx     not used by this implementation.
   */
  static void
  aes_cryptobox_generate (void *state_, uint8_t *output, uint32_t idx)
  {
      (void) idx;

      aes_round (output, (struct aes_rng_state *) state_);
  }
  147. #define PRF_AES(r) { \
  148. "AES-" #r, \
  149. "AES-" #r "-NOSIMD", \
  150. "AES-" #r "-NOSIMD-DEFAULT", \
  151. STATE_LEN, \
  152. STATE_BYTES, \
  153. OUTPUT_LEN, \
  154. OTTERY_CPUCAP_AES, \
  155. aes_cryptobox_state_setup, \
  156. aes_cryptobox_generate \
  157. }
  158. const struct ottery_prf ottery_prf_aes_cryptobox_ = PRF_AES(128);
  159. #endif /* x86_64 */