You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

chacha_merged.c 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /*
  2. * This code is based on Dan Bernstein's pure C "merged" ChaCha
  3. * implementation; details below.
  4. *
  5. * Note that I've ripped out all of the code that wasn't suitable for doing
  6. * block-oriented operation, all (residual) support for 128-bit ChaCha keys,
  7. * all support for counter values over 32 bits, the ability to xor the stream
  8. * with a plaintext, and so on.
  9. *
  10. * Future versions of this might remove big-endian conversions too. DO NOT use
  11. * this code for your stream cipher: go back to the original source. (I got
  12. * this copy from SUPERCOP).
  13. */
  14. /*
  15. chacha-merged.c version 20080118
  16. D. J. Bernstein
  17. Public domain.
  18. */
  19. #include <string.h>
  20. #include "ottery-internal.h"
  21. #define u8 uint8_t
  22. #define u32 uint32_t
  23. #include "chacha_merged_ecrypt.h"
  /*
   * 32-bit building blocks for the ChaCha core.  ROTL32 and U32V are expected
   * to come from chacha_merged_ecrypt.h.
   *
   *   ROTATE(v,c)  - v rotated left by c bits
   *   XOR(v,w)     - bitwise exclusive-or
   *   PLUS(v,w)    - addition, reduced through U32V
   *   PLUSONE(v)   - PLUS(v, 1)
   */
  #define ROTATE(v,c) (ROTL32(v,c))
  #define XOR(v,w) ((v) ^ (w))
  #define PLUS(v,w) (U32V((v) + (w)))
  #define PLUSONE(v) (PLUS((v),1))

  /*
   * One ChaCha quarter-round, updating the four words a, b, c, d in place.
   * Every argument must be a side-effect-free lvalue: each is evaluated and
   * assigned several times.
   *
   * The expansion is a single brace-enclosed compound statement so that all
   * eight assignments stay together when the macro is used as the body of an
   * unbraced if/for/while.  Braces are used rather than do { } while (0)
   * because existing call sites invoke the macro without a trailing
   * semicolon; a compound statement is valid with or without one.
   */
  #define QUARTERROUND(a,b,c,d) { \
    a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
    c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
    a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
    c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); \
  }
  33. static const char sigma[16] = "expand 32-byte k";
  34. static void ECRYPT_keysetup(ECRYPT_ctx *x,const u8 *k,u32 ivbits)
  35. {
  36. const char *constants;
  37. (void)ivbits;
  38. x->input[4] = U8TO32_LITTLE(k + 0);
  39. x->input[5] = U8TO32_LITTLE(k + 4);
  40. x->input[6] = U8TO32_LITTLE(k + 8);
  41. x->input[7] = U8TO32_LITTLE(k + 12);
  42. k += 16;
  43. constants = sigma;
  44. x->input[8] = U8TO32_LITTLE(k + 0);
  45. x->input[9] = U8TO32_LITTLE(k + 4);
  46. x->input[10] = U8TO32_LITTLE(k + 8);
  47. x->input[11] = U8TO32_LITTLE(k + 12);
  48. x->input[0] = U8TO32_LITTLE(constants + 0);
  49. x->input[1] = U8TO32_LITTLE(constants + 4);
  50. x->input[2] = U8TO32_LITTLE(constants + 8);
  51. x->input[3] = U8TO32_LITTLE(constants + 12);
  52. }
  53. static void ECRYPT_ivsetup(ECRYPT_ctx *x,const u8 *iv)
  54. {
  55. x->input[12] = 0;
  56. x->input[13] = 0;
  57. x->input[14] = U8TO32_LITTLE(iv + 0);
  58. x->input[15] = U8TO32_LITTLE(iv + 4);
  59. }
  60. #define IDX_STEP 16
  61. #define OUTPUT_LEN (IDX_STEP * 64)
  62. static inline void chacha_merged_getblocks(const int chacha_rounds, ECRYPT_ctx *x,u8 *c) __attribute__((always_inline));
  63. /** Generate OUTPUT_LEN bytes of output using the key, nonce, and counter in x,
  64. * and store them in c.
  65. */
  66. static void chacha_merged_getblocks(const int chacha_rounds, ECRYPT_ctx *x,u8 *c)
  67. {
  68. u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
  69. u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
  70. unsigned i, block;
  71. j0 = x->input[0];
  72. j1 = x->input[1];
  73. j2 = x->input[2];
  74. j3 = x->input[3];
  75. j4 = x->input[4];
  76. j5 = x->input[5];
  77. j6 = x->input[6];
  78. j7 = x->input[7];
  79. j8 = x->input[8];
  80. j9 = x->input[9];
  81. j10 = x->input[10];
  82. j11 = x->input[11];
  83. j12 = x->input[12];
  84. j13 = x->input[13];
  85. j14 = x->input[14];
  86. j15 = x->input[15];
  87. for (block = 0; block < IDX_STEP; ++block) {
  88. x0 = j0;
  89. x1 = j1;
  90. x2 = j2;
  91. x3 = j3;
  92. x4 = j4;
  93. x5 = j5;
  94. x6 = j6;
  95. x7 = j7;
  96. x8 = j8;
  97. x9 = j9;
  98. x10 = j10;
  99. x11 = j11;
  100. x12 = j12;
  101. x13 = j13;
  102. x14 = j14;
  103. x15 = j15;
  104. for (i = chacha_rounds;i > 0;i -= 2) {
  105. QUARTERROUND( x0, x4, x8,x12)
  106. QUARTERROUND( x1, x5, x9,x13)
  107. QUARTERROUND( x2, x6,x10,x14)
  108. QUARTERROUND( x3, x7,x11,x15)
  109. QUARTERROUND( x0, x5,x10,x15)
  110. QUARTERROUND( x1, x6,x11,x12)
  111. QUARTERROUND( x2, x7, x8,x13)
  112. QUARTERROUND( x3, x4, x9,x14)
  113. }
  114. x0 = PLUS(x0,j0);
  115. x1 = PLUS(x1,j1);
  116. x2 = PLUS(x2,j2);
  117. x3 = PLUS(x3,j3);
  118. x4 = PLUS(x4,j4);
  119. x5 = PLUS(x5,j5);
  120. x6 = PLUS(x6,j6);
  121. x7 = PLUS(x7,j7);
  122. x8 = PLUS(x8,j8);
  123. x9 = PLUS(x9,j9);
  124. x10 = PLUS(x10,j10);
  125. x11 = PLUS(x11,j11);
  126. x12 = PLUS(x12,j12);
  127. x13 = PLUS(x13,j13);
  128. x14 = PLUS(x14,j14);
  129. x15 = PLUS(x15,j15);
  130. j12 = PLUSONE(j12);
  131. /* Ottery: j13 can never need to be incremented. */
  132. U32TO8_LITTLE(c + 0,x0);
  133. U32TO8_LITTLE(c + 4,x1);
  134. U32TO8_LITTLE(c + 8,x2);
  135. U32TO8_LITTLE(c + 12,x3);
  136. U32TO8_LITTLE(c + 16,x4);
  137. U32TO8_LITTLE(c + 20,x5);
  138. U32TO8_LITTLE(c + 24,x6);
  139. U32TO8_LITTLE(c + 28,x7);
  140. U32TO8_LITTLE(c + 32,x8);
  141. U32TO8_LITTLE(c + 36,x9);
  142. U32TO8_LITTLE(c + 40,x10);
  143. U32TO8_LITTLE(c + 44,x11);
  144. U32TO8_LITTLE(c + 48,x12);
  145. U32TO8_LITTLE(c + 52,x13);
  146. U32TO8_LITTLE(c + 56,x14);
  147. U32TO8_LITTLE(c + 60,x15);
  148. c += 64;
  149. }
  150. }
  151. #define STATE_LEN (sizeof(ECRYPT_ctx))
  152. #define STATE_BYTES 40
  153. static void
  154. chacha_merged_state_setup(void *state_, const uint8_t *bytes)
  155. {
  156. ECRYPT_ctx *x = state_;
  157. ECRYPT_keysetup(x, bytes, 0);
  158. ECRYPT_ivsetup(x, bytes+32);
  159. }
  160. static void
  161. chacha8_merged_generate(void *state_, uint8_t *output, uint32_t idx)
  162. {
  163. ECRYPT_ctx *x = state_;
  164. x->input[12] = idx * IDX_STEP;
  165. chacha_merged_getblocks(8, x, output);
  166. }
  167. static void
  168. chacha12_merged_generate(void *state_, uint8_t *output, uint32_t idx)
  169. {
  170. ECRYPT_ctx *x = state_;
  171. x->input[12] = idx * IDX_STEP;
  172. chacha_merged_getblocks(12, x, output);
  173. }
  174. static void
  175. chacha20_merged_generate(void *state_, uint8_t *output, uint32_t idx)
  176. {
  177. ECRYPT_ctx *x = state_;
  178. x->input[12] = idx * IDX_STEP;
  179. chacha_merged_getblocks(20, x, output);
  180. }
  181. #define PRF_CHACHA(r) { \
  182. "CHACHA" #r, \
  183. "CHACHA" #r "-NOSIMD", \
  184. "CHACHA" #r "-NOSIMD-DEFAULT", \
  185. STATE_LEN, \
  186. STATE_BYTES, \
  187. OUTPUT_LEN, \
  188. 0, \
  189. chacha_merged_state_setup, \
  190. chacha ## r ## _merged_generate \
  191. }
  192. const struct ottery_prf ottery_prf_chacha8_merged_ = PRF_CHACHA(8);
  193. const struct ottery_prf ottery_prf_chacha12_merged_ = PRF_CHACHA(12);
  194. const struct ottery_prf ottery_prf_chacha20_merged_ = PRF_CHACHA(20);