You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

sum_ppc64le.s 4.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. // Copyright 2019 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ppc64le,!gccgo,!appengine
  5. #include "textflag.h"
  6. // This was ported from the amd64 implementation.
  // POLY1305_ADD absorbs one full 16-byte block into the accumulator and
  // advances the message pointer:
  //   h2:h1:h0 += the two 64-bit words at 0(msg) and 8(msg), plus 1 in h2
  //   msg      += 16
  // Arguments (all registers):
  //   msg         address of the current block (updated)
  //   h0, h1, h2  accumulator limbs, low to high (updated)
  //   t0, t1, t2  scratch (clobbered)
  // ADDC starts a carry chain that the two ADDE instructions continue, so the
  // three additions must stay in this order with nothing that changes the
  // carry bit placed between them.
  7. #define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
  8. MOVD (msg), t0; \
  9. MOVD 8(msg), t1; \
  10. MOVD $1, t2; \
  11. ADDC t0, h0, h0; \
  12. ADDE t1, h1, h1; \
  13. ADDE t2, h2; \
  14. ADD $16, msg
  // POLY1305_MUL multiplies the accumulator h2:h1:h0 by r1:r0 and folds the
  // result back into h2:h1:h0, partially reduced modulo 2^130 - 5.
  // Arguments (all registers):
  //   h0, h1, h2  accumulator limbs, low to high (input and output)
  //   r0, r1      multiplier limbs (read only)
  //   t0 .. t5    scratch (clobbered)
  // Step 1 (through the first ADDZE t3): build the product in t3:t2:t1:t0
  //   from MULLD (low 64 bits) and MULHDU (high 64 bits, unsigned) partial
  //   products. h0 is reused as a temporary once its last multiply has been
  //   issued. Only the low halves of the h2 products are computed, which
  //   assumes h2 is small -- NOTE(review): confirm the bound on h2.
  // Step 2 (from ANDCC on): keep bits 0..129 as h0 = t0, h1 = t1,
  //   h2 = t2 & 3, then add the bits above them back in twice: once as
  //   t3:(t2 & ~3) and once as (t3:t2) >> 2. For an overflow value c that is
  //   4*c + c = 5*c, matching 2^130 = 5 (mod 2^130 - 5).
  // The multiplies are interleaved with ADDC/ADDE/ADDZE carry chains, and the
  // MOVD/AND/shift instructions that sit inside those chains are relied on
  // not to disturb the carry bit; do not reorder without re-checking that.
  15. #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
  16. MULLD r0, h0, t0; \
  17. MULLD r0, h1, t4; \
  18. MULHDU r0, h0, t1; \
  19. MULHDU r0, h1, t5; \
  20. ADDC t4, t1, t1; \
  21. MULLD r0, h2, t2; \
  22. ADDZE t5; \
  23. MULHDU r1, h0, t4; \
  24. MULLD r1, h0, h0; \
  25. ADD t5, t2, t2; \
  26. ADDC h0, t1, t1; \
  27. MULLD h2, r1, t3; \
  28. ADDZE t4, h0; \
  29. MULHDU r1, h1, t5; \
  30. MULLD r1, h1, t4; \
  31. ADDC t4, t2, t2; \
  32. ADDE t5, t3, t3; \
  33. ADDC h0, t2, t2; \
  34. MOVD $-4, t4; \
  35. MOVD t0, h0; \
  36. MOVD t1, h1; \
  37. ADDZE t3; \
  38. ANDCC $3, t2, h2; \
  39. AND t2, t4, t0; \
  40. ADDC t0, h0, h0; \
  41. ADDE t3, h1, h1; \
  42. SLD $62, t3, t4; \
  43. SRD $2, t2; \
  44. ADDZE h2; \
  45. OR t4, t2, t2; \
  46. SRD $2, t3; \
  47. ADDC t2, h0, h0; \
  48. ADDE t3, h1, h1; \
  49. ADDZE h2
  // poly1305Mask is a 16-byte read-only constant made of two 64-bit words.
  // initialize ANDs them with the first two 64-bit words of the key to
  // produce the r0/r1 values it stores in the state (the Poly1305 clamp of r).
  50. DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
  51. DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
  52. GLOBL ·poly1305Mask<>(SB), RODATA, $16
  53. // func update(state *[7]uint64, msg []byte)
  //
  // update folds msg into the accumulator held in state.
  // State words read: 0/8/16(state) = h0/h1/h2, 24/32(state) = r0/r1.
  // State words written: h0, h1, h2 only.
  //
  // Every full 16-byte block goes through POLY1305_ADD then POLY1305_MUL.
  // A trailing partial block (1..15 bytes) is assembled in R17:R16 with a
  // single 1 bit placed just above its last byte, added to h without the
  // extra 1 in h2 that POLY1305_ADD supplies, and multiplied once by
  // branching back to the shared multiply label.
  //
  // Register roles: R3 state, R4 message cursor, R5 bytes remaining,
  // R8-R10 h0-h2, R11-R12 r0-r1, R14/R16-R18/R20-R23 scratch.
  // Multi-byte loads are used as little-endian values (this file is built
  // for ppc64le only, per the build tag).
  54. TEXT ·update(SB), $0-32
  55. MOVD state+0(FP), R3 // R3 = state
  56. MOVD msg_base+8(FP), R4 // R4 = address of msg[0]
  57. MOVD msg_len+16(FP), R5 // R5 = len(msg)
  58. MOVD 0(R3), R8 // h0
  59. MOVD 8(R3), R9 // h1
  60. MOVD 16(R3), R10 // h2
  61. MOVD 24(R3), R11 // r0
  62. MOVD 32(R3), R12 // r1
  63. CMP R5, $16
  64. BLT bytes_between_0_and_15 // fewer than 16 bytes: skip the block loop
  65. loop: // one full block: h += block (with 1 added to h2), R4 += 16
  66. POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
  67. multiply: // h = h * r; also entered from carry with the padded tail already added
  68. POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
  69. ADD $-16, R5 // one block consumed
  70. CMP R5, $16
  71. BGE loop
  72. bytes_between_0_and_15:
  73. CMP $0, R5
  74. BEQ done // nothing left: store h and return
  75. MOVD $0, R16 // low word of the final block (later added to h0)
  76. MOVD $0, R17 // high word of the final block (later added to h1)
  77. flush_buffer: // no branch in the code shown targets this label; reached by fall-through
  78. CMP R5, $8
  79. BLE just1 // 8 bytes or fewer: high word stays 0 for now
  80. MOVD $8, R21
  81. SUB R21, R5, R21 // R21 = R5 - 8 = number of message bytes in the high word
  82. // More than 8 bytes remain -- load the last 8 bytes of msg,
  83. // i.e. msg[R5-8 : R5], into R17 (the high word)
  84. MOVD (R4)(R21), R17
  85. MOVD $16, R22
  86. // R22 = (16 - R5) * 8: bit width of the loaded bytes that belong to the low word
  87. SUB R5, R22, R22
  88. SLD $3, R22
  89. // Shift those bits out so only msg[8 : R5] remains, in the low bytes of R17
  90. SRD R22, R17, R17
  91. // Set the terminating 1 bit just above the last message byte: 1 << (8 * R21)
  92. MOVD $1, R23
  93. SLD $3, R21
  94. SLD R21, R23, R23
  95. OR R23, R17, R17
  96. // The low word is now a full 8 bytes; let just1 load it
  97. MOVD $8, R5
  98. just1:
  99. CMP R5, $8
  100. BLT less8
  101. // Exactly 8 bytes go into the low word
  102. MOVD (R4), R16
  103. CMP $0, R17
  104. // R17 is non-zero only when the more-than-8 path above already placed
  105. // the terminating 1; otherwise the 1 goes in bit 0 of the high word.
  106. BNE carry
  107. MOVD $1, R17
  108. BR carry
  109. less8: // 1..7 bytes: gather them into R16 with 4-, 2- and 1-byte loads
  110. MOVD $0, R16 // low word being assembled
  111. MOVD $0, R22 // shift count = 8 * bytes gathered so far
  112. CMP R5, $4
  113. BLT less4
  114. MOVWZ (R4), R16 // zero-extended 4-byte load
  115. ADD $4, R4
  116. ADD $-4, R5
  117. MOVD $32, R22
  118. less4:
  119. CMP R5, $2
  120. BLT less2
  121. MOVHZ (R4), R21 // zero-extended 2-byte load
  122. SLD R22, R21, R21 // position it above the bytes already gathered
  123. OR R16, R21, R16
  124. ADD $16, R22
  125. ADD $-2, R5
  126. ADD $2, R4
  127. less2:
  128. CMP $0, R5
  129. BEQ insert1
  130. MOVBZ (R4), R21 // last single byte
  131. SLD R22, R21, R21
  132. OR R16, R21, R16
  133. ADD $8, R22
  134. insert1:
  135. // Set the terminating 1 bit just above the last message byte: 1 << R22
  136. MOVD $1, R21
  137. SLD R22, R21, R21
  138. OR R16, R21, R16
  139. carry:
  140. // h += R17:R16; unlike POLY1305_ADD, h2 receives only the carry
  141. ADDC R16, R8
  142. ADDE R17, R9
  143. ADDE $0, R10
  144. MOVD $16, R5 // the ADD $-16 after multiply then leaves R5 = 0, ending the loop
  145. ADD R5, R4
  146. BR multiply
  147. done:
  148. // Save h0, h1, h2 in state
  149. MOVD R8, 0(R3)
  150. MOVD R9, 8(R3)
  151. MOVD R10, 16(R3)
  152. RET
  153. // func initialize(state *[7]uint64, key *[32]byte)
  //
  // initialize copies key material into state:
  //   24(state), 32(state) = first two 64-bit key words ANDed with poly1305Mask
  //   40(state), 48(state) = last two 64-bit key words, unchanged
  // The first three state words (0, 8, 16) are not written here.
  // All four key words are loaded before any store to state is issued.
  154. TEXT ·initialize(SB), $0-16
  155. MOVD state+0(FP), R3 // R3 = state
  156. MOVD key+8(FP), R4 // R4 = key
  157. // NOTE(review): nothing below zeroes the state; words 0..2 are presumably already zero on entry -- confirm against the caller
  158. // Load the four 64-bit key words
  159. MOVD 0(R4), R5
  160. MOVD 8(R4), R6
  161. MOVD 16(R4), R7
  162. MOVD 24(R4), R8
  163. // Address of key mask
  164. MOVD $·poly1305Mask<>(SB), R9
  165. // Save the upper half of the key, unmodified, in state
  166. MOVD R7, 40(R3)
  167. MOVD R8, 48(R3)
  168. // Load the two mask words (R7/R8 are free again after the stores above)
  169. MOVD (R9), R7
  170. MOVD 8(R9), R8
  171. // AND the lower half of the key with the mask
  172. AND R5, R7, R5
  173. AND R6, R8, R6
  174. // Save masked key in state
  175. MOVD R5, 24(R3)
  176. MOVD R6, 32(R3)
  177. RET
  178. // func finalize(tag *[TagSize]byte, state *[7]uint64)
  //
  // finalize writes the 16-byte tag computed from state:
  //   1. load h2:h1:h0 from 0/8/16(state);
  //   2. subtract p = 2^130 - 5 (limbs -5, -1, 3) and keep the difference
  //      only if the subtraction did not borrow, i.e. h >= p;
  //   3. add the two words at 40(state) and 48(state) to the low 128 bits,
  //      discarding the final carry;
  //   4. store the two resulting words at 0(tag) and 8(tag).
  // state is only read.
  179. TEXT ·finalize(SB), $0-16
  180. MOVD tag+0(FP), R3 // R3 = tag
  181. MOVD state+8(FP), R4 // R4 = state
  182. // Get h0, h1, h2 from state
  183. MOVD 0(R4), R5
  184. MOVD 8(R4), R6
  185. MOVD 16(R4), R7
  186. // Keep copies of h0, h1 in case the subtraction below has to be undone
  187. MOVD R5, R8
  188. MOVD R6, R9
  189. MOVD $3, R20 // top limb of p
  190. MOVD $-1, R21 // middle limb of p (all ones)
  191. SUBC $-5, R5 // low limb: h0 - (2^64 - 5); starts the borrow chain
  192. SUBE R21, R6
  193. SUBE R20, R7
  194. MOVD $0, R21
  195. SUBZE R21 // R21 = 0 if h - p did not borrow (h >= p), -1 if it did
  196. // Compare R21 with zero: equal means no borrow, so h - p is the reduced value
  197. CMP $0, R21
  198. ISEL $2, R5, R8, R5 // if equal keep h0 - p0, else restore h0 (NOTE(review): condition bit 2 taken to be EQ)
  199. ISEL $2, R6, R9, R6 // same selection for the second limb
  200. MOVD 40(R4), R8 // key word saved by initialize at 40(state)
  201. MOVD 48(R4), R9 // key word saved by initialize at 48(state)
  202. ADDC R8, R5
  203. ADDE R9, R6 // carry out of the top word is dropped
  204. MOVD R5, 0(R3)
  205. MOVD R6, 8(R3)
  206. RET