You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

popcnt_amd64.s 1.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. // +build !go1.9
  2. // +build amd64,!appengine
  // hasAsm reports whether the CPU advertises the POPCNT instruction.
  // It executes CPUID leaf 1 and returns bit 23 of ECX (the POPCNT feature
  // flag) as a single byte, 0 or 1, in ret+0(FP).
  // Presumably declared in Go as `func hasAsm() bool` -- the declaration is
  // not in this file.
  // Flag 4 is NOSPLIT; the frame is $0 with 1 byte of results.
  // CPUID overwrites AX, BX, CX and DX.
  TEXT ·hasAsm(SB),4,$0-1
      MOVQ $1, AX                 // select CPUID leaf 1 (feature information)
      CPUID
      TESTL $(1<<23), CX          // isolate the POPCNT feature bit
      SETNE ret+0(FP)             // 1 if the bit is set, 0 otherwise
      RET
  // POPCNTQ_DX_DX emits the machine code for "POPCNTQ DX, DX" byte by byte:
  //   F3       mandatory prefix
  //   48       REX.W (64-bit operand size)
  //   0F B8    POPCNT opcode
  //   D2       ModRM: mod=11 (register), reg=DX, r/m=DX
  // The result replaces DX with the number of set bits it held.
  // Presumably hand-encoded because the assemblers this file targets did not
  // accept the POPCNTQ mnemonic -- confirm before replacing it.
  #define POPCNTQ_DX_DX \
      BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
  // popcntSliceAsm returns the total number of set bits in the 64-bit words
  // of s.
  //
  // Frame layout (flag 4 = NOSPLIT, $0 locals, 32 bytes of args+results):
  //   s_base+0(FP)   pointer to the first word
  //   s_len+8(FP)    number of 64-bit words
  //   ret+24(FP)     uint64 result
  // Presumably declared in Go as `func popcntSliceAsm(s []uint64) uint64` --
  // the declaration is not in this file.
  //
  // Registers: AX = running total, SI = cursor into s, CX = words remaining,
  // DX = per-word bit count.
  // An empty slice returns 0 without touching memory.
  TEXT ·popcntSliceAsm(SB),4,$0-32
      XORQ AX, AX                 // total = 0
      MOVQ s_base+0(FP), SI
      MOVQ s_len+8(FP), CX
      TESTQ CX, CX
      JZ popcntSliceEnd           // nothing to count
  popcntSliceLoop:
      // Hand-encoded POPCNT: F3 REX.W 0F B8, ModRM 0x16 = reg DX, r/m (SI).
      BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
      ADDQ DX, AX
      ADDQ $8, SI                 // advance to the next 64-bit word
      // DECQ/JNZ instead of the legacy LOOP instruction, which is
      // microcoded and slow on Intel cores. CX > 0 here on every pass.
      DECQ CX
      JNZ popcntSliceLoop
  popcntSliceEnd:
      MOVQ AX, ret+24(FP)
      RET
  // popcntMaskSliceAsm returns the number of bits that are set in s but not
  // in m, i.e. the sum over i of popcount(s[i] AND NOT m[i]) for 64-bit words.
  //
  // Frame layout (flag 4 = NOSPLIT, $0 locals, 56 bytes of args+results):
  //   s_base+0(FP)   pointer to the first word of s
  //   s_len+8(FP)    number of 64-bit words in s
  //   m_base+24(FP)  pointer to the first word of m
  //   ret+48(FP)     uint64 result
  // Presumably declared in Go as
  // `func popcntMaskSliceAsm(s, m []uint64) uint64` -- not in this file.
  //
  // The loop count comes from s_len only; m's length is never read, so this
  // routine reads len(s) words from m unchecked. Callers must guarantee m is
  // at least as long as s.
  //
  // Registers: AX = running total, SI/DI = cursors into s/m,
  // CX = words remaining, DX = scratch word.
  TEXT ·popcntMaskSliceAsm(SB),4,$0-56
      XORQ AX, AX                 // total = 0
      MOVQ s_base+0(FP), SI
      MOVQ s_len+8(FP), CX
      TESTQ CX, CX
      JZ popcntMaskSliceEnd       // empty s: result is 0
      MOVQ m_base+24(FP), DI
  popcntMaskSliceLoop:
      MOVQ (DI), DX
      NOTQ DX
      ANDQ (SI), DX               // DX = s[i] &^ m[i]
      POPCNTQ_DX_DX
      ADDQ DX, AX
      ADDQ $8, SI
      ADDQ $8, DI
      // DECQ/JNZ instead of the legacy LOOP instruction, which is
      // microcoded and slow on Intel cores. CX > 0 here on every pass.
      DECQ CX
      JNZ popcntMaskSliceLoop
  popcntMaskSliceEnd:
      MOVQ AX, ret+48(FP)
      RET
  // popcntAndSliceAsm returns the number of bits set in both s and m, i.e.
  // the sum over i of popcount(s[i] AND m[i]) for 64-bit words.
  //
  // Frame layout (flag 4 = NOSPLIT, $0 locals, 56 bytes of args+results):
  //   s_base+0(FP)   pointer to the first word of s
  //   s_len+8(FP)    number of 64-bit words in s
  //   m_base+24(FP)  pointer to the first word of m
  //   ret+48(FP)     uint64 result
  // Presumably declared in Go as
  // `func popcntAndSliceAsm(s, m []uint64) uint64` -- not in this file.
  //
  // The loop count comes from s_len only; m's length is never read, so this
  // routine reads len(s) words from m unchecked. Callers must guarantee m is
  // at least as long as s.
  //
  // Registers: AX = running total, SI/DI = cursors into s/m,
  // CX = words remaining, DX = scratch word.
  TEXT ·popcntAndSliceAsm(SB),4,$0-56
      XORQ AX, AX                 // total = 0
      MOVQ s_base+0(FP), SI
      MOVQ s_len+8(FP), CX
      TESTQ CX, CX
      JZ popcntAndSliceEnd        // empty s: result is 0
      MOVQ m_base+24(FP), DI
  popcntAndSliceLoop:
      MOVQ (DI), DX
      ANDQ (SI), DX               // DX = s[i] & m[i]
      POPCNTQ_DX_DX
      ADDQ DX, AX
      ADDQ $8, SI
      ADDQ $8, DI
      // DECQ/JNZ instead of the legacy LOOP instruction, which is
      // microcoded and slow on Intel cores. CX > 0 here on every pass.
      DECQ CX
      JNZ popcntAndSliceLoop
  popcntAndSliceEnd:
      MOVQ AX, ret+48(FP)
      RET
  // popcntOrSliceAsm returns the number of bits set in s or m, i.e. the sum
  // over i of popcount(s[i] OR m[i]) for 64-bit words.
  //
  // Frame layout (flag 4 = NOSPLIT, $0 locals, 56 bytes of args+results):
  //   s_base+0(FP)   pointer to the first word of s
  //   s_len+8(FP)    number of 64-bit words in s
  //   m_base+24(FP)  pointer to the first word of m
  //   ret+48(FP)     uint64 result
  // Presumably declared in Go as
  // `func popcntOrSliceAsm(s, m []uint64) uint64` -- not in this file.
  //
  // The loop count comes from s_len only; m's length is never read, so this
  // routine reads len(s) words from m unchecked. Callers must guarantee m is
  // at least as long as s.
  //
  // Registers: AX = running total, SI/DI = cursors into s/m,
  // CX = words remaining, DX = scratch word.
  TEXT ·popcntOrSliceAsm(SB),4,$0-56
      XORQ AX, AX                 // total = 0
      MOVQ s_base+0(FP), SI
      MOVQ s_len+8(FP), CX
      TESTQ CX, CX
      JZ popcntOrSliceEnd         // empty s: result is 0
      MOVQ m_base+24(FP), DI
  popcntOrSliceLoop:
      MOVQ (DI), DX
      ORQ (SI), DX                // DX = s[i] | m[i]
      POPCNTQ_DX_DX
      ADDQ DX, AX
      ADDQ $8, SI
      ADDQ $8, DI
      // DECQ/JNZ instead of the legacy LOOP instruction, which is
      // microcoded and slow on Intel cores. CX > 0 here on every pass.
      DECQ CX
      JNZ popcntOrSliceLoop
  popcntOrSliceEnd:
      MOVQ AX, ret+48(FP)
      RET
  // popcntXorSliceAsm returns the number of bit positions where s and m
  // differ, i.e. the sum over i of popcount(s[i] XOR m[i]) for 64-bit words.
  //
  // Frame layout (flag 4 = NOSPLIT, $0 locals, 56 bytes of args+results):
  //   s_base+0(FP)   pointer to the first word of s
  //   s_len+8(FP)    number of 64-bit words in s
  //   m_base+24(FP)  pointer to the first word of m
  //   ret+48(FP)     uint64 result
  // Presumably declared in Go as
  // `func popcntXorSliceAsm(s, m []uint64) uint64` -- not in this file.
  //
  // The loop count comes from s_len only; m's length is never read, so this
  // routine reads len(s) words from m unchecked. Callers must guarantee m is
  // at least as long as s.
  //
  // Registers: AX = running total, SI/DI = cursors into s/m,
  // CX = words remaining, DX = scratch word.
  TEXT ·popcntXorSliceAsm(SB),4,$0-56
      XORQ AX, AX                 // total = 0
      MOVQ s_base+0(FP), SI
      MOVQ s_len+8(FP), CX
      TESTQ CX, CX
      JZ popcntXorSliceEnd        // empty s: result is 0
      MOVQ m_base+24(FP), DI
  popcntXorSliceLoop:
      MOVQ (DI), DX
      XORQ (SI), DX               // DX = s[i] ^ m[i]
      POPCNTQ_DX_DX
      ADDQ DX, AX
      ADDQ $8, SI
      ADDQ $8, DI
      // DECQ/JNZ instead of the legacy LOOP instruction, which is
      // microcoded and slow on Intel cores. CX > 0 here on every pass.
      DECQ CX
      JNZ popcntXorSliceLoop
  popcntXorSliceEnd:
      MOVQ AX, ret+48(FP)
      RET