You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

popcnt_amd64.s 1.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. // +build amd64,!appengine,!go1.9
  // func hasAsm() bool
  //
  // Reports whether the CPU advertises the POPCNT instruction.
  // CPUID leaf 1 returns the POPCNT feature flag in bit 23 of ECX.
  //
  // TEXT flag 4 is NOSPLIT; the frame has 0 bytes of locals and a 1-byte result.
  // Clobbers: AX, BX, CX, DX (all written by CPUID) and the flags.
  TEXT ·hasAsm(SB),4,$0-1
      MOVL    $1, AX              // CPUID leaf 1: processor info and feature bits
      CPUID
      BTL     $23, CX             // CF = ECX bit 23 (POPCNT supported)
      SETCS   ret+0(FP)           // store CF as the 0/1 boolean result
      RET
  // POPCNTQ_DX_DX emits the raw machine encoding of POPCNTQ DX, DX:
  // F3 48 0F B8 is POPCNT with a 64-bit operand size, and ModRM 0xD2 selects
  // register-direct mode with DX as both source and destination, so DX is
  // replaced by the number of set bits it held.
  // NOTE(review): presumably spelled out as bytes because the targeted
  // assemblers lacked the mnemonic — confirm before replacing with POPCNTQ.
  9. #define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
  // func popcntSliceAsm(s []uint64) uint64
  //
  // Returns the total number of set bits over every 64-bit word of s.
  // A zero-length slice returns 0 without touching memory.
  //
  // TEXT flag 4 is NOSPLIT. Frame: 0 bytes of locals, 32 bytes of
  // arguments + result (24-byte slice header, 8-byte result).
  //
  // Register roles:
  //   AX = running bit total
  //   SI = pointer to the current word of s
  //   CX = words remaining (always > 0 inside the loop)
  //   DX = popcount of the current word
  TEXT ·popcntSliceAsm(SB),4,$0-32
      XORQ    AX, AX
      MOVQ    s+0(FP), SI
      MOVQ    s_len+8(FP), CX
      TESTQ   CX, CX
      JZ      popcntSliceEnd
  popcntSliceLoop:
      BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
      ADDQ    DX, AX
      ADDQ    $8, SI
      // DECQ/JNZ replaces the legacy LOOP instruction, which is microcoded
      // and slow on many cores; the iteration count is unchanged.
      DECQ    CX
      JNZ     popcntSliceLoop
  popcntSliceEnd:
      MOVQ    AX, ret+24(FP)
      RET
  // func popcntMaskSliceAsm(s, m []uint64) uint64
  //
  // Returns the total number of set bits in s[i] AND NOT m[i], summed over
  // the s_len words of s. A zero-length s returns 0 without touching memory.
  //
  // m's length is never read: the loop consumes s_len words from m, so the
  // caller must guarantee that m holds at least as many words as s.
  //
  // TEXT flag 4 is NOSPLIT. Frame: 0 bytes of locals, 56 bytes of
  // arguments + result (two 24-byte slice headers, 8-byte result).
  //
  // Register roles:
  //   AX = running bit total
  //   SI = pointer to the current word of s
  //   DI = pointer to the current word of m
  //   CX = words remaining (always > 0 inside the loop)
  //   DX = scratch: ^m[i], then s[i] &^ m[i], then its popcount
  TEXT ·popcntMaskSliceAsm(SB),4,$0-56
      XORQ    AX, AX
      MOVQ    s+0(FP), SI
      MOVQ    s_len+8(FP), CX
      TESTQ   CX, CX
      JZ      popcntMaskSliceEnd
      MOVQ    m+24(FP), DI
  popcntMaskSliceLoop:
      MOVQ    (DI), DX
      NOTQ    DX                  // DX = ^m[i]
      ANDQ    (SI), DX            // DX = s[i] &^ m[i]
      POPCNTQ_DX_DX
      ADDQ    DX, AX
      ADDQ    $8, SI
      ADDQ    $8, DI
      // DECQ/JNZ replaces the legacy LOOP instruction, which is microcoded
      // and slow on many cores; the iteration count is unchanged.
      DECQ    CX
      JNZ     popcntMaskSliceLoop
  popcntMaskSliceEnd:
      MOVQ    AX, ret+48(FP)
      RET
  // func popcntAndSliceAsm(s, m []uint64) uint64
  //
  // Returns the total number of set bits in s[i] AND m[i], summed over the
  // s_len words of s. A zero-length s returns 0 without touching memory.
  //
  // m's length is never read: the loop consumes s_len words from m, so the
  // caller must guarantee that m holds at least as many words as s.
  //
  // TEXT flag 4 is NOSPLIT. Frame: 0 bytes of locals, 56 bytes of
  // arguments + result (two 24-byte slice headers, 8-byte result).
  //
  // Register roles:
  //   AX = running bit total
  //   SI = pointer to the current word of s
  //   DI = pointer to the current word of m
  //   CX = words remaining (always > 0 inside the loop)
  //   DX = scratch: s[i] & m[i], then its popcount
  TEXT ·popcntAndSliceAsm(SB),4,$0-56
      XORQ    AX, AX
      MOVQ    s+0(FP), SI
      MOVQ    s_len+8(FP), CX
      TESTQ   CX, CX
      JZ      popcntAndSliceEnd
      MOVQ    m+24(FP), DI
  popcntAndSliceLoop:
      MOVQ    (DI), DX
      ANDQ    (SI), DX            // DX = s[i] & m[i]
      POPCNTQ_DX_DX
      ADDQ    DX, AX
      ADDQ    $8, SI
      ADDQ    $8, DI
      // DECQ/JNZ replaces the legacy LOOP instruction, which is microcoded
      // and slow on many cores; the iteration count is unchanged.
      DECQ    CX
      JNZ     popcntAndSliceLoop
  popcntAndSliceEnd:
      MOVQ    AX, ret+48(FP)
      RET
  // func popcntOrSliceAsm(s, m []uint64) uint64
  //
  // Returns the total number of set bits in s[i] OR m[i], summed over the
  // s_len words of s. A zero-length s returns 0 without touching memory.
  //
  // m's length is never read: the loop consumes s_len words from m, so the
  // caller must guarantee that m holds at least as many words as s.
  //
  // TEXT flag 4 is NOSPLIT. Frame: 0 bytes of locals, 56 bytes of
  // arguments + result (two 24-byte slice headers, 8-byte result).
  //
  // Register roles:
  //   AX = running bit total
  //   SI = pointer to the current word of s
  //   DI = pointer to the current word of m
  //   CX = words remaining (always > 0 inside the loop)
  //   DX = scratch: s[i] | m[i], then its popcount
  TEXT ·popcntOrSliceAsm(SB),4,$0-56
      XORQ    AX, AX
      MOVQ    s+0(FP), SI
      MOVQ    s_len+8(FP), CX
      TESTQ   CX, CX
      JZ      popcntOrSliceEnd
      MOVQ    m+24(FP), DI
  popcntOrSliceLoop:
      MOVQ    (DI), DX
      ORQ     (SI), DX            // DX = s[i] | m[i]
      POPCNTQ_DX_DX
      ADDQ    DX, AX
      ADDQ    $8, SI
      ADDQ    $8, DI
      // DECQ/JNZ replaces the legacy LOOP instruction, which is microcoded
      // and slow on many cores; the iteration count is unchanged.
      DECQ    CX
      JNZ     popcntOrSliceLoop
  popcntOrSliceEnd:
      MOVQ    AX, ret+48(FP)
      RET
  // func popcntXorSliceAsm(s, m []uint64) uint64
  //
  // Returns the total number of set bits in s[i] XOR m[i], summed over the
  // s_len words of s. A zero-length s returns 0 without touching memory.
  //
  // m's length is never read: the loop consumes s_len words from m, so the
  // caller must guarantee that m holds at least as many words as s.
  //
  // TEXT flag 4 is NOSPLIT. Frame: 0 bytes of locals, 56 bytes of
  // arguments + result (two 24-byte slice headers, 8-byte result).
  //
  // Register roles:
  //   AX = running bit total
  //   SI = pointer to the current word of s
  //   DI = pointer to the current word of m
  //   CX = words remaining (always > 0 inside the loop)
  //   DX = scratch: s[i] ^ m[i], then its popcount
  TEXT ·popcntXorSliceAsm(SB),4,$0-56
      XORQ    AX, AX
      MOVQ    s+0(FP), SI
      MOVQ    s_len+8(FP), CX
      TESTQ   CX, CX
      JZ      popcntXorSliceEnd
      MOVQ    m+24(FP), DI
  popcntXorSliceLoop:
      MOVQ    (DI), DX
      XORQ    (SI), DX            // DX = s[i] ^ m[i]
      POPCNTQ_DX_DX
      ADDQ    DX, AX
      ADDQ    $8, SI
      ADDQ    $8, DI
      // DECQ/JNZ replaces the legacy LOOP instruction, which is microcoded
      // and slow on many cores; the iteration count is unchanged.
      DECQ    CX
      JNZ     popcntXorSliceLoop
  popcntXorSliceEnd:
      MOVQ    AX, ret+48(FP)
      RET