diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate/crc32_amd64.s')
-rw-r--r-- | vendor/github.com/klauspost/compress/flate/crc32_amd64.s | 213 |
1 files changed, 0 insertions, 213 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/crc32_amd64.s b/vendor/github.com/klauspost/compress/flate/crc32_amd64.s deleted file mode 100644 index 2fb2079b9d..0000000000 --- a/vendor/github.com/klauspost/compress/flate/crc32_amd64.s +++ /dev/null @@ -1,213 +0,0 @@ -//+build !noasm -//+build !appengine - -// Copyright 2015, Klaus Post, see LICENSE for details. - -// func crc32sse(a []byte) uint32 -TEXT ·crc32sse(SB), 4, $0 - MOVQ a+0(FP), R10 - XORQ BX, BX - - // CRC32 dword (R10), EBX - BYTE $0xF2; BYTE $0x41; BYTE $0x0f - BYTE $0x38; BYTE $0xf1; BYTE $0x1a - - MOVL BX, ret+24(FP) - RET - -// func crc32sseAll(a []byte, dst []uint32) -TEXT ·crc32sseAll(SB), 4, $0 - MOVQ a+0(FP), R8 // R8: src - MOVQ a_len+8(FP), R10 // input length - MOVQ dst+24(FP), R9 // R9: dst - SUBQ $4, R10 - JS end - JZ one_crc - MOVQ R10, R13 - SHRQ $2, R10 // len/4 - ANDQ $3, R13 // len&3 - XORQ BX, BX - ADDQ $1, R13 - TESTQ R10, R10 - JZ rem_loop - -crc_loop: - MOVQ (R8), R11 - XORQ BX, BX - XORQ DX, DX - XORQ DI, DI - MOVQ R11, R12 - SHRQ $8, R11 - MOVQ R12, AX - MOVQ R11, CX - SHRQ $16, R12 - SHRQ $16, R11 - MOVQ R12, SI - - // CRC32 EAX, EBX - BYTE $0xF2; BYTE $0x0f - BYTE $0x38; BYTE $0xf1; BYTE $0xd8 - - // CRC32 ECX, EDX - BYTE $0xF2; BYTE $0x0f - BYTE $0x38; BYTE $0xf1; BYTE $0xd1 - - // CRC32 ESI, EDI - BYTE $0xF2; BYTE $0x0f - BYTE $0x38; BYTE $0xf1; BYTE $0xfe - MOVL BX, (R9) - MOVL DX, 4(R9) - MOVL DI, 8(R9) - - XORQ BX, BX - MOVL R11, AX - - // CRC32 EAX, EBX - BYTE $0xF2; BYTE $0x0f - BYTE $0x38; BYTE $0xf1; BYTE $0xd8 - MOVL BX, 12(R9) - - ADDQ $16, R9 - ADDQ $4, R8 - XORQ BX, BX - SUBQ $1, R10 - JNZ crc_loop - -rem_loop: - MOVL (R8), AX - - // CRC32 EAX, EBX - BYTE $0xF2; BYTE $0x0f - BYTE $0x38; BYTE $0xf1; BYTE $0xd8 - - MOVL BX, (R9) - ADDQ $4, R9 - ADDQ $1, R8 - XORQ BX, BX - SUBQ $1, R13 - JNZ rem_loop - -end: - RET - -one_crc: - MOVQ $1, R13 - XORQ BX, BX - JMP rem_loop - -// func matchLenSSE4(a, b []byte, max int) int -TEXT ·matchLenSSE4(SB), 4, $0 - MOVQ a_base+0(FP), SI - MOVQ b_base+24(FP), DI - MOVQ DI, DX - MOVQ max+48(FP), CX - -cmp8: - // As long as we are 8 or more bytes before the end of max, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ CX, $8 - JLT cmp1 - MOVQ (SI), AX - MOVQ (DI), BX - CMPQ AX, BX - JNE bsf - ADDQ $8, SI - ADDQ $8, DI - SUBQ $8, CX - JMP cmp8 - -bsf: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, DI - - // Subtract off &b[0] to convert from &b[ret] to ret, and return. - SUBQ DX, DI - MOVQ DI, ret+56(FP) - RET - -cmp1: - // In the slices' tail, compare 1 byte at a time. - CMPQ CX, $0 - JEQ matchLenEnd - MOVB (SI), AX - MOVB (DI), BX - CMPB AX, BX - JNE matchLenEnd - ADDQ $1, SI - ADDQ $1, DI - SUBQ $1, CX - JMP cmp1 - -matchLenEnd: - // Subtract off &b[0] to convert from &b[ret] to ret, and return. - SUBQ DX, DI - MOVQ DI, ret+56(FP) - RET - -// func histogram(b []byte, h []int32) -TEXT ·histogram(SB), 4, $0 - MOVQ b+0(FP), SI // SI: &b - MOVQ b_len+8(FP), R9 // R9: len(b) - MOVQ h+24(FP), DI // DI: Histogram - MOVQ R9, R8 - SHRQ $3, R8 - JZ hist1 - XORQ R11, R11 - -loop_hist8: - MOVQ (SI), R10 - - MOVB R10, R11 - INCL (DI)(R11*4) - SHRQ $8, R10 - - MOVB R10, R11 - INCL (DI)(R11*4) - SHRQ $8, R10 - - MOVB R10, R11 - INCL (DI)(R11*4) - SHRQ $8, R10 - - MOVB R10, R11 - INCL (DI)(R11*4) - SHRQ $8, R10 - - MOVB R10, R11 - INCL (DI)(R11*4) - SHRQ $8, R10 - - MOVB R10, R11 - INCL (DI)(R11*4) - SHRQ $8, R10 - - MOVB R10, R11 - INCL (DI)(R11*4) - SHRQ $8, R10 - - INCL (DI)(R10*4) - - ADDQ $8, SI - DECQ R8 - JNZ loop_hist8 - -hist1: - ANDQ $7, R9 - JZ end_hist - XORQ R10, R10 - -loop_hist1: - MOVB (SI), R10 - INCL (DI)(R10*4) - INCQ SI - DECQ R9 - JNZ loop_hist1 - -end_hist: - RET |