// Origin: vendor/github.com/klauspost/crc32/crc32_amd64p32.s (blob a578d685cc5226b06addedfc7c1935f32915810b)
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build gc

// Symbol flag values, defined locally in this file rather than taken from
// an included header.
// NOTE(review): presumably these mirror the same-named constants in Go's
// textflag.h; confirm against the toolchain in use.
//
// NOSPLIT is the flag operand of every TEXT directive in this file.
#define NOSPLIT 4
// RODATA is not referenced by any code visible in this file.
#define RODATA 8

// func castagnoliSSE42(crc uint32, p []byte) uint32
//
// Folds every byte of p into crc using the CRC32B/CRC32Q instructions and
// stores the updated checksum in ret. The checksum is bit-inverted once on
// entry and once more on exit.
//
// Register roles:
//   AX  running (inverted) checksum
//   SI  address of the next unread byte of p
//   CX  number of unread bytes
//   BX  scratch for the alignment test
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
	MOVL crc+0(FP), AX
	MOVL p+4(FP), SI
	MOVL p_len+8(FP), CX

	NOTL AX

	// Inputs shorter than 8 bytes go straight to the byte-at-a-time loop.
	CMPQ CX, $8
	JL   tail

head:
	// Consume single bytes until SI is a multiple of 8.
	MOVQ SI, BX
	ANDQ $7, BX
	JZ   words

	CRC32B (SI), AX
	INCQ   SI
	DECQ   CX
	JMP    head

words:
	// SI is 8-byte aligned here; take one quadword per iteration for as
	// long as at least 8 bytes remain.
	CMPQ CX, $8
	JL   tail

	CRC32Q (SI), AX
	SUBQ   $8, CX
	ADDQ   $8, SI
	JMP    words

tail:
	// Whatever is left is consumed one byte at a time.
	TESTQ CX, CX
	JZ    finish

	CRC32B (SI), AX
	DECQ   CX
	INCQ   SI
	JMP    tail

finish:
	NOTL AX
	MOVL AX, ret+16(FP)
	RET

// func haveSSE42() bool
//
// Runs CPUID with AX = 1 and stores bit 20 of the resulting CX in ret.
// As the function name indicates, that bit is used as the SSE4.2
// capability flag.
TEXT ·haveSSE42(SB), NOSPLIT, $0
	MOVL $1, AX         // select CPUID leaf 1
	CPUID
	SHRL $20, CX        // move bit 20 down to bit 0
	ANDL $1, CX         // discard every other feature bit
	MOVB CX, ret+0(FP)
	RET