diff options
author | techknowlogick <techknowlogick@gitea.io> | 2021-02-28 18:08:33 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-28 18:08:33 -0500 |
commit | 47f6a4ec3f058f69b65fb6501d6ac98994b8f8da (patch) | |
tree | 4d1421a4c836de9de4014117419c151035c17eec /vendor/github.com/klauspost/cpuid | |
parent | 030646eea41e17e58e11e73b19339630b6d6148e (diff) | |
download | gitea-47f6a4ec3f058f69b65fb6501d6ac98994b8f8da.tar.gz gitea-47f6a4ec3f058f69b65fb6501d6ac98994b8f8da.zip |
go1.16 (#14783)
Diffstat (limited to 'vendor/github.com/klauspost/cpuid')
18 files changed, 2072 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/.gitignore b/vendor/github.com/klauspost/cpuid/v2/.gitignore new file mode 100644 index 0000000000..daf913b1b3 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/github.com/klauspost/cpuid/v2/.travis.yml b/vendor/github.com/klauspost/cpuid/v2/.travis.yml new file mode 100644 index 0000000000..894bb8f692 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/.travis.yml @@ -0,0 +1,56 @@ +language: go + +os: + - linux + - osx + - windows + +arch: + - amd64 + - arm64 + +go: + - 1.13.x + - 1.14.x + - 1.15.x + - 1.16.x + - master + +script: + - go vet ./... + - go test -test.v -test.run ^TestCPUID$ + - go test -race ./... + - go test -tags=noasm ./... + +matrix: + allow_failures: + - go: 'master' + fast_finish: true + include: + - stage: gofmt + go: 1.15.x + os: linux + arch: amd64 + script: + - diff <(gofmt -d .) <(printf "") + - diff <(gofmt -d ./private) <(printf "") + - go install github.com/klauspost/asmfmt/cmd/asmfmt + - diff <(asmfmt -d .) <(printf "") + - stage: i386 + go: 1.15.x + os: linux + arch: amd64 + script: + - GOOS=linux GOARCH=386 go test . + - stage: buildotherprev + go: 1.15.x + os: linux + arch: amd64 + script: + - ./test-architectures.sh + - stage: buildother + go: 1.16.x + os: linux + arch: amd64 + script: + - ./test-architectures.sh diff --git a/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt new file mode 100644 index 0000000000..2ef4714f71 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt @@ -0,0 +1,35 @@ +Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/v2/LICENSE b/vendor/github.com/klauspost/cpuid/v2/LICENSE new file mode 100644 index 0000000000..5cec7ee949 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Klaus Post + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md new file mode 100644 index 0000000000..465f4b77cb --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/README.md @@ -0,0 +1,137 @@ +# cpuid +Package cpuid provides information about the CPU running the current program. + +CPU features are detected on startup, and kept for fast access through the life of the application. +Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use. 
+ +You can access the CPU information by accessing the shared CPU variable of the cpuid library. + +Package home: https://github.com/klauspost/cpuid + +[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2) +[![Build Status][3]][4] + +[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master +[4]: https://travis-ci.org/klauspost/cpuid + +## installing + +`go get -u github.com/klauspost/cpuid/v2` using modules. + +Drop `v2` for others. + +## example + +```Go +package main + +import ( + "fmt" + "strings" + + . "github.com/klauspost/cpuid/v2" +) + +func main() { + // Print basic CPU information: + fmt.Println("Name:", CPU.BrandName) + fmt.Println("PhysicalCores:", CPU.PhysicalCores) + fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore) + fmt.Println("LogicalCores:", CPU.LogicalCores) + fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID) + fmt.Println("Features:", fmt.Sprintf(strings.Join(CPU.FeatureSet(), ","))) + fmt.Println("Cacheline bytes:", CPU.CacheLine) + fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes") + fmt.Println("L1 Instruction Cache:", CPU.Cache.L1D, "bytes") + fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes") + fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes") + fmt.Println("Frequency", CPU.Hz, "hz") + + // Test if we have these specific features: + if CPU.Supports(SSE, SSE2) { + fmt.Println("We have Streaming SIMD 2 Extensions") + } +} +``` + +Sample output: +``` +>go run main.go +Name: AMD Ryzen 9 3950X 16-Core Processor +PhysicalCores: 16 +ThreadsPerCore: 2 +LogicalCores: 32 +Family 23 Model: 113 Vendor ID: AMD +Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3 +Cacheline bytes: 64 +L1 Data Cache: 32768 bytes +L1 Instruction Cache: 32768 bytes +L2 Cache: 524288 bytes +L3 Cache: 16777216 bytes +Frequency 0 hz +We have Streaming SIMD 2 
Extensions +``` + +# usage + +The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features. +A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler. + +Note that for some cpu/os combinations some features will not be detected. +`amd64` has rather good support and should work reliably on all platforms. + +Note that hypervisors may not pass through all CPU features. + +## arm64 feature detection + +Not all operating systems provide ARM features directly +and there is no safe way to do so for the rest. + +Currently `arm64/linux` and `arm64/freebsd` should be quite reliable. +`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected. + +A `DetectARM()` can be used if you are able to control your deployment, +it will detect CPU features, but may crash if the OS doesn't intercept the calls. +A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below. + +Note that currently only features are detected on ARM, +no additional information is currently available. + +## flags + +It is possible to add flags that affects cpu detection. + +For this the `Flags()` command is provided. + +This must be called *before* `flag.Parse()` AND after the flags have been parsed `Detect()` must be called. + +This means that any detection used in `init()` functions will not contain these flags. + +Example: + +```Go +package main + +import ( + "flag" + "fmt" + "strings" + + "github.com/klauspost/cpuid/v2" +) + +func main() { + cpuid.Flags() + flag.Parse() + cpuid.Detect() + + // Test if we have these specific features: + if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) { + fmt.Println("We have Streaming SIMD 2 Extensions") + } +} +``` + +# license + +This code is published under an MIT license. See LICENSE file for more information. 
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go new file mode 100644 index 0000000000..e298a9edd7 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -0,0 +1,1017 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// Package cpuid provides information about the CPU running the current program. +// +// CPU features are detected on startup, and kept for fast access through the life of the application. +// Currently x86 / x64 (AMD64) as well as arm64 is supported. +// +// You can access the CPU information by accessing the shared CPU variable of the cpuid library. +// +// Package home: https://github.com/klauspost/cpuid +package cpuid + +import ( + "flag" + "fmt" + "math" + "os" + "strings" +) + +// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf +// and Processor Programming Reference (PPR) + +// Vendor is a representation of a CPU vendor. +type Vendor int + +const ( + VendorUnknown Vendor = iota + Intel + AMD + VIA + Transmeta + NSC + KVM // Kernel-based Virtual Machine + MSVM // Microsoft Hyper-V or Windows Virtual PC + VMware + XenHVM + Bhyve + Hygon + SiS + RDC + + Ampere + ARM + Broadcom + Cavium + DEC + Fujitsu + Infineon + Motorola + NVIDIA + AMCC + Qualcomm + Marvell + + lastVendor +) + +//go:generate stringer -type=FeatureID,Vendor + +// FeatureID is the ID of a specific cpu feature. 
+type FeatureID int + +const ( + // Keep index -1 as unknown + UNKNOWN = -1 + + // Add features + ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + AESNI // Advanced Encryption Standard New Instructions + AMD3DNOW // AMD 3DNOW + AMD3DNOWEXT // AMD 3DNowExt + AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXINT8 // Tile computational operations on 8-bit integers + AMXTILE // Tile architecture + AVX // AVX functions + AVX2 // AVX2 functions + AVX512BF16 // AVX-512 BFLOAT16 Instructions + AVX512BITALG // AVX-512 Bit Algorithms + AVX512BW // AVX-512 Byte and Word Instructions + AVX512CD // AVX-512 Conflict Detection Instructions + AVX512DQ // AVX-512 Doubleword and Quadword Instructions + AVX512ER // AVX-512 Exponential and Reciprocal Instructions + AVX512F // AVX-512 Foundation + AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF // AVX-512 Prefetch Instructions + AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VL // AVX-512 Vector Length Extensions + AVX512VNNI // AVX-512 Vector Neural Network Instructions + AVX512VP2INTERSECT // AVX-512 Intersect for D/Q + AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one. + BMI1 // Bit Manipulation Instruction Set 1 + BMI2 // Bit Manipulation Instruction Set 2 + CLDEMOTE // Cache Line Demote + CLMUL // Carry-less Multiplication + CMOV // i686 CMOV + CX16 // CMPXCHG16B Instruction + ENQCMD // Enqueue Command + ERMS // Enhanced REP MOVSB/STOSB + F16C // Half-precision floating-point conversion + FMA3 // Intel FMA 3. Does not imply AVX. 
+ FMA4 // Bulldozer FMA4 functions + GFNI // Galois Field New Instructions + HLE // Hardware Lock Elision + HTT // Hyperthreading (enabled) + HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBS // Instruction Based Sampling (AMD) + IBSBRNTRGT // Instruction Based Sampling Feature (AMD) + IBSFETCHSAM // Instruction Based Sampling Feature (AMD) + IBSFFV // Instruction Based Sampling Feature (AMD) + IBSOPCNT // Instruction Based Sampling Feature (AMD) + IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) + IBSOPSAM // Instruction Based Sampling Feature (AMD) + IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) + IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + LZCNT // LZCNT instruction + MMX // standard MMX + MMXEXT // SSE integer functions or AMD MMX ext + MOVDIR64B // Move 64 Bytes as Direct Store + MOVDIRI // Move Doubleword as Direct Store + MPX // Intel MPX (Memory Protection Extensions) + NX // NX (No-Execute) bit + POPCNT // POPCNT instruction + RDRAND // RDRAND instruction is available + RDSEED // RDSEED instruction is available + RDTSCP // RDTSCP Instruction + RTM // Restricted Transactional Memory + SERIALIZE // Serialize Instruction Execution + SGX // Software Guard Extensions + SGXLC // Software Guard Extensions Launch Control + SHA // Intel SHA Extensions + SSE // SSE functions + SSE2 // P4 SSE functions + SSE3 // Prescott SSE3 functions + SSE4 // Penryn SSE4.1 functions + SSE42 // Nehalem SSE4.2 functions + SSE4A // AMD Barcelona microarchitecture SSE4a instructions + SSSE3 // Conroe SSSE3 functions + STIBP // Single Thread Indirect Branch Predictors + TBM // AMD Trailing Bit Manipulation + TSXLDTRK // Intel TSX Suspend Load Address Tracking + VAES // Vector AES + VMX // Virtual Machine Extensions + VPCLMULQDQ // Carry-Less Multiplication Quadword + WAITPKG // TPAUSE, UMONITOR, UMWAIT + WBNOINVD // 
Write Back and Do Not Invalidate Cache + XOP // Bulldozer XOP functions + + // ARM features: + AESARM // AES instructions + ARMCPUID // Some CPU ID registers readable at user-level + ASIMD // Advanced SIMD + ASIMDDP // SIMD Dot Product + ASIMDHP // Advanced SIMD half-precision floating point + ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) + ATOMICS // Large System Extensions (LSE) + CRC32 // CRC32/CRC32C instructions + DCPOP // Data cache clean to Point of Persistence (DC CVAP) + EVTSTRM // Generic timer + FCMA // Floatin point complex number addition and multiplication + FP // Single-precision and double-precision floating point + FPHP // Half-precision floating point + GPA // Generic Pointer Authentication + JSCVT // Javascript-style double->int convert (FJCVTZS) + LRCPC // Weaker release consistency (LDAPR, etc) + PMULL // Polynomial Multiply instructions (PMULL/PMULL2) + SHA1 // SHA-1 instructions (SHA1C, etc) + SHA2 // SHA-2 instructions (SHA256H, etc) + SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) + SHA512 // SHA512 instructions + SM3 // SM3 instructions + SM4 // SM4 instructions + SVE // Scalable Vector Extension + + // Keep it last. It automatically defines the size of []flagSet + lastID + + firstID FeatureID = UNKNOWN + 1 +) + +// CPUInfo contains information about the detected system CPU. +type CPUInfo struct { + BrandName string // Brand name reported by the CPU + VendorID Vendor // Comparable CPU vendor ID + VendorString string // Raw vendor string. + featureSet flagSet // Features of the CPU + PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. + ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. + LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. 
+ Family int // CPU family number + Model int // CPU model number + CacheLine int // Cache line size in bytes. Will be 0 if undetectable. + Hz int64 // Clock speed, if known, 0 otherwise + Cache struct { + L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected + L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected + L2 int // L2 Cache (per core or shared). Will be -1 if undetected + L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected + } + SGX SGXSupport + maxFunc uint32 + maxExFunc uint32 +} + +var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) +var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) +var xgetbv func(index uint32) (eax, edx uint32) +var rdtscpAsm func() (eax, ebx, ecx, edx uint32) + +// CPU contains information about the CPU as detected on startup, +// or when Detect last was called. +// +// Use this as the primary entry point to you data. +var CPU CPUInfo + +func init() { + initCPU() + Detect() +} + +// Detect will re-detect current CPU info. +// This will replace the content of the exported CPU variable. +// +// Unless you expect the CPU to change while you are running your program +// you should not need to call this function. +// If you call this, you must ensure that no other goroutine is accessing the +// exported CPU variable. +func Detect() { + // Set defaults + CPU.ThreadsPerCore = 1 + CPU.Cache.L1I = -1 + CPU.Cache.L1D = -1 + CPU.Cache.L2 = -1 + CPU.Cache.L3 = -1 + safe := true + if detectArmFlag != nil { + safe = !*detectArmFlag + } + addInfo(&CPU, safe) + if displayFeats != nil && *displayFeats { + fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) + // Exit with non-zero so tests will print value. 
+ os.Exit(1) + } + if disableFlag != nil { + s := strings.Split(*disableFlag, ",") + for _, feat := range s { + feat := ParseFeature(strings.TrimSpace(feat)) + if feat != UNKNOWN { + CPU.featureSet.unset(feat) + } + } + } +} + +// DetectARM will detect ARM64 features. +// This is NOT done automatically since it can potentially crash +// if the OS does not handle the command. +// If in the future this can be done safely this function may not +// do anything. +func DetectARM() { + addInfo(&CPU, false) +} + +var detectArmFlag *bool +var displayFeats *bool +var disableFlag *string + +// Flags will enable flags. +// This must be called *before* flag.Parse AND +// Detect must be called after the flags have been parsed. +// Note that this means that any detection used in init() functions +// will not contain these flags. +func Flags() { + disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") + displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") + detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") +} + +// Supports returns whether the CPU supports all of the requested features. +func (c CPUInfo) Supports(ids ...FeatureID) bool { + for _, id := range ids { + if !c.featureSet.inSet(id) { + return false + } + } + return true +} + +// Has allows for checking a single feature. +// Should be inlined by the compiler. +func (c CPUInfo) Has(id FeatureID) bool { + return c.featureSet.inSet(id) +} + +// Disable will disable one or several features. +func (c *CPUInfo) Disable(ids ...FeatureID) bool { + for _, id := range ids { + c.featureSet.unset(id) + } + return true +} + +// Enable will disable one or several features even if they were undetected. +// This is of course not recommended for obvious reasons. 
+func (c *CPUInfo) Enable(ids ...FeatureID) bool { + for _, id := range ids { + c.featureSet.set(id) + } + return true +} + +// IsVendor returns true if vendor is recognized as Intel +func (c CPUInfo) IsVendor(v Vendor) bool { + return c.VendorID == v +} + +func (c CPUInfo) FeatureSet() []string { + s := make([]string, 0) + for _, f := range c.featureSet.Strings() { + s = append(s, f) + } + return s +} + +// RTCounter returns the 64-bit time-stamp counter +// Uses the RDTSCP instruction. The value 0 is returned +// if the CPU does not support the instruction. +func (c CPUInfo) RTCounter() uint64 { + if !c.Supports(RDTSCP) { + return 0 + } + a, _, _, d := rdtscpAsm() + return uint64(a) | (uint64(d) << 32) +} + +// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. +// This variable is OS dependent, but on Linux contains information +// about the current cpu/core the code is running on. +// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. +func (c CPUInfo) Ia32TscAux() uint32 { + if !c.Supports(RDTSCP) { + return 0 + } + _, _, ecx, _ := rdtscpAsm() + return ecx +} + +// LogicalCPU will return the Logical CPU the code is currently executing on. +// This is likely to change when the OS re-schedules the running thread +// to another CPU. +// If the current core cannot be detected, -1 will be returned. +func (c CPUInfo) LogicalCPU() int { + if c.maxFunc < 1 { + return -1 + } + _, ebx, _, _ := cpuid(1) + return int(ebx >> 24) +} + +// hertz tries to compute the clock speed of the CPU. If leaf 15 is +// supported, use it, otherwise parse the brand string. Yes, really. +func hertz(model string) int64 { + mfi := maxFunctionID() + if mfi >= 0x15 { + eax, ebx, ecx, _ := cpuid(0x15) + if eax != 0 && ebx != 0 && ecx != 0 { + return int64((int64(ecx) * int64(ebx)) / int64(eax)) + } + } + // computeHz determines the official rated speed of a CPU from its brand + // string. 
This insanity is *actually the official documented way to do + // this according to Intel*, prior to leaf 0x15 existing. The official + // documentation only shows this working for exactly `x.xx` or `xxxx` + // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other + // sizes. + hz := strings.LastIndex(model, "Hz") + if hz < 3 { + return 0 + } + var multiplier int64 + switch model[hz-1] { + case 'M': + multiplier = 1000 * 1000 + case 'G': + multiplier = 1000 * 1000 * 1000 + case 'T': + multiplier = 1000 * 1000 * 1000 * 1000 + } + if multiplier == 0 { + return 0 + } + freq := int64(0) + divisor := int64(0) + decimalShift := int64(1) + var i int + for i = hz - 2; i >= 0 && model[i] != ' '; i-- { + if model[i] >= '0' && model[i] <= '9' { + freq += int64(model[i]-'0') * decimalShift + decimalShift *= 10 + } else if model[i] == '.' { + if divisor != 0 { + return 0 + } + divisor = decimalShift + } else { + return 0 + } + } + // we didn't find a space + if i < 0 { + return 0 + } + if divisor != 0 { + return (freq * multiplier) / divisor + } + return freq * multiplier +} + +// VM Will return true if the cpu id indicates we are in +// a virtual machine. +func (c CPUInfo) VM() bool { + return CPU.featureSet.inSet(HYPERVISOR) +} + +// flags contains detected cpu features and characteristics +type flags uint64 + +// log2(bits_in_uint64) +const flagBitsLog2 = 6 +const flagBits = 1 << flagBitsLog2 +const flagMask = flagBits - 1 + +// flagSet contains detected cpu features and characteristics in an array of flags +type flagSet [(lastID + flagMask) / flagBits]flags + +func (s flagSet) inSet(feat FeatureID) bool { + return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 +} + +func (s *flagSet) set(feat FeatureID) { + s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) +} + +// setIf will set a feature if boolean is true. 
+func (s *flagSet) setIf(cond bool, features ...FeatureID) { + if cond { + for _, offset := range features { + s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) + } + } +} + +func (s *flagSet) unset(offset FeatureID) { + bit := flags(1 << (offset & flagMask)) + s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit +} + +// or with another flagset. +func (s *flagSet) or(other flagSet) { + for i, v := range other[:] { + s[i] |= v + } +} + +// ParseFeature will parse the string and return the ID of the matching feature. +// Will return UNKNOWN if not found. +func ParseFeature(s string) FeatureID { + s = strings.ToUpper(s) + for i := firstID; i < lastID; i++ { + if i.String() == s { + return i + } + } + return UNKNOWN +} + +// Strings returns an array of the detected features for FlagsSet. +func (s flagSet) Strings() []string { + if len(s) == 0 { + return []string{""} + } + r := make([]string, 0) + for i := firstID; i < lastID; i++ { + if s.inSet(i) { + r = append(r, i.String()) + } + } + return r +} + +func maxExtendedFunction() uint32 { + eax, _, _, _ := cpuid(0x80000000) + return eax +} + +func maxFunctionID() uint32 { + a, _, _, _ := cpuid(0) + return a +} + +func brandName() string { + if maxExtendedFunction() >= 0x80000004 { + v := make([]uint32, 0, 48) + for i := uint32(0); i < 3; i++ { + a, b, c, d := cpuid(0x80000002 + i) + v = append(v, a, b, c, d) + } + return strings.Trim(string(valAsString(v...)), " ") + } + return "unknown" +} + +func threadsPerCore() int { + mfi := maxFunctionID() + vend, _ := vendorID() + + if mfi < 0x4 || (vend != Intel && vend != AMD) { + return 1 + } + + if mfi < 0xb { + if vend != Intel { + return 1 + } + _, b, _, d := cpuid(1) + if (d & (1 << 28)) != 0 { + // v will contain logical core count + v := (b >> 16) & 255 + if v > 1 { + a4, _, _, _ := cpuid(4) + // physical cores + v2 := (a4 >> 26) + 1 + if v2 > 0 { + return int(v) / int(v2) + } + } + } + return 1 + } + _, b, _, _ := cpuidex(0xb, 0) + if b&0xffff == 0 { + if vend == 
AMD { + // Workaround for AMD returning 0, assume 2 if >= Zen 2 + // It will be more correct than not. + fam, _ := familyModel() + _, _, _, d := cpuid(1) + if (d&(1<<28)) != 0 && fam >= 23 { + return 2 + } + } + return 1 + } + return int(b & 0xffff) +} + +func logicalCores() int { + mfi := maxFunctionID() + v, _ := vendorID() + switch v { + case Intel: + // Use this on old Intel processors + if mfi < 0xb { + if mfi < 1 { + return 0 + } + // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) + // that can be assigned to logical processors in a physical package. + // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. + _, ebx, _, _ := cpuid(1) + logical := (ebx >> 16) & 0xff + return int(logical) + } + _, b, _, _ := cpuidex(0xb, 1) + return int(b & 0xffff) + case AMD, Hygon: + _, b, _, _ := cpuid(1) + return int((b >> 16) & 0xff) + default: + return 0 + } +} + +func familyModel() (int, int) { + if maxFunctionID() < 0x1 { + return 0, 0 + } + eax, _, _, _ := cpuid(1) + family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) + model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) + return int(family), int(model) +} + +func physicalCores() int { + v, _ := vendorID() + switch v { + case Intel: + return logicalCores() / threadsPerCore() + case AMD, Hygon: + lc := logicalCores() + tpc := threadsPerCore() + if lc > 0 && tpc > 0 { + return lc / tpc + } + + // The following is inaccurate on AMD EPYC 7742 64-Core Processor + if maxExtendedFunction() >= 0x80000008 { + _, _, c, _ := cpuid(0x80000008) + if c&0xff > 0 { + return int(c&0xff) + 1 + } + } + } + return 0 +} + +// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID +var vendorMapping = map[string]Vendor{ + "AMDisbetter!": AMD, + "AuthenticAMD": AMD, + "CentaurHauls": VIA, + "GenuineIntel": Intel, + "TransmetaCPU": Transmeta, + "GenuineTMx86": Transmeta, + "Geode by NSC": NSC, + "VIA VIA VIA ": VIA, + 
"KVMKVMKVMKVM": KVM, + "Microsoft Hv": MSVM, + "VMwareVMware": VMware, + "XenVMMXenVMM": XenHVM, + "bhyve bhyve ": Bhyve, + "HygonGenuine": Hygon, + "Vortex86 SoC": SiS, + "SiS SiS SiS ": SiS, + "RiseRiseRise": SiS, + "Genuine RDC": RDC, +} + +func vendorID() (Vendor, string) { + _, b, c, d := cpuid(0) + v := string(valAsString(b, d, c)) + vend, ok := vendorMapping[v] + if !ok { + return VendorUnknown, v + } + return vend, v +} + +func cacheLine() int { + if maxFunctionID() < 0x1 { + return 0 + } + + _, ebx, _, _ := cpuid(1) + cache := (ebx & 0xff00) >> 5 // cflush size + if cache == 0 && maxExtendedFunction() >= 0x80000006 { + _, _, ecx, _ := cpuid(0x80000006) + cache = ecx & 0xff // cacheline size + } + // TODO: Read from Cache and TLB Information + return int(cache) +} + +func (c *CPUInfo) cacheSize() { + c.Cache.L1D = -1 + c.Cache.L1I = -1 + c.Cache.L2 = -1 + c.Cache.L3 = -1 + vendor, _ := vendorID() + switch vendor { + case Intel: + if maxFunctionID() < 4 { + return + } + for i := uint32(0); ; i++ { + eax, ebx, ecx, _ := cpuidex(4, i) + cacheType := eax & 15 + if cacheType == 0 { + break + } + cacheLevel := (eax >> 5) & 7 + coherency := int(ebx&0xfff) + 1 + partitions := int((ebx>>12)&0x3ff) + 1 + associativity := int((ebx>>22)&0x3ff) + 1 + sets := int(ecx) + 1 + size := associativity * partitions * coherency * sets + switch cacheLevel { + case 1: + if cacheType == 1 { + // 1 = Data Cache + c.Cache.L1D = size + } else if cacheType == 2 { + // 2 = Instruction Cache + c.Cache.L1I = size + } else { + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + case AMD, Hygon: + // Untested. 
+ if maxExtendedFunction() < 0x80000005 { + return + } + _, _, ecx, edx := cpuid(0x80000005) + c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) + c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) + + if maxExtendedFunction() < 0x80000006 { + return + } + _, _, ecx, _ = cpuid(0x80000006) + c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) + + // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties + if maxExtendedFunction() < 0x8000001D { + return + } + for i := uint32(0); i < math.MaxUint32; i++ { + eax, ebx, ecx, _ := cpuidex(0x8000001D, i) + + level := (eax >> 5) & 7 + cacheNumSets := ecx + 1 + cacheLineSize := 1 + (ebx & 2047) + cachePhysPartitions := 1 + ((ebx >> 12) & 511) + cacheNumWays := 1 + ((ebx >> 22) & 511) + + typ := eax & 15 + size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) + if typ == 0 { + return + } + + switch level { + case 1: + switch typ { + case 1: + // Data cache + c.Cache.L1D = size + case 2: + // Inst cache + c.Cache.L1I = size + default: + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + } + + return +} + +type SGXEPCSection struct { + BaseAddress uint64 + EPCSize uint64 +} + +type SGXSupport struct { + Available bool + LaunchControl bool + SGX1Supported bool + SGX2Supported bool + MaxEnclaveSizeNot64 int64 + MaxEnclaveSize64 int64 + EPCSections []SGXEPCSection +} + +func hasSGX(available, lc bool) (rval SGXSupport) { + rval.Available = available + + if !available { + return + } + + rval.LaunchControl = lc + + a, _, _, d := cpuidex(0x12, 0) + rval.SGX1Supported = a&0x01 != 0 + rval.SGX2Supported = a&0x02 != 0 + rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 + rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 + rval.EPCSections = make([]SGXEPCSection, 0) + + for subleaf := uint32(2); subleaf < 2+8; subleaf++ { + eax, ebx, ecx, edx := cpuidex(0x12, subleaf) + leafType := eax & 0xf + + if leafType 
== 0 { + // Invalid subleaf, stop iterating + break + } else if leafType == 1 { + // EPC Section subleaf + baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) + size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) + + section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} + rval.EPCSections = append(rval.EPCSections, section) + } + } + + return +} + +func support() flagSet { + var fs flagSet + mfi := maxFunctionID() + vend, _ := vendorID() + if mfi < 0x1 { + return fs + } + family, model := familyModel() + + _, _, c, d := cpuid(1) + fs.setIf((d&(1<<15)) != 0, CMOV) + fs.setIf((d&(1<<23)) != 0, MMX) + fs.setIf((d&(1<<25)) != 0, MMXEXT) + fs.setIf((d&(1<<25)) != 0, SSE) + fs.setIf((d&(1<<26)) != 0, SSE2) + fs.setIf((c&1) != 0, SSE3) + fs.setIf((c&(1<<5)) != 0, VMX) + fs.setIf((c&0x00000200) != 0, SSSE3) + fs.setIf((c&0x00080000) != 0, SSE4) + fs.setIf((c&0x00100000) != 0, SSE42) + fs.setIf((c&(1<<25)) != 0, AESNI) + fs.setIf((c&(1<<1)) != 0, CLMUL) + fs.setIf(c&(1<<23) != 0, POPCNT) + fs.setIf(c&(1<<30) != 0, RDRAND) + + // This bit has been reserved by Intel & AMD for use by hypervisors, + // and indicates the presence of a hypervisor. + fs.setIf(c&(1<<31) != 0, HYPERVISOR) + fs.setIf(c&(1<<29) != 0, F16C) + fs.setIf(c&(1<<13) != 0, CX16) + + if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { + fs.setIf(threadsPerCore() > 1, HTT) + } + if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { + fs.setIf(threadsPerCore() > 1, HTT) + } + // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits + const avxCheck = 1<<26 | 1<<27 | 1<<28 + if c&avxCheck == avxCheck { + // Check for OS support + eax, _ := xgetbv(0) + if (eax & 0x6) == 0x6 { + fs.set(AVX) + switch vend { + case Intel: + // Older than Haswell. + fs.setIf(family == 6 && model < 60, AVXSLOW) + case AMD: + // Older than Zen 2 + fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) + } + } + } + // FMA3 can be used with SSE registers, so no OS support is strictly needed. 
+ // fma3 and OSXSAVE needed. + const fma3Check = 1<<12 | 1<<27 + fs.setIf(c&fma3Check == fma3Check, FMA3) + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 { + _, ebx, ecx, edx := cpuidex(7, 0) + eax1, _, _, _ := cpuidex(7, 1) + if fs.inSet(AVX) && (ebx&0x00000020) != 0 { + fs.set(AVX2) + } + // CPUID.(EAX=7, ECX=0).EBX + if (ebx & 0x00000008) != 0 { + fs.set(BMI1) + fs.setIf((ebx&0x00000100) != 0, BMI2) + } + fs.setIf(ebx&(1<<2) != 0, SGX) + fs.setIf(ebx&(1<<4) != 0, HLE) + fs.setIf(ebx&(1<<9) != 0, ERMS) + fs.setIf(ebx&(1<<11) != 0, RTM) + fs.setIf(ebx&(1<<14) != 0, MPX) + fs.setIf(ebx&(1<<18) != 0, RDSEED) + fs.setIf(ebx&(1<<19) != 0, ADX) + fs.setIf(ebx&(1<<29) != 0, SHA) + // CPUID.(EAX=7, ECX=0).ECX + fs.setIf(ecx&(1<<5) != 0, WAITPKG) + fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) + fs.setIf(ecx&(1<<27) != 0, MOVDIRI) + fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) + fs.setIf(ecx&(1<<29) != 0, ENQCMD) + fs.setIf(ecx&(1<<30) != 0, SGXLC) + // CPUID.(EAX=7, ECX=0).EDX + fs.setIf(edx&(1<<14) != 0, SERIALIZE) + fs.setIf(edx&(1<<16) != 0, TSXLDTRK) + fs.setIf(edx&(1<<26) != 0, IBPB) + fs.setIf(edx&(1<<27) != 0, STIBP) + + // Only detect AVX-512 features if XGETBV is supported + if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { + // Check for OS support + eax, _ := xgetbv(0) + + // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and + // ZMM16-ZMM31 state are enabled by OS) + /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
+ if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { + fs.setIf(ebx&(1<<16) != 0, AVX512F) + fs.setIf(ebx&(1<<17) != 0, AVX512DQ) + fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) + fs.setIf(ebx&(1<<26) != 0, AVX512PF) + fs.setIf(ebx&(1<<27) != 0, AVX512ER) + fs.setIf(ebx&(1<<28) != 0, AVX512CD) + fs.setIf(ebx&(1<<30) != 0, AVX512BW) + fs.setIf(ebx&(1<<31) != 0, AVX512VL) + // ecx + fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) + fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) + fs.setIf(ecx&(1<<8) != 0, GFNI) + fs.setIf(ecx&(1<<9) != 0, VAES) + fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) + fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) + fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) + fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) + // edx + fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) + fs.setIf(edx&(1<<22) != 0, AMXBF16) + fs.setIf(edx&(1<<24) != 0, AMXTILE) + fs.setIf(edx&(1<<25) != 0, AMXINT8) + // eax1 = CPUID.(EAX=7, ECX=1).EAX + fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + } + } + } + + if maxExtendedFunction() >= 0x80000001 { + _, _, c, d := cpuid(0x80000001) + if (c & (1 << 5)) != 0 { + fs.set(LZCNT) + fs.set(POPCNT) + } + fs.setIf((c&(1<<10)) != 0, IBS) + fs.setIf((d&(1<<31)) != 0, AMD3DNOW) + fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT) + fs.setIf((d&(1<<23)) != 0, MMX) + fs.setIf((d&(1<<22)) != 0, MMXEXT) + fs.setIf((c&(1<<6)) != 0, SSE4A) + fs.setIf(d&(1<<20) != 0, NX) + fs.setIf(d&(1<<27) != 0, RDTSCP) + + /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be + * used unless the OS has AVX support. 
*/ + if fs.inSet(AVX) { + fs.setIf((c&0x00000800) != 0, XOP) + fs.setIf((c&0x00010000) != 0, FMA4) + } + + } + if maxExtendedFunction() >= 0x80000008 { + _, b, _, _ := cpuid(0x80000008) + fs.setIf((b&(1<<9)) != 0, WBNOINVD) + } + + if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { + eax, _, _, _ := cpuid(0x8000001b) + fs.setIf((eax>>0)&1 == 1, IBSFFV) + fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) + fs.setIf((eax>>2)&1 == 1, IBSOPSAM) + fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) + fs.setIf((eax>>4)&1 == 1, IBSOPCNT) + fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) + fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) + fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) + } + + return fs +} + +func valAsString(values ...uint32) []byte { + r := make([]byte, 4*len(values)) + for i, v := range values { + dst := r[i*4:] + dst[0] = byte(v & 0xff) + dst[1] = byte((v >> 8) & 0xff) + dst[2] = byte((v >> 16) & 0xff) + dst[3] = byte((v >> 24) & 0xff) + switch { + case dst[0] == 0: + return r[:i*4] + case dst[1] == 0: + return r[:i*4+1] + case dst[2] == 0: + return r[:i*4+2] + case dst[3] == 0: + return r[:i*4+3] + } + } + return r +} diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s new file mode 100644 index 0000000000..089638f51a --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s @@ -0,0 +1,42 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. 
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

//+build 386,!gccgo,!noasm,!appengine

// CPU identification primitives for 386. Arguments and results are 32-bit
// values laid out consecutively from 0(FP), 4 bytes apart.
// The TEXT flag value 7 is NOPROF|DUPOK|NOSPLIT as defined in textflag.h.

// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX = op and CX cleared, then stores AX, BX, CX and DX
// into the four results.
TEXT ·asmCpuid(SB), 7, $0
	XORL CX, CX
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+4(FP)
	MOVL BX, ebx+8(FP)
	MOVL CX, ecx+12(FP)
	MOVL DX, edx+16(FP)
	RET

// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Same as asmCpuid, but CX is loaded from op2 instead of being cleared.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET

// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and returns
// AX and DX.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+4(FP)
	MOVL DX, edx+8(FP)
	RET

// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and returns AX, BX, CX and DX.
// NOTE(review): BX is stored without being loaded in this routine; RDTSCP is
// documented to write only EAX, EDX and ECX, so ebx presumably carries
// whatever the register held before - confirm callers ignore it.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

//+build amd64,!gccgo,!noasm,!appengine

// CPU identification primitives for amd64. Arguments start at 0(FP) and the
// results of every routine that takes arguments start at 8(FP).
// The TEXT flag value 7 is NOPROF|DUPOK|NOSPLIT as defined in textflag.h.

// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX = op and CX cleared, then stores AX, BX, CX and DX
// into the four results.
TEXT ·asmCpuid(SB), 7, $0
	XORQ CX, CX
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET

// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Same as asmCpuid, but CX is loaded from op2 instead of being cleared.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET

// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and returns
// AX and DX.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+8(FP)
	MOVL DX, edx+12(FP)
	RET

// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and returns AX, BX, CX and DX.
// NOTE(review): BX is stored without being loaded in this routine; RDTSCP is
// documented to write only EAX, EDX and ECX, so ebx presumably carries
// whatever the register held before - confirm callers ignore it.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
+ +//+build arm64,!gccgo,!noasm,!appengine + +// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt + +// func getMidr +TEXT ·getMidr(SB), 7, $0 + WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */ + MOVD R0, midr+0(FP) + RET + +// func getProcFeatures +TEXT ·getProcFeatures(SB), 7, $0 + WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */ + MOVD R0, procFeatures+0(FP) + RET + +// func getInstAttributes +TEXT ·getInstAttributes(SB), 7, $0 + WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */ + WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */ + MOVD R0, instAttrReg0+0(FP) + MOVD R1, instAttrReg1+8(FP) + RET + diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go new file mode 100644 index 0000000000..9bf9f77f37 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go @@ -0,0 +1,246 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build arm64,!gccgo,!noasm,!appengine + +package cpuid + +import "runtime" + +func getMidr() (midr uint64) +func getProcFeatures() (procFeatures uint64) +func getInstAttributes() (instAttrReg0, instAttrReg1 uint64) + +func initCPU() { + cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + xgetbv = func(uint32) (a, b uint32) { return 0, 0 } + rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } +} + +func addInfo(c *CPUInfo, safe bool) { + // Seems to be safe to assume on ARM64 + c.CacheLine = 64 + detectOS(c) + + // ARM64 disabled since it may crash if interrupt is not intercepted by OS. 
+ if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" { + return + } + midr := getMidr() + + // MIDR_EL1 - Main ID Register + // https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | Implementer | [31-24] | y | + // |--------------------------------------------------| + // | Variant | [23-20] | y | + // |--------------------------------------------------| + // | Architecture | [19-16] | y | + // |--------------------------------------------------| + // | PartNum | [15-4] | y | + // |--------------------------------------------------| + // | Revision | [3-0] | y | + // x--------------------------------------------------x + + switch (midr >> 24) & 0xff { + case 0xC0: + c.VendorString = "Ampere Computing" + c.VendorID = Ampere + case 0x41: + c.VendorString = "Arm Limited" + c.VendorID = ARM + case 0x42: + c.VendorString = "Broadcom Corporation" + c.VendorID = Broadcom + case 0x43: + c.VendorString = "Cavium Inc" + c.VendorID = Cavium + case 0x44: + c.VendorString = "Digital Equipment Corporation" + c.VendorID = DEC + case 0x46: + c.VendorString = "Fujitsu Ltd" + c.VendorID = Fujitsu + case 0x49: + c.VendorString = "Infineon Technologies AG" + c.VendorID = Infineon + case 0x4D: + c.VendorString = "Motorola or Freescale Semiconductor Inc" + c.VendorID = Motorola + case 0x4E: + c.VendorString = "NVIDIA Corporation" + c.VendorID = NVIDIA + case 0x50: + c.VendorString = "Applied Micro Circuits Corporation" + c.VendorID = AMCC + case 0x51: + c.VendorString = "Qualcomm Inc" + c.VendorID = Qualcomm + case 0x56: + c.VendorString = "Marvell International Ltd" + c.VendorID = Marvell + case 0x69: + c.VendorString = "Intel Corporation" + c.VendorID = Intel + } + + // Lower 4 bits: Architecture + // Architecture Meaning + // 0b0001 Armv4. + // 0b0010 Armv4T. + // 0b0011 Armv5 (obsolete). 
+ // 0b0100 Armv5T. + // 0b0101 Armv5TE. + // 0b0110 Armv5TEJ. + // 0b0111 Armv6. + // 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'. + // Upper 4 bit: Variant + // An IMPLEMENTATION DEFINED variant number. + // Typically, this field is used to distinguish between different product variants, or major revisions of a product. + c.Family = int(midr>>16) & 0xff + + // PartNum, bits [15:4] + // An IMPLEMENTATION DEFINED primary part number for the device. + // On processors implemented by Arm, if the top four bits of the primary + // part number are 0x0 or 0x7, the variant and architecture are encoded differently. + // Revision, bits [3:0] + // An IMPLEMENTATION DEFINED revision number for the device. + c.Model = int(midr) & 0xffff + + procFeatures := getProcFeatures() + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | DIT | [51-48] | y | + // |--------------------------------------------------| + // | SVE | [35-32] | y | + // |--------------------------------------------------| + // | GIC | [27-24] | n | + // |--------------------------------------------------| + // | AdvSIMD | [23-20] | y | + // |--------------------------------------------------| + // | FP | [19-16] | y | + // |--------------------------------------------------| + // | EL3 | [15-12] | n | + // |--------------------------------------------------| + // | EL2 | [11-8] | n | + // |--------------------------------------------------| + // | EL1 | [7-4] | n | + // |--------------------------------------------------| + // | EL0 | [3-0] | n | + // x--------------------------------------------------x + + var f flagSet + // if procFeatures&(0xf<<48) != 0 { + // fmt.Println("DIT") + // } + f.setIf(procFeatures&(0xf<<32) != 0, SVE) + if procFeatures&(0xf<<20) != 15<<20 { + f.set(ASIMD) + // 
https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1 + // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic. + f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP) + } + f.setIf(procFeatures&(0xf<<16) != 0, FP) + + instAttrReg0, instAttrReg1 := getInstAttributes() + + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | TS | [55-52] | y | + // |--------------------------------------------------| + // | FHM | [51-48] | y | + // |--------------------------------------------------| + // | DP | [47-44] | y | + // |--------------------------------------------------| + // | SM4 | [43-40] | y | + // |--------------------------------------------------| + // | SM3 | [39-36] | y | + // |--------------------------------------------------| + // | SHA3 | [35-32] | y | + // |--------------------------------------------------| + // | RDM | [31-28] | y | + // |--------------------------------------------------| + // | ATOMICS | [23-20] | y | + // |--------------------------------------------------| + // | CRC32 | [19-16] | y | + // |--------------------------------------------------| + // | SHA2 | [15-12] | y | + // |--------------------------------------------------| + // | SHA1 | [11-8] | y | + // |--------------------------------------------------| + // | AES | [7-4] | y | + // x--------------------------------------------------x + + // if instAttrReg0&(0xf<<52) != 0 { + // fmt.Println("TS") + // } + // if instAttrReg0&(0xf<<48) != 0 { + // fmt.Println("FHM") + // } + f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP) + f.setIf(instAttrReg0&(0xf<<40) != 0, SM4) + f.setIf(instAttrReg0&(0xf<<36) != 0, SM3) + f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3) + 
f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM) + f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS) + f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32) + f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2) + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented. + f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512) + f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1) + f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM) + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities. + f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL) + + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1 + // + // ID_AA64ISAR1_EL1 - Instruction set attribute register 1 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | GPI | [31-28] | y | + // |--------------------------------------------------| + // | GPA | [27-24] | y | + // |--------------------------------------------------| + // | LRCPC | [23-20] | y | + // |--------------------------------------------------| + // | FCMA | [19-16] | y | + // |--------------------------------------------------| + // | JSCVT | [15-12] | y | + // |--------------------------------------------------| + // | API | [11-8] | y | + // |--------------------------------------------------| + // | APA | [7-4] | y | + // |--------------------------------------------------| + // | DPB | [3-0] | y | + // x--------------------------------------------------x + + // if instAttrReg1&(0xf<<28) != 0 { + // fmt.Println("GPI") + // } + f.setIf(instAttrReg1&(0xf<<28) != 24, GPA) + f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC) + f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA) + f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT) + // if 
instAttrReg1&(0xf<<8) != 0 { + // fmt.Println("API") + // } + // if instAttrReg1&(0xf<<4) != 0 { + // fmt.Println("APA") + // } + f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP) + + // Store + c.featureSet.or(f) +} diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go new file mode 100644 index 0000000000..e9c8606ab9 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go @@ -0,0 +1,14 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build !amd64,!386,!arm64 gccgo noasm appengine + +package cpuid + +func initCPU() { + cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + xgetbv = func(uint32) (a, b uint32) { return 0, 0 } + rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } +} + +func addInfo(info *CPUInfo, safe bool) {} diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go new file mode 100644 index 0000000000..381940ebdf --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go @@ -0,0 +1,33 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. 
+ +//+build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine + +package cpuid + +func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) +func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func asmXgetbv(index uint32) (eax, edx uint32) +func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) + +func initCPU() { + cpuid = asmCpuid + cpuidex = asmCpuidex + xgetbv = asmXgetbv + rdtscpAsm = asmRdtscpAsm +} + +func addInfo(c *CPUInfo, safe bool) { + c.maxFunc = maxFunctionID() + c.maxExFunc = maxExtendedFunction() + c.BrandName = brandName() + c.CacheLine = cacheLine() + c.Family, c.Model = familyModel() + c.featureSet = support() + c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC)) + c.ThreadsPerCore = threadsPerCore() + c.LogicalCores = logicalCores() + c.PhysicalCores = physicalCores() + c.VendorID, c.VendorString = vendorID() + c.Hz = hertz(c.BrandName) + c.cacheSize() +} diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go new file mode 100644 index 0000000000..0e764f9027 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go @@ -0,0 +1,173 @@ +// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT. + +package cpuid + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[ADX-1] + _ = x[AESNI-2] + _ = x[AMD3DNOW-3] + _ = x[AMD3DNOWEXT-4] + _ = x[AMXBF16-5] + _ = x[AMXINT8-6] + _ = x[AMXTILE-7] + _ = x[AVX-8] + _ = x[AVX2-9] + _ = x[AVX512BF16-10] + _ = x[AVX512BITALG-11] + _ = x[AVX512BW-12] + _ = x[AVX512CD-13] + _ = x[AVX512DQ-14] + _ = x[AVX512ER-15] + _ = x[AVX512F-16] + _ = x[AVX512IFMA-17] + _ = x[AVX512PF-18] + _ = x[AVX512VBMI-19] + _ = x[AVX512VBMI2-20] + _ = x[AVX512VL-21] + _ = x[AVX512VNNI-22] + _ = x[AVX512VP2INTERSECT-23] + _ = x[AVX512VPOPCNTDQ-24] + _ = x[AVXSLOW-25] + _ = x[BMI1-26] + _ = x[BMI2-27] + _ = x[CLDEMOTE-28] + _ = x[CLMUL-29] + _ = x[CMOV-30] + _ = x[CX16-31] + _ = x[ENQCMD-32] + _ = x[ERMS-33] + _ = x[F16C-34] + _ = x[FMA3-35] + _ = x[FMA4-36] + _ = x[GFNI-37] + _ = x[HLE-38] + _ = x[HTT-39] + _ = x[HYPERVISOR-40] + _ = x[IBPB-41] + _ = x[IBS-42] + _ = x[IBSBRNTRGT-43] + _ = x[IBSFETCHSAM-44] + _ = x[IBSFFV-45] + _ = x[IBSOPCNT-46] + _ = x[IBSOPCNTEXT-47] + _ = x[IBSOPSAM-48] + _ = x[IBSRDWROPCNT-49] + _ = x[IBSRIPINVALIDCHK-50] + _ = x[LZCNT-51] + _ = x[MMX-52] + _ = x[MMXEXT-53] + _ = x[MOVDIR64B-54] + _ = x[MOVDIRI-55] + _ = x[MPX-56] + _ = x[NX-57] + _ = x[POPCNT-58] + _ = x[RDRAND-59] + _ = x[RDSEED-60] + _ = x[RDTSCP-61] + _ = x[RTM-62] + _ = x[SERIALIZE-63] + _ = x[SGX-64] + _ = x[SGXLC-65] + _ = x[SHA-66] + _ = x[SSE-67] + _ = x[SSE2-68] + _ = x[SSE3-69] + _ = x[SSE4-70] + _ = x[SSE42-71] + _ = x[SSE4A-72] + _ = x[SSSE3-73] + _ = x[STIBP-74] + _ = x[TBM-75] + _ = x[TSXLDTRK-76] + _ = x[VAES-77] + _ = x[VMX-78] + _ = x[VPCLMULQDQ-79] + _ = x[WAITPKG-80] + _ = x[WBNOINVD-81] + _ = x[XOP-82] + _ = x[AESARM-83] + _ = x[ARMCPUID-84] + _ = x[ASIMD-85] + _ = x[ASIMDDP-86] + _ = x[ASIMDHP-87] + _ = x[ASIMDRDM-88] + _ = x[ATOMICS-89] + _ = x[CRC32-90] + _ = x[DCPOP-91] + _ = x[EVTSTRM-92] + _ = x[FCMA-93] + _ = x[FP-94] + _ = x[FPHP-95] + _ = x[GPA-96] + _ = x[JSCVT-97] + _ = x[LRCPC-98] + _ = x[PMULL-99] + _ = x[SHA1-100] + _ = x[SHA2-101] + _ = x[SHA3-102] + _ = 
x[SHA512-103] + _ = x[SM3-104] + _ = x[SM4-105] + _ = x[SVE-106] + _ = x[lastID-107] + _ = x[firstID-0] +} + +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWBMI1BMI2CLDEMOTECLMULCMOVCX16ENQCMDERMSF16CFMA3FMA4GFNIHLEHTTHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKLZCNTMMXMMXEXTMOVDIR64BMOVDIRIMPXNXPOPCNTRDRANDRDSEEDRDTSCPRTMSERIALIZESGXSGXLCSHASSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPTBMTSXLDTRKVAESVMXVPCLMULQDQWAITPKGWBNOINVDXOPAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" + +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 141, 151, 162, 170, 180, 198, 213, 220, 224, 228, 236, 241, 245, 249, 255, 259, 263, 267, 271, 275, 278, 281, 291, 295, 298, 308, 319, 325, 333, 344, 352, 364, 380, 385, 388, 394, 403, 410, 413, 415, 421, 427, 433, 439, 442, 451, 454, 459, 462, 465, 469, 473, 477, 482, 487, 492, 497, 500, 508, 512, 515, 525, 532, 540, 543, 549, 557, 562, 569, 576, 584, 591, 596, 601, 608, 612, 614, 618, 621, 626, 631, 636, 640, 644, 648, 654, 657, 660, 663, 669} + +func (i FeatureID) String() string { + if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { + return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[VendorUnknown-0] + _ = x[Intel-1] + _ = x[AMD-2] + _ = x[VIA-3] + _ = x[Transmeta-4] + _ = x[NSC-5] + _ = x[KVM-6] + _ = x[MSVM-7] + _ = x[VMware-8] + _ = x[XenHVM-9] + _ = x[Bhyve-10] + _ = x[Hygon-11] + _ = x[SiS-12] + _ = x[RDC-13] + _ = x[Ampere-14] + _ = x[ARM-15] + _ = x[Broadcom-16] + _ = x[Cavium-17] + _ = x[DEC-18] + _ = x[Fujitsu-19] + _ = x[Infineon-20] + _ = x[Motorola-21] + _ = x[NVIDIA-22] + _ = x[AMCC-23] + _ = x[Qualcomm-24] + _ = x[Marvell-25] + _ = x[lastVendor-26] +} + +const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor" + +var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155} + +func (i Vendor) String() string { + if i < 0 || i >= Vendor(len(_Vendor_index)-1) { + return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]] +} diff --git a/vendor/github.com/klauspost/cpuid/v2/go.mod b/vendor/github.com/klauspost/cpuid/v2/go.mod new file mode 100644 index 0000000000..2afac8eb28 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/go.mod @@ -0,0 +1,3 @@ +module github.com/klauspost/cpuid/v2 + +go 1.13 diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go new file mode 100644 index 0000000000..82d272fab3 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go @@ -0,0 +1,15 @@ +// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. + +package cpuid + +import "runtime" + +func detectOS(c *CPUInfo) bool { + // There are no hw.optional sysctl values for the below features on Mac OS 11.0 + // to detect their supported state dynamically. 
Assume the CPU features that + // Apple Silicon M1 supports to be available as a minimal set of features + // to all Go programs running on darwin/arm64. + // TODO: Add more if we know them. + c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2) + return true +} diff --git a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go new file mode 100644 index 0000000000..a01afad81c --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go @@ -0,0 +1,161 @@ +// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file located +// here https://github.com/golang/sys/blob/master/LICENSE + +package cpuid + +import ( + "encoding/binary" + "io/ioutil" + "runtime" + "unsafe" +) + +// HWCAP bits. +const ( + hwcap_FP = 1 << 0 + hwcap_ASIMD = 1 << 1 + hwcap_EVTSTRM = 1 << 2 + hwcap_AES = 1 << 3 + hwcap_PMULL = 1 << 4 + hwcap_SHA1 = 1 << 5 + hwcap_SHA2 = 1 << 6 + hwcap_CRC32 = 1 << 7 + hwcap_ATOMICS = 1 << 8 + hwcap_FPHP = 1 << 9 + hwcap_ASIMDHP = 1 << 10 + hwcap_CPUID = 1 << 11 + hwcap_ASIMDRDM = 1 << 12 + hwcap_JSCVT = 1 << 13 + hwcap_FCMA = 1 << 14 + hwcap_LRCPC = 1 << 15 + hwcap_DCPOP = 1 << 16 + hwcap_SHA3 = 1 << 17 + hwcap_SM3 = 1 << 18 + hwcap_SM4 = 1 << 19 + hwcap_ASIMDDP = 1 << 20 + hwcap_SHA512 = 1 << 21 + hwcap_SVE = 1 << 22 + hwcap_ASIMDFHM = 1 << 23 +) + +//go:linkname hwcap internal/cpu.HWCap +var hwcap uint + +func detectOS(c *CPUInfo) bool { + // For now assuming no hyperthreading is reasonable. + c.LogicalCores = int(getproccount()) + c.PhysicalCores = c.LogicalCores + c.ThreadsPerCore = 1 + if hwcap == 0 { + // We did not get values from the runtime. 
+ // Try reading /proc/self/auxv + + // From https://github.com/golang/sys + const ( + _AT_HWCAP = 16 + _AT_HWCAP2 = 26 + + uintSize = int(32 << (^uint(0) >> 63)) + ) + + buf, err := ioutil.ReadFile("/proc/self/auxv") + if err != nil { + // e.g. on android /proc/self/auxv is not accessible, so silently + // ignore the error and leave Initialized = false. On some + // architectures (e.g. arm64) doinit() implements a fallback + // readout and will set Initialized = true again. + return false + } + bo := binary.LittleEndian + for len(buf) >= 2*(uintSize/8) { + var tag, val uint + switch uintSize { + case 32: + tag = uint(bo.Uint32(buf[0:])) + val = uint(bo.Uint32(buf[4:])) + buf = buf[8:] + case 64: + tag = uint(bo.Uint64(buf[0:])) + val = uint(bo.Uint64(buf[8:])) + buf = buf[16:] + } + switch tag { + case _AT_HWCAP: + hwcap = val + case _AT_HWCAP2: + // Not used + } + } + if hwcap == 0 { + return false + } + } + + // HWCap was populated by the runtime from the auxiliary vector. + // Use HWCap information since reading aarch64 system registers + // is not supported in user space on older linux kernels. 
+ c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM) + c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID) + c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32) + c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP) + c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM) + c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA) + c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP) + c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP) + c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT) + c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC) + c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512) + c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3) + c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4) + c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE) + + // The Samsung S9+ kernel reports support for atomics, but not all cores + // actually support them, resulting in SIGILL. See issue #28431. + // TODO(elias.naur): Only disable the optimization on bad chipsets on android. + c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS) + + return true +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +//go:noescape +//go:linkname sched_getaffinity runtime.sched_getaffinity +func sched_getaffinity(pid, len uintptr, buf *byte) int32 + +func getproccount() int32 { + // This buffer is huge (8 kB) but we are on the system stack + // and there should be plenty of space (64 kB). + // Also this is a leaf, so we're not holding up the memory for long. 
+ const maxCPUs = 64 * 1024 + var buf [maxCPUs / 8]byte + r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0]) + if r < 0 { + return 0 + } + n := int32(0) + for _, v := range buf[:r] { + for v != 0 { + n += int32(v & 1) + v >>= 1 + } + } + if n == 0 { + n = 1 + } + return n +} diff --git a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go new file mode 100644 index 0000000000..df0ad06b38 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go @@ -0,0 +1,11 @@ +// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. + +// +build arm64 +// +build !linux +// +build !darwin + +package cpuid + +func detectOS(c *CPUInfo) bool { + return false +} diff --git a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh new file mode 100644 index 0000000000..50150eaabe --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +go tool dist list | while IFS=/ read os arch; do + echo "Checking $os/$arch..." + echo " normal" + GOARCH=$arch GOOS=$os go build -o /dev/null ./... + echo " noasm" + GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null ./... + echo " appengine" + GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null ./... + echo " noasm,appengine" + GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null ./... +done |