diff options
Diffstat (limited to 'vendor/github.com/klauspost/cpuid')
-rw-r--r-- | vendor/github.com/klauspost/cpuid/.gitignore | 24 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/.travis.yml | 46 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt | 35 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/LICENSE | 22 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/README.md | 191 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/cpuid.go | 1504 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/cpuid_386.s | 42 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/cpuid_amd64.s | 42 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/cpuid_arm64.s | 26 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/detect_arm64.go | 219 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/detect_intel.go | 33 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/detect_ref.go | 14 | ||||
-rw-r--r-- | vendor/github.com/klauspost/cpuid/go.mod | 3 |
13 files changed, 2201 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/.gitignore b/vendor/github.com/klauspost/cpuid/.gitignore new file mode 100644 index 0000000000..daf913b1b3 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/github.com/klauspost/cpuid/.travis.yml b/vendor/github.com/klauspost/cpuid/.travis.yml new file mode 100644 index 0000000000..77d975fe28 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/.travis.yml @@ -0,0 +1,46 @@ +language: go + +os: + - linux + - osx + - windows + +arch: + - amd64 + - arm64 + +go: + - 1.12.x + - 1.13.x + - 1.14.x + - master + +script: + - go vet ./... + - go test -race ./... + - go test -tags=noasm ./... + +stages: + - gofmt + - test + +matrix: + allow_failures: + - go: 'master' + fast_finish: true + include: + - stage: gofmt + go: 1.14.x + os: linux + arch: amd64 + script: + - diff <(gofmt -d .) <(printf "") + - diff <(gofmt -d ./private) <(printf "") + - go install github.com/klauspost/asmfmt/cmd/asmfmt + - diff <(asmfmt -d .) <(printf "") + - stage: i386 + go: 1.14.x + os: linux + arch: amd64 + script: + - GOOS=linux GOARCH=386 go test . diff --git a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt new file mode 100644 index 0000000000..2ef4714f71 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt @@ -0,0 +1,35 @@ +Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/LICENSE new file mode 100644 index 0000000000..5cec7ee949 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Klaus Post + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md new file mode 100644 index 0000000000..38d4a8b936 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/README.md @@ -0,0 +1,191 @@ +# cpuid +Package cpuid provides information about the CPU running the current program. + +CPU features are detected on startup, and kept for fast access through the life of the application. +Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use. + +You can access the CPU information by accessing the shared CPU variable of the cpuid library. + +Package home: https://github.com/klauspost/cpuid + +[![GoDoc][1]][2] [![Build Status][3]][4] + +[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg +[2]: https://godoc.org/github.com/klauspost/cpuid +[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master +[4]: https://travis-ci.org/klauspost/cpuid + +# features + +## x86 CPU Instructions +* **CMOV** (i686 CMOV) +* **NX** (NX (No-Execute) bit) +* **AMD3DNOW** (AMD 3DNOW) +* **AMD3DNOWEXT** (AMD 3DNowExt) +* **MMX** (standard MMX) +* **MMXEXT** (SSE integer functions or AMD MMX ext) +* **SSE** (SSE functions) +* **SSE2** (P4 SSE functions) +* **SSE3** (Prescott SSE3 functions) +* **SSSE3** (Conroe SSSE3 functions) +* **SSE4** (Penryn SSE4.1 functions) +* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions) +* **SSE42** (Nehalem SSE4.2 functions) +* **AVX** (AVX functions) +* **AVX2** (AVX2 functions) +* **FMA3** (Intel FMA 3) +* **FMA4** (Bulldozer FMA4 functions) +* **XOP** (Bulldozer XOP functions) +* **F16C** (Half-precision floating-point conversion) +* **BMI1** (Bit Manipulation Instruction Set 1) +* **BMI2** (Bit Manipulation Instruction Set 2) +* **TBM** (AMD Trailing Bit Manipulation) +* **LZCNT** (LZCNT instruction) +* **POPCNT** (POPCNT instruction) +* **AESNI** (Advanced Encryption Standard New Instructions) +* **CLMUL** (Carry-less Multiplication) +* **HTT** (Hyperthreading (enabled)) +* **HLE** (Hardware Lock Elision) +* **RTM** (Restricted Transactional Memory) +* **RDRAND** (RDRAND instruction is available) +* **RDSEED** (RDSEED instruction is available) +* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions)) +* **SHA** (Intel SHA Extensions) +* **AVX512F** (AVX-512 Foundation) +* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions) +* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions) +* **AVX512PF** (AVX-512 Prefetch Instructions) +* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions) +* **AVX512CD** (AVX-512 Conflict Detection Instructions) +* **AVX512BW** (AVX-512 Byte and Word Instructions) +* **AVX512VL** (AVX-512 Vector Length Extensions) +* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions) +* **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2) +* **AVX512VNNI** (AVX-512 Vector Neural Network Instructions) +* **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword) +* **GFNI** (Galois Field New Instructions) +* **VAES** (Vector AES) +* **AVX512BITALG** (AVX-512 Bit Algorithms) +* **VPCLMULQDQ** (Carry-Less Multiplication Quadword) +* **AVX512BF16** (AVX-512 BFLOAT16 Instructions) +* **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q) +* **MPX** (Intel MPX (Memory Protection Extensions)) +* **ERMS** (Enhanced REP MOVSB/STOSB) +* **RDTSCP** (RDTSCP Instruction) +* **CX16** (CMPXCHG16B Instruction) +* **SGX** (Software Guard Extensions, with activation details) +* **VMX** (Virtual Machine Extensions) + +## Performance +* **RDTSCP()** Returns current cycle count. Can be used for benchmarking. +* **SSE2SLOW** (SSE2 is supported, but usually not faster) +* **SSE3SLOW** (SSE3 is supported, but usually not faster) +* **ATOM** (Atom processor, some SSSE3 instructions are slower) +* **Cache line** (Probable size of a cache line). +* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs. + +## ARM CPU features + +# ARM FEATURE DETECTION DISABLED! + +See [#52](https://github.com/klauspost/cpuid/issues/52). + +Currently only `arm64` platforms are implemented. + +* **FP** Single-precision and double-precision floating point +* **ASIMD** Advanced SIMD +* **EVTSTRM** Generic timer +* **AES** AES instructions +* **PMULL** Polynomial Multiply instructions (PMULL/PMULL2) +* **SHA1** SHA-1 instructions (SHA1C, etc) +* **SHA2** SHA-2 instructions (SHA256H, etc) +* **CRC32** CRC32/CRC32C instructions +* **ATOMICS** Large System Extensions (LSE) +* **FPHP** Half-precision floating point +* **ASIMDHP** Advanced SIMD half-precision floating point +* **ARMCPUID** Some CPU ID registers readable at user-level +* **ASIMDRDM** Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) +* **JSCVT** Javascript-style double->int convert (FJCVTZS) +* **FCMA** Floating point complex number addition and multiplication +* **LRCPC** Weaker release consistency (LDAPR, etc) +* **DCPOP** Data cache clean to Point of Persistence (DC CVAP) +* **SHA3** SHA-3 instructions (EOR3, RAXI, XAR, BCAX) +* **SM3** SM3 instructions +* **SM4** SM4 instructions +* **ASIMDDP** SIMD Dot Product +* **SHA512** SHA512 instructions +* **SVE** Scalable Vector Extension +* **GPA** Generic Pointer Authentication + +## Cpu Vendor/VM +* **Intel** +* **AMD** +* **VIA** +* **Transmeta** +* **NSC** +* **KVM** (Kernel-based Virtual Machine) +* **MSVM** (Microsoft Hyper-V or Windows Virtual PC) +* **VMware** +* **XenHVM** +* **Bhyve** +* **Hygon** + +# installing + +```go get github.com/klauspost/cpuid``` + +# example + +```Go +package main + +import ( + "fmt" + "github.com/klauspost/cpuid" +) + +func main() { + // Print basic CPU information: + fmt.Println("Name:", cpuid.CPU.BrandName) + fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores) + fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore) + fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores) + fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model) + fmt.Println("Features:", cpuid.CPU.Features) + fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine) + fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes") + fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes") + fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes") + fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes") + + // Test if we have a specific feature: + if cpuid.CPU.SSE() { + fmt.Println("We have Streaming SIMD Extensions") + } +} +``` + +Sample output: +``` +>go run main.go +Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz +PhysicalCores: 2 +ThreadsPerCore: 2 +LogicalCores: 4 +Family 6 Model: 42 +Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL +Cacheline bytes: 64 +We have Streaming SIMD Extensions +``` + +# private package + +In the "private" folder you can find an autogenerated version of the library you can include in your own packages. + +For this purpose all exports are removed, and functions and constants are lowercased. + +This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages. + +# license + +This code is published under an MIT license. See LICENSE file for more information. diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go new file mode 100644 index 0000000000..208b3e79b1 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid.go @@ -0,0 +1,1504 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// Package cpuid provides information about the CPU running the current program. +// +// CPU features are detected on startup, and kept for fast access through the life of the application. +// Currently x86 / x64 (AMD64) as well as arm64 is supported. +// +// You can access the CPU information by accessing the shared CPU variable of the cpuid library. +// +// Package home: https://github.com/klauspost/cpuid +package cpuid + +import ( + "math" + "strings" +) + +// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf +// and Processor Programming Reference (PPR) + +// Vendor is a representation of a CPU vendor. +type Vendor int + +const ( + Other Vendor = iota + Intel + AMD + VIA + Transmeta + NSC + KVM // Kernel-based Virtual Machine + MSVM // Microsoft Hyper-V or Windows Virtual PC + VMware + XenHVM + Bhyve + Hygon + SiS + RDC +) + +const ( + CMOV = 1 << iota // i686 CMOV + NX // NX (No-Execute) bit + AMD3DNOW // AMD 3DNOW + AMD3DNOWEXT // AMD 3DNowExt + MMX // standard MMX + MMXEXT // SSE integer functions or AMD MMX ext + SSE // SSE functions + SSE2 // P4 SSE functions + SSE3 // Prescott SSE3 functions + SSSE3 // Conroe SSSE3 functions + SSE4 // Penryn SSE4.1 functions + SSE4A // AMD Barcelona microarchitecture SSE4a instructions + SSE42 // Nehalem SSE4.2 functions + AVX // AVX functions + AVX2 // AVX2 functions + FMA3 // Intel FMA 3 + FMA4 // Bulldozer FMA4 functions + XOP // Bulldozer XOP functions + F16C // Half-precision floating-point conversion + BMI1 // Bit Manipulation Instruction Set 1 + BMI2 // Bit Manipulation Instruction Set 2 + TBM // AMD Trailing Bit Manipulation + LZCNT // LZCNT instruction + POPCNT // POPCNT instruction + AESNI // Advanced Encryption Standard New Instructions + CLMUL // Carry-less Multiplication + HTT // Hyperthreading (enabled) + HLE // Hardware Lock Elision + RTM // Restricted Transactional Memory + RDRAND // RDRAND instruction is available + RDSEED // RDSEED instruction is available + ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + SHA // Intel SHA Extensions + AVX512F // AVX-512 Foundation + AVX512DQ // AVX-512 Doubleword and Quadword Instructions + AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF // AVX-512 Prefetch Instructions + AVX512ER // AVX-512 Exponential and Reciprocal Instructions + AVX512CD // AVX-512 Conflict Detection Instructions + AVX512BW // AVX-512 Byte and Word Instructions + AVX512VL // AVX-512 Vector Length Extensions + AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VNNI // AVX-512 Vector Neural Network Instructions + AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + GFNI // Galois Field New Instructions + VAES // Vector AES + AVX512BITALG // AVX-512 Bit Algorithms + VPCLMULQDQ // Carry-Less Multiplication Quadword + AVX512BF16 // AVX-512 BFLOAT16 Instructions + AVX512VP2INTERSECT // AVX-512 Intersect for D/Q + MPX // Intel MPX (Memory Protection Extensions) + ERMS // Enhanced REP MOVSB/STOSB + RDTSCP // RDTSCP Instruction + CX16 // CMPXCHG16B Instruction + SGX // Software Guard Extensions + SGXLC // Software Guard Extensions Launch Control + IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + STIBP // Single Thread Indirect Branch Predictors + VMX // Virtual Machine Extensions + + // Performance indicators + SSE2SLOW // SSE2 is supported, but usually not faster + SSE3SLOW // SSE3 is supported, but usually not faster + ATOM // Atom processor, some SSSE3 instructions are slower +) + +var flagNames = map[Flags]string{ + CMOV: "CMOV", // i686 CMOV + NX: "NX", // NX (No-Execute) bit + AMD3DNOW: "AMD3DNOW", // AMD 3DNOW + AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt + MMX: "MMX", // Standard MMX + MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext + SSE: "SSE", // SSE functions + SSE2: "SSE2", // P4 SSE2 functions + SSE3: "SSE3", // Prescott SSE3 functions + SSSE3: "SSSE3", // Conroe SSSE3 functions + SSE4: "SSE4.1", // Penryn SSE4.1 functions + SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions + SSE42: "SSE4.2", // Nehalem SSE4.2 functions + AVX: "AVX", // AVX functions + AVX2: "AVX2", // AVX functions + FMA3: "FMA3", // Intel FMA 3 + FMA4: "FMA4", // Bulldozer FMA4 functions + XOP: "XOP", // Bulldozer XOP functions + F16C: "F16C", // Half-precision floating-point conversion + BMI1: "BMI1", // Bit Manipulation Instruction Set 1 + BMI2: "BMI2", // Bit Manipulation Instruction Set 2 + TBM: "TBM", // AMD Trailing Bit Manipulation + LZCNT: "LZCNT", // LZCNT instruction + POPCNT: "POPCNT", // POPCNT instruction + AESNI: "AESNI", // Advanced Encryption Standard New Instructions + CLMUL: "CLMUL", // Carry-less Multiplication + HTT: "HTT", // Hyperthreading (enabled) + HLE: "HLE", // Hardware Lock Elision + RTM: "RTM", // Restricted Transactional Memory + RDRAND: "RDRAND", // RDRAND instruction is available + RDSEED: "RDSEED", // RDSEED instruction is available + ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + SHA: "SHA", // Intel SHA Extensions + AVX512F: "AVX512F", // AVX-512 Foundation + AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions + AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions + AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions + AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions + AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions + AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions + AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VNNI: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions + AVX512VPOPCNTDQ: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword + GFNI: "GFNI", // Galois Field New Instructions + VAES: "VAES", // Vector AES + AVX512BITALG: "AVX512BITALG", // AVX-512 Bit Algorithms + VPCLMULQDQ: "VPCLMULQDQ", // Carry-Less Multiplication Quadword + AVX512BF16: "AVX512BF16", // AVX-512 BFLOAT16 Instruction + AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q + MPX: "MPX", // Intel MPX (Memory Protection Extensions) + ERMS: "ERMS", // Enhanced REP MOVSB/STOSB + RDTSCP: "RDTSCP", // RDTSCP Instruction + CX16: "CX16", // CMPXCHG16B Instruction + SGX: "SGX", // Software Guard Extensions + SGXLC: "SGXLC", // Software Guard Extensions Launch Control + IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier + STIBP: "STIBP", // Single Thread Indirect Branch Predictors + VMX: "VMX", // Virtual Machine Extensions + + // Performance indicators + SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster + SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster + ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower + +} + +/* all special features for arm64 should be defined here */ +const ( + /* extension instructions */ + FP ArmFlags = 1 << iota + ASIMD + EVTSTRM + AES + PMULL + SHA1 + SHA2 + CRC32 + ATOMICS + FPHP + ASIMDHP + ARMCPUID + ASIMDRDM + JSCVT + FCMA + LRCPC + DCPOP + SHA3 + SM3 + SM4 + ASIMDDP + SHA512 + SVE + GPA +) + +var flagNamesArm = map[ArmFlags]string{ + FP: "FP", // Single-precision and double-precision floating point + ASIMD: "ASIMD", // Advanced SIMD + EVTSTRM: "EVTSTRM", // Generic timer + AES: "AES", // AES instructions + PMULL: "PMULL", // Polynomial Multiply instructions (PMULL/PMULL2) + SHA1: "SHA1", // SHA-1 instructions (SHA1C, etc) + SHA2: "SHA2", // SHA-2 instructions (SHA256H, etc) + CRC32: "CRC32", // CRC32/CRC32C instructions + ATOMICS: "ATOMICS", // Large System Extensions (LSE) + FPHP: "FPHP", // Half-precision floating point + ASIMDHP: "ASIMDHP", // Advanced SIMD half-precision floating point + ARMCPUID: "CPUID", // Some CPU ID registers readable at user-level + ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) + JSCVT: "JSCVT", // Javascript-style double->int convert (FJCVTZS) + FCMA: "FCMA", // Floatin point complex number addition and multiplication + LRCPC: "LRCPC", // Weaker release consistency (LDAPR, etc) + DCPOP: "DCPOP", // Data cache clean to Point of Persistence (DC CVAP) + SHA3: "SHA3", // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) + SM3: "SM3", // SM3 instructions + SM4: "SM4", // SM4 instructions + ASIMDDP: "ASIMDDP", // SIMD Dot Product + SHA512: "SHA512", // SHA512 instructions + SVE: "SVE", // Scalable Vector Extension + GPA: "GPA", // Generic Pointer Authentication +} + +// CPUInfo contains information about the detected system CPU. +type CPUInfo struct { + BrandName string // Brand name reported by the CPU + VendorID Vendor // Comparable CPU vendor ID + VendorString string // Raw vendor string. + Features Flags // Features of the CPU (x64) + Arm ArmFlags // Features of the CPU (arm) + PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. + ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. + LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. + Family int // CPU family number + Model int // CPU model number + CacheLine int // Cache line size in bytes. Will be 0 if undetectable. + Hz int64 // Clock speed, if known + Cache struct { + L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected + L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected + L2 int // L2 Cache (per core or shared). Will be -1 if undetected + L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected + } + SGX SGXSupport + maxFunc uint32 + maxExFunc uint32 +} + +var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) +var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) +var xgetbv func(index uint32) (eax, edx uint32) +var rdtscpAsm func() (eax, ebx, ecx, edx uint32) + +// CPU contains information about the CPU as detected on startup, +// or when Detect last was called. +// +// Use this as the primary entry point to you data. +var CPU CPUInfo + +func init() { + initCPU() + Detect() +} + +// Detect will re-detect current CPU info. +// This will replace the content of the exported CPU variable. +// +// Unless you expect the CPU to change while you are running your program +// you should not need to call this function. +// If you call this, you must ensure that no other goroutine is accessing the +// exported CPU variable. +func Detect() { + // Set defaults + CPU.ThreadsPerCore = 1 + CPU.Cache.L1I = -1 + CPU.Cache.L1D = -1 + CPU.Cache.L2 = -1 + CPU.Cache.L3 = -1 + addInfo(&CPU) +} + +// Generated here: http://play.golang.org/p/BxFH2Gdc0G + +// Cmov indicates support of CMOV instructions +func (c CPUInfo) Cmov() bool { + return c.Features&CMOV != 0 +} + +// Amd3dnow indicates support of AMD 3DNOW! instructions +func (c CPUInfo) Amd3dnow() bool { + return c.Features&AMD3DNOW != 0 +} + +// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions +func (c CPUInfo) Amd3dnowExt() bool { + return c.Features&AMD3DNOWEXT != 0 +} + +// VMX indicates support of VMX +func (c CPUInfo) VMX() bool { + return c.Features&VMX != 0 +} + +// MMX indicates support of MMX instructions +func (c CPUInfo) MMX() bool { + return c.Features&MMX != 0 +} + +// MMXExt indicates support of MMXEXT instructions +// (SSE integer functions or AMD MMX ext) +func (c CPUInfo) MMXExt() bool { + return c.Features&MMXEXT != 0 +} + +// SSE indicates support of SSE instructions +func (c CPUInfo) SSE() bool { + return c.Features&SSE != 0 +} + +// SSE2 indicates support of SSE 2 instructions +func (c CPUInfo) SSE2() bool { + return c.Features&SSE2 != 0 +} + +// SSE3 indicates support of SSE 3 instructions +func (c CPUInfo) SSE3() bool { + return c.Features&SSE3 != 0 +} + +// SSSE3 indicates support of SSSE 3 instructions +func (c CPUInfo) SSSE3() bool { + return c.Features&SSSE3 != 0 +} + +// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions +func (c CPUInfo) SSE4() bool { + return c.Features&SSE4 != 0 +} + +// SSE42 indicates support of SSE4.2 instructions +func (c CPUInfo) SSE42() bool { + return c.Features&SSE42 != 0 +} + +// AVX indicates support of AVX instructions +// and operating system support of AVX instructions +func (c CPUInfo) AVX() bool { + return c.Features&AVX != 0 +} + +// AVX2 indicates support of AVX2 instructions +func (c CPUInfo) AVX2() bool { + return c.Features&AVX2 != 0 +} + +// FMA3 indicates support of FMA3 instructions +func (c CPUInfo) FMA3() bool { + return c.Features&FMA3 != 0 +} + +// FMA4 indicates support of FMA4 instructions +func (c CPUInfo) FMA4() bool { + return c.Features&FMA4 != 0 +} + +// XOP indicates support of XOP instructions +func (c CPUInfo) XOP() bool { + return c.Features&XOP != 0 +} + +// F16C indicates support of F16C instructions +func (c CPUInfo) F16C() bool { + return c.Features&F16C != 0 +} + +// BMI1 indicates support of BMI1 instructions +func (c CPUInfo) BMI1() bool { + return c.Features&BMI1 != 0 +} + +// BMI2 indicates support of BMI2 instructions +func (c CPUInfo) BMI2() bool { + return c.Features&BMI2 != 0 +} + +// TBM indicates support of TBM instructions +// (AMD Trailing Bit Manipulation) +func (c CPUInfo) TBM() bool { + return c.Features&TBM != 0 +} + +// Lzcnt indicates support of LZCNT instruction +func (c CPUInfo) Lzcnt() bool { + return c.Features&LZCNT != 0 +} + +// Popcnt indicates support of POPCNT instruction +func (c CPUInfo) Popcnt() bool { + return c.Features&POPCNT != 0 +} + +// HTT indicates the processor has Hyperthreading enabled +func (c CPUInfo) HTT() bool { + return c.Features&HTT != 0 +} + +// SSE2Slow indicates that SSE2 may be slow on this processor +func (c CPUInfo) SSE2Slow() bool { + return c.Features&SSE2SLOW != 0 +} + +// SSE3Slow indicates that SSE3 may be slow on this processor +func (c CPUInfo) SSE3Slow() bool { + return c.Features&SSE3SLOW != 0 +} + +// AesNi indicates support of AES-NI instructions +// (Advanced Encryption Standard New Instructions) +func (c CPUInfo) AesNi() bool { + return c.Features&AESNI != 0 +} + +// Clmul indicates support of CLMUL instructions +// (Carry-less Multiplication) +func (c CPUInfo) Clmul() bool { + return c.Features&CLMUL != 0 +} + +// NX indicates support of NX (No-Execute) bit +func (c CPUInfo) NX() bool { + return c.Features&NX != 0 +} + +// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions +func (c CPUInfo) SSE4A() bool { + return c.Features&SSE4A != 0 +} + +// HLE indicates support of Hardware Lock Elision +func (c CPUInfo) HLE() bool { + return c.Features&HLE != 0 +} + +// RTM indicates support of Restricted Transactional Memory +func (c CPUInfo) RTM() bool { + return c.Features&RTM != 0 +} + +// Rdrand indicates support of RDRAND instruction is available +func (c CPUInfo) Rdrand() bool { + return c.Features&RDRAND != 0 +} + +// Rdseed indicates support of RDSEED instruction is available +func (c CPUInfo) Rdseed() bool { + return c.Features&RDSEED != 0 +} + +// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) +func (c CPUInfo) ADX() bool { + return c.Features&ADX != 0 +} + +// SHA indicates support of Intel SHA Extensions +func (c CPUInfo) SHA() bool { + return c.Features&SHA != 0 +} + +// AVX512F indicates support of AVX-512 Foundation +func (c CPUInfo) AVX512F() bool { + return c.Features&AVX512F != 0 +} + +// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions +func (c CPUInfo) AVX512DQ() bool { + return c.Features&AVX512DQ != 0 +} + +// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions +func (c CPUInfo) AVX512IFMA() bool { + return c.Features&AVX512IFMA != 0 +} + +// AVX512PF indicates support of AVX-512 Prefetch Instructions +func (c CPUInfo) AVX512PF() bool { + return c.Features&AVX512PF != 0 +} + +// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions +func (c CPUInfo) AVX512ER() bool { + return c.Features&AVX512ER != 0 +} + +// AVX512CD indicates support of AVX-512 Conflict Detection Instructions +func (c CPUInfo) AVX512CD() bool { + return c.Features&AVX512CD != 0 +} + +// AVX512BW indicates support of AVX-512 Byte and Word Instructions +func (c CPUInfo) AVX512BW() bool { + return c.Features&AVX512BW != 0 +} + +// AVX512VL indicates support of AVX-512 Vector Length Extensions +func (c CPUInfo) AVX512VL() bool { + return c.Features&AVX512VL != 0 +} + +// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions +func (c CPUInfo) AVX512VBMI() bool { + return c.Features&AVX512VBMI != 0 +} + +// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2 +func (c CPUInfo) AVX512VBMI2() bool { + return c.Features&AVX512VBMI2 != 0 +} + +// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions +func (c CPUInfo) AVX512VNNI() bool { + return c.Features&AVX512VNNI != 0 +} + +// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword +func (c CPUInfo) AVX512VPOPCNTDQ() bool { + return c.Features&AVX512VPOPCNTDQ != 0 +} + +// GFNI indicates support of Galois Field New Instructions +func (c CPUInfo) GFNI() bool { + return c.Features&GFNI != 0 +} + +// VAES indicates support of Vector AES +func (c CPUInfo) VAES() bool { + return c.Features&VAES != 0 +} + +// AVX512BITALG indicates support of AVX-512 Bit Algorithms +func (c CPUInfo) AVX512BITALG() bool { + return c.Features&AVX512BITALG != 0 +} + +// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword +func (c CPUInfo) VPCLMULQDQ() bool { + return c.Features&VPCLMULQDQ != 0 +} + +// AVX512BF16 indicates support of +func (c CPUInfo) AVX512BF16() bool { + return c.Features&AVX512BF16 != 0 +} + +// AVX512VP2INTERSECT indicates support of +func (c CPUInfo) AVX512VP2INTERSECT() bool { + return c.Features&AVX512VP2INTERSECT != 0 +} + +// MPX indicates support of Intel MPX (Memory Protection Extensions) +func (c CPUInfo) MPX() bool { + return c.Features&MPX != 0 +} + +// ERMS indicates support of Enhanced REP MOVSB/STOSB +func (c CPUInfo) ERMS() bool { + return c.Features&ERMS != 0 +} + +// RDTSCP Instruction is available. +func (c CPUInfo) RDTSCP() bool { + return c.Features&RDTSCP != 0 +} + +// CX16 indicates if CMPXCHG16B instruction is available. +func (c CPUInfo) CX16() bool { + return c.Features&CX16 != 0 +} + +// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection. +// So TSX simply checks that. +func (c CPUInfo) TSX() bool { + return c.Features&(HLE|RTM) == HLE|RTM +} + +// Atom indicates an Atom processor +func (c CPUInfo) Atom() bool { + return c.Features&ATOM != 0 +} + +// Intel returns true if vendor is recognized as Intel +func (c CPUInfo) Intel() bool { + return c.VendorID == Intel +} + +// AMD returns true if vendor is recognized as AMD +func (c CPUInfo) AMD() bool { + return c.VendorID == AMD +} + +// Hygon returns true if vendor is recognized as Hygon +func (c CPUInfo) Hygon() bool { + return c.VendorID == Hygon +} + +// Transmeta returns true if vendor is recognized as Transmeta +func (c CPUInfo) Transmeta() bool { + return c.VendorID == Transmeta +} + +// NSC returns true if vendor is recognized as National Semiconductor +func (c CPUInfo) NSC() bool { + return c.VendorID == NSC +} + +// VIA returns true if vendor is recognized as VIA +func (c CPUInfo) VIA() bool { + return c.VendorID == VIA +} + +// RTCounter returns the 64-bit time-stamp counter +// Uses the RDTSCP instruction. The value 0 is returned +// if the CPU does not support the instruction. +func (c CPUInfo) RTCounter() uint64 { + if !c.RDTSCP() { + return 0 + } + a, _, _, d := rdtscpAsm() + return uint64(a) | (uint64(d) << 32) +} + +// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. +// This variable is OS dependent, but on Linux contains information +// about the current cpu/core the code is running on. +// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. +func (c CPUInfo) Ia32TscAux() uint32 { + if !c.RDTSCP() { + return 0 + } + _, _, ecx, _ := rdtscpAsm() + return ecx +} + +// LogicalCPU will return the Logical CPU the code is currently executing on. +// This is likely to change when the OS re-schedules the running thread +// to another CPU. +// If the current core cannot be detected, -1 will be returned. +func (c CPUInfo) LogicalCPU() int { + if c.maxFunc < 1 { + return -1 + } + _, ebx, _, _ := cpuid(1) + return int(ebx >> 24) +} + +// hertz tries to compute the clock speed of the CPU. If leaf 15 is +// supported, use it, otherwise parse the brand string. Yes, really. +func hertz(model string) int64 { + mfi := maxFunctionID() + if mfi >= 0x15 { + eax, ebx, ecx, _ := cpuid(0x15) + if eax != 0 && ebx != 0 && ecx != 0 { + return int64((int64(ecx) * int64(ebx)) / int64(eax)) + } + } + // computeHz determines the official rated speed of a CPU from its brand + // string. This insanity is *actually the official documented way to do + // this according to Intel*, prior to leaf 0x15 existing. The official + // documentation only shows this working for exactly `x.xx` or `xxxx` + // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other + // sizes. + hz := strings.LastIndex(model, "Hz") + if hz < 3 { + return -1 + } + var multiplier int64 + switch model[hz-1] { + case 'M': + multiplier = 1000 * 1000 + case 'G': + multiplier = 1000 * 1000 * 1000 + case 'T': + multiplier = 1000 * 1000 * 1000 * 1000 + } + if multiplier == 0 { + return -1 + } + freq := int64(0) + divisor := int64(0) + decimalShift := int64(1) + var i int + for i = hz - 2; i >= 0 && model[i] != ' '; i-- { + if model[i] >= '0' && model[i] <= '9' { + freq += int64(model[i]-'0') * decimalShift + decimalShift *= 10 + } else if model[i] == '.' { + if divisor != 0 { + return -1 + } + divisor = decimalShift + } else { + return -1 + } + } + // we didn't find a space + if i < 0 { + return -1 + } + if divisor != 0 { + return (freq * multiplier) / divisor + } + return freq * multiplier +} + +// VM Will return true if the cpu id indicates we are in +// a virtual machine. This is only a hint, and will very likely +// have many false negatives. +func (c CPUInfo) VM() bool { + switch c.VendorID { + case MSVM, KVM, VMware, XenHVM, Bhyve: + return true + } + return false +} + +// Flags contains detected cpu features and characteristics +type Flags uint64 + +// ArmFlags contains detected ARM cpu features and characteristics +type ArmFlags uint64 + +// String returns a string representation of the detected +// CPU features. +func (f Flags) String() string { + return strings.Join(f.Strings(), ",") +} + +// Strings returns an array of the detected features. +func (f Flags) Strings() []string { + r := make([]string, 0, 20) + for i := uint(0); i < 64; i++ { + key := Flags(1 << i) + val := flagNames[key] + if f&key != 0 { + r = append(r, val) + } + } + return r +} + +// String returns a string representation of the detected +// CPU features. +func (f ArmFlags) String() string { + return strings.Join(f.Strings(), ",") +} + +// Strings returns an array of the detected features. +func (f ArmFlags) Strings() []string { + r := make([]string, 0, 20) + for i := uint(0); i < 64; i++ { + key := ArmFlags(1 << i) + val := flagNamesArm[key] + if f&key != 0 { + r = append(r, val) + } + } + return r +} +func maxExtendedFunction() uint32 { + eax, _, _, _ := cpuid(0x80000000) + return eax +} + +func maxFunctionID() uint32 { + a, _, _, _ := cpuid(0) + return a +} + +func brandName() string { + if maxExtendedFunction() >= 0x80000004 { + v := make([]uint32, 0, 48) + for i := uint32(0); i < 3; i++ { + a, b, c, d := cpuid(0x80000002 + i) + v = append(v, a, b, c, d) + } + return strings.Trim(string(valAsString(v...)), " ") + } + return "unknown" +} + +func threadsPerCore() int { + mfi := maxFunctionID() + vend, _ := vendorID() + + if mfi < 0x4 || (vend != Intel && vend != AMD) { + return 1 + } + + if mfi < 0xb { + if vend != Intel { + return 1 + } + _, b, _, d := cpuid(1) + if (d & (1 << 28)) != 0 { + // v will contain logical core count + v := (b >> 16) & 255 + if v > 1 { + a4, _, _, _ := cpuid(4) + // physical cores + v2 := (a4 >> 26) + 1 + if v2 > 0 { + return int(v) / int(v2) + } + } + } + return 1 + } + _, b, _, _ := cpuidex(0xb, 0) + if b&0xffff == 0 { + return 1 + } + return int(b & 0xffff) +} + +func logicalCores() int { + mfi := maxFunctionID() + v, _ := vendorID() + switch v { + case Intel: + // Use this on old Intel processors + if mfi < 0xb { + if mfi < 1 { + return 0 + } + // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) + // that can be assigned to logical processors in a physical package. + // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. + _, ebx, _, _ := cpuid(1) + logical := (ebx >> 16) & 0xff + return int(logical) + } + _, b, _, _ := cpuidex(0xb, 1) + return int(b & 0xffff) + case AMD, Hygon: + _, b, _, _ := cpuid(1) + return int((b >> 16) & 0xff) + default: + return 0 + } +} + +func familyModel() (int, int) { + if maxFunctionID() < 0x1 { + return 0, 0 + } + eax, _, _, _ := cpuid(1) + family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) + model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) + return int(family), int(model) +} + +func physicalCores() int { + v, _ := vendorID() + switch v { + case Intel: + return logicalCores() / threadsPerCore() + case AMD, Hygon: + lc := logicalCores() + tpc := threadsPerCore() + if lc > 0 && tpc > 0 { + return lc / tpc + } + // The following is inaccurate on AMD EPYC 7742 64-Core Processor + + if maxExtendedFunction() >= 0x80000008 { + _, _, c, _ := cpuid(0x80000008) + return int(c&0xff) + 1 + } + } + return 0 +} + +// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID +var vendorMapping = map[string]Vendor{ + "AMDisbetter!": AMD, + "AuthenticAMD": AMD, + "CentaurHauls": VIA, + "GenuineIntel": Intel, + "TransmetaCPU": Transmeta, + "GenuineTMx86": Transmeta, + "Geode by NSC": NSC, + "VIA VIA VIA ": VIA, + "KVMKVMKVMKVM": KVM, + "Microsoft Hv": MSVM, + "VMwareVMware": VMware, + "XenVMMXenVMM": XenHVM, + "bhyve bhyve ": Bhyve, + "HygonGenuine": Hygon, + "Vortex86 SoC": SiS, + "SiS SiS SiS ": SiS, + "RiseRiseRise": SiS, + "Genuine RDC": RDC, +} + +func vendorID() (Vendor, string) { + _, b, c, d := cpuid(0) + v := string(valAsString(b, d, c)) + vend, ok := vendorMapping[v] + if !ok { + return Other, v + } + return vend, v +} + +func cacheLine() int { + if maxFunctionID() < 0x1 { + return 0 + } + + _, ebx, _, _ := cpuid(1) + cache := (ebx & 0xff00) >> 5 // cflush size + if cache == 0 && maxExtendedFunction() >= 0x80000006 { + _, _, ecx, _ := cpuid(0x80000006) + cache = ecx & 0xff // cacheline size + } + // TODO: Read from Cache and TLB Information + return int(cache) +} + +func (c *CPUInfo) cacheSize() { + c.Cache.L1D = -1 + c.Cache.L1I = -1 + c.Cache.L2 = -1 + c.Cache.L3 = -1 + vendor, _ := vendorID() + switch vendor { + case Intel: + if maxFunctionID() < 4 { + return + } + for i := uint32(0); ; i++ { + eax, ebx, ecx, _ := cpuidex(4, i) + cacheType := eax & 15 + if cacheType == 0 { + break + } + cacheLevel := (eax >> 5) & 7 + coherency := int(ebx&0xfff) + 1 + partitions := int((ebx>>12)&0x3ff) + 1 + associativity := int((ebx>>22)&0x3ff) + 1 + sets := int(ecx) + 1 + size := associativity * partitions * coherency * sets + switch cacheLevel { + case 1: + if cacheType == 1 { + // 1 = Data Cache + c.Cache.L1D = size + } else if cacheType == 2 { + // 2 = Instruction Cache + c.Cache.L1I = size + } else { + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + case AMD, Hygon: + // Untested. + if maxExtendedFunction() < 0x80000005 { + return + } + _, _, ecx, edx := cpuid(0x80000005) + c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) + c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) + + if maxExtendedFunction() < 0x80000006 { + return + } + _, _, ecx, _ = cpuid(0x80000006) + c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) + + // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties + if maxExtendedFunction() < 0x8000001D { + return + } + for i := uint32(0); i < math.MaxUint32; i++ { + eax, ebx, ecx, _ := cpuidex(0x8000001D, i) + + level := (eax >> 5) & 7 + cacheNumSets := ecx + 1 + cacheLineSize := 1 + (ebx & 2047) + cachePhysPartitions := 1 + ((ebx >> 12) & 511) + cacheNumWays := 1 + ((ebx >> 22) & 511) + + typ := eax & 15 + size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) + if typ == 0 { + return + } + + switch level { + case 1: + switch typ { + case 1: + // Data cache + c.Cache.L1D = size + case 2: + // Inst cache + c.Cache.L1I = size + default: + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + } + + return +} + +type SGXEPCSection struct { + BaseAddress uint64 + EPCSize uint64 +} + +type SGXSupport struct { + Available bool + LaunchControl bool + SGX1Supported bool + SGX2Supported bool + MaxEnclaveSizeNot64 int64 + MaxEnclaveSize64 int64 + EPCSections []SGXEPCSection +} + +func hasSGX(available, lc bool) (rval SGXSupport) { + rval.Available = available + + if !available { + return + } + + rval.LaunchControl = lc + + a, _, _, d := cpuidex(0x12, 0) + rval.SGX1Supported = a&0x01 != 0 + rval.SGX2Supported = a&0x02 != 0 + rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 + rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 + rval.EPCSections = make([]SGXEPCSection, 0) + + for subleaf := uint32(2); subleaf < 2+8; subleaf++ { + eax, ebx, ecx, edx := cpuidex(0x12, subleaf) + leafType := eax & 0xf + + if leafType == 0 { + // Invalid subleaf, stop iterating + break + } else if leafType == 1 { + // EPC Section subleaf + baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) + size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) + + section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} + rval.EPCSections = append(rval.EPCSections, section) + } + } + + return +} + +func support() Flags { + mfi := maxFunctionID() + vend, _ := vendorID() + if mfi < 0x1 { + return 0 + } + rval := uint64(0) + _, _, c, d := cpuid(1) + if (d & (1 << 15)) != 0 { + rval |= CMOV + } + if (d & (1 << 23)) != 0 { + rval |= MMX + } + if (d & (1 << 25)) != 0 { + rval |= MMXEXT + } + if (d & (1 << 25)) != 0 { + rval |= SSE + } + if (d & (1 << 26)) != 0 { + rval |= SSE2 + } + if (c & 1) != 0 { + rval |= SSE3 + } + if (c & (1 << 5)) != 0 { + rval |= VMX + } + if (c & 0x00000200) != 0 { + rval |= SSSE3 + } + if (c & 0x00080000) != 0 { + rval |= SSE4 + } + if (c & 0x00100000) != 0 { + rval |= SSE42 + } + if (c & (1 << 25)) != 0 { + rval |= AESNI + } + if (c & (1 << 1)) != 0 { + rval |= CLMUL + } + if c&(1<<23) != 0 { + rval |= POPCNT + } + if c&(1<<30) != 0 { + rval |= RDRAND + } + if c&(1<<29) != 0 { + rval |= F16C + } + if c&(1<<13) != 0 { + rval |= CX16 + } + if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { + if threadsPerCore() > 1 { + rval |= HTT + } + } + if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { + if threadsPerCore() > 1 { + rval |= HTT + } + } + // Check XGETBV, OXSAVE and AVX bits + if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { + // Check for OS support + eax, _ := xgetbv(0) + if (eax & 0x6) == 0x6 { + rval |= AVX + if (c & 0x00001000) != 0 { + rval |= FMA3 + } + } + } + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 { + _, ebx, ecx, edx := cpuidex(7, 0) + eax1, _, _, _ := cpuidex(7, 1) + if (rval&AVX) != 0 && (ebx&0x00000020) != 0 { + rval |= AVX2 + } + if (ebx & 0x00000008) != 0 { + rval |= BMI1 + if (ebx & 0x00000100) != 0 { + rval |= BMI2 + } + } + if ebx&(1<<2) != 0 { + rval |= SGX + } + if ebx&(1<<4) != 0 { + rval |= HLE + } + if ebx&(1<<9) != 0 { + rval |= ERMS + } + if ebx&(1<<11) != 0 { + rval |= RTM + } + if ebx&(1<<14) != 0 { + rval |= MPX + } + if ebx&(1<<18) != 0 { + rval |= RDSEED + } + if ebx&(1<<19) != 0 { + rval |= ADX + } + if ebx&(1<<29) != 0 { + rval |= SHA + } + if edx&(1<<26) != 0 { + rval |= IBPB + } + if ecx&(1<<30) != 0 { + rval |= SGXLC + } + if edx&(1<<27) != 0 { + rval |= STIBP + } + + // Only detect AVX-512 features if XGETBV is supported + if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { + // Check for OS support + eax, _ := xgetbv(0) + + // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and + // ZMM16-ZMM31 state are enabled by OS) + /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). + if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { + if ebx&(1<<16) != 0 { + rval |= AVX512F + } + if ebx&(1<<17) != 0 { + rval |= AVX512DQ + } + if ebx&(1<<21) != 0 { + rval |= AVX512IFMA + } + if ebx&(1<<26) != 0 { + rval |= AVX512PF + } + if ebx&(1<<27) != 0 { + rval |= AVX512ER + } + if ebx&(1<<28) != 0 { + rval |= AVX512CD + } + if ebx&(1<<30) != 0 { + rval |= AVX512BW + } + if ebx&(1<<31) != 0 { + rval |= AVX512VL + } + // ecx + if ecx&(1<<1) != 0 { + rval |= AVX512VBMI + } + if ecx&(1<<6) != 0 { + rval |= AVX512VBMI2 + } + if ecx&(1<<8) != 0 { + rval |= GFNI + } + if ecx&(1<<9) != 0 { + rval |= VAES + } + if ecx&(1<<10) != 0 { + rval |= VPCLMULQDQ + } + if ecx&(1<<11) != 0 { + rval |= AVX512VNNI + } + if ecx&(1<<12) != 0 { + rval |= AVX512BITALG + } + if ecx&(1<<14) != 0 { + rval |= AVX512VPOPCNTDQ + } + // edx + if edx&(1<<8) != 0 { + rval |= AVX512VP2INTERSECT + } + // cpuid eax 07h,ecx=1 + if eax1&(1<<5) != 0 { + rval |= AVX512BF16 + } + } + } + } + + if maxExtendedFunction() >= 0x80000001 { + _, _, c, d := cpuid(0x80000001) + if (c & (1 << 5)) != 0 { + rval |= LZCNT + rval |= POPCNT + } + if (d & (1 << 31)) != 0 { + rval |= AMD3DNOW + } + if (d & (1 << 30)) != 0 { + rval |= AMD3DNOWEXT + } + if (d & (1 << 23)) != 0 { + rval |= MMX + } + if (d & (1 << 22)) != 0 { + rval |= MMXEXT + } + if (c & (1 << 6)) != 0 { + rval |= SSE4A + } + if d&(1<<20) != 0 { + rval |= NX + } + if d&(1<<27) != 0 { + rval |= RDTSCP + } + + /* Allow for selectively disabling SSE2 functions on AMD processors + with SSE2 support but not SSE4a. This includes Athlon64, some + Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster + than SSE2 often enough to utilize this special-case flag. + AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case + so that SSE2 is used unless explicitly disabled by checking + AV_CPU_FLAG_SSE2SLOW. */ + if vend != Intel && + rval&SSE2 != 0 && (c&0x00000040) == 0 { + rval |= SSE2SLOW + } + + /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be + * used unless the OS has AVX support. */ + if (rval & AVX) != 0 { + if (c & 0x00000800) != 0 { + rval |= XOP + } + if (c & 0x00010000) != 0 { + rval |= FMA4 + } + } + + if vend == Intel { + family, model := familyModel() + if family == 6 && (model == 9 || model == 13 || model == 14) { + /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and + * 6/14 (core1 "yonah") theoretically support sse2, but it's + * usually slower than mmx. */ + if (rval & SSE2) != 0 { + rval |= SSE2SLOW + } + if (rval & SSE3) != 0 { + rval |= SSE3SLOW + } + } + /* The Atom processor has SSSE3 support, which is useful in many cases, + * but sometimes the SSSE3 version is slower than the SSE2 equivalent + * on the Atom, but is generally faster on other processors supporting + * SSSE3. This flag allows for selectively disabling certain SSSE3 + * functions on the Atom. */ + if family == 6 && model == 28 { + rval |= ATOM + } + } + } + return Flags(rval) +} + +func valAsString(values ...uint32) []byte { + r := make([]byte, 4*len(values)) + for i, v := range values { + dst := r[i*4:] + dst[0] = byte(v & 0xff) + dst[1] = byte((v >> 8) & 0xff) + dst[2] = byte((v >> 16) & 0xff) + dst[3] = byte((v >> 24) & 0xff) + switch { + case dst[0] == 0: + return r[:i*4] + case dst[1] == 0: + return r[:i*4+1] + case dst[2] == 0: + return r[:i*4+2] + case dst[3] == 0: + return r[:i*4+3] + } + } + return r +} + +// Single-precision and double-precision floating point +func (c CPUInfo) ArmFP() bool { + return c.Arm&FP != 0 +} + +// Advanced SIMD +func (c CPUInfo) ArmASIMD() bool { + return c.Arm&ASIMD != 0 +} + +// Generic timer +func (c CPUInfo) ArmEVTSTRM() bool { + return c.Arm&EVTSTRM != 0 +} + +// AES instructions +func (c CPUInfo) ArmAES() bool { + return c.Arm&AES != 0 +} + +// Polynomial Multiply instructions (PMULL/PMULL2) +func (c CPUInfo) ArmPMULL() bool { + return c.Arm&PMULL != 0 +} + +// SHA-1 instructions (SHA1C, etc) +func (c CPUInfo) ArmSHA1() bool { + return c.Arm&SHA1 != 0 +} + +// SHA-2 instructions (SHA256H, etc) +func (c CPUInfo) ArmSHA2() bool { + return c.Arm&SHA2 != 0 +} + +// CRC32/CRC32C instructions +func (c CPUInfo) ArmCRC32() bool { + return c.Arm&CRC32 != 0 +} + +// Large System Extensions (LSE) +func (c CPUInfo) ArmATOMICS() bool { + return c.Arm&ATOMICS != 0 +} + +// Half-precision floating point +func (c CPUInfo) ArmFPHP() bool { + return c.Arm&FPHP != 0 +} + +// Advanced SIMD half-precision floating point +func (c CPUInfo) ArmASIMDHP() bool { + return c.Arm&ASIMDHP != 0 +} + +// Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) +func (c CPUInfo) ArmASIMDRDM() bool { + return c.Arm&ASIMDRDM != 0 +} + +// Javascript-style double->int convert (FJCVTZS) +func (c CPUInfo) ArmJSCVT() bool { + return c.Arm&JSCVT != 0 +} + +// Floatin point complex number addition and multiplication +func (c CPUInfo) ArmFCMA() bool { + return c.Arm&FCMA != 0 +} + +// Weaker release consistency (LDAPR, etc) +func (c CPUInfo) ArmLRCPC() bool { + return c.Arm&LRCPC != 0 +} + +// Data cache clean to Point of Persistence (DC CVAP) +func (c CPUInfo) ArmDCPOP() bool { + return c.Arm&DCPOP != 0 +} + +// SHA-3 instructions (EOR3, RAXI, XAR, BCAX) +func (c CPUInfo) ArmSHA3() bool { + return c.Arm&SHA3 != 0 +} + +// SM3 instructions +func (c CPUInfo) ArmSM3() bool { + return c.Arm&SM3 != 0 +} + +// SM4 instructions +func (c CPUInfo) ArmSM4() bool { + return c.Arm&SM4 != 0 +} + +// SIMD Dot Product +func (c CPUInfo) ArmASIMDDP() bool { + return c.Arm&ASIMDDP != 0 +} + +// SHA512 instructions +func (c CPUInfo) ArmSHA512() bool { + return c.Arm&SHA512 != 0 +} + +// Scalable Vector Extension +func (c CPUInfo) ArmSVE() bool { + return c.Arm&SVE != 0 +} + +// Generic Pointer Authentication +func (c CPUInfo) ArmGPA() bool { + return c.Arm&GPA != 0 +} diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/cpuid_386.s new file mode 100644 index 0000000000..089638f51a --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid_386.s @@ -0,0 +1,42 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build 386,!gccgo,!noasm,!appengine + +// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) +TEXT ·asmCpuid(SB), 7, $0 + XORL CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+4(FP) + MOVL BX, ebx+8(FP) + MOVL CX, ecx+12(FP) + MOVL DX, edx+16(FP) + RET + +// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·asmCpuidex(SB), 7, $0 + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func xgetbv(index uint32) (eax, edx uint32) +TEXT ·asmXgetbv(SB), 7, $0 + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+4(FP) + MOVL DX, edx+8(FP) + RET + +// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) +TEXT ·asmRdtscpAsm(SB), 7, $0 + BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP + MOVL AX, eax+0(FP) + MOVL BX, ebx+4(FP) + MOVL CX, ecx+8(FP) + MOVL DX, edx+12(FP) + RET diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s new file mode 100644 index 0000000000..3ba0559e93 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s @@ -0,0 +1,42 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build amd64,!gccgo,!noasm,!appengine + +// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) +TEXT ·asmCpuid(SB), 7, $0 + XORQ CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·asmCpuidex(SB), 7, $0 + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func asmXgetbv(index uint32) (eax, edx uint32) +TEXT ·asmXgetbv(SB), 7, $0 + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+8(FP) + MOVL DX, edx+12(FP) + RET + +// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) +TEXT ·asmRdtscpAsm(SB), 7, $0 + BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP + MOVL AX, eax+0(FP) + MOVL BX, ebx+4(FP) + MOVL CX, ecx+8(FP) + MOVL DX, edx+12(FP) + RET diff --git a/vendor/github.com/klauspost/cpuid/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/cpuid_arm64.s new file mode 100644 index 0000000000..8975ee8dba --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid_arm64.s @@ -0,0 +1,26 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build arm64,!gccgo + +// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt + +// func getMidr +TEXT ·getMidr(SB), 7, $0 + WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */ + MOVD R0, midr+0(FP) + RET + +// func getProcFeatures +TEXT ·getProcFeatures(SB), 7, $0 + WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */ + MOVD R0, procFeatures+0(FP) + RET + +// func getInstAttributes +TEXT ·getInstAttributes(SB), 7, $0 + WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */ + WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */ + MOVD R0, instAttrReg0+0(FP) + MOVD R1, instAttrReg1+8(FP) + RET + diff --git a/vendor/github.com/klauspost/cpuid/detect_arm64.go b/vendor/github.com/klauspost/cpuid/detect_arm64.go new file mode 100644 index 0000000000..923a826183 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_arm64.go @@ -0,0 +1,219 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build arm64,!gccgo,!noasm,!appengine + +package cpuid + +func getMidr() (midr uint64) +func getProcFeatures() (procFeatures uint64) +func getInstAttributes() (instAttrReg0, instAttrReg1 uint64) + +func initCPU() { + cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + xgetbv = func(uint32) (a, b uint32) { return 0, 0 } + rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } +} + +func addInfo(c *CPUInfo) { + // ARM64 disabled for now. + if true { + return + } + // midr := getMidr() + + // MIDR_EL1 - Main ID Register + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | Implementer | [31-24] | y | + // |--------------------------------------------------| + // | Variant | [23-20] | y | + // |--------------------------------------------------| + // | Architecture | [19-16] | y | + // |--------------------------------------------------| + // | PartNum | [15-4] | y | + // |--------------------------------------------------| + // | Revision | [3-0] | y | + // x--------------------------------------------------x + + // fmt.Printf(" implementer: 0x%02x\n", (midr>>24)&0xff) + // fmt.Printf(" variant: 0x%01x\n", (midr>>20)&0xf) + // fmt.Printf("architecture: 0x%01x\n", (midr>>16)&0xf) + // fmt.Printf(" part num: 0x%03x\n", (midr>>4)&0xfff) + // fmt.Printf(" revision: 0x%01x\n", (midr>>0)&0xf) + + procFeatures := getProcFeatures() + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | DIT | [51-48] | y | + // |--------------------------------------------------| + // | SVE | [35-32] | y | + // |--------------------------------------------------| + // | GIC | [27-24] | n | + // |--------------------------------------------------| + // | AdvSIMD | [23-20] | y | + // |--------------------------------------------------| + // | FP | [19-16] | y | + // |--------------------------------------------------| + // | EL3 | [15-12] | n | + // |--------------------------------------------------| + // | EL2 | [11-8] | n | + // |--------------------------------------------------| + // | EL1 | [7-4] | n | + // |--------------------------------------------------| + // | EL0 | [3-0] | n | + // x--------------------------------------------------x + + var f ArmFlags + // if procFeatures&(0xf<<48) != 0 { + // fmt.Println("DIT") + // } + if procFeatures&(0xf<<32) != 0 { + f |= SVE + } + if procFeatures&(0xf<<20) != 15<<20 { + f |= ASIMD + if procFeatures&(0xf<<20) == 1<<20 { + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1 + // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic. + f |= FPHP + f |= ASIMDHP + } + } + if procFeatures&(0xf<<16) != 0 { + f |= FP + } + + instAttrReg0, instAttrReg1 := getInstAttributes() + + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | TS | [55-52] | y | + // |--------------------------------------------------| + // | FHM | [51-48] | y | + // |--------------------------------------------------| + // | DP | [47-44] | y | + // |--------------------------------------------------| + // | SM4 | [43-40] | y | + // |--------------------------------------------------| + // | SM3 | [39-36] | y | + // |--------------------------------------------------| + // | SHA3 | [35-32] | y | + // |--------------------------------------------------| + // | RDM | [31-28] | y | + // |--------------------------------------------------| + // | ATOMICS | [23-20] | y | + // |--------------------------------------------------| + // | CRC32 | [19-16] | y | + // |--------------------------------------------------| + // | SHA2 | [15-12] | y | + // |--------------------------------------------------| + // | SHA1 | [11-8] | y | + // |--------------------------------------------------| + // | AES | [7-4] | y | + // x--------------------------------------------------x + + // if instAttrReg0&(0xf<<52) != 0 { + // fmt.Println("TS") + // } + // if instAttrReg0&(0xf<<48) != 0 { + // fmt.Println("FHM") + // } + if instAttrReg0&(0xf<<44) != 0 { + f |= ASIMDDP + } + if instAttrReg0&(0xf<<40) != 0 { + f |= SM4 + } + if instAttrReg0&(0xf<<36) != 0 { + f |= SM3 + } + if instAttrReg0&(0xf<<32) != 0 { + f |= SHA3 + } + if instAttrReg0&(0xf<<28) != 0 { + f |= ASIMDRDM + } + if instAttrReg0&(0xf<<20) != 0 { + f |= ATOMICS + } + if instAttrReg0&(0xf<<16) != 0 { + f |= CRC32 + } + if instAttrReg0&(0xf<<12) != 0 { + f |= SHA2 + } + if instAttrReg0&(0xf<<12) == 2<<12 { + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented. + f |= SHA512 + } + if instAttrReg0&(0xf<<8) != 0 { + f |= SHA1 + } + if instAttrReg0&(0xf<<4) != 0 { + f |= AES + } + if instAttrReg0&(0xf<<4) == 2<<4 { + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities. + f |= PMULL + } + + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1 + // + // ID_AA64ISAR1_EL1 - Instruction set attribute register 1 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | GPI | [31-28] | y | + // |--------------------------------------------------| + // | GPA | [27-24] | y | + // |--------------------------------------------------| + // | LRCPC | [23-20] | y | + // |--------------------------------------------------| + // | FCMA | [19-16] | y | + // |--------------------------------------------------| + // | JSCVT | [15-12] | y | + // |--------------------------------------------------| + // | API | [11-8] | y | + // |--------------------------------------------------| + // | APA | [7-4] | y | + // |--------------------------------------------------| + // | DPB | [3-0] | y | + // x--------------------------------------------------x + + // if instAttrReg1&(0xf<<28) != 0 { + // fmt.Println("GPI") + // } + if instAttrReg1&(0xf<<28) != 24 { + f |= GPA + } + if instAttrReg1&(0xf<<20) != 0 { + f |= LRCPC + } + if instAttrReg1&(0xf<<16) != 0 { + f |= FCMA + } + if instAttrReg1&(0xf<<12) != 0 { + f |= JSCVT + } + // if instAttrReg1&(0xf<<8) != 0 { + // fmt.Println("API") + // } + // if instAttrReg1&(0xf<<4) != 0 { + // fmt.Println("APA") + // } + if instAttrReg1&(0xf<<0) != 0 { + f |= DCPOP + } + c.Arm = f +} diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/detect_intel.go new file mode 100644 index 0000000000..363951b3b2 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_intel.go @@ -0,0 +1,33 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine + +package cpuid + +func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) +func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func asmXgetbv(index uint32) (eax, edx uint32) +func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) + +func initCPU() { + cpuid = asmCpuid + cpuidex = asmCpuidex + xgetbv = asmXgetbv + rdtscpAsm = asmRdtscpAsm +} + +func addInfo(c *CPUInfo) { + c.maxFunc = maxFunctionID() + c.maxExFunc = maxExtendedFunction() + c.BrandName = brandName() + c.CacheLine = cacheLine() + c.Family, c.Model = familyModel() + c.Features = support() + c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0) + c.ThreadsPerCore = threadsPerCore() + c.LogicalCores = logicalCores() + c.PhysicalCores = physicalCores() + c.VendorID, c.VendorString = vendorID() + c.Hz = hertz(c.BrandName) + c.cacheSize() +} diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/detect_ref.go new file mode 100644 index 0000000000..970ff3d22b --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_ref.go @@ -0,0 +1,14 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build !amd64,!386,!arm64 gccgo noasm appengine + +package cpuid + +func initCPU() { + cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + xgetbv = func(uint32) (a, b uint32) { return 0, 0 } + rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } +} + +func addInfo(info *CPUInfo) {} diff --git a/vendor/github.com/klauspost/cpuid/go.mod b/vendor/github.com/klauspost/cpuid/go.mod new file mode 100644 index 0000000000..55563f2a85 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/go.mod @@ -0,0 +1,3 @@ +module github.com/klauspost/cpuid + +go 1.12 |