1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
|
#ifndef TH_SIMD_INC
#define TH_SIMD_INC
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
#include <cpuid.h>
#endif
// CPUID feature-flag masks; bit positions are listed under CPUID in the
// Intel ISA reference.
#define CPUID_AVX2_BIT (1 << 5)  // EBX bit 5, queried with EAX=0x7
#define CPUID_AVX_BIT  (1 << 28) // ECX bit 28, queried with EAX=0x1
#define CPUID_SSE_BIT  (1 << 25) // EDX bit 25, queried with EAX=0x1
// Helper macros for initialization
// FUNCTION_IMPL(NAME, EXT) expands to a brace-enclosed designated initializer
// for one FunctionDescription: NAME is stored, cast to void *, in .function
// and EXT in .supportedSimdExt. Both arguments are parenthesized so that
// compound expressions (e.g. a ternary or an OR of flags) expand as one unit.
#define FUNCTION_IMPL(NAME, EXT) \
  { .function = (void *)(NAME), \
    .supportedSimdExt = (EXT) \
  }
// INIT_DISPATCH_PTR(OP) walks THVector_(OP_DISPATCHTABLE) from the first entry
// and stores each entry's function in THVector_(OP_DISPATCHPTR), stopping at
// the first entry whose supportedSimdExt shares a bit with hostSimdExts.
// If no entry matches, the pointer is left at the table's last entry.
// `hostSimdExts` must be in scope at the expansion site.
// The index is a size_t (it is compared against a sizeof-derived count) and
// has a macro-specific name so it cannot shadow a caller's variable.
#define INIT_DISPATCH_PTR(OP) \
  do { \
    size_t th_dispatch_idx; \
    for (th_dispatch_idx = 0; \
         th_dispatch_idx < sizeof(THVector_(OP ## _DISPATCHTABLE)) / sizeof(FunctionDescription); \
         ++th_dispatch_idx) { \
      THVector_(OP ## _DISPATCHPTR) = THVector_(OP ## _DISPATCHTABLE)[th_dispatch_idx].function; \
      if (THVector_(OP ## _DISPATCHTABLE)[th_dispatch_idx].supportedSimdExt & hostSimdExts) { \
        break; \
      } \
    } \
  } while(0)
// One dispatch-table entry: an implementation pointer paired with the SIMD
// extension mask that INIT_DISPATCH_PTR tests against the host's mask.
struct FunctionDescription
{
  void *function;            // implementation, stored as an untyped pointer
  uint32_t supportedSimdExt; // mask ANDed with hostSimdExts during dispatch
};
typedef struct FunctionDescription FunctionDescription;
// Bit flags naming the SIMD extensions known for the target architecture.
// The flags are OR-ed into a host mask and AND-ed against dispatch-table
// entries; SIMDExtension_DEFAULT (0) carries no flag at all.
enum SIMDExtensions
{
  SIMDExtension_DEFAULT = 0x0,
#if defined(__NEON__)
  SIMDExtension_NEON = 1 << 0
#elif defined(__PPC64__)
  SIMDExtension_VSX = 1 << 0
#else
  SIMDExtension_AVX2 = 1 << 0,
  SIMDExtension_AVX = 1 << 1,
  SIMDExtension_SSE = 1 << 2
#endif
};
#if defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
// ARM host detection. The result is fixed at compile time: SIMDExtension_NEON
// when __NEON__ is defined, SIMDExtension_DEFAULT otherwise. No run-time probe
// is performed.
static inline uint32_t detectHostSIMDExtensions(void)
{
#if defined(__NEON__)
  return SIMDExtension_NEON;
#else // ARM without NEON
  return SIMDExtension_DEFAULT;
#endif
}
#elif defined(__PPC64__)
// PPC64 host detection. When the build defines __VSX__ the function reports
// SIMDExtension_VSX unless the environment variable TH_NO_VSX is set to
// exactly "1"; without __VSX__ it always reports SIMDExtension_DEFAULT.
static inline uint32_t detectHostSIMDExtensions(void)
{
#if defined(__VSX__)
  const char *evar = getenv("TH_NO_VSX");

  // Comparing 2 bytes includes the terminator, so only the exact string "1"
  // disables VSX ("10", "1 ", "" ... do not).
  if (evar != NULL && strncmp(evar, "1", 2) == 0) {
    return SIMDExtension_DEFAULT;
  }
  return SIMDExtension_VSX;
#else // PPC64 without VSX
  return SIMDExtension_DEFAULT;
#endif
}
#else // x86
// Executes the x86 CPUID instruction.
//
// On entry *eax holds the leaf to query and *ecx the sub-leaf (required by
// leaves such as 0x7; callers should pass 0 when no sub-leaf is needed).
// On return the four pointers receive the resulting EAX, EBX, ECX and EDX.
// All four pointers must be non-NULL and *eax / *ecx must be initialized.
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
#if defined(_MSC_VER)
  // The intrinsics take int[4]. __cpuidex is used instead of __cpuid because
  // only the former forwards the sub-leaf in ECX.
  int cpuInfo[4];
  __cpuidex(cpuInfo, (int)*eax, (int)*ecx);
  *eax = (uint32_t)cpuInfo[0];
  *ebx = (uint32_t)cpuInfo[1];
  *ecx = (uint32_t)cpuInfo[2];
  *edx = (uint32_t)cpuInfo[3];
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  uint32_t level = *eax;
  uint32_t subleaf = *ecx;
  // Same range check __get_cpuid performs, but an unsupported leaf yields
  // all-zero outputs instead of leaving the caller's variables untouched.
  uint32_t maxLevel = __get_cpuid_max(level & 0x80000000u, NULL);
  if (maxLevel == 0 || maxLevel < level) {
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
    return;
  }
  // __cpuid_count (unlike __get_cpuid) loads the sub-leaf into ECX.
  __cpuid_count(level, subleaf, *eax, *ebx, *ecx, *edx);
#else
  uint32_t a = *eax, b, c = *ecx, d;
  // __asm__/__volatile__ spellings stay available in strict ISO modes
  // (-std=c99/-std=c11) where the plain `asm` keyword is not.
  __asm__ __volatile__ ( "cpuid\n\t"
                         : "+a"(a), "=b"(b), "+c"(c), "=d"(d) );
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#endif
}
// Returns nonzero when environment variable `name` is set to exactly "1".
// Comparing 2 bytes includes the terminator, so "10", "1 " or "" do not count.
static inline int TH_simdDisabledByEnv(const char *name)
{
  const char *value = getenv(name);
  return value != NULL && strncmp(value, "1", 2) == 0;
}

// Probes the host x86 CPU with CPUID and returns a mask of SIMDExtension_AVX2,
// SIMDExtension_AVX and SIMDExtension_SSE flags.
//
// A flag is set only when the CPU advertises the feature AND the matching
// environment variable (TH_NO_AVX2, TH_NO_AVX, TH_NO_SSE) is not set to "1".
//
// NOTE: only the CPUID feature bits are examined; this function does not
// verify that the operating system saves the extended register state
// (OSXSAVE / XGETBV).
static inline uint32_t detectHostSIMDExtensions(void)
{
  // Everything is initialized so that a cpuid() backend that leaves outputs
  // untouched on failure can never expose indeterminate values.
  uint32_t eax = 0x0, ebx = 0x0, ecx = 0x0, edx = 0x0;
  uint32_t maxLeaf;
  uint32_t hostSimdExts = 0x0;

  // Leaf 0 returns the highest supported basic leaf in EAX. Querying a leaf
  // above that limit returns data for a different leaf, so the feature bits
  // below are only trusted when their leaf is actually implemented.
  cpuid(&eax, &ebx, &ecx, &edx);
  maxLeaf = eax;

  // Check for AVX2. Requires a separate CPUID leaf (0x7, sub-leaf 0).
  if (maxLeaf >= 0x7) {
    eax = 0x7;
    ebx = 0x0;
    ecx = 0x0;
    edx = 0x0;
    cpuid(&eax, &ebx, &ecx, &edx);
    if ((ebx & CPUID_AVX2_BIT) && !TH_simdDisabledByEnv("TH_NO_AVX2")) {
      hostSimdExts |= SIMDExtension_AVX2;
    }
  }

  // Detect and enable AVX and SSE (both reported by leaf 0x1).
  if (maxLeaf >= 0x1) {
    eax = 0x1;
    ebx = 0x0;
    ecx = 0x0;
    edx = 0x0;
    cpuid(&eax, &ebx, &ecx, &edx);
    if ((ecx & CPUID_AVX_BIT) && !TH_simdDisabledByEnv("TH_NO_AVX")) {
      hostSimdExts |= SIMDExtension_AVX;
    }
    if ((edx & CPUID_SSE_BIT) && !TH_simdDisabledByEnv("TH_NO_SSE")) {
      hostSimdExts |= SIMDExtension_SSE;
    }
  }

  return hostSimdExts;
}
#endif // end SIMD extension detection code
#endif
|