The Gaudi Framework  v28r3 (cc1cf868)
instrset_detect.cpp
Go to the documentation of this file.
1 /************************** instrset_detect.cpp ****************************
2 | Author: Agner Fog
3 | Date created: 2012-05-30
4 | Last modified: 2014-07-23
5 | Version: 1.14
6 | Project: vector classes
7 | Description:
8 | Functions for checking which instruction sets are supported.
9 |
10 | (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
11 \*****************************************************************************/
12 
13 #include "instrset.h"
14 
15 #if ! __aarch64__
// Thin wrapper around the x86 CPUID instruction.
//   functionnumber : CPUID leaf, loaded into EAX
//   output         : receives EAX, EBX, ECX, EDX in output[0..3]
// The MSVC, GNU and fallback paths explicitly set ECX to 0 before executing
// CPUID; the Intel __cpuid intrinsic takes no sub-leaf argument.
static inline void cpuid (int output[4], int functionnumber) {
#if defined (_MSC_VER)
    // Microsoft compiler: intrinsic with an explicit sub-leaf of 0
    __cpuidex(output, functionnumber, 0);

#elif defined(__INTEL_COMPILER)
    // Intel compiler: intrinsic form
    __cpuid(output, functionnumber);

#elif defined(__GNUC__) || defined(__clang__)
    // GNU/AT&T inline assembly: bind the four result registers straight
    // to the elements of the caller's array
    __asm__("cpuid"
            : "=a"(output[0]), "=b"(output[1]), "=c"(output[2]), "=d"(output[3])
            : "a"(functionnumber), "c"(0));

#else
    // Unknown compiler: try MASM/Intel-syntax inline assembly
    __asm {
        mov eax, functionnumber
        xor ecx, ecx
        cpuid;
        mov esi, output
        mov [esi], eax
        mov [esi+4], ebx
        mov [esi+8], ecx
        mov [esi+12], edx
    }

#endif
}
50 
// Thin wrapper around the XGETBV instruction.
//   ctr : index of the extended control register to read (loaded into ECX)
// Returns the 64-bit register value assembled from EDX:EAX.
// The caller is responsible for making sure the instruction is available
// before calling this function.
static inline int64_t xgetbv (int ctr) {
#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
    // Microsoft or Intel compiler recent enough to provide the intrinsic
    return _xgetbv(ctr);

#elif defined(__GNUC__)
    // GNU/AT&T inline assembly
    uint32_t lo, hi;
    __asm__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(ctr));
    return ((uint64_t)hi << 32) | lo;

#else
    // Other compiler: MASM/Intel/MS syntax, instruction emitted as raw bytes
    uint32_t lo, hi;
    __asm {
        mov ecx, ctr
        _emit 0x0f
        _emit 0x01
        _emit 0xd0 ; // xgetbv
        mov lo, eax
        mov hi, edx
    }
    return ((uint64_t)hi << 32) | lo;

#endif
}
78 
79 
80 /* find supported instruction set
81  return value:
82  0 = 80386 instruction set
83  1 or above = SSE (XMM) supported by CPU (not testing for O.S. support)
84  2 or above = SSE2
85  3 or above = SSE3
86  4 or above = Supplementary SSE3 (SSSE3)
87  5 or above = SSE4.1
88  6 or above = SSE4.2
89  7 or above = AVX supported by CPU and operating system
90  8 or above = AVX2
91  9 or above = AVX512F
92 */
93 int instrset_detect(void) {
94 
95  static int iset = -1; // remember value for next call
96  if (iset >= 0) {
97  return iset; // called before
98  }
99  iset = 0; // default value
100  int abcd[4] = {0,0,0,0}; // cpuid results
101  cpuid(abcd, 0); // call cpuid function 0
102  if (abcd[0] == 0) return iset; // no further cpuid function supported
103  cpuid(abcd, 1); // call cpuid function 1 for feature flags
104  if ((abcd[3] & (1 << 0)) == 0) return iset; // no floating point
105  if ((abcd[3] & (1 << 23)) == 0) return iset; // no MMX
106  if ((abcd[3] & (1 << 15)) == 0) return iset; // no conditional move
107  if ((abcd[3] & (1 << 24)) == 0) return iset; // no FXSAVE
108  if ((abcd[3] & (1 << 25)) == 0) return iset; // no SSE
109  iset = 1; // 1: SSE supported
110  if ((abcd[3] & (1 << 26)) == 0) return iset; // no SSE2
111  iset = 2; // 2: SSE2 supported
112  if ((abcd[2] & (1 << 0)) == 0) return iset; // no SSE3
113  iset = 3; // 3: SSE3 supported
114  if ((abcd[2] & (1 << 9)) == 0) return iset; // no SSSE3
115  iset = 4; // 4: SSSE3 supported
116  if ((abcd[2] & (1 << 19)) == 0) return iset; // no SSE4.1
117  iset = 5; // 5: SSE4.1 supported
118  if ((abcd[2] & (1 << 23)) == 0) return iset; // no POPCNT
119  if ((abcd[2] & (1 << 20)) == 0) return iset; // no SSE4.2
120  iset = 6; // 6: SSE4.2 supported
121  if ((abcd[2] & (1 << 27)) == 0) return iset; // no OSXSAVE
122  if ((xgetbv(0) & 6) != 6) return iset; // AVX not enabled in O.S.
123  if ((abcd[2] & (1 << 28)) == 0) return iset; // no AVX
124  iset = 7; // 7: AVX supported
125  cpuid(abcd, 7); // call cpuid leaf 7 for feature flags
126  if ((abcd[1] & (1 << 5)) == 0) return iset; // no AVX2
127  iset = 8; // 8: AVX2 supported
128  cpuid(abcd, 0xD); // call cpuid leaf 0xD for feature flags
129  if ((abcd[0] & 0x60) != 0x60) return iset; // no AVX512
130  iset = 9; // 8: AVX512F supported
131  return iset;
132 }
133 
134 // detect if CPU supports the FMA3 instruction set
135 bool hasFMA3(void) {
136  if (instrset_detect() < 7) return false; // must have AVX
137  int abcd[4]; // cpuid results
138  cpuid(abcd, 1); // call cpuid function 1
139  return ((abcd[2] & (1 << 12)) != 0); // ecx bit 12 indicates FMA3
140 }
141 
142 // detect if CPU supports the FMA4 instruction set
143 bool hasFMA4(void) {
144  if (instrset_detect() < 7) return false; // must have AVX
145  int abcd[4]; // cpuid results
146  cpuid(abcd, 0x80000001); // call cpuid function 0x80000001
147  return ((abcd[2] & (1 << 16)) != 0); // ecx bit 16 indicates FMA4
148 }
149 
150 // detect if CPU supports the XOP instruction set
151 bool hasXOP(void) {
152  if (instrset_detect() < 7) return false; // must have AVX
153  int abcd[4]; // cpuid results
154  cpuid(abcd, 0x80000001); // call cpuid function 0x80000001
155  return ((abcd[2] & (1 << 11)) != 0); // ecx bit 11 indicates XOP
156 }
157 #else
// __aarch64__ version: no special feature is detected yet, so the
// lowest level is always reported.
int instrset_detect(void) {
    const int baseline_level = 0;
    return baseline_level;
}
// __aarch64__ version: FMA3 is never reported.
bool hasFMA3(void) {
    return false;
}
// __aarch64__ version: FMA4 is never reported.
bool hasFMA4(void) {
    return false;
}
// __aarch64__ version: XOP is never reported.
bool hasXOP(void) {
    return false;
}
165 #endif
bool hasXOP(void)
int instrset_detect(void)
#define cpuid(func, eax, ebx, ecx, edx)
unsigned long long uint64_t
Definition: instrset.h:144
bool hasFMA4(void)
long long int64_t
Definition: instrset.h:143
unsigned int uint32_t
Definition: instrset.h:142
bool hasFMA3(void)