All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
instrset_detect.cpp
Go to the documentation of this file.
1 /************************** instrset_detect.cpp ****************************
2 | Author: Agner Fog
3 | Date created: 2012-05-30
4 | Last modified: 2014-07-23
5 | Version: 1.14
6 | Project: vector classes
7 | Description:
8 | Functions for checking which instruction sets are supported.
9 |
10 | (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
11 \*****************************************************************************/
12 
13 #include "instrset.h"
14 
15 #if ! __aarch64__
// Thin wrapper around the x86 CPUID instruction.
// input:  functionnumber is placed in eax; ecx is always set to 0
// output: output[0] = eax, output[1] = ebx, output[2] = ecx, output[3] = edx
static inline void cpuid (int output[4], int functionnumber) {
#if defined (_MSC_VER) || defined (__INTEL_COMPILER)
    // Microsoft or Intel compiler (intrin.h included): use the CPUID intrinsic
    __cpuidex(output, functionnumber, 0);

#elif defined(__GNUC__) || defined(__clang__)
    // Gnu/AT&T syntax inline assembly; each result register is bound
    // directly to its slot in the caller's array
    __asm("cpuid"
          : "=a"(output[0]), "=b"(output[1]), "=c"(output[2]), "=d"(output[3])
          : "a"(functionnumber), "c"(0)
          : );

#else
    // Unknown platform: try inline assembly with masm/intel syntax
    __asm {
        mov eax, functionnumber
        xor ecx, ecx
        cpuid;
        mov esi, output
        mov [esi],    eax
        mov [esi+4],  ebx
        mov [esi+8],  ecx
        mov [esi+12], edx
    }

#endif
}
48 
// Wrapper around the XGETBV instruction.
// ctr is loaded into ecx before the instruction executes; the eax (low half)
// and edx (high half) results are combined into one 64-bit return value.
static inline int64_t xgetbv (int ctr) {
#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
    // Microsoft or Intel compiler supporting the _xgetbv intrinsic
    return _xgetbv(ctr);

#elif defined(__GNUC__)
    // Gnu/AT&T syntax inline assembly
    uint32_t lo, hi;
    __asm("xgetbv" : "=a"(lo), "=d"(hi) : "c"(ctr) : );
    const uint64_t upper = (uint64_t)hi << 32;
    return upper | lo;

#else
    // Other compiler: try inline assembly with masm/intel/MS syntax,
    // emitting the opcode bytes by hand
    uint32_t lo, hi;
    __asm {
        mov ecx, ctr
        _emit 0x0f
        _emit 0x01
        _emit 0xd0 ; // xgetbv
        mov lo, eax
        mov hi, edx
    }
    const uint64_t upper = (uint64_t)hi << 32;
    return upper | lo;

#endif
}
76 
77 
78 /* find supported instruction set
79  return value:
80  0 = 80386 instruction set
81  1 or above = SSE (XMM) supported by CPU (not testing for O.S. support)
82  2 or above = SSE2
83  3 or above = SSE3
84  4 or above = Supplementary SSE3 (SSSE3)
85  5 or above = SSE4.1
86  6 or above = SSE4.2
87  7 or above = AVX supported by CPU and operating system
88  8 or above = AVX2
89  9 or above = AVX512F
90 */
91 int instrset_detect(void) {
92 
93  static int iset = -1; // remember value for next call
94  if (iset >= 0) {
95  return iset; // called before
96  }
97  iset = 0; // default value
98  int abcd[4] = {0,0,0,0}; // cpuid results
99  cpuid(abcd, 0); // call cpuid function 0
100  if (abcd[0] == 0) return iset; // no further cpuid function supported
101  cpuid(abcd, 1); // call cpuid function 1 for feature flags
102  if ((abcd[3] & (1 << 0)) == 0) return iset; // no floating point
103  if ((abcd[3] & (1 << 23)) == 0) return iset; // no MMX
104  if ((abcd[3] & (1 << 15)) == 0) return iset; // no conditional move
105  if ((abcd[3] & (1 << 24)) == 0) return iset; // no FXSAVE
106  if ((abcd[3] & (1 << 25)) == 0) return iset; // no SSE
107  iset = 1; // 1: SSE supported
108  if ((abcd[3] & (1 << 26)) == 0) return iset; // no SSE2
109  iset = 2; // 2: SSE2 supported
110  if ((abcd[2] & (1 << 0)) == 0) return iset; // no SSE3
111  iset = 3; // 3: SSE3 supported
112  if ((abcd[2] & (1 << 9)) == 0) return iset; // no SSSE3
113  iset = 4; // 4: SSSE3 supported
114  if ((abcd[2] & (1 << 19)) == 0) return iset; // no SSE4.1
115  iset = 5; // 5: SSE4.1 supported
116  if ((abcd[2] & (1 << 23)) == 0) return iset; // no POPCNT
117  if ((abcd[2] & (1 << 20)) == 0) return iset; // no SSE4.2
118  iset = 6; // 6: SSE4.2 supported
119  if ((abcd[2] & (1 << 27)) == 0) return iset; // no OSXSAVE
120  if ((xgetbv(0) & 6) != 6) return iset; // AVX not enabled in O.S.
121  if ((abcd[2] & (1 << 28)) == 0) return iset; // no AVX
122  iset = 7; // 7: AVX supported
123  cpuid(abcd, 7); // call cpuid leaf 7 for feature flags
124  if ((abcd[1] & (1 << 5)) == 0) return iset; // no AVX2
125  iset = 8; // 8: AVX2 supported
126  cpuid(abcd, 0xD); // call cpuid leaf 0xD for feature flags
127  if ((abcd[0] & 0x60) != 0x60) return iset; // no AVX512
128  iset = 9; // 8: AVX512F supported
129  return iset;
130 }
131 
132 // detect if CPU supports the FMA3 instruction set
133 bool hasFMA3(void) {
134  if (instrset_detect() < 7) return false; // must have AVX
135  int abcd[4]; // cpuid results
136  cpuid(abcd, 1); // call cpuid function 1
137  return ((abcd[2] & (1 << 12)) != 0); // ecx bit 12 indicates FMA3
138 }
139 
140 // detect if CPU supports the FMA4 instruction set
141 bool hasFMA4(void) {
142  if (instrset_detect() < 7) return false; // must have AVX
143  int abcd[4]; // cpuid results
144  cpuid(abcd, 0x80000001); // call cpuid function 0x80000001
145  return ((abcd[2] & (1 << 16)) != 0); // ecx bit 16 indicates FMA4
146 }
147 
148 // detect if CPU supports the XOP instruction set
149 bool hasXOP(void) {
150  if (instrset_detect() < 7) return false; // must have AVX
151  int abcd[4]; // cpuid results
152  cpuid(abcd, 0x80000001); // call cpuid function 0x80000001
153  return ((abcd[2] & (1 << 11)) != 0); // ecx bit 11 indicates XOP
154 }
155 #else
// __aarch64__ version: no special feature is detected yet,
// so the lowest instruction set level (0) is always reported.
int instrset_detect(void) {
    const int level = 0;
    return level;
}
// __aarch64__ version: FMA3 is never reported.
bool hasFMA3(void) {
    return false;
}
// __aarch64__ version: FMA4 is never reported.
bool hasFMA4(void) {
    return false;
}
// __aarch64__ version: XOP is never reported.
bool hasXOP(void) {
    return false;
}
163 #endif
bool hasXOP(void)
int instrset_detect(void)
#define cpuid(func, eax, ebx, ecx, edx)
unsigned long long uint64_t
Definition: instrset.h:144
bool hasFMA4(void)
long long int64_t
Definition: instrset.h:143
unsigned int uint32_t
Definition: instrset.h:142
bool hasFMA3(void)