All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
instrset_detect.cpp
Go to the documentation of this file.
1 /************************** instrset_detect.cpp ****************************
2 | Author: Agner Fog
3 | Date created: 2012-05-30
4 | Last modified: 2014-07-23
5 | Version: 1.14
6 | Project: vector classes
7 | Description:
8 | Functions for checking which instruction sets are supported.
9 |
10 | (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
11 \*****************************************************************************/
12 
13 #include "instrset.h"
14 
// Thin wrapper around the CPUID instruction (sub-leaf in ecx is always 0).
//   functionnumber : leaf number, loaded into eax before executing CPUID
//   output         : receives the four result registers in the order
//                    output[0] = eax, output[1] = ebx, output[2] = ecx, output[3] = edx
static inline void cpuid (int output[4], int functionnumber) {
#if defined (_MSC_VER) || defined (__INTEL_COMPILER)     // Microsoft or Intel compiler, intrin.h included

    // Compiler intrinsic; the last argument is the sub-leaf passed in ecx
    __cpuidex(output, functionnumber, 0);

#elif defined(__GNUC__) || defined(__clang__)            // Gnu/AT&T style extended inline assembly

    // Bind each result register directly to its slot in the caller's array
    __asm("cpuid"
          : "=a"(output[0]), "=b"(output[1]), "=c"(output[2]), "=d"(output[3])
          : "a"(functionnumber), "c"(0));

#else                                                    // unknown platform. try masm/intel syntax

    __asm {
        mov eax, functionnumber      // leaf number
        xor ecx, ecx                 // sub-leaf 0
        cpuid
        mov esi, output              // store eax, ebx, ecx, edx consecutively
        mov [esi],    eax
        mov [esi+4],  ebx
        mov [esi+8],  ecx
        mov [esi+12], edx
    }

#endif
}
47 
// Thin wrapper around the XGETBV instruction.
//   ctr : index of the extended control register, loaded into ecx
// Returns the 64-bit register value assembled from edx (high half) and eax (low half).
static inline int64_t xgetbv (int ctr) {
#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic

    return _xgetbv(ctr);                                 // intrinsic function for XGETBV

#elif defined(__GNUC__)                                  // Gnu/AT&T style extended inline assembly

    uint32_t lo, hi;
    __asm("xgetbv" : "=a"(lo), "=d"(hi) : "c"(ctr));
    uint64_t value = hi;
    value = (value << 32) | lo;
    return value;

#else  // #elif defined (_WIN32)                         // other compiler. try masm/intel/MS syntax

    uint32_t lo, hi;
    __asm {
        mov ecx, ctr
        _emit 0x0f                   // opcode bytes 0F 01 D0 = xgetbv
        _emit 0x01
        _emit 0xd0
        mov lo, eax
        mov hi, edx
    }
    uint64_t value = hi;
    value = (value << 32) | lo;
    return value;

#endif
}
75 
76 
77 /* find supported instruction set
78  return value:
79  0 = 80386 instruction set
80  1 or above = SSE (XMM) supported by CPU (not testing for O.S. support)
81  2 or above = SSE2
82  3 or above = SSE3
83  4 or above = Supplementary SSE3 (SSSE3)
84  5 or above = SSE4.1
85  6 or above = SSE4.2
86  7 or above = AVX supported by CPU and operating system
87  8 or above = AVX2
88  9 or above = AVX512F
89 */
90 int instrset_detect(void) {
91 
92  static int iset = -1; // remember value for next call
93  if (iset >= 0) {
94  return iset; // called before
95  }
96  iset = 0; // default value
97  int abcd[4] = {0,0,0,0}; // cpuid results
98  cpuid(abcd, 0); // call cpuid function 0
99  if (abcd[0] == 0) return iset; // no further cpuid function supported
100  cpuid(abcd, 1); // call cpuid function 1 for feature flags
101  if ((abcd[3] & (1 << 0)) == 0) return iset; // no floating point
102  if ((abcd[3] & (1 << 23)) == 0) return iset; // no MMX
103  if ((abcd[3] & (1 << 15)) == 0) return iset; // no conditional move
104  if ((abcd[3] & (1 << 24)) == 0) return iset; // no FXSAVE
105  if ((abcd[3] & (1 << 25)) == 0) return iset; // no SSE
106  iset = 1; // 1: SSE supported
107  if ((abcd[3] & (1 << 26)) == 0) return iset; // no SSE2
108  iset = 2; // 2: SSE2 supported
109  if ((abcd[2] & (1 << 0)) == 0) return iset; // no SSE3
110  iset = 3; // 3: SSE3 supported
111  if ((abcd[2] & (1 << 9)) == 0) return iset; // no SSSE3
112  iset = 4; // 4: SSSE3 supported
113  if ((abcd[2] & (1 << 19)) == 0) return iset; // no SSE4.1
114  iset = 5; // 5: SSE4.1 supported
115  if ((abcd[2] & (1 << 23)) == 0) return iset; // no POPCNT
116  if ((abcd[2] & (1 << 20)) == 0) return iset; // no SSE4.2
117  iset = 6; // 6: SSE4.2 supported
118  if ((abcd[2] & (1 << 27)) == 0) return iset; // no OSXSAVE
119  if ((xgetbv(0) & 6) != 6) return iset; // AVX not enabled in O.S.
120  if ((abcd[2] & (1 << 28)) == 0) return iset; // no AVX
121  iset = 7; // 7: AVX supported
122  cpuid(abcd, 7); // call cpuid leaf 7 for feature flags
123  if ((abcd[1] & (1 << 5)) == 0) return iset; // no AVX2
124  iset = 8; // 8: AVX2 supported
125  cpuid(abcd, 0xD); // call cpuid leaf 0xD for feature flags
126  if ((abcd[0] & 0x60) != 0x60) return iset; // no AVX512
127  iset = 9; // 8: AVX512F supported
128  return iset;
129 }
130 
131 // detect if CPU supports the FMA3 instruction set
132 bool hasFMA3(void) {
133  if (instrset_detect() < 7) return false; // must have AVX
134  int abcd[4]; // cpuid results
135  cpuid(abcd, 1); // call cpuid function 1
136  return ((abcd[2] & (1 << 12)) != 0); // ecx bit 12 indicates FMA3
137 }
138 
139 // detect if CPU supports the FMA4 instruction set
140 bool hasFMA4(void) {
141  if (instrset_detect() < 7) return false; // must have AVX
142  int abcd[4]; // cpuid results
143  cpuid(abcd, 0x80000001); // call cpuid function 0x80000001
144  return ((abcd[2] & (1 << 16)) != 0); // ecx bit 16 indicates FMA4
145 }
146 
147 // detect if CPU supports the XOP instruction set
148 bool hasXOP(void) {
149  if (instrset_detect() < 7) return false; // must have AVX
150  int abcd[4]; // cpuid results
151  cpuid(abcd, 0x80000001); // call cpuid function 0x80000001
152  return ((abcd[2] & (1 << 11)) != 0); // ecx bit 11 indicates XOP
153 }