Loading [MathJax]/extensions/tex2jax.js
The Gaudi Framework  v31r0 (aeb156f0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
instrset_detect.cpp
Go to the documentation of this file.
1 /************************** instrset_detect.cpp ****************************
2 * Author: Agner Fog
3 * Date created: 2012-05-30
4 * Last modified: 2017-05-02
5 * Version: 1.28
6 * Project: vector classes
7 * Description:
8 * Functions for checking which instruction sets are supported.
9 *
10 * (c) Copyright 2012-2017 GNU General Public License http://www.gnu.org/licenses
11 \*****************************************************************************/
12 
13 #include "instrset.h"
14 
15 #ifdef VCL_NAMESPACE
16 namespace VCL_NAMESPACE {
17 #endif
18 
// Thin wrapper around the CPUID instruction.
// The instruction is executed with eax = functionnumber and ecx = 0.
// On return: output[0] = eax, output[1] = ebx, output[2] = ecx, output[3] = edx.
static inline void cpuid( int output[4], int functionnumber ) {
#if defined( __GNUC__ ) || defined( __clang__ ) // GNU-style extended inline assembly (AT&T syntax)

  // Let the compiler store the four result registers straight into the caller's array.
  __asm( "cpuid"
         : "=a"( output[0] ), "=b"( output[1] ), "=c"( output[2] ), "=d"( output[3] )
         : "a"( functionnumber ), "c"( 0 ) );

#elif defined( _MSC_VER ) || defined( __INTEL_COMPILER ) // Microsoft or Intel compiler, intrin.h included

  // The intrinsic takes the sub-leaf (ecx) as its third argument.
  __cpuidex( output, functionnumber, 0 );

#else // unknown platform: fall back to masm/intel syntax inline assembly

  __asm {
    mov eax, functionnumber
    xor ecx, ecx
    cpuid;
    mov esi, output
    mov [esi], eax
    mov [esi+4], ebx
    mov [esi+8], ecx
    mov [esi+12], edx
  }

#endif
}
51 
// Thin wrapper around the XGETBV instruction.
// Reads the extended control register selected by ctr (passed in ecx) and
// returns its contents as one 64-bit value: edx in the upper half, eax in the lower half.
static inline int64_t xgetbv( int ctr ) {
#if ( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) ||                                                     \
    ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 ) // Microsoft or Intel compiler with _xgetbv intrinsic

  return _xgetbv( ctr ); // intrinsic function for XGETBV

#elif defined( __GNUC__ ) // GNU-style extended inline assembly (AT&T syntax)

  uint32_t lo, hi;
  __asm( "xgetbv" : "=a"( lo ), "=d"( hi ) : "c"( ctr ) );
  uint64_t value = hi;
  value          = ( value << 32 ) | lo;
  return value;

#else // other compiler: try inline assembly with masm/intel/MS syntax

  uint32_t lo, hi;
  __asm {
    mov ecx, ctr
    _emit 0x0f
    _emit 0x01
    _emit 0xd0 ; // xgetbv
    mov lo, eax
    mov hi, edx
  }
  uint64_t value = hi;
  value          = ( value << 32 ) | lo;
  return value;

#endif
}
82 
83  /* find supported instruction set
84  return value:
85  0 = 80386 instruction set
86  1 or above = SSE (XMM) supported by CPU (not testing for O.S. support)
87  2 or above = SSE2
88  3 or above = SSE3
89  4 or above = Supplementary SSE3 (SSSE3)
90  5 or above = SSE4.1
91  6 or above = SSE4.2
92  7 or above = AVX supported by CPU and operating system
93  8 or above = AVX2
94  9 or above = AVX512F
95  10 or above = AVX512VL
96  11 or above = AVX512BW, AVX512DQ
97  */
98  int instrset_detect( void ) {
99 
100  static int iset = -1; // remember value for next call
101  if ( iset >= 0 ) {
102  return iset; // called before
103  }
104  iset = 0; // default value
105  int abcd[4] = {0, 0, 0, 0}; // cpuid results
106  cpuid( abcd, 0 ); // call cpuid function 0
107  if ( abcd[0] == 0 ) return iset; // no further cpuid function supported
108  cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
109  if ( ( abcd[3] & ( 1 << 0 ) ) == 0 ) return iset; // no floating point
110  if ( ( abcd[3] & ( 1 << 23 ) ) == 0 ) return iset; // no MMX
111  if ( ( abcd[3] & ( 1 << 15 ) ) == 0 ) return iset; // no conditional move
112  if ( ( abcd[3] & ( 1 << 24 ) ) == 0 ) return iset; // no FXSAVE
113  if ( ( abcd[3] & ( 1 << 25 ) ) == 0 ) return iset; // no SSE
114  iset = 1; // 1: SSE supported
115  if ( ( abcd[3] & ( 1 << 26 ) ) == 0 ) return iset; // no SSE2
116  iset = 2; // 2: SSE2 supported
117  if ( ( abcd[2] & ( 1 << 0 ) ) == 0 ) return iset; // no SSE3
118  iset = 3; // 3: SSE3 supported
119  if ( ( abcd[2] & ( 1 << 9 ) ) == 0 ) return iset; // no SSSE3
120  iset = 4; // 4: SSSE3 supported
121  if ( ( abcd[2] & ( 1 << 19 ) ) == 0 ) return iset; // no SSE4.1
122  iset = 5; // 5: SSE4.1 supported
123  if ( ( abcd[2] & ( 1 << 23 ) ) == 0 ) return iset; // no POPCNT
124  if ( ( abcd[2] & ( 1 << 20 ) ) == 0 ) return iset; // no SSE4.2
125  iset = 6; // 6: SSE4.2 supported
126  if ( ( abcd[2] & ( 1 << 27 ) ) == 0 ) return iset; // no OSXSAVE
127  if ( ( xgetbv( 0 ) & 6 ) != 6 ) return iset; // AVX not enabled in O.S.
128  if ( ( abcd[2] & ( 1 << 28 ) ) == 0 ) return iset; // no AVX
129  iset = 7; // 7: AVX supported
130  cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
131  if ( ( abcd[1] & ( 1 << 5 ) ) == 0 ) return iset; // no AVX2
132  iset = 8;
133  if ( ( abcd[1] & ( 1 << 16 ) ) == 0 ) return iset; // no AVX512
134  cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
135  if ( ( abcd[0] & 0x60 ) != 0x60 ) return iset; // no AVX512
136  iset = 9;
137  cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
138  if ( ( abcd[1] & ( 1 << 31 ) ) == 0 ) return iset; // no AVX512VL
139  iset = 10;
140  if ( ( abcd[1] & 0x40020000 ) != 0x40020000 ) return iset; // no AVX512BW, AVX512DQ
141  iset = 11;
142  return iset;
143  }
144 
145  // detect if CPU supports the FMA3 instruction set
146  bool hasFMA3( void ) {
147  if ( instrset_detect() < 7 ) return false; // must have AVX
148  int abcd[4]; // cpuid results
149  cpuid( abcd, 1 ); // call cpuid function 1
150  return ( ( abcd[2] & ( 1 << 12 ) ) != 0 ); // ecx bit 12 indicates FMA3
151  }
152 
153  // detect if CPU supports the FMA4 instruction set
154  bool hasFMA4( void ) {
155  if ( instrset_detect() < 7 ) return false; // must have AVX
156  int abcd[4]; // cpuid results
157  cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
158  return ( ( abcd[2] & ( 1 << 16 ) ) != 0 ); // ecx bit 16 indicates FMA4
159  }
160 
161  // detect if CPU supports the XOP instruction set
162  bool hasXOP( void ) {
163  if ( instrset_detect() < 7 ) return false; // must have AVX
164  int abcd[4]; // cpuid results
165  cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
166  return ( ( abcd[2] & ( 1 << 11 ) ) != 0 ); // ecx bit 11 indicates XOP
167  }
168 
169  // detect if CPU supports the F16C instruction set
170  bool hasF16C( void ) {
171  if ( instrset_detect() < 7 ) return false; // must have AVX
172  int abcd[4]; // cpuid results
173  cpuid( abcd, 1 ); // call cpuid function 1
174  return ( ( abcd[2] & ( 1 << 29 ) ) != 0 ); // ecx bit 29 indicates F16C
175  }
176 
177  // detect if CPU supports the AVX512ER instruction set
178  bool hasAVX512ER( void ) {
179  if ( instrset_detect() < 9 ) return false; // must have AVX512F
180  int abcd[4]; // cpuid results
181  cpuid( abcd, 7 ); // call cpuid function 7
182  return ( ( abcd[1] & ( 1 << 27 ) ) != 0 ); // ebx bit 27 indicates AVX512ER
183  }
184 
185 #ifdef VCL_NAMESPACE
186 }
187 #endif
#define VCL_NAMESPACE
Definition: System.cpp:37
bool hasF16C(void)
bool hasXOP(void)
int instrset_detect(void)
unsigned long long uint64_t
Definition: instrset.h:143
bool hasFMA4(void)
long long int64_t
Definition: instrset.h:142
bool hasAVX512ER(void)
unsigned int uint32_t
Definition: instrset.h:141
#define cpuid(func, eax, ebx, ecx, edx)
bool hasFMA3(void)