The Gaudi Framework  v30r3 (a5ef0a68)
instrset_detect.cpp
Go to the documentation of this file.
1 /************************** instrset_detect.cpp ****************************
2 * Author: Agner Fog
3 * Date created: 2012-05-30
4 * Last modified: 2017-05-02
5 * Version: 1.28
6 * Project: vector classes
7 * Description:
8 * Functions for checking which instruction sets are supported.
9 *
10 * (c) Copyright 2012-2017 GNU General Public License http://www.gnu.org/licenses
11 \*****************************************************************************/
12 
13 #include "instrset.h"
14 
15 #ifdef VCL_NAMESPACE
16 namespace VCL_NAMESPACE
17 {
18 #endif
19 
// Thin wrapper around the CPUID instruction.
//   functionnumber : leaf number loaded into EAX (ECX, the sub-leaf, is always 0)
//   output         : receives EAX, EBX, ECX, EDX in elements 0..3, in that order
static inline void cpuid( int output[4], int functionnumber )
{
#if defined( __GNUC__ ) || defined( __clang__ ) // GNU-style extended inline assembly

  // EAX and ECX serve as both input and output, so they are declared read/write.
  int reg_a = functionnumber;
  int reg_b;
  int reg_c = 0;
  int reg_d;
  __asm( "cpuid" : "+a"( reg_a ), "=b"( reg_b ), "+c"( reg_c ), "=d"( reg_d ) : : );
  output[0] = reg_a;
  output[1] = reg_b;
  output[2] = reg_c;
  output[3] = reg_d;

#elif defined( _MSC_VER ) || defined( __INTEL_COMPILER ) // Microsoft or Intel compiler: use the intrinsic

  __cpuidex( output, functionnumber, 0 );

#else // unknown compiler: fall back to MASM/Intel-syntax inline assembly

  __asm {
    mov eax, functionnumber
    xor ecx, ecx
    cpuid;
    mov esi, output
    mov [esi], eax
    mov [esi+4], ebx
    mov [esi+8], ecx
    mov [esi+12], edx
  }

#endif
}
53 
// Thin wrapper around the XGETBV instruction.
//   ctr : index of the extended control register, passed in ECX
// Returns the 64-bit register value (EDX:EAX) as a signed 64-bit integer.
static inline int64_t xgetbv( int ctr )
{
#if ( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) ||                                                     \
    ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 )
  // Microsoft or Intel compiler recent enough to provide the _xgetbv intrinsic

  return _xgetbv( ctr );

#elif defined( __GNUC__ ) // GNU-style extended inline assembly

  uint32_t low_half, high_half;
  __asm( "xgetbv" : "=a"( low_half ), "=d"( high_half ) : "c"( ctr ) : );
  uint64_t value = high_half;
  value          = ( value << 32 ) | low_half;
  return value;

#else // other compiler: MASM/Intel-syntax inline assembly with the opcode emitted by hand

  uint32_t low_half, high_half;
  __asm {
    mov ecx, ctr
    _emit 0x0f
    _emit 0x01
    _emit 0xd0 ; // xgetbv
    mov low_half, eax
    mov high_half, edx
  }
  uint64_t value = high_half;
  value          = ( value << 32 ) | low_half;
  return value;

#endif
}
85 
86  /* find supported instruction set
87  return value:
88  0 = 80386 instruction set
89  1 or above = SSE (XMM) supported by CPU (not testing for O.S. support)
90  2 or above = SSE2
91  3 or above = SSE3
92  4 or above = Supplementary SSE3 (SSSE3)
93  5 or above = SSE4.1
94  6 or above = SSE4.2
95  7 or above = AVX supported by CPU and operating system
96  8 or above = AVX2
97  9 or above = AVX512F
98  10 or above = AVX512VL
99  11 or above = AVX512BW, AVX512DQ
100  */
101  int instrset_detect( void )
102  {
103 
104  static int iset = -1; // remember value for next call
105  if ( iset >= 0 ) {
106  return iset; // called before
107  }
108  iset = 0; // default value
109  int abcd[4] = {0, 0, 0, 0}; // cpuid results
110  cpuid( abcd, 0 ); // call cpuid function 0
111  if ( abcd[0] == 0 ) return iset; // no further cpuid function supported
112  cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
113  if ( ( abcd[3] & ( 1 << 0 ) ) == 0 ) return iset; // no floating point
114  if ( ( abcd[3] & ( 1 << 23 ) ) == 0 ) return iset; // no MMX
115  if ( ( abcd[3] & ( 1 << 15 ) ) == 0 ) return iset; // no conditional move
116  if ( ( abcd[3] & ( 1 << 24 ) ) == 0 ) return iset; // no FXSAVE
117  if ( ( abcd[3] & ( 1 << 25 ) ) == 0 ) return iset; // no SSE
118  iset = 1; // 1: SSE supported
119  if ( ( abcd[3] & ( 1 << 26 ) ) == 0 ) return iset; // no SSE2
120  iset = 2; // 2: SSE2 supported
121  if ( ( abcd[2] & ( 1 << 0 ) ) == 0 ) return iset; // no SSE3
122  iset = 3; // 3: SSE3 supported
123  if ( ( abcd[2] & ( 1 << 9 ) ) == 0 ) return iset; // no SSSE3
124  iset = 4; // 4: SSSE3 supported
125  if ( ( abcd[2] & ( 1 << 19 ) ) == 0 ) return iset; // no SSE4.1
126  iset = 5; // 5: SSE4.1 supported
127  if ( ( abcd[2] & ( 1 << 23 ) ) == 0 ) return iset; // no POPCNT
128  if ( ( abcd[2] & ( 1 << 20 ) ) == 0 ) return iset; // no SSE4.2
129  iset = 6; // 6: SSE4.2 supported
130  if ( ( abcd[2] & ( 1 << 27 ) ) == 0 ) return iset; // no OSXSAVE
131  if ( ( xgetbv( 0 ) & 6 ) != 6 ) return iset; // AVX not enabled in O.S.
132  if ( ( abcd[2] & ( 1 << 28 ) ) == 0 ) return iset; // no AVX
133  iset = 7; // 7: AVX supported
134  cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
135  if ( ( abcd[1] & ( 1 << 5 ) ) == 0 ) return iset; // no AVX2
136  iset = 8;
137  if ( ( abcd[1] & ( 1 << 16 ) ) == 0 ) return iset; // no AVX512
138  cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
139  if ( ( abcd[0] & 0x60 ) != 0x60 ) return iset; // no AVX512
140  iset = 9;
141  cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
142  if ( ( abcd[1] & ( 1 << 31 ) ) == 0 ) return iset; // no AVX512VL
143  iset = 10;
144  if ( ( abcd[1] & 0x40020000 ) != 0x40020000 ) return iset; // no AVX512BW, AVX512DQ
145  iset = 11;
146  return iset;
147  }
148 
149  // detect if CPU supports the FMA3 instruction set
150  bool hasFMA3( void )
151  {
152  if ( instrset_detect() < 7 ) return false; // must have AVX
153  int abcd[4]; // cpuid results
154  cpuid( abcd, 1 ); // call cpuid function 1
155  return ( ( abcd[2] & ( 1 << 12 ) ) != 0 ); // ecx bit 12 indicates FMA3
156  }
157 
158  // detect if CPU supports the FMA4 instruction set
159  bool hasFMA4( void )
160  {
161  if ( instrset_detect() < 7 ) return false; // must have AVX
162  int abcd[4]; // cpuid results
163  cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
164  return ( ( abcd[2] & ( 1 << 16 ) ) != 0 ); // ecx bit 16 indicates FMA4
165  }
166 
167  // detect if CPU supports the XOP instruction set
168  bool hasXOP( void )
169  {
170  if ( instrset_detect() < 7 ) return false; // must have AVX
171  int abcd[4]; // cpuid results
172  cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
173  return ( ( abcd[2] & ( 1 << 11 ) ) != 0 ); // ecx bit 11 indicates XOP
174  }
175 
176  // detect if CPU supports the F16C instruction set
177  bool hasF16C( void )
178  {
179  if ( instrset_detect() < 7 ) return false; // must have AVX
180  int abcd[4]; // cpuid results
181  cpuid( abcd, 1 ); // call cpuid function 1
182  return ( ( abcd[2] & ( 1 << 29 ) ) != 0 ); // ecx bit 29 indicates F16C
183  }
184 
185  // detect if CPU supports the AVX512ER instruction set
186  bool hasAVX512ER( void )
187  {
188  if ( instrset_detect() < 9 ) return false; // must have AVX512F
189  int abcd[4]; // cpuid results
190  cpuid( abcd, 7 ); // call cpuid function 7
191  return ( ( abcd[1] & ( 1 << 27 ) ) != 0 ); // ebx bit 27 indicates AVX512ER
192  }
193 
194 #ifdef VCL_NAMESPACE
195 }
196 #endif
#define VCL_NAMESPACE
Definition: System.cpp:37
bool hasF16C(void)
bool hasXOP(void)
int instrset_detect(void)
unsigned long long uint64_t
Definition: instrset.h:143
bool hasFMA4(void)
long long int64_t
Definition: instrset.h:142
bool hasAVX512ER(void)
unsigned int uint32_t
Definition: instrset.h:141
#define cpuid(func, eax, ebx, ecx, edx)
bool hasFMA3(void)