The Gaudi Framework  master (37c0b60a)
instrset_detect.cpp
Go to the documentation of this file.
1 /************************** instrset_detect.cpp ****************************
2  * Author: Agner Fog
3  * Date created: 2012-05-30
4  * Last modified: 2019-08-01
5  * Version: 2.00.00
6  * Project: vector class library
7  * Description:
8  * Functions for checking which instruction sets are supported.
9  *
10  * (c) Copyright 2012-2019 Agner Fog.
11  * Apache License version 2.0 or later.
12  ******************************************************************************/
13 
14 #include "instrset.h"
15 
16 #ifdef VCL_NAMESPACE
17 namespace VCL_NAMESPACE {
18 #endif
19 
// Wrapper around the XGETBV instruction.
// ctr is loaded into ecx before the instruction is executed; the result is
// delivered in edx:eax and returned here as a single 64-bit value.
static inline uint64_t xgetbv( int ctr ) {
#if ( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) || \
    ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 )
  // Microsoft or Intel compiler that provides the _xgetbv intrinsic

  const uint64_t xcr = (uint64_t)_xgetbv( ctr ); // intrinsic function for XGETBV
  return xcr;

#elif defined( __GNUC__ ) || defined( __clang__ )
  // Gnu or Clang compiler: inline assembly in Gnu/AT&T syntax

  uint32_t lo32, hi32; // eax and edx after the instruction
  __asm( "xgetbv" : "=a"( lo32 ), "=d"( hi32 ) : "c"( ctr ) : );
  const uint64_t upper = (uint64_t)hi32 << 32;
  return upper | lo32;

#else
  // Any other compiler: try inline assembly in masm/intel/MS syntax.
  // The opcode bytes are emitted by hand in case the assembler does not
  // know the xgetbv mnemonic.
  uint32_t lo32, hi32; // eax and edx after the instruction
  __asm {
    mov ecx, ctr
    _emit 0x0f
    _emit 0x01
    _emit 0xd0 ; // xgetbv
    mov lo32, eax
    mov hi32, edx
  }
  const uint64_t upper = (uint64_t)hi32 << 32;
  return upper | lo32;

#endif
}
48 
49  /* find supported instruction set
50  return value:
51  0 = 80386 instruction set
52  1 or above = SSE (XMM) supported by CPU (not testing for OS support)
53  2 or above = SSE2
54  3 or above = SSE3
55  4 or above = Supplementary SSE3 (SSSE3)
56  5 or above = SSE4.1
57  6 or above = SSE4.2
58  7 or above = AVX supported by CPU and operating system
59  8 or above = AVX2
60  9 or above = AVX512F
61  10 or above = AVX512VL, AVX512BW, AVX512DQ
62  */
63  int instrset_detect( void ) {
64 
65  static int iset = -1; // remember value for next call
66  if ( iset >= 0 ) {
67  return iset; // called before
68  }
69  iset = 0; // default value
70  int abcd[4] = { 0, 0, 0, 0 }; // cpuid results
71  cpuid( abcd, 0 ); // call cpuid function 0
72  if ( abcd[0] == 0 ) return iset; // no further cpuid function supported
73  cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
74  if ( ( abcd[3] & ( 1 << 0 ) ) == 0 ) return iset; // no floating point
75  if ( ( abcd[3] & ( 1 << 23 ) ) == 0 ) return iset; // no MMX
76  if ( ( abcd[3] & ( 1 << 15 ) ) == 0 ) return iset; // no conditional move
77  if ( ( abcd[3] & ( 1 << 24 ) ) == 0 ) return iset; // no FXSAVE
78  if ( ( abcd[3] & ( 1 << 25 ) ) == 0 ) return iset; // no SSE
79  iset = 1; // 1: SSE supported
80  if ( ( abcd[3] & ( 1 << 26 ) ) == 0 ) return iset; // no SSE2
81  iset = 2; // 2: SSE2 supported
82  if ( ( abcd[2] & ( 1 << 0 ) ) == 0 ) return iset; // no SSE3
83  iset = 3; // 3: SSE3 supported
84  if ( ( abcd[2] & ( 1 << 9 ) ) == 0 ) return iset; // no SSSE3
85  iset = 4; // 4: SSSE3 supported
86  if ( ( abcd[2] & ( 1 << 19 ) ) == 0 ) return iset; // no SSE4.1
87  iset = 5; // 5: SSE4.1 supported
88  if ( ( abcd[2] & ( 1 << 23 ) ) == 0 ) return iset; // no POPCNT
89  if ( ( abcd[2] & ( 1 << 20 ) ) == 0 ) return iset; // no SSE4.2
90  iset = 6; // 6: SSE4.2 supported
91  if ( ( abcd[2] & ( 1 << 27 ) ) == 0 ) return iset; // no OSXSAVE
92  if ( ( xgetbv( 0 ) & 6 ) != 6 ) return iset; // AVX not enabled in O.S.
93  if ( ( abcd[2] & ( 1 << 28 ) ) == 0 ) return iset; // no AVX
94  iset = 7; // 7: AVX supported
95  cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
96  if ( ( abcd[1] & ( 1 << 5 ) ) == 0 ) return iset; // no AVX2
97  iset = 8;
98  if ( ( abcd[1] & ( 1 << 16 ) ) == 0 ) return iset; // no AVX512
99  cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
100  if ( ( abcd[0] & 0x60 ) != 0x60 ) return iset; // no AVX512
101  iset = 9;
102  cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
103  if ( ( abcd[1] & ( 1 << 31 ) ) == 0 ) return iset; // no AVX512VL
104  if ( ( abcd[1] & 0x40020000 ) != 0x40020000 ) return iset; // no AVX512BW, AVX512DQ
105  iset = 10;
106  return iset;
107  }
108 
109  // detect if CPU supports the FMA3 instruction set
110  bool hasFMA3( void ) {
111  if ( instrset_detect() < 7 ) return false; // must have AVX
112  int abcd[4]; // cpuid results
113  cpuid( abcd, 1 ); // call cpuid function 1
114  return ( ( abcd[2] & ( 1 << 12 ) ) != 0 ); // ecx bit 12 indicates FMA3
115  }
116 
117  // detect if CPU supports the FMA4 instruction set
118  bool hasFMA4( void ) {
119  if ( instrset_detect() < 7 ) return false; // must have AVX
120  int abcd[4]; // cpuid results
121  cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
122  return ( ( abcd[2] & ( 1 << 16 ) ) != 0 ); // ecx bit 16 indicates FMA4
123  }
124 
125  // detect if CPU supports the XOP instruction set
126  bool hasXOP( void ) {
127  if ( instrset_detect() < 7 ) return false; // must have AVX
128  int abcd[4]; // cpuid results
129  cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
130  return ( ( abcd[2] & ( 1 << 11 ) ) != 0 ); // ecx bit 11 indicates XOP
131  }
132 
133  // detect if CPU supports the F16C instruction set
134  bool hasF16C( void ) {
135  if ( instrset_detect() < 7 ) return false; // must have AVX
136  int abcd[4]; // cpuid results
137  cpuid( abcd, 1 ); // call cpuid function 1
138  return ( ( abcd[2] & ( 1 << 29 ) ) != 0 ); // ecx bit 29 indicates F16C
139  }
140 
141  // detect if CPU supports the AVX512ER instruction set
142  bool hasAVX512ER( void ) {
143  if ( instrset_detect() < 9 ) return false; // must have AVX512F
144  int abcd[4]; // cpuid results
145  cpuid( abcd, 7 ); // call cpuid function 7
146  return ( ( abcd[1] & ( 1 << 27 ) ) != 0 ); // ebx bit 27 indicates AVX512ER
147  }
148 
149  // detect if CPU supports the AVX512VBMI instruction set
150  bool hasAVX512VBMI( void ) {
151  if ( instrset_detect() < 10 ) return false; // must have AVX512BW
152  int abcd[4]; // cpuid results
153  cpuid( abcd, 7 ); // call cpuid function 7
154  return ( ( abcd[2] & ( 1 << 1 ) ) != 0 ); // ecx bit 1 indicates AVX512VBMI
155  }
156 
157  // detect if CPU supports the AVX512VBMI2 instruction set
158  bool hasAVX512VBMI2( void ) {
159  if ( instrset_detect() < 10 ) return false; // must have AVX512BW
160  int abcd[4]; // cpuid results
161  cpuid( abcd, 7 ); // call cpuid function 7
162  return ( ( abcd[2] & ( 1 << 6 ) ) != 0 ); // ecx bit 6 indicates AVX512VBMI2
163  }
164 
165 #ifdef VCL_NAMESPACE
166 }
167 #endif
hasAVX512VBMI
bool hasAVX512VBMI(void)
Definition: instrset_detect.cpp:150
hasAVX512VBMI2
bool hasAVX512VBMI2(void)
Definition: instrset_detect.cpp:158
hasAVX512ER
bool hasAVX512ER(void)
Definition: instrset_detect.cpp:142
hasFMA3
bool hasFMA3(void)
Definition: instrset_detect.cpp:110
hasF16C
bool hasF16C(void)
Definition: instrset_detect.cpp:134
hasXOP
bool hasXOP(void)
Definition: instrset_detect.cpp:126
hasFMA4
bool hasFMA4(void)
Definition: instrset_detect.cpp:118
instrset.h
instrset_detect
int instrset_detect(void)
Definition: instrset_detect.cpp:63
cpuid
#define cpuid(func, ax, bx, cx, dx)
Definition: PerfMonAuditor.cpp:83