The Gaudi Framework  master (82fdf313)
Loading...
Searching...
No Matches
instrset_detect.cpp
Go to the documentation of this file.
1/************************** instrset_detect.cpp ****************************
2 * Author: Agner Fog
3 * Date created: 2012-05-30
4 * Last modified: 2019-08-01
5 * Version: 2.00.00
6 * Project: vector class library
7 * Description:
8 * Functions for checking which instruction sets are supported.
9 *
10 * (c) Copyright 2012-2025 Agner Fog.
11 * Apache License version 2.0 or later.
12 ******************************************************************************/
13
14#include "instrset.h"
15
16#ifdef VCL_NAMESPACE
17namespace VCL_NAMESPACE {
18#endif
19
// Wrapper around the XGETBV instruction.
// ctr is loaded into ECX to select the register to read; the 64-bit result is
// assembled from EDX (upper half) and EAX (lower half).
// The implementation is chosen at compile time: the _xgetbv intrinsic on
// sufficiently recent Microsoft/Intel compilers, AT&T-syntax inline assembly on
// GCC/Clang, and an MS-syntax __asm block with the raw opcode bytes otherwise.
static inline uint64_t xgetbv( int ctr ) {
#if ( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) ||                                                     \
    ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 )
  // Microsoft or Intel compiler that provides the _xgetbv intrinsic
  return static_cast<uint64_t>( _xgetbv( ctr ) );

#elif defined( __GNUC__ ) || defined( __clang__ )
  // GNU-style extended inline assembly: ECX in, EAX/EDX out
  uint32_t eax_out, edx_out;
  __asm( "xgetbv" : "=a"( eax_out ), "=d"( edx_out ) : "c"( ctr ) : );
  return ( static_cast<uint64_t>( edx_out ) << 32 ) | eax_out;

#else
  // Any other compiler: try MASM/Intel-syntax inline assembly
  uint32_t eax_out, edx_out;
  __asm {
        mov ecx, ctr
        _emit 0x0f
        _emit 0x01
        _emit 0xd0 ; // opcode bytes of xgetbv
        mov eax_out, eax
        mov edx_out, edx
  }
  return ( static_cast<uint64_t>( edx_out ) << 32 ) | eax_out;

#endif
}
48
49 /* find supported instruction set
50 return value:
51 0 = 80386 instruction set
52 1 or above = SSE (XMM) supported by CPU (not testing for OS support)
53 2 or above = SSE2
54 3 or above = SSE3
55 4 or above = Supplementary SSE3 (SSSE3)
56 5 or above = SSE4.1
57 6 or above = SSE4.2
58 7 or above = AVX supported by CPU and operating system
59 8 or above = AVX2
60 9 or above = AVX512F
61 10 or above = AVX512VL, AVX512BW, AVX512DQ
62 */
63 int instrset_detect( void ) {
64
65 static int iset = -1; // remember value for next call
66 if ( iset >= 0 ) {
67 return iset; // called before
68 }
69 iset = 0; // default value
70 int abcd[4] = { 0, 0, 0, 0 }; // cpuid results
71 cpuid( abcd, 0 ); // call cpuid function 0
72 if ( abcd[0] == 0 ) return iset; // no further cpuid function supported
73 cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
74 if ( ( abcd[3] & ( 1 << 0 ) ) == 0 ) return iset; // no floating point
75 if ( ( abcd[3] & ( 1 << 23 ) ) == 0 ) return iset; // no MMX
76 if ( ( abcd[3] & ( 1 << 15 ) ) == 0 ) return iset; // no conditional move
77 if ( ( abcd[3] & ( 1 << 24 ) ) == 0 ) return iset; // no FXSAVE
78 if ( ( abcd[3] & ( 1 << 25 ) ) == 0 ) return iset; // no SSE
79 iset = 1; // 1: SSE supported
80 if ( ( abcd[3] & ( 1 << 26 ) ) == 0 ) return iset; // no SSE2
81 iset = 2; // 2: SSE2 supported
82 if ( ( abcd[2] & ( 1 << 0 ) ) == 0 ) return iset; // no SSE3
83 iset = 3; // 3: SSE3 supported
84 if ( ( abcd[2] & ( 1 << 9 ) ) == 0 ) return iset; // no SSSE3
85 iset = 4; // 4: SSSE3 supported
86 if ( ( abcd[2] & ( 1 << 19 ) ) == 0 ) return iset; // no SSE4.1
87 iset = 5; // 5: SSE4.1 supported
88 if ( ( abcd[2] & ( 1 << 23 ) ) == 0 ) return iset; // no POPCNT
89 if ( ( abcd[2] & ( 1 << 20 ) ) == 0 ) return iset; // no SSE4.2
90 iset = 6; // 6: SSE4.2 supported
91 if ( ( abcd[2] & ( 1 << 27 ) ) == 0 ) return iset; // no OSXSAVE
92 if ( ( xgetbv( 0 ) & 6 ) != 6 ) return iset; // AVX not enabled in O.S.
93 if ( ( abcd[2] & ( 1 << 28 ) ) == 0 ) return iset; // no AVX
94 iset = 7; // 7: AVX supported
95 cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
96 if ( ( abcd[1] & ( 1 << 5 ) ) == 0 ) return iset; // no AVX2
97 iset = 8;
98 if ( ( abcd[1] & ( 1 << 16 ) ) == 0 ) return iset; // no AVX512
99 cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
100 if ( ( abcd[0] & 0x60 ) != 0x60 ) return iset; // no AVX512
101 iset = 9;
102 cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
103 if ( ( abcd[1] & ( 1 << 31 ) ) == 0 ) return iset; // no AVX512VL
104 if ( ( abcd[1] & 0x40020000 ) != 0x40020000 ) return iset; // no AVX512BW, AVX512DQ
105 iset = 10;
106 return iset;
107 }
108
109 // detect if CPU supports the FMA3 instruction set
110 bool hasFMA3( void ) {
111 if ( instrset_detect() < 7 ) return false; // must have AVX
112 int abcd[4]; // cpuid results
113 cpuid( abcd, 1 ); // call cpuid function 1
114 return ( ( abcd[2] & ( 1 << 12 ) ) != 0 ); // ecx bit 12 indicates FMA3
115 }
116
117 // detect if CPU supports the FMA4 instruction set
118 bool hasFMA4( void ) {
119 if ( instrset_detect() < 7 ) return false; // must have AVX
120 int abcd[4]; // cpuid results
121 cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
122 return ( ( abcd[2] & ( 1 << 16 ) ) != 0 ); // ecx bit 16 indicates FMA4
123 }
124
125 // detect if CPU supports the XOP instruction set
126 bool hasXOP( void ) {
127 if ( instrset_detect() < 7 ) return false; // must have AVX
128 int abcd[4]; // cpuid results
129 cpuid( abcd, 0x80000001 ); // call cpuid function 0x80000001
130 return ( ( abcd[2] & ( 1 << 11 ) ) != 0 ); // ecx bit 11 indicates XOP
131 }
132
133 // detect if CPU supports the F16C instruction set
134 bool hasF16C( void ) {
135 if ( instrset_detect() < 7 ) return false; // must have AVX
136 int abcd[4]; // cpuid results
137 cpuid( abcd, 1 ); // call cpuid function 1
138 return ( ( abcd[2] & ( 1 << 29 ) ) != 0 ); // ecx bit 29 indicates F16C
139 }
140
141 // detect if CPU supports the AVX512ER instruction set
142 bool hasAVX512ER( void ) {
143 if ( instrset_detect() < 9 ) return false; // must have AVX512F
144 int abcd[4]; // cpuid results
145 cpuid( abcd, 7 ); // call cpuid function 7
146 return ( ( abcd[1] & ( 1 << 27 ) ) != 0 ); // ebx bit 27 indicates AVX512ER
147 }
148
149 // detect if CPU supports the AVX512VBMI instruction set
150 bool hasAVX512VBMI( void ) {
151 if ( instrset_detect() < 10 ) return false; // must have AVX512BW
152 int abcd[4]; // cpuid results
153 cpuid( abcd, 7 ); // call cpuid function 7
154 return ( ( abcd[2] & ( 1 << 1 ) ) != 0 ); // ecx bit 1 indicates AVX512VBMI
155 }
156
157 // detect if CPU supports the AVX512VBMI2 instruction set
158 bool hasAVX512VBMI2( void ) {
159 if ( instrset_detect() < 10 ) return false; // must have AVX512BW
160 int abcd[4]; // cpuid results
161 cpuid( abcd, 7 ); // call cpuid function 7
162 return ( ( abcd[2] & ( 1 << 6 ) ) != 0 ); // ecx bit 6 indicates AVX512VBMI2
163 }
164
165#ifdef VCL_NAMESPACE
166}
167#endif
bool hasAVX512VBMI2(void)
bool hasFMA3(void)
bool hasAVX512VBMI(void)
bool hasAVX512ER(void)
int instrset_detect(void)
bool hasXOP(void)
bool hasFMA4(void)
bool hasF16C(void)