Loading [MathJax]/extensions/tex2jax.js
The Gaudi Framework  v31r0 (aeb156f0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
instrset.h
Go to the documentation of this file.
1 /**************************** instrset.h **********************************
2  * Author: Agner Fog
3  * Date created: 2012-05-30
4  * Last modified: 2016-11-25
5  * Version: 1.25
6  * Project: vector classes
7  * Description:
8  * Header file for various compiler-specific tasks and other common tasks to
9  * vector class library:
10  * > selects the supported instruction set
11  * > defines integer types
12  * > defines compiler version macros
13  * > undefines certain macros that prevent function overloading
14  * > defines template class to represent compile-time integer constant
15  * > defines template for compile-time error messages
16  *
17  * (c) Copyright 2012-2016 GNU General Public License www.gnu.org/licenses
18  ******************************************************************************/
19 
20 #ifndef INSTRSET_H
21 #define INSTRSET_H 125
22 
23 // Detect 64 bit mode
24 #if ( defined( _M_AMD64 ) || defined( _M_X64 ) || defined( __amd64 ) ) && !defined( __x86_64__ )
25 # define __x86_64__ 1 // There are many different macros for this, decide on only one
26 #endif
27 
28 // Find instruction set from compiler macros if INSTRSET not defined
29 // Note: Most of these macros are not defined in Microsoft compilers
30 #ifndef INSTRSET // a value supplied by the user/build system takes precedence over auto-detection
31 # if defined( __AVX512F__ ) || defined( __AVX512__ )
32 # define INSTRSET 9 // AVX512F
33 # elif defined( __AVX2__ )
34 # define INSTRSET 8 // AVX2
35 # elif defined( __AVX__ )
36 # define INSTRSET 7 // AVX
37 # elif defined( __SSE4_2__ )
38 # define INSTRSET 6 // SSE4.2
39 # elif defined( __SSE4_1__ )
40 # define INSTRSET 5 // SSE4.1
41 # elif defined( __SSSE3__ )
42 # define INSTRSET 4 // SSSE3
43 # elif defined( __SSE3__ )
44 # define INSTRSET 3 // SSE3
45 # elif defined( __SSE2__ ) || defined( __x86_64__ )
46 # define INSTRSET 2 // SSE2; also selected whenever __x86_64__ is defined, even without __SSE2__
47 # elif defined( __SSE__ )
48 # define INSTRSET 1 // SSE
49 # elif defined( _M_IX86_FP ) // Defined in MS compiler. 1: SSE, 2: SSE2
50 # define INSTRSET _M_IX86_FP // reuse the compiler's value directly; it matches levels 1 and 2 above
51 # else
52 # define INSTRSET 0 // none of the macros tested above is defined
53 # endif // instruction set defines
54 #endif // INSTRSET
55 
56 // Include the appropriate header file for intrinsic functions
57 #if INSTRSET > 7 // AVX2 and later
58 # if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
59 # include <x86intrin.h> // x86intrin.h includes header files for whatever instruction
60  // sets are specified on the compiler command line, such as:
61  // xopintrin.h, fma4intrin.h
62 # else
63 # include <immintrin.h> // MS version of immintrin.h covers AVX, AVX2 and FMA3
64 # endif // __GNUC__
65 #elif INSTRSET == 7
66 # include <immintrin.h> // AVX
67 #elif INSTRSET == 6
68 # include <nmmintrin.h> // SSE4.2
69 #elif INSTRSET == 5
70 # include <smmintrin.h> // SSE4.1
71 #elif INSTRSET == 4
72 # include <tmmintrin.h> // SSSE3
73 #elif INSTRSET == 3
74 # include <pmmintrin.h> // SSE3
75 #elif INSTRSET == 2
76 # include <emmintrin.h> // SSE2
77 #elif INSTRSET == 1
78 # include <xmmintrin.h> // SSE
79 #endif // INSTRSET
80 
81 #if INSTRSET >= 8 && !defined( __FMA__ ) // AVX2 or higher selected, but __FMA__ is not defined yet
82 // Assume that all processors that have AVX2 also have FMA3
83 # if defined( __GNUC__ ) && !defined( __INTEL_COMPILER ) && !defined( __clang__ )
84 // Plain g++ (not Intel, not clang): __FMA__ is left undefined here and only a build message is printed.
85 # pragma message "It is recommended to specify also option -mfma when using -mavx2 or higher"
86 # else // every other compiler (Intel, clang, non-GNU)
87 # define __FMA__ 1 // act on the assumption above: treat FMA3 as available
88 # endif
89 #endif
90 
91 // AMD instruction sets
92 #if defined( __XOP__ ) || defined( __FMA4__ )
93 # ifdef __GNUC__
94 # include <x86intrin.h> // AMD XOP (Gnu)
95 # else
96 # include <ammintrin.h> // AMD XOP (Microsoft)
97 # endif // __GNUC__
98 #elif defined( __SSE4A__ ) // AMD SSE4A
99 # include <ammintrin.h>
100 #endif // __XOP__
101 
102 // FMA3 instruction set
103 #if defined( __FMA__ ) && ( defined( __GNUC__ ) || defined( __clang__ ) ) && !defined( __INTEL_COMPILER )
104 # include <fmaintrin.h>
105 #endif // __FMA__
106 
107 // FMA4 instruction set
108 #if defined( __FMA4__ ) && ( defined( __GNUC__ ) || defined( __clang__ ) )
109 # include <fma4intrin.h> // must have both x86intrin.h and fma4intrin.h, don't know why
110 #endif // __FMA4__
111 
112 // Define integer types with known size
113 #if defined( __GNUC__ ) || defined( __clang__ ) || ( defined( _MSC_VER ) && _MSC_VER >= 1600 )
114 // Compilers supporting C99 or C++0x have stdint.h defining these integer types
115 # include <stdint.h>
116 #elif defined( _MSC_VER )
117 // Older Microsoft compilers have their own definitions
118 typedef signed __int8 int8_t;
119 typedef unsigned __int8 uint8_t;
120 typedef signed __int16 int16_t;
121 typedef unsigned __int16 uint16_t;
122 typedef signed __int32 int32_t;
123 typedef unsigned __int32 uint32_t;
124 typedef signed __int64 int64_t;
125 typedef unsigned __int64 uint64_t;
126 # ifndef _INTPTR_T_DEFINED
127 # define _INTPTR_T_DEFINED
128 # ifdef __x86_64__
129 typedef int64_t intptr_t;
130 # else
131 typedef int32_t intptr_t;
132 # endif
133 # endif
134 #else
135 // This works with most compilers
136 typedef signed char int8_t;
137 typedef unsigned char uint8_t;
138 typedef signed short int int16_t;
139 typedef unsigned short int uint16_t;
140 typedef signed int int32_t;
141 typedef unsigned int uint32_t;
142 typedef long long int64_t;
143 typedef unsigned long long uint64_t;
144 # ifdef __x86_64__
145 typedef int64_t intptr_t;
146 # else
147 typedef int32_t intptr_t;
148 # endif
149 #endif
150 
151 #include <stdlib.h> // define abs(int)
152 
153 #ifdef _MSC_VER // Microsoft compiler or compatible Intel compiler
154 # include <intrin.h> // define _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int)
155 #endif // _MSC_VER
156 
157 // functions in instrset_detect.cpp (only declared here; placed in namespace VCL_NAMESPACE when that macro is defined)
158 #ifdef VCL_NAMESPACE
159 namespace VCL_NAMESPACE {
160 #endif
161  int instrset_detect( void ); // tells which instruction sets are supported (NOTE(review): presumably uses the same numbering as INSTRSET - confirm in instrset_detect.cpp)
162  bool hasFMA3( void ); // true if FMA3 instructions supported
163  bool hasFMA4( void ); // true if FMA4 instructions supported
164  bool hasXOP( void ); // true if XOP instructions supported
165  bool hasAVX512ER( void ); // true if AVX512ER instructions supported
166 #ifdef VCL_NAMESPACE
167 } // namespace VCL_NAMESPACE
168 #endif
169 
170 // GCC version
171 #if defined( __GNUC__ ) && !defined( GCC_VERSION ) && !defined( __clang__ )
172 # define GCC_VERSION ( (__GNUC__)*10000 + (__GNUC_MINOR__)*100 + ( __GNUC_PATCHLEVEL__ ) )
173 #endif
174 
175 // Clang version
176 #if defined( __clang__ )
177 # define CLANG_VERSION ( (__clang_major__)*10000 + (__clang_minor__)*100 + ( __clang_patchlevel__ ) )
178 // Problem: The version number is not consistent across platforms
179 // http://llvm.org/bugs/show_bug.cgi?id=12643
180 // Apple bug 18746972
181 #endif
182 
183 // Fix problem with non-overloadable macros named min and max in WinDef.h
184 #ifdef _MSC_VER
185 # if defined( _WINDEF_ ) && defined( min ) && defined( max )
186 # undef min
187 # undef max
188 # endif
189 # ifndef NOMINMAX
190 # define NOMINMAX
191 # endif
192 #endif
193 
194 #ifdef VCL_NAMESPACE
195 namespace VCL_NAMESPACE {
196 #endif
197  // Template classes to represent compile-time integer constants.
198  template <int32_t n>
199  class Const_int_t {}; // empty class: the signed constant n is carried only in the type
200  template <uint32_t n>
201  class Const_uint_t {}; // empty class: the unsigned constant n is carried only in the type
202 #define const_int( n ) ( Const_int_t<n>() ) // n must be compile-time integer constant; macros ignore namespaces, so this expands to the unqualified name Const_int_t
203 #define const_uint( n ) ( Const_uint_t<n>() ) // n must be compile-time unsigned integer constant; expands to the unqualified name Const_uint_t
204 
205  // Template for compile-time error messages
206  template <bool>
207  class Static_error_check {
208  public:
209  Static_error_check(){};
210  };
211  template <>
212  class Static_error_check<false> { // generate compile-time error if false
213  private:
214  Static_error_check(){};
215  };
216 #ifdef VCL_NAMESPACE
217 }
218 #endif
219 
220 #endif // INSTRSET_H
#define VCL_NAMESPACE
Definition: System.cpp:37
bool hasAVX512ER(void)
unsigned long long uint64_t
Definition: instrset.h:143
unsigned short int uint16_t
Definition: instrset.h:139
bool hasFMA4(void)
bool hasXOP(void)
signed char int8_t
Definition: instrset.h:136
signed int int32_t
Definition: instrset.h:140
long long int64_t
Definition: instrset.h:142
signed short int int16_t
Definition: instrset.h:138
bool hasFMA3(void)
int instrset_detect(void)
unsigned char uint8_t
Definition: instrset.h:137
unsigned int uint32_t
Definition: instrset.h:141
int32_t intptr_t
Definition: instrset.h:147