instrset.h
Go to the documentation of this file.
1 /**************************** instrset.h **********************************
2 * Author: Agner Fog
3 * Date created: 2012-05-30
4 * Last modified: 2014-10-22
5 * Version: 1.16
6 * Project: vector classes
7 * Description:
8 * Header file for various compiler-specific tasks and other common tasks to
9 * vector class library:
10 * > selects the supported instruction set
11 * > defines integer types
12 * > defines compiler version macros
13 * > undefines certain macros that prevent function overloading
14 * > defines template class to represent compile-time integer constant
15 * > defines template for compile-time error messages
16 *
17 * (c) Copyright 2012 - 2014 GNU General Public License www.gnu.org/licenses
18 ******************************************************************************/
19 
20 #ifndef INSTRSET_H
21 #define INSTRSET_H 116
22 
// Normalize 64-bit x86 detection. Compilers signal this mode through several
// different macros; map all of them onto the single macro __x86_64__ so the
// rest of the code has only one name to test.
#ifndef __x86_64__
#if defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)
#define __x86_64__ 1
#endif
#endif // __x86_64__
27 
28 // Find instruction set from compiler macros if INSTRSET not defined
29 // Note: Microsoft compilers do not define these macros automatically
// INSTRSET is a single integer level. If it is already defined when this
// point is reached, the whole chain below is skipped and that value is kept.
// The tests run from the highest level downwards, so the first match wins:
//   9 = AVX512F, 8 = AVX2, 7 = AVX, 6 = SSE4.2, 5 = SSE4.1, 4 = SSSE3,
//   3 = SSE3, 2 = SSE2, 1 = SSE, 0 = none of the tested macros is defined.
30 #ifndef INSTRSET
31 #if defined ( __AVX512F__ ) || defined ( __AVX512__ ) // || defined ( __AVX512ER__ )
32 #define INSTRSET 9
33 #elif defined ( __AVX2__ )
34 #define INSTRSET 8
35 #elif defined ( __AVX__ )
36 #define INSTRSET 7
37 #elif defined ( __SSE4_2__ )
38 #define INSTRSET 6
39 #elif defined ( __SSE4_1__ )
40 #define INSTRSET 5
41 #elif defined ( __SSSE3__ )
42 #define INSTRSET 4
43 #elif defined ( __SSE3__ )
44 #define INSTRSET 3
// A 64-bit x86 target is treated as level 2 even when __SSE2__ itself is not defined
45 #elif defined ( __SSE2__ ) || defined ( __x86_64__ )
46 #define INSTRSET 2
47 #elif defined ( __SSE__ )
48 #define INSTRSET 1
// The value of the Microsoft macro is used directly as the level
49 #elif defined ( _M_IX86_FP ) // Defined in MS compiler. 1: SSE, 2: SSE2
50 #define INSTRSET _M_IX86_FP
51 #else
52 #define INSTRSET 0
53 #endif // instruction set defines
54 #endif // INSTRSET
55 
56 // Include the appropriate header file for intrinsic functions
// Exactly one branch is taken, chosen by the INSTRSET level. Levels 8 and
// above share the first branch. For INSTRSET 0 (or any value not listed)
// no intrinsic header is included by this section.
57 #if INSTRSET > 7 // AVX2 and later
// GNU-compatible compilers other than Intel's use x86intrin.h; all others use immintrin.h
58 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
59 #include <x86intrin.h> // x86intrin.h includes header files for whatever instruction
60  // sets are specified on the compiler command line, such as:
61  // xopintrin.h, fma4intrin.h
62 #else
63 #include <immintrin.h> // MS version of immintrin.h covers AVX, AVX2 and FMA3
64 #endif // __GNUC__
65 #elif INSTRSET == 7
66 #include <immintrin.h> // AVX
67 #elif INSTRSET == 6
68 #include <nmmintrin.h> // SSE4.2
69 #elif INSTRSET == 5
70 #include <smmintrin.h> // SSE4.1
71 #elif INSTRSET == 4
72 #include <tmmintrin.h> // SSSE3
73 #elif INSTRSET == 3
74 #include <pmmintrin.h> // SSE3
75 #elif INSTRSET == 2
76 #include <emmintrin.h> // SSE2
77 #elif INSTRSET == 1
78 #include <xmmintrin.h> // SSE
79 #endif // INSTRSET
80 
// Handles the case where the selected level is AVX2 or higher (INSTRSET >= 8)
// but __FMA__ is not defined at this point.
81 #if INSTRSET >= 8 && !defined(__FMA__)
82 // Assume that all processors that have AVX2 also have FMA3
// GNU compiler proper (not Intel, not clang): __FMA__ is left undefined and
// only the build-time message below is emitted.
83 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (__clang__)
84 // Prevent error message in g++ when using FMA intrinsics with avx2:
85 #pragma message "It is recommended to specify also option -mfma when using -mavx2 or higher"
86 #else
// Every other compiler: define __FMA__ here, following the assumption stated above
87 #define __FMA__ 1
88 #endif
89 #endif
90 
91 // AMD instruction sets
// If XOP or FMA4 is enabled, compilers defining __GNUC__ get x86intrin.h and
// all others get ammintrin.h. If neither is enabled but SSE4A is, ammintrin.h
// is included regardless of compiler.
92 #if defined (__XOP__) || defined (__FMA4__)
93 #ifdef __GNUC__
94 #include <x86intrin.h> // AMD XOP (Gnu)
95 #else
96 #include <ammintrin.h> // AMD XOP (Microsoft)
97 #endif // __GNUC__
98 #elif defined (__SSE4A__) // AMD SSE4A
99 #include <ammintrin.h>
100 #endif // __XOP__
101 
102 // FMA3 instruction set
// fmaintrin.h is included only for compilers that define __GNUC__ or
// __clang__, and never for the Intel compiler.
// NOTE(review): __FMA__ can also have been defined earlier in this header
// (when INSTRSET >= 8) rather than by the compiler itself.
103 #if defined (__FMA__) && (defined(__GNUC__) || defined(__clang__)) && ! defined (__INTEL_COMPILER)
104 #include <fmaintrin.h>
105 #endif // __FMA__
106 
107 // FMA4 instruction set
// Applies only to compilers that define __GNUC__ or __clang__; unlike the
// FMA3 test there is no exclusion of the Intel compiler here.
108 #if defined (__FMA4__) && (defined(__GNUC__) || defined(__clang__))
109 #include <fma4intrin.h> // must have both x86intrin.h and fma4intrin.h, don't know why
110 #endif // __FMA4__
111 
112 
113 // Define integer types with known size
// Three cases: (1) compilers that ship stdint.h, (2) Microsoft compilers
// older than _MSC_VER 1600, (3) any other compiler.
114 #if defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
115  // Compilers supporting C99 or C++0x have stdint.h defining these integer types
116  #include <stdint.h>
117 #elif defined(_MSC_VER)
118  // Older Microsoft compilers have their own definitions
119  typedef signed __int8 int8_t;
120  typedef unsigned __int8 uint8_t;
121  typedef signed __int16 int16_t;
122  typedef unsigned __int16 uint16_t;
123  typedef signed __int32 int32_t;
124  typedef unsigned __int32 uint32_t;
125  typedef signed __int64 int64_t;
126  typedef unsigned __int64 uint64_t;
 // Define intptr_t only if _INTPTR_T_DEFINED is not set yet, and set it
 // afterwards; the width follows __x86_64__ (64 bits if defined, else 32).
127  #ifndef _INTPTR_T_DEFINED
128  #define _INTPTR_T_DEFINED
129  #ifdef __x86_64__
130  typedef int64_t intptr_t;
131  #else
132  typedef int32_t intptr_t;
133  #endif
134  #endif
135 #else
136  // This works with most compilers
 // NOTE(review): assumes char/short/int/long long are 8/16/32/64 bits wide
 // on the target; nothing here verifies that.
137  typedef signed char int8_t;
138  typedef unsigned char uint8_t;
139  typedef signed short int int16_t;
140  typedef unsigned short int uint16_t;
141  typedef signed int int32_t;
142  typedef unsigned int uint32_t;
143  typedef long long int64_t;
144  typedef unsigned long long uint64_t;
 // intptr_t width follows __x86_64__; this branch has no redefinition guard
145  #ifdef __x86_64__
146  typedef int64_t intptr_t;
147  #else
148  typedef int32_t intptr_t;
149  #endif
150 #endif
151 
152 #include <stdlib.h> // define abs(int)
153 
154 #ifdef _MSC_VER // Microsoft compiler or compatible Intel compiler
155 #include <intrin.h> // define _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int)
156 #endif // _MSC_VER
157 
158 // functions in instrset_detect.cpp
// Declarations only; no definitions appear in this header. The use of bool
// without any include for it means these prototypes require C++.
// NOTE(review): instrset_detect presumably reports a level on the same scale
// as the INSTRSET macro - confirm against instrset_detect.cpp.
159 int instrset_detect(void); // tells which instruction sets are supported
160 bool hasFMA3(void); // true if FMA3 instructions supported
161 bool hasFMA4(void); // true if FMA4 instructions supported
162 bool hasXOP (void); // true if XOP instructions supported
163 
// GCC version as one comparable integer: major * 10000 + minor * 100 + patch.
// Defined only for the GNU compiler itself (clang also sets __GNUC__, so it
// is excluded), and only when GCC_VERSION has not been defined already.
#if defined(__GNUC__) && !defined(__clang__)
#ifndef GCC_VERSION
#define GCC_VERSION ((__GNUC__) * 10000 + (__GNUC_MINOR__) * 100 + (__GNUC_PATCHLEVEL__))
#endif // GCC_VERSION
#endif // __GNUC__ && !__clang__
168 
// Clang version as one comparable integer: major * 10000 + minor * 100 + patch.
// Caveat: the version number is not consistent across platforms, see
//   http://llvm.org/bugs/show_bug.cgi?id=12643
//   Apple bug 18746972
#ifdef __clang__
#define CLANG_VERSION ((__clang_major__) * 10000 + (__clang_minor__) * 100 + (__clang_patchlevel__))
#endif // __clang__
176 
// Fix problem with macros named min and max in WinDef.h
// These function-like macros prevent overloading of functions called min and
// max, so they are removed for Microsoft-compatible compilers.
#ifdef _MSC_VER
// Remove each macro independently. #undef of a name that is not currently a
// macro is simply ignored, so no defined() guard is needed. This also covers
// the cases where only one of the two is defined, or where they come from a
// header that does not define _WINDEF_.
#undef min
#undef max
// Ask Windows headers that are included after this point not to define
// min and max again.
#ifndef NOMINMAX
#define NOMINMAX
#endif
#endif // _MSC_VER
187 
// Empty tag classes that carry a compile-time integer constant in their
// template argument, so the constant becomes part of the object's type.
template <int32_t value>
class Const_int_t {
};  // signed compile-time integer constant

template <uint32_t value>
class Const_uint_t {
};  // unsigned compile-time integer constant

// Create a temporary tag object for a constant.
// The argument must be a compile-time integer constant (unsigned for const_uint).
#define const_int(value) (Const_int_t<value>())
#define const_uint(value) (Const_uint_t<value>())
193 
// Compile-time error check. Constructing Static_error_check<condition>
// compiles when the condition is true, because the primary template has a
// public constructor.
template <bool condition>
class Static_error_check {
public:
    Static_error_check() {}
};

// Specialization for a false condition: the constructor is private, so any
// attempt to create an object produces a compile-time error.
template <>
class Static_error_check<false> {
private:
    Static_error_check() {}
};
201 
202 
203 #endif // INSTRSET_H
unsigned long long uint64_t
Definition: instrset.h:144
unsigned short int uint16_t
Definition: instrset.h:140
bool hasFMA4(void)
bool hasXOP(void)
signed char int8_t
Definition: instrset.h:137
signed int int32_t
Definition: instrset.h:141
long long int64_t
Definition: instrset.h:143
signed short int int16_t
Definition: instrset.h:139
bool hasFMA3(void)
int instrset_detect(void)
unsigned char uint8_t
Definition: instrset.h:138
unsigned int uint32_t
Definition: instrset.h:142
int32_t intptr_t
Definition: instrset.h:148