The Gaudi Framework master (82fdf313)
instrset.h File Reference
#include <stdint.h>
#include <stdlib.h>
Go to the source code of this file.
Classes | |
class | Const_int_t< n > |
class | Const_uint_t< n > |
struct | EList< T, N > |
Macros | |
#define | ALLOW_FP_PERMUTE true |
#define | INSTRSET 0 |
#define | const_int(n) |
#define | const_uint(n) |
Functions | |
int | instrset_detect (void) |
bool | hasFMA3 (void) |
bool | hasFMA4 (void) |
bool | hasXOP (void) |
bool | hasAVX512ER (void) |
bool | hasAVX512VBMI (void) |
bool | hasAVX512VBMI2 (void) |
int | physicalProcessors (int *logical_processors=0) |
constexpr int | bit_scan_reverse_const (uint64_t const n) |
template<typename V> | |
constexpr auto | get_inttype () |
template<int N> | |
constexpr auto | zero_mask (int const (&a)[N]) |
template<typename V> | |
constexpr auto | zero_mask_broad (int const (&A)[V::size()]) |
template<int N, int B> | |
constexpr uint64_t | make_bit_mask (int const (&a)[N]) |
template<typename V> | |
constexpr auto | make_broad_mask (uint64_t const m) |
template<typename V> | |
constexpr auto | perm_mask_broad (int const (&A)[V::size()]) |
template<typename V> | |
constexpr uint64_t | perm_flags (int const (&a)[V::size()]) |
template<int N> | |
constexpr uint64_t | compress_mask (int const (&a)[N]) |
template<int N> | |
constexpr uint64_t | expand_mask (int const (&a)[N]) |
template<typename V> | |
constexpr uint64_t | perm16_flags (int const (&a)[V::size()]) |
template<typename V, int oppos = 0> | |
constexpr auto | pshufb_mask (int const (&A)[V::size()]) |
template<int N> | |
constexpr EList< int, N/2 > | largeblock_perm (int const (&a)[N]) |
template<typename V> | |
constexpr uint64_t | blend_flags (int const (&a)[V::size()]) |
template<int N, int dozero> | |
constexpr EList< int, 2 *N > | blend_perm_indexes (int const (&a)[N]) |
template<int N> | |
constexpr EList< int, N/2 > | largeblock_indexes (int const (&a)[N]) |
template<typename dummy> | |
void | blend2 () |
template<typename dummy> | |
void | blend4 () |
template<typename dummy> | |
void | blend8 () |
template<typename dummy> | |
void | blend16 () |
template<typename dummy> | |
void | blend32 () |
template<int N, int dozero, int src1, int src2> | |
constexpr EList< int, N > | blend_half_indexes (int const (&a)[N]) |
template<typename W, int... i0> | |
auto | blend_half (W const &a, W const &b) |
Variables | |
constexpr int | V_DC = -256 |
const int | perm_zeroing = 1 |
const int | perm_perm = 2 |
const int | perm_allzero = 4 |
const int | perm_largeblock = 8 |
const int | perm_addz = 0x10 |
const int | perm_addz2 = 0x20 |
const int | perm_cross_lane = 0x40 |
const int | perm_same_pattern = 0x80 |
const int | perm_punpckh = 0x100 |
const int | perm_punpckl = 0x200 |
const int | perm_rotate |
const int | perm_shright |
const int | perm_shleft |
const int | perm_rotate_big |
const int | perm_broadcast = 0x8000 |
const int | perm_zext = 0x10000 |
const int | perm_compress = 0x20000 |
const int | perm_expand = 0x40000 |
const int | perm_outofrange = 0x10000000 |
const int | perm_rot_count = 32 |
const int | perm_ipattern |
const int | blend_zeroing = 1 |
const int | blend_allzero = 2 |
const int | blend_largeblock = 4 |
const int | blend_addz = 8 |
const int | blend_a = 0x10 |
const int | blend_b = 0x20 |
const int | blend_perma = 0x40 |
const int | blend_permb = 0x80 |
const int | blend_cross_lane = 0x100 |
const int | blend_same_pattern = 0x200 |
const int | blend_punpckhab = 0x1000 |
const int | blend_punpckhba = 0x2000 |
const int | blend_punpcklab = 0x4000 |
const int | blend_punpcklba = 0x8000 |
const int | blend_rotateab = 0x10000 |
const int | blend_rotateba = 0x20000 |
const int | blend_shufab = 0x40000 |
const int | blend_shufba = 0x80000 |
const int | blend_rotate_big = 0x100000 |
const int | blend_outofrange = 0x10000000 |
const int | blend_shufpattern = 32 |
const int | blend_rotpattern = 40 |
#define ALLOW_FP_PERMUTE true |
Definition at line 28 of file instrset.h.
#define const_int | ( | n | ) |
Definition at line 404 of file instrset.h.
#define const_uint | ( | n | ) |
Definition at line 405 of file instrset.h.
#define INSTRSET 0 |
Definition at line 76 of file instrset.h.
constexpr int bit_scan_reverse_const | ( | uint64_t const | n | ) |
Definition at line 379 of file instrset.h.
void blend16 | ( | ) |
Definition at line 1296 of file instrset.h.
void blend2 | ( | ) |
Definition at line 1290 of file instrset.h.
void blend32 | ( | ) |
Definition at line 1298 of file instrset.h.
void blend4 | ( | ) |
Definition at line 1292 of file instrset.h.
void blend8 | ( | ) |
Definition at line 1294 of file instrset.h.
constexpr uint64_t blend_flags | ( | int const(&) | a[V::size()] | ) |
Definition at line 1035 of file instrset.h.
auto blend_half | ( | W const & | a, |
W const & | b ) |
Definition at line 1351 of file instrset.h.
constexpr EList< int, N > blend_half_indexes | ( | int const(&) | a[N] | ) |
Definition at line 1307 of file instrset.h.
constexpr EList< int, 2 *N > blend_perm_indexes | ( | int const(&) | a[N] | ) |
Definition at line 1212 of file instrset.h.
constexpr uint64_t compress_mask | ( | int const(&) | a[N] | ) |
Definition at line 822 of file instrset.h.
constexpr uint64_t expand_mask | ( | int const(&) | a[N] | ) |
Definition at line 846 of file instrset.h.
constexpr auto get_inttype | ( | ) |
Definition at line 467 of file instrset.h.
bool hasAVX512ER | ( | void | ) |
Definition at line 142 of file instrset_detect.cpp.
bool hasAVX512VBMI | ( | void | ) |
Definition at line 150 of file instrset_detect.cpp.
bool hasAVX512VBMI2 | ( | void | ) |
Definition at line 158 of file instrset_detect.cpp.
bool hasFMA3 | ( | void | ) |
Definition at line 110 of file instrset_detect.cpp.
bool hasFMA4 | ( | void | ) |
Definition at line 118 of file instrset_detect.cpp.
bool hasXOP | ( | void | ) |
Definition at line 126 of file instrset_detect.cpp.
int instrset_detect | ( | void | ) |
Definition at line 63 of file instrset_detect.cpp.
constexpr EList< int, N/2 > largeblock_indexes | ( | int const(&) | a[N] | ) |
Definition at line 1247 of file instrset.h.
constexpr EList< int, N/2 > largeblock_perm | ( | int const(&) | a[N] | ) |
Definition at line 976 of file instrset.h.
constexpr uint64_t make_bit_mask | ( | int const(&) | a[N] | ) |
Definition at line 523 of file instrset.h.
constexpr auto make_broad_mask | ( | uint64_t const | m | ) |
Definition at line 550 of file instrset.h.
constexpr uint64_t perm16_flags | ( | int const(&) | a[V::size()] | ) |
Definition at line 874 of file instrset.h.
constexpr uint64_t perm_flags | ( | int const(&) | a[V::size()] | ) |
Definition at line 601 of file instrset.h.
constexpr auto perm_mask_broad | ( | int const(&) | A[V::size()] | ) |
Definition at line 562 of file instrset.h.
int physicalProcessors | ( | int * | logical_processors = 0 | ) |
constexpr auto pshufb_mask | ( | int const(&) | A[V::size()] | ) |
Definition at line 933 of file instrset.h.
constexpr auto zero_mask | ( | int const(&) | a[N] | ) |
Definition at line 484 of file instrset.h.
constexpr auto zero_mask_broad | ( | int const(&) | A[V::size()] | ) |
Definition at line 504 of file instrset.h.
const int blend_a = 0x10 |
Definition at line 1015 of file instrset.h.
const int blend_addz = 8 |
Definition at line 1014 of file instrset.h.
const int blend_allzero = 2 |
Definition at line 1012 of file instrset.h.
const int blend_b = 0x20 |
Definition at line 1016 of file instrset.h.
const int blend_cross_lane = 0x100 |
Definition at line 1019 of file instrset.h.
const int blend_largeblock = 4 |
Definition at line 1013 of file instrset.h.
const int blend_outofrange = 0x10000000 |
Definition at line 1030 of file instrset.h.
const int blend_perma = 0x40 |
Definition at line 1017 of file instrset.h.
const int blend_permb = 0x80 |
Definition at line 1018 of file instrset.h.
const int blend_punpckhab = 0x1000 |
Definition at line 1021 of file instrset.h.
const int blend_punpckhba = 0x2000 |
Definition at line 1022 of file instrset.h.
const int blend_punpcklab = 0x4000 |
Definition at line 1023 of file instrset.h.
const int blend_punpcklba = 0x8000 |
Definition at line 1024 of file instrset.h.
const int blend_rotate_big = 0x100000 |
Definition at line 1029 of file instrset.h.
const int blend_rotateab = 0x10000 |
Definition at line 1025 of file instrset.h.
const int blend_rotateba = 0x20000 |
Definition at line 1026 of file instrset.h.
const int blend_rotpattern = 40 |
Definition at line 1032 of file instrset.h.
const int blend_same_pattern = 0x200 |
Definition at line 1020 of file instrset.h.
const int blend_shufab = 0x40000 |
Definition at line 1027 of file instrset.h.
const int blend_shufba = 0x80000 |
Definition at line 1028 of file instrset.h.
const int blend_shufpattern = 32 |
Definition at line 1031 of file instrset.h.
const int blend_zeroing = 1 |
Definition at line 1011 of file instrset.h.
const int perm_addz = 0x10 |
Definition at line 577 of file instrset.h.
const int perm_addz2 = 0x20 |
Definition at line 578 of file instrset.h.
const int perm_allzero = 4 |
Definition at line 575 of file instrset.h.
const int perm_broadcast = 0x8000 |
Definition at line 591 of file instrset.h.
const int perm_compress = 0x20000 |
Definition at line 593 of file instrset.h.
const int perm_cross_lane = 0x40 |
Definition at line 579 of file instrset.h.
const int perm_expand = 0x40000 |
Definition at line 594 of file instrset.h.
const int perm_ipattern |
Definition at line 597 of file instrset.h.
const int perm_largeblock = 8 |
Definition at line 576 of file instrset.h.
const int perm_outofrange = 0x10000000 |
Definition at line 595 of file instrset.h.
const int perm_perm = 2 |
Definition at line 574 of file instrset.h.
const int perm_punpckh = 0x100 |
Definition at line 581 of file instrset.h.
const int perm_punpckl = 0x200 |
Definition at line 582 of file instrset.h.
const int perm_rot_count = 32 |
Definition at line 596 of file instrset.h.
const int perm_rotate |
Definition at line 583 of file instrset.h.
const int perm_rotate_big |
Definition at line 589 of file instrset.h.
const int perm_same_pattern = 0x80 |
Definition at line 580 of file instrset.h.
const int perm_shleft |
Definition at line 587 of file instrset.h.
const int perm_shright |
Definition at line 585 of file instrset.h.
const int perm_zeroing = 1 |
Definition at line 573 of file instrset.h.
const int perm_zext = 0x10000 |
Definition at line 592 of file instrset.h.
constexpr int V_DC = -256 |
Definition at line 219 of file instrset.h.