1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | #ifndef __IMMINTRIN_H |
25 | #error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." |
26 | #endif |
27 | |
28 | #ifndef __IFMAVLINTRIN_H |
29 | #define __IFMAVLINTRIN_H |
30 | |
31 | |
/*
 * Function attribute sets for the intrinsics in this header.  Both force
 * inlining, suppress debug info, and enable the "avx512ifma,avx512vl" target
 * features; they differ only in the minimum vector width they declare
 * (128 vs. 256 bits).
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512ifma,avx512vl"),                            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512ifma,avx512vl"),                            \
                 __min_vector_width__(256)))
34 | |
35 | |
36 | |
37 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
38 | _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) |
39 | { |
40 | return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y, |
41 | (__v2di) __Z); |
42 | } |
43 | |
44 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
45 | _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) |
46 | { |
47 | return (__m128i)__builtin_ia32_selectq_128(__M, |
48 | (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), |
49 | (__v2di)__W); |
50 | } |
51 | |
52 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
53 | _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) |
54 | { |
55 | return (__m128i)__builtin_ia32_selectq_128(__M, |
56 | (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), |
57 | (__v2di)_mm_setzero_si128()); |
58 | } |
59 | |
60 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
61 | _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) |
62 | { |
63 | return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, |
64 | (__v4di)__Z); |
65 | } |
66 | |
67 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
68 | _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) |
69 | { |
70 | return (__m256i)__builtin_ia32_selectq_256(__M, |
71 | (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), |
72 | (__v4di)__W); |
73 | } |
74 | |
75 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
76 | _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) |
77 | { |
78 | return (__m256i)__builtin_ia32_selectq_256(__M, |
79 | (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), |
80 | (__v4di)_mm256_setzero_si256()); |
81 | } |
82 | |
83 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
84 | _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) |
85 | { |
86 | return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, |
87 | (__v2di)__Z); |
88 | } |
89 | |
90 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
91 | _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) |
92 | { |
93 | return (__m128i)__builtin_ia32_selectq_128(__M, |
94 | (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), |
95 | (__v2di)__W); |
96 | } |
97 | |
98 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
99 | _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) |
100 | { |
101 | return (__m128i)__builtin_ia32_selectq_128(__M, |
102 | (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), |
103 | (__v2di)_mm_setzero_si128()); |
104 | } |
105 | |
106 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
107 | _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) |
108 | { |
109 | return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, |
110 | (__v4di)__Z); |
111 | } |
112 | |
113 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
114 | _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) |
115 | { |
116 | return (__m256i)__builtin_ia32_selectq_256(__M, |
117 | (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), |
118 | (__v4di)__W); |
119 | } |
120 | |
121 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
122 | _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) |
123 | { |
124 | return (__m256i)__builtin_ia32_selectq_256(__M, |
125 | (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), |
126 | (__v4di)_mm256_setzero_si256()); |
127 | } |
128 | |
129 | |
130 | #undef __DEFAULT_FN_ATTRS128 |
131 | #undef __DEFAULT_FN_ATTRS256 |
132 | |
133 | #endif |
134 | |