1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | #ifndef __IMMINTRIN_H |
25 | #error "Never use <gfniintrin.h> directly; include <immintrin.h> instead." |
26 | #endif |
27 | |
28 | #ifndef __GFNIINTRIN_H |
29 | #define __GFNIINTRIN_H |
30 | |
31 | |
/* Compute an affine transformation in GF(2^8) on the multiplicative inverse
   of each packed byte of A: for each byte x, dst = B * inv(x) + I, where B
   supplies an 8x8 bit matrix per 64-bit lane and I is an 8-bit immediate.
   (Per the Intel GF2P8AFFINEINVQB definition.)
   NOTE: the whole expansion is parenthesized so the macro stays a single
   primary expression in any surrounding context. */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
                                                   (__v16qi)(__m128i)(B), \
                                                   (char)(I)))

/* Merge-masked variant: result bytes where the corresponding bit of the
   16-bit mask U is set come from the transformation; other bytes pass
   through from S. */
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
         (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
         (__v16qi)(__m128i)(S)))

/* Zero-masked variant: bytes not selected by U are zeroed. */
#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
         U, A, B, I))
46 | |
47 | |
/* 256-bit form of the GF(2^8) affine-inverse transform: per byte x of A,
   dst = B * inv(x) + I (8x8 bit matrix per 64-bit lane of B, immediate I).
   Expansion fully parenthesized for macro hygiene. */
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
                                                   (__v32qi)(__m256i)(B), \
                                                   (char)(I)))

/* Merge-masked: bytes selected by the 32-bit mask U take the result;
   others pass through from S. */
#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
         (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
         (__v32qi)(__m256i)(S)))

/* Zero-masked: bytes not selected by U are zeroed. */
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
         U, A, B, I))
61 | |
62 | |
/* 512-bit form of the GF(2^8) affine-inverse transform: per byte x of A,
   dst = B * inv(x) + I (8x8 bit matrix per 64-bit lane of B, immediate I).
   Expansion fully parenthesized for macro hygiene. */
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
                                                   (__v64qi)(__m512i)(B), \
                                                   (char)(I)))

/* Merge-masked: bytes selected by the 64-bit mask U take the result;
   others pass through from S. */
#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
         (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
         (__v64qi)(__m512i)(S)))

/* Zero-masked: bytes not selected by U are zeroed. */
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \
         U, A, B, I))
76 | |
/* Compute an affine transformation in GF(2^8): for each byte x of A,
   dst = B * x + I, where B supplies an 8x8 bit matrix per 64-bit lane and
   I is an 8-bit immediate (Intel GF2P8AFFINEQB).
   Expansion fully parenthesized for macro hygiene. */
#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
                                                (__v16qi)(__m128i)(B), \
                                                (char)(I)))

/* Merge-masked: bytes selected by the 16-bit mask U take the result;
   others pass through from S. */
#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
         (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
         (__v16qi)(__m128i)(S)))

/* Zero-masked: bytes not selected by U are zeroed. */
#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
         U, A, B, I))
91 | |
92 | |
/* 256-bit GF(2^8) affine transform: per byte x of A, dst = B * x + I
   (8x8 bit matrix per 64-bit lane of B, immediate I).
   Expansion fully parenthesized for macro hygiene. */
#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
                                                (__v32qi)(__m256i)(B), \
                                                (char)(I)))

/* Merge-masked: bytes selected by the 32-bit mask U take the result;
   others pass through from S. */
#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
         (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
         (__v32qi)(__m256i)(S)))

/* Zero-masked: bytes not selected by U are zeroed. */
#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
         U, A, B, I))
106 | |
107 | |
/* 512-bit GF(2^8) affine transform: per byte x of A, dst = B * x + I
   (8x8 bit matrix per 64-bit lane of B, immediate I).
   Expansion fully parenthesized for macro hygiene. */
#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
                                                (__v64qi)(__m512i)(B), \
                                                (char)(I)))

/* Merge-masked: bytes selected by the 64-bit mask U take the result;
   others pass through from S. */
#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
         (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
         (__v64qi)(__m512i)(S)))

/* Zero-masked: bytes not selected by U are zeroed. */
#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \
         U, A, B, I))
121 | |
122 | |
/* Default function attributes for the inline wrappers below, keyed by the
   ISA extensions each wrapper requires and its vector width.  All variants
   force inlining and suppress debug info; they are #undef'd at end of file. */

/* Base GFNI, 128-bit vectors. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))

/* GFNI + AVX, 256-bit vectors. */
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))

/* GFNI + AVX512BW, 512-bit vectors (masked 512-bit forms). */
#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))

/* GFNI + AVX512BW + AVX512VL, for the masked 128/256-bit forms. */
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
134 | |
135 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
136 | _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) |
137 | { |
138 | return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, |
139 | (__v16qi) __B); |
140 | } |
141 | |
142 | static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 |
143 | _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) |
144 | { |
145 | return (__m128i) __builtin_ia32_selectb_128(__U, |
146 | (__v16qi) _mm_gf2p8mul_epi8(__A, __B), |
147 | (__v16qi) __S); |
148 | } |
149 | |
150 | static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 |
151 | _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) |
152 | { |
153 | return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), |
154 | __U, __A, __B); |
155 | } |
156 | |
157 | static __inline__ __m256i __DEFAULT_FN_ATTRS_Y |
158 | _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) |
159 | { |
160 | return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, |
161 | (__v32qi) __B); |
162 | } |
163 | |
164 | static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 |
165 | _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) |
166 | { |
167 | return (__m256i) __builtin_ia32_selectb_256(__U, |
168 | (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), |
169 | (__v32qi) __S); |
170 | } |
171 | |
172 | static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 |
173 | _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) |
174 | { |
175 | return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), |
176 | __U, __A, __B); |
177 | } |
178 | |
179 | static __inline__ __m512i __DEFAULT_FN_ATTRS_Z |
180 | _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) |
181 | { |
182 | return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, |
183 | (__v64qi) __B); |
184 | } |
185 | |
186 | static __inline__ __m512i __DEFAULT_FN_ATTRS_Z |
187 | _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) |
188 | { |
189 | return (__m512i) __builtin_ia32_selectb_512(__U, |
190 | (__v64qi) _mm512_gf2p8mul_epi8(__A, __B), |
191 | (__v64qi) __S); |
192 | } |
193 | |
194 | static __inline__ __m512i __DEFAULT_FN_ATTRS_Z |
195 | _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) |
196 | { |
197 | return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(), |
198 | __U, __A, __B); |
199 | } |
200 | |
201 | #undef __DEFAULT_FN_ATTRS |
202 | #undef __DEFAULT_FN_ATTRS_Y |
203 | #undef __DEFAULT_FN_ATTRS_Z |
204 | #undef __DEFAULT_FN_ATTRS_VL128 |
205 | #undef __DEFAULT_FN_ATTRS_VL256 |
206 | |
207 | #endif |
208 | |
209 | |