1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | #ifndef __IMMINTRIN_H |
25 | #error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." |
26 | #endif |
27 | |
28 | #ifndef __AVX512VBMI2INTRIN_H |
29 | #define __AVX512VBMI2INTRIN_H |
30 | |
31 | |
/* Attribute set attached to every intrinsic function defined in this header:
 * always_inline, nodebug, the "avx512vbmi2" target feature, and a minimum
 * vector width of 512. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vbmi2"), __min_vector_width__(512)))
33 | |
34 | |
35 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
36 | _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) |
37 | { |
38 | return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, |
39 | (__v32hi) __S, |
40 | __U); |
41 | } |
42 | |
43 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
44 | _mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) |
45 | { |
46 | return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, |
47 | (__v32hi) _mm512_setzero_si512(), |
48 | __U); |
49 | } |
50 | |
51 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
52 | _mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) |
53 | { |
54 | return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, |
55 | (__v64qi) __S, |
56 | __U); |
57 | } |
58 | |
59 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
60 | _mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) |
61 | { |
62 | return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, |
63 | (__v64qi) _mm512_setzero_si512(), |
64 | __U); |
65 | } |
66 | |
67 | static __inline__ void __DEFAULT_FN_ATTRS |
68 | _mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) |
69 | { |
70 | __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, |
71 | __U); |
72 | } |
73 | |
74 | static __inline__ void __DEFAULT_FN_ATTRS |
75 | _mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) |
76 | { |
77 | __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, |
78 | __U); |
79 | } |
80 | |
81 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
82 | _mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) |
83 | { |
84 | return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, |
85 | (__v32hi) __S, |
86 | __U); |
87 | } |
88 | |
89 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
90 | _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) |
91 | { |
92 | return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, |
93 | (__v32hi) _mm512_setzero_si512(), |
94 | __U); |
95 | } |
96 | |
97 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
98 | _mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) |
99 | { |
100 | return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, |
101 | (__v64qi) __S, |
102 | __U); |
103 | } |
104 | |
105 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
106 | _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) |
107 | { |
108 | return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, |
109 | (__v64qi) _mm512_setzero_si512(), |
110 | __U); |
111 | } |
112 | |
113 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
114 | _mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) |
115 | { |
116 | return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, |
117 | (__v32hi) __S, |
118 | __U); |
119 | } |
120 | |
121 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
122 | _mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) |
123 | { |
124 | return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, |
125 | (__v32hi) _mm512_setzero_si512(), |
126 | __U); |
127 | } |
128 | |
129 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
130 | _mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) |
131 | { |
132 | return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, |
133 | (__v64qi) __S, |
134 | __U); |
135 | } |
136 | |
137 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
138 | _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) |
139 | { |
140 | return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, |
141 | (__v64qi) _mm512_setzero_si512(), |
142 | __U); |
143 | } |
144 | |
/* 64-bit lane shift-left-double by the count I.
 *
 * Expands to __builtin_ia32_vpshldq512 applied to A and B (each converted to
 * __m512i, then __v8di) with I converted to int; the value has type __m512i.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): I is presumably required to be a compile-time constant, which
 * would be why this is a macro -- confirm against the Intel reference. */
#define _mm512_shldi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
148 | |
/* Merge-masked form of _mm512_shldi_epi64.
 *
 * Expands to __builtin_ia32_selectq_512 with the mask U (as __mmask8), the
 * result of _mm512_shldi_epi64(A, B, I), and S as the third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): the select builtin is expected to take the shifted lane where
 * the mask bit is set and the S lane otherwise -- confirm. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                     (__v8di)(__m512i)(S)))
153 | |
/* Zero-masked form of _mm512_shldi_epi64.
 *
 * Expands to __builtin_ia32_selectq_512 with the mask U (as __mmask8), the
 * result of _mm512_shldi_epi64(A, B, I), and _mm512_setzero_si512() as the
 * third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): lanes with a clear mask bit are expected to be zero --
 * confirm. */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                     (__v8di)_mm512_setzero_si512()))
158 | |
/* 32-bit lane shift-left-double by the count I.
 *
 * Expands to __builtin_ia32_vpshldd512 applied to A and B (each converted to
 * __m512i, then __v16si) with I converted to int; the value has type __m512i.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the Intel reference. */
#define _mm512_shldi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
162 | |
/* Merge-masked form of _mm512_shldi_epi32.
 *
 * Expands to __builtin_ia32_selectd_512 with the mask U (as __mmask16), the
 * result of _mm512_shldi_epi32(A, B, I), and S as the third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): the select builtin is expected to take the shifted lane where
 * the mask bit is set and the S lane otherwise -- confirm. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                    (__v16si)(__m512i)(S)))
167 | |
/* Zero-masked form of _mm512_shldi_epi32.
 *
 * Expands to __builtin_ia32_selectd_512 with the mask U (as __mmask16), the
 * result of _mm512_shldi_epi32(A, B, I), and _mm512_setzero_si512() as the
 * third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): lanes with a clear mask bit are expected to be zero --
 * confirm. */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                    (__v16si)_mm512_setzero_si512()))
172 | |
/* 16-bit lane shift-left-double by the count I.
 *
 * Expands to __builtin_ia32_vpshldw512 applied to A and B (each converted to
 * __m512i, then __v32hi) with I converted to int; the value has type __m512i.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the Intel reference. */
#define _mm512_shldi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
176 | |
/* Merge-masked form of _mm512_shldi_epi16.
 *
 * Expands to __builtin_ia32_selectw_512 with the mask U (as __mmask32), the
 * result of _mm512_shldi_epi16(A, B, I), and S as the third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): the select builtin is expected to take the shifted lane where
 * the mask bit is set and the S lane otherwise -- confirm. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                    (__v32hi)(__m512i)(S)))
181 | |
/* Zero-masked form of _mm512_shldi_epi16.
 *
 * Expands to __builtin_ia32_selectw_512 with the mask U (as __mmask32), the
 * result of _mm512_shldi_epi16(A, B, I), and _mm512_setzero_si512() as the
 * third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): lanes with a clear mask bit are expected to be zero --
 * confirm. */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                    (__v32hi)_mm512_setzero_si512()))
186 | |
/* 64-bit lane shift-right-double by the count I.
 *
 * Expands to __builtin_ia32_vpshrdq512 applied to A and B (each converted to
 * __m512i, then __v8di) with I converted to int; the value has type __m512i.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the Intel reference. */
#define _mm512_shrdi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
190 | |
/* Merge-masked form of _mm512_shrdi_epi64.
 *
 * Expands to __builtin_ia32_selectq_512 with the mask U (as __mmask8), the
 * result of _mm512_shrdi_epi64(A, B, I), and S as the third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): the select builtin is expected to take the shifted lane where
 * the mask bit is set and the S lane otherwise -- confirm. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                     (__v8di)(__m512i)(S)))
195 | |
/* Zero-masked form of _mm512_shrdi_epi64.
 *
 * Expands to __builtin_ia32_selectq_512 with the mask U (as __mmask8), the
 * result of _mm512_shrdi_epi64(A, B, I), and _mm512_setzero_si512() as the
 * third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): lanes with a clear mask bit are expected to be zero --
 * confirm. */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                     (__v8di)_mm512_setzero_si512()))
200 | |
/* 32-bit lane shift-right-double by the count I.
 *
 * Expands to __builtin_ia32_vpshrdd512 applied to A and B (each converted to
 * __m512i, then __v16si) with I converted to int; the value has type __m512i.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the Intel reference. */
#define _mm512_shrdi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
204 | |
/* Merge-masked form of _mm512_shrdi_epi32.
 *
 * Expands to __builtin_ia32_selectd_512 with the mask U (as __mmask16), the
 * result of _mm512_shrdi_epi32(A, B, I), and S as the third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): the select builtin is expected to take the shifted lane where
 * the mask bit is set and the S lane otherwise -- confirm. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                    (__v16si)(__m512i)(S)))
209 | |
/* Zero-masked form of _mm512_shrdi_epi32.
 *
 * Expands to __builtin_ia32_selectd_512 with the mask U (as __mmask16), the
 * result of _mm512_shrdi_epi32(A, B, I), and _mm512_setzero_si512() as the
 * third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): lanes with a clear mask bit are expected to be zero --
 * confirm. */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                    (__v16si)_mm512_setzero_si512()))
214 | |
/* 16-bit lane shift-right-double by the count I.
 *
 * Expands to __builtin_ia32_vpshrdw512 applied to A and B (each converted to
 * __m512i, then __v32hi) with I converted to int; the value has type __m512i.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the Intel reference. */
#define _mm512_shrdi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
218 | |
/* Merge-masked form of _mm512_shrdi_epi16.
 *
 * Expands to __builtin_ia32_selectw_512 with the mask U (as __mmask32), the
 * result of _mm512_shrdi_epi16(A, B, I), and S as the third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): the select builtin is expected to take the shifted lane where
 * the mask bit is set and the S lane otherwise -- confirm. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                    (__v32hi)(__m512i)(S)))
223 | |
/* Zero-masked form of _mm512_shrdi_epi16.
 *
 * Expands to __builtin_ia32_selectw_512 with the mask U (as __mmask32), the
 * result of _mm512_shrdi_epi16(A, B, I), and _mm512_setzero_si512() as the
 * third operand.
 * The whole expansion is parenthesized so the leading cast cannot be split
 * from the call by a postfix operator written after the macro invocation.
 * NOTE(review): lanes with a clear mask bit are expected to be zero --
 * confirm. */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                    (__v32hi)_mm512_setzero_si512()))
228 | |
229 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
230 | _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) |
231 | { |
232 | return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, |
233 | (__v8di)__C); |
234 | } |
235 | |
236 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
237 | _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) |
238 | { |
239 | return (__m512i)__builtin_ia32_selectq_512(__U, |
240 | (__v8di)_mm512_shldv_epi64(__A, __B, __C), |
241 | (__v8di)__A); |
242 | } |
243 | |
244 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
245 | _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) |
246 | { |
247 | return (__m512i)__builtin_ia32_selectq_512(__U, |
248 | (__v8di)_mm512_shldv_epi64(__A, __B, __C), |
249 | (__v8di)_mm512_setzero_si512()); |
250 | } |
251 | |
252 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
253 | _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) |
254 | { |
255 | return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, |
256 | (__v16si)__C); |
257 | } |
258 | |
259 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
260 | _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) |
261 | { |
262 | return (__m512i)__builtin_ia32_selectd_512(__U, |
263 | (__v16si)_mm512_shldv_epi32(__A, __B, __C), |
264 | (__v16si)__A); |
265 | } |
266 | |
267 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
268 | _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) |
269 | { |
270 | return (__m512i)__builtin_ia32_selectd_512(__U, |
271 | (__v16si)_mm512_shldv_epi32(__A, __B, __C), |
272 | (__v16si)_mm512_setzero_si512()); |
273 | } |
274 | |
275 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
276 | _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) |
277 | { |
278 | return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, |
279 | (__v32hi)__C); |
280 | } |
281 | |
282 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
283 | _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) |
284 | { |
285 | return (__m512i)__builtin_ia32_selectw_512(__U, |
286 | (__v32hi)_mm512_shldv_epi16(__A, __B, __C), |
287 | (__v32hi)__A); |
288 | } |
289 | |
290 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
291 | _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) |
292 | { |
293 | return (__m512i)__builtin_ia32_selectw_512(__U, |
294 | (__v32hi)_mm512_shldv_epi16(__A, __B, __C), |
295 | (__v32hi)_mm512_setzero_si512()); |
296 | } |
297 | |
298 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
299 | _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) |
300 | { |
301 | return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, |
302 | (__v8di)__C); |
303 | } |
304 | |
305 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
306 | _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) |
307 | { |
308 | return (__m512i)__builtin_ia32_selectq_512(__U, |
309 | (__v8di)_mm512_shrdv_epi64(__A, __B, __C), |
310 | (__v8di)__A); |
311 | } |
312 | |
313 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
314 | _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) |
315 | { |
316 | return (__m512i)__builtin_ia32_selectq_512(__U, |
317 | (__v8di)_mm512_shrdv_epi64(__A, __B, __C), |
318 | (__v8di)_mm512_setzero_si512()); |
319 | } |
320 | |
321 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
322 | _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) |
323 | { |
324 | return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, |
325 | (__v16si)__C); |
326 | } |
327 | |
328 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
329 | _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) |
330 | { |
331 | return (__m512i) __builtin_ia32_selectd_512(__U, |
332 | (__v16si)_mm512_shrdv_epi32(__A, __B, __C), |
333 | (__v16si)__A); |
334 | } |
335 | |
336 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
337 | _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) |
338 | { |
339 | return (__m512i) __builtin_ia32_selectd_512(__U, |
340 | (__v16si)_mm512_shrdv_epi32(__A, __B, __C), |
341 | (__v16si)_mm512_setzero_si512()); |
342 | } |
343 | |
344 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
345 | _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) |
346 | { |
347 | return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, |
348 | (__v32hi)__C); |
349 | } |
350 | |
351 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
352 | _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) |
353 | { |
354 | return (__m512i)__builtin_ia32_selectw_512(__U, |
355 | (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), |
356 | (__v32hi)__A); |
357 | } |
358 | |
359 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
360 | _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) |
361 | { |
362 | return (__m512i)__builtin_ia32_selectw_512(__U, |
363 | (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), |
364 | (__v32hi)_mm512_setzero_si512()); |
365 | } |
366 | |
367 | |
368 | #undef __DEFAULT_FN_ATTRS |
369 | |
370 | #endif |
371 | |
372 | |