1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
/* This header is an internal component of <immintrin.h>; it relies on types
 * and macros defined there, so direct inclusion is rejected. */
#ifndef __IMMINTRIN_H
#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512DQINTRIN_H
#define __AVX512DQINTRIN_H


/* Default attributes for every intrinsic in this file: force inlining, emit
 * no debug info, and require the "avx512dq" target feature.  The 512 variant
 * additionally declares a 512-bit minimum vector width for the backend. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
34 | |
35 | static __inline __mmask8 __DEFAULT_FN_ATTRS |
36 | _knot_mask8(__mmask8 __M) |
37 | { |
38 | return __builtin_ia32_knotqi(__M); |
39 | } |
40 | |
/* Bitwise logic on 8-bit mask registers.  Each wrapper forwards directly to
 * the corresponding compiler builtin; the casts keep the operands/result at
 * the 8-bit mask type. */

/* __A & __B */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kand_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
}

/* ~__A & __B (AND-NOT; note the first operand is the complemented one) */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kandn_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
}

/* __A | __B */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
}

/* ~(__A ^ __B) (XNOR) */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
}

/* __A ^ __B */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}
70 | |
/* OR-test / test primitives on 8-bit masks.  The 'c' variants return the
 * carry-style result and the 'z' variants the zero-style result of the
 * underlying builtin; the combined forms return the 'z' result and store the
 * 'c' result through the out-parameter. */

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

/* Stores the 'carry' result in *__C and returns the 'zero' result. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

/* Stores the 'carry' result in *__C and returns the 'zero' result. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}
106 | |
/* 16-bit mask test primitives (same c/z convention as the 8-bit forms
 * above), plus mask addition for 8- and 16-bit masks. */

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

/* Stores the 'carry' result in *__C and returns the 'zero' result. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

/* Add two 8-bit masks as integers. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
}

/* Add two 16-bit masks as integers. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_kadd_mask16(__mmask16 __A, __mmask16 __B)
{
  return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
}
136 | |
/* Shift an 8-bit mask left/right by immediate I.  Macros (not functions)
 * because the builtin requires a compile-time immediate shift count. */
#define _kshiftli_mask8(A, I) \
  (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))

#define _kshiftri_mask8(A, I) \
  (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))
142 | |
/* Moves between 8-bit masks and scalar integers / memory, all routed
 * through the kmovb builtin so the compiler can use mask-register moves. */

/* Mask -> 32-bit unsigned integer. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask8_u32(__mmask8 __A) {
  return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
}

/* 32-bit unsigned integer -> mask (truncated to 8 bits by the cast). */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_cvtu32_mask8(unsigned int __A) {
  return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
}

/* Load an 8-bit mask from memory. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_load_mask8(__mmask8 *__A) {
  return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
}

/* Store an 8-bit mask to memory. */
static __inline__ void __DEFAULT_FN_ATTRS
_store_mask8(__mmask8 *__A, __mmask8 __B) {
  *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
}
162 | |
/* Low 64 bits of the per-element product of two vectors of 64-bit integers.
 * Expressed as a plain vector multiply (unsigned, so overflow wraps without
 * UB); the compiler selects the AVX-512DQ instruction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

/* Merge-masked variant: result elements where __U is 0 are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked variant: result elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
181 | |
/* Bitwise XOR of floating-point vectors, done on their integer bit patterns
 * (casting through unsigned-int vectors avoids any FP semantics). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_xor_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A ^ (__v8du)__B);
}

/* Merge-masked: elements where __U is 0 come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked: elements where __U is 0 are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterpart of _mm512_xor_pd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_xor_ps (__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A ^ (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
219 | |
/* Bitwise OR of floating-point vectors on their integer bit patterns;
 * same masking pattern as the XOR family above. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_or_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A | (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_or_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A | (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
257 | |
/* Bitwise AND of floating-point vectors on their integer bit patterns;
 * same masking pattern as the XOR family above. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_and_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_and_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
295 | |
/* AND-NOT on bit patterns: the FIRST operand is complemented (~__A & __B),
 * matching the hardware ANDN convention; same masking pattern as above. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_andnot_pd(__m512d __A, __m512d __B) {
  return (__m512d)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_andnot_ps(__m512 __A, __m512 __B) {
  return (__m512)(~(__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
333 | |
/* Convert 8 doubles to 8 signed 64-bit integers using the current rounding
 * mode (_MM_FROUND_CUR_DIRECTION).  A mask of -1 selects all lanes; the
 * mask/maskz variants pass the user mask with __W / zero as pass-through.
 * The *_round macros take the rounding mode as a compile-time immediate. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))
372 | |
/* Convert 8 doubles to 8 UNSIGNED 64-bit integers; same structure as the
 * signed cvtpd_epi64 family above. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
411 | |
/* Convert 8 floats (a 256-bit source) to 8 signed 64-bit integers in a
 * 512-bit result; same masking/rounding structure as above. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))
450 | |
/* Convert 8 floats to 8 UNSIGNED 64-bit integers; same structure as the
 * signed cvtps_epi64 family above. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
489 | |
490 | |
/* Convert 8 signed 64-bit integers to 8 doubles.  The unmasked form uses the
 * generic vector-convert builtin; the masked forms wrap it in an element
 * select.  The *_round macros go through the explicit-rounding builtin. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepi64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
524 | |
/* Convert 8 signed 64-bit integers to 8 floats (256-bit result), using the
 * current rounding mode; same masking structure as the families above. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_ps(A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
563 | |
564 | |
/* TRUNCATING (round-toward-zero) conversion of 8 doubles to 8 signed 64-bit
 * integers; cvtt* families mirror the cvt* families above but use the
 * truncating builtins. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
603 | |
/* Truncating conversion of 8 doubles to 8 UNSIGNED 64-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))
642 | |
/* Truncating conversion of 8 floats (256-bit source) to 8 signed 64-bit
 * integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
681 | |
/* Truncating conversion of 8 floats to 8 UNSIGNED 64-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))
720 | |
/* Convert 8 UNSIGNED 64-bit integers to 8 doubles; same structure as the
 * signed cvtepi64_pd family (generic convert + select for masking, explicit
 * builtin for the *_round macros). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepu64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
755 | |
756 | |
/* Convert 8 UNSIGNED 64-bit integers to 8 floats (256-bit result), using
 * the current rounding mode. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_ps(A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
795 | |
/* VRANGEPD wrappers: C is the immediate operation selector (must be a
 * compile-time constant, hence macros).  The non-round forms use the current
 * rounding mode; the *_round forms take R explicitly.  Masking follows the
 * usual -1 / (W,U) / (U,zero) pattern. */
#define _mm512_range_pd(A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_range_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_range_pd(U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_range_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R))
833 | |
/* VRANGEPS: per-element range operation (imm8 C selects min/max/abs variant)
   on packed floats in A and B, using the current rounding mode. */
#define _mm512_range_ps(A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_range_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_range_ps(U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)
853 | |
/* VRANGEPS with an explicit rounding/SAE control R; otherwise identical to
   _mm512_range_ps. */
#define _mm512_range_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R))

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R))
871 | |
/* VRANGESS: scalar range operation on the low float of A and B (imm8 C
   selects the operation); upper 3 lanes of the result come from A.
   R is the rounding/SAE control. */
#define _mm_range_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8) -1, (int)(C),\
                                               (int)(R))

/* Same, with the current rounding direction. */
#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION)

/* Merge-masking form: if bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W),\
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R))

/* Merge-masking form with the current rounding direction. */
#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION)

/* Zero-masking form: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R))

/* Zero-masking form with the current rounding direction. */
#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
898 | |
/* VRANGESD: scalar range operation on the low double of A and B (imm8 C
   selects the operation); the upper lane of the result comes from A.
   R is the rounding/SAE control. */
#define _mm_range_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8) -1, (int)(C),\
                                                (int)(R))

/* Same, with the current rounding direction. */
#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION)

/* Merge-masking form: if bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W),\
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R))

/* Merge-masking form with the current rounding direction. */
#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION)

/* Zero-masking form: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R))

/* Zero-masking form with the current rounding direction. */
#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
925 | |
/* VREDUCEPD: extract the reduced argument (x - round(x) at the precision
   selected by the imm8 B) of each packed double in A, using the current
   rounding direction. */
#define _mm512_reduce_pd(A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION)

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_reduce_pd(W, U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_reduce_pd(U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

/* VREDUCEPS: single-precision counterpart of _mm512_reduce_pd. */
#define _mm512_reduce_ps(A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_reduce_ps(W, U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_reduce_ps(U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)
961 | |
/* VREDUCEPD/VREDUCEPS with an explicit rounding/SAE control R; otherwise
   identical to the _mm512_reduce_pd / _mm512_reduce_ps forms. */
#define _mm512_reduce_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

/* Single-precision counterpart. */
#define _mm512_reduce_round_ps(A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R))

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))
991 | |
/* VREDUCESS: scalar reduce of the low float of B (imm8 C selects the
   precision); upper lanes of the result come from A. Current rounding
   direction. */
#define _mm_reduce_ss(A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

/* Merge-masking form: if bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_reduce_ss(W, U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

/* Zero-masking form: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_reduce_ss(U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION)

/* Explicit rounding/SAE control R variants of the three macros above. */
#define _mm_reduce_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R))

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R))

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R))
1028 | |
/* VREDUCESD: scalar reduce of the low double of B (imm8 C selects the
   precision); the upper lane of the result comes from A. Current rounding
   direction. */
#define _mm_reduce_sd(A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

/* Merge-masking form: if bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_reduce_sd(W, U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION)

/* Zero-masking form: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_reduce_sd(U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

/* Explicit rounding/SAE control R variants of the three macros above. */
#define _mm_reduce_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R))

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R))
1066 | |
/* VPMOVD2M: build a 16-bit mask from the sign bit of each 32-bit element
   of __A. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_movepi32_mask (__m512i __A)
{
  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}
1072 | |
/* VPMOVM2D: broadcast each bit of the mask __A to all bits of the
   corresponding 32-bit element (all-ones or all-zeros per lane). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}
1078 | |
/* VPMOVM2Q: broadcast each bit of the mask __A to all bits of the
   corresponding 64-bit element (all-ones or all-zeros per lane). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}
1084 | |
/* VPMOVQ2M: build an 8-bit mask from the sign bit of each 64-bit element
   of __A. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_movepi64_mask (__m512i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
1090 | |
1091 | |
/* Broadcast the low pair of floats of __A (elements 0,1) to all eight
   2-element positions of a 512-bit vector. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x2 (__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 0, 1, 0, 1, 0, 1,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

/* Merge-masking form: lanes with a 0 bit in __M are taken from __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)__O);
}

/* Zero-masking form: lanes with a 0 bit in __M are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
1115 | |
/* Broadcast the 8 floats of __A to both 256-bit halves of a 512-bit
   vector. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x8(__m256 __A)
{
  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                         0, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6, 7);
}

/* Merge-masking form: lanes with a 0 bit in __M are taken from __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)__O);
}

/* Zero-masking form: lanes with a 0 bit in __M are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
1139 | |
/* Broadcast the 2 doubles of __A to all four 128-bit lanes of a 512-bit
   vector. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x2(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

/* Merge-masking form: lanes with a 0 bit in __M are taken from __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)__O);
}

/* Zero-masking form: lanes with a 0 bit in __M are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)_mm512_setzero_pd());
}
1162 | |
/* Broadcast the low pair of 32-bit integers of __A (elements 0,1) to all
   eight 2-element positions of a 512-bit vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x2 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

/* Merge-masking form: lanes with a 0 bit in __M are taken from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)__O);
}

/* Zero-masking form: lanes with a 0 bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)_mm512_setzero_si512());
}
1186 | |
/* Broadcast the 8 32-bit integers of __A to both 256-bit halves of a
   512-bit vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x8(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
                                          0, 1, 2, 3, 4, 5, 6, 7,
                                          0, 1, 2, 3, 4, 5, 6, 7);
}

/* Merge-masking form: lanes with a 0 bit in __M are taken from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)__O);
}

/* Zero-masking form: lanes with a 0 bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)_mm512_setzero_si512());
}
1210 | |
/* Broadcast the 2 64-bit integers of __A to all four 128-bit lanes of a
   512-bit vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x2(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

/* Merge-masking form: lanes with a 0 bit in __M are taken from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)__O);
}

/* Zero-masking form: lanes with a 0 bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)_mm512_setzero_si512());
}
1233 | |
/* VEXTRACTF32X8: extract the 256-bit group of 8 floats selected by imm
   (0 or 1) from A.
   NOTE: the macro names were missing ("#define (A, imm)"), which is a
   preprocessor syntax error; restored to the standard intrinsic names. */
#define _mm512_extractf32x8_ps(A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_undefined_ps(), \
                                           (__mmask8)-1)

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U))
1248 | |
/* VEXTRACTF64X2: extract the 128-bit pair of doubles selected by imm
   (0..3) from A.
   NOTE: the macro names were missing ("#define (A, imm)"), which is a
   preprocessor syntax error; restored to the standard intrinsic names. */
#define _mm512_extractf64x2_pd(A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1)

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U))
1266 | |
/* VEXTRACTI32X8: extract the 256-bit group of 8 32-bit integers selected by
   imm (0 or 1) from A.
   NOTE: the macro names were missing ("#define (A, imm)"), which is a
   preprocessor syntax error; restored to the standard intrinsic names. */
#define _mm512_extracti32x8_epi32(A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_undefined_si256(), \
                                            (__mmask8)-1)

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U))
1281 | |
/* VEXTRACTI64X2: extract the 128-bit pair of 64-bit integers selected by
   imm (0..3) from A.
   NOTE: the macro names were missing ("#define (A, imm)"), which is a
   preprocessor syntax error; restored to the standard intrinsic names. */
#define _mm512_extracti64x2_epi64(A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_undefined_si128(), \
                                                (__mmask8)-1)

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U))
1299 | |
/* VINSERTF32X8: insert the 8 floats of B into the 256-bit half of A
   selected by imm (0 or 1). */
#define _mm512_insertf32x8(A, B, imm) \
  (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                     (__v8sf)(__m256)(B), (int)(imm))

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps())
1313 | |
/* VINSERTF64X2: insert the 2 doubles of B into the 128-bit lane of A
   selected by imm (0..3). */
#define _mm512_insertf64x2(A, B, imm) \
  (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                          (__v2df)(__m128d)(B), (int)(imm))

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd())
1327 | |
/* VINSERTI32X8: insert the 8 32-bit integers of B into the 256-bit half of
   A selected by imm (0 or 1). */
#define _mm512_inserti32x8(A, B, imm) \
  (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                      (__v8si)(__m256i)(B), (int)(imm))

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512())
1341 | |
/* VINSERTI64X2: insert the 2 64-bit integers of B into the 128-bit lane of
   A selected by imm (0..3). */
#define _mm512_inserti64x2(A, B, imm) \
  (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                          (__v2di)(__m128i)(B), (int)(imm))

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W))

/* Zero-masking form: lanes with a 0 bit in U are zeroed. */
#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512())
1355 | |
/* VFPCLASSPS/PD/SS/SD: test each element of A for the FP categories
   selected by imm (QNaN, +/-0, +/-Inf, denormal, negative, SNaN -- see the
   Intel intrinsics guide for the imm8 bit assignments) and return a mask of
   the matching lanes. The mask_* forms AND the result with U. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U))

#define _mm512_fpclass_ps_mask(A, imm) \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1)

#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm512_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1)

/* Scalar forms: only bit 0 of the result (the low element) is meaningful. */
#define _mm_fpclass_sd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1)

#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U))

#define _mm_fpclass_ss_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1)

#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U))
1387 | |
1388 | #undef __DEFAULT_FN_ATTRS512 |
1389 | #undef __DEFAULT_FN_ATTRS |
1390 | |
1391 | #endif |
1392 | |