Clang Project

clang_source_code/lib/Headers/avx512dqintrin.h
/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24#ifndef __IMMINTRIN_H
25#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512DQINTRIN_H
29#define __AVX512DQINTRIN_H
30
31/* Define the default attributes for the functions in this file. */
32#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
33#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
34
35static __inline __mmask8 __DEFAULT_FN_ATTRS
36_knot_mask8(__mmask8 __M)
37{
38  return __builtin_ia32_knotqi(__M);
39}
40
41static __inline__ __mmask8 __DEFAULT_FN_ATTRS
42_kand_mask8(__mmask8 __A__mmask8 __B)
43{
44  return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
45}
46
47static __inline__ __mmask8 __DEFAULT_FN_ATTRS
48_kandn_mask8(__mmask8 __A__mmask8 __B)
49{
50  return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
51}
52
53static __inline__ __mmask8 __DEFAULT_FN_ATTRS
54_kor_mask8(__mmask8 __A__mmask8 __B)
55{
56  return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
57}
58
59static __inline__ __mmask8 __DEFAULT_FN_ATTRS
60_kxnor_mask8(__mmask8 __A__mmask8 __B)
61{
62  return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
63}
64
65static __inline__ __mmask8 __DEFAULT_FN_ATTRS
66_kxor_mask8(__mmask8 __A__mmask8 __B)
67{
68  return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
69}
70
71static __inline__ unsigned char __DEFAULT_FN_ATTRS
72_kortestc_mask8_u8(__mmask8 __A__mmask8 __B)
73{
74  return (unsigned char)__builtin_ia32_kortestcqi(__A__B);
75}
76
77static __inline__ unsigned char __DEFAULT_FN_ATTRS
78_kortestz_mask8_u8(__mmask8 __A__mmask8 __B)
79{
80  return (unsigned char)__builtin_ia32_kortestzqi(__A__B);
81}
82
83static __inline__ unsigned char __DEFAULT_FN_ATTRS
84_kortest_mask8_u8(__mmask8 __A__mmask8 __Bunsigned char *__C) {
85  *__C = (unsigned char)__builtin_ia32_kortestcqi(__A__B);
86  return (unsigned char)__builtin_ia32_kortestzqi(__A__B);
87}
88
89static __inline__ unsigned char __DEFAULT_FN_ATTRS
90_ktestc_mask8_u8(__mmask8 __A__mmask8 __B)
91{
92  return (unsigned char)__builtin_ia32_ktestcqi(__A__B);
93}
94
95static __inline__ unsigned char __DEFAULT_FN_ATTRS
96_ktestz_mask8_u8(__mmask8 __A__mmask8 __B)
97{
98  return (unsigned char)__builtin_ia32_ktestzqi(__A__B);
99}
100
101static __inline__ unsigned char __DEFAULT_FN_ATTRS
102_ktest_mask8_u8(__mmask8 __A__mmask8 __Bunsigned char *__C) {
103  *__C = (unsigned char)__builtin_ia32_ktestcqi(__A__B);
104  return (unsigned char)__builtin_ia32_ktestzqi(__A__B);
105}
106
107static __inline__ unsigned char __DEFAULT_FN_ATTRS
108_ktestc_mask16_u8(__mmask16 __A__mmask16 __B)
109{
110  return (unsigned char)__builtin_ia32_ktestchi(__A__B);
111}
112
113static __inline__ unsigned char __DEFAULT_FN_ATTRS
114_ktestz_mask16_u8(__mmask16 __A__mmask16 __B)
115{
116  return (unsigned char)__builtin_ia32_ktestzhi(__A__B);
117}
118
119static __inline__ unsigned char __DEFAULT_FN_ATTRS
120_ktest_mask16_u8(__mmask16 __A__mmask16 __Bunsigned char *__C) {
121  *__C = (unsigned char)__builtin_ia32_ktestchi(__A__B);
122  return (unsigned char)__builtin_ia32_ktestzhi(__A__B);
123}
124
125static __inline__ __mmask8 __DEFAULT_FN_ATTRS
126_kadd_mask8(__mmask8 __A__mmask8 __B)
127{
128  return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
129}
130
131static __inline__ __mmask16 __DEFAULT_FN_ATTRS
132_kadd_mask16(__mmask16 __A__mmask16 __B)
133{
134  return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
135}
136
/* Expands to __builtin_ia32_kshiftliqi on mask A with count I (cast to
 * unsigned int). The whole expansion is parenthesized so the cast cannot be
 * split from the call by surrounding operators. */
#define _kshiftli_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)))

/* Expands to __builtin_ia32_kshiftriqi on mask A with count I (cast to
 * unsigned int). The whole expansion is parenthesized so the cast cannot be
 * split from the call by surrounding operators. */
#define _kshiftri_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)))

143static __inline__ unsigned int __DEFAULT_FN_ATTRS
144_cvtmask8_u32(__mmask8 __A) {
145  return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
146}
147
148static __inline__ __mmask8 __DEFAULT_FN_ATTRS
149_cvtu32_mask8(unsigned int __A) {
150  return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
151}
152
153static __inline__ __mmask8 __DEFAULT_FN_ATTRS
154_load_mask8(__mmask8 *__A) {
155  return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
156}
157
158static __inline__ void __DEFAULT_FN_ATTRS
159_store_mask8(__mmask8 *__A__mmask8 __B) {
160  *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
161}
162
163static __inline__ __m512i __DEFAULT_FN_ATTRS512
164_mm512_mullo_epi64 (__m512i __A__m512i __B) {
165  return (__m512i) ((__v8du__A * (__v8du__B);
166}
167
168static __inline__ __m512i __DEFAULT_FN_ATTRS512
169_mm512_mask_mullo_epi64(__m512i __W__mmask8 __U__m512i __A__m512i __B) {
170  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
171                                             (__v8di)_mm512_mullo_epi64(__A__B),
172                                             (__v8di)__W);
173}
174
175static __inline__ __m512i __DEFAULT_FN_ATTRS512
176_mm512_maskz_mullo_epi64(__mmask8 __U__m512i __A__m512i __B) {
177  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
178                                             (__v8di)_mm512_mullo_epi64(__A__B),
179                                             (__v8di)_mm512_setzero_si512());
180}
181
182static __inline__ __m512d __DEFAULT_FN_ATTRS512
183_mm512_xor_pd(__m512d __A__m512d __B) {
184  return (__m512d)((__v8du)__A ^ (__v8du)__B);
185}
186
187static __inline__ __m512d __DEFAULT_FN_ATTRS512
188_mm512_mask_xor_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
189  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
190                                              (__v8df)_mm512_xor_pd(__A__B),
191                                              (__v8df)__W);
192}
193
194static __inline__ __m512d __DEFAULT_FN_ATTRS512
195_mm512_maskz_xor_pd(__mmask8 __U__m512d __A__m512d __B) {
196  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
197                                              (__v8df)_mm512_xor_pd(__A__B),
198                                              (__v8df)_mm512_setzero_pd());
199}
200
201static __inline__ __m512 __DEFAULT_FN_ATTRS512
202_mm512_xor_ps (__m512 __A__m512 __B) {
203  return (__m512)((__v16su)__A ^ (__v16su)__B);
204}
205
206static __inline__ __m512 __DEFAULT_FN_ATTRS512
207_mm512_mask_xor_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
208  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
209                                             (__v16sf)_mm512_xor_ps(__A__B),
210                                             (__v16sf)__W);
211}
212
213static __inline__ __m512 __DEFAULT_FN_ATTRS512
214_mm512_maskz_xor_ps(__mmask16 __U__m512 __A__m512 __B) {
215  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
216                                             (__v16sf)_mm512_xor_ps(__A__B),
217                                             (__v16sf)_mm512_setzero_ps());
218}
219
220static __inline__ __m512d __DEFAULT_FN_ATTRS512
221_mm512_or_pd(__m512d __A__m512d __B) {
222  return (__m512d)((__v8du)__A | (__v8du)__B);
223}
224
225static __inline__ __m512d __DEFAULT_FN_ATTRS512
226_mm512_mask_or_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
227  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
228                                              (__v8df)_mm512_or_pd(__A__B),
229                                              (__v8df)__W);
230}
231
232static __inline__ __m512d __DEFAULT_FN_ATTRS512
233_mm512_maskz_or_pd(__mmask8 __U__m512d __A__m512d __B) {
234  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
235                                              (__v8df)_mm512_or_pd(__A__B),
236                                              (__v8df)_mm512_setzero_pd());
237}
238
239static __inline__ __m512 __DEFAULT_FN_ATTRS512
240_mm512_or_ps(__m512 __A__m512 __B) {
241  return (__m512)((__v16su)__A | (__v16su)__B);
242}
243
244static __inline__ __m512 __DEFAULT_FN_ATTRS512
245_mm512_mask_or_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
246  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
247                                             (__v16sf)_mm512_or_ps(__A__B),
248                                             (__v16sf)__W);
249}
250
251static __inline__ __m512 __DEFAULT_FN_ATTRS512
252_mm512_maskz_or_ps(__mmask16 __U__m512 __A__m512 __B) {
253  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
254                                             (__v16sf)_mm512_or_ps(__A__B),
255                                             (__v16sf)_mm512_setzero_ps());
256}
257
258static __inline__ __m512d __DEFAULT_FN_ATTRS512
259_mm512_and_pd(__m512d __A__m512d __B) {
260  return (__m512d)((__v8du)__A & (__v8du)__B);
261}
262
263static __inline__ __m512d __DEFAULT_FN_ATTRS512
264_mm512_mask_and_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
265  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
266                                              (__v8df)_mm512_and_pd(__A__B),
267                                              (__v8df)__W);
268}
269
270static __inline__ __m512d __DEFAULT_FN_ATTRS512
271_mm512_maskz_and_pd(__mmask8 __U__m512d __A__m512d __B) {
272  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
273                                              (__v8df)_mm512_and_pd(__A__B),
274                                              (__v8df)_mm512_setzero_pd());
275}
276
277static __inline__ __m512 __DEFAULT_FN_ATTRS512
278_mm512_and_ps(__m512 __A__m512 __B) {
279  return (__m512)((__v16su)__A & (__v16su)__B);
280}
281
282static __inline__ __m512 __DEFAULT_FN_ATTRS512
283_mm512_mask_and_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
284  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
285                                             (__v16sf)_mm512_and_ps(__A__B),
286                                             (__v16sf)__W);
287}
288
289static __inline__ __m512 __DEFAULT_FN_ATTRS512
290_mm512_maskz_and_ps(__mmask16 __U__m512 __A__m512 __B) {
291  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
292                                             (__v16sf)_mm512_and_ps(__A__B),
293                                             (__v16sf)_mm512_setzero_ps());
294}
295
296static __inline__ __m512d __DEFAULT_FN_ATTRS512
297_mm512_andnot_pd(__m512d __A__m512d __B) {
298  return (__m512d)(~(__v8du)__A & (__v8du)__B);
299}
300
301static __inline__ __m512d __DEFAULT_FN_ATTRS512
302_mm512_mask_andnot_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
303  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
304                                              (__v8df)_mm512_andnot_pd(__A__B),
305                                              (__v8df)__W);
306}
307
308static __inline__ __m512d __DEFAULT_FN_ATTRS512
309_mm512_maskz_andnot_pd(__mmask8 __U__m512d __A__m512d __B) {
310  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
311                                              (__v8df)_mm512_andnot_pd(__A__B),
312                                              (__v8df)_mm512_setzero_pd());
313}
314
315static __inline__ __m512 __DEFAULT_FN_ATTRS512
316_mm512_andnot_ps(__m512 __A__m512 __B) {
317  return (__m512)(~(__v16su)__A & (__v16su)__B);
318}
319
320static __inline__ __m512 __DEFAULT_FN_ATTRS512
321_mm512_mask_andnot_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
322  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
323                                             (__v16sf)_mm512_andnot_ps(__A__B),
324                                             (__v16sf)__W);
325}
326
327static __inline__ __m512 __DEFAULT_FN_ATTRS512
328_mm512_maskz_andnot_ps(__mmask16 __U__m512 __A__m512 __B) {
329  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
330                                             (__v16sf)_mm512_andnot_ps(__A__B),
331                                             (__v16sf)_mm512_setzero_ps());
332}
333
334static __inline__ __m512i __DEFAULT_FN_ATTRS512
335_mm512_cvtpd_epi64 (__m512d __A) {
336  return (__m512i__builtin_ia32_cvtpd2qq512_mask ((__v8df__A,
337                (__v8di_mm512_setzero_si512(),
338                (__mmask8) -1,
339                _MM_FROUND_CUR_DIRECTION);
340}
341
342static __inline__ __m512i __DEFAULT_FN_ATTRS512
343_mm512_mask_cvtpd_epi64 (__m512i __W__mmask8 __U__m512d __A) {
344  return (__m512i__builtin_ia32_cvtpd2qq512_mask ((__v8df__A,
345                (__v8di__W,
346                (__mmask8__U,
347                _MM_FROUND_CUR_DIRECTION);
348}
349
350static __inline__ __m512i __DEFAULT_FN_ATTRS512
351_mm512_maskz_cvtpd_epi64 (__mmask8 __U__m512d __A) {
352  return (__m512i__builtin_ia32_cvtpd2qq512_mask ((__v8df__A,
353                (__v8di_mm512_setzero_si512(),
354                (__mmask8__U,
355                _MM_FROUND_CUR_DIRECTION);
356}
357
/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))

373static __inline__ __m512i __DEFAULT_FN_ATTRS512
374_mm512_cvtpd_epu64 (__m512d __A) {
375  return (__m512i__builtin_ia32_cvtpd2uqq512_mask ((__v8df__A,
376                 (__v8di_mm512_setzero_si512(),
377                 (__mmask8) -1,
378                 _MM_FROUND_CUR_DIRECTION);
379}
380
381static __inline__ __m512i __DEFAULT_FN_ATTRS512
382_mm512_mask_cvtpd_epu64 (__m512i __W__mmask8 __U__m512d __A) {
383  return (__m512i__builtin_ia32_cvtpd2uqq512_mask ((__v8df__A,
384                 (__v8di__W,
385                 (__mmask8__U,
386                 _MM_FROUND_CUR_DIRECTION);
387}
388
389static __inline__ __m512i __DEFAULT_FN_ATTRS512
390_mm512_maskz_cvtpd_epu64 (__mmask8 __U__m512d __A) {
391  return (__m512i__builtin_ia32_cvtpd2uqq512_mask ((__v8df__A,
392                 (__v8di_mm512_setzero_si512(),
393                 (__mmask8__U,
394                 _MM_FROUND_CUR_DIRECTION);
395}
396
/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))

412static __inline__ __m512i __DEFAULT_FN_ATTRS512
413_mm512_cvtps_epi64 (__m256 __A) {
414  return (__m512i__builtin_ia32_cvtps2qq512_mask ((__v8sf__A,
415                (__v8di_mm512_setzero_si512(),
416                (__mmask8) -1,
417                _MM_FROUND_CUR_DIRECTION);
418}
419
420static __inline__ __m512i __DEFAULT_FN_ATTRS512
421_mm512_mask_cvtps_epi64 (__m512i __W__mmask8 __U__m256 __A) {
422  return (__m512i__builtin_ia32_cvtps2qq512_mask ((__v8sf__A,
423                (__v8di__W,
424                (__mmask8__U,
425                _MM_FROUND_CUR_DIRECTION);
426}
427
428static __inline__ __m512i __DEFAULT_FN_ATTRS512
429_mm512_maskz_cvtps_epi64 (__mmask8 __U__m256 __A) {
430  return (__m512i__builtin_ia32_cvtps2qq512_mask ((__v8sf__A,
431                (__v8di_mm512_setzero_si512(),
432                (__mmask8__U,
433                _MM_FROUND_CUR_DIRECTION);
434}
435
/* Expands to __builtin_ia32_cvtps2qq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtps2qq512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtps2qq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))

451static __inline__ __m512i __DEFAULT_FN_ATTRS512
452_mm512_cvtps_epu64 (__m256 __A) {
453  return (__m512i__builtin_ia32_cvtps2uqq512_mask ((__v8sf__A,
454                 (__v8di_mm512_setzero_si512(),
455                 (__mmask8) -1,
456                 _MM_FROUND_CUR_DIRECTION);
457}
458
459static __inline__ __m512i __DEFAULT_FN_ATTRS512
460_mm512_mask_cvtps_epu64 (__m512i __W__mmask8 __U__m256 __A) {
461  return (__m512i__builtin_ia32_cvtps2uqq512_mask ((__v8sf__A,
462                 (__v8di__W,
463                 (__mmask8__U,
464                 _MM_FROUND_CUR_DIRECTION);
465}
466
467static __inline__ __m512i __DEFAULT_FN_ATTRS512
468_mm512_maskz_cvtps_epu64 (__mmask8 __U__m256 __A) {
469  return (__m512i__builtin_ia32_cvtps2uqq512_mask ((__v8sf__A,
470                 (__v8di_mm512_setzero_si512(),
471                 (__mmask8__U,
472                 _MM_FROUND_CUR_DIRECTION);
473}
474
/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))


491static __inline__ __m512d __DEFAULT_FN_ATTRS512
492_mm512_cvtepi64_pd (__m512i __A) {
493  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
494}
495
496static __inline__ __m512d __DEFAULT_FN_ATTRS512
497_mm512_mask_cvtepi64_pd (__m512d __W__mmask8 __U__m512i __A) {
498  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
499                                              (__v8df)_mm512_cvtepi64_pd(__A),
500                                              (__v8df)__W);
501}
502
503static __inline__ __m512d __DEFAULT_FN_ATTRS512
504_mm512_maskz_cvtepi64_pd (__mmask8 __U__m512i __A) {
505  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
506                                              (__v8df)_mm512_cvtepi64_pd(__A),
507                                              (__v8df)_mm512_setzero_pd());
508}
509
/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvt_roundepi64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

525static __inline__ __m256 __DEFAULT_FN_ATTRS512
526_mm512_cvtepi64_ps (__m512i __A) {
527  return (__m256__builtin_ia32_cvtqq2ps512_mask ((__v8di__A,
528               (__v8sf_mm256_setzero_ps(),
529               (__mmask8) -1,
530               _MM_FROUND_CUR_DIRECTION);
531}
532
533static __inline__ __m256 __DEFAULT_FN_ATTRS512
534_mm512_mask_cvtepi64_ps (__m256 __W__mmask8 __U__m512i __A) {
535  return (__m256__builtin_ia32_cvtqq2ps512_mask ((__v8di__A,
536               (__v8sf__W,
537               (__mmask8__U,
538               _MM_FROUND_CUR_DIRECTION);
539}
540
541static __inline__ __m256 __DEFAULT_FN_ATTRS512
542_mm512_maskz_cvtepi64_ps (__mmask8 __U__m512i __A) {
543  return (__m256__builtin_ia32_cvtqq2ps512_mask ((__v8di__A,
544               (__v8sf_mm256_setzero_ps(),
545               (__mmask8__U,
546               _MM_FROUND_CUR_DIRECTION);
547}
548
/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvt_roundepi64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))


565static __inline__ __m512i __DEFAULT_FN_ATTRS512
566_mm512_cvttpd_epi64 (__m512d __A) {
567  return (__m512i__builtin_ia32_cvttpd2qq512_mask ((__v8df__A,
568                 (__v8di_mm512_setzero_si512(),
569                 (__mmask8) -1,
570                 _MM_FROUND_CUR_DIRECTION);
571}
572
573static __inline__ __m512i __DEFAULT_FN_ATTRS512
574_mm512_mask_cvttpd_epi64 (__m512i __W__mmask8 __U__m512d __A) {
575  return (__m512i__builtin_ia32_cvttpd2qq512_mask ((__v8df__A,
576                 (__v8di__W,
577                 (__mmask8__U,
578                 _MM_FROUND_CUR_DIRECTION);
579}
580
581static __inline__ __m512i __DEFAULT_FN_ATTRS512
582_mm512_maskz_cvttpd_epi64 (__mmask8 __U__m512d __A) {
583  return (__m512i__builtin_ia32_cvttpd2qq512_mask ((__v8df__A,
584                 (__v8di_mm512_setzero_si512(),
585                 (__mmask8__U,
586                 _MM_FROUND_CUR_DIRECTION);
587}
588
/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvtt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))

604static __inline__ __m512i __DEFAULT_FN_ATTRS512
605_mm512_cvttpd_epu64 (__m512d __A) {
606  return (__m512i__builtin_ia32_cvttpd2uqq512_mask ((__v8df__A,
607                  (__v8di_mm512_setzero_si512(),
608                  (__mmask8) -1,
609                  _MM_FROUND_CUR_DIRECTION);
610}
611
612static __inline__ __m512i __DEFAULT_FN_ATTRS512
613_mm512_mask_cvttpd_epu64 (__m512i __W__mmask8 __U__m512d __A) {
614  return (__m512i__builtin_ia32_cvttpd2uqq512_mask ((__v8df__A,
615                  (__v8di__W,
616                  (__mmask8__U,
617                  _MM_FROUND_CUR_DIRECTION);
618}
619
620static __inline__ __m512i __DEFAULT_FN_ATTRS512
621_mm512_maskz_cvttpd_epu64 (__mmask8 __U__m512d __A) {
622  return (__m512i__builtin_ia32_cvttpd2uqq512_mask ((__v8df__A,
623                  (__v8di_mm512_setzero_si512(),
624                  (__mmask8__U,
625                  _MM_FROUND_CUR_DIRECTION);
626}
627
/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvtt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with W as the
 * pass-through vector, U as the mask and R as the last argument. The
 * expansion is fully parenthesized so it composes safely inside larger
 * expressions. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))

643static __inline__ __m512i __DEFAULT_FN_ATTRS512
644_mm512_cvttps_epi64 (__m256 __A) {
645  return (__m512i__builtin_ia32_cvttps2qq512_mask ((__v8sf__A,
646                 (__v8di_mm512_setzero_si512(),
647                 (__mmask8) -1,
648                 _MM_FROUND_CUR_DIRECTION);
649}
650
651static __inline__ __m512i __DEFAULT_FN_ATTRS512
652_mm512_mask_cvttps_epi64 (__m512i __W__mmask8 __U__m256 __A) {
653  return (__m512i__builtin_ia32_cvttps2qq512_mask ((__v8sf__A,
654                 (__v8di__W,
655                 (__mmask8__U,
656                 _MM_FROUND_CUR_DIRECTION);
657}
658
659static __inline__ __m512i __DEFAULT_FN_ATTRS512
660_mm512_maskz_cvttps_epi64 (__mmask8 __U__m256 __A) {
661  return (__m512i__builtin_ia32_cvttps2qq512_mask ((__v8sf__A,
662                 (__v8di_mm512_setzero_si512(),
663                 (__mmask8__U,
664                 _MM_FROUND_CUR_DIRECTION);
665}
666
/* Expands to __builtin_ia32_cvttps2qq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvtt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvttps2qq512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvttps2qq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))

682static __inline__ __m512i __DEFAULT_FN_ATTRS512
683_mm512_cvttps_epu64 (__m256 __A) {
684  return (__m512i__builtin_ia32_cvttps2uqq512_mask ((__v8sf__A,
685                  (__v8di_mm512_setzero_si512(),
686                  (__mmask8) -1,
687                  _MM_FROUND_CUR_DIRECTION);
688}
689
690static __inline__ __m512i __DEFAULT_FN_ATTRS512
691_mm512_mask_cvttps_epu64 (__m512i __W__mmask8 __U__m256 __A) {
692  return (__m512i__builtin_ia32_cvttps2uqq512_mask ((__v8sf__A,
693                  (__v8di__W,
694                  (__mmask8__U,
695                  _MM_FROUND_CUR_DIRECTION);
696}
697
698static __inline__ __m512i __DEFAULT_FN_ATTRS512
699_mm512_maskz_cvttps_epu64 (__mmask8 __U__m256 __A) {
700  return (__m512i__builtin_ia32_cvttps2uqq512_mask ((__v8sf__A,
701                  (__v8di_mm512_setzero_si512(),
702                  (__mmask8__U,
703                  _MM_FROUND_CUR_DIRECTION);
704}
705
/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvtt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with W as the
 * pass-through vector, U as the mask and R as the last argument. The
 * expansion is fully parenthesized so it composes safely inside larger
 * expressions. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with a zero pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))

721static __inline__ __m512d __DEFAULT_FN_ATTRS512
722_mm512_cvtepu64_pd (__m512i __A) {
723  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
724}
725
726static __inline__ __m512d __DEFAULT_FN_ATTRS512
727_mm512_mask_cvtepu64_pd (__m512d __W__mmask8 __U__m512i __A) {
728  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
729                                              (__v8df)_mm512_cvtepu64_pd(__A),
730                                              (__v8df)__W);
731}
732
733static __inline__ __m512d __DEFAULT_FN_ATTRS512
734_mm512_maskz_cvtepu64_pd (__mmask8 __U__m512i __A) {
735  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
736                                              (__v8df)_mm512_cvtepu64_pd(__A),
737                                              (__v8df)_mm512_setzero_pd());
738}
739
/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with a zero pass-through
 * vector, an all-ones mask and R as the last argument. The expansion is
 * fully parenthesized so it composes safely inside larger expressions. */
#define _mm512_cvt_roundepu64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))

/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with W as the pass-through
 * vector, U as the mask and R as the last argument. The expansion is fully
 * parenthesized so it composes safely inside larger expressions. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))


751#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
752  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
753                                            (__v8df)_mm512_setzero_pd(), \
754                                            (__mmask8)(U), (int)(R))
755
756
757static __inline__ __m256 __DEFAULT_FN_ATTRS512
758_mm512_cvtepu64_ps (__m512i __A) {
759  return (__m256__builtin_ia32_cvtuqq2ps512_mask ((__v8di__A,
760                (__v8sf_mm256_setzero_ps(),
761                (__mmask8) -1,
762                _MM_FROUND_CUR_DIRECTION);
763}
764
765static __inline__ __m256 __DEFAULT_FN_ATTRS512
766_mm512_mask_cvtepu64_ps (__m256 __W__mmask8 __U__m512i __A) {
767  return (__m256__builtin_ia32_cvtuqq2ps512_mask ((__v8di__A,
768                (__v8sf__W,
769                (__mmask8__U,
770                _MM_FROUND_CUR_DIRECTION);
771}
772
773static __inline__ __m256 __DEFAULT_FN_ATTRS512
774_mm512_maskz_cvtepu64_ps (__mmask8 __U__m512i __A) {
775  return (__m256__builtin_ia32_cvtuqq2ps512_mask ((__v8di__A,
776                (__v8sf_mm256_setzero_ps(),
777                (__mmask8__U,
778                _MM_FROUND_CUR_DIRECTION);
779}
780
/* Expands to __builtin_ia32_cvtuqq2ps512_mask on A (viewed as __v8di) with a
 * zeroed __v8sf second operand, an all-ones __mmask8 and R as the trailing
 * int operand.  Fully parenthesized so postfix operators at the call site
 * apply to the whole result. */
#define _mm512_cvt_roundepu64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))

/* Same builtin with W (viewed as __v8sf) as the second operand and U as the
 * mask operand. */
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)(__m256)(W), (__mmask8)(U), \
                                            (int)(R)))

/* Same builtin with a zeroed second operand and U as the mask operand. */
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_rangepd512_mask on A and B (both viewed as
 * __v8df) with C as the int selector operand, a zeroed vector, an all-ones
 * __mmask8 and _MM_FROUND_CUR_DIRECTION as the last argument.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_range_pd(A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

/* Same builtin with W (viewed as __v8df) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Like _mm512_range_pd, but the last argument of
 * __builtin_ia32_rangepd512_mask is the caller-supplied R instead of
 * _MM_FROUND_CUR_DIRECTION.  Fully parenthesized so postfix operators at the
 * call site apply to the whole result. */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)))

/* Same builtin with W (viewed as __v8df) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)))

/* Expands to __builtin_ia32_rangeps512_mask on A and B (both viewed as
 * __v16sf) with C as the int selector operand, a zeroed vector, an all-ones
 * __mmask16 and _MM_FROUND_CUR_DIRECTION as the last argument.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_range_ps(A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

/* Same builtin with W (viewed as __v16sf) in place of the zeroed vector and
 * U as the mask operand. */
#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

/* Like _mm512_range_ps, but the last argument of
 * __builtin_ia32_rangeps512_mask is the caller-supplied R instead of
 * _MM_FROUND_CUR_DIRECTION.  Fully parenthesized so postfix operators at the
 * call site apply to the whole result. */
#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)))

/* Same builtin with W (viewed as __v16sf) in place of the zeroed vector and
 * U as the mask operand. */
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          (int)(R)))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)))

/* Expands to __builtin_ia32_rangess128_round_mask on A and B (both viewed as
 * __v4sf) with a zeroed vector, an all-ones __mmask8, C as the int selector
 * operand and R as the last argument.  Fully parenthesized so postfix
 * operators at the call site apply to the whole result. */
#define _mm_range_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R)))

/* _mm_range_round_ss with _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_range_ss(A, B, C) \
  _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Same builtin with W (viewed as __v4sf) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))

/* _mm_mask_range_round_ss with _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_mask_range_ss(W, U, A, B, C) \
  _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))

/* _mm_maskz_range_round_ss with _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_maskz_range_ss(U, A, B, C) \
  _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Expands to __builtin_ia32_rangesd128_round_mask on A and B (both viewed as
 * __v2df) with a zeroed vector, an all-ones __mmask8, C as the int selector
 * operand and R as the last argument.  Fully parenthesized so postfix
 * operators at the call site apply to the whole result. */
#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(C), \
                                                 (int)(R)))

/* _mm_range_round_sd with _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Same builtin with W (viewed as __v2df) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))

/* _mm_mask_range_round_sd with _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))

/* _mm_maskz_range_round_sd with _MM_FROUND_CUR_DIRECTION as R. */
#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Expands to __builtin_ia32_reducepd512_mask on A (viewed as __v8df) with B
 * as the int immediate operand, a zeroed vector, an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION as the last argument.  Fully parenthesized so
 * postfix operators at the call site apply to the whole result. */
#define _mm512_reduce_pd(A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

/* Same builtin with W (viewed as __v8df) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

/* Expands to __builtin_ia32_reduceps512_mask on A (viewed as __v16sf) with B
 * as the int immediate operand, a zeroed vector, an all-ones __mmask16 and
 * _MM_FROUND_CUR_DIRECTION as the last argument.  Fully parenthesized so
 * postfix operators at the call site apply to the whole result. */
#define _mm512_reduce_ps(A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

/* Same builtin with W (viewed as __v16sf) in place of the zeroed vector and
 * U as the mask operand. */
#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Like _mm512_reduce_pd, but the last argument of
 * __builtin_ia32_reducepd512_mask is the caller-supplied R.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_reduce_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Same builtin with W (viewed as __v8df) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

/* Like _mm512_reduce_ps, but the last argument of
 * __builtin_ia32_reduceps512_mask is the caller-supplied R.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_reduce_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

/* Same builtin with W (viewed as __v16sf) in place of the zeroed vector and
 * U as the mask operand. */
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

/* Expands to __builtin_ia32_reducess_mask on A and B (both viewed as __v4sf)
 * with a zeroed vector, an all-ones __mmask8, C as the int immediate operand
 * and _MM_FROUND_CUR_DIRECTION as the last argument.  Fully parenthesized so
 * postfix operators at the call site apply to the whole result. */
#define _mm_reduce_ss(A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))

/* Same builtin with W (viewed as __v4sf) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION))

/* Like _mm_reduce_ss, but the last argument of __builtin_ia32_reducess_mask
 * is the caller-supplied R.  Fully parenthesized so postfix operators at the
 * call site apply to the whole result. */
#define _mm_reduce_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), (int)(R)))

/* Same builtin with W (viewed as __v4sf) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), (int)(R)))

/* Expands to __builtin_ia32_reducesd_mask on A and B (both viewed as __v2df)
 * with a zeroed vector, an all-ones __mmask8, C as the int immediate operand
 * and _MM_FROUND_CUR_DIRECTION as the last argument.  Fully parenthesized so
 * postfix operators at the call site apply to the whole result. */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))

/* Same builtin with W (viewed as __v2df) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), _MM_FROUND_CUR_DIRECTION))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))

/* Like _mm_reduce_sd, but the last argument of __builtin_ia32_reducesd_mask
 * is the caller-supplied R.  Fully parenthesized so postfix operators at the
 * call site apply to the whole result. */
#define _mm_reduce_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), (int)(R)))

/* Same builtin with W (viewed as __v2df) in place of the zeroed vector and U
 * as the mask operand. */
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), (int)(R)))

/* Same builtin with a zeroed vector and U as the mask operand. */
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), (int)(R)))

1067static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
1068_mm512_movepi32_mask (__m512i __A)
1069{
1070  return (__mmask16__builtin_ia32_cvtd2mask512 ((__v16si__A);
1071}
1072
1073static __inline__ __m512i __DEFAULT_FN_ATTRS512
1074_mm512_movm_epi32 (__mmask16 __A)
1075{
1076  return (__m512i__builtin_ia32_cvtmask2d512 (__A);
1077}
1078
1079static __inline__ __m512i __DEFAULT_FN_ATTRS512
1080_mm512_movm_epi64 (__mmask8 __A)
1081{
1082  return (__m512i__builtin_ia32_cvtmask2q512 (__A);
1083}
1084
1085static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
1086_mm512_movepi64_mask (__m512i __A)
1087{
1088  return (__mmask8__builtin_ia32_cvtq2mask512 ((__v8di__A);
1089}
1090
1091
1092static __inline__ __m512 __DEFAULT_FN_ATTRS512
1093_mm512_broadcast_f32x2 (__m128 __A)
1094{
1095  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
1096                                         01010101,
1097                                         01010101);
1098}
1099
1100static __inline__ __m512 __DEFAULT_FN_ATTRS512
1101_mm512_mask_broadcast_f32x2 (__m512 __O__mmask16 __M__m128 __A)
1102{
1103  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1104                                             (__v16sf)_mm512_broadcast_f32x2(__A),
1105                                             (__v16sf)__O);
1106}
1107
1108static __inline__ __m512 __DEFAULT_FN_ATTRS512
1109_mm512_maskz_broadcast_f32x2 (__mmask16 __M__m128 __A)
1110{
1111  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1112                                             (__v16sf)_mm512_broadcast_f32x2(__A),
1113                                             (__v16sf)_mm512_setzero_ps());
1114}
1115
1116static __inline__ __m512 __DEFAULT_FN_ATTRS512
1117_mm512_broadcast_f32x8(__m256 __A)
1118{
1119  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
1120                                         01234567,
1121                                         01234567);
1122}
1123
1124static __inline__ __m512 __DEFAULT_FN_ATTRS512
1125_mm512_mask_broadcast_f32x8(__m512 __O__mmask16 __M__m256 __A)
1126{
1127  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1128                                           (__v16sf)_mm512_broadcast_f32x8(__A),
1129                                           (__v16sf)__O);
1130}
1131
1132static __inline__ __m512 __DEFAULT_FN_ATTRS512
1133_mm512_maskz_broadcast_f32x8(__mmask16 __M__m256 __A)
1134{
1135  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1136                                           (__v16sf)_mm512_broadcast_f32x8(__A),
1137                                           (__v16sf)_mm512_setzero_ps());
1138}
1139
1140static __inline__ __m512d __DEFAULT_FN_ATTRS512
1141_mm512_broadcast_f64x2(__m128d __A)
1142{
1143  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1144                                          01010101);
1145}
1146
1147static __inline__ __m512d __DEFAULT_FN_ATTRS512
1148_mm512_mask_broadcast_f64x2(__m512d __O__mmask8 __M__m128d __A)
1149{
1150  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1151                                            (__v8df)_mm512_broadcast_f64x2(__A),
1152                                            (__v8df)__O);
1153}
1154
1155static __inline__ __m512d __DEFAULT_FN_ATTRS512
1156_mm512_maskz_broadcast_f64x2(__mmask8 __M__m128d __A)
1157{
1158  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1159                                            (__v8df)_mm512_broadcast_f64x2(__A),
1160                                            (__v8df)_mm512_setzero_pd());
1161}
1162
1163static __inline__ __m512i __DEFAULT_FN_ATTRS512
1164_mm512_broadcast_i32x2 (__m128i __A)
1165{
1166  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1167                                          01010101,
1168                                          01010101);
1169}
1170
1171static __inline__ __m512i __DEFAULT_FN_ATTRS512
1172_mm512_mask_broadcast_i32x2 (__m512i __O__mmask16 __M__m128i __A)
1173{
1174  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1175                                             (__v16si)_mm512_broadcast_i32x2(__A),
1176                                             (__v16si)__O);
1177}
1178
1179static __inline__ __m512i __DEFAULT_FN_ATTRS512
1180_mm512_maskz_broadcast_i32x2 (__mmask16 __M__m128i __A)
1181{
1182  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1183                                             (__v16si)_mm512_broadcast_i32x2(__A),
1184                                             (__v16si)_mm512_setzero_si512());
1185}
1186
1187static __inline__ __m512i __DEFAULT_FN_ATTRS512
1188_mm512_broadcast_i32x8(__m256i __A)
1189{
1190  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
1191                                          01234567,
1192                                          01234567);
1193}
1194
1195static __inline__ __m512i __DEFAULT_FN_ATTRS512
1196_mm512_mask_broadcast_i32x8(__m512i __O__mmask16 __M__m256i __A)
1197{
1198  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1199                                           (__v16si)_mm512_broadcast_i32x8(__A),
1200                                           (__v16si)__O);
1201}
1202
1203static __inline__ __m512i __DEFAULT_FN_ATTRS512
1204_mm512_maskz_broadcast_i32x8(__mmask16 __M__m256i __A)
1205{
1206  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1207                                           (__v16si)_mm512_broadcast_i32x8(__A),
1208                                           (__v16si)_mm512_setzero_si512());
1209}
1210
1211static __inline__ __m512i __DEFAULT_FN_ATTRS512
1212_mm512_broadcast_i64x2(__m128i __A)
1213{
1214  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1215                                          01010101);
1216}
1217
1218static __inline__ __m512i __DEFAULT_FN_ATTRS512
1219_mm512_mask_broadcast_i64x2(__m512i __O__mmask8 __M__m128i __A)
1220{
1221  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1222                                            (__v8di)_mm512_broadcast_i64x2(__A),
1223                                            (__v8di)__O);
1224}
1225
1226static __inline__ __m512i __DEFAULT_FN_ATTRS512
1227_mm512_maskz_broadcast_i64x2(__mmask8 __M__m128i __A)
1228{
1229  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1230                                            (__v8di)_mm512_broadcast_i64x2(__A),
1231                                            (__v8di)_mm512_setzero_si512());
1232}
1233
/* Expands to __builtin_ia32_extractf32x8_mask on A (viewed as __v16sf) with
 * imm as the int immediate operand, _mm256_undefined_ps() as the third
 * operand and an all-ones __mmask8.  Fully parenthesized so postfix operators
 * at the call site apply to the whole result. */
#define _mm512_extractf32x8_ps(A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_undefined_ps(), \
                                            (__mmask8)-1))

/* Same builtin with W (viewed as __v8sf) as the third operand and U as the
 * mask operand. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))

/* Same builtin with _mm256_setzero_ps() as the third operand and U as the
 * mask operand. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))

/* Expands to __builtin_ia32_extractf64x2_512_mask on A (viewed as __v8df)
 * with imm as the int immediate operand, _mm_undefined_pd() as the third
 * operand and an all-ones __mmask8.  Fully parenthesized so postfix operators
 * at the call site apply to the whole result. */
#define _mm512_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))

/* Same builtin with W (viewed as __v2df) as the third operand and U as the
 * mask operand. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))

/* Same builtin with _mm_setzero_pd() as the third operand and U as the mask
 * operand. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))

/* Expands to __builtin_ia32_extracti32x8_mask on A (viewed as __v16si) with
 * imm as the int immediate operand, _mm256_undefined_si256() as the third
 * operand and an all-ones __mmask8.  Fully parenthesized so postfix operators
 * at the call site apply to the whole result. */
#define _mm512_extracti32x8_epi32(A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

/* Same builtin with W (viewed as __v8si) as the third operand and U as the
 * mask operand. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U)))

/* Same builtin with _mm256_setzero_si256() as the third operand and U as the
 * mask operand. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))

/* Expands to __builtin_ia32_extracti64x2_512_mask on A (viewed as __v8di)
 * with imm as the int immediate operand, _mm_undefined_si128() as the third
 * operand and an all-ones __mmask8.  Fully parenthesized so postfix operators
 * at the call site apply to the whole result. */
#define _mm512_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))

/* Same builtin with W (viewed as __v2di) as the third operand and U as the
 * mask operand. */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))

/* Same builtin with _mm_setzero_si128() as the third operand and U as the
 * mask operand. */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))

/* Expands to __builtin_ia32_insertf32x8 on A (viewed as __v16sf) and B
 * (viewed as __v8sf) with imm as the int immediate operand.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_insertf32x8(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                      (__v8sf)(__m256)(B), (int)(imm)))

/* Masked variant: passes mask U, the result of _mm512_insertf32x8 and W to
 * __builtin_ia32_selectps_512, so W supplies the alternative value for the
 * select. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))

/* Zero-masked variant: the alternative value is _mm512_setzero_ps(). */
#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))

/* Expands to __builtin_ia32_insertf64x2_512 on A (viewed as __v8df) and B
 * (viewed as __v2df) with imm as the int immediate operand.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_insertf64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))

/* Masked variant: passes mask U, the result of _mm512_insertf64x2 and W to
 * __builtin_ia32_selectpd_512, so W supplies the alternative value for the
 * select. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)))

/* Zero-masked variant: the alternative value is _mm512_setzero_pd(). */
#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()))

/* Expands to __builtin_ia32_inserti32x8 on A (viewed as __v16si) and B
 * (viewed as __v8si) with imm as the int immediate operand.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_inserti32x8(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                       (__v8si)(__m256i)(B), (int)(imm)))

/* Masked variant: passes mask U, the result of _mm512_inserti32x8 and W to
 * __builtin_ia32_selectd_512, so W supplies the alternative value for the
 * select. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

/* Zero-masked variant: the alternative value is _mm512_setzero_si512(). */
#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))

/* Expands to __builtin_ia32_inserti64x2_512 on A (viewed as __v8di) and B
 * (viewed as __v2di) with imm as the int immediate operand.  Fully
 * parenthesized so postfix operators at the call site apply to the whole
 * result. */
#define _mm512_inserti64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))

/* Masked variant: passes mask U, the result of _mm512_inserti64x2 and W to
 * __builtin_ia32_selectq_512, so W supplies the alternative value for the
 * select. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

/* Zero-masked variant: the alternative value is _mm512_setzero_si512(). */
#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))

/* Expands to __builtin_ia32_fpclassps512_mask on A (viewed as __v16sf) with
 * imm as the int immediate operand and U as the __mmask16 operand.  Fully
 * parenthesized so operators at the call site apply to the whole result. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)(U)))

/* Same builtin with an all-ones __mmask16. */
#define _mm512_fpclass_ps_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)-1))

/* Expands to __builtin_ia32_fpclasspd512_mask on A (viewed as __v8df) with
 * imm as the int immediate operand and U as the __mmask8 operand.  Fully
 * parenthesized so operators at the call site apply to the whole result. */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)(U)))

/* Same builtin with an all-ones __mmask8. */
#define _mm512_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)-1))

/* Expands to __builtin_ia32_fpclasssd_mask on A (viewed as __v2df) with imm
 * as the int immediate operand and an all-ones __mmask8.  Fully parenthesized
 * so operators at the call site apply to the whole result. */
#define _mm_fpclass_sd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)-1))

/* Same builtin with U as the mask operand. */
#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)(U)))

/* Expands to __builtin_ia32_fpclassss_mask on A (viewed as __v4sf) with imm
 * as the int immediate operand and an all-ones __mmask8.  Fully parenthesized
 * so operators at the call site apply to the whole result. */
#define _mm_fpclass_ss_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)-1))

/* Same builtin with U as the mask operand. */
#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)(U)))

/* The attribute helper macros are defined at the top of this header for its
 * own use; undefine them so they are not visible after the include. */
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS

#endif /* __AVX512DQINTRIN_H */