Clang Project

clang_source_code/lib/Headers/avx512fintrin.h
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H

typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,     /* sign = 0             */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* Create vectors with repeated elements */

static  __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_setzero_si512(void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

#define _mm512_setzero_epi32 _mm512_setzero_si512

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) _mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) __O);

}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) _mm512_setzero_si512());
}
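
/* Illustrative usage sketch (editorial addition, not part of the upstream
 * header): the mask form of a broadcast keeps the destination lanes whose
 * mask bit is 0, while the maskz form zeroes them. The helper name below is
 * hypothetical.
 */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__example_broadcast_low_half(__m128i __a)
{
  /* Broadcast element 0 of __a into lanes 0-7 only; lanes 8-15 become 0
     because the maskz variant is used. */
  return _mm512_maskz_broadcastd_epi32((__mmask16)0x00FF, __a);
}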


static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_setzero_ps(void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

#define _mm512_setzero _mm512_setzero_ps

static  __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_setzero_pd(void)
{
  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set1_ps(float __w)
{
  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                                 __w, __w, __w, __w, __w, __w, __w, __w  };
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set1_pd(double __w)
{
  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi8(char __w)
{
  return __extension__ (__m512i)(__v64qi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w  };
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi16(short __w)
{
  return __extension__ (__m512i)(__v32hi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi32(int __s)
{
  return __extension__ (__m512i)(__v16si){
    __s, __s, __s, __s, __s, __s, __s, __s,
    __s, __s, __s, __s, __s, __s, __s, __s };
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si)_mm512_set1_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi64(long long __d)
{
  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di)_mm512_set1_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
       long long __D)
{
  return __extension__ (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
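
/* Illustrative usage sketch (editorial addition, not part of the upstream
 * header): _mm512_set4_* takes its four values highest-element first, while
 * the _mm512_setr4_* macros take them in memory order. The helper name below
 * is hypothetical.
 */
static __inline __m512i __DEFAULT_FN_ATTRS512
__example_set4_vs_setr4(void)
{
  /* Both vectors hold the pattern 1, 2, 3, 4 repeated four times,
     counting from element 0 upward. */
  __m512i __a = _mm512_set4_epi32(4, 3, 2, 1);
  __m512i __b = _mm512_setr4_epi32(1, 2, 3, 4);
  /* The two vectors are identical, so their XOR is all zero. */
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}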

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

/* Cast between vector types */

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
                                          -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline __m128d __DEFAULT_FN_ATTRS512
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

static __inline __m256d __DEFAULT_FN_ATTRS512
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

static __inline __m128 __DEFAULT_FN_ATTRS512
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

static __inline __m256 __DEFAULT_FN_ATTRS512
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_castpd128_pd512 (__m128d __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_castps128_ps512 (__m128 __A)
{
    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castsi128_si512 (__m128i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castsi256_si512 (__m256i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

static __inline __m128i __DEFAULT_FN_ATTRS512
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

static __inline __m256i __DEFAULT_FN_ATTRS512
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
  return (int)__a;
}

/// Constructs a 512-bit floating-point vector of [8 x double] from a
///    128-bit floating-point vector of [2 x double]. The lower 128 bits
///    contain the value of the source vector. The upper 384 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_zextpd128_pd512(__m128d __a)
{
  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// Constructs a 512-bit floating-point vector of [8 x double] from a
///    256-bit floating-point vector of [4 x double]. The lower 256 bits
///    contain the value of the source vector. The upper 256 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [4 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_zextpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
}

/// Constructs a 512-bit floating-point vector of [16 x float] from a
///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
///    the value of the source vector. The upper 384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_zextps128_ps512(__m128 __a)
{
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
}

/// Constructs a 512-bit floating-point vector of [16 x float] from a
///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
///    the value of the source vector. The upper 256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [8 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_zextps256_ps512(__m256 __a)
{
  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/// Constructs a 512-bit integer vector from a 128-bit integer vector.
///    The lower 128 bits contain the value of the source vector. The upper
///    384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit integer vector.
/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
///    the parameter. The upper 384 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_zextsi128_si512(__m128i __a)
{
  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// Constructs a 512-bit integer vector from a 256-bit integer vector.
///    The lower 256 bits contain the value of the source vector. The upper
///    256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit integer vector.
/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
///    the parameter. The upper 256 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_zextsi256_si512(__m256i __a)
{
  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
}
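
/* Illustrative usage sketch (editorial addition, not part of the upstream
 * header): unlike the _mm512_cast*_*512 intrinsics above, whose widened upper
 * elements are undefined (-1 shuffle indices), the _mm512_zext* intrinsics
 * guarantee zeroed upper bits. The helper name below is hypothetical.
 */
static __inline __m512d __DEFAULT_FN_ATTRS512
__example_widen_with_zero_upper(__m128d __lo)
{
  /* Elements 0-1 come from __lo; elements 2-7 are 0.0. */
  return _mm512_zextpd128_pd512(__lo);
}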

/* Bitwise operators */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                (__v16si)_mm512_and_epi32(__a, __b),
                (__v16si)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_and_epi32(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
    return (__m512i)__builtin_ia32_selectq_512 ((__mmask8)__k,
                (__v8di)_mm512_and_epi64(__a, __b),
                (__v8di)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)__A & (__v16su)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_andnot_epi32(__A, __B),
                                         (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_andnot_epi64(__A, __B),
                                          (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                            (__v16si)_mm512_xor_epi32(__a, __b),
                                            (__v16si)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
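
/* Illustrative usage sketch (editorial addition, not part of the upstream
 * header): the andnot intrinsics complement their *first* operand, computing
 * (~a) & b. The helper name below is hypothetical.
 */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__example_clear_selected_bits(__m512i __bits_to_clear, __m512i __value)
{
  /* Returns __value with every bit that is set in __bits_to_clear forced to 0. */
  return _mm512_andnot_si512(__bits_to_clear, __value);
}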

/* Arithmetic */

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a + (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

#define _mm512_max_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)(W))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd())
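
/* Illustrative usage sketch (editorial addition, not part of the upstream
 * header): the *_round_* forms take an extra immediate built from the
 * _MM_FROUND_* macros defined at the top of this header; passing
 * _MM_FROUND_CUR_DIRECTION makes them behave like the plain forms that
 * follow. The helper name below is hypothetical.
 */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__example_max_round_cur_direction(__m512d __x, __m512d __y)
{
  return _mm512_max_round_pd(__x, __y, _MM_FROUND_CUR_DIRECTION);
}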

static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
                                           _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_max_round_ps(A, B, R) \
  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps())

static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
                                          _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_max_round_ss(A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_max_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_max_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_max_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
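
/* Illustrative usage sketch (editorial addition, not part of the upstream
 * header): the scalar _ss/_sd forms operate on element 0 only; the remaining
 * elements of the result are taken from __A, and element 0 falls back to __W
 * (mask form) or 0.0 (maskz form) when the low mask bit is clear. The helper
 * name below is hypothetical.
 */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
__example_masked_scalar_max(__m128d __a, __m128d __b, __mmask8 __k)
{
  /* Only bit 0 of __k is consulted. */
  return _mm_maskz_max_sd(__k, __a, __b);
}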

static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epi32(__A, __B),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epi32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epu32(__A, __B),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epu32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

#define _mm512_min_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)(W))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd())

static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
                                           _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_min_round_ps(A, B, R) \
  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)(W))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps())

static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
                                          _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_ss(A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_min_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_min_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epi32(__A, __B),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epi32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epu32(__A, __B),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epu32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si)__Y);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullox_epi64(__A, __B),
                                             (__v8di)__W);
}

#define _mm512_sqrt_round_pd(A, R) \
  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))

#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd())

static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_sqrt_pd(__m512d __A)
{
  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
                                           _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_sqrt_pd(__A),
                                              (__v8df)__W);
1527}
1528
1529static __inline__ __m512d __DEFAULT_FN_ATTRS512
1530_mm512_maskz_sqrt_pd (__mmask8 __U__m512d __A)
1531{
1532  return (__m512d)__builtin_ia32_selectpd_512(__U,
1533                                              (__v8df)_mm512_sqrt_pd(__A),
1534                                              (__v8df)_mm512_setzero_pd());
1535}
1536
1537#define _mm512_sqrt_round_ps(A, R) \
1538  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
1539
1540#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1541  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1542                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1543                                      (__v16sf)(__m512)(W))
1544
1545#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1546  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1547                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1548                                      (__v16sf)_mm512_setzero_ps())
1549
1550static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1551_mm512_sqrt_ps(__m512 __A)
1552{
1553  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1554                                          _MM_FROUND_CUR_DIRECTION);
1555}
1556
1557static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1558_mm512_mask_sqrt_ps(__m512 __W__mmask16 __U__m512 __A)
1559{
1560  return (__m512)__builtin_ia32_selectps_512(__U,
1561                                             (__v16sf)_mm512_sqrt_ps(__A),
1562                                             (__v16sf)__W);
1563}
1564
1565static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1566_mm512_maskz_sqrt_ps__mmask16 __U__m512 __A)
1567{
1568  return (__m512)__builtin_ia32_selectps_512(__U,
1569                                             (__v16sf)_mm512_sqrt_ps(__A),
1570                                             (__v16sf)_mm512_setzero_ps());
1571}
1572
1573static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1574_mm512_rsqrt14_pd(__m512d __A)
1575{
1576  return (__m512d__builtin_ia32_rsqrt14pd512_mask ((__v8df__A,
1577                 (__v8df)
1578                 _mm512_setzero_pd (),
1579                 (__mmask8) -1);}
1580
1581static __inline__ __m512d __DEFAULT_FN_ATTRS512
1582_mm512_mask_rsqrt14_pd (__m512d __W__mmask8 __U__m512d __A)
1583{
1584  return (__m512d__builtin_ia32_rsqrt14pd512_mask ((__v8df__A,
1585                  (__v8df__W,
1586                  (__mmask8__U);
1587}
1588
1589static __inline__ __m512d __DEFAULT_FN_ATTRS512
1590_mm512_maskz_rsqrt14_pd (__mmask8 __U__m512d __A)
1591{
1592  return (__m512d__builtin_ia32_rsqrt14pd512_mask ((__v8df__A,
1593                  (__v8df)
1594                  _mm512_setzero_pd (),
1595                  (__mmask8__U);
1596}
1597
1598static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1599_mm512_rsqrt14_ps(__m512 __A)
1600{
1601  return (__m512__builtin_ia32_rsqrt14ps512_mask ((__v16sf__A,
1602                (__v16sf)
1603                _mm512_setzero_ps (),
1604                (__mmask16) -1);
1605}
1606
1607static __inline__ __m512 __DEFAULT_FN_ATTRS512
1608_mm512_mask_rsqrt14_ps (__m512 __W__mmask16 __U__m512 __A)
1609{
1610  return (__m512__builtin_ia32_rsqrt14ps512_mask ((__v16sf__A,
1611                 (__v16sf__W,
1612                 (__mmask16__U);
1613}
1614
1615static __inline__ __m512 __DEFAULT_FN_ATTRS512
1616_mm512_maskz_rsqrt14_ps (__mmask16 __U__m512 __A)
1617{
1618  return (__m512__builtin_ia32_rsqrt14ps512_mask ((__v16sf__A,
1619                 (__v16sf)
1620                 _mm512_setzero_ps (),
1621                 (__mmask16__U);
1622}
1623
1624static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1625_mm_rsqrt14_ss(__m128 __A__m128 __B)
1626{
1627  return (__m128__builtin_ia32_rsqrt14ss_mask ((__v4sf__A,
1628             (__v4sf__B,
1629             (__v4sf)
1630             _mm_setzero_ps (),
1631             (__mmask8) -1);
1632}
1633
1634static __inline__ __m128 __DEFAULT_FN_ATTRS128
1635_mm_mask_rsqrt14_ss (__m128 __W__mmask8 __U__m128 __A__m128 __B)
1636{
1637 return (__m128__builtin_ia32_rsqrt14ss_mask ((__v4sf__A,
1638          (__v4sf__B,
1639          (__v4sf__W,
1640          (__mmask8__U);
1641}
1642
1643static __inline__ __m128 __DEFAULT_FN_ATTRS128
1644_mm_maskz_rsqrt14_ss (__mmask8 __U__m128 __A__m128 __B)
1645{
1646 return (__m128__builtin_ia32_rsqrt14ss_mask ((__v4sf__A,
1647          (__v4sf__B,
1648          (__v4sf_mm_setzero_ps (),
1649          (__mmask8__U);
1650}
1651
1652static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1653_mm_rsqrt14_sd(__m128d __A__m128d __B)
1654{
1655  return (__m128d__builtin_ia32_rsqrt14sd_mask ((__v2df__A,
1656              (__v2df__B,
1657              (__v2df)
1658              _mm_setzero_pd (),
1659              (__mmask8) -1);
1660}
1661
1662static __inline__ __m128d __DEFAULT_FN_ATTRS128
1663_mm_mask_rsqrt14_sd (__m128d __W__mmask8 __U__m128d __A__m128d __B)
1664{
1665 return (__m128d__builtin_ia32_rsqrt14sd_mask ( (__v2df__A,
1666          (__v2df__B,
1667          (__v2df__W,
1668          (__mmask8__U);
1669}
1670
1671static __inline__ __m128d __DEFAULT_FN_ATTRS128
1672_mm_maskz_rsqrt14_sd (__mmask8 __U__m128d __A__m128d __B)
1673{
1674 return (__m128d__builtin_ia32_rsqrt14sd_mask ( (__v2df__A,
1675          (__v2df__B,
1676          (__v2df_mm_setzero_pd (),
1677          (__mmask8__U);
1678}
1679
1680static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1681_mm512_rcp14_pd(__m512d __A)
1682{
1683  return (__m512d__builtin_ia32_rcp14pd512_mask ((__v8df__A,
1684               (__v8df)
1685               _mm512_setzero_pd (),
1686               (__mmask8) -1);
1687}
1688
1689static __inline__ __m512d __DEFAULT_FN_ATTRS512
1690_mm512_mask_rcp14_pd (__m512d __W__mmask8 __U__m512d __A)
1691{
1692  return (__m512d__builtin_ia32_rcp14pd512_mask ((__v8df__A,
1693                (__v8df__W,
1694                (__mmask8__U);
1695}
1696
1697static __inline__ __m512d __DEFAULT_FN_ATTRS512
1698_mm512_maskz_rcp14_pd (__mmask8 __U__m512d __A)
1699{
1700  return (__m512d__builtin_ia32_rcp14pd512_mask ((__v8df__A,
1701                (__v8df)
1702                _mm512_setzero_pd (),
1703                (__mmask8__U);
1704}
1705
1706static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1707_mm512_rcp14_ps(__m512 __A)
1708{
1709  return (__m512__builtin_ia32_rcp14ps512_mask ((__v16sf__A,
1710              (__v16sf)
1711              _mm512_setzero_ps (),
1712              (__mmask16) -1);
1713}
1714
1715static __inline__ __m512 __DEFAULT_FN_ATTRS512
1716_mm512_mask_rcp14_ps (__m512 __W__mmask16 __U__m512 __A)
1717{
1718  return (__m512__builtin_ia32_rcp14ps512_mask ((__v16sf__A,
1719                   (__v16sf__W,
1720                   (__mmask16__U);
1721}
1722
1723static __inline__ __m512 __DEFAULT_FN_ATTRS512
1724_mm512_maskz_rcp14_ps (__mmask16 __U__m512 __A)
1725{
1726  return (__m512__builtin_ia32_rcp14ps512_mask ((__v16sf__A,
1727                   (__v16sf)
1728                   _mm512_setzero_ps (),
1729                   (__mmask16__U);
1730}
1731
1732static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1733_mm_rcp14_ss(__m128 __A__m128 __B)
1734{
1735  return (__m128__builtin_ia32_rcp14ss_mask ((__v4sf__A,
1736                 (__v4sf__B,
1737                 (__v4sf)
1738                 _mm_setzero_ps (),
1739                 (__mmask8) -1);
1740}
1741
1742static __inline__ __m128 __DEFAULT_FN_ATTRS128
1743_mm_mask_rcp14_ss (__m128 __W__mmask8 __U__m128 __A__m128 __B)
1744{
1745 return (__m128__builtin_ia32_rcp14ss_mask ((__v4sf__A,
1746          (__v4sf__B,
1747          (__v4sf__W,
1748          (__mmask8__U);
1749}
1750
1751static __inline__ __m128 __DEFAULT_FN_ATTRS128
1752_mm_maskz_rcp14_ss (__mmask8 __U__m128 __A__m128 __B)
1753{
1754 return (__m128__builtin_ia32_rcp14ss_mask ((__v4sf__A,
1755          (__v4sf__B,
1756          (__v4sf_mm_setzero_ps (),
1757          (__mmask8__U);
1758}
1759
1760static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1761_mm_rcp14_sd(__m128d __A__m128d __B)
1762{
1763  return (__m128d__builtin_ia32_rcp14sd_mask ((__v2df__A,
1764            (__v2df__B,
1765            (__v2df)
1766            _mm_setzero_pd (),
1767            (__mmask8) -1);
1768}
1769
1770static __inline__ __m128d __DEFAULT_FN_ATTRS128
1771_mm_mask_rcp14_sd (__m128d __W__mmask8 __U__m128d __A__m128d __B)
1772{
1773 return (__m128d__builtin_ia32_rcp14sd_mask ( (__v2df__A,
1774          (__v2df__B,
1775          (__v2df__W,
1776          (__mmask8__U);
1777}
1778
1779static __inline__ __m128d __DEFAULT_FN_ATTRS128
1780_mm_maskz_rcp14_sd (__mmask8 __U__m128d __A__m128d __B)
1781{
1782 return (__m128d__builtin_ia32_rcp14sd_mask ( (__v2df__A,
1783          (__v2df__B,
1784          (__v2df_mm_setzero_pd (),
1785          (__mmask8__U);
1786}
1787
1788static __inline __m512 __DEFAULT_FN_ATTRS512
1789_mm512_floor_ps(__m512 __A)
1790{
1791  return (__m512__builtin_ia32_rndscaleps_mask ((__v16sf__A,
1792                                                  _MM_FROUND_FLOOR,
1793                                                  (__v16sf__A, -1,
1794                                                  _MM_FROUND_CUR_DIRECTION);
1795}
1796
1797static __inline__ __m512 __DEFAULT_FN_ATTRS512
1798_mm512_mask_floor_ps (__m512 __W__mmask16 __U__m512 __A)
1799{
1800  return (__m512__builtin_ia32_rndscaleps_mask ((__v16sf__A,
1801                   _MM_FROUND_FLOOR,
1802                   (__v16sf__W__U,
1803                   _MM_FROUND_CUR_DIRECTION);
1804}
1805
1806static __inline __m512d __DEFAULT_FN_ATTRS512
1807_mm512_floor_pd(__m512d __A)
1808{
1809  return (__m512d__builtin_ia32_rndscalepd_mask ((__v8df__A,
1810                                                   _MM_FROUND_FLOOR,
1811                                                   (__v8df__A, -1,
1812                                                   _MM_FROUND_CUR_DIRECTION);
1813}
1814
1815static __inline__ __m512d __DEFAULT_FN_ATTRS512
1816_mm512_mask_floor_pd (__m512d __W__mmask8 __U__m512d __A)
1817{
1818  return (__m512d__builtin_ia32_rndscalepd_mask ((__v8df__A,
1819                _MM_FROUND_FLOOR,
1820                (__v8df__W__U,
1821                _MM_FROUND_CUR_DIRECTION);
1822}
1823
1824static __inline__ __m512 __DEFAULT_FN_ATTRS512
1825_mm512_mask_ceil_ps (__m512 __W__mmask16 __U__m512 __A)
1826{
1827  return (__m512__builtin_ia32_rndscaleps_mask ((__v16sf__A,
1828                   _MM_FROUND_CEIL,
1829                   (__v16sf__W__U,
1830                   _MM_FROUND_CUR_DIRECTION);
1831}
1832
1833static __inline __m512 __DEFAULT_FN_ATTRS512
1834_mm512_ceil_ps(__m512 __A)
1835{
1836  return (__m512__builtin_ia32_rndscaleps_mask ((__v16sf__A,
1837                                                  _MM_FROUND_CEIL,
1838                                                  (__v16sf__A, -1,
1839                                                  _MM_FROUND_CUR_DIRECTION);
1840}
1841
1842static __inline __m512d __DEFAULT_FN_ATTRS512
1843_mm512_ceil_pd(__m512d __A)
1844{
1845  return (__m512d__builtin_ia32_rndscalepd_mask ((__v8df__A,
1846                                                   _MM_FROUND_CEIL,
1847                                                   (__v8df__A, -1,
1848                                                   _MM_FROUND_CUR_DIRECTION);
1849}
1850
1851static __inline__ __m512d __DEFAULT_FN_ATTRS512
1852_mm512_mask_ceil_pd (__m512d __W__mmask8 __U__m512d __A)
1853{
1854  return (__m512d__builtin_ia32_rndscalepd_mask ((__v8df__A,
1855                _MM_FROUND_CEIL,
1856                (__v8df__W__U,
1857                _MM_FROUND_CUR_DIRECTION);
1858}
1859
1860static __inline __m512i __DEFAULT_FN_ATTRS512
1861_mm512_abs_epi64(__m512i __A)
1862{
1863  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1864}
1865
1866static __inline__ __m512i __DEFAULT_FN_ATTRS512
1867_mm512_mask_abs_epi64 (__m512i __W__mmask8 __U__m512i __A)
1868{
1869  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1870                                             (__v8di)_mm512_abs_epi64(__A),
1871                                             (__v8di)__W);
1872}
1873
1874static __inline__ __m512i __DEFAULT_FN_ATTRS512
1875_mm512_maskz_abs_epi64 (__mmask8 __U__m512i __A)
1876{
1877  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1878                                             (__v8di)_mm512_abs_epi64(__A),
1879                                             (__v8di)_mm512_setzero_si512());
1880}
1881
1882static __inline __m512i __DEFAULT_FN_ATTRS512
1883_mm512_abs_epi32(__m512i __A)
1884{
1885  return (__m512i)__builtin_ia32_pabsd512((__v16si__A);
1886}
1887
1888static __inline__ __m512i __DEFAULT_FN_ATTRS512
1889_mm512_mask_abs_epi32 (__m512i __W__mmask16 __U__m512i __A)
1890{
1891  return (__m512i)__builtin_ia32_selectd_512(__U,
1892                                             (__v16si)_mm512_abs_epi32(__A),
1893                                             (__v16si)__W);
1894}
1895
1896static __inline__ __m512i __DEFAULT_FN_ATTRS512
1897_mm512_maskz_abs_epi32 (__mmask16 __U__m512i __A)
1898{
1899  return (__m512i)__builtin_ia32_selectd_512(__U,
1900                                             (__v16si)_mm512_abs_epi32(__A),
1901                                             (__v16si)_mm512_setzero_si512());
1902}
1903
1904static __inline__ __m128 __DEFAULT_FN_ATTRS128
1905_mm_mask_add_ss(__m128 __W__mmask8 __U,__m128 __A__m128 __B) {
1906  __A = _mm_add_ss(__A__B);
1907  return __builtin_ia32_selectss_128(__U__A__W);
1908}
1909
1910static __inline__ __m128 __DEFAULT_FN_ATTRS128
1911_mm_maskz_add_ss(__mmask8 __U,__m128 __A__m128 __B) {
1912  __A = _mm_add_ss(__A__B);
1913  return __builtin_ia32_selectss_128(__U__A_mm_setzero_ps());
1914}
1915
1916#define _mm_add_round_ss(A, B, R) \
1917  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1918                                          (__v4sf)(__m128)(B), \
1919                                          (__v4sf)_mm_setzero_ps(), \
1920                                          (__mmask8)-1, (int)(R))
1921
1922#define _mm_mask_add_round_ss(W, U, A, B, R) \
1923  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1924                                          (__v4sf)(__m128)(B), \
1925                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
1926                                          (int)(R))
1927
1928#define _mm_maskz_add_round_ss(U, A, B, R) \
1929  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1930                                          (__v4sf)(__m128)(B), \
1931                                          (__v4sf)_mm_setzero_ps(), \
1932                                          (__mmask8)(U), (int)(R))
1933
1934static __inline__ __m128d __DEFAULT_FN_ATTRS128
1935_mm_mask_add_sd(__m128d __W__mmask8 __U,__m128d __A__m128d __B) {
1936  __A = _mm_add_sd(__A__B);
1937  return __builtin_ia32_selectsd_128(__U__A__W);
1938}
1939
1940static __inline__ __m128d __DEFAULT_FN_ATTRS128
1941_mm_maskz_add_sd(__mmask8 __U,__m128d __A__m128d __B) {
1942  __A = _mm_add_sd(__A__B);
1943  return __builtin_ia32_selectsd_128(__U__A_mm_setzero_pd());
1944}
1945#define _mm_add_round_sd(A, B, R) \
1946  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1947                                           (__v2df)(__m128d)(B), \
1948                                           (__v2df)_mm_setzero_pd(), \
1949                                           (__mmask8)-1, (int)(R))
1950
1951#define _mm_mask_add_round_sd(W, U, A, B, R) \
1952  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1953                                           (__v2df)(__m128d)(B), \
1954                                           (__v2df)(__m128d)(W), \
1955                                           (__mmask8)(U), (int)(R))
1956
1957#define _mm_maskz_add_round_sd(U, A, B, R) \
1958  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1959                                           (__v2df)(__m128d)(B), \
1960                                           (__v2df)_mm_setzero_pd(), \
1961                                           (__mmask8)(U), (int)(R))
1962
1963static __inline__ __m512d __DEFAULT_FN_ATTRS512
1964_mm512_mask_add_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
1965  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1966                                              (__v8df)_mm512_add_pd(__A__B),
1967                                              (__v8df)__W);
1968}
1969
1970static __inline__ __m512d __DEFAULT_FN_ATTRS512
1971_mm512_maskz_add_pd(__mmask8 __U__m512d __A__m512d __B) {
1972  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1973                                              (__v8df)_mm512_add_pd(__A__B),
1974                                              (__v8df)_mm512_setzero_pd());
1975}
1976
1977static __inline__ __m512 __DEFAULT_FN_ATTRS512
1978_mm512_mask_add_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
1979  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1980                                             (__v16sf)_mm512_add_ps(__A__B),
1981                                             (__v16sf)__W);
1982}
1983
1984static __inline__ __m512 __DEFAULT_FN_ATTRS512
1985_mm512_maskz_add_ps(__mmask16 __U__m512 __A__m512 __B) {
1986  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1987                                             (__v16sf)_mm512_add_ps(__A__B),
1988                                             (__v16sf)_mm512_setzero_ps());
1989}
1990
1991#define _mm512_add_round_pd(A, B, R) \
1992  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1993                                   (__v8df)(__m512d)(B), (int)(R))
1994
1995#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1996  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1997                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1998                                   (__v8df)(__m512d)(W));
1999
2000#define _mm512_maskz_add_round_pd(U, A, B, R) \
2001  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2002                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
2003                                   (__v8df)_mm512_setzero_pd());
2004
2005#define _mm512_add_round_ps(A, B, R) \
2006  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
2007                                  (__v16sf)(__m512)(B), (int)(R))
2008
2009#define _mm512_mask_add_round_ps(W, U, A, B, R) \
2010  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2011                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2012                                  (__v16sf)(__m512)(W));
2013
2014#define _mm512_maskz_add_round_ps(U, A, B, R) \
2015  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2016                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2017                                  (__v16sf)_mm512_setzero_ps());
2018
2019static __inline__ __m128 __DEFAULT_FN_ATTRS128
2020_mm_mask_sub_ss(__m128 __W__mmask8 __U,__m128 __A__m128 __B) {
2021  __A = _mm_sub_ss(__A__B);
2022  return __builtin_ia32_selectss_128(__U__A__W);
2023}
2024
2025static __inline__ __m128 __DEFAULT_FN_ATTRS128
2026_mm_maskz_sub_ss(__mmask8 __U,__m128 __A__m128 __B) {
2027  __A = _mm_sub_ss(__A__B);
2028  return __builtin_ia32_selectss_128(__U__A_mm_setzero_ps());
2029}
2030#define _mm_sub_round_ss(A, B, R) \
2031  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2032                                          (__v4sf)(__m128)(B), \
2033                                          (__v4sf)_mm_setzero_ps(), \
2034                                          (__mmask8)-1, (int)(R))
2035
2036#define _mm_mask_sub_round_ss(W, U, A, B, R) \
2037  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2038                                          (__v4sf)(__m128)(B), \
2039                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
2040                                          (int)(R))
2041
2042#define _mm_maskz_sub_round_ss(U, A, B, R) \
2043  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2044                                          (__v4sf)(__m128)(B), \
2045                                          (__v4sf)_mm_setzero_ps(), \
2046                                          (__mmask8)(U), (int)(R))
2047
2048static __inline__ __m128d __DEFAULT_FN_ATTRS128
2049_mm_mask_sub_sd(__m128d __W__mmask8 __U,__m128d __A__m128d __B) {
2050  __A = _mm_sub_sd(__A__B);
2051  return __builtin_ia32_selectsd_128(__U__A__W);
2052}
2053
2054static __inline__ __m128d __DEFAULT_FN_ATTRS128
2055_mm_maskz_sub_sd(__mmask8 __U,__m128d __A__m128d __B) {
2056  __A = _mm_sub_sd(__A__B);
2057  return __builtin_ia32_selectsd_128(__U__A_mm_setzero_pd());
2058}
2059
2060#define _mm_sub_round_sd(A, B, R) \
2061  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2062                                           (__v2df)(__m128d)(B), \
2063                                           (__v2df)_mm_setzero_pd(), \
2064                                           (__mmask8)-1, (int)(R))
2065
2066#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2067  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2068                                           (__v2df)(__m128d)(B), \
2069                                           (__v2df)(__m128d)(W), \
2070                                           (__mmask8)(U), (int)(R))
2071
2072#define _mm_maskz_sub_round_sd(U, A, B, R) \
2073  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2074                                           (__v2df)(__m128d)(B), \
2075                                           (__v2df)_mm_setzero_pd(), \
2076                                           (__mmask8)(U), (int)(R))
2077
2078static __inline__ __m512d __DEFAULT_FN_ATTRS512
2079_mm512_mask_sub_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
2080  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2081                                              (__v8df)_mm512_sub_pd(__A__B),
2082                                              (__v8df)__W);
2083}
2084
2085static __inline__ __m512d __DEFAULT_FN_ATTRS512
2086_mm512_maskz_sub_pd(__mmask8 __U__m512d __A__m512d __B) {
2087  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2088                                              (__v8df)_mm512_sub_pd(__A__B),
2089                                              (__v8df)_mm512_setzero_pd());
2090}
2091
2092static __inline__ __m512 __DEFAULT_FN_ATTRS512
2093_mm512_mask_sub_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
2094  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2095                                             (__v16sf)_mm512_sub_ps(__A__B),
2096                                             (__v16sf)__W);
2097}
2098
2099static __inline__ __m512 __DEFAULT_FN_ATTRS512
2100_mm512_maskz_sub_ps(__mmask16 __U__m512 __A__m512 __B) {
2101  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2102                                             (__v16sf)_mm512_sub_ps(__A__B),
2103                                             (__v16sf)_mm512_setzero_ps());
2104}
2105
2106#define _mm512_sub_round_pd(A, B, R) \
2107  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2108                                   (__v8df)(__m512d)(B), (int)(R))
2109
2110#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2111  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2112                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2113                                   (__v8df)(__m512d)(W));
2114
2115#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2116  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2117                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2118                                   (__v8df)_mm512_setzero_pd());
2119
2120#define _mm512_sub_round_ps(A, B, R) \
2121  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2122                                  (__v16sf)(__m512)(B), (int)(R))
2123
2124#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2125  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2126                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2127                                  (__v16sf)(__m512)(W));
2128
2129#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2130  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2131                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2132                                  (__v16sf)_mm512_setzero_ps());
2133
2134static __inline__ __m128 __DEFAULT_FN_ATTRS128
2135_mm_mask_mul_ss(__m128 __W__mmask8 __U,__m128 __A__m128 __B) {
2136  __A = _mm_mul_ss(__A__B);
2137  return __builtin_ia32_selectss_128(__U__A__W);
2138}
2139
2140static __inline__ __m128 __DEFAULT_FN_ATTRS128
2141_mm_maskz_mul_ss(__mmask8 __U,__m128 __A__m128 __B) {
2142  __A = _mm_mul_ss(__A__B);
2143  return __builtin_ia32_selectss_128(__U__A_mm_setzero_ps());
2144}
2145#define _mm_mul_round_ss(A, B, R) \
2146  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2147                                          (__v4sf)(__m128)(B), \
2148                                          (__v4sf)_mm_setzero_ps(), \
2149                                          (__mmask8)-1, (int)(R))
2150
2151#define _mm_mask_mul_round_ss(W, U, A, B, R) \
2152  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2153                                          (__v4sf)(__m128)(B), \
2154                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
2155                                          (int)(R))
2156
2157#define _mm_maskz_mul_round_ss(U, A, B, R) \
2158  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2159                                          (__v4sf)(__m128)(B), \
2160                                          (__v4sf)_mm_setzero_ps(), \
2161                                          (__mmask8)(U), (int)(R))
2162
2163static __inline__ __m128d __DEFAULT_FN_ATTRS128
2164_mm_mask_mul_sd(__m128d __W__mmask8 __U,__m128d __A__m128d __B) {
2165  __A = _mm_mul_sd(__A__B);
2166  return __builtin_ia32_selectsd_128(__U__A__W);
2167}
2168
2169static __inline__ __m128d __DEFAULT_FN_ATTRS128
2170_mm_maskz_mul_sd(__mmask8 __U,__m128d __A__m128d __B) {
2171  __A = _mm_mul_sd(__A__B);
2172  return __builtin_ia32_selectsd_128(__U__A_mm_setzero_pd());
2173}
2174
2175#define _mm_mul_round_sd(A, B, R) \
2176  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2177                                           (__v2df)(__m128d)(B), \
2178                                           (__v2df)_mm_setzero_pd(), \
2179                                           (__mmask8)-1, (int)(R))
2180
2181#define _mm_mask_mul_round_sd(W, U, A, B, R) \
2182  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2183                                           (__v2df)(__m128d)(B), \
2184                                           (__v2df)(__m128d)(W), \
2185                                           (__mmask8)(U), (int)(R))
2186
2187#define _mm_maskz_mul_round_sd(U, A, B, R) \
2188  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2189                                           (__v2df)(__m128d)(B), \
2190                                           (__v2df)_mm_setzero_pd(), \
2191                                           (__mmask8)(U), (int)(R))
2192
2193static __inline__ __m512d __DEFAULT_FN_ATTRS512
2194_mm512_mask_mul_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
2195  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2196                                              (__v8df)_mm512_mul_pd(__A__B),
2197                                              (__v8df)__W);
2198}
2199
2200static __inline__ __m512d __DEFAULT_FN_ATTRS512
2201_mm512_maskz_mul_pd(__mmask8 __U__m512d __A__m512d __B) {
2202  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2203                                              (__v8df)_mm512_mul_pd(__A__B),
2204                                              (__v8df)_mm512_setzero_pd());
2205}
2206
2207static __inline__ __m512 __DEFAULT_FN_ATTRS512
2208_mm512_mask_mul_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
2209  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2210                                             (__v16sf)_mm512_mul_ps(__A__B),
2211                                             (__v16sf)__W);
2212}
2213
2214static __inline__ __m512 __DEFAULT_FN_ATTRS512
2215_mm512_maskz_mul_ps(__mmask16 __U__m512 __A__m512 __B) {
2216  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2217                                             (__v16sf)_mm512_mul_ps(__A__B),
2218                                             (__v16sf)_mm512_setzero_ps());
2219}
2220
2221#define _mm512_mul_round_pd(A, B, R) \
2222  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
2223                                   (__v8df)(__m512d)(B), (int)(R))
2224
2225#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
2226  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2227                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2228                                   (__v8df)(__m512d)(W));
2229
2230#define _mm512_maskz_mul_round_pd(U, A, B, R) \
2231  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2232                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2233                                   (__v8df)_mm512_setzero_pd());
2234
2235#define _mm512_mul_round_ps(A, B, R) \
2236  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2237                                  (__v16sf)(__m512)(B), (int)(R))
2238
2239#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
2240  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2241                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2242                                  (__v16sf)(__m512)(W));
2243
2244#define _mm512_maskz_mul_round_ps(U, A, B, R) \
2245  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2246                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2247                                  (__v16sf)_mm512_setzero_ps());
2248
2249static __inline__ __m128 __DEFAULT_FN_ATTRS128
2250_mm_mask_div_ss(__m128 __W__mmask8 __U,__m128 __A__m128 __B) {
2251  __A = _mm_div_ss(__A__B);
2252  return __builtin_ia32_selectss_128(__U__A__W);
2253}
2254
2255static __inline__ __m128 __DEFAULT_FN_ATTRS128
2256_mm_maskz_div_ss(__mmask8 __U,__m128 __A__m128 __B) {
2257  __A = _mm_div_ss(__A__B);
2258  return __builtin_ia32_selectss_128(__U__A_mm_setzero_ps());
2259}
2260
2261#define _mm_div_round_ss(A, B, R) \
2262  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2263                                          (__v4sf)(__m128)(B), \
2264                                          (__v4sf)_mm_setzero_ps(), \
2265                                          (__mmask8)-1, (int)(R))
2266
2267#define _mm_mask_div_round_ss(W, U, A, B, R) \
2268  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2269                                          (__v4sf)(__m128)(B), \
2270                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
2271                                          (int)(R))
2272
2273#define _mm_maskz_div_round_ss(U, A, B, R) \
2274  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2275                                          (__v4sf)(__m128)(B), \
2276                                          (__v4sf)_mm_setzero_ps(), \
2277                                          (__mmask8)(U), (int)(R))
2278
2279static __inline__ __m128d __DEFAULT_FN_ATTRS128
2280_mm_mask_div_sd(__m128d __W__mmask8 __U,__m128d __A__m128d __B) {
2281  __A = _mm_div_sd(__A__B);
2282  return __builtin_ia32_selectsd_128(__U__A__W);
2283}
2284
2285static __inline__ __m128d __DEFAULT_FN_ATTRS128
2286_mm_maskz_div_sd(__mmask8 __U,__m128d __A__m128d __B) {
2287  __A = _mm_div_sd(__A__B);
2288  return __builtin_ia32_selectsd_128(__U__A_mm_setzero_pd());
2289}
2290
2291#define _mm_div_round_sd(A, B, R) \
2292  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2293                                           (__v2df)(__m128d)(B), \
2294                                           (__v2df)_mm_setzero_pd(), \
2295                                           (__mmask8)-1, (int)(R))
2296
2297#define _mm_mask_div_round_sd(W, U, A, B, R) \
2298  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2299                                           (__v2df)(__m128d)(B), \
2300                                           (__v2df)(__m128d)(W), \
2301                                           (__mmask8)(U), (int)(R))
2302
2303#define _mm_maskz_div_round_sd(U, A, B, R) \
2304  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2305                                           (__v2df)(__m128d)(B), \
2306                                           (__v2df)_mm_setzero_pd(), \
2307                                           (__mmask8)(U), (int)(R))
2308
2309static __inline __m512d __DEFAULT_FN_ATTRS512
2310_mm512_div_pd(__m512d __a__m512d __b)
2311{
2312  return (__m512d)((__v8df)__a/(__v8df)__b);
2313}
2314
2315static __inline__ __m512d __DEFAULT_FN_ATTRS512
2316_mm512_mask_div_pd(__m512d __W__mmask8 __U__m512d __A__m512d __B) {
2317  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2318                                              (__v8df)_mm512_div_pd(__A__B),
2319                                              (__v8df)__W);
2320}
2321
2322static __inline__ __m512d __DEFAULT_FN_ATTRS512
2323_mm512_maskz_div_pd(__mmask8 __U__m512d __A__m512d __B) {
2324  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2325                                              (__v8df)_mm512_div_pd(__A__B),
2326                                              (__v8df)_mm512_setzero_pd());
2327}
2328
2329static __inline __m512 __DEFAULT_FN_ATTRS512
2330_mm512_div_ps(__m512 __a__m512 __b)
2331{
2332  return (__m512)((__v16sf)__a/(__v16sf)__b);
2333}
2334
2335static __inline__ __m512 __DEFAULT_FN_ATTRS512
2336_mm512_mask_div_ps(__m512 __W__mmask16 __U__m512 __A__m512 __B) {
2337  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2338                                             (__v16sf)_mm512_div_ps(__A__B),
2339                                             (__v16sf)__W);
2340}
2341
2342static __inline__ __m512 __DEFAULT_FN_ATTRS512
2343_mm512_maskz_div_ps(__mmask16 __U__m512 __A__m512 __B) {
2344  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2345                                             (__v16sf)_mm512_div_ps(__A__B),
2346                                             (__v16sf)_mm512_setzero_ps());
2347}
2348
2349#define _mm512_div_round_pd(A, B, R) \
2350  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
2351                                   (__v8df)(__m512d)(B), (int)(R))
2352
2353#define _mm512_mask_div_round_pd(W, U, A, B, R) \
2354  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2355                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2356                                   (__v8df)(__m512d)(W));
2357
2358#define _mm512_maskz_div_round_pd(U, A, B, R) \
2359  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2360                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2361                                   (__v8df)_mm512_setzero_pd());
2362
2363#define _mm512_div_round_ps(A, B, R) \
2364  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
2365                                  (__v16sf)(__m512)(B), (int)(R))
2366
2367#define _mm512_mask_div_round_ps(W, U, A, B, R) \
2368  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2369                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2370                                  (__v16sf)(__m512)(W));
2371
2372#define _mm512_maskz_div_round_ps(U, A, B, R) \
2373  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2374                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2375                                  (__v16sf)_mm512_setzero_ps());
2376
2377#define _mm512_roundscale_ps(A, B) \
2378  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2379                                         (__v16sf)_mm512_undefined_ps(), \
2380                                         (__mmask16)-1, \
2381                                         _MM_FROUND_CUR_DIRECTION)
2382
2383#define _mm512_mask_roundscale_ps(A, B, C, imm) \
2384  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2385                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
2386                                         _MM_FROUND_CUR_DIRECTION)
2387
2388#define _mm512_maskz_roundscale_ps(A, B, imm) \
2389  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2390                                         (__v16sf)_mm512_setzero_ps(), \
2391                                         (__mmask16)(A), \
2392                                         _MM_FROUND_CUR_DIRECTION)
2393
2394#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
2395  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2396                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
2397                                         (int)(R))
2398
2399#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
2400  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2401                                         (__v16sf)_mm512_setzero_ps(), \
2402                                         (__mmask16)(A), (int)(R))
2403
2404#define _mm512_roundscale_round_ps(A, imm, R) \
2405  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2406                                         (__v16sf)_mm512_undefined_ps(), \
2407                                         (__mmask16)-1, (int)(R))
2408
2409#define _mm512_roundscale_pd(A, B) \
2410  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
2411                                          (__v8df)_mm512_undefined_pd(), \
2412                                          (__mmask8)-1, \
2413                                          _MM_FROUND_CUR_DIRECTION)
2414
2415#define _mm512_mask_roundscale_pd(A, B, C, imm) \
2416  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2417                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
2418                                          _MM_FROUND_CUR_DIRECTION)
2419
2420#define _mm512_maskz_roundscale_pd(A, B, imm) \
2421  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2422                                          (__v8df)_mm512_setzero_pd(), \
2423                                          (__mmask8)(A), \
2424                                          _MM_FROUND_CUR_DIRECTION)
2425
2426#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
2427  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2428                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
2429                                          (int)(R))
2430
2431#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
2432  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2433                                          (__v8df)_mm512_setzero_pd(), \
2434                                          (__mmask8)(A), (int)(R))
2435
2436#define _mm512_roundscale_round_pd(A, imm, R) \
2437  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
2438                                          (__v8df)_mm512_undefined_pd(), \
2439                                          (__mmask8)-1, (int)(R))
2440
2441#define _mm512_fmadd_round_pd(A, B, C, R) \
2442  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2443                                           (__v8df)(__m512d)(B), \
2444                                           (__v8df)(__m512d)(C), \
2445                                           (__mmask8)-1, (int)(R))
2446
2447
2448#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
2449  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2450                                           (__v8df)(__m512d)(B), \
2451                                           (__v8df)(__m512d)(C), \
2452                                           (__mmask8)(U), (int)(R))
2453
2454
2455#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
2456  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2457                                            (__v8df)(__m512d)(B), \
2458                                            (__v8df)(__m512d)(C), \
2459                                            (__mmask8)(U), (int)(R))
2460
2461
2462#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
2463  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2464                                            (__v8df)(__m512d)(B), \
2465                                            (__v8df)(__m512d)(C), \
2466                                            (__mmask8)(U), (int)(R))
2467
2468
2469#define _mm512_fmsub_round_pd(A, B, C, R) \
2470  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2471                                           (__v8df)(__m512d)(B), \
2472                                           -(__v8df)(__m512d)(C), \
2473                                           (__mmask8)-1, (int)(R))
2474
2475
2476#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
2477  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2478                                           (__v8df)(__m512d)(B), \
2479                                           -(__v8df)(__m512d)(C), \
2480                                           (__mmask8)(U), (int)(R))
2481
2482
2483#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
2484  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2485                                            (__v8df)(__m512d)(B), \
2486                                            -(__v8df)(__m512d)(C), \
2487                                            (__mmask8)(U), (int)(R))
2488
2489
2490#define _mm512_fnmadd_round_pd(A, B, C, R) \
2491  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2492                                           (__v8df)(__m512d)(B), \
2493                                           (__v8df)(__m512d)(C), \
2494                                           (__mmask8)-1, (int)(R))
2495
2496
2497#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
2498  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2499                                            (__v8df)(__m512d)(B), \
2500                                            (__v8df)(__m512d)(C), \
2501                                            (__mmask8)(U), (int)(R))
2502
2503
2504#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
2505  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2506                                            (__v8df)(__m512d)(B), \
2507                                            (__v8df)(__m512d)(C), \
2508                                            (__mmask8)(U), (int)(R))
2509
2510
2511#define _mm512_fnmsub_round_pd(A, B, C, R) \
2512  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2513                                           (__v8df)(__m512d)(B), \
2514                                           -(__v8df)(__m512d)(C), \
2515                                           (__mmask8)-1, (int)(R))
2516
2517
2518#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
2519  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2520                                            (__v8df)(__m512d)(B), \
2521                                            -(__v8df)(__m512d)(C), \
2522                                            (__mmask8)(U), (int)(R))
2523
2524
2525static __inline__ __m512d __DEFAULT_FN_ATTRS512
2526_mm512_fmadd_pd(__m512d __A__m512d __B__m512d __C)
2527{
2528  return (__m512d__builtin_ia32_vfmaddpd512_mask ((__v8df__A,
2529                                                    (__v8df__B,
2530                                                    (__v8df__C,
2531                                                    (__mmask8) -1,
2532                                                    _MM_FROUND_CUR_DIRECTION);
2533}
2534
2535static __inline__ __m512d __DEFAULT_FN_ATTRS512
2536_mm512_mask_fmadd_pd(__m512d __A__mmask8 __U__m512d __B__m512d __C)
2537{
2538  return (__m512d__builtin_ia32_vfmaddpd512_mask ((__v8df__A,
2539                                                    (__v8df__B,
2540                                                    (__v8df__C,
2541                                                    (__mmask8__U,
2542                                                    _MM_FROUND_CUR_DIRECTION);
2543}
2544
2545static __inline__ __m512d __DEFAULT_FN_ATTRS512
2546_mm512_mask3_fmadd_pd(__m512d __A__m512d __B__m512d __C__mmask8 __U)
2547{
2548  return (__m512d__builtin_ia32_vfmaddpd512_mask3 ((__v8df__A,
2549                                                     (__v8df__B,
2550                                                     (__v8df__C,
2551                                                     (__mmask8__U,
2552                                                     _MM_FROUND_CUR_DIRECTION);
2553}
2554
2555static __inline__ __m512d __DEFAULT_FN_ATTRS512
2556_mm512_maskz_fmadd_pd(__mmask8 __U__m512d __A__m512d __B__m512d __C)
2557{
2558  return (__m512d__builtin_ia32_vfmaddpd512_maskz ((__v8df__A,
2559                                                     (__v8df__B,
2560                                                     (__v8df__C,
2561                                                     (__mmask8__U,
2562                                                     _MM_FROUND_CUR_DIRECTION);
2563}
2564
2565static __inline__ __m512d __DEFAULT_FN_ATTRS512
2566_mm512_fmsub_pd(__m512d __A__m512d __B__m512d __C)
2567{
2568  return (__m512d__builtin_ia32_vfmaddpd512_mask ((__v8df__A,
2569                                                    (__v8df__B,
2570                                                    -(__v8df__C,
2571                                                    (__mmask8) -1,
2572                                                    _MM_FROUND_CUR_DIRECTION);
2573}
2574
2575static __inline__ __m512d __DEFAULT_FN_ATTRS512
2576_mm512_mask_fmsub_pd(__m512d __A__mmask8 __U__m512d __B__m512d __C)
2577{
2578  return (__m512d__builtin_ia32_vfmaddpd512_mask ((__v8df__A,
2579                                                    (__v8df__B,
2580                                                    -(__v8df__C,
2581                                                    (__mmask8__U,
2582                                                    _MM_FROUND_CUR_DIRECTION);
2583}
2584
2585static __inline__ __m512d __DEFAULT_FN_ATTRS512
2586_mm512_maskz_fmsub_pd(__mmask8 __U__m512d __A__m512d __B__m512d __C)
2587{
2588  return (__m512d__builtin_ia32_vfmaddpd512_maskz ((__v8df__A,
2589                                                     (__v8df__B,
2590                                                     -(__v8df__C,
2591                                                     (__mmask8__U,
2592                                                     _MM_FROUND_CUR_DIRECTION);
2593}
2594
2595static __inline__ __m512d __DEFAULT_FN_ATTRS512
2596_mm512_fnmadd_pd(__m512d __A__m512d __B__m512d __C)
2597{
2598  return (__m512d__builtin_ia32_vfmaddpd512_mask ((__v8df__A,
2599                                                    -(__v8df__B,
2600                                                    (__v8df__C,
2601                                                    (__mmask8) -1,
2602                                                    _MM_FROUND_CUR_DIRECTION);
2603}
2604
2605static __inline__ __m512d __DEFAULT_FN_ATTRS512
2606_mm512_mask3_fnmadd_pd(__m512d __A__m512d __B__m512d __C__mmask8 __U)
2607{
2608  return (__m512d__builtin_ia32_vfmaddpd512_mask3 (-(__v8df__A,
2609                                                     (__v8df__B,
2610                                                     (__v8df__C,
2611                                                     (__mmask8__U,
2612                                                     _MM_FROUND_CUR_DIRECTION);
2613}
2614
2615static __inline__ __m512d __DEFAULT_FN_ATTRS512
2616_mm512_maskz_fnmadd_pd(__mmask8 __U__m512d __A__m512d __B__m512d __C)
2617{
2618  return (__m512d__builtin_ia32_vfmaddpd512_maskz (-(__v8df__A,
2619                                                     (__v8df__B,
2620                                                     (__v8df__C,
2621                                                     (__mmask8__U,
2622                                                     _MM_FROUND_CUR_DIRECTION);
2623}
2624
2625static __inline__ __m512d __DEFAULT_FN_ATTRS512
2626_mm512_fnmsub_pd(__m512d __A__m512d __B__m512d __C)
2627{
2628  return (__m512d__builtin_ia32_vfmaddpd512_mask ((__v8df__A,
2629                                                    -(__v8df__B,
2630                                                    -(__v8df__C,
2631                                                    (__mmask8) -1,
2632                                                    _MM_FROUND_CUR_DIRECTION);
2633}
2634
2635static __inline__ __m512d __DEFAULT_FN_ATTRS512
2636_mm512_maskz_fnmsub_pd(__mmask8 __U__m512d __A__m512d __B__m512d __C)
2637{
2638  return (__m512d__builtin_ia32_vfmaddpd512_maskz (-(__v8df__A,
2639                                                     (__v8df__B,
2640                                                     -(__v8df__C,
2641                                                     (__mmask8__U,
2642                                                     _MM_FROUND_CUR_DIRECTION);
2643}
2644
2645#define _mm512_fmadd_round_ps(A, B, C, R) \
2646  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2647                                          (__v16sf)(__m512)(B), \
2648                                          (__v16sf)(__m512)(C), \
2649                                          (__mmask16)-1, (int)(R))
2650
2651
2652#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
2653  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2654                                          (__v16sf)(__m512)(B), \
2655                                          (__v16sf)(__m512)(C), \
2656                                          (__mmask16)(U), (int)(R))
2657
2658
2659#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
2660  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2661                                           (__v16sf)(__m512)(B), \
2662                                           (__v16sf)(__m512)(C), \
2663                                           (__mmask16)(U), (int)(R))
2664
2665
2666#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
2667  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2668                                           (__v16sf)(__m512)(B), \
2669                                           (__v16sf)(__m512)(C), \
2670                                           (__mmask16)(U), (int)(R))
2671
2672
2673#define _mm512_fmsub_round_ps(A, B, C, R) \
2674  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2675                                          (__v16sf)(__m512)(B), \
2676                                          -(__v16sf)(__m512)(C), \
2677                                          (__mmask16)-1, (int)(R))
2678
2679
2680#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
2681  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2682                                          (__v16sf)(__m512)(B), \
2683                                          -(__v16sf)(__m512)(C), \
2684                                          (__mmask16)(U), (int)(R))
2685
2686
2687#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
2688  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2689                                           (__v16sf)(__m512)(B), \
2690                                           -(__v16sf)(__m512)(C), \
2691                                           (__mmask16)(U), (int)(R))
2692
2693
2694#define _mm512_fnmadd_round_ps(A, B, C, R) \
2695  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2696                                          -(__v16sf)(__m512)(B), \
2697                                          (__v16sf)(__m512)(C), \
2698                                          (__mmask16)-1, (int)(R))
2699
2700
2701#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
2702  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2703                                           (__v16sf)(__m512)(B), \
2704                                           (__v16sf)(__m512)(C), \
2705                                           (__mmask16)(U), (int)(R))
2706
2707
2708#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
2709  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2710                                           (__v16sf)(__m512)(B), \
2711                                           (__v16sf)(__m512)(C), \
2712                                           (__mmask16)(U), (int)(R))
2713
2714
2715#define _mm512_fnmsub_round_ps(A, B, C, R) \
2716  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2717                                          -(__v16sf)(__m512)(B), \
2718                                          -(__v16sf)(__m512)(C), \
2719                                          (__mmask16)-1, (int)(R))
2720
2721
2722#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
2723  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2724                                           (__v16sf)(__m512)(B), \
2725                                           -(__v16sf)(__m512)(C), \
2726                                           (__mmask16)(U), (int)(R))
2727
2728
2729static __inline__ __m512 __DEFAULT_FN_ATTRS512
2730_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2731{
2732  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2733                                                   (__v16sf) __B,
2734                                                   (__v16sf) __C,
2735                                                   (__mmask16) -1,
2736                                                   _MM_FROUND_CUR_DIRECTION);
2737}
2738
2739static __inline__ __m512 __DEFAULT_FN_ATTRS512
2740_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2741{
2742  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2743                                                   (__v16sf) __B,
2744                                                   (__v16sf) __C,
2745                                                   (__mmask16) __U,
2746                                                   _MM_FROUND_CUR_DIRECTION);
2747}
2748
2749static __inline__ __m512 __DEFAULT_FN_ATTRS512
2750_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2751{
2752  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2753                                                    (__v16sf) __B,
2754                                                    (__v16sf) __C,
2755                                                    (__mmask16) __U,
2756                                                    _MM_FROUND_CUR_DIRECTION);
2757}
2758
2759static __inline__ __m512 __DEFAULT_FN_ATTRS512
2760_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2761{
2762  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2763                                                    (__v16sf) __B,
2764                                                    (__v16sf) __C,
2765                                                    (__mmask16) __U,
2766                                                    _MM_FROUND_CUR_DIRECTION);
2767}
2768
2769static __inline__ __m512 __DEFAULT_FN_ATTRS512
2770_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2771{
2772  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2773                                                   (__v16sf) __B,
2774                                                   -(__v16sf) __C,
2775                                                   (__mmask16) -1,
2776                                                   _MM_FROUND_CUR_DIRECTION);
2777}
2778
2779static __inline__ __m512 __DEFAULT_FN_ATTRS512
2780_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2781{
2782  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2783                                                   (__v16sf) __B,
2784                                                   -(__v16sf) __C,
2785                                                   (__mmask16) __U,
2786                                                   _MM_FROUND_CUR_DIRECTION);
2787}
2788
2789static __inline__ __m512 __DEFAULT_FN_ATTRS512
2790_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2791{
2792  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2793                                                    (__v16sf) __B,
2794                                                    -(__v16sf) __C,
2795                                                    (__mmask16) __U,
2796                                                    _MM_FROUND_CUR_DIRECTION);
2797}
2798
2799static __inline__ __m512 __DEFAULT_FN_ATTRS512
2800_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2801{
2802  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2803                                                   -(__v16sf) __B,
2804                                                   (__v16sf) __C,
2805                                                   (__mmask16) -1,
2806                                                   _MM_FROUND_CUR_DIRECTION);
2807}
2808
2809static __inline__ __m512 __DEFAULT_FN_ATTRS512
2810_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2811{
2812  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2813                                                    (__v16sf) __B,
2814                                                    (__v16sf) __C,
2815                                                    (__mmask16) __U,
2816                                                    _MM_FROUND_CUR_DIRECTION);
2817}
2818
2819static __inline__ __m512 __DEFAULT_FN_ATTRS512
2820_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2821{
2822  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2823                                                    (__v16sf) __B,
2824                                                    (__v16sf) __C,
2825                                                    (__mmask16) __U,
2826                                                    _MM_FROUND_CUR_DIRECTION);
2827}
2828
2829static __inline__ __m512 __DEFAULT_FN_ATTRS512
2830_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2831{
2832  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2833                                                   -(__v16sf) __B,
2834                                                   -(__v16sf) __C,
2835                                                   (__mmask16) -1,
2836                                                   _MM_FROUND_CUR_DIRECTION);
2837}
2838
2839static __inline__ __m512 __DEFAULT_FN_ATTRS512
2840_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2841{
2842  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2843                                                    (__v16sf) __B,
2844                                                    -(__v16sf) __C,
2845                                                    (__mmask16) __U,
2846                                                    _MM_FROUND_CUR_DIRECTION);
2847}
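/* Illustrative usage sketch (not part of the original header): computing
 * a*b + c across all 16 float lanes, with the maskz variant zeroing the lanes
 * whose mask bit is clear.  Assumes <immintrin.h> is included and the
 * translation unit is compiled with AVX512F enabled (e.g. -mavx512f).
 *
 *   __m512 a = _mm512_set1_ps(2.0f);
 *   __m512 b = _mm512_set1_ps(3.0f);
 *   __m512 c = _mm512_set1_ps(1.0f);
 *   __m512 r0 = _mm512_fmadd_ps(a, b, c);                // every lane is 7.0f
 *   __m512 r1 = _mm512_maskz_fmadd_ps(0x00FF, a, b, c);  // lanes 0..7: 7.0f, lanes 8..15: 0.0f
 */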
2848
2849#define _mm512_fmaddsub_round_pd(A, B, C, R) \
2850  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2851                                              (__v8df)(__m512d)(B), \
2852                                              (__v8df)(__m512d)(C), \
2853                                              (__mmask8)-1, (int)(R))
2854
2855
2856#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
2857  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2858                                              (__v8df)(__m512d)(B), \
2859                                              (__v8df)(__m512d)(C), \
2860                                              (__mmask8)(U), (int)(R))
2861
2862
2863#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
2864  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2865                                               (__v8df)(__m512d)(B), \
2866                                               (__v8df)(__m512d)(C), \
2867                                               (__mmask8)(U), (int)(R))
2868
2869
2870#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
2871  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2872                                               (__v8df)(__m512d)(B), \
2873                                               (__v8df)(__m512d)(C), \
2874                                               (__mmask8)(U), (int)(R))
2875
2876
2877#define _mm512_fmsubadd_round_pd(A, B, C, R) \
2878  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2879                                              (__v8df)(__m512d)(B), \
2880                                              -(__v8df)(__m512d)(C), \
2881                                              (__mmask8)-1, (int)(R))
2882
2883
2884#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
2885  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2886                                              (__v8df)(__m512d)(B), \
2887                                              -(__v8df)(__m512d)(C), \
2888                                              (__mmask8)(U), (int)(R))
2889
2890
2891#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
2892  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2893                                               (__v8df)(__m512d)(B), \
2894                                               -(__v8df)(__m512d)(C), \
2895                                               (__mmask8)(U), (int)(R))
2896
2897
2898static __inline__ __m512d __DEFAULT_FN_ATTRS512
2899_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2900{
2901  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2902                                                      (__v8df) __B,
2903                                                      (__v8df) __C,
2904                                                      (__mmask8) -1,
2905                                                      _MM_FROUND_CUR_DIRECTION);
2906}
2907
2908static __inline__ __m512d __DEFAULT_FN_ATTRS512
2909_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2910{
2911  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2912                                                      (__v8df) __B,
2913                                                      (__v8df) __C,
2914                                                      (__mmask8) __U,
2915                                                      _MM_FROUND_CUR_DIRECTION);
2916}
2917
2918static __inline__ __m512d __DEFAULT_FN_ATTRS512
2919_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2920{
2921  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2922                                                       (__v8df) __B,
2923                                                       (__v8df) __C,
2924                                                       (__mmask8) __U,
2925                                                       _MM_FROUND_CUR_DIRECTION);
2926}
2927
2928static __inline__ __m512d __DEFAULT_FN_ATTRS512
2929_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2930{
2931  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2932                                                       (__v8df) __B,
2933                                                       (__v8df) __C,
2934                                                       (__mmask8) __U,
2935                                                       _MM_FROUND_CUR_DIRECTION);
2936}
2937
2938static __inline__ __m512d __DEFAULT_FN_ATTRS512
2939_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2940{
2941  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2942                                                       (__v8df) __B,
2943                                                       -(__v8df) __C,
2944                                                       (__mmask8) -1,
2945                                                       _MM_FROUND_CUR_DIRECTION);
2946}
2947
2948static __inline__ __m512d __DEFAULT_FN_ATTRS512
2949_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2950{
2951  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2952                                                       (__v8df) __B,
2953                                                       -(__v8df) __C,
2954                                                       (__mmask8) __U,
2955                                                       _MM_FROUND_CUR_DIRECTION);
2956}
2957
2958static __inline__ __m512d __DEFAULT_FN_ATTRS512
2959_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2960{
2961  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2962                                                        (__v8df) __B,
2963                                                        -(__v8df) __C,
2964                                                        (__mmask8) __U,
2965                                                        _MM_FROUND_CUR_DIRECTION);
2966}
2967
2968#define _mm512_fmaddsub_round_ps(A, B, C, R) \
2969  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2970                                             (__v16sf)(__m512)(B), \
2971                                             (__v16sf)(__m512)(C), \
2972                                             (__mmask16)-1, (int)(R))
2973
2974
2975#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
2976  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2977                                             (__v16sf)(__m512)(B), \
2978                                             (__v16sf)(__m512)(C), \
2979                                             (__mmask16)(U), (int)(R))
2980
2981
2982#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
2983  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2984                                              (__v16sf)(__m512)(B), \
2985                                              (__v16sf)(__m512)(C), \
2986                                              (__mmask16)(U), (int)(R))
2987
2988
2989#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
2990  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2991                                              (__v16sf)(__m512)(B), \
2992                                              (__v16sf)(__m512)(C), \
2993                                              (__mmask16)(U), (int)(R))
2994
2995
2996#define _mm512_fmsubadd_round_ps(A, B, C, R) \
2997  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2998                                             (__v16sf)(__m512)(B), \
2999                                             -(__v16sf)(__m512)(C), \
3000                                             (__mmask16)-1, (int)(R))
3001
3002
3003#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3004  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3005                                             (__v16sf)(__m512)(B), \
3006                                             -(__v16sf)(__m512)(C), \
3007                                             (__mmask16)(U), (int)(R))
3008
3009
3010#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3011  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3012                                              (__v16sf)(__m512)(B), \
3013                                              -(__v16sf)(__m512)(C), \
3014                                              (__mmask16)(U), (int)(R))
3015
3016
3017static __inline__ __m512 __DEFAULT_FN_ATTRS512
3018_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3019{
3020  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3021                                                      (__v16sf) __B,
3022                                                      (__v16sf) __C,
3023                                                      (__mmask16) -1,
3024                                                      _MM_FROUND_CUR_DIRECTION);
3025}
3026
3027static __inline__ __m512 __DEFAULT_FN_ATTRS512
3028_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3029{
3030  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3031                                                      (__v16sf) __B,
3032                                                      (__v16sf) __C,
3033                                                      (__mmask16) __U,
3034                                                      _MM_FROUND_CUR_DIRECTION);
3035}
3036
3037static __inline__ __m512 __DEFAULT_FN_ATTRS512
3038_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3039{
3040  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3041                                                       (__v16sf) __B,
3042                                                       (__v16sf) __C,
3043                                                       (__mmask16) __U,
3044                                                       _MM_FROUND_CUR_DIRECTION);
3045}
3046
3047static __inline__ __m512 __DEFAULT_FN_ATTRS512
3048_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3049{
3050  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3051                                                       (__v16sf) __B,
3052                                                       (__v16sf) __C,
3053                                                       (__mmask16) __U,
3054                                                       _MM_FROUND_CUR_DIRECTION);
3055}
3056
3057static __inline__ __m512 __DEFAULT_FN_ATTRS512
3058_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3059{
3060  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3061                                                      (__v16sf) __B,
3062                                                      -(__v16sf) __C,
3063                                                      (__mmask16) -1,
3064                                                      _MM_FROUND_CUR_DIRECTION);
3065}
3066
3067static __inline__ __m512 __DEFAULT_FN_ATTRS512
3068_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3069{
3070  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3071                                                      (__v16sf) __B,
3072                                                      -(__v16sf) __C,
3073                                                      (__mmask16) __U,
3074                                                      _MM_FROUND_CUR_DIRECTION);
3075}
3076
3077static __inline__ __m512 __DEFAULT_FN_ATTRS512
3078_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3079{
3080  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3081                                                       (__v16sf) __B,
3082                                                       -(__v16sf) __C,
3083                                                       (__mmask16) __U,
3084                                                       _MM_FROUND_CUR_DIRECTION);
3085}
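/* Illustrative sketch (not part of the original header): _mm512_fmaddsub_ps
 * alternates per lane -- even-indexed lanes compute a*b - c, odd-indexed lanes
 * compute a*b + c -- and _mm512_fmsubadd_ps swaps the two roles.  Assumes
 * AVX512F is enabled.
 *
 *   __m512 a = _mm512_set1_ps(2.0f);
 *   __m512 b = _mm512_set1_ps(3.0f);
 *   __m512 c = _mm512_set1_ps(1.0f);
 *   __m512 r = _mm512_fmaddsub_ps(a, b, c);   // even lanes: 5.0f, odd lanes: 7.0f
 */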
3086
3087#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3088  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3089                                            (__v8df)(__m512d)(B), \
3090                                            (__v8df)(__m512d)(C), \
3091                                            (__mmask8)(U), (int)(R))
3092
3093
3094static __inline__ __m512d __DEFAULT_FN_ATTRS512
3095_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3096{
3097  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3098                                                    (__v8df) __B,
3099                                                    (__v8df) __C,
3100                                                    (__mmask8) __U,
3101                                                    _MM_FROUND_CUR_DIRECTION);
3102}
3103
3104#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3105  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3106                                           (__v16sf)(__m512)(B), \
3107                                           (__v16sf)(__m512)(C), \
3108                                           (__mmask16)(U), (int)(R))
3109
3110static __inline__ __m512 __DEFAULT_FN_ATTRS512
3111_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3112{
3113  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3114                                                   (__v16sf) __B,
3115                                                   (__v16sf) __C,
3116                                                   (__mmask16) __U,
3117                                                   _MM_FROUND_CUR_DIRECTION);
3118}
3119
3120#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3121  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3122                                               (__v8df)(__m512d)(B), \
3123                                               (__v8df)(__m512d)(C), \
3124                                               (__mmask8)(U), (int)(R))
3125
3126
3127static __inline__ __m512d __DEFAULT_FN_ATTRS512
3128_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3129{
3130  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3131                                                       (__v8df) __B,
3132                                                       (__v8df) __C,
3133                                                       (__mmask8) __U,
3134                                                       _MM_FROUND_CUR_DIRECTION);
3135}
3136
3137#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3138  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3139                                              (__v16sf)(__m512)(B), \
3140                                              (__v16sf)(__m512)(C), \
3141                                              (__mmask16)(U), (int)(R))
3142
3143
3144static __inline__ __m512 __DEFAULT_FN_ATTRS512
3145_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3146{
3147  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3148                                                      (__v16sf) __B,
3149                                                      (__v16sf) __C,
3150                                                      (__mmask16) __U,
3151                                                      _MM_FROUND_CUR_DIRECTION);
3152}
3153
3154#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3155  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3156                                           -(__v8df)(__m512d)(B), \
3157                                           (__v8df)(__m512d)(C), \
3158                                           (__mmask8)(U), (int)(R))
3159
3160
3161static __inline__ __m512d __DEFAULT_FN_ATTRS512
3162_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3163{
3164  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3165                                                    -(__v8df) __B,
3166                                                    (__v8df) __C,
3167                                                    (__mmask8) __U,
3168                                                    _MM_FROUND_CUR_DIRECTION);
3169}
3170
3171#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3172  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3173                                          -(__v16sf)(__m512)(B), \
3174                                          (__v16sf)(__m512)(C), \
3175                                          (__mmask16)(U), (int)(R))
3176
3177
3178static __inline__ __m512 __DEFAULT_FN_ATTRS512
3179_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3180{
3181  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3182                                                   -(__v16sf) __B,
3183                                                   (__v16sf) __C,
3184                                                   (__mmask16) __U,
3185                                                   _MM_FROUND_CUR_DIRECTION);
3186}
3187
3188#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3189  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3190                                           -(__v8df)(__m512d)(B), \
3191                                           -(__v8df)(__m512d)(C), \
3192                                           (__mmask8)(U), (int)(R))
3193
3194
3195#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3196  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3197                                            (__v8df)(__m512d)(B), \
3198                                            (__v8df)(__m512d)(C), \
3199                                            (__mmask8)(U), (int)(R))
3200
3201
3202static __inline__ __m512d __DEFAULT_FN_ATTRS512
3203_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3204{
3205  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3206                                                    -(__v8df) __B,
3207                                                    -(__v8df) __C,
3208                                                    (__mmask8) __U,
3209                                                    _MM_FROUND_CUR_DIRECTION);
3210}
3211
3212static __inline__ __m512d __DEFAULT_FN_ATTRS512
3213_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3214{
3215  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3216                                                     (__v8df) __B,
3217                                                     (__v8df) __C,
3218                                                     (__mmask8) __U,
3219                                                     _MM_FROUND_CUR_DIRECTION);
3220}
3221
3222#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3223  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3224                                          -(__v16sf)(__m512)(B), \
3225                                          -(__v16sf)(__m512)(C), \
3226                                          (__mmask16)(U), (int)(R))
3227
3228
3229#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3230  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3231                                           (__v16sf)(__m512)(B), \
3232                                           (__v16sf)(__m512)(C), \
3233                                           (__mmask16)(U), (int)(R))
3234
3235
3236static __inline__ __m512 __DEFAULT_FN_ATTRS512
3237_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3238{
3239  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3240                                                   -(__v16sf) __B,
3241                                                   -(__v16sf) __C,
3242                                                   (__mmask16) __U,
3243                                                   _MM_FROUND_CUR_DIRECTION);
3244}
3245
3246static __inline__ __m512 __DEFAULT_FN_ATTRS512
3247_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3248{
3249  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3250                                                    (__v16sf) __B,
3251                                                    (__v16sf) __C,
3252                                                    (__mmask16) __U,
3253                                                    _MM_FROUND_CUR_DIRECTION);
3254}
3255
3256
3257
3258/* Vector permutations */
3259
3260static __inline __m512i __DEFAULT_FN_ATTRS512
3261_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3262{
3263  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si)__I,
3264                                                (__v16si)__B);
3265}
3266
3267static __inline__ __m512i __DEFAULT_FN_ATTRS512
3268_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3269                               __m512i __B)
3270{
3271  return (__m512i)__builtin_ia32_selectd_512(__U,
3272                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3273                              (__v16si)__A);
3274}
3275
3276static __inline__ __m512i __DEFAULT_FN_ATTRS512
3277_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3278                                __m512i __B)
3279{
3280  return (__m512i)__builtin_ia32_selectd_512(__U,
3281                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3282                              (__v16si)__I);
3283}
3284
3285static __inline__ __m512i __DEFAULT_FN_ATTRS512
3286_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3287                                __m512i __B)
3288{
3289  return (__m512i)__builtin_ia32_selectd_512(__U,
3290                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3291                              (__v16si)_mm512_setzero_si512());
3292}
3293
3294static __inline __m512i __DEFAULT_FN_ATTRS512
3295_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3296{
3297  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di)__I,
3298                                                (__v8di)__B);
3299}
3300
3301static __inline__ __m512i __DEFAULT_FN_ATTRS512
3302_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3303                               __m512i __B)
3304{
3305  return (__m512i)__builtin_ia32_selectq_512(__U,
3306                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3307                               (__v8di)__A);
3308}
3309
3310static __inline__ __m512i __DEFAULT_FN_ATTRS512
3311_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3312                                __m512i __B)
3313{
3314  return (__m512i)__builtin_ia32_selectq_512(__U,
3315                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3316                               (__v8di)__I);
3317}
3318
3319static __inline__ __m512i __DEFAULT_FN_ATTRS512
3320_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3321                                __m512i __B)
3322{
3323  return (__m512i)__builtin_ia32_selectq_512(__U,
3324                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3325                               (__v8di)_mm512_setzero_si512());
3326}
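/* Illustrative sketch (not part of the original header): permutex2var treats
 * (__A, __B) as one 32-entry table of dwords; bits 3:0 of each index lane pick
 * the element and bit 4 picks the source vector (0 selects __A, 1 selects __B).
 * Assumes AVX512F is enabled.
 *
 *   __m512i lo  = _mm512_set1_epi32(1);                      // table entries 0..15
 *   __m512i hi  = _mm512_set1_epi32(2);                      // table entries 16..31
 *   __m512i idx = _mm512_set1_epi32(16);                     // bit 4 set -> read from hi
 *   __m512i r   = _mm512_permutex2var_epi32(lo, idx, hi);    // every lane is 2
 */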
3327
3328#define _mm512_alignr_epi64(A, B, I) \
3329  (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
3330                                    (__v8di)(__m512i)(B), (int)(I))
3331
3332#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
3333  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3334                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3335                                 (__v8di)(__m512i)(W))
3336
3337#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
3338  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3339                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3340                                 (__v8di)_mm512_setzero_si512())
3341
3342#define _mm512_alignr_epi32(A, B, I) \
3343  (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
3344                                    (__v16si)(__m512i)(B), (int)(I))
3345
3346#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
3347  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3348                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3349                                (__v16si)(__m512i)(W))
3350
3351#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
3352  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3353                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3354                                (__v16si)_mm512_setzero_si512())
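/* Illustrative sketch (not part of the original header): alignr concatenates
 * the two sources (A forms the upper half, B the lower half), shifts the pair
 * right by I elements, and keeps the low 512 bits, so with I == 1 the epi64
 * form yields { B[1], ..., B[7], A[0] }.
 *
 *   __m512i a = _mm512_set1_epi64(7);
 *   __m512i b = _mm512_set1_epi64(3);
 *   __m512i r = _mm512_alignr_epi64(a, b, 1);   // lanes 0..6 hold 3, lane 7 holds 7
 */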
3355/* Vector Extract */
3356
3357#define _mm512_extractf64x4_pd(A, I) \
3358  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
3359                                            (__v4df)_mm256_undefined_pd(), \
3360                                            (__mmask8)-1)
3361
3362#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
3363  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3364                                            (__v4df)(__m256d)(W), \
3365                                            (__mmask8)(U))
3366
3367#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
3368  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3369                                            (__v4df)_mm256_setzero_pd(), \
3370                                            (__mmask8)(U))
3371
3372#define _mm512_extractf32x4_ps(A, I) \
3373  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3374                                           (__v4sf)_mm_undefined_ps(), \
3375                                           (__mmask8)-1)
3376
3377#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
3378  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3379                                           (__v4sf)(__m128)(W), \
3380                                           (__mmask8)(U))
3381
3382#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
3383  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3384                                           (__v4sf)_mm_setzero_ps(), \
3385                                           (__mmask8)(U))
3386
3387/* Vector Blend */
3388
3389static __inline __m512d __DEFAULT_FN_ATTRS512
3390_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3391{
3392  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3393                 (__v8df) __W,
3394                 (__v8df) __A);
3395}
3396
3397static __inline __m512 __DEFAULT_FN_ATTRS512
3398_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3399{
3400  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3401                (__v16sf) __W,
3402                (__v16sf) __A);
3403}
3404
3405static __inline __m512i __DEFAULT_FN_ATTRS512
3406_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3407{
3408  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3409                (__v8di) __W,
3410                (__v8di) __A);
3411}
3412
3413static __inline __m512i __DEFAULT_FN_ATTRS512
3414_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3415{
3416  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3417                (__v16si) __W,
3418                (__v16si) __A);
3419}
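/* Illustrative sketch (not part of the original header): the blend intrinsics
 * pick __W where the corresponding mask bit is 1 and __A where it is 0.
 * Assumes AVX512F is enabled.
 *
 *   __m512i a = _mm512_set1_epi32(0);
 *   __m512i w = _mm512_set1_epi32(-1);
 *   __m512i r = _mm512_mask_blend_epi32(0x0001, a, w);   // lane 0 is -1, lanes 1..15 are 0
 */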
3420
3421/* Compare */
3422
3423#define _mm512_cmp_round_ps_mask(A, B, P, R) \
3424  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3425                                          (__v16sf)(__m512)(B), (int)(P), \
3426                                          (__mmask16)-1, (int)(R))
3427
3428#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
3429  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3430                                          (__v16sf)(__m512)(B), (int)(P), \
3431                                          (__mmask16)(U), (int)(R))
3432
3433#define _mm512_cmp_ps_mask(A, B, P) \
3434  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3435#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3436  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3437
3438#define _mm512_cmpeq_ps_mask(A, B) \
3439    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3440#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3441    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3442
3443#define _mm512_cmplt_ps_mask(A, B) \
3444    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3445#define _mm512_mask_cmplt_ps_mask(k, A, B) \
3446    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3447
3448#define _mm512_cmple_ps_mask(A, B) \
3449    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3450#define _mm512_mask_cmple_ps_mask(k, A, B) \
3451    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3452
3453#define _mm512_cmpunord_ps_mask(A, B) \
3454    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3455#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3456    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3457
3458#define _mm512_cmpneq_ps_mask(A, B) \
3459    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3460#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3461    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3462
3463#define _mm512_cmpnlt_ps_mask(A, B) \
3464    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3465#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3466    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3467
3468#define _mm512_cmpnle_ps_mask(A, B) \
3469    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3470#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3471    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3472
3473#define _mm512_cmpord_ps_mask(A, B) \
3474    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3475#define _mm512_mask_cmpord_ps_mask(k, A, B) \
3476    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3477
3478#define _mm512_cmp_round_pd_mask(A, B, P, R) \
3479  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3480                                         (__v8df)(__m512d)(B), (int)(P), \
3481                                         (__mmask8)-1, (int)(R))
3482
3483#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
3484  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3485                                         (__v8df)(__m512d)(B), (int)(P), \
3486                                         (__mmask8)(U), (int)(R))
3487
3488#define _mm512_cmp_pd_mask(A, B, P) \
3489  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3490#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3491  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3492
3493#define _mm512_cmpeq_pd_mask(A, B) \
3494    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3495#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3496    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3497
3498#define _mm512_cmplt_pd_mask(A, B) \
3499    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3500#define _mm512_mask_cmplt_pd_mask(k, A, B) \
3501    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3502
3503#define _mm512_cmple_pd_mask(A, B) \
3504    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3505#define _mm512_mask_cmple_pd_mask(k, A, B) \
3506    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3507
3508#define _mm512_cmpunord_pd_mask(A, B) \
3509    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3510#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3511    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3512
3513#define _mm512_cmpneq_pd_mask(A, B) \
3514    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3515#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3516    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3517
3518#define _mm512_cmpnlt_pd_mask(A, B) \
3519    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3520#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3521    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3522
3523#define _mm512_cmpnle_pd_mask(A, B) \
3524    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3525#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3526    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3527
3528#define _mm512_cmpord_pd_mask(A, B) \
3529    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3530#define _mm512_mask_cmpord_pd_mask(k, A, B) \
3531    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
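/* Illustrative sketch (not part of the original header): the comparison
 * intrinsics return a bit-per-lane mask that can feed any masked operation.
 * Assumes AVX512F is enabled.
 *
 *   __m512d x = _mm512_set1_pd(1.0);
 *   __m512d y = _mm512_set1_pd(2.0);
 *   __mmask8 m = _mm512_cmplt_pd_mask(x, y);     // 0xFF: every lane satisfies x < y
 *   __m512d r = _mm512_mask_blend_pd(m, y, x);   // keeps x wherever the mask bit is set
 */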
3532
3533/* Conversion */
3534
3535#define _mm512_cvtt_roundps_epu32(A, R) \
3536  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3537                                             (__v16si)_mm512_undefined_epi32(), \
3538                                             (__mmask16)-1, (int)(R))
3539
3540#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3541  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3542                                             (__v16si)(__m512i)(W), \
3543                                             (__mmask16)(U), (int)(R))
3544
3545#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3546  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3547                                             (__v16si)_mm512_setzero_si512(), \
3548                                             (__mmask16)(U), (int)(R))
3549
3550
3551static __inline __m512i __DEFAULT_FN_ATTRS512
3552_mm512_cvttps_epu32(__m512 __A)
3553{
3554  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3555                  (__v16si)
3556                  _mm512_setzero_si512 (),
3557                  (__mmask16) -1,
3558                  _MM_FROUND_CUR_DIRECTION);
3559}
3560
3561static __inline__ __m512i __DEFAULT_FN_ATTRS512
3562_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3563{
3564  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3565                   (__v16si) __W,
3566                   (__mmask16) __U,
3567                   _MM_FROUND_CUR_DIRECTION);
3568}
3569
3570static __inline__ __m512i __DEFAULT_FN_ATTRS512
3571_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3572{
3573  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3574                   (__v16si) _mm512_setzero_si512 (),
3575                   (__mmask16) __U,
3576                   _MM_FROUND_CUR_DIRECTION);
3577}
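/* Illustrative sketch (not part of the original header): the cvtt* forms
 * truncate toward zero instead of honouring the current rounding mode, and
 * the mask/maskz variants preserve or zero the unselected lanes.  Assumes
 * AVX512F is enabled.
 *
 *   __m512  f = _mm512_set1_ps(3.9f);
 *   __m512i t = _mm512_cvttps_epu32(f);                  // every lane is 3
 *   __m512i z = _mm512_maskz_cvttps_epu32(0x000F, f);    // lanes 0..3 are 3, the rest are 0
 */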
3578
3579#define _mm512_cvt_roundepi32_ps(A, R) \
3580  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3581                                          (__v16sf)_mm512_setzero_ps(), \
3582                                          (__mmask16)-1, (int)(R))
3583
3584#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3585  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3586                                          (__v16sf)(__m512)(W), \
3587                                          (__mmask16)(U), (int)(R))
3588
3589#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3590  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3591                                          (__v16sf)_mm512_setzero_ps(), \
3592                                          (__mmask16)(U), (int)(R))
3593
3594#define _mm512_cvt_roundepu32_ps(A, R) \
3595  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3596                                           (__v16sf)_mm512_setzero_ps(), \
3597                                           (__mmask16)-1, (int)(R))
3598
3599#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3600  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3601                                           (__v16sf)(__m512)(W), \
3602                                           (__mmask16)(U), (int)(R))
3603
3604#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3605  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3606                                           (__v16sf)_mm512_setzero_ps(), \
3607                                           (__mmask16)(U), (int)(R))
3608
3609static __inline__ __m512 __DEFAULT_FN_ATTRS512
3610_mm512_cvtepu32_ps (__m512i __A)
3611{
3612  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3613}
3614
3615static __inline__ __m512 __DEFAULT_FN_ATTRS512
3616_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3617{
3618  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3619                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3620                                             (__v16sf)__W);
3621}
3622
3623static __inline__ __m512 __DEFAULT_FN_ATTRS512
3624_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3625{
3626  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3627                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3628                                             (__v16sf)_mm512_setzero_ps());
3629}
3630
3631static __inline __m512d __DEFAULT_FN_ATTRS512
3632_mm512_cvtepi32_pd(__m256i __A)
3633{
3634  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3635}
3636
3637static __inline__ __m512d __DEFAULT_FN_ATTRS512
3638_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3639{
3640  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3641                                              (__v8df)_mm512_cvtepi32_pd(__A),
3642                                              (__v8df)__W);
3643}
3644
3645static __inline__ __m512d __DEFAULT_FN_ATTRS512
3646_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3647{
3648  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3649                                              (__v8df)_mm512_cvtepi32_pd(__A),
3650                                              (__v8df)_mm512_setzero_pd());
3651}
3652
3653static __inline__ __m512d __DEFAULT_FN_ATTRS512
3654_mm512_cvtepi32lo_pd(__m512i __A)
3655{
3656  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3657}
3658
3659static __inline__ __m512d __DEFAULT_FN_ATTRS512
3660_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
3661{
3662  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3663}
3664
3665static __inline__ __m512 __DEFAULT_FN_ATTRS512
3666_mm512_cvtepi32_ps (__m512i __A)
3667{
3668  return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3669}
3670
3671static __inline__ __m512 __DEFAULT_FN_ATTRS512
3672_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3673{
3674  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3675                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3676                                             (__v16sf)__W);
3677}
3678
3679static __inline__ __m512 __DEFAULT_FN_ATTRS512
3680_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3681{
3682  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3683                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3684                                             (__v16sf)_mm512_setzero_ps());
3685}
3686
3687static __inline __m512d __DEFAULT_FN_ATTRS512
3688_mm512_cvtepu32_pd(__m256i __A)
3689{
3690  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3691}
3692
3693static __inline__ __m512d __DEFAULT_FN_ATTRS512
3694_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3695{
3696  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3697                                              (__v8df)_mm512_cvtepu32_pd(__A),
3698                                              (__v8df)__W);
3699}
3700
3701static __inline__ __m512d __DEFAULT_FN_ATTRS512
3702_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3703{
3704  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3705                                              (__v8df)_mm512_cvtepu32_pd(__A),
3706                                              (__v8df)_mm512_setzero_pd());
3707}
3708
3709static __inline__ __m512d __DEFAULT_FN_ATTRS512
3710_mm512_cvtepu32lo_pd(__m512i __A)
3711{
3712  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3713}
3714
3715static __inline__ __m512d __DEFAULT_FN_ATTRS512
3716_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
3717{
3718  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3719}
3720
3721#define _mm512_cvt_roundpd_ps(A, R) \
3722  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3723                                          (__v8sf)_mm256_setzero_ps(), \
3724                                          (__mmask8)-1, (int)(R))
3725
3726#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3727  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3728                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
3729                                          (int)(R))
3730
3731#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3732  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3733                                          (__v8sf)_mm256_setzero_ps(), \
3734                                          (__mmask8)(U), (int)(R))
3735
3736static __inline__ __m256 __DEFAULT_FN_ATTRS512
3737_mm512_cvtpd_ps (__m512d __A)
3738{
3739  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3740                (__v8sf) _mm256_undefined_ps (),
3741                (__mmask8) -1,
3742                _MM_FROUND_CUR_DIRECTION);
3743}
3744
3745static __inline__ __m256 __DEFAULT_FN_ATTRS512
3746_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3747{
3748  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3749                (__v8sf) __W,
3750                (__mmask8) __U,
3751                _MM_FROUND_CUR_DIRECTION);
3752}
3753
3754static __inline__ __m256 __DEFAULT_FN_ATTRS512
3755_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3756{
3757  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3758                (__v8sf) _mm256_setzero_ps (),
3759                (__mmask8) __U,
3760                _MM_FROUND_CUR_DIRECTION);
3761}
3762
3763static __inline__ __m512 __DEFAULT_FN_ATTRS512
3764_mm512_cvtpd_pslo (__m512d __A)
3765{
3766  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3767                (__v8sf) _mm256_setzero_ps (),
3768                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3769}
3770
3771static __inline__ __m512 __DEFAULT_FN_ATTRS512
3772_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U, __m512d __A)
3773{
3774  return (__m512) __builtin_shufflevector (
3775                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3776                                               __U, __A),
3777                (__v8sf) _mm256_setzero_ps (),
3778                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3779}
3780
3781#define _mm512_cvt_roundps_ph(A, I) \
3782  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3783                                            (__v16hi)_mm256_undefined_si256(), \
3784                                            (__mmask16)-1)
3785
3786#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3787  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3788                                            (__v16hi)(__m256i)(U), \
3789                                            (__mmask16)(W))
3790
3791#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3792  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3793                                            (__v16hi)_mm256_setzero_si256(), \
3794                                            (__mmask16)(W))
3795
3796#define _mm512_cvtps_ph(A, I) \
3797  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3798                                            (__v16hi)_mm256_setzero_si256(), \
3799                                            (__mmask16)-1)
3800
3801#define _mm512_mask_cvtps_ph(U, W, A, I) \
3802  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3803                                            (__v16hi)(__m256i)(U), \
3804                                            (__mmask16)(W))
3805
3806#define _mm512_maskz_cvtps_ph(W, A, I) \
3807  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3808                                            (__v16hi)_mm256_setzero_si256(), \
3809                                            (__mmask16)(W))
3810
3811#define _mm512_cvt_roundph_ps(A, R) \
3812  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3813                                           (__v16sf)_mm512_undefined_ps(), \
3814                                           (__mmask16)-1, (int)(R))
3815
3816#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3817  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3818                                           (__v16sf)(__m512)(W), \
3819                                           (__mmask16)(U), (int)(R))
3820
3821#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3822  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3823                                           (__v16sf)_mm512_setzero_ps(), \
3824                                           (__mmask16)(U), (int)(R))
3825
3826
3827static  __inline __m512 __DEFAULT_FN_ATTRS512
3828_mm512_cvtph_ps(__m256i __A)
3829{
3830  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3831                (__v16sf)
3832                _mm512_setzero_ps (),
3833                (__mmask16) -1,
3834                _MM_FROUND_CUR_DIRECTION);
3835}
3836
3837static __inline__ __m512 __DEFAULT_FN_ATTRS512
3838_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3839{
3840  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3841                 (__v16sf) __W,
3842                 (__mmask16) __U,
3843                 _MM_FROUND_CUR_DIRECTION);
3844}
3845
3846static __inline__ __m512 __DEFAULT_FN_ATTRS512
3847_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3848{
3849  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3850                 (__v16sf) _mm512_setzero_ps (),
3851                 (__mmask16) __U,
3852                 _MM_FROUND_CUR_DIRECTION);
3853}
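/* Illustrative sketch (not part of the original header): _mm512_cvtps_ph packs
 * 16 floats into 16 half-precision values (the immediate selects the rounding
 * mode), and _mm512_cvtph_ps widens them back to single precision.  Assumes
 * AVX512F is enabled.
 *
 *   __m512  f  = _mm512_set1_ps(1.5f);
 *   __m256i h  = _mm512_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT);
 *   __m512  f2 = _mm512_cvtph_ps(h);   // 1.5f is exactly representable in half precision
 */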
3854
3855#define _mm512_cvtt_roundpd_epi32(A, R) \
3856  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3857                                            (__v8si)_mm256_setzero_si256(), \
3858                                            (__mmask8)-1, (int)(R))
3859
3860#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3861  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3862                                            (__v8si)(__m256i)(W), \
3863                                            (__mmask8)(U), (int)(R))
3864
3865#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3866  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3867                                            (__v8si)_mm256_setzero_si256(), \
3868                                            (__mmask8)(U), (int)(R))
3869
3870static __inline __m256i __DEFAULT_FN_ATTRS512
3871_mm512_cvttpd_epi32(__m512d __a)
3872{
3873  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3874                                                   (__v8si)_mm256_setzero_si256(),
3875                                                   (__mmask8) -1,
3876                                                    _MM_FROUND_CUR_DIRECTION);
3877}
3878
3879static __inline__ __m256i __DEFAULT_FN_ATTRS512
3880_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3881{
3882  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3883                  (__v8si) __W,
3884                  (__mmask8) __U,
3885                  _MM_FROUND_CUR_DIRECTION);
3886}
3887
3888static __inline__ __m256i __DEFAULT_FN_ATTRS512
3889_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3890{
3891  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3892                  (__v8si) _mm256_setzero_si256 (),
3893                  (__mmask8) __U,
3894                  _MM_FROUND_CUR_DIRECTION);
3895}
3896
3897#define _mm512_cvtt_roundps_epi32(A, R) \
3898  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3899                                            (__v16si)_mm512_setzero_si512(), \
3900                                            (__mmask16)-1, (int)(R))
3901
3902#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3903  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3904                                            (__v16si)(__m512i)(W), \
3905                                            (__mmask16)(U), (int)(R))
3906
3907#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3908  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3909                                            (__v16si)_mm512_setzero_si512(), \
3910                                            (__mmask16)(U), (int)(R))
3911
3912static __inline __m512i __DEFAULT_FN_ATTRS512
3913_mm512_cvttps_epi32(__m512 __a)
3914{
3915  return (__m512i)
3916    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3917                                     (__v16si) _mm512_setzero_si512 (),
3918                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3919}
3920
3921static __inline__ __m512i __DEFAULT_FN_ATTRS512
3922_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3923{
3924  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3925                  (__v16si) __W,
3926                  (__mmask16) __U,
3927                  _MM_FROUND_CUR_DIRECTION);
3928}
3929
3930static __inline__ __m512i __DEFAULT_FN_ATTRS512
3931_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3932{
3933  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3934                  (__v16si) _mm512_setzero_si512 (),
3935                  (__mmask16) __U,
3936                  _MM_FROUND_CUR_DIRECTION);
3937}
3938
3939#define _mm512_cvt_roundps_epi32(A, R) \
3940  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3941                                           (__v16si)_mm512_setzero_si512(), \
3942                                           (__mmask16)-1, (int)(R))
3943
3944#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3945  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3946                                           (__v16si)(__m512i)(W), \
3947                                           (__mmask16)(U), (int)(R))
3948
3949#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3950  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3951                                           (__v16si)_mm512_setzero_si512(), \
3952                                           (__mmask16)(U), (int)(R))
3953
3954static __inline__ __m512i __DEFAULT_FN_ATTRS512
3955_mm512_cvtps_epi32 (__m512 __A)
3956{
3957  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3958                 (__v16si) _mm512_undefined_epi32 (),
3959                 (__mmask16) -1,
3960                 _MM_FROUND_CUR_DIRECTION);
3961}
3962
3963static __inline__ __m512i __DEFAULT_FN_ATTRS512
3964_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3965{
3966  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3967                 (__v16si) __W,
3968                 (__mmask16) __U,
3969                 _MM_FROUND_CUR_DIRECTION);
3970}
3971
3972static __inline__ __m512i __DEFAULT_FN_ATTRS512
3973_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3974{
3975  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3976                 (__v16si)
3977                 _mm512_setzero_si512 (),
3978                 (__mmask16) __U,
3979                 _MM_FROUND_CUR_DIRECTION);
3980}
3981
3982#define _mm512_cvt_roundpd_epi32(A, R) \
3983  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3984                                           (__v8si)_mm256_setzero_si256(), \
3985                                           (__mmask8)-1, (int)(R))
3986
3987#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3988  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3989                                           (__v8si)(__m256i)(W), \
3990                                           (__mmask8)(U), (int)(R))
3991
3992#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3993  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3994                                           (__v8si)_mm256_setzero_si256(), \
3995                                           (__mmask8)(U), (int)(R))
3996
3997static __inline__ __m256i __DEFAULT_FN_ATTRS512
3998_mm512_cvtpd_epi32 (__m512d __A)
3999{
4000  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4001                 (__v8si)
4002                 _mm256_undefined_si256 (),
4003                 (__mmask8) -1,
4004                 _MM_FROUND_CUR_DIRECTION);
4005}
4006
4007static __inline__ __m256i __DEFAULT_FN_ATTRS512
4008_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4009{
4010  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4011                 (__v8si) __W,
4012                 (__mmask8) __U,
4013                 _MM_FROUND_CUR_DIRECTION);
4014}
4015
4016static __inline__ __m256i __DEFAULT_FN_ATTRS512
4017_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4018{
4019  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4020                 (__v8si)
4021                 _mm256_setzero_si256 (),
4022                 (__mmask8) __U,
4023                 _MM_FROUND_CUR_DIRECTION);
4024}
4025
4026#define _mm512_cvt_roundps_epu32(A, R) \
4027  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4028                                            (__v16si)_mm512_setzero_si512(), \
4029                                            (__mmask16)-1, (int)(R))
4030
4031#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4032  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4033                                            (__v16si)(__m512i)(W), \
4034                                            (__mmask16)(U), (int)(R))
4035
4036#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4037  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4038                                            (__v16si)_mm512_setzero_si512(), \
4039                                            (__mmask16)(U), (int)(R))
4040
4041static __inline__ __m512i __DEFAULT_FN_ATTRS512
4042_mm512_cvtps_epu32 ( __m512 __A)
4043{
4044  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4045                  (__v16si)\
4046                  _mm512_undefined_epi32 (),
4047                  (__mmask16) -1,\
4048                  _MM_FROUND_CUR_DIRECTION);
4049}
4050
4051static __inline__ __m512i __DEFAULT_FN_ATTRS512
4052_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4053{
4054  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4055                  (__v16si) __W,
4056                  (__mmask16) __U,
4057                  _MM_FROUND_CUR_DIRECTION);
4058}
4059
4060static __inline__ __m512i __DEFAULT_FN_ATTRS512
4061_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4062{
4063  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4064                  (__v16si)
4065                  _mm512_setzero_si512 (),
4066                  (__mmask16) __U ,
4067                  _MM_FROUND_CUR_DIRECTION);
4068}
4069
4070#define _mm512_cvt_roundpd_epu32(A, R) \
4071  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4072                                            (__v8si)_mm256_setzero_si256(), \
4073                                            (__mmask8)-1, (int)(R))
4074
4075#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
4076  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4077                                            (__v8si)(__m256i)(W), \
4078                                            (__mmask8)(U), (int)(R))
4079
4080#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
4081  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4082                                            (__v8si)_mm256_setzero_si256(), \
4083                                            (__mmask8)(U), (int)(R))
4084
4085static __inline__ __m256i __DEFAULT_FN_ATTRS512
4086_mm512_cvtpd_epu32 (__m512d __A)
4087{
4088  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4089                  (__v8si)
4090                  _mm256_undefined_si256 (),
4091                  (__mmask8) -1,
4092                  _MM_FROUND_CUR_DIRECTION);
4093}
4094
4095static __inline__ __m256i __DEFAULT_FN_ATTRS512
4096_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4097{
4098  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4099                  (__v8si) __W,
4100                  (__mmask8) __U,
4101                  _MM_FROUND_CUR_DIRECTION);
4102}
4103
4104static __inline__ __m256i __DEFAULT_FN_ATTRS512
4105_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4106{
4107  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4108                  (__v8si)
4109                  _mm256_setzero_si256 (),
4110                  (__mmask8) __U,
4111                  _MM_FROUND_CUR_DIRECTION);
4112}
4113
4114static __inline__ double __DEFAULT_FN_ATTRS512
4115_mm512_cvtsd_f64(__m512d __a)
4116{
4117  return __a[0];
4118}
4119
4120static __inline__ float __DEFAULT_FN_ATTRS512
4121_mm512_cvtss_f32(__m512 __a)
4122{
4123  return __a[0];
4124}
4125
4126/* Unpack and Interleave */
4127
4128static __inline __m512d __DEFAULT_FN_ATTRS512
4129_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4130{
4131  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4132                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4133}
4134
4135static __inline__ __m512d __DEFAULT_FN_ATTRS512
4136_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4137{
4138  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4139                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4140                                           (__v8df)__W);
4141}
4142
4143static __inline__ __m512d __DEFAULT_FN_ATTRS512
4144_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4145{
4146  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4147                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4148                                           (__v8df)_mm512_setzero_pd());
4149}
4150
4151static __inline __m512d __DEFAULT_FN_ATTRS512
4152_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4153{
4154  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4155                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4156}
4157
4158static __inline__ __m512d __DEFAULT_FN_ATTRS512
4159_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4160{
4161  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4162                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4163                                           (__v8df)__W);
4164}
4165
4166static __inline__ __m512d __DEFAULT_FN_ATTRS512
4167_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4168{
4169  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4170                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4171                                           (__v8df)_mm512_setzero_pd());
4172}
4173
4174static __inline __m512 __DEFAULT_FN_ATTRS512
4175_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4176{
4177  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4178                                         2,    18,    3,    19,
4179                                         2+4,  18+4,  3+4,  19+4,
4180                                         2+8,  18+8,  3+8,  19+8,
4181                                         2+12, 18+12, 3+12, 19+12);
4182}
4183
4184static __inline__ __m512 __DEFAULT_FN_ATTRS512
4185_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4186{
4187  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4188                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4189                                          (__v16sf)__W);
4190}
4191
4192static __inline__ __m512 __DEFAULT_FN_ATTRS512
4193_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4194{
4195  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4196                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4197                                          (__v16sf)_mm512_setzero_ps());
4198}
4199
4200static __inline __m512 __DEFAULT_FN_ATTRS512
4201_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4202{
4203  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4204                                         0,    16,    1,    17,
4205                                         0+4,  16+4,  1+4,  17+4,
4206                                         0+8,  16+8,  1+8,  17+8,
4207                                         0+12, 16+12, 1+12, 17+12);
4208}
4209
4210static __inline__ __m512 __DEFAULT_FN_ATTRS512
4211_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4212{
4213  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4214                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4215                                          (__v16sf)__W);
4216}
4217
4218static __inline__ __m512 __DEFAULT_FN_ATTRS512
4219_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4220{
4221  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4222                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4223                                          (__v16sf)_mm512_setzero_ps());
4224}
4225
4226static __inline__ __m512i __DEFAULT_FN_ATTRS512
4227_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4228{
4229  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4230                                          2,    18,    3,    19,
4231                                          2+4,  18+4,  3+4,  19+4,
4232                                          2+8,  18+8,  3+8,  19+8,
4233                                          2+12, 18+12, 3+12, 19+12);
4234}
4235
4236static __inline__ __m512i __DEFAULT_FN_ATTRS512
4237_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4238{
4239  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4240                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4241                                       (__v16si)__W);
4242}
4243
4244static __inline__ __m512i __DEFAULT_FN_ATTRS512
4245_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4246{
4247  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4248                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4249                                       (__v16si)_mm512_setzero_si512());
4250}
4251
4252static __inline__ __m512i __DEFAULT_FN_ATTRS512
4253_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4254{
4255  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4256                                          0,    16,    1,    17,
4257                                          0+4,  16+4,  1+4,  17+4,
4258                                          0+8,  16+8,  1+8,  17+8,
4259                                          0+12, 16+12, 1+12, 17+12);
4260}
4261
4262static __inline__ __m512i __DEFAULT_FN_ATTRS512
4263_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4264{
4265  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4266                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4267                                       (__v16si)__W);
4268}
4269
4270static __inline__ __m512i __DEFAULT_FN_ATTRS512
4271_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4272{
4273  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4274                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4275                                       (__v16si)_mm512_setzero_si512());
4276}
4277
4278static __inline__ __m512i __DEFAULT_FN_ATTRS512
4279_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4280{
4281  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4282                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4283}
4284
4285static __inline__ __m512i __DEFAULT_FN_ATTRS512
4286_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4287{
4288  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4289                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4290                                        (__v8di)__W);
4291}
4292
4293static __inline__ __m512i __DEFAULT_FN_ATTRS512
4294_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4295{
4296  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4297                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4298                                        (__v8di)_mm512_setzero_si512());
4299}
4300
4301static __inline__ __m512i __DEFAULT_FN_ATTRS512
4302_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4303{
4304  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4305                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4306}
4307
4308static __inline__ __m512i __DEFAULT_FN_ATTRS512
4309_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4310{
4311  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4312                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4313                                        (__v8di)__W);
4314}
4315
4316static __inline__ __m512i __DEFAULT_FN_ATTRS512
4317_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4318{
4319  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4320                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4321                                        (__v8di)_mm512_setzero_si512());
4322}
4323
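/* Illustrative sketch: the unpack intrinsics above interleave the high or low
   elements of each 128-bit lane independently (see the shuffle indices); they
   do not interleave across the full 512-bit register.  For example:

     __m512i __hi = _mm512_unpackhi_epi32(__a, __b);  // per lane: a2,b2,a3,b3
     __m512i __lo = _mm512_unpacklo_epi32(__a, __b);  // per lane: a0,b0,a1,b1
*/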
4324
4325/* SIMD load ops */
4326
4327static __inline __m512i __DEFAULT_FN_ATTRS512
4328_mm512_loadu_si512 (void const *__P)
4329{
4330  struct __loadu_si512 {
4331    __m512i_u __v;
4332  } __attribute__((__packed__, __may_alias__));
4333  return ((struct __loadu_si512*)__P)->__v;
4334}
4335
4336static __inline __m512i __DEFAULT_FN_ATTRS512
4337_mm512_loadu_epi32 (void const *__P)
4338{
4339  struct __loadu_epi32 {
4340    __m512i_u __v;
4341  } __attribute__((__packed__, __may_alias__));
4342  return ((struct __loadu_epi32*)__P)->__v;
4343}
4344
4345static __inline __m512i __DEFAULT_FN_ATTRS512
4346_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4347{
4348  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4349                  (__v16si) __W,
4350                  (__mmask16) __U);
4351}
4352
4353
4354static __inline __m512i __DEFAULT_FN_ATTRS512
4355_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4356{
4357  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4358                                                     (__v16si)
4359                                                     _mm512_setzero_si512 (),
4360                                                     (__mmask16) __U);
4361}
4362
4363static __inline __m512i __DEFAULT_FN_ATTRS512
4364_mm512_loadu_epi64 (void const *__P)
4365{
4366  struct __loadu_epi64 {
4367    __m512i_u __v;
4368  } __attribute__((__packed__, __may_alias__));
4369  return ((struct __loadu_epi64*)__P)->__v;
4370}
4371
4372static __inline __m512i __DEFAULT_FN_ATTRS512
4373_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4374{
4375  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4376                  (__v8di) __W,
4377                  (__mmask8) __U);
4378}
4379
4380static __inline __m512i __DEFAULT_FN_ATTRS512
4381_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4382{
4383  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4384                                                     (__v8di)
4385                                                     _mm512_setzero_si512 (),
4386                                                     (__mmask8) __U);
4387}
4388
4389static __inline __m512 __DEFAULT_FN_ATTRS512
4390_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4391{
4392  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4393                   (__v16sf) __W,
4394                   (__mmask16) __U);
4395}
4396
4397static __inline __m512 __DEFAULT_FN_ATTRS512
4398_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4399{
4400  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4401                                                  (__v16sf)
4402                                                  _mm512_setzero_ps (),
4403                                                  (__mmask16) __U);
4404}
4405
4406static __inline __m512d __DEFAULT_FN_ATTRS512
4407_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4408{
4409  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4410                (__v8df) __W,
4411                (__mmask8) __U);
4412}
4413
4414static __inline __m512d __DEFAULT_FN_ATTRS512
4415_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4416{
4417  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4418                                                   (__v8df)
4419                                                   _mm512_setzero_pd (),
4420                                                   (__mmask8) __U);
4421}
4422
4423static __inline __m512d __DEFAULT_FN_ATTRS512
4424_mm512_loadu_pd(void const *__p)
4425{
4426  struct __loadu_pd {
4427    __m512d_u __v;
4428  } __attribute__((__packed__, __may_alias__));
4429  return ((struct __loadu_pd*)__p)->__v;
4430}
4431
4432static __inline __m512 __DEFAULT_FN_ATTRS512
4433_mm512_loadu_ps(void const *__p)
4434{
4435  struct __loadu_ps {
4436    __m512_u __v;
4437  } __attribute__((__packed__, __may_alias__));
4438  return ((struct __loadu_ps*)__p)->__v;
4439}
4440
4441static __inline __m512 __DEFAULT_FN_ATTRS512
4442_mm512_load_ps(void const *__p)
4443{
4444  return *(__m512*)__p;
4445}
4446
4447static __inline __m512 __DEFAULT_FN_ATTRS512
4448_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4449{
4450  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4451                   (__v16sf) __W,
4452                   (__mmask16) __U);
4453}
4454
4455static __inline __m512 __DEFAULT_FN_ATTRS512
4456_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4457{
4458  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4459                                                  (__v16sf)
4460                                                  _mm512_setzero_ps (),
4461                                                  (__mmask16) __U);
4462}
4463
4464static __inline __m512d __DEFAULT_FN_ATTRS512
4465_mm512_load_pd(void const *__p)
4466{
4467  return *(__m512d*)__p;
4468}
4469
4470static __inline __m512d __DEFAULT_FN_ATTRS512
4471_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4472{
4473  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4474                          (__v8df) __W,
4475                          (__mmask8) __U);
4476}
4477
4478static __inline __m512d __DEFAULT_FN_ATTRS512
4479_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4480{
4481  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4482                                                   (__v8df)
4483                                                   _mm512_setzero_pd (),
4484                                                   (__mmask8) __U);
4485}
4486
4487static __inline __m512i __DEFAULT_FN_ATTRS512
4488_mm512_load_si512 (void const *__P)
4489{
4490  return *(__m512i *) __P;
4491}
4492
4493static __inline __m512i __DEFAULT_FN_ATTRS512
4494_mm512_load_epi32 (void const *__P)
4495{
4496  return *(__m512i *) __P;
4497}
4498
4499static __inline __m512i __DEFAULT_FN_ATTRS512
4500_mm512_load_epi64 (void const *__P)
4501{
4502  return *(__m512i *) __P;
4503}
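
/* Illustrative sketch: the masked load forms above differ only in how the
   inactive elements are filled: _mm512_mask_loadu_* merges them from the
   pass-through vector __W, while _mm512_maskz_loadu_* zeroes them.  Assuming
   __p points to at least sixteen ints:

     __m512i __src = _mm512_set1_epi32(-1);
     __m512i __m = _mm512_mask_loadu_epi32(__src, 0x00FF, __p);  // upper 8 from __src
     __m512i __z = _mm512_maskz_loadu_epi32(0x00FF, __p);        // upper 8 zeroed
*/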
4504
4505/* SIMD store ops */
4506
4507static __inline void __DEFAULT_FN_ATTRS512
4508_mm512_storeu_epi64 (void *__P, __m512i __A)
4509{
4510  struct __storeu_epi64 {
4511    __m512i_u __v;
4512  } __attribute__((__packed__, __may_alias__));
4513  ((struct __storeu_epi64*)__P)->__v = __A;
4514}
4515
4516static __inline void __DEFAULT_FN_ATTRS512
4517_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4518{
4519  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4520                                     (__mmask8) __U);
4521}
4522
4523static __inline void __DEFAULT_FN_ATTRS512
4524_mm512_storeu_si512 (void *__P, __m512i __A)
4525{
4526  struct __storeu_si512 {
4527    __m512i_u __v;
4528  } __attribute__((__packed__, __may_alias__));
4529  ((struct __storeu_si512*)__P)->__v = __A;
4530}
4531
4532static __inline void __DEFAULT_FN_ATTRS512
4533_mm512_storeu_epi32 (void *__P, __m512i __A)
4534{
4535  struct __storeu_epi32 {
4536    __m512i_u __v;
4537  } __attribute__((__packed__, __may_alias__));
4538  ((struct __storeu_epi32*)__P)->__v = __A;
4539}
4540
4541static __inline void __DEFAULT_FN_ATTRS512
4542_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4543{
4544  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4545                                     (__mmask16) __U);
4546}
4547
4548static __inline void __DEFAULT_FN_ATTRS512
4549_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4550{
4551  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4552}
4553
4554static __inline void __DEFAULT_FN_ATTRS512
4555_mm512_storeu_pd(void *__P, __m512d __A)
4556{
4557  struct __storeu_pd {
4558    __m512d_u __v;
4559  } __attribute__((__packed__, __may_alias__));
4560  ((struct __storeu_pd*)__P)->__v = __A;
4561}
4562
4563static __inline void __DEFAULT_FN_ATTRS512
4564_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4565{
4566  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4567                                   (__mmask16) __U);
4568}
4569
4570static __inline void __DEFAULT_FN_ATTRS512
4571_mm512_storeu_ps(void *__P, __m512 __A)
4572{
4573  struct __storeu_ps {
4574    __m512_u __v;
4575  } __attribute__((__packed__, __may_alias__));
4576  ((struct __storeu_ps*)__P)->__v = __A;
4577}
4578
4579static __inline void __DEFAULT_FN_ATTRS512
4580_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4581{
4582  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4583}
4584
4585static __inline void __DEFAULT_FN_ATTRS512
4586_mm512_store_pd(void *__P, __m512d __A)
4587{
4588  *(__m512d*)__P = __A;
4589}
4590
4591static __inline void __DEFAULT_FN_ATTRS512
4592_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4593{
4594  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4595                                   (__mmask16) __U);
4596}
4597
4598static __inline void __DEFAULT_FN_ATTRS512
4599_mm512_store_ps(void *__P, __m512 __A)
4600{
4601  *(__m512*)__P = __A;
4602}
4603
4604static __inline void __DEFAULT_FN_ATTRS512
4605_mm512_store_si512 (void *__P, __m512i __A)
4606{
4607  *(__m512i *) __P = __A;
4608}
4609
4610static __inline void __DEFAULT_FN_ATTRS512
4611_mm512_store_epi32 (void *__P, __m512i __A)
4612{
4613  *(__m512i *) __P = __A;
4614}
4615
4616static __inline void __DEFAULT_FN_ATTRS512
4617_mm512_store_epi64 (void *__P, __m512i __A)
4618{
4619  *(__m512i *) __P = __A;
4620}
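
/* Illustrative sketch: _mm512_store_* expects a 64-byte aligned destination,
   while _mm512_storeu_* accepts any alignment; the masked store forms write
   only the elements whose mask bit is set and leave the rest of the memory
   untouched.  For example:

     float __buf[16] __attribute__((aligned(64)));
     _mm512_store_ps(__buf, __v);                 // aligned store of all 16 floats
     _mm512_mask_storeu_ps(__buf, 0x000F, __v);   // writes only elements 0..3
*/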
4621
4622/* Mask ops */
4623
4624static __inline __mmask16 __DEFAULT_FN_ATTRS
4625_mm512_knot(__mmask16 __M)
4626{
4627  return __builtin_ia32_knothi(__M);
4628}
4629
4630/* Integer compare */
4631
4632#define _mm512_cmpeq_epi32_mask(A, B) \
4633    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
4634#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
4635    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
4636#define _mm512_cmpge_epi32_mask(A, B) \
4637    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
4638#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
4639    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
4640#define _mm512_cmpgt_epi32_mask(A, B) \
4641    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
4642#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
4643    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
4644#define _mm512_cmple_epi32_mask(A, B) \
4645    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
4646#define _mm512_mask_cmple_epi32_mask(k, A, B) \
4647    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
4648#define _mm512_cmplt_epi32_mask(A, B) \
4649    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
4650#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
4651    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
4652#define _mm512_cmpneq_epi32_mask(A, B) \
4653    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
4654#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
4655    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
4656
4657#define _mm512_cmpeq_epu32_mask(A, B) \
4658    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
4659#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
4660    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
4661#define _mm512_cmpge_epu32_mask(A, B) \
4662    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
4663#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
4664    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
4665#define _mm512_cmpgt_epu32_mask(A, B) \
4666    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
4667#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
4668    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
4669#define _mm512_cmple_epu32_mask(A, B) \
4670    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
4671#define _mm512_mask_cmple_epu32_mask(k, A, B) \
4672    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
4673#define _mm512_cmplt_epu32_mask(A, B) \
4674    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
4675#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
4676    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
4677#define _mm512_cmpneq_epu32_mask(A, B) \
4678    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
4679#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
4680    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4681
4682#define _mm512_cmpeq_epi64_mask(A, B) \
4683    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
4684#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
4685    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
4686#define _mm512_cmpge_epi64_mask(A, B) \
4687    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
4688#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
4689    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
4690#define _mm512_cmpgt_epi64_mask(A, B) \
4691    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
4692#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
4693    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
4694#define _mm512_cmple_epi64_mask(A, B) \
4695    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
4696#define _mm512_mask_cmple_epi64_mask(k, A, B) \
4697    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
4698#define _mm512_cmplt_epi64_mask(A, B) \
4699    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
4700#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
4701    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
4702#define _mm512_cmpneq_epi64_mask(A, B) \
4703    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
4704#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
4705    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
4706
4707#define _mm512_cmpeq_epu64_mask(A, B) \
4708    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
4709#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
4710    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
4711#define _mm512_cmpge_epu64_mask(A, B) \
4712    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
4713#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
4714    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
4715#define _mm512_cmpgt_epu64_mask(A, B) \
4716    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
4717#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
4718    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
4719#define _mm512_cmple_epu64_mask(A, B) \
4720    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
4721#define _mm512_mask_cmple_epu64_mask(k, A, B) \
4722    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
4723#define _mm512_cmplt_epu64_mask(A, B) \
4724    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
4725#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
4726    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
4727#define _mm512_cmpneq_epu64_mask(A, B) \
4728    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
4729#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
4730    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4731
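/* Illustrative sketch: the comparison macros above produce an __mmask16 or
   __mmask8 with one bit per element, which can then drive any masked
   operation.  For example:

     __mmask16 __k = _mm512_cmplt_epi32_mask(__a, __b);  // bit i set if a[i] < b[i]
     __m512i   __r = _mm512_maskz_mov_epi32(__k, __a);   // keep a[i] only where set
*/
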
4732static __inline__ __m512i __DEFAULT_FN_ATTRS512
4733_mm512_cvtepi8_epi32(__m128i __A)
4734{
4735  /* This function always performs a signed extension, but __v16qi is a char
4736     which may be signed or unsigned, so use __v16qs. */
4737  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4738}
4739
4740static __inline__ __m512i __DEFAULT_FN_ATTRS512
4741_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4742{
4743  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4744                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4745                                             (__v16si)__W);
4746}
4747
4748static __inline__ __m512i __DEFAULT_FN_ATTRS512
4749_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4750{
4751  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4752                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4753                                             (__v16si)_mm512_setzero_si512());
4754}
4755
4756static __inline__ __m512i __DEFAULT_FN_ATTRS512
4757_mm512_cvtepi8_epi64(__m128i __A)
4758{
4759  /* This function always performs a signed extension, but __v16qi is a char
4760     which may be signed or unsigned, so use __v16qs. */
4761  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4762}
4763
4764static __inline__ __m512i __DEFAULT_FN_ATTRS512
4765_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4766{
4767  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4768                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4769                                             (__v8di)__W);
4770}
4771
4772static __inline__ __m512i __DEFAULT_FN_ATTRS512
4773_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4774{
4775  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4776                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4777                                             (__v8di)_mm512_setzero_si512 ());
4778}
4779
4780static __inline__ __m512i __DEFAULT_FN_ATTRS512
4781_mm512_cvtepi32_epi64(__m256i __X)
4782{
4783  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4784}
4785
4786static __inline__ __m512i __DEFAULT_FN_ATTRS512
4787_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4788{
4789  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4790                                             (__v8di)_mm512_cvtepi32_epi64(__X),
4791                                             (__v8di)__W);
4792}
4793
4794static __inline__ __m512i __DEFAULT_FN_ATTRS512
4795_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
4796{
4797  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4798                                             (__v8di)_mm512_cvtepi32_epi64(__X),
4799                                             (__v8di)_mm512_setzero_si512());
4800}
4801
4802static __inline__ __m512i __DEFAULT_FN_ATTRS512
4803_mm512_cvtepi16_epi32(__m256i __A)
4804{
4805  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4806}
4807
4808static __inline__ __m512i __DEFAULT_FN_ATTRS512
4809_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4810{
4811  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4812                                            (__v16si)_mm512_cvtepi16_epi32(__A),
4813                                            (__v16si)__W);
4814}
4815
4816static __inline__ __m512i __DEFAULT_FN_ATTRS512
4817_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
4818{
4819  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4820                                            (__v16si)_mm512_cvtepi16_epi32(__A),
4821                                            (__v16si)_mm512_setzero_si512 ());
4822}
4823
4824static __inline__ __m512i __DEFAULT_FN_ATTRS512
4825_mm512_cvtepi16_epi64(__m128i __A)
4826{
4827  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4828}
4829
4830static __inline__ __m512i __DEFAULT_FN_ATTRS512
4831_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4832{
4833  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4834                                             (__v8di)_mm512_cvtepi16_epi64(__A),
4835                                             (__v8di)__W);
4836}
4837
4838static __inline__ __m512i __DEFAULT_FN_ATTRS512
4839_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4840{
4841  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4842                                             (__v8di)_mm512_cvtepi16_epi64(__A),
4843                                             (__v8di)_mm512_setzero_si512());
4844}
4845
4846static __inline__ __m512i __DEFAULT_FN_ATTRS512
4847_mm512_cvtepu8_epi32(__m128i __A)
4848{
4849  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4850}
4851
4852static __inline__ __m512i __DEFAULT_FN_ATTRS512
4853_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4854{
4855  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4856                                             (__v16si)_mm512_cvtepu8_epi32(__A),
4857                                             (__v16si)__W);
4858}
4859
4860static __inline__ __m512i __DEFAULT_FN_ATTRS512
4861_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
4862{
4863  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4864                                             (__v16si)_mm512_cvtepu8_epi32(__A),
4865                                             (__v16si)_mm512_setzero_si512());
4866}
4867
4868static __inline__ __m512i __DEFAULT_FN_ATTRS512
4869_mm512_cvtepu8_epi64(__m128i __A)
4870{
4871  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4872}
4873
4874static __inline__ __m512i __DEFAULT_FN_ATTRS512
4875_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4876{
4877  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4878                                             (__v8di)_mm512_cvtepu8_epi64(__A),
4879                                             (__v8di)__W);
4880}
4881
4882static __inline__ __m512i __DEFAULT_FN_ATTRS512
4883_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4884{
4885  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4886                                             (__v8di)_mm512_cvtepu8_epi64(__A),
4887                                             (__v8di)_mm512_setzero_si512());
4888}
4889
4890static __inline__ __m512i __DEFAULT_FN_ATTRS512
4891_mm512_cvtepu32_epi64(__m256i __X)
4892{
4893  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4894}
4895
4896static __inline__ __m512i __DEFAULT_FN_ATTRS512
4897_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4898{
4899  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4900                                             (__v8di)_mm512_cvtepu32_epi64(__X),
4901                                             (__v8di)__W);
4902}
4903
4904static __inline__ __m512i __DEFAULT_FN_ATTRS512
4905_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
4906{
4907  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4908                                             (__v8di)_mm512_cvtepu32_epi64(__X),
4909                                             (__v8di)_mm512_setzero_si512());
4910}
4911
4912static __inline__ __m512i __DEFAULT_FN_ATTRS512
4913_mm512_cvtepu16_epi32(__m256i __A)
4914{
4915  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4916}
4917
4918static __inline__ __m512i __DEFAULT_FN_ATTRS512
4919_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4920{
4921  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4922                                            (__v16si)_mm512_cvtepu16_epi32(__A),
4923                                            (__v16si)__W);
4924}
4925
4926static __inline__ __m512i __DEFAULT_FN_ATTRS512
4927_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
4928{
4929  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4930                                            (__v16si)_mm512_cvtepu16_epi32(__A),
4931                                            (__v16si)_mm512_setzero_si512());
4932}
4933
4934static __inline__ __m512i __DEFAULT_FN_ATTRS512
4935_mm512_cvtepu16_epi64(__m128i __A)
4936{
4937  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4938}
4939
4940static __inline__ __m512i __DEFAULT_FN_ATTRS512
4941_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4942{
4943  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4944                                             (__v8di)_mm512_cvtepu16_epi64(__A),
4945                                             (__v8di)__W);
4946}
4947
4948static __inline__ __m512i __DEFAULT_FN_ATTRS512
4949_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4950{
4951  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4952                                             (__v8di)_mm512_cvtepu16_epi64(__A),
4953                                             (__v8di)_mm512_setzero_si512());
4954}
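
/* Illustrative sketch: the _mm512_cvtepi*_epi* conversions above sign-extend
   each element, while the _mm512_cvtepu*_epi* forms zero-extend it.  For
   example:

     __m128i __b = _mm_set1_epi8(-1);
     __m512i __s = _mm512_cvtepi8_epi32(__b);   // every element is -1
     __m512i __u = _mm512_cvtepu8_epi32(__b);   // every element is 255
*/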
4955
4956static __inline__ __m512i __DEFAULT_FN_ATTRS512
4957_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4958{
4959  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4960}
4961
4962static __inline__ __m512i __DEFAULT_FN_ATTRS512
4963_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4964{
4965  return (__m512i)__builtin_ia32_selectd_512(__U,
4966                                           (__v16si)_mm512_rorv_epi32(__A, __B),
4967                                           (__v16si)__W);
4968}
4969
4970static __inline__ __m512i __DEFAULT_FN_ATTRS512
4971_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4972{
4973  return (__m512i)__builtin_ia32_selectd_512(__U,
4974                                           (__v16si)_mm512_rorv_epi32(__A, __B),
4975                                           (__v16si)_mm512_setzero_si512());
4976}
4977
4978static __inline__ __m512i __DEFAULT_FN_ATTRS512
4979_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4980{
4981  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4982}
4983
4984static __inline__ __m512i __DEFAULT_FN_ATTRS512
4985_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4986{
4987  return (__m512i)__builtin_ia32_selectq_512(__U,
4988                                            (__v8di)_mm512_rorv_epi64(__A, __B),
4989                                            (__v8di)__W);
4990}
4991
4992static __inline__ __m512i __DEFAULT_FN_ATTRS512
4993_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4994{
4995  return (__m512i)__builtin_ia32_selectq_512(__U,
4996                                            (__v8di)_mm512_rorv_epi64(__A, __B),
4997                                            (__v8di)_mm512_setzero_si512());
4998}
4999
5000
5001
5002#define _mm512_cmp_epi32_mask(a, b, p) \
5003  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
5004                                         (__v16si)(__m512i)(b), (int)(p), \
5005                                         (__mmask16)-1)
5006
5007#define _mm512_cmp_epu32_mask(a, b, p) \
5008  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5009                                          (__v16si)(__m512i)(b), (int)(p), \
5010                                          (__mmask16)-1)
5011
5012#define _mm512_cmp_epi64_mask(a, b, p) \
5013  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5014                                        (__v8di)(__m512i)(b), (int)(p), \
5015                                        (__mmask8)-1)
5016
5017#define _mm512_cmp_epu64_mask(a, b, p) \
5018  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5019                                         (__v8di)(__m512i)(b), (int)(p), \
5020                                         (__mmask8)-1)
5021
5022#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
5023  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
5024                                         (__v16si)(__m512i)(b), (int)(p), \
5025                                         (__mmask16)(m))
5026
5027#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
5028  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5029                                          (__v16si)(__m512i)(b), (int)(p), \
5030                                          (__mmask16)(m))
5031
5032#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
5033  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5034                                        (__v8di)(__m512i)(b), (int)(p), \
5035                                        (__mmask8)(m))
5036
5037#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
5038  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5039                                         (__v8di)(__m512i)(b), (int)(p), \
5040                                         (__mmask8)(m))
5041
5042#define _mm512_rol_epi32(a, b) \
5043  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))
5044
5045#define _mm512_mask_rol_epi32(W, U, a, b) \
5046  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5047                                      (__v16si)_mm512_rol_epi32((a), (b)), \
5048                                      (__v16si)(__m512i)(W))
5049
5050#define _mm512_maskz_rol_epi32(U, a, b) \
5051  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5052                                      (__v16si)_mm512_rol_epi32((a), (b)), \
5053                                      (__v16si)_mm512_setzero_si512())
5054
5055#define _mm512_rol_epi64(a, b) \
5056  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))
5057
5058#define _mm512_mask_rol_epi64(W, U, a, b) \
5059  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5060                                      (__v8di)_mm512_rol_epi64((a), (b)), \
5061                                      (__v8di)(__m512i)(W))
5062
5063#define _mm512_maskz_rol_epi64(U, a, b) \
5064  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5065                                      (__v8di)_mm512_rol_epi64((a), (b)), \
5066                                      (__v8di)_mm512_setzero_si512())
5067
5068static __inline__ __m512i __DEFAULT_FN_ATTRS512
5069_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5070{
5071  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5072}
5073
5074static __inline__ __m512i __DEFAULT_FN_ATTRS512
5075_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5076{
5077  return (__m512i)__builtin_ia32_selectd_512(__U,
5078                                           (__v16si)_mm512_rolv_epi32(__A, __B),
5079                                           (__v16si)__W);
5080}
5081
5082static __inline__ __m512i __DEFAULT_FN_ATTRS512
5083_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5084{
5085  return (__m512i)__builtin_ia32_selectd_512(__U,
5086                                           (__v16si)_mm512_rolv_epi32(__A, __B),
5087                                           (__v16si)_mm512_setzero_si512());
5088}
5089
5090static __inline__ __m512i __DEFAULT_FN_ATTRS512
5091_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5092{
5093  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5094}
5095
5096static __inline__ __m512i __DEFAULT_FN_ATTRS512
5097_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5098{
5099  return (__m512i)__builtin_ia32_selectq_512(__U,
5100                                            (__v8di)_mm512_rolv_epi64(__A, __B),
5101                                            (__v8di)__W);
5102}
5103
5104static __inline__ __m512i __DEFAULT_FN_ATTRS512
5105_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5106{
5107  return (__m512i)__builtin_ia32_selectq_512(__U,
5108                                            (__v8di)_mm512_rolv_epi64(__A, __B),
5109                                            (__v8di)_mm512_setzero_si512());
5110}
5111
5112#define _mm512_ror_epi32(A, B) \
5113  (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))
5114
5115#define _mm512_mask_ror_epi32(W, U, A, B) \
5116  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5117                                      (__v16si)_mm512_ror_epi32((A), (B)), \
5118                                      (__v16si)(__m512i)(W))
5119
5120#define _mm512_maskz_ror_epi32(U, A, B) \
5121  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5122                                      (__v16si)_mm512_ror_epi32((A), (B)), \
5123                                      (__v16si)_mm512_setzero_si512())
5124
5125#define _mm512_ror_epi64(A, B) \
5126  (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))
5127
5128#define _mm512_mask_ror_epi64(W, U, A, B) \
5129  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5130                                      (__v8di)_mm512_ror_epi64((A), (B)), \
5131                                      (__v8di)(__m512i)(W))
5132
5133#define _mm512_maskz_ror_epi64(U, A, B) \
5134  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5135                                      (__v8di)_mm512_ror_epi64((A), (B)), \
5136                                      (__v8di)_mm512_setzero_si512())
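
/* Illustrative sketch: _mm512_rol_epi32/_mm512_ror_epi32 rotate every element
   by the same immediate count, while _mm512_rolv_epi32/_mm512_rorv_epi32 take
   a per-element count vector.  For example:

     __m512i __r1 = _mm512_rol_epi32(__a, 8);          // one count for all lanes
     __m512i __r2 = _mm512_rolv_epi32(__a, __counts);  // counts taken per element
*/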
5137
5138static __inline__ __m512i __DEFAULT_FN_ATTRS512
5139_mm512_slli_epi32(__m512i __A, int __B)
5140{
5141  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5142}
5143
5144static __inline__ __m512i __DEFAULT_FN_ATTRS512
5145_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5146{
5147  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5148                                         (__v16si)_mm512_slli_epi32(__A, __B),
5149                                         (__v16si)__W);
5150}
5151
5152static __inline__ __m512i __DEFAULT_FN_ATTRS512
5153_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5154  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5155                                         (__v16si)_mm512_slli_epi32(__A, __B),
5156                                         (__v16si)_mm512_setzero_si512());
5157}
5158
5159static __inline__ __m512i __DEFAULT_FN_ATTRS512
5160_mm512_slli_epi64(__m512i __A, int __B)
5161{
5162  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5163}
5164
5165static __inline__ __m512i __DEFAULT_FN_ATTRS512
5166_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5167{
5168  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5169                                          (__v8di)_mm512_slli_epi64(__A, __B),
5170                                          (__v8di)__W);
5171}
5172
5173static __inline__ __m512i __DEFAULT_FN_ATTRS512
5174_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5175{
5176  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5177                                          (__v8di)_mm512_slli_epi64(__A, __B),
5178                                          (__v8di)_mm512_setzero_si512());
5179}
5180
5181static __inline__ __m512i __DEFAULT_FN_ATTRS512
5182_mm512_srli_epi32(__m512i __A, int __B)
5183{
5184  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5185}
5186
5187static __inline__ __m512i __DEFAULT_FN_ATTRS512
5188_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5189{
5190  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5191                                         (__v16si)_mm512_srli_epi32(__A, __B),
5192                                         (__v16si)__W);
5193}
5194
5195static __inline__ __m512i __DEFAULT_FN_ATTRS512
5196_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5197  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5198                                         (__v16si)_mm512_srli_epi32(__A, __B),
5199                                         (__v16si)_mm512_setzero_si512());
5200}
5201
5202static __inline__ __m512i __DEFAULT_FN_ATTRS512
5203_mm512_srli_epi64(__m512i __A, int __B)
5204{
5205  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5206}
5207
5208static __inline__ __m512i __DEFAULT_FN_ATTRS512
5209_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5210{
5211  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5212                                          (__v8di)_mm512_srli_epi64(__A, __B),
5213                                          (__v8di)__W);
5214}
5215
5216static __inline__ __m512i __DEFAULT_FN_ATTRS512
5217_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5218{
5219  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5220                                          (__v8di)_mm512_srli_epi64(__A, __B),
5221                                          (__v8di)_mm512_setzero_si512());
5222}
5223
5224static __inline__ __m512i __DEFAULT_FN_ATTRS512
5225_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5226{
5227  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5228              (__v16si) __W,
5229              (__mmask16) __U);
5230}
5231
5232static __inline__ __m512i __DEFAULT_FN_ATTRS512
5233_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5234{
5235  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5236              (__v16si)
5237              _mm512_setzero_si512 (),
5238              (__mmask16) __U);
5239}
5240
5241static __inline__ void __DEFAULT_FN_ATTRS512
5242_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5243{
5244  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5245          (__mmask16) __U);
5246}
5247
5248static __inline__ __m512i __DEFAULT_FN_ATTRS512
5249_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5250{
5251  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5252                 (__v16si) __A,
5253                 (__v16si) __W);
5254}
5255
5256static __inline__ __m512i __DEFAULT_FN_ATTRS512
5257_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5258{
5259  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5260                 (__v16si) __A,
5261                 (__v16si) _mm512_setzero_si512 ());
5262}
5263
5264static __inline__ __m512i __DEFAULT_FN_ATTRS512
5265_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5266{
5267  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5268                 (__v8di) __A,
5269                 (__v8di) __W);
5270}
5271
5272static __inline__ __m512i __DEFAULT_FN_ATTRS512
5273_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5274{
5275  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5276                 (__v8di) __A,
5277                 (__v8di) _mm512_setzero_si512 ());
5278}
5279
5280static __inline__ __m512i __DEFAULT_FN_ATTRS512
5281_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5282{
5283  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5284              (__v8di) __W,
5285              (__mmask8) __U);
5286}
5287
5288static __inline__ __m512i __DEFAULT_FN_ATTRS512
5289_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5290{
5291  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5292              (__v8di)
5293              _mm512_setzero_si512 (),
5294              (__mmask8) __U);
5295}
5296
5297static __inline__ void __DEFAULT_FN_ATTRS512
5298_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5299{
5300  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5301          (__mmask8) __U);
5302}
5303
5304static __inline__ __m512d __DEFAULT_FN_ATTRS512
5305_mm512_movedup_pd (__m512d __A)
5306{
5307  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5308                                          0, 0, 2, 2, 4, 4, 6, 6);
5309}
5310
5311static __inline__ __m512d __DEFAULT_FN_ATTRS512
5312_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5313{
5314  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5315                                              (__v8df)_mm512_movedup_pd(__A),
5316                                              (__v8df)__W);
5317}
5318
5319static __inline__ __m512d __DEFAULT_FN_ATTRS512
5320_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5321{
5322  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5323                                              (__v8df)_mm512_movedup_pd(__A),
5324                                              (__v8df)_mm512_setzero_pd());
5325}
5326
5327#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
5328  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5329                                             (__v8df)(__m512d)(B), \
5330                                             (__v8di)(__m512i)(C), (int)(imm), \
5331                                             (__mmask8)-1, (int)(R))
5332
5333#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
5334  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5335                                             (__v8df)(__m512d)(B), \
5336                                             (__v8di)(__m512i)(C), (int)(imm), \
5337                                             (__mmask8)(U), (int)(R))
5338
5339#define _mm512_fixupimm_pd(A, B, C, imm) \
5340  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5341                                             (__v8df)(__m512d)(B), \
5342                                             (__v8di)(__m512i)(C), (int)(imm), \
5343                                             (__mmask8)-1, \
5344                                             _MM_FROUND_CUR_DIRECTION)
5345
5346#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
5347  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5348                                             (__v8df)(__m512d)(B), \
5349                                             (__v8di)(__m512i)(C), (int)(imm), \
5350                                             (__mmask8)(U), \
5351                                             _MM_FROUND_CUR_DIRECTION)
5352
5353#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
5354  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5355                                              (__v8df)(__m512d)(B), \
5356                                              (__v8di)(__m512i)(C), \
5357                                              (int)(imm), (__mmask8)(U), \
5358                                              (int)(R))
5359
5360#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
5361  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5362                                              (__v8df)(__m512d)(B), \
5363                                              (__v8di)(__m512i)(C), \
5364                                              (int)(imm), (__mmask8)(U), \
5365                                              _MM_FROUND_CUR_DIRECTION)
5366
5367#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
5368  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5369                                            (__v16sf)(__m512)(B), \
5370                                            (__v16si)(__m512i)(C), (int)(imm), \
5371                                            (__mmask16)-1, (int)(R))
5372
5373#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
5374  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5375                                            (__v16sf)(__m512)(B), \
5376                                            (__v16si)(__m512i)(C), (int)(imm), \
5377                                            (__mmask16)(U), (int)(R))
5378
5379#define _mm512_fixupimm_ps(A, B, C, imm) \
5380  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5381                                            (__v16sf)(__m512)(B), \
5382                                            (__v16si)(__m512i)(C), (int)(imm), \
5383                                            (__mmask16)-1, \
5384                                            _MM_FROUND_CUR_DIRECTION)
5385
5386#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5387  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5388                                            (__v16sf)(__m512)(B), \
5389                                            (__v16si)(__m512i)(C), (int)(imm), \
5390                                            (__mmask16)(U), \
5391                                            _MM_FROUND_CUR_DIRECTION)
5392
5393#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5394  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5395                                             (__v16sf)(__m512)(B), \
5396                                             (__v16si)(__m512i)(C), \
5397                                             (int)(imm), (__mmask16)(U), \
5398                                             (int)(R))
5399
5400#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
5401  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5402                                             (__v16sf)(__m512)(B), \
5403                                             (__v16si)(__m512i)(C), \
5404                                             (int)(imm), (__mmask16)(U), \
5405                                             _MM_FROUND_CUR_DIRECTION)
5406
5407#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
5408  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5409                                          (__v2df)(__m128d)(B), \
5410                                          (__v2di)(__m128i)(C), (int)(imm), \
5411                                          (__mmask8)-1, (int)(R))
5412
5413#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
5414  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5415                                          (__v2df)(__m128d)(B), \
5416                                          (__v2di)(__m128i)(C), (int)(imm), \
5417                                          (__mmask8)(U), (int)(R))
5418
5419#define _mm_fixupimm_sd(A, B, C, imm) \
5420  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5421                                          (__v2df)(__m128d)(B), \
5422                                          (__v2di)(__m128i)(C), (int)(imm), \
5423                                          (__mmask8)-1, \
5424                                          _MM_FROUND_CUR_DIRECTION)
5425
5426#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5427  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5428                                          (__v2df)(__m128d)(B), \
5429                                          (__v2di)(__m128i)(C), (int)(imm), \
5430                                          (__mmask8)(U), \
5431                                          _MM_FROUND_CUR_DIRECTION)
5432
5433#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
5434  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5435                                           (__v2df)(__m128d)(B), \
5436                                           (__v2di)(__m128i)(C), (int)(imm), \
5437                                           (__mmask8)(U), (int)(R))
5438
5439#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5440  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5441                                           (__v2df)(__m128d)(B), \
5442                                           (__v2di)(__m128i)(C), (int)(imm), \
5443                                           (__mmask8)(U), \
5444                                           _MM_FROUND_CUR_DIRECTION)
5445
5446#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
5447  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5448                                         (__v4sf)(__m128)(B), \
5449                                         (__v4si)(__m128i)(C), (int)(imm), \
5450                                         (__mmask8)-1, (int)(R))
5451
5452#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
5453  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5454                                         (__v4sf)(__m128)(B), \
5455                                         (__v4si)(__m128i)(C), (int)(imm), \
5456                                         (__mmask8)(U), (int)(R))
5457
5458#define _mm_fixupimm_ss(A, B, C, imm) \
5459  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5460                                         (__v4sf)(__m128)(B), \
5461                                         (__v4si)(__m128i)(C), (int)(imm), \
5462                                         (__mmask8)-1, \
5463                                         _MM_FROUND_CUR_DIRECTION)
5464
5465#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5466  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5467                                         (__v4sf)(__m128)(B), \
5468                                         (__v4si)(__m128i)(C), (int)(imm), \
5469                                         (__mmask8)(U), \
5470                                         _MM_FROUND_CUR_DIRECTION)
5471
5472#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5473  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5474                                          (__v4sf)(__m128)(B), \
5475                                          (__v4si)(__m128i)(C), (int)(imm), \
5476                                          (__mmask8)(U), (int)(R))
5477
5478#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5479  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5480                                          (__v4sf)(__m128)(B), \
5481                                          (__v4si)(__m128i)(C), (int)(imm), \
5482                                          (__mmask8)(U), \
5483                                          _MM_FROUND_CUR_DIRECTION)
5484
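/* Extract the exponent of scalar values as a floating-point number
   (vgetexpsd/vgetexpss).  */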
5485#define _mm_getexp_round_sd(A, B, R) \
5486  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5487                                                 (__v2df)(__m128d)(B), \
5488                                                 (__v2df)_mm_setzero_pd(), \
5489                                                 (__mmask8)-1, (int)(R))
5490
5491
5492static __inline__ __m128d __DEFAULT_FN_ATTRS128
5493_mm_getexp_sd (__m128d __A, __m128d __B)
5494{
5495  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5496                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5497}
5498
5499static __inline__ __m128d __DEFAULT_FN_ATTRS128
5500_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5501{
5502 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5503          (__v2df) __B,
5504          (__v2df) __W,
5505          (__mmask8) __U,
5506          _MM_FROUND_CUR_DIRECTION);
5507}
5508
5509#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5510  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5511                                                 (__v2df)(__m128d)(B), \
5512                                                 (__v2df)(__m128d)(W), \
5513                                                 (__mmask8)(U), (int)(R))
5514
5515static __inline__ __m128d __DEFAULT_FN_ATTRS128
5516_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5517{
5518 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5519          (__v2df) __B,
5520          (__v2df) _mm_setzero_pd (),
5521          (__mmask8) __U,
5522          _MM_FROUND_CUR_DIRECTION);
5523}
5524
5525#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5526  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5527                                                 (__v2df)(__m128d)(B), \
5528                                                 (__v2df)_mm_setzero_pd(), \
5529                                                 (__mmask8)(U), (int)(R))
5530
5531#define _mm_getexp_round_ss(A, B, R) \
5532  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5533                                                (__v4sf)(__m128)(B), \
5534                                                (__v4sf)_mm_setzero_ps(), \
5535                                                (__mmask8)-1, (int)(R))
5536
5537static __inline__ __m128 __DEFAULT_FN_ATTRS128
5538_mm_getexp_ss (__m128 __A, __m128 __B)
5539{
5540  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5541                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5542}
5543
5544static __inline__ __m128 __DEFAULT_FN_ATTRS128
5545_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5546{
5547 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5548          (__v4sf) __B,
5549          (__v4sf) __W,
5550          (__mmask8) __U,
5551          _MM_FROUND_CUR_DIRECTION);
5552}
5553
5554#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5555  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5556                                                (__v4sf)(__m128)(B), \
5557                                                (__v4sf)(__m128)(W), \
5558                                                (__mmask8)(U), (int)(R))
5559
5560static __inline__ __m128 __DEFAULT_FN_ATTRS128
5561_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5562{
5563 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5564          (__v4sf) __B,
5565          (__v4sf) _mm_setzero_ps (),
5566          (__mmask8) __U,
5567          _MM_FROUND_CUR_DIRECTION);
5568}
5569
5570#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5571  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5572                                                (__v4sf)(__m128)(B), \
5573                                                (__v4sf)_mm_setzero_ps(), \
5574                                                (__mmask8)(U), (int)(R))
5575
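/* Extract the normalized mantissa (vgetmantsd/vgetmantss); C selects the
   normalization interval and D the sign control.  */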
5576#define _mm_getmant_round_sd(A, B, C, D, R) \
5577  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5578                                               (__v2df)(__m128d)(B), \
5579                                               (int)(((D)<<2) | (C)), \
5580                                               (__v2df)_mm_setzero_pd(), \
5581                                               (__mmask8)-1, (int)(R))
5582
5583#define _mm_getmant_sd(A, B, C, D)  \
5584  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5585                                               (__v2df)(__m128d)(B), \
5586                                               (int)(((D)<<2) | (C)), \
5587                                               (__v2df)_mm_setzero_pd(), \
5588                                               (__mmask8)-1, \
5589                                               _MM_FROUND_CUR_DIRECTION)
5590
5591#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
5592  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5593                                               (__v2df)(__m128d)(B), \
5594                                               (int)(((D)<<2) | (C)), \
5595                                               (__v2df)(__m128d)(W), \
5596                                               (__mmask8)(U), \
5597                                               _MM_FROUND_CUR_DIRECTION)
5598
5599#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
5600  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5601                                               (__v2df)(__m128d)(B), \
5602                                               (int)(((D)<<2) | (C)), \
5603                                               (__v2df)(__m128d)(W), \
5604                                               (__mmask8)(U), (int)(R))
5605
5606#define _mm_maskz_getmant_sd(U, A, B, C, D) \
5607  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5608                                               (__v2df)(__m128d)(B), \
5609                                               (int)(((D)<<2) | (C)), \
5610                                               (__v2df)_mm_setzero_pd(), \
5611                                               (__mmask8)(U), \
5612                                               _MM_FROUND_CUR_DIRECTION)
5613
5614#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
5615  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5616                                               (__v2df)(__m128d)(B), \
5617                                               (int)(((D)<<2) | (C)), \
5618                                               (__v2df)_mm_setzero_pd(), \
5619                                               (__mmask8)(U), (int)(R))
5620
5621#define _mm_getmant_round_ss(A, B, C, D, R) \
5622  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5623                                              (__v4sf)(__m128)(B), \
5624                                              (int)(((D)<<2) | (C)), \
5625                                              (__v4sf)_mm_setzero_ps(), \
5626                                              (__mmask8)-1, (int)(R))
5627
5628#define _mm_getmant_ss(A, B, C, D) \
5629  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5630                                              (__v4sf)(__m128)(B), \
5631                                              (int)(((D)<<2) | (C)), \
5632                                              (__v4sf)_mm_setzero_ps(), \
5633                                              (__mmask8)-1, \
5634                                              _MM_FROUND_CUR_DIRECTION)
5635
5636#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
5637  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5638                                              (__v4sf)(__m128)(B), \
5639                                              (int)(((D)<<2) | (C)), \
5640                                              (__v4sf)(__m128)(W), \
5641                                              (__mmask8)(U), \
5642                                              _MM_FROUND_CUR_DIRECTION)
5643
5644#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
5645  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5646                                              (__v4sf)(__m128)(B), \
5647                                              (int)(((D)<<2) | (C)), \
5648                                              (__v4sf)(__m128)(W), \
5649                                              (__mmask8)(U), (int)(R))
5650
5651#define _mm_maskz_getmant_ss(U, A, B, C, D) \
5652  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5653                                              (__v4sf)(__m128)(B), \
5654                                              (int)(((D)<<2) | (C)), \
5655                                              (__v4sf)_mm_setzero_ps(), \
5656                                              (__mmask8)(U), \
5657                                              _MM_FROUND_CUR_DIRECTION)
5658
5659#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
5660  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5661                                              (__v4sf)(__m128)(B), \
5662                                              (int)(((D)<<2) | (C)), \
5663                                              (__v4sf)_mm_setzero_ps(), \
5664                                              (__mmask8)(U), (int)(R))
5665
5666static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5667_mm512_kmov (__mmask16 __A)
5668{
5669  return  __A;
5670}
5671
5672#define _mm_comi_round_sd(A, B, P, R) \
5673  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
5674                              (int)(P), (int)(R))
5675
5676#define _mm_comi_round_ss(A, B, P, R) \
5677  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
5678                              (int)(P), (int)(R))
5679
5680#ifdef __x86_64__
5681#define _mm_cvt_roundsd_si64(A, R) \
5682  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
5683#endif
5684
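/* 512-bit shifts: sll/srl/sra shift every element by the count held in the
   low element of __B (vpslld/q, vpsrld/q, vpsrad/q); the *v forms shift each
   element by its own per-element count (vpsllv*, vpsrlv*, vpsrav*).  */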
5685static __inline__ __m512i __DEFAULT_FN_ATTRS512
5686_mm512_sll_epi32(__m512i __A, __m128i __B)
5687{
5688  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5689}
5690
5691static __inline__ __m512i __DEFAULT_FN_ATTRS512
5692_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5693{
5694  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5695                                          (__v16si)_mm512_sll_epi32(__A, __B),
5696                                          (__v16si)__W);
5697}
5698
5699static __inline__ __m512i __DEFAULT_FN_ATTRS512
5700_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5701{
5702  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5703                                          (__v16si)_mm512_sll_epi32(__A, __B),
5704                                          (__v16si)_mm512_setzero_si512());
5705}
5706
5707static __inline__ __m512i __DEFAULT_FN_ATTRS512
5708_mm512_sll_epi64(__m512i __A, __m128i __B)
5709{
5710  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5711}
5712
5713static __inline__ __m512i __DEFAULT_FN_ATTRS512
5714_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5715{
5716  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5717                                             (__v8di)_mm512_sll_epi64(__A, __B),
5718                                             (__v8di)__W);
5719}
5720
5721static __inline__ __m512i __DEFAULT_FN_ATTRS512
5722_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5723{
5724  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5725                                           (__v8di)_mm512_sll_epi64(__A, __B),
5726                                           (__v8di)_mm512_setzero_si512());
5727}
5728
5729static __inline__ __m512i __DEFAULT_FN_ATTRS512
5730_mm512_sllv_epi32(__m512i __X, __m512i __Y)
5731{
5732  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5733}
5734
5735static __inline__ __m512i __DEFAULT_FN_ATTRS512
5736_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5737{
5738  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5739                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
5740                                           (__v16si)__W);
5741}
5742
5743static __inline__ __m512i __DEFAULT_FN_ATTRS512
5744_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5745{
5746  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5747                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
5748                                           (__v16si)_mm512_setzero_si512());
5749}
5750
5751static __inline__ __m512i __DEFAULT_FN_ATTRS512
5752_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5753{
5754  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5755}
5756
5757static __inline__ __m512i __DEFAULT_FN_ATTRS512
5758_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5759{
5760  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5761                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
5762                                            (__v8di)__W);
5763}
5764
5765static __inline__ __m512i __DEFAULT_FN_ATTRS512
5766_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5767{
5768  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5769                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
5770                                            (__v8di)_mm512_setzero_si512());
5771}
5772
5773static __inline__ __m512i __DEFAULT_FN_ATTRS512
5774_mm512_sra_epi32(__m512i __A, __m128i __B)
5775{
5776  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5777}
5778
5779static __inline__ __m512i __DEFAULT_FN_ATTRS512
5780_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5781{
5782  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5783                                          (__v16si)_mm512_sra_epi32(__A, __B),
5784                                          (__v16si)__W);
5785}
5786
5787static __inline__ __m512i __DEFAULT_FN_ATTRS512
5788_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5789{
5790  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5791                                          (__v16si)_mm512_sra_epi32(__A, __B),
5792                                          (__v16si)_mm512_setzero_si512());
5793}
5794
5795static __inline__ __m512i __DEFAULT_FN_ATTRS512
5796_mm512_sra_epi64(__m512i __A, __m128i __B)
5797{
5798  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5799}
5800
5801static __inline__ __m512i __DEFAULT_FN_ATTRS512
5802_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5803{
5804  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5805                                           (__v8di)_mm512_sra_epi64(__A, __B),
5806                                           (__v8di)__W);
5807}
5808
5809static __inline__ __m512i __DEFAULT_FN_ATTRS512
5810_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5811{
5812  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5813                                           (__v8di)_mm512_sra_epi64(__A, __B),
5814                                           (__v8di)_mm512_setzero_si512());
5815}
5816
5817static __inline__ __m512i __DEFAULT_FN_ATTRS512
5818_mm512_srav_epi32(__m512i __X, __m512i __Y)
5819{
5820  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5821}
5822
5823static __inline__ __m512i __DEFAULT_FN_ATTRS512
5824_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5825{
5826  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5827                                           (__v16si)_mm512_srav_epi32(__X, __Y),
5828                                           (__v16si)__W);
5829}
5830
5831static __inline__ __m512i __DEFAULT_FN_ATTRS512
5832_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5833{
5834  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5835                                           (__v16si)_mm512_srav_epi32(__X, __Y),
5836                                           (__v16si)_mm512_setzero_si512());
5837}
5838
5839static __inline__ __m512i __DEFAULT_FN_ATTRS512
5840_mm512_srav_epi64(__m512i __X, __m512i __Y)
5841{
5842  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5843}
5844
5845static __inline__ __m512i __DEFAULT_FN_ATTRS512
5846_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5847{
5848  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5849                                            (__v8di)_mm512_srav_epi64(__X, __Y),
5850                                            (__v8di)__W);
5851}
5852
5853static __inline__ __m512i __DEFAULT_FN_ATTRS512
5854_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5855{
5856  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5857                                            (__v8di)_mm512_srav_epi64(__X, __Y),
5858                                            (__v8di)_mm512_setzero_si512());
5859}
5860
5861static __inline__ __m512i __DEFAULT_FN_ATTRS512
5862_mm512_srl_epi32(__m512i __A, __m128i __B)
5863{
5864  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5865}
5866
5867static __inline__ __m512i __DEFAULT_FN_ATTRS512
5868_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5869{
5870  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5871                                          (__v16si)_mm512_srl_epi32(__A, __B),
5872                                          (__v16si)__W);
5873}
5874
5875static __inline__ __m512i __DEFAULT_FN_ATTRS512
5876_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5877{
5878  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5879                                          (__v16si)_mm512_srl_epi32(__A, __B),
5880                                          (__v16si)_mm512_setzero_si512());
5881}
5882
5883static __inline__ __m512i __DEFAULT_FN_ATTRS512
5884_mm512_srl_epi64(__m512i __A, __m128i __B)
5885{
5886  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5887}
5888
5889static __inline__ __m512i __DEFAULT_FN_ATTRS512
5890_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5891{
5892  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5893                                           (__v8di)_mm512_srl_epi64(__A, __B),
5894                                           (__v8di)__W);
5895}
5896
5897static __inline__ __m512i __DEFAULT_FN_ATTRS512
5898_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5899{
5900  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5901                                           (__v8di)_mm512_srl_epi64(__A, __B),
5902                                           (__v8di)_mm512_setzero_si512());
5903}
5904
5905static __inline__ __m512i __DEFAULT_FN_ATTRS512
5906_mm512_srlv_epi32(__m512i __X, __m512i __Y)
5907{
5908  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5909}
5910
5911static __inline__ __m512i __DEFAULT_FN_ATTRS512
5912_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5913{
5914  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5915                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
5916                                           (__v16si)__W);
5917}
5918
5919static __inline__ __m512i __DEFAULT_FN_ATTRS512
5920_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5921{
5922  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5923                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
5924                                           (__v16si)_mm512_setzero_si512());
5925}
5926
5927static __inline__ __m512i __DEFAULT_FN_ATTRS512
5928_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5929{
5930  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5931}
5932
5933static __inline__ __m512i __DEFAULT_FN_ATTRS512
5934_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5935{
5936  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5937                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
5938                                            (__v8di)__W);
5939}
5940
5941static __inline__ __m512i __DEFAULT_FN_ATTRS512
5942_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5943{
5944  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5945                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
5946                                            (__v8di)_mm512_setzero_si512());
5947}
5948
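/* Three-operand bitwise logic (vpternlogd/vpternlogq): imm is an 8-bit truth
   table applied bit-by-bit to A, B and C; e.g. 0xCA yields the bitwise select
   (A & B) | (~A & C).  */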
5949#define _mm512_ternarylogic_epi32(A, B, C, imm) \
5950  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
5951                                            (__v16si)(__m512i)(B), \
5952                                            (__v16si)(__m512i)(C), (int)(imm), \
5953                                            (__mmask16)-1)
5954
5955#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
5956  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
5957                                            (__v16si)(__m512i)(B), \
5958                                            (__v16si)(__m512i)(C), (int)(imm), \
5959                                            (__mmask16)(U))
5960
5961#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
5962  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
5963                                             (__v16si)(__m512i)(B), \
5964                                             (__v16si)(__m512i)(C), \
5965                                             (int)(imm), (__mmask16)(U))
5966
5967#define _mm512_ternarylogic_epi64(A, B, C, imm) \
5968  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
5969                                            (__v8di)(__m512i)(B), \
5970                                            (__v8di)(__m512i)(C), (int)(imm), \
5971                                            (__mmask8)-1)
5972
5973#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
5974  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
5975                                            (__v8di)(__m512i)(B), \
5976                                            (__v8di)(__m512i)(C), (int)(imm), \
5977                                            (__mmask8)(U))
5978
5979#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
5980  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
5981                                             (__v8di)(__m512i)(B), \
5982                                             (__v8di)(__m512i)(C), (int)(imm), \
5983                                             (__mmask8)(U))
5984
5985#ifdef __x86_64__
5986#define _mm_cvt_roundsd_i64(A, R) \
5987  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
5988#endif
5989
5990#define _mm_cvt_roundsd_si32(A, R) \
5991  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
5992
5993#define _mm_cvt_roundsd_i32(A, R) \
5994  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
5995
5996#define _mm_cvt_roundsd_u32(A, R) \
5997  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))
5998
5999static __inline__ unsigned __DEFAULT_FN_ATTRS128
6000_mm_cvtsd_u32 (__m128d __A)
6001{
6002  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6003             _MM_FROUND_CUR_DIRECTION);
6004}
6005
6006#ifdef __x86_64__
6007#define _mm_cvt_roundsd_u64(A, R) \
6008  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6009                                                  (int)(R))
6010
6011static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6012_mm_cvtsd_u64 (__m128d __A)
6013{
6014  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6015                 __A,
6016                 _MM_FROUND_CUR_DIRECTION);
6017}
6018#endif
6019
6020#define _mm_cvt_roundss_si32(A, R) \
6021  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
6022
6023#define _mm_cvt_roundss_i32(A, R) \
6024  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
6025
6026#ifdef __x86_64__
6027#define _mm_cvt_roundss_si64(A, R) \
6028  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
6029
6030#define _mm_cvt_roundss_i64(A, R) \
6031  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
6032#endif
6033
6034#define _mm_cvt_roundss_u32(A, R) \
6035  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))
6036
6037static __inline__ unsigned __DEFAULT_FN_ATTRS128
6038_mm_cvtss_u32 (__m128 __A)
6039{
6040  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6041             _MM_FROUND_CUR_DIRECTION);
6042}
6043
6044#ifdef __x86_64__
6045#define _mm_cvt_roundss_u64(A, R) \
6046  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6047                                                  (int)(R))
6048
6049static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6050_mm_cvtss_u64 (__m128 __A)
6051{
6052  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6053                 __A,
6054                 _MM_FROUND_CUR_DIRECTION);
6055}
6056#endif
6057
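/* Scalar conversions that truncate toward zero (vcvttsd2si/usi,
   vcvttss2si/usi).  */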
6058#define _mm_cvtt_roundsd_i32(A, R) \
6059  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6060
6061#define _mm_cvtt_roundsd_si32(A, R) \
6062  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6063
6064static __inline__ int __DEFAULT_FN_ATTRS128
6065_mm_cvttsd_i32 (__m128d __A)
6066{
6067  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6068              _MM_FROUND_CUR_DIRECTION);
6069}
6070
6071#ifdef __x86_64__
6072#define _mm_cvtt_roundsd_si64(A, R) \
6073  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6074
6075#define _mm_cvtt_roundsd_i64(A, R) \
6076  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6077
6078static __inline__ long long __DEFAULT_FN_ATTRS128
6079_mm_cvttsd_i64 (__m128d __A)
6080{
6081  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6082              _MM_FROUND_CUR_DIRECTION);
6083}
6084#endif
6085
6086#define _mm_cvtt_roundsd_u32(A, R) \
6087  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))
6088
6089static __inline__ unsigned __DEFAULT_FN_ATTRS128
6090_mm_cvttsd_u32 (__m128d __A)
6091{
6092  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6093              _MM_FROUND_CUR_DIRECTION);
6094}
6095
6096#ifdef __x86_64__
6097#define _mm_cvtt_roundsd_u64(A, R) \
6098  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6099                                                   (int)(R))
6100
6101static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6102_mm_cvttsd_u64 (__m128d __A)
6103{
6104  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6105                  __A,
6106                  _MM_FROUND_CUR_DIRECTION);
6107}
6108#endif
6109
6110#define _mm_cvtt_roundss_i32(A, R) \
6111  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6112
6113#define _mm_cvtt_roundss_si32(A, R) \
6114  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6115
6116static __inline__ int __DEFAULT_FN_ATTRS128
6117_mm_cvttss_i32 (__m128 __A)
6118{
6119  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6120              _MM_FROUND_CUR_DIRECTION);
6121}
6122
6123#ifdef __x86_64__
6124#define _mm_cvtt_roundss_i64(A, R) \
6125  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6126
6127#define _mm_cvtt_roundss_si64(A, R) \
6128  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6129
6130static __inline__ long long __DEFAULT_FN_ATTRS128
6131_mm_cvttss_i64 (__m128 __A)
6132{
6133  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6134              _MM_FROUND_CUR_DIRECTION);
6135}
6136#endif
6137
6138#define _mm_cvtt_roundss_u32(A, R) \
6139  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))
6140
6141static __inline__ unsigned __DEFAULT_FN_ATTRS128
6142_mm_cvttss_u32 (__m128 __A)
6143{
6144  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6145              _MM_FROUND_CUR_DIRECTION);
6146}
6147
6148#ifdef __x86_64__
6149#define _mm_cvtt_roundss_u64(A, R) \
6150  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6151                                                   (int)(R))
6152
6153static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6154_mm_cvttss_u64 (__m128 __A)
6155{
6156  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6157                  __A,
6158                  _MM_FROUND_CUR_DIRECTION);
6159}
6160#endif
6161
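/* Element permutes: in-lane permutes with an immediate or variable control
   (vpermilpd/vpermilps) and two-source index permutes (vpermi2pd/vpermi2ps).  */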
6162#define _mm512_permute_pd(X, C) \
6163  (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))
6164
6165#define _mm512_mask_permute_pd(W, U, X, C) \
6166  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6167                                       (__v8df)_mm512_permute_pd((X), (C)), \
6168                                       (__v8df)(__m512d)(W))
6169
6170#define _mm512_maskz_permute_pd(U, X, C) \
6171  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6172                                       (__v8df)_mm512_permute_pd((X), (C)), \
6173                                       (__v8df)_mm512_setzero_pd())
6174
6175#define _mm512_permute_ps(X, C) \
6176  (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))
6177
6178#define _mm512_mask_permute_ps(W, U, X, C) \
6179  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6180                                      (__v16sf)_mm512_permute_ps((X), (C)), \
6181                                      (__v16sf)(__m512)(W))
6182
6183#define _mm512_maskz_permute_ps(U, X, C) \
6184  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6185                                      (__v16sf)_mm512_permute_ps((X), (C)), \
6186                                      (__v16sf)_mm512_setzero_ps())
6187
6188static __inline__ __m512d __DEFAULT_FN_ATTRS512
6189_mm512_permutevar_pd(__m512d __A, __m512i __C)
6190{
6191  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6192}
6193
6194static __inline__ __m512d __DEFAULT_FN_ATTRS512
6195_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6196{
6197  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6198                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6199                                         (__v8df)__W);
6200}
6201
6202static __inline__ __m512d __DEFAULT_FN_ATTRS512
6203_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6204{
6205  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6206                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6207                                         (__v8df)_mm512_setzero_pd());
6208}
6209
6210static __inline__ __m512 __DEFAULT_FN_ATTRS512
6211_mm512_permutevar_ps(__m512 __A, __m512i __C)
6212{
6213  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6214}
6215
6216static __inline__ __m512 __DEFAULT_FN_ATTRS512
6217_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6218{
6219  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6220                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6221                                        (__v16sf)__W);
6222}
6223
6224static __inline__ __m512 __DEFAULT_FN_ATTRS512
6225_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6226{
6227  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6228                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6229                                        (__v16sf)_mm512_setzero_ps());
6230}
6231
6232static __inline __m512d __DEFAULT_FN_ATTRS512
6233_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6234{
6235  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6236                                                 (__v8df)__B);
6237}
6238
6239static __inline__ __m512d __DEFAULT_FN_ATTRS512
6240_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6241{
6242  return (__m512d)__builtin_ia32_selectpd_512(__U,
6243                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6244                                  (__v8df)__A);
6245}
6246
6247static __inline__ __m512d __DEFAULT_FN_ATTRS512
6248_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6249                             __m512d __B)
6250{
6251  return (__m512d)__builtin_ia32_selectpd_512(__U,
6252                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6253                                  (__v8df)(__m512d)__I);
6254}
6255
6256static __inline__ __m512d __DEFAULT_FN_ATTRS512
6257_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6258                             __m512d __B)
6259{
6260  return (__m512d)__builtin_ia32_selectpd_512(__U,
6261                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6262                                  (__v8df)_mm512_setzero_pd());
6263}
6264
6265static __inline __m512 __DEFAULT_FN_ATTRS512
6266_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6267{
6268  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6269                                                (__v16sf) __B);
6270}
6271
6272static __inline__ __m512 __DEFAULT_FN_ATTRS512
6273_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6274{
6275  return (__m512)__builtin_ia32_selectps_512(__U,
6276                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6277                                 (__v16sf)__A);
6278}
6279
6280static __inline__ __m512 __DEFAULT_FN_ATTRS512
6281_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
6282{
6283  return (__m512)__builtin_ia32_selectps_512(__U,
6284                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6285                                 (__v16sf)(__m512)__I);
6286}
6287
6288static __inline__ __m512 __DEFAULT_FN_ATTRS512
6289_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6290{
6291  return (__m512)__builtin_ia32_selectps_512(__U,
6292                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6293                                 (__v16sf)_mm512_setzero_ps());
6294}
6295
6296
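/* Truncating conversion of packed double precision to unsigned 32-bit
   integers (vcvttpd2udq).  */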
6297#define _mm512_cvtt_roundpd_epu32(A, R) \
6298  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6299                                             (__v8si)_mm256_undefined_si256(), \
6300                                             (__mmask8)-1, (int)(R))
6301
6302#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
6303  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6304                                             (__v8si)(__m256i)(W), \
6305                                             (__mmask8)(U), (int)(R))
6306
6307#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
6308  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6309                                             (__v8si)_mm256_setzero_si256(), \
6310                                             (__mmask8)(U), (int)(R))
6311
6312static __inline__ __m256i __DEFAULT_FN_ATTRS512
6313_mm512_cvttpd_epu32 (__m512d __A)
6314{
6315  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6316                  (__v8si)
6317                  _mm256_undefined_si256 (),
6318                  (__mmask8) -1,
6319                  _MM_FROUND_CUR_DIRECTION);
6320}
6321
6322static __inline__ __m256i __DEFAULT_FN_ATTRS512
6323_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6324{
6325  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6326                  (__v8si) __W,
6327                  (__mmask8) __U,
6328                  _MM_FROUND_CUR_DIRECTION);
6329}
6330
6331static __inline__ __m256i __DEFAULT_FN_ATTRS512
6332_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6333{
6334  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6335                  (__v8si)
6336                  _mm256_setzero_si256 (),
6337                  (__mmask8) __U,
6338                  _MM_FROUND_CUR_DIRECTION);
6339}
6340
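/* Round scalar values to a given number of fraction bits
   (vrndscalesd/vrndscaless).  */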
6341#define _mm_roundscale_round_sd(A, B, imm, R) \
6342  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6343                                                (__v2df)(__m128d)(B), \
6344                                                (__v2df)_mm_setzero_pd(), \
6345                                                (__mmask8)-1, (int)(imm), \
6346                                                (int)(R))
6347
6348#define _mm_roundscale_sd(A, B, imm) \
6349  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6350                                                (__v2df)(__m128d)(B), \
6351                                                (__v2df)_mm_setzero_pd(), \
6352                                                (__mmask8)-1, (int)(imm), \
6353                                                _MM_FROUND_CUR_DIRECTION)
6354
6355#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
6356  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6357                                                (__v2df)(__m128d)(B), \
6358                                                (__v2df)(__m128d)(W), \
6359                                                (__mmask8)(U), (int)(imm), \
6360                                                _MM_FROUND_CUR_DIRECTION)
6361
6362#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
6363  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6364                                                (__v2df)(__m128d)(B), \
6365                                                (__v2df)(__m128d)(W), \
6366                                                (__mmask8)(U), (int)(I), \
6367                                                (int)(R))
6368
6369#define _mm_maskz_roundscale_sd(U, A, B, I) \
6370  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6371                                                (__v2df)(__m128d)(B), \
6372                                                (__v2df)_mm_setzero_pd(), \
6373                                                (__mmask8)(U), (int)(I), \
6374                                                _MM_FROUND_CUR_DIRECTION)
6375
6376#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
6377  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6378                                                (__v2df)(__m128d)(B), \
6379                                                (__v2df)_mm_setzero_pd(), \
6380                                                (__mmask8)(U), (int)(I), \
6381                                                (int)(R))
6382
6383#define _mm_roundscale_round_ss(A, B, imm, R) \
6384  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6385                                               (__v4sf)(__m128)(B), \
6386                                               (__v4sf)_mm_setzero_ps(), \
6387                                               (__mmask8)-1, (int)(imm), \
6388                                               (int)(R))
6389
6390#define _mm_roundscale_ss(A, B, imm) \
6391  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6392                                               (__v4sf)(__m128)(B), \
6393                                               (__v4sf)_mm_setzero_ps(), \
6394                                               (__mmask8)-1, (int)(imm), \
6395                                               _MM_FROUND_CUR_DIRECTION)
6396
6397#define _mm_mask_roundscale_ss(W, U, A, B, I) \
6398  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6399                                               (__v4sf)(__m128)(B), \
6400                                               (__v4sf)(__m128)(W), \
6401                                               (__mmask8)(U), (int)(I), \
6402                                               _MM_FROUND_CUR_DIRECTION)
6403
6404#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
6405  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6406                                               (__v4sf)(__m128)(B), \
6407                                               (__v4sf)(__m128)(W), \
6408                                               (__mmask8)(U), (int)(I), \
6409                                               (int)(R))
6410
6411#define _mm_maskz_roundscale_ss(U, A, B, I) \
6412  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6413                                               (__v4sf)(__m128)(B), \
6414                                               (__v4sf)_mm_setzero_ps(), \
6415                                               (__mmask8)(U), (int)(I), \
6416                                               _MM_FROUND_CUR_DIRECTION)
6417
6418#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6419  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6420                                               (__v4sf)(__m128)(B), \
6421                                               (__v4sf)_mm_setzero_ps(), \
6422                                               (__mmask8)(U), (int)(I), \
6423                                               (int)(R))
6424
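/* Scale by a power of two: each result element is A * 2^floor(B)
   (vscalefpd/ps/sd/ss).  */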
6425#define _mm512_scalef_round_pd(A, B, R) \
6426  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6427                                           (__v8df)(__m512d)(B), \
6428                                           (__v8df)_mm512_undefined_pd(), \
6429                                           (__mmask8)-1, (int)(R))
6430
6431#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6432  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6433                                           (__v8df)(__m512d)(B), \
6434                                           (__v8df)(__m512d)(W), \
6435                                           (__mmask8)(U), (int)(R))
6436
6437#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6438  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6439                                           (__v8df)(__m512d)(B), \
6440                                           (__v8df)_mm512_setzero_pd(), \
6441                                           (__mmask8)(U), (int)(R))
6442
6443static __inline__ __m512d __DEFAULT_FN_ATTRS512
6444_mm512_scalef_pd (__m512d __A, __m512d __B)
6445{
6446  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6447                (__v8df) __B,
6448                (__v8df)
6449                _mm512_undefined_pd (),
6450                (__mmask8) -1,
6451                _MM_FROUND_CUR_DIRECTION);
6452}
6453
6454static __inline__ __m512d __DEFAULT_FN_ATTRS512
6455_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6456{
6457  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6458                (__v8df) __B,
6459                (__v8df) __W,
6460                (__mmask8) __U,
6461                _MM_FROUND_CUR_DIRECTION);
6462}
6463
6464static __inline__ __m512d __DEFAULT_FN_ATTRS512
6465_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6466{
6467  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6468                (__v8df) __B,
6469                (__v8df)
6470                _mm512_setzero_pd (),
6471                (__mmask8) __U,
6472                _MM_FROUND_CUR_DIRECTION);
6473}
6474
6475#define _mm512_scalef_round_ps(A, B, R) \
6476  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6477                                          (__v16sf)(__m512)(B), \
6478                                          (__v16sf)_mm512_undefined_ps(), \
6479                                          (__mmask16)-1, (int)(R))
6480
6481#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6482  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6483                                          (__v16sf)(__m512)(B), \
6484                                          (__v16sf)(__m512)(W), \
6485                                          (__mmask16)(U), (int)(R))
6486
6487#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6488  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6489                                          (__v16sf)(__m512)(B), \
6490                                          (__v16sf)_mm512_setzero_ps(), \
6491                                          (__mmask16)(U), (int)(R))
6492
6493static __inline__ __m512 __DEFAULT_FN_ATTRS512
6494_mm512_scalef_ps (__m512 __A, __m512 __B)
6495{
6496  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6497               (__v16sf) __B,
6498               (__v16sf)
6499               _mm512_undefined_ps (),
6500               (__mmask16) -1,
6501               _MM_FROUND_CUR_DIRECTION);
6502}
6503
6504static __inline__ __m512 __DEFAULT_FN_ATTRS512
6505_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6506{
6507  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6508               (__v16sf) __B,
6509               (__v16sf) __W,
6510               (__mmask16) __U,
6511               _MM_FROUND_CUR_DIRECTION);
6512}
6513
6514static __inline__ __m512 __DEFAULT_FN_ATTRS512
6515_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6516{
6517  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6518               (__v16sf) __B,
6519               (__v16sf)
6520               _mm512_setzero_ps (),
6521               (__mmask16) __U,
6522               _MM_FROUND_CUR_DIRECTION);
6523}
6524
6525#define _mm_scalef_round_sd(A, B, R) \
6526  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6527                                              (__v2df)(__m128d)(B), \
6528                                              (__v2df)_mm_setzero_pd(), \
6529                                              (__mmask8)-1, (int)(R))
6530
6531static __inline__ __m128d __DEFAULT_FN_ATTRS128
6532_mm_scalef_sd (__m128d __A, __m128d __B)
6533{
6534  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6535              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6536              (__mmask8) -1,
6537              _MM_FROUND_CUR_DIRECTION);
6538}
6539
6540static __inline__ __m128d __DEFAULT_FN_ATTRS128
6541_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6542{
6543 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6544                 (__v2df) __B,
6545                (__v2df) __W,
6546                (__mmask8) __U,
6547                _MM_FROUND_CUR_DIRECTION);
6548}
6549
6550#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6551  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6552                                              (__v2df)(__m128d)(B), \
6553                                              (__v2df)(__m128d)(W), \
6554                                              (__mmask8)(U), (int)(R))
6555
6556static __inline__ __m128d __DEFAULT_FN_ATTRS128
6557_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6558{
6559 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6560                 (__v2df) __B,
6561                (__v2df) _mm_setzero_pd (),
6562                (__mmask8) __U,
6563                _MM_FROUND_CUR_DIRECTION);
6564}
6565
6566#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6567  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6568                                              (__v2df)(__m128d)(B), \
6569                                              (__v2df)_mm_setzero_pd(), \
6570                                              (__mmask8)(U), (int)(R))
6571
6572#define _mm_scalef_round_ss(A, B, R) \
6573  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6574                                             (__v4sf)(__m128)(B), \
6575                                             (__v4sf)_mm_setzero_ps(), \
6576                                             (__mmask8)-1, (int)(R))
6577
6578static __inline__ __m128 __DEFAULT_FN_ATTRS128
6579_mm_scalef_ss (__m128 __A__m128 __B)
6580{
6581  return (__m128__builtin_ia32_scalefss_round_mask ((__v4sf__A,
6582             (__v4sf)( __B), (__v4sf_mm_setzero_ps(),
6583             (__mmask8) -1,
6584             _MM_FROUND_CUR_DIRECTION);
6585}
6586
6587static __inline__ __m128 __DEFAULT_FN_ATTRS128
6588_mm_mask_scalef_ss (__m128 __W__mmask8 __U__m128 __A__m128 __B)
6589{
6590 return (__m128__builtin_ia32_scalefss_round_mask ( (__v4sf__A,
6591                (__v4sf__B,
6592                (__v4sf__W,
6593                (__mmask8__U,
6594                _MM_FROUND_CUR_DIRECTION);
6595}
6596
6597#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6598  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6599                                             (__v4sf)(__m128)(B), \
6600                                             (__v4sf)(__m128)(W), \
6601                                             (__mmask8)(U), (int)(R))
6602
6603static __inline__ __m128 __DEFAULT_FN_ATTRS128
6604_mm_maskz_scalef_ss (__mmask8 __U__m128 __A__m128 __B)
6605{
6606 return (__m128__builtin_ia32_scalefss_round_mask ( (__v4sf__A,
6607                 (__v4sf__B,
6608                (__v4sf_mm_setzero_ps (),
6609                (__mmask8__U,
6610                _MM_FROUND_CUR_DIRECTION);
6611}
6612
6613#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6614  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6615                                             (__v4sf)(__m128)(B), \
6616                                             (__v4sf)_mm_setzero_ps(), \
6617                                             (__mmask8)(U), \
6618                                             (int)(R))
6619
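/* Usage sketch (illustrative only; values are hypothetical): the scalar
 * scalef forms operate on element 0 only, and the upper elements of the
 * result are copied from the first source operand.
 *
 *   __m128d a = _mm_set_pd(9.0, 1.5);  // a = { 1.5, 9.0 }
 *   __m128d b = _mm_set_pd(0.0, 3.0);  // b = { 3.0, 0.0 }
 *   __m128d r = _mm_scalef_sd(a, b);   // r = { 1.5 * 8.0, 9.0 } = { 12.0, 9.0 }
 */
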
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srai_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srai_epi32(__A, __B),
                                         (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srai_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srai_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srai_epi64(__A, __B),
                                          (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srai_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}

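/* Usage sketch (illustrative only; variable names are hypothetical):
 * _mm512_srai_epi32 and _mm512_srai_epi64 shift each element right
 * arithmetically, replicating the sign bit.
 *
 *   __m512i v = _mm512_set1_epi32(-16);
 *   __m512i r = _mm512_srai_epi32(v, 2);                // all elements -4
 *   __m512i z = _mm512_maskz_srai_epi32(0x00FF, v, 2);  // low 8 elements -4,
 *                                                       // high 8 zeroed
 */
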
#define _mm512_shuffle_f32x4(A, B, imm) \
  (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(imm))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps())

#define _mm512_shuffle_f64x2(A, B, imm) \
  (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(imm))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_shuffle_i32x4(A, B, imm) \
  (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(imm))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_shuffle_i64x2(A, B, imm) \
  (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(imm))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512())

#define _mm512_shuffle_pd(A, B, M) \
  (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(M))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_shuffle_ps(A, B, M) \
  (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(M))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps())

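/* Usage sketch (illustrative only; variable names are hypothetical): the
 * shuffle_f32x4 / shuffle_i64x2 family selects whole 128-bit lanes.  Each
 * 2-bit field of the immediate picks a source lane; the two low result lanes
 * come from A and the two high result lanes come from B.
 *
 *   __m512 r = _mm512_shuffle_f32x4(a, b, 0x00);
 *   //   r lanes = { A.lane0, A.lane0, B.lane0, B.lane0 }
 *   __m512i s = _mm512_shuffle_i64x2(x, y, 0x4E);   // 0x4E = 0b01001110
 *   //   s lanes = { X.lane2, X.lane3, Y.lane0, Y.lane1 }
 */
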
#define _mm_sqrt_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))

#define _mm_sqrt_round_ss(A, B, R) \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))

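/* Usage sketch (illustrative only; values are hypothetical): the masked
 * scalar square roots compute sqrt of element 0 of B when the low mask bit
 * is set and otherwise pass through element 0 of W (or zero in the maskz
 * form); the upper element is always taken from A.
 *
 *   __m128d a = _mm_set_pd(7.0, 1.0);   // a = { 1.0, 7.0 }
 *   __m128d b = _mm_set_pd(0.0, 16.0);  // b = { 16.0, 0.0 }
 *   __m128d r = _mm_mask_sqrt_sd(a, 1, a, b);   // r = { 4.0, 7.0 }
 *   __m128d z = _mm_maskz_sqrt_sd(0, a, b);     // z = { 0.0, 7.0 }
 */
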
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x4(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 2, 3, 0, 1, 2, 3,
                                         0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x4(__m256d __A)
{
  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)__O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x4(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x4(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) _mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) __O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) _mm512_setzero_ps());
}

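/* Usage sketch (illustrative only; variable names are hypothetical): the
 * broadcast_*x4 helpers repeat a 128-bit or 256-bit source across the whole
 * 512-bit result; broadcastss_ps and broadcastsd_pd repeat a single scalar
 * element.
 *
 *   __m128 quad = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  // { 1, 2, 3, 4 }
 *   __m512 r    = _mm512_broadcast_f32x4(quad);        // { 1,2,3,4, 1,2,3,4, ... }
 *   __m512 z    = _mm512_maskz_broadcast_f32x4(0x000F, quad);
 *   //             first four elements { 1, 2, 3, 4 }, all others zero
 */
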
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_undefined_si256 (),
               (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_setzero_si256 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_undefined_si256 (),
               (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_setzero_si256 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_undefined_si256 (),
                (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) __O,
                __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_setzero_si256 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) __O,
                __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_undefined_si256 (),
                (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_setzero_si256 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_undefined_si256 (),
              (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_setzero_si256 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_undefined_si256 (),
              (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_setzero_si256 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_setzero_si128 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}

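/* Usage sketch (illustrative only; variable names are hypothetical): these
 * down-converting moves narrow each element.  The cvtsepi forms use signed
 * saturation, the cvtusepi forms use unsigned saturation, and the plain
 * cvtepi forms truncate.  The storeu variants write the narrowed elements
 * directly to (possibly unaligned) memory under a mask.
 *
 *   __m512i v   = _mm512_set1_epi32(1000);
 *   __m128i sat = _mm512_cvtsepi32_epi8(v);  // each byte saturates to 127
 *   __m128i tru = _mm512_cvtepi32_epi8(v);   // each byte truncates to 0xE8 (-24)
 *
 *   signed char out[16];
 *   _mm512_mask_cvtsepi32_storeu_epi8(out, 0xFFFF, v);  // 16 saturated bytes
 */
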
#define _mm512_extracti32x4_epi32(A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)_mm_undefined_si128(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)(__m128i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)_mm_setzero_si128(), \
                                            (__mmask8)(U))

#define _mm512_extracti64x4_epi64(A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)_mm256_undefined_si256(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)(__m256i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)_mm256_setzero_si256(), \
                                            (__mmask8)(U))

#define _mm512_insertf64x4(A, B, imm) \
  (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                      (__v4df)(__m256d)(B), (int)(imm))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd())

#define _mm512_inserti64x4(A, B, imm) \
  (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                      (__v4di)(__m256i)(B), (int)(imm))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512())

#define _mm512_insertf32x4(A, B, imm) \
  (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                     (__v4sf)(__m128)(B), (int)(imm))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps())

#define _mm512_inserti32x4(A, B, imm) \
  (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                      (__v4si)(__m128i)(B), (int)(imm))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512())

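/* Usage sketch (illustrative only; variable names are hypothetical): the
 * extract/insert macros move one 128-bit or 256-bit lane; the lane index
 * must be a compile-time constant.
 *
 *   __m128i hi  = _mm512_extracti32x4_epi32(v, 3);  // top 128 bits of v
 *   __m512d ins = _mm512_insertf64x4(w, q, 1);      // q replaces the upper
 *                                                   // 256 bits of w
 */
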
#define _mm512_getmant_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_getmant_pd(A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_getmant_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_getmant_ps(A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_getexp_round_pd(A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_getexp_round_ps(A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

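/* Usage sketch (illustrative only; variable names are hypothetical, and the
 * _MM_MANT_* constants are assumed to be the mantissa-interval and sign
 * controls declared elsewhere in this header): getexp returns
 * floor(log2(|x|)) of each element as a floating-point value, and getmant
 * returns the normalized mantissa for the requested interval.
 *
 *   __m512d x = _mm512_set1_pd(24.0);
 *   __m512d e = _mm512_getexp_pd(x);   // all elements 4.0, since 2^4 <= 24 < 2^5
 *   __m512d m = _mm512_getmant_pd(x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
 *                                      // all elements 1.5, since 24 = 1.5 * 2^4
 */
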
#define _mm512_i64gather_ps(index, addr, scale) \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_epi32(index, addr, scale) \
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_pd(index, addr, scale) \
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_epi64(index, addr, scale) \
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i32gather_ps(index, addr, scale) \
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (void const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)-1, (int)(scale))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (void const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)(mask), (int)(scale))

#define _mm512_i32gather_epi32(index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale))

#define _mm512_i32gather_pd(index, addr, scale) \
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i32gather_epi64(index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))

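/* Usage sketch (illustrative only; variable names are hypothetical): the
 * gather macros load each element from addr + index[i] * scale, where scale
 * must be a compile-time constant of 1, 2, 4 or 8.  The masked forms load
 * only where the mask bit is set and keep the old value elsewhere.
 *
 *   float table[1024];
 *   __m512i idx = _mm512_set1_epi32(10);
 *   __m512  g   = _mm512_i32gather_ps(idx, table, 4);  // 16 copies of table[10]
 *   __m512  gm  = _mm512_mask_i32gather_ps(old, 0x0001, idx, table, 4);
 *   //             element 0 = table[10], the rest keep 'old'
 */
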
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

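/* Usage sketch (illustrative only; variable names are hypothetical): the
 * scatter macros are the store-side counterparts of the gathers, writing
 * element i of v1 to addr + index[i] * scale, with the mask bits gating the
 * individual stores.
 *
 *   double dst[256];
 *   __m256i idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
 *   _mm512_i32scatter_pd(dst, idx, vals, 8);             // dst[0..7] = vals[0..7]
 *   _mm512_mask_i32scatter_pd(dst, 0x01, idx, vals, 8);  // only dst[0] is written
 */
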
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       (__v4sf)__A,
                                       (__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmadd_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
                                        (int)(R))

#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        (__v4sf)__B,
                                        (__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
                                        (__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R))

7865static __inline__ __m128 __DEFAULT_FN_ATTRS128
7866_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7867{
7868  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7869                                       (__v4sf)__A,
7870                                       -(__v4sf)__B,
7871                                       (__mmask8)__U,
7872                                       _MM_FROUND_CUR_DIRECTION);
7873}
7874
7875#define _mm_fmsub_round_ss(A, B, C, R) \
7876  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7877                                        (__v4sf)(__m128)(B), \
7878                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
7879                                        (int)(R))
7880
7881#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7882  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7883                                        (__v4sf)(__m128)(A), \
7884                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
7885                                        (int)(R))
7886
7887static __inline__ __m128 __DEFAULT_FN_ATTRS128
7888_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7889{
7890  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7891                                        (__v4sf)__B,
7892                                        -(__v4sf)__C,
7893                                        (__mmask8)__U,
7894                                        _MM_FROUND_CUR_DIRECTION);
7895}
7896
7897#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7898  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7899                                         (__v4sf)(__m128)(B), \
7900                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
7901                                         (int)(R))
7902
7903static __inline__ __m128 __DEFAULT_FN_ATTRS128
7904_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7905{
7906  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7907                                        (__v4sf)__X,
7908                                        (__v4sf)__Y,
7909                                        (__mmask8)__U,
7910                                        _MM_FROUND_CUR_DIRECTION);
7911}
7912
7913#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7914  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7915                                         (__v4sf)(__m128)(X), \
7916                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
7917                                         (int)(R))
7918
7919static __inline__ __m128 __DEFAULT_FN_ATTRS128
7920_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7921{
7922  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7923                                       -(__v4sf)__A,
7924                                       (__v4sf)__B,
7925                                       (__mmask8)__U,
7926                                       _MM_FROUND_CUR_DIRECTION);
7927}
7928
7929#define _mm_fnmadd_round_ss(A, B, C, R) \
7930  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7931                                        -(__v4sf)(__m128)(B), \
7932                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
7933                                        (int)(R))
7934
7935#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7936  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7937                                        -(__v4sf)(__m128)(A), \
7938                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
7939                                        (int)(R))
7940
7941static __inline__ __m128 __DEFAULT_FN_ATTRS128
7942_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7943{
7944  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7945                                        -(__v4sf)__B,
7946                                        (__v4sf)__C,
7947                                        (__mmask8)__U,
7948                                        _MM_FROUND_CUR_DIRECTION);
7949}
7950
7951#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7952  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7953                                         -(__v4sf)(__m128)(B), \
7954                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
7955                                         (int)(R))
7956
7957static __inline__ __m128 __DEFAULT_FN_ATTRS128
7958_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7959{
7960  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7961                                        -(__v4sf)__X,
7962                                        (__v4sf)__Y,
7963                                        (__mmask8)__U,
7964                                        _MM_FROUND_CUR_DIRECTION);
7965}
7966
7967#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7968  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7969                                         -(__v4sf)(__m128)(X), \
7970                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
7971                                         (int)(R))
7972
7973static __inline__ __m128 __DEFAULT_FN_ATTRS128
7974_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7975{
7976  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7977                                       -(__v4sf)__A,
7978                                       -(__v4sf)__B,
7979                                       (__mmask8)__U,
7980                                       _MM_FROUND_CUR_DIRECTION);
7981}
7982
7983#define _mm_fnmsub_round_ss(A, B, C, R) \
7984  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7985                                        -(__v4sf)(__m128)(B), \
7986                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
7987                                        (int)(R))
7988
7989#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7990  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7991                                        -(__v4sf)(__m128)(A), \
7992                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
7993                                        (int)(R))
7994
7995static __inline__ __m128 __DEFAULT_FN_ATTRS128
7996_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7997{
7998  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7999                                        -(__v4sf)__B,
8000                                        -(__v4sf)__C,
8001                                        (__mmask8)__U,
8002                                        _MM_FROUND_CUR_DIRECTION);
8003}
8004
8005#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
8006  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8007                                         -(__v4sf)(__m128)(B), \
8008                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
8009                                         (int)(R))
8010
8011static __inline__ __m128 __DEFAULT_FN_ATTRS128
8012_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8013{
8014  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
8015                                        -(__v4sf)__X,
8016                                        (__v4sf)__Y,
8017                                        (__mmask8)__U,
8018                                        _MM_FROUND_CUR_DIRECTION);
8019}
8020
8021#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
8022  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8023                                         -(__v4sf)(__m128)(X), \
8024                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
8025                                         (int)(R))
8026
8027static __inline__ __m128d __DEFAULT_FN_ATTRS128
8028_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8029{
8030  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8031                                       (__v2df)__A,
8032                                       (__v2df)__B,
8033                                       (__mmask8)__U,
8034                                       _MM_FROUND_CUR_DIRECTION);
8035}
8036
8037#define _mm_fmadd_round_sd(A, B, C, R) \
8038  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8039                                         (__v2df)(__m128d)(B), \
8040                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
8041                                         (int)(R))
8042
8043#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
8044  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8045                                         (__v2df)(__m128d)(A), \
8046                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
8047                                         (int)(R))
8048
8049static __inline__ __m128d __DEFAULT_FN_ATTRS128
8050_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8051{
8052  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8053                                        (__v2df)__B,
8054                                        (__v2df)__C,
8055                                        (__mmask8)__U,
8056                                        _MM_FROUND_CUR_DIRECTION);
8057}
8058
8059#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
8060  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8061                                          (__v2df)(__m128d)(B), \
8062                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
8063                                          (int)(R))
8064
8065static __inline__ __m128d __DEFAULT_FN_ATTRS128
8066_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8067{
8068  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8069                                        (__v2df)__X,
8070                                        (__v2df)__Y,
8071                                        (__mmask8)__U,
8072                                        _MM_FROUND_CUR_DIRECTION);
8073}
8074
8075#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
8076  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8077                                          (__v2df)(__m128d)(X), \
8078                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
8079                                          (int)(R))
8080
8081static __inline__ __m128d __DEFAULT_FN_ATTRS128
8082_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8083{
8084  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8085                                       (__v2df)__A,
8086                                       -(__v2df)__B,
8087                                       (__mmask8)__U,
8088                                       _MM_FROUND_CUR_DIRECTION);
8089}
8090
8091#define _mm_fmsub_round_sd(A, B, C, R) \
8092  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8093                                         (__v2df)(__m128d)(B), \
8094                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
8095                                         (int)(R))
8096
8097#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
8098  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8099                                         (__v2df)(__m128d)(A), \
8100                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
8101                                         (int)(R))
8102
8103static __inline__ __m128d __DEFAULT_FN_ATTRS128
8104_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8105{
8106  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8107                                        (__v2df)__B,
8108                                        -(__v2df)__C,
8109                                        (__mmask8)__U,
8110                                        _MM_FROUND_CUR_DIRECTION);
8111}
8112
8113#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
8114  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8115                                          (__v2df)(__m128d)(B), \
8116                                          -(__v2df)(__m128d)(C), \
8117                                          (__mmask8)(U), (int)(R))
8118
8119static __inline__ __m128d __DEFAULT_FN_ATTRS128
8120_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8121{
8122  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8123                                        (__v2df)__X,
8124                                        (__v2df)__Y,
8125                                        (__mmask8)__U,
8126                                        _MM_FROUND_CUR_DIRECTION);
8127}
8128
8129#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
8130  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8131                                          (__v2df)(__m128d)(X), \
8132                                          (__v2df)(__m128d)(Y), \
8133                                          (__mmask8)(U), (int)(R))
8134
8135static __inline__ __m128d __DEFAULT_FN_ATTRS128
8136_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8137{
8138  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8139                                       -(__v2df)__A,
8140                                       (__v2df)__B,
8141                                       (__mmask8)__U,
8142                                       _MM_FROUND_CUR_DIRECTION);
8143}
8144
8145#define _mm_fnmadd_round_sd(A, B, C, R) \
8146  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8147                                         -(__v2df)(__m128d)(B), \
8148                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
8149                                         (int)(R))
8150
8151#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
8152  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8153                                         -(__v2df)(__m128d)(A), \
8154                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
8155                                         (int)(R))
8156
8157static __inline__ __m128d __DEFAULT_FN_ATTRS128
8158_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8159{
8160  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8161                                        -(__v2df)__B,
8162                                        (__v2df)__C,
8163                                        (__mmask8)__U,
8164                                        _MM_FROUND_CUR_DIRECTION);
8165}
8166
8167#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
8168  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8169                                          -(__v2df)(__m128d)(B), \
8170                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
8171                                          (int)(R))
8172
8173static __inline__ __m128d __DEFAULT_FN_ATTRS128
8174_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8175{
8176  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8177                                        -(__v2df)__X,
8178                                        (__v2df)__Y,
8179                                        (__mmask8)__U,
8180                                        _MM_FROUND_CUR_DIRECTION);
8181}
8182
8183#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
8184  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8185                                          -(__v2df)(__m128d)(X), \
8186                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
8187                                          (int)(R))
8188
8189static __inline__ __m128d __DEFAULT_FN_ATTRS128
8190_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8191{
8192  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8193                                       -(__v2df)__A,
8194                                       -(__v2df)__B,
8195                                       (__mmask8)__U,
8196                                       _MM_FROUND_CUR_DIRECTION);
8197}
8198
8199#define _mm_fnmsub_round_sd(A, B, C, R) \
8200  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8201                                         -(__v2df)(__m128d)(B), \
8202                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
8203                                         (int)(R))
8204
8205#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
8206  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8207                                         -(__v2df)(__m128d)(A), \
8208                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
8209                                         (int)(R))
8210
8211static __inline__ __m128d __DEFAULT_FN_ATTRS128
8212_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8213{
8214  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8215                                        -(__v2df)__B,
8216                                        -(__v2df)__C,
8217                                        (__mmask8)__U,
8218                                        _MM_FROUND_CUR_DIRECTION);
8219}
8220
8221#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
8222  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8223                                          -(__v2df)(__m128d)(B), \
8224                                          -(__v2df)(__m128d)(C), \
8225                                          (__mmask8)(U), \
8226                                          (int)(R))
8227
8228static __inline__ __m128d __DEFAULT_FN_ATTRS128
8229_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8230{
8231  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8232                                        -(__v2df)__X,
8233                                        (__v2df)__Y,
8234                                        (__mmask8)__U,
8235                                        _MM_FROUND_CUR_DIRECTION);
8236}
8237
8238#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
8239  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8240                                          -(__v2df)(__m128d)(X), \
8241                                          (__v2df)(__m128d)(Y), \
8242                                          (__mmask8)(U), (int)(R))
8243
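/* Illustrative usage (not part of the original header): a minimal sketch of the
 * masked scalar FMA forms above.  With the mask bit set, element 0 becomes
 * W*A+B; with it clear, the mask form passes W[0] through and the maskz form
 * zeroes it.  Elements 1..3 always come from the first vector operand.
 *
 *   __m128 w = _mm_set_ss(2.0f), a = _mm_set_ss(3.0f), b = _mm_set_ss(4.0f);
 *   __m128 r0 = _mm_mask_fmadd_ss(w, 0x1, a, b);   // r0[0] == 2*3 + 4 == 10
 *   __m128 r1 = _mm_maskz_fmadd_ss(0x0, w, a, b);  // r1[0] == 0
 */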
8244#define _mm512_permutex_pd(X, C) \
8245  (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))
8246
8247#define _mm512_mask_permutex_pd(W, U, X, C) \
8248  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8249                                       (__v8df)_mm512_permutex_pd((X), (C)), \
8250                                       (__v8df)(__m512d)(W))
8251
8252#define _mm512_maskz_permutex_pd(U, X, C) \
8253  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8254                                       (__v8df)_mm512_permutex_pd((X), (C)), \
8255                                       (__v8df)_mm512_setzero_pd())
8256
8257#define _mm512_permutex_epi64(X, C) \
8258  (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))
8259
8260#define _mm512_mask_permutex_epi64(W, U, X, C) \
8261  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8262                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
8263                                      (__v8di)(__m512i)(W))
8264
8265#define _mm512_maskz_permutex_epi64(U, X, C) \
8266  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8267                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
8268                                      (__v8di)_mm512_setzero_si512())
8269
8270static __inline__ __m512d __DEFAULT_FN_ATTRS512
8271_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8272{
8273  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8274}
8275
8276static __inline__ __m512d __DEFAULT_FN_ATTRS512
8277_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8278{
8279  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8280                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
8281                                        (__v8df)__W);
8282}
8283
8284static __inline__ __m512d __DEFAULT_FN_ATTRS512
8285_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8286{
8287  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8288                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
8289                                        (__v8df)_mm512_setzero_pd());
8290}
8291
8292static __inline__ __m512i __DEFAULT_FN_ATTRS512
8293_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8294{
8295  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8296}
8297
8298static __inline__ __m512i __DEFAULT_FN_ATTRS512
8299_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8300{
8301  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8302                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8303                                     (__v8di)_mm512_setzero_si512());
8304}
8305
8306static __inline__ __m512i __DEFAULT_FN_ATTRS512
8307_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8308             __m512i __Y)
8309{
8310  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8311                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8312                                     (__v8di)__W);
8313}
8314
8315static __inline__ __m512 __DEFAULT_FN_ATTRS512
8316_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8317{
8318  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8319}
8320
8321static __inline__ __m512 __DEFAULT_FN_ATTRS512
8322_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8323{
8324  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8325                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8326                                       (__v16sf)__W);
8327}
8328
8329static __inline__ __m512 __DEFAULT_FN_ATTRS512
8330_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8331{
8332  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8333                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8334                                       (__v16sf)_mm512_setzero_ps());
8335}
8336
8337static __inline__ __m512i __DEFAULT_FN_ATTRS512
8338_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8339{
8340  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8341}
8342
8343#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8344
8345static __inline__ __m512i __DEFAULT_FN_ATTRS512
8346_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8347{
8348  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8349                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8350                                    (__v16si)_mm512_setzero_si512());
8351}
8352
8353static __inline__ __m512i __DEFAULT_FN_ATTRS512
8354_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8355             __m512i __Y)
8356{
8357  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8358                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8359                                    (__v16si)__W);
8360}
8361
8362#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8363
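/* Illustrative usage (not part of the original header): _mm512_permutexvar_ps
 * writes source lane idx[i] into result lane i, so an index vector holding
 * 15..0 reverses the 16 floats.  A sketch:
 *
 *   __m512i idx = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7,
 *                                  8, 9, 10, 11, 12, 13, 14, 15);
 *   __m512  rev = _mm512_permutexvar_ps(idx, v);   // rev[i] == v[15 - i]
 */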
8364static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8365_mm512_kand (__mmask16 __A, __mmask16 __B)
8366{
8367  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8368}
8369
8370static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8371_mm512_kandn (__mmask16 __A, __mmask16 __B)
8372{
8373  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8374}
8375
8376static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8377_mm512_kor (__mmask16 __A, __mmask16 __B)
8378{
8379  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8380}
8381
8382static __inline__ int __DEFAULT_FN_ATTRS
8383_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8384{
8385  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8386}
8387
8388static __inline__ int __DEFAULT_FN_ATTRS
8389_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8390{
8391  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8392}
8393
8394static __inline__ unsigned char __DEFAULT_FN_ATTRS
8395_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
8396{
8397  return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8398}
8399
8400static __inline__ unsigned char __DEFAULT_FN_ATTRS
8401_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
8402{
8403  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8404}
8405
8406static __inline__ unsigned char __DEFAULT_FN_ATTRS
8407_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8408  *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8409  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8410}
8411
8412static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8413_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8414{
8415  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8416}
8417
8418static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8419_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8420{
8421  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8422}
8423
8424static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8425_mm512_kxor (__mmask16 __A, __mmask16 __B)
8426{
8427  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8428}
8429
8430#define _kand_mask16 _mm512_kand
8431#define _kandn_mask16 _mm512_kandn
8432#define _knot_mask16 _mm512_knot
8433#define _kor_mask16 _mm512_kor
8434#define _kxnor_mask16 _mm512_kxnor
8435#define _kxor_mask16 _mm512_kxor
8436
8437#define _kshiftli_mask16(A, I) \
8438  (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))
8439
8440#define _kshiftri_mask16(A, I) \
8441  (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
8442
8443static __inline__ unsigned int __DEFAULT_FN_ATTRS
8444_cvtmask16_u32(__mmask16 __A) {
8445  return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8446}
8447
8448static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8449_cvtu32_mask16(unsigned int __A) {
8450  return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8451}
8452
8453static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8454_load_mask16(__mmask16 *__A) {
8455  return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8456}
8457
8458static __inline__ void __DEFAULT_FN_ATTRS
8459_store_mask16(__mmask16 *__A, __mmask16 __B) {
8460  *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8461}
8462
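/* Illustrative usage (not part of the original header): the mask-register
 * helpers above act on whole __mmask16 values, e.g. combining two compare
 * results and testing the combination.  A sketch:
 *
 *   __mmask16 m    = _mm512_kand(k1, k2);      // bitwise AND of two masks
 *   int       none = _mm512_kortestz(m, m);    // 1 iff m has no bits set
 *   __mmask16 top  = _kshiftli_mask16(_cvtu32_mask16(1), 15);  // only bit 15
 */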
8463static __inline__ void __DEFAULT_FN_ATTRS512
8464_mm512_stream_si512 (__m512i * __P, __m512i __A)
8465{
8466  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8467  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8468}
8469
8470static __inline__ __m512i __DEFAULT_FN_ATTRS512
8471_mm512_stream_load_si512 (void const *__P)
8472{
8473  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8474  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8475}
8476
8477static __inline__ void __DEFAULT_FN_ATTRS512
8478_mm512_stream_pd (double *__P, __m512d __A)
8479{
8480  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8481  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8482}
8483
8484static __inline__ void __DEFAULT_FN_ATTRS512
8485_mm512_stream_ps (float *__P, __m512 __A)
8486{
8487  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8488  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8489}
8490
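/* Illustrative usage (not part of the original header): the streaming forms
 * above are non-temporal; the destination must be 64-byte aligned, and an
 * _mm_sfence() is the usual way to order the stores before the data is read
 * elsewhere.  A sketch, assuming dst points at suitably aligned floats:
 *
 *   _mm512_stream_ps(dst, _mm512_set1_ps(1.0f));
 *   _mm_sfence();
 */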
8491static __inline__ __m512d __DEFAULT_FN_ATTRS512
8492_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8493{
8494  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8495                  (__v8df) __W,
8496                  (__mmask8) __U);
8497}
8498
8499static __inline__ __m512d __DEFAULT_FN_ATTRS512
8500_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8501{
8502  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8503                  (__v8df)
8504                  _mm512_setzero_pd (),
8505                  (__mmask8) __U);
8506}
8507
8508static __inline__ __m512i __DEFAULT_FN_ATTRS512
8509_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8510{
8511  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8512                  (__v8di) __W,
8513                  (__mmask8) __U);
8514}
8515
8516static __inline__ __m512i __DEFAULT_FN_ATTRS512
8517_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8518{
8519  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8520                  (__v8di)
8521                  _mm512_setzero_si512 (),
8522                  (__mmask8) __U);
8523}
8524
8525static __inline__ __m512 __DEFAULT_FN_ATTRS512
8526_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8527{
8528  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8529                 (__v16sf) __W,
8530                 (__mmask16) __U);
8531}
8532
8533static __inline__ __m512 __DEFAULT_FN_ATTRS512
8534_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8535{
8536  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8537                 (__v16sf)
8538                 _mm512_setzero_ps (),
8539                 (__mmask16) __U);
8540}
8541
8542static __inline__ __m512i __DEFAULT_FN_ATTRS512
8543_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8544{
8545  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8546                  (__v16si) __W,
8547                  (__mmask16) __U);
8548}
8549
8550static __inline__ __m512i __DEFAULT_FN_ATTRS512
8551_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8552{
8553  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8554                  (__v16si)
8555                  _mm512_setzero_si512 (),
8556                  (__mmask16) __U);
8557}
8558
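/* Illustrative usage (not part of the original header): compress packs the
 * elements whose mask bit is set into the low lanes.  A sketch:
 *
 *   __m512i v = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8,
 *                                7, 6, 5, 4, 3, 2, 1, 0);      // lane i == i
 *   __m512i c = _mm512_maskz_compress_epi32(0xAAAA, v);
 *   // low 8 lanes of c are 1,3,5,7,9,11,13,15; the upper lanes are zeroed
 */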
8559#define _mm_cmp_round_ss_mask(X, Y, P, R) \
8560  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8561                                      (__v4sf)(__m128)(Y), (int)(P), \
8562                                      (__mmask8)-1, (int)(R))
8563
8564#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
8565  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8566                                      (__v4sf)(__m128)(Y), (int)(P), \
8567                                      (__mmask8)(M), (int)(R))
8568
8569#define _mm_cmp_ss_mask(X, Y, P) \
8570  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8571                                      (__v4sf)(__m128)(Y), (int)(P), \
8572                                      (__mmask8)-1, \
8573                                      _MM_FROUND_CUR_DIRECTION)
8574
8575#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
8576  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8577                                      (__v4sf)(__m128)(Y), (int)(P), \
8578                                      (__mmask8)(M), \
8579                                      _MM_FROUND_CUR_DIRECTION)
8580
8581#define _mm_cmp_round_sd_mask(X, Y, P, R) \
8582  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8583                                      (__v2df)(__m128d)(Y), (int)(P), \
8584                                      (__mmask8)-1, (int)(R))
8585
8586#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
8587  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8588                                      (__v2df)(__m128d)(Y), (int)(P), \
8589                                      (__mmask8)(M), (int)(R))
8590
8591#define _mm_cmp_sd_mask(X, Y, P) \
8592  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8593                                      (__v2df)(__m128d)(Y), (int)(P), \
8594                                      (__mmask8)-1, \
8595                                      _MM_FROUND_CUR_DIRECTION)
8596
8597#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
8598  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8599                                      (__v2df)(__m128d)(Y), (int)(P), \
8600                                      (__mmask8)(M), \
8601                                      _MM_FROUND_CUR_DIRECTION)
8602
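/* Illustrative usage (not part of the original header): the scalar compare
 * macros above return a one-bit mask in bit 0, using the same predicate
 * constants as _mm_cmp_ss (e.g. _CMP_LT_OS from the AVX headers).  A sketch:
 *
 *   __mmask8 lt = _mm_cmp_ss_mask(a, b, _CMP_LT_OS);  // bit 0 set iff a[0] < b[0]
 */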
8603/* Bit Test */
8604
8605static __inline __mmask16 __DEFAULT_FN_ATTRS512
8606_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8607{
8608  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8609                                   _mm512_setzero_si512());
8610}
8611
8612static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8613_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8614{
8615  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8616                                        _mm512_setzero_si512());
8617}
8618
8619static __inline __mmask8 __DEFAULT_FN_ATTRS512
8620_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8621{
8622  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8623                                   _mm512_setzero_si512());
8624}
8625
8626static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8627_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8628{
8629  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8630                                        _mm512_setzero_si512());
8631}
8632
8633static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8634_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8635{
8636  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8637                                  _mm512_setzero_si512());
8638}
8639
8640static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8641_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8642{
8643  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8644                                       _mm512_setzero_si512());
8645}
8646
8647static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8648_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8649{
8650  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8651                                  _mm512_setzero_si512());
8652}
8653
8654static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8655_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8656{
8657  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8658                                       _mm512_setzero_si512());
8659}
8660
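/* Illustrative usage (not part of the original header): test sets a mask bit
 * where (a & b) is non-zero in that lane, testn where it is zero.  A sketch
 * that flags the lanes of v with the low bit set:
 *
 *   __mmask16 odd = _mm512_test_epi32_mask(v, _mm512_set1_epi32(1));
 */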
8661static __inline__ __m512 __DEFAULT_FN_ATTRS512
8662_mm512_movehdup_ps (__m512 __A)
8663{
8664  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8665                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8666}
8667
8668static __inline__ __m512 __DEFAULT_FN_ATTRS512
8669_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8670{
8671  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8672                                             (__v16sf)_mm512_movehdup_ps(__A),
8673                                             (__v16sf)__W);
8674}
8675
8676static __inline__ __m512 __DEFAULT_FN_ATTRS512
8677_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8678{
8679  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8680                                             (__v16sf)_mm512_movehdup_ps(__A),
8681                                             (__v16sf)_mm512_setzero_ps());
8682}
8683
8684static __inline__ __m512 __DEFAULT_FN_ATTRS512
8685_mm512_moveldup_ps (__m512 __A)
8686{
8687  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8688                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8689}
8690
8691static __inline__ __m512 __DEFAULT_FN_ATTRS512
8692_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8693{
8694  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8695                                             (__v16sf)_mm512_moveldup_ps(__A),
8696                                             (__v16sf)__W);
8697}
8698
8699static __inline__ __m512 __DEFAULT_FN_ATTRS512
8700_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8701{
8702  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8703                                             (__v16sf)_mm512_moveldup_ps(__A),
8704                                             (__v16sf)_mm512_setzero_ps());
8705}
8706
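/* Illustrative usage (not part of the original header): moveldup duplicates
 * the even lanes and movehdup the odd lanes, a common building block for
 * interleaved complex data.  A sketch:
 *
 *   __m512 re = _mm512_moveldup_ps(v);   // { v0,v0, v2,v2, ..., v14,v14 }
 *   __m512 im = _mm512_movehdup_ps(v);   // { v1,v1, v3,v3, ..., v15,v15 }
 */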
8707static __inline__ __m128 __DEFAULT_FN_ATTRS128
8708_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8709{
8710  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8711}
8712
8713static __inline__ __m128 __DEFAULT_FN_ATTRS128
8714_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8715{
8716  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8717                                     _mm_setzero_ps());
8718}
8719
8720static __inline__ __m128d __DEFAULT_FN_ATTRS128
8721_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8722{
8723  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8724}
8725
8726static __inline__ __m128d __DEFAULT_FN_ATTRS128
8727_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8728{
8729  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8730                                     _mm_setzero_pd());
8731}
8732
8733static __inline__ void __DEFAULT_FN_ATTRS128
8734_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8735{
8736  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8737}
8738
8739static __inline__ void __DEFAULT_FN_ATTRS128
8740_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8741{
8742  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8743}
8744
8745static __inline__ __m128 __DEFAULT_FN_ATTRS128
8746_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8747{
8748  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8749                                                (__v4sf)_mm_setzero_ps(),
8750                                                0, 4, 4, 4);
8751
8752  return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
8753}
8754
8755static __inline__ __m128 __DEFAULT_FN_ATTRS128
8756_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8757{
8758  return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
8759                                                (__v4sf) _mm_setzero_ps(),
8760                                                __U & 1);
8761}
8762
8763static __inline__ __m128d __DEFAULT_FN_ATTRS128
8764_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8765{
8766  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8767                                                 (__v2df)_mm_setzero_pd(),
8768                                                 0, 2);
8769
8770  return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
8771}
8772
8773static __inline__ __m128d __DEFAULT_FN_ATTRS128
8774_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8775{
8776  return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
8777                                                  (__v2df) _mm_setzero_pd(),
8778                                                  __U & 1);
8779}
8780
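/* Illustrative usage (not part of the original header): the masked scalar
 * load/store forms touch memory only when bit 0 of the mask is set.  A sketch:
 *
 *   __m128 x = _mm_maskz_load_ss(k, p);   // x[0] = (k & 1) ? *p : 0.0f
 *   _mm_mask_store_ss(q, k, x);           // *q is written only if (k & 1)
 */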
8781#define _mm512_shuffle_epi32(A, I) \
8782  (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))
8783
8784#define _mm512_mask_shuffle_epi32(W, U, A, I) \
8785  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8786                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
8787                                      (__v16si)(__m512i)(W))
8788
8789#define _mm512_maskz_shuffle_epi32(U, A, I) \
8790  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8791                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
8792                                      (__v16si)_mm512_setzero_si512())
8793
8794static __inline__ __m512d __DEFAULT_FN_ATTRS512
8795_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8796{
8797  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8798                (__v8df) __W,
8799                (__mmask8) __U);
8800}
8801
8802static __inline__ __m512d __DEFAULT_FN_ATTRS512
8803_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
8804{
8805  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8806                (__v8df) _mm512_setzero_pd (),
8807                (__mmask8) __U);
8808}
8809
8810static __inline__ __m512i __DEFAULT_FN_ATTRS512
8811_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8812{
8813  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8814                (__v8di) __W,
8815                (__mmask8) __U);
8816}
8817
8818static __inline__ __m512i __DEFAULT_FN_ATTRS512
8819_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
8820{
8821  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8822                (__v8di) _mm512_setzero_si512 (),
8823                (__mmask8) __U);
8824}
8825
8826static __inline__ __m512d __DEFAULT_FN_ATTRS512
8827_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8828{
8829  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8830              (__v8df) __W,
8831              (__mmask8) __U);
8832}
8833
8834static __inline__ __m512d __DEFAULT_FN_ATTRS512
8835_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
8836{
8837  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8838              (__v8df) _mm512_setzero_pd(),
8839              (__mmask8) __U);
8840}
8841
8842static __inline__ __m512i __DEFAULT_FN_ATTRS512
8843_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8844{
8845  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8846              (__v8di) __W,
8847              (__mmask8) __U);
8848}
8849
8850static __inline__ __m512i __DEFAULT_FN_ATTRS512
8851_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
8852{
8853  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8854              (__v8di) _mm512_setzero_si512(),
8855              (__mmask8) __U);
8856}
8857
8858static __inline__ __m512 __DEFAULT_FN_ATTRS512
8859_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8860{
8861  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8862                   (__v16sf) __W,
8863                   (__mmask16) __U);
8864}
8865
8866static __inline__ __m512 __DEFAULT_FN_ATTRS512
8867_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
8868{
8869  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8870                   (__v16sf) _mm512_setzero_ps(),
8871                   (__mmask16) __U);
8872}
8873
8874static __inline__ __m512i __DEFAULT_FN_ATTRS512
8875_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8876{
8877  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8878              (__v16si) __W,
8879              (__mmask16) __U);
8880}
8881
8882static __inline__ __m512i __DEFAULT_FN_ATTRS512
8883_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
8884{
8885  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8886              (__v16si) _mm512_setzero_si512(),
8887              (__mmask16) __U);
8888}
8889
8890static __inline__ __m512 __DEFAULT_FN_ATTRS512
8891_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8892{
8893  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8894               (__v16sf) __W,
8895               (__mmask16) __U);
8896}
8897
8898static __inline__ __m512 __DEFAULT_FN_ATTRS512
8899_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
8900{
8901  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8902               (__v16sf) _mm512_setzero_ps(),
8903               (__mmask16) __U);
8904}
8905
8906static __inline__ __m512i __DEFAULT_FN_ATTRS512
8907_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8908{
8909  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8910                (__v16si) __W,
8911                (__mmask16) __U);
8912}
8913
8914static __inline__ __m512i __DEFAULT_FN_ATTRS512
8915_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
8916{
8917  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8918                (__v16si) _mm512_setzero_si512(),
8919                (__mmask16) __U);
8920}
8921
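/* Illustrative usage (not part of the original header): expand is the inverse
 * of compress: consecutive source elements are scattered to the lanes whose
 * mask bit is set.  A sketch pairing the two:
 *
 *   __m512i packed   = _mm512_maskz_compress_epi32(k, v);
 *   __m512i restored = _mm512_maskz_expand_epi32(k, packed);
 *   // restored equals v in the lanes selected by k and is zero elsewhere
 */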
8922#define _mm512_cvt_roundps_pd(A, R) \
8923  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8924                                           (__v8df)_mm512_undefined_pd(), \
8925                                           (__mmask8)-1, (int)(R))
8926
8927#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
8928  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8929                                           (__v8df)(__m512d)(W), \
8930                                           (__mmask8)(U), (int)(R))
8931
8932#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
8933  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8934                                           (__v8df)_mm512_setzero_pd(), \
8935                                           (__mmask8)(U), (int)(R))
8936
8937static __inline__ __m512d __DEFAULT_FN_ATTRS512
8938_mm512_cvtps_pd (__m256 __A)
8939{
8940  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8941}
8942
8943static __inline__ __m512d __DEFAULT_FN_ATTRS512
8944_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8945{
8946  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8947                                              (__v8df)_mm512_cvtps_pd(__A),
8948                                              (__v8df)__W);
8949}
8950
8951static __inline__ __m512d __DEFAULT_FN_ATTRS512
8952_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
8953{
8954  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8955                                              (__v8df)_mm512_cvtps_pd(__A),
8956                                              (__v8df)_mm512_setzero_pd());
8957}
8958
8959static __inline__ __m512d __DEFAULT_FN_ATTRS512
8960_mm512_cvtpslo_pd (__m512 __A)
8961{
8962  return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8963}
8964
8965static __inline__ __m512d __DEFAULT_FN_ATTRS512
8966_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8967{
8968  return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8969}
8970
8971static __inline__ __m512d __DEFAULT_FN_ATTRS512
8972_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8973{
8974  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8975              (__v8df) __A,
8976              (__v8df) __W);
8977}
8978
8979static __inline__ __m512d __DEFAULT_FN_ATTRS512
8980_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8981{
8982  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8983              (__v8df) __A,
8984              (__v8df) _mm512_setzero_pd ());
8985}
8986
8987static __inline__ __m512 __DEFAULT_FN_ATTRS512
8988_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8989{
8990  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8991             (__v16sf) __A,
8992             (__v16sf) __W);
8993}
8994
8995static __inline__ __m512 __DEFAULT_FN_ATTRS512
8996_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8997{
8998  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8999             (__v16sf) __A,
9000             (__v16sf) _mm512_setzero_ps ());
9001}
9002
9003static __inline__ void __DEFAULT_FN_ATTRS512
9004_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9005{
9006  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9007            (__mmask8) __U);
9008}
9009
9010static __inline__ void __DEFAULT_FN_ATTRS512
9011_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9012{
9013  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9014            (__mmask8) __U);
9015}
9016
9017static __inline__ void __DEFAULT_FN_ATTRS512
9018_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9019{
9020  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9021            (__mmask16) __U);
9022}
9023
9024static __inline__ void __DEFAULT_FN_ATTRS512
9025_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9026{
9027  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9028            (__mmask16) __U);
9029}
9030
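/* Illustrative usage (not part of the original header): compressstoreu writes
 * only the active elements, contiguously and unaligned, which makes it useful
 * for stream filtering.  A sketch appending the selected lanes of v to an int
 * pointer out:
 *
 *   _mm512_mask_compressstoreu_epi32(out, k, v);
 *   out += __builtin_popcount((unsigned)k);   // advance by the number kept
 */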
9031#define _mm_cvt_roundsd_ss(A, B, R) \
9032  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9033                                             (__v2df)(__m128d)(B), \
9034                                             (__v4sf)_mm_undefined_ps(), \
9035                                             (__mmask8)-1, (int)(R))
9036
9037#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
9038  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9039                                             (__v2df)(__m128d)(B), \
9040                                             (__v4sf)(__m128)(W), \
9041                                             (__mmask8)(U), (int)(R))
9042
9043#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
9044  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9045                                             (__v2df)(__m128d)(B), \
9046                                             (__v4sf)_mm_setzero_ps(), \
9047                                             (__mmask8)(U), (int)(R))
9048
9049static __inline__ __m128 __DEFAULT_FN_ATTRS128
9050_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9051{
9052  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9053                                             (__v2df)__B,
9054                                             (__v4sf)__W,
9055                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9056}
9057
9058static __inline__ __m128 __DEFAULT_FN_ATTRS128
9059_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9060{
9061  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9062                                             (__v2df)__B,
9063                                             (__v4sf)_mm_setzero_ps(),
9064                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9065}
9066
9067#define _mm_cvtss_i32 _mm_cvtss_si32
9068#define _mm_cvtsd_i32 _mm_cvtsd_si32
9069#define _mm_cvti32_sd _mm_cvtsi32_sd
9070#define _mm_cvti32_ss _mm_cvtsi32_ss
9071#ifdef __x86_64__
9072#define _mm_cvtss_i64 _mm_cvtss_si64
9073#define _mm_cvtsd_i64 _mm_cvtsd_si64
9074#define _mm_cvti64_sd _mm_cvtsi64_sd
9075#define _mm_cvti64_ss _mm_cvtsi64_ss
9076#endif
9077
9078#ifdef __x86_64__
9079#define _mm_cvt_roundi64_sd(A, B, R) \
9080  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
9081                                     (int)(R))
9082
9083#define _mm_cvt_roundsi64_sd(A, B, R) \
9084  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
9085                                     (int)(R))
9086#endif
9087
9088#define _mm_cvt_roundsi32_ss(A, B, R) \
9089  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))
9090
9091#define _mm_cvt_roundi32_ss(A, B, R) \
9092  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))
9093
9094#ifdef __x86_64__
9095#define _mm_cvt_roundsi64_ss(A, B, R) \
9096  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
9097                                    (int)(R))
9098
9099#define _mm_cvt_roundi64_ss(A, B, R) \
9100  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
9101                                    (int)(R))
9102#endif
9103
9104#define _mm_cvt_roundss_sd(A, B, R) \
9105  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9106                                              (__v4sf)(__m128)(B), \
9107                                              (__v2df)_mm_undefined_pd(), \
9108                                              (__mmask8)-1, (int)(R))
9109
9110#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
9111  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9112                                              (__v4sf)(__m128)(B), \
9113                                              (__v2df)(__m128d)(W), \
9114                                              (__mmask8)(U), (int)(R))
9115
9116#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
9117  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9118                                              (__v4sf)(__m128)(B), \
9119                                              (__v2df)_mm_setzero_pd(), \
9120                                              (__mmask8)(U), (int)(R))
9121
9122static __inline__ __m128d __DEFAULT_FN_ATTRS128
9123_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9124{
9125  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9126                                            (__v4sf)__B,
9127                                            (__v2df)__W,
9128                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9129}
9130
9131static __inline__ __m128d __DEFAULT_FN_ATTRS128
9132_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9133{
9134  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9135                                            (__v4sf)__B,
9136                                            (__v2df)_mm_setzero_pd(),
9137                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9138}
9139
9140static __inline__ __m128d __DEFAULT_FN_ATTRS128
9141_mm_cvtu32_sd (__m128d __A, unsigned __B)
9142{
9143  __A[0] = __B;
9144  return __A;
9145}
9146
9147#ifdef __x86_64__
9148#define _mm_cvt_roundu64_sd(A, B, R) \
9149  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9150                                      (unsigned long long)(B), (int)(R))
9151
9152static __inline__ __m128d __DEFAULT_FN_ATTRS128
9153_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9154{
9155  __A[0] = __B;
9156  return __A;
9157}
9158#endif
9159
9160#define _mm_cvt_roundu32_ss(A, B, R) \
9161  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9162                                     (int)(R))
9163
9164static __inline__ __m128 __DEFAULT_FN_ATTRS128
9165_mm_cvtu32_ss (__m128 __A, unsigned __B)
9166{
9167  __A[0] = __B;
9168  return __A;
9169}
9170
9171#ifdef __x86_64__
9172#define _mm_cvt_roundu64_ss(A, B, R) \
9173  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9174                                     (unsigned long long)(B), (int)(R))
9175
9176static __inline__ __m128 __DEFAULT_FN_ATTRS128
9177_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9178{
9179  __A[0] = __B;
9180  return __A;
9181}
9182#endif
9183
9184static __inline__ __m512i __DEFAULT_FN_ATTRS512
9185_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9186{
9187  return (__m512i) __builtin_ia32_selectd_512(__M,
9188                                              (__v16si) _mm512_set1_epi32(__A),
9189                                              (__v16si) __O);
9190}
9191
9192static __inline__ __m512i __DEFAULT_FN_ATTRS512
9193_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9194{
9195  return (__m512i) __builtin_ia32_selectq_512(__M,
9196                                              (__v8di) _mm512_set1_epi64(__A),
9197                                              (__v8di) __O);
9198}
9199
9200static  __inline __m512i __DEFAULT_FN_ATTRS512
9201_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9202    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9203    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9204    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9205    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9206    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9207    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9208    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9209    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9210    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9211    char __e4, char __e3, char __e2, char __e1, char __e0) {
9212
9213  return __extension__ (__m512i)(__v64qi)
9214    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9215     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9216     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9217     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9218     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9219     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9220     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9221     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9222}
9223
9224static  __inline __m512i __DEFAULT_FN_ATTRS512
9225_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9226    short __e27, short __e26, short __e25, short __e24, short __e23,
9227    short __e22, short __e21, short __e20, short __e19, short __e18,
9228    short __e17, short __e16, short __e15, short __e14, short __e13,
9229    short __e12, short __e11, short __e10, short __e9, short __e8,
9230    short __e7, short __e6, short __e5, short __e4, short __e3,
9231    short __e2, short __e1, short __e0) {
9232  return __extension__ (__m512i)(__v32hi)
9233    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9234     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9235     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9236     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9237}
9238
9239static __inline __m512i __DEFAULT_FN_ATTRS512
9240_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9241     int __E, int __F, int __G, int __H,
9242     int __I, int __J, int __K, int __L,
9243     int __M, int __N, int __O, int __P)
9244{
9245  return __extension__ (__m512i)(__v16si)
9246  { __P, __O, __N, __M, __L, __K, __J, __I,
9247    __H, __G, __F, __E, __D, __C, __B, __A };
9248}
9249
9250#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
9251       e8,e9,e10,e11,e12,e13,e14,e15)          \
9252  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9253                   (e5),(e4),(e3),(e2),(e1),(e0))
9254
9255static __inline__ __m512i __DEFAULT_FN_ATTRS512
9256_mm512_set_epi64 (long long __A, long long __B, long long __C,
9257     long long __D, long long __E, long long __F,
9258     long long __G, long long __H)
9259{
9260  return __extension__ (__m512i) (__v8di)
9261  { __H, __G, __F, __E, __D, __C, __B, __A };
9262}
9263
9264#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
9265  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9266
9267static __inline__ __m512d __DEFAULT_FN_ATTRS512
9268_mm512_set_pd (double __A, double __B, double __C, double __D,
9269        double __E, double __F, double __G, double __H)
9270{
9271  return __extension__ (__m512d)
9272  { __H, __G, __F, __E, __D, __C, __B, __A };
9273}
9274
9275#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
9276  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9277
9278static __inline__ __m512 __DEFAULT_FN_ATTRS512
9279_mm512_set_ps (float __A, float __B, float __C, float __D,
9280        float __E, float __F, float __G, float __H,
9281        float __I, float __J, float __K, float __L,
9282        float __M, float __N, float __O, float __P)
9283{
9284  return __extension__ (__m512)
9285  { __P, __O, __N, __M, __L, __K, __J, __I,
9286    __H, __G, __F, __E, __D, __C, __B, __A };
9287}
9288
9289#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
9290  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9291                (e4),(e3),(e2),(e1),(e0))
9292
9293static __inline__ __m512 __DEFAULT_FN_ATTRS512
9294_mm512_abs_ps(__m512 __A)
9295{
9296  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9297}
9298
9299static __inline__ __m512 __DEFAULT_FN_ATTRS512
9300_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9301{
9302  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF), (__m512i)__A);
9303}
9304
9305static __inline__ __m512d __DEFAULT_FN_ATTRS512
9306_mm512_abs_pd(__m512d __A)
9307{
9308  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9309}
9310
9311static __inline__ __m512d __DEFAULT_FN_ATTRS512
9312_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9313{
9314  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), (__v8di)__A);
9315}
9316
9317/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9318 * outputs. This class of vector operation forms the basis of many scientific
9319 * computations. In vector-reduction arithmetic, the evaluation of the reduction
9320 * operation is independent of the order of the input elements of V.
9321 *
9322 * The implementations below use a bisection method: at each step, the vector
9323 * produced by the previous step is split in half and the operation is applied
9324 * to its two halves. This takes log2(n) steps, where n is the number of
9325 * elements in the vector. */
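/* Usage sketch (illustrative only, not part of the upstream header): the
 * reductions defined below are called like ordinary intrinsics; the halving
 * happens internally. For an eight-lane 64-bit vector that is log2(8) = 3
 * pairwise steps. The local names __v, __sum and __max are hypothetical.
 *
 *   __m512i   __v   = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1);
 *   long long __sum = _mm512_reduce_add_epi64(__v);   // 36
 *   long long __max = _mm512_reduce_max_epi64(__v);   //  8
 */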
9326
9327#define _mm512_mask_reduce_operator(op) \
9328  __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \
9329  __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \
9330  __m256i __t3 = (__m256i)(__t1 op __t2); \
9331  __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \
9332  __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \
9333  __v2du __t6 = __t4 op __t5; \
9334  __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9335  __v2du __t8 = __t6 op __t7; \
9336  return __t8[0];
9337
9338static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
9339  _mm512_mask_reduce_operator(+);
9340}
9341
9342static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
9343  _mm512_mask_reduce_operator(*);
9344}
9345
9346static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
9347  _mm512_mask_reduce_operator(&);
9348}
9349
9350static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
9351  _mm512_mask_reduce_operator(|);
9352}
9353
9354static __inline__ long long __DEFAULT_FN_ATTRS512
9355_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9356  __W = _mm512_maskz_mov_epi64(__M, __W);
9357  _mm512_mask_reduce_operator(+);
9358}
9359
9360static __inline__ long long __DEFAULT_FN_ATTRS512
9361_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9362  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9363  _mm512_mask_reduce_operator(*);
9364}
9365
9366static __inline__ long long __DEFAULT_FN_ATTRS512
9367_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9368  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
9369  _mm512_mask_reduce_operator(&);
9370}
9371
9372static __inline__ long long __DEFAULT_FN_ATTRS512
9373_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9374  __W = _mm512_maskz_mov_epi64(__M, __W);
9375  _mm512_mask_reduce_operator(|);
9376}
9377#undef _mm512_mask_reduce_operator
9378
9379#define _mm512_mask_reduce_operator(op) \
9380  __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
9381  __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
9382  __m256d __t3 = __t1 op __t2; \
9383  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9384  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9385  __m128d __t6 = __t4 op __t5; \
9386  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9387  __m128d __t8 = __t6 op __t7; \
9388  return __t8[0];
9389
9390static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9391  _mm512_mask_reduce_operator(+);
9392}
9393
9394static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9395  _mm512_mask_reduce_operator(*);
9396}
9397
9398static __inline__ double __DEFAULT_FN_ATTRS512
9399_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9400  __W = _mm512_maskz_mov_pd(__M, __W);
9401  _mm512_mask_reduce_operator(+);
9402}
9403
9404static __inline__ double __DEFAULT_FN_ATTRS512
9405_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9406  __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9407  _mm512_mask_reduce_operator(*);
9408}
9409#undef _mm512_mask_reduce_operator
9410
9411#define _mm512_mask_reduce_operator(op) \
9412  __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \
9413  __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \
9414  __m256i __t3 = (__m256i)(__t1 op __t2); \
9415  __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \
9416  __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \
9417  __v4su __t6 = __t4 op __t5; \
9418  __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9419  __v4su __t8 = __t6 op __t7; \
9420  __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9421  __v4su __t10 = __t8 op __t9; \
9422  return __t10[0];
9423
9424static __inline__ int __DEFAULT_FN_ATTRS512
9425_mm512_reduce_add_epi32(__m512i __W) {
9426  _mm512_mask_reduce_operator(+);
9427}
9428
9429static __inline__ int __DEFAULT_FN_ATTRS512
9430_mm512_reduce_mul_epi32(__m512i __W) {
9431  _mm512_mask_reduce_operator(*);
9432}
9433
9434static __inline__ int __DEFAULT_FN_ATTRS512
9435_mm512_reduce_and_epi32(__m512i __W) {
9436  _mm512_mask_reduce_operator(&);
9437}
9438
9439static __inline__ int __DEFAULT_FN_ATTRS512
9440_mm512_reduce_or_epi32(__m512i __W) {
9441  _mm512_mask_reduce_operator(|);
9442}
9443
9444static __inline__ int __DEFAULT_FN_ATTRS512
9445_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W) {
9446  __W = _mm512_maskz_mov_epi32(__M, __W);
9447  _mm512_mask_reduce_operator(+);
9448}
9449
9450static __inline__ int __DEFAULT_FN_ATTRS512
9451_mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W) {
9452  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9453  _mm512_mask_reduce_operator(*);
9454}
9455
9456static __inline__ int __DEFAULT_FN_ATTRS512
9457_mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W) {
9458  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
9459  _mm512_mask_reduce_operator(&);
9460}
9461
9462static __inline__ int __DEFAULT_FN_ATTRS512
9463_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9464  __W = _mm512_maskz_mov_epi32(__M, __W);
9465  _mm512_mask_reduce_operator(|);
9466}
9467#undef _mm512_mask_reduce_operator
9468
9469#define _mm512_mask_reduce_operator(op) \
9470  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \
9471  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \
9472  __m256 __t3 = __t1 op __t2; \
9473  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9474  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9475  __m128 __t6 = __t4 op __t5; \
9476  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9477  __m128 __t8 = __t6 op __t7; \
9478  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9479  __m128 __t10 = __t8 op __t9; \
9480  return __t10[0];
9481
9482static __inline__ float __DEFAULT_FN_ATTRS512
9483_mm512_reduce_add_ps(__m512 __W) {
9484  _mm512_mask_reduce_operator(+);
9485}
9486
9487static __inline__ float __DEFAULT_FN_ATTRS512
9488_mm512_reduce_mul_ps(__m512 __W) {
9489  _mm512_mask_reduce_operator(*);
9490}
9491
9492static __inline__ float __DEFAULT_FN_ATTRS512
9493_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9494  __W = _mm512_maskz_mov_ps(__M, __W);
9495  _mm512_mask_reduce_operator(+);
9496}
9497
9498static __inline__ float __DEFAULT_FN_ATTRS512
9499_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9500  __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9501  _mm512_mask_reduce_operator(*);
9502}
9503#undef _mm512_mask_reduce_operator
9504
9505#define _mm512_mask_reduce_operator(op) \
9506  __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \
9507  __m512i __t2 = _mm512_##op(__V, __t1); \
9508  __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \
9509  __m512i __t4 = _mm512_##op(__t2, __t3); \
9510  __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
9511  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
9512  return __t6[0];
9513
9514static __inline__ long long __DEFAULT_FN_ATTRS512
9515_mm512_reduce_max_epi64(__m512i __V) {
9516  _mm512_mask_reduce_operator(max_epi64);
9517}
9518
9519static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9520_mm512_reduce_max_epu64(__m512i __V) {
9521  _mm512_mask_reduce_operator(max_epu64);
9522}
9523
9524static __inline__ long long __DEFAULT_FN_ATTRS512
9525_mm512_reduce_min_epi64(__m512i __V) {
9526  _mm512_mask_reduce_operator(min_epi64);
9527}
9528
9529static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9530_mm512_reduce_min_epu64(__m512i __V) {
9531  _mm512_mask_reduce_operator(min_epu64);
9532}
9533
9534static __inline__ long long __DEFAULT_FN_ATTRS512
9535_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
9536  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9537  _mm512_mask_reduce_operator(max_epi64);
9538}
9539
9540static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9541_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
9542  __V = _mm512_maskz_mov_epi64(__M, __V);
9543  _mm512_mask_reduce_operator(max_epu64);
9544}
9545
9546static __inline__ long long __DEFAULT_FN_ATTRS512
9547_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
9548  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9549  _mm512_mask_reduce_operator(min_epi64);
9550}
9551
9552static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9553_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
9554  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
9555  _mm512_mask_reduce_operator(min_epu64);
9556}
9557#undef _mm512_mask_reduce_operator
9558
9559#define _mm512_mask_reduce_operator(op) \
9560  __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \
9561  __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \
9562  __m256i __t3 = _mm256_##op(__t1, __t2); \
9563  __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \
9564  __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \
9565  __m128i __t6 = _mm_##op(__t4, __t5); \
9566  __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \
9567  __m128i __t8 = _mm_##op(__t6, __t7); \
9568  __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
9569  __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
9570  return __t10[0];
9571
9572static __inline__ int __DEFAULT_FN_ATTRS512
9573_mm512_reduce_max_epi32(__m512i __V) {
9574  _mm512_mask_reduce_operator(max_epi32);
9575}
9576
9577static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9578_mm512_reduce_max_epu32(__m512i __V) {
9579  _mm512_mask_reduce_operator(max_epu32);
9580}
9581
9582static __inline__ int __DEFAULT_FN_ATTRS512
9583_mm512_reduce_min_epi32(__m512i __V) {
9584  _mm512_mask_reduce_operator(min_epi32);
9585}
9586
9587static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9588_mm512_reduce_min_epu32(__m512i __V) {
9589  _mm512_mask_reduce_operator(min_epu32);
9590}
9591
9592static __inline__ int __DEFAULT_FN_ATTRS512
9593_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
9594  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9595  _mm512_mask_reduce_operator(max_epi32);
9596}
9597
9598static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9599_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
9600  __V = _mm512_maskz_mov_epi32(__M, __V);
9601  _mm512_mask_reduce_operator(max_epu32);
9602}
9603
9604static __inline__ int __DEFAULT_FN_ATTRS512
9605_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
9606  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9607  _mm512_mask_reduce_operator(min_epi32);
9608}
9609
9610static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9611_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
9612  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
9613  _mm512_mask_reduce_operator(min_epu32);
9614}
9615#undef _mm512_mask_reduce_operator
9616
9617#define _mm512_mask_reduce_operator(op) \
9618  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
9619  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
9620  __m256d __t3 = _mm256_##op(__t1, __t2); \
9621  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9622  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9623  __m128d __t6 = _mm_##op(__t4, __t5); \
9624  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9625  __m128d __t8 = _mm_##op(__t6, __t7); \
9626  return __t8[0];
9627
9628static __inline__ double __DEFAULT_FN_ATTRS512
9629_mm512_reduce_max_pd(__m512d __V) {
9630  _mm512_mask_reduce_operator(max_pd);
9631}
9632
9633static __inline__ double __DEFAULT_FN_ATTRS512
9634_mm512_reduce_min_pd(__m512d __V) {
9635  _mm512_mask_reduce_operator(min_pd);
9636}
9637
9638static __inline__ double __DEFAULT_FN_ATTRS512
9639_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
9640  __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9641  _mm512_mask_reduce_operator(max_pd);
9642}
9643
9644static __inline__ double __DEFAULT_FN_ATTRS512
9645_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
9646  __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9647  _mm512_mask_reduce_operator(min_pd);
9648}
9649#undef _mm512_mask_reduce_operator
9650
9651#define _mm512_mask_reduce_operator(op) \
9652  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
9653  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
9654  __m256 __t3 = _mm256_##op(__t1, __t2); \
9655  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
9656  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
9657  __m128 __t6 = _mm_##op(__t4, __t5); \
9658  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9659  __m128 __t8 = _mm_##op(__t6, __t7); \
9660  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9661  __m128 __t10 = _mm_##op(__t8, __t9); \
9662  return __t10[0];
9663
9664static __inline__ float __DEFAULT_FN_ATTRS512
9665_mm512_reduce_max_ps(__m512 __V) {
9666  _mm512_mask_reduce_operator(max_ps);
9667}
9668
9669static __inline__ float __DEFAULT_FN_ATTRS512
9670_mm512_reduce_min_ps(__m512 __V) {
9671  _mm512_mask_reduce_operator(min_ps);
9672}
9673
9674static __inline__ float __DEFAULT_FN_ATTRS512
9675_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
9676  __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9677  _mm512_mask_reduce_operator(max_ps);
9678}
9679
9680static __inline__ float __DEFAULT_FN_ATTRS512
9681_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
9682  __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9683  _mm512_mask_reduce_operator(min_ps);
9684}
9685#undef _mm512_mask_reduce_operator
9686
9687#undef __DEFAULT_FN_ATTRS512
9688#undef __DEFAULT_FN_ATTRS128
9689#undef __DEFAULT_FN_ATTRS
9690
9691#endif /* __AVX512FINTRIN_H */
9692