Clang Project

clang_source_code/lib/Headers/avx512vlintrin.h
/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLINTRIN_H
29#define __AVX512VLINTRIN_H
30
/* Attribute sets applied to every inline function below: always-inline,
 * no debug info, target feature "avx512vl", and a minimum vector width of 128
 * or 256 respectively. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))

/* Narrow vector types: 4-byte vector of short, 4-byte vector of char and
 * 2-byte vector of char. */
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef char __v2qi __attribute__((__vector_size__(2)));
37
/* Integer compare */
39
/* _mm_ epi32 compare shorthands: each expands to _mm_cmp_epi32_mask or
 * _mm_mask_cmp_epi32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
64
/* _mm256_ epi32 compare shorthands: each expands to _mm256_cmp_epi32_mask or
 * _mm256_mask_cmp_epi32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
89
/* _mm_ epu32 compare shorthands: each expands to _mm_cmp_epu32_mask or
 * _mm_mask_cmp_epu32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
114
/* _mm256_ epu32 compare shorthands: each expands to _mm256_cmp_epu32_mask or
 * _mm256_mask_cmp_epu32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
139
/* _mm_ epi64 compare shorthands: each expands to _mm_cmp_epi64_mask or
 * _mm_mask_cmp_epi64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
164
/* _mm256_ epi64 compare shorthands: each expands to _mm256_cmp_epi64_mask or
 * _mm256_mask_cmp_epi64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
189
/* _mm_ epu64 compare shorthands: each expands to _mm_cmp_epu64_mask or
 * _mm_mask_cmp_epu64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
214
/* _mm256_ epu64 compare shorthands: each expands to _mm256_cmp_epu64_mask or
 * _mm256_mask_cmp_epu64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
239
240static __inline__ __m256i __DEFAULT_FN_ATTRS256
241_mm256_mask_add_epi32(__m256i __W__mmask8 __U__m256i __A__m256i __B)
242{
243  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244                                             (__v8si)_mm256_add_epi32(__A__B),
245                                             (__v8si)__W);
246}
247
248static __inline__ __m256i __DEFAULT_FN_ATTRS256
249_mm256_maskz_add_epi32(__mmask8 __U__m256i __A__m256i __B)
250{
251  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
252                                             (__v8si)_mm256_add_epi32(__A__B),
253                                             (__v8si)_mm256_setzero_si256());
254}
255
256static __inline__ __m256i __DEFAULT_FN_ATTRS256
257_mm256_mask_add_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
258{
259  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260                                             (__v4di)_mm256_add_epi64(__A__B),
261                                             (__v4di)__W);
262}
263
264static __inline__ __m256i __DEFAULT_FN_ATTRS256
265_mm256_maskz_add_epi64(__mmask8 __U__m256i __A__m256i __B)
266{
267  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
268                                             (__v4di)_mm256_add_epi64(__A__B),
269                                             (__v4di)_mm256_setzero_si256());
270}
271
272static __inline__ __m256i __DEFAULT_FN_ATTRS256
273_mm256_mask_sub_epi32(__m256i __W__mmask8 __U__m256i __A__m256i __B)
274{
275  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276                                             (__v8si)_mm256_sub_epi32(__A__B),
277                                             (__v8si)__W);
278}
279
280static __inline__ __m256i __DEFAULT_FN_ATTRS256
281_mm256_maskz_sub_epi32(__mmask8 __U__m256i __A__m256i __B)
282{
283  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
284                                             (__v8si)_mm256_sub_epi32(__A__B),
285                                             (__v8si)_mm256_setzero_si256());
286}
287
288static __inline__ __m256i __DEFAULT_FN_ATTRS256
289_mm256_mask_sub_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
290{
291  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292                                             (__v4di)_mm256_sub_epi64(__A__B),
293                                             (__v4di)__W);
294}
295
296static __inline__ __m256i __DEFAULT_FN_ATTRS256
297_mm256_maskz_sub_epi64(__mmask8 __U__m256i __A__m256i __B)
298{
299  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
300                                             (__v4di)_mm256_sub_epi64(__A__B),
301                                             (__v4di)_mm256_setzero_si256());
302}
303
304static __inline__ __m128i __DEFAULT_FN_ATTRS128
305_mm_mask_add_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
306{
307  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308                                             (__v4si)_mm_add_epi32(__A__B),
309                                             (__v4si)__W);
310}
311
312static __inline__ __m128i __DEFAULT_FN_ATTRS128
313_mm_maskz_add_epi32(__mmask8 __U__m128i __A__m128i __B)
314{
315  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
316                                             (__v4si)_mm_add_epi32(__A__B),
317                                             (__v4si)_mm_setzero_si128());
318}
319
320static __inline__ __m128i __DEFAULT_FN_ATTRS128
321_mm_mask_add_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
322{
323  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324                                             (__v2di)_mm_add_epi64(__A__B),
325                                             (__v2di)__W);
326}
327
328static __inline__ __m128i __DEFAULT_FN_ATTRS128
329_mm_maskz_add_epi64(__mmask8 __U__m128i __A__m128i __B)
330{
331  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
332                                             (__v2di)_mm_add_epi64(__A__B),
333                                             (__v2di)_mm_setzero_si128());
334}
335
336static __inline__ __m128i __DEFAULT_FN_ATTRS128
337_mm_mask_sub_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
338{
339  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340                                             (__v4si)_mm_sub_epi32(__A__B),
341                                             (__v4si)__W);
342}
343
344static __inline__ __m128i __DEFAULT_FN_ATTRS128
345_mm_maskz_sub_epi32(__mmask8 __U__m128i __A__m128i __B)
346{
347  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
348                                             (__v4si)_mm_sub_epi32(__A__B),
349                                             (__v4si)_mm_setzero_si128());
350}
351
352static __inline__ __m128i __DEFAULT_FN_ATTRS128
353_mm_mask_sub_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
354{
355  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356                                             (__v2di)_mm_sub_epi64(__A__B),
357                                             (__v2di)__W);
358}
359
360static __inline__ __m128i __DEFAULT_FN_ATTRS128
361_mm_maskz_sub_epi64(__mmask8 __U__m128i __A__m128i __B)
362{
363  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
364                                             (__v2di)_mm_sub_epi64(__A__B),
365                                             (__v2di)_mm_setzero_si128());
366}
367
368static __inline__ __m256i __DEFAULT_FN_ATTRS256
369_mm256_mask_mul_epi32(__m256i __W__mmask8 __M__m256i __X__m256i __Y)
370{
371  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372                                             (__v4di)_mm256_mul_epi32(__X__Y),
373                                             (__v4di)__W);
374}
375
376static __inline__ __m256i __DEFAULT_FN_ATTRS256
377_mm256_maskz_mul_epi32(__mmask8 __M__m256i __X__m256i __Y)
378{
379  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
380                                             (__v4di)_mm256_mul_epi32(__X__Y),
381                                             (__v4di)_mm256_setzero_si256());
382}
383
384static __inline__ __m128i __DEFAULT_FN_ATTRS128
385_mm_mask_mul_epi32(__m128i __W__mmask8 __M__m128i __X__m128i __Y)
386{
387  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388                                             (__v2di)_mm_mul_epi32(__X__Y),
389                                             (__v2di)__W);
390}
391
392static __inline__ __m128i __DEFAULT_FN_ATTRS128
393_mm_maskz_mul_epi32(__mmask8 __M__m128i __X__m128i __Y)
394{
395  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
396                                             (__v2di)_mm_mul_epi32(__X__Y),
397                                             (__v2di)_mm_setzero_si128());
398}
399
400static __inline__ __m256i __DEFAULT_FN_ATTRS256
401_mm256_mask_mul_epu32(__m256i __W__mmask8 __M__m256i __X__m256i __Y)
402{
403  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404                                             (__v4di)_mm256_mul_epu32(__X__Y),
405                                             (__v4di)__W);
406}
407
408static __inline__ __m256i __DEFAULT_FN_ATTRS256
409_mm256_maskz_mul_epu32(__mmask8 __M__m256i __X__m256i __Y)
410{
411  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
412                                             (__v4di)_mm256_mul_epu32(__X__Y),
413                                             (__v4di)_mm256_setzero_si256());
414}
415
416static __inline__ __m128i __DEFAULT_FN_ATTRS128
417_mm_mask_mul_epu32(__m128i __W__mmask8 __M__m128i __X__m128i __Y)
418{
419  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420                                             (__v2di)_mm_mul_epu32(__X__Y),
421                                             (__v2di)__W);
422}
423
424static __inline__ __m128i __DEFAULT_FN_ATTRS128
425_mm_maskz_mul_epu32(__mmask8 __M__m128i __X__m128i __Y)
426{
427  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
428                                             (__v2di)_mm_mul_epu32(__X__Y),
429                                             (__v2di)_mm_setzero_si128());
430}
431
432static __inline__ __m256i __DEFAULT_FN_ATTRS256
433_mm256_maskz_mullo_epi32(__mmask8 __M__m256i __A__m256i __B)
434{
435  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
436                                             (__v8si)_mm256_mullo_epi32(__A__B),
437                                             (__v8si)_mm256_setzero_si256());
438}
439
440static __inline__ __m256i __DEFAULT_FN_ATTRS256
441_mm256_mask_mullo_epi32(__m256i __W__mmask8 __M__m256i __A__m256i __B)
442{
443  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
444                                             (__v8si)_mm256_mullo_epi32(__A__B),
445                                             (__v8si)__W);
446}
447
448static __inline__ __m128i __DEFAULT_FN_ATTRS128
449_mm_maskz_mullo_epi32(__mmask8 __M__m128i __A__m128i __B)
450{
451  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
452                                             (__v4si)_mm_mullo_epi32(__A__B),
453                                             (__v4si)_mm_setzero_si128());
454}
455
456static __inline__ __m128i __DEFAULT_FN_ATTRS128
457_mm_mask_mullo_epi32(__m128i __W__mmask8 __M__m128i __A__m128i __B)
458{
459  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
460                                             (__v4si)_mm_mullo_epi32(__A__B),
461                                             (__v4si)__W);
462}
463
464static __inline__ __m256i __DEFAULT_FN_ATTRS256
465_mm256_and_epi32(__m256i __a__m256i __b)
466{
467  return (__m256i)((__v8su)__a & (__v8su)__b);
468}
469
470static __inline__ __m256i __DEFAULT_FN_ATTRS256
471_mm256_mask_and_epi32(__m256i __W__mmask8 __U__m256i __A__m256i __B)
472{
473  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
474                                             (__v8si)_mm256_and_epi32(__A__B),
475                                             (__v8si)__W);
476}
477
478static __inline__ __m256i __DEFAULT_FN_ATTRS256
479_mm256_maskz_and_epi32(__mmask8 __U__m256i __A__m256i __B)
480{
481  return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U__A__B);
482}
483
484static __inline__ __m128i __DEFAULT_FN_ATTRS128
485_mm_and_epi32(__m128i __a__m128i __b)
486{
487  return (__m128i)((__v4su)__a & (__v4su)__b);
488}
489
490static __inline__ __m128i __DEFAULT_FN_ATTRS128
491_mm_mask_and_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
492{
493  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
494                                             (__v4si)_mm_and_epi32(__A__B),
495                                             (__v4si)__W);
496}
497
498static __inline__ __m128i __DEFAULT_FN_ATTRS128
499_mm_maskz_and_epi32(__mmask8 __U__m128i __A__m128i __B)
500{
501  return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U__A__B);
502}
503
504static __inline__ __m256i __DEFAULT_FN_ATTRS256
505_mm256_andnot_epi32(__m256i __A__m256i __B)
506{
507  return (__m256i)(~(__v8su)__A & (__v8su)__B);
508}
509
510static __inline__ __m256i __DEFAULT_FN_ATTRS256
511_mm256_mask_andnot_epi32(__m256i __W__mmask8 __U__m256i __A__m256i __B)
512{
513  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
514                                          (__v8si)_mm256_andnot_epi32(__A__B),
515                                          (__v8si)__W);
516}
517
518static __inline__ __m256i __DEFAULT_FN_ATTRS256
519_mm256_maskz_andnot_epi32(__mmask8 __U__m256i __A__m256i __B)
520{
521  return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
522                                           __U__A__B);
523}
524
525static __inline__ __m128i __DEFAULT_FN_ATTRS128
526_mm_andnot_epi32(__m128i __A__m128i __B)
527{
528  return (__m128i)(~(__v4su)__A & (__v4su)__B);
529}
530
531static __inline__ __m128i __DEFAULT_FN_ATTRS128
532_mm_mask_andnot_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
533{
534  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
535                                             (__v4si)_mm_andnot_epi32(__A__B),
536                                             (__v4si)__W);
537}
538
539static __inline__ __m128i __DEFAULT_FN_ATTRS128
540_mm_maskz_andnot_epi32(__mmask8 __U__m128i __A__m128i __B)
541{
542  return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U__A__B);
543}
544
545static __inline__ __m256i __DEFAULT_FN_ATTRS256
546_mm256_or_epi32(__m256i __a__m256i __b)
547{
548  return (__m256i)((__v8su)__a | (__v8su)__b);
549}
550
551static __inline__ __m256i __DEFAULT_FN_ATTRS256
552_mm256_mask_or_epi32 (__m256i __W__mmask8 __U__m256i __A__m256i __B)
553{
554  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
555                                             (__v8si)_mm256_or_epi32(__A__B),
556                                             (__v8si)__W);
557}
558
559static __inline__ __m256i __DEFAULT_FN_ATTRS256
560_mm256_maskz_or_epi32(__mmask8 __U__m256i __A__m256i __B)
561{
562  return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U__A__B);
563}
564
565static __inline__ __m128i __DEFAULT_FN_ATTRS128
566_mm_or_epi32(__m128i __a__m128i __b)
567{
568  return (__m128i)((__v4su)__a | (__v4su)__b);
569}
570
571static __inline__ __m128i __DEFAULT_FN_ATTRS128
572_mm_mask_or_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
573{
574  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
575                                             (__v4si)_mm_or_epi32(__A__B),
576                                             (__v4si)__W);
577}
578
579static __inline__ __m128i __DEFAULT_FN_ATTRS128
580_mm_maskz_or_epi32(__mmask8 __U__m128i __A__m128i __B)
581{
582  return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U__A__B);
583}
584
585static __inline__ __m256i __DEFAULT_FN_ATTRS256
586_mm256_xor_epi32(__m256i __a__m256i __b)
587{
588  return (__m256i)((__v8su)__a ^ (__v8su)__b);
589}
590
591static __inline__ __m256i __DEFAULT_FN_ATTRS256
592_mm256_mask_xor_epi32(__m256i __W__mmask8 __U__m256i __A__m256i __B)
593{
594  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
595                                             (__v8si)_mm256_xor_epi32(__A__B),
596                                             (__v8si)__W);
597}
598
599static __inline__ __m256i __DEFAULT_FN_ATTRS256
600_mm256_maskz_xor_epi32(__mmask8 __U__m256i __A__m256i __B)
601{
602  return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U__A__B);
603}
604
605static __inline__ __m128i __DEFAULT_FN_ATTRS128
606_mm_xor_epi32(__m128i __a__m128i __b)
607{
608  return (__m128i)((__v4su)__a ^ (__v4su)__b);
609}
610
611static __inline__ __m128i __DEFAULT_FN_ATTRS128
612_mm_mask_xor_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
613{
614  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
615                                             (__v4si)_mm_xor_epi32(__A__B),
616                                             (__v4si)__W);
617}
618
619static __inline__ __m128i __DEFAULT_FN_ATTRS128
620_mm_maskz_xor_epi32(__mmask8 __U__m128i __A__m128i __B)
621{
622  return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U__A__B);
623}
624
625static __inline__ __m256i __DEFAULT_FN_ATTRS256
626_mm256_and_epi64(__m256i __a__m256i __b)
627{
628  return (__m256i)((__v4du)__a & (__v4du)__b);
629}
630
631static __inline__ __m256i __DEFAULT_FN_ATTRS256
632_mm256_mask_and_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
633{
634  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
635                                             (__v4di)_mm256_and_epi64(__A__B),
636                                             (__v4di)__W);
637}
638
639static __inline__ __m256i __DEFAULT_FN_ATTRS256
640_mm256_maskz_and_epi64(__mmask8 __U__m256i __A__m256i __B)
641{
642  return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U__A__B);
643}
644
645static __inline__ __m128i __DEFAULT_FN_ATTRS128
646_mm_and_epi64(__m128i __a__m128i __b)
647{
648  return (__m128i)((__v2du)__a & (__v2du)__b);
649}
650
651static __inline__ __m128i __DEFAULT_FN_ATTRS128
652_mm_mask_and_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
653{
654  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
655                                             (__v2di)_mm_and_epi64(__A__B),
656                                             (__v2di)__W);
657}
658
659static __inline__ __m128i __DEFAULT_FN_ATTRS128
660_mm_maskz_and_epi64(__mmask8 __U__m128i __A__m128i __B)
661{
662  return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U__A__B);
663}
664
665static __inline__ __m256i __DEFAULT_FN_ATTRS256
666_mm256_andnot_epi64(__m256i __A__m256i __B)
667{
668  return (__m256i)(~(__v4du)__A & (__v4du)__B);
669}
670
671static __inline__ __m256i __DEFAULT_FN_ATTRS256
672_mm256_mask_andnot_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
673{
674  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
675                                          (__v4di)_mm256_andnot_epi64(__A__B),
676                                          (__v4di)__W);
677}
678
679static __inline__ __m256i __DEFAULT_FN_ATTRS256
680_mm256_maskz_andnot_epi64(__mmask8 __U__m256i __A__m256i __B)
681{
682  return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
683                                           __U__A__B);
684}
685
686static __inline__ __m128i __DEFAULT_FN_ATTRS128
687_mm_andnot_epi64(__m128i __A__m128i __B)
688{
689  return (__m128i)(~(__v2du)__A & (__v2du)__B);
690}
691
692static __inline__ __m128i __DEFAULT_FN_ATTRS128
693_mm_mask_andnot_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
694{
695  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
696                                             (__v2di)_mm_andnot_epi64(__A__B),
697                                             (__v2di)__W);
698}
699
700static __inline__ __m128i __DEFAULT_FN_ATTRS128
701_mm_maskz_andnot_epi64(__mmask8 __U__m128i __A__m128i __B)
702{
703  return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U__A__B);
704}
705
706static __inline__ __m256i __DEFAULT_FN_ATTRS256
707_mm256_or_epi64(__m256i __a__m256i __b)
708{
709  return (__m256i)((__v4du)__a | (__v4du)__b);
710}
711
712static __inline__ __m256i __DEFAULT_FN_ATTRS256
713_mm256_mask_or_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
714{
715  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
716                                             (__v4di)_mm256_or_epi64(__A__B),
717                                             (__v4di)__W);
718}
719
720static __inline__ __m256i __DEFAULT_FN_ATTRS256
721_mm256_maskz_or_epi64(__mmask8 __U__m256i __A__m256i __B)
722{
723  return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U__A__B);
724}
725
726static __inline__ __m128i __DEFAULT_FN_ATTRS128
727_mm_or_epi64(__m128i __a__m128i __b)
728{
729  return (__m128i)((__v2du)__a | (__v2du)__b);
730}
731
732static __inline__ __m128i __DEFAULT_FN_ATTRS128
733_mm_mask_or_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
734{
735  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
736                                             (__v2di)_mm_or_epi64(__A__B),
737                                             (__v2di)__W);
738}
739
740static __inline__ __m128i __DEFAULT_FN_ATTRS128
741_mm_maskz_or_epi64(__mmask8 __U__m128i __A__m128i __B)
742{
743  return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U__A__B);
744}
745
746static __inline__ __m256i __DEFAULT_FN_ATTRS256
747_mm256_xor_epi64(__m256i __a__m256i __b)
748{
749  return (__m256i)((__v4du)__a ^ (__v4du)__b);
750}
751
752static __inline__ __m256i __DEFAULT_FN_ATTRS256
753_mm256_mask_xor_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
754{
755  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
756                                             (__v4di)_mm256_xor_epi64(__A__B),
757                                             (__v4di)__W);
758}
759
760static __inline__ __m256i __DEFAULT_FN_ATTRS256
761_mm256_maskz_xor_epi64(__mmask8 __U__m256i __A__m256i __B)
762{
763  return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U__A__B);
764}
765
766static __inline__ __m128i __DEFAULT_FN_ATTRS128
767_mm_xor_epi64(__m128i __a__m128i __b)
768{
769  return (__m128i)((__v2du)__a ^ (__v2du)__b);
770}
771
772static __inline__ __m128i __DEFAULT_FN_ATTRS128
773_mm_mask_xor_epi64(__m128i __W__mmask8 __U__m128i __A,
774        __m128i __B)
775{
776  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
777                                             (__v2di)_mm_xor_epi64(__A__B),
778                                             (__v2di)__W);
779}
780
781static __inline__ __m128i __DEFAULT_FN_ATTRS128
782_mm_maskz_xor_epi64(__mmask8 __U__m128i __A__m128i __B)
783{
784  return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U__A__B);
785}
786
/* Compare packed signed 32-bit integers of two 128-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))
791
/* Compare packed signed 32-bit integers of two 128-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
796
/* Compare packed unsigned 32-bit integers of two 128-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))
801
/* Compare packed unsigned 32-bit integers of two 128-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
806
/* Compare packed signed 32-bit integers of two 256-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))
811
/* Compare packed signed 32-bit integers of two 256-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
816
/* Compare packed unsigned 32-bit integers of two 256-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))
821
/* Compare packed unsigned 32-bit integers of two 256-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
826
/* Compare packed signed 64-bit integers of two 128-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))
831
/* Compare packed signed 64-bit integers of two 128-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
836
/* Compare packed unsigned 64-bit integers of two 128-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))
841
/* Compare packed unsigned 64-bit integers of two 128-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
846
/* Compare packed signed 64-bit integers of two 256-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))
851
/* Compare packed signed 64-bit integers of two 256-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
856
/* Compare packed unsigned 64-bit integers of two 256-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))
861
/* Compare packed unsigned 64-bit integers of two 256-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
866
/* Compare packed single-precision floats of two 256-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))
871
/* Compare packed single-precision floats of two 256-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))
876
/* Compare packed double-precision floats of two 256-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))
881
/* Compare packed double-precision floats of two 256-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
886
/* Compare packed single-precision floats of two 128-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))
891
/* Compare packed single-precision floats of two 128-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))
896
/* Compare packed double-precision floats of two 128-bit vectors with
 * predicate p; an all-ones mask is passed so no lane is masked off. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))
901
/* Compare packed double-precision floats of two 128-bit vectors with
 * predicate p, passing m to the builtin as the input mask. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
906
907static __inline__ __m128d __DEFAULT_FN_ATTRS128
908_mm_mask_fmadd_pd(__m128d __A__mmask8 __U__m128d __B__m128d __C)
909{
910  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
911                    __builtin_ia32_vfmaddpd ((__v2df__A,
912                                             (__v2df__B,
913                                             (__v2df__C),
914                    (__v2df__A);
915}
916
917static __inline__ __m128d __DEFAULT_FN_ATTRS128
918_mm_mask3_fmadd_pd(__m128d __A__m128d __B__m128d __C__mmask8 __U)
919{
920  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
921                    __builtin_ia32_vfmaddpd ((__v2df__A,
922                                             (__v2df__B,
923                                             (__v2df__C),
924                    (__v2df__C);
925}
926
927static __inline__ __m128d __DEFAULT_FN_ATTRS128
928_mm_maskz_fmadd_pd(__mmask8 __U__m128d __A__m128d __B__m128d __C)
929{
930  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
931                    __builtin_ia32_vfmaddpd ((__v2df__A,
932                                             (__v2df__B,
933                                             (__v2df__C),
934                    (__v2df)_mm_setzero_pd());
935}
936
937static __inline__ __m128d __DEFAULT_FN_ATTRS128
938_mm_mask_fmsub_pd(__m128d __A__mmask8 __U__m128d __B__m128d __C)
939{
940  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
941                    __builtin_ia32_vfmaddpd ((__v2df__A,
942                                             (__v2df__B,
943                                             -(__v2df__C),
944                    (__v2df__A);
945}
946
947static __inline__ __m128d __DEFAULT_FN_ATTRS128
948_mm_maskz_fmsub_pd(__mmask8 __U__m128d __A__m128d __B__m128d __C)
949{
950  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
951                    __builtin_ia32_vfmaddpd ((__v2df__A,
952                                             (__v2df__B,
953                                             -(__v2df__C),
954                    (__v2df)_mm_setzero_pd());
955}
956
957static __inline__ __m128d __DEFAULT_FN_ATTRS128
958_mm_mask3_fnmadd_pd(__m128d __A__m128d __B__m128d __C__mmask8 __U)
959{
960  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
961                    __builtin_ia32_vfmaddpd (-(__v2df__A,
962                                             (__v2df__B,
963                                             (__v2df__C),
964                    (__v2df__C);
965}
966
967static __inline__ __m128d __DEFAULT_FN_ATTRS128
968_mm_maskz_fnmadd_pd(__mmask8 __U__m128d __A__m128d __B__m128d __C)
969{
970  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
971                    __builtin_ia32_vfmaddpd (-(__v2df__A,
972                                             (__v2df__B,
973                                             (__v2df__C),
974                    (__v2df)_mm_setzero_pd());
975}
976
977static __inline__ __m128d __DEFAULT_FN_ATTRS128
978_mm_maskz_fnmsub_pd(__mmask8 __U__m128d __A__m128d __B__m128d __C)
979{
980  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
981                    __builtin_ia32_vfmaddpd (-(__v2df__A,
982                                             (__v2df__B,
983                                             -(__v2df__C),
984                    (__v2df)_mm_setzero_pd());
985}
986
987static __inline__ __m256d __DEFAULT_FN_ATTRS256
988_mm256_mask_fmadd_pd(__m256d __A__mmask8 __U__m256d __B__m256d __C)
989{
990  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
991                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
992                                                (__v4df__B,
993                                                (__v4df__C),
994                    (__v4df__A);
995}
996
997static __inline__ __m256d __DEFAULT_FN_ATTRS256
998_mm256_mask3_fmadd_pd(__m256d __A__m256d __B__m256d __C__mmask8 __U)
999{
1000  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1001                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1002                                                (__v4df__B,
1003                                                (__v4df__C),
1004                    (__v4df__C);
1005}
1006
1007static __inline__ __m256d __DEFAULT_FN_ATTRS256
1008_mm256_maskz_fmadd_pd(__mmask8 __U__m256d __A__m256d __B__m256d __C)
1009{
1010  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1011                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1012                                                (__v4df__B,
1013                                                (__v4df__C),
1014                    (__v4df)_mm256_setzero_pd());
1015}
1016
1017static __inline__ __m256d __DEFAULT_FN_ATTRS256
1018_mm256_mask_fmsub_pd(__m256d __A__mmask8 __U__m256d __B__m256d __C)
1019{
1020  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1021                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1022                                                (__v4df__B,
1023                                                -(__v4df__C),
1024                    (__v4df__A);
1025}
1026
1027static __inline__ __m256d __DEFAULT_FN_ATTRS256
1028_mm256_maskz_fmsub_pd(__mmask8 __U__m256d __A__m256d __B__m256d __C)
1029{
1030  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1031                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1032                                                (__v4df__B,
1033                                                -(__v4df__C),
1034                    (__v4df)_mm256_setzero_pd());
1035}
1036
1037static __inline__ __m256d __DEFAULT_FN_ATTRS256
1038_mm256_mask3_fnmadd_pd(__m256d __A__m256d __B__m256d __C__mmask8 __U)
1039{
1040  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1041                    __builtin_ia32_vfmaddpd256 (-(__v4df__A,
1042                                                (__v4df__B,
1043                                                (__v4df__C),
1044                    (__v4df__C);
1045}
1046
1047static __inline__ __m256d __DEFAULT_FN_ATTRS256
1048_mm256_maskz_fnmadd_pd(__mmask8 __U__m256d __A__m256d __B__m256d __C)
1049{
1050  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1051                    __builtin_ia32_vfmaddpd256 (-(__v4df__A,
1052                                                (__v4df__B,
1053                                                (__v4df__C),
1054                    (__v4df)_mm256_setzero_pd());
1055}
1056
1057static __inline__ __m256d __DEFAULT_FN_ATTRS256
1058_mm256_maskz_fnmsub_pd(__mmask8 __U__m256d __A__m256d __B__m256d __C)
1059{
1060  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1061                    __builtin_ia32_vfmaddpd256 (-(__v4df__A,
1062                                                (__v4df__B,
1063                                                -(__v4df__C),
1064                    (__v4df)_mm256_setzero_pd());
1065}
1066
1067static __inline__ __m128 __DEFAULT_FN_ATTRS128
1068_mm_mask_fmadd_ps(__m128 __A__mmask8 __U__m128 __B__m128 __C)
1069{
1070  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1071                    __builtin_ia32_vfmaddps ((__v4sf__A,
1072                                             (__v4sf__B,
1073                                             (__v4sf__C),
1074                    (__v4sf__A);
1075}
1076
1077static __inline__ __m128 __DEFAULT_FN_ATTRS128
1078_mm_mask3_fmadd_ps(__m128 __A__m128 __B__m128 __C__mmask8 __U)
1079{
1080  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1081                    __builtin_ia32_vfmaddps ((__v4sf__A,
1082                                             (__v4sf__B,
1083                                             (__v4sf__C),
1084                    (__v4sf__C);
1085}
1086
1087static __inline__ __m128 __DEFAULT_FN_ATTRS128
1088_mm_maskz_fmadd_ps(__mmask8 __U__m128 __A__m128 __B__m128 __C)
1089{
1090  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1091                    __builtin_ia32_vfmaddps ((__v4sf__A,
1092                                             (__v4sf__B,
1093                                             (__v4sf__C),
1094                    (__v4sf)_mm_setzero_ps());
1095}
1096
1097static __inline__ __m128 __DEFAULT_FN_ATTRS128
1098_mm_mask_fmsub_ps(__m128 __A__mmask8 __U__m128 __B__m128 __C)
1099{
1100  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1101                    __builtin_ia32_vfmaddps ((__v4sf__A,
1102                                             (__v4sf__B,
1103                                             -(__v4sf__C),
1104                    (__v4sf__A);
1105}
1106
1107static __inline__ __m128 __DEFAULT_FN_ATTRS128
1108_mm_maskz_fmsub_ps(__mmask8 __U__m128 __A__m128 __B__m128 __C)
1109{
1110  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1111                    __builtin_ia32_vfmaddps ((__v4sf__A,
1112                                             (__v4sf__B,
1113                                             -(__v4sf__C),
1114                    (__v4sf)_mm_setzero_ps());
1115}
1116
1117static __inline__ __m128 __DEFAULT_FN_ATTRS128
1118_mm_mask3_fnmadd_ps(__m128 __A__m128 __B__m128 __C__mmask8 __U)
1119{
1120  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1121                    __builtin_ia32_vfmaddps (-(__v4sf__A,
1122                                             (__v4sf__B,
1123                                             (__v4sf__C),
1124                    (__v4sf__C);
1125}
1126
1127static __inline__ __m128 __DEFAULT_FN_ATTRS128
1128_mm_maskz_fnmadd_ps(__mmask8 __U__m128 __A__m128 __B__m128 __C)
1129{
1130  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1131                    __builtin_ia32_vfmaddps (-(__v4sf__A,
1132                                             (__v4sf__B,
1133                                             (__v4sf__C),
1134                    (__v4sf)_mm_setzero_ps());
1135}
1136
1137static __inline__ __m128 __DEFAULT_FN_ATTRS128
1138_mm_maskz_fnmsub_ps(__mmask8 __U__m128 __A__m128 __B__m128 __C)
1139{
1140  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1141                    __builtin_ia32_vfmaddps (-(__v4sf__A,
1142                                             (__v4sf__B,
1143                                             -(__v4sf__C),
1144                    (__v4sf)_mm_setzero_ps());
1145}
1146
1147static __inline__ __m256 __DEFAULT_FN_ATTRS256
1148_mm256_mask_fmadd_ps(__m256 __A__mmask8 __U__m256 __B__m256 __C)
1149{
1150  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1151                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1152                                                (__v8sf__B,
1153                                                (__v8sf__C),
1154                    (__v8sf__A);
1155}
1156
1157static __inline__ __m256 __DEFAULT_FN_ATTRS256
1158_mm256_mask3_fmadd_ps(__m256 __A__m256 __B__m256 __C__mmask8 __U)
1159{
1160  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1161                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1162                                                (__v8sf__B,
1163                                                (__v8sf__C),
1164                    (__v8sf__C);
1165}
1166
1167static __inline__ __m256 __DEFAULT_FN_ATTRS256
1168_mm256_maskz_fmadd_ps(__mmask8 __U__m256 __A__m256 __B__m256 __C)
1169{
1170  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1171                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1172                                                (__v8sf__B,
1173                                                (__v8sf__C),
1174                    (__v8sf)_mm256_setzero_ps());
1175}
1176
1177static __inline__ __m256 __DEFAULT_FN_ATTRS256
1178_mm256_mask_fmsub_ps(__m256 __A__mmask8 __U__m256 __B__m256 __C)
1179{
1180  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1181                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1182                                                (__v8sf__B,
1183                                                -(__v8sf__C),
1184                    (__v8sf__A);
1185}
1186
1187static __inline__ __m256 __DEFAULT_FN_ATTRS256
1188_mm256_maskz_fmsub_ps(__mmask8 __U__m256 __A__m256 __B__m256 __C)
1189{
1190  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1191                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1192                                                (__v8sf__B,
1193                                                -(__v8sf__C),
1194                    (__v8sf)_mm256_setzero_ps());
1195}
1196
1197static __inline__ __m256 __DEFAULT_FN_ATTRS256
1198_mm256_mask3_fnmadd_ps(__m256 __A__m256 __B__m256 __C__mmask8 __U)
1199{
1200  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1201                    __builtin_ia32_vfmaddps256 (-(__v8sf__A,
1202                                                (__v8sf__B,
1203                                                (__v8sf__C),
1204                    (__v8sf__C);
1205}
1206
1207static __inline__ __m256 __DEFAULT_FN_ATTRS256
1208_mm256_maskz_fnmadd_ps(__mmask8 __U__m256 __A__m256 __B__m256 __C)
1209{
1210  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1211                    __builtin_ia32_vfmaddps256 (-(__v8sf__A,
1212                                                (__v8sf__B,
1213                                                (__v8sf__C),
1214                    (__v8sf)_mm256_setzero_ps());
1215}
1216
1217static __inline__ __m256 __DEFAULT_FN_ATTRS256
1218_mm256_maskz_fnmsub_ps(__mmask8 __U__m256 __A__m256 __B__m256 __C)
1219{
1220  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1221                    __builtin_ia32_vfmaddps256 (-(__v8sf__A,
1222                                                (__v8sf__B,
1223                                                -(__v8sf__C),
1224                    (__v8sf)_mm256_setzero_ps());
1225}
1226
1227static __inline__ __m128d __DEFAULT_FN_ATTRS128
1228_mm_mask_fmaddsub_pd(__m128d __A__mmask8 __U__m128d __B__m128d __C)
1229{
1230  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1231                    __builtin_ia32_vfmaddsubpd ((__v2df__A,
1232                                                (__v2df__B,
1233                                                (__v2df__C),
1234                    (__v2df__A);
1235}
1236
1237static __inline__ __m128d __DEFAULT_FN_ATTRS128
1238_mm_mask3_fmaddsub_pd(__m128d __A__m128d __B__m128d __C__mmask8 __U)
1239{
1240  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1241                    __builtin_ia32_vfmaddsubpd ((__v2df__A,
1242                                                (__v2df__B,
1243                                                (__v2df__C),
1244                    (__v2df__C);
1245}
1246
1247static __inline__ __m128d __DEFAULT_FN_ATTRS128
1248_mm_maskz_fmaddsub_pd(__mmask8 __U__m128d __A__m128d __B__m128d __C)
1249{
1250  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1251                    __builtin_ia32_vfmaddsubpd ((__v2df__A,
1252                                                (__v2df__B,
1253                                                (__v2df__C),
1254                    (__v2df)_mm_setzero_pd());
1255}
1256
1257static __inline__ __m128d __DEFAULT_FN_ATTRS128
1258_mm_mask_fmsubadd_pd(__m128d __A__mmask8 __U__m128d __B__m128d __C)
1259{
1260  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1261                    __builtin_ia32_vfmaddsubpd ((__v2df__A,
1262                                                (__v2df__B,
1263                                                -(__v2df__C),
1264                    (__v2df__A);
1265}
1266
1267static __inline__ __m128d __DEFAULT_FN_ATTRS128
1268_mm_maskz_fmsubadd_pd(__mmask8 __U__m128d __A__m128d __B__m128d __C)
1269{
1270  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1271                    __builtin_ia32_vfmaddsubpd ((__v2df__A,
1272                                                (__v2df__B,
1273                                                -(__v2df__C),
1274                    (__v2df)_mm_setzero_pd());
1275}
1276
1277static __inline__ __m256d __DEFAULT_FN_ATTRS256
1278_mm256_mask_fmaddsub_pd(__m256d __A__mmask8 __U__m256d __B__m256d __C)
1279{
1280  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1281                    __builtin_ia32_vfmaddsubpd256 ((__v4df__A,
1282                                                   (__v4df__B,
1283                                                   (__v4df__C),
1284                    (__v4df__A);
1285}
1286
1287static __inline__ __m256d __DEFAULT_FN_ATTRS256
1288_mm256_mask3_fmaddsub_pd(__m256d __A__m256d __B__m256d __C__mmask8 __U)
1289{
1290  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1291                    __builtin_ia32_vfmaddsubpd256 ((__v4df__A,
1292                                                   (__v4df__B,
1293                                                   (__v4df__C),
1294                    (__v4df__C);
1295}
1296
1297static __inline__ __m256d __DEFAULT_FN_ATTRS256
1298_mm256_maskz_fmaddsub_pd(__mmask8 __U__m256d __A__m256d __B__m256d __C)
1299{
1300  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1301                    __builtin_ia32_vfmaddsubpd256 ((__v4df__A,
1302                                                   (__v4df__B,
1303                                                   (__v4df__C),
1304                    (__v4df)_mm256_setzero_pd());
1305}
1306
1307static __inline__ __m256d __DEFAULT_FN_ATTRS256
1308_mm256_mask_fmsubadd_pd(__m256d __A__mmask8 __U__m256d __B__m256d __C)
1309{
1310  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1311                    __builtin_ia32_vfmaddsubpd256 ((__v4df__A,
1312                                                   (__v4df__B,
1313                                                   -(__v4df__C),
1314                    (__v4df__A);
1315}
1316
1317static __inline__ __m256d __DEFAULT_FN_ATTRS256
1318_mm256_maskz_fmsubadd_pd(__mmask8 __U__m256d __A__m256d __B__m256d __C)
1319{
1320  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1321                    __builtin_ia32_vfmaddsubpd256 ((__v4df__A,
1322                                                   (__v4df__B,
1323                                                   -(__v4df__C),
1324                    (__v4df)_mm256_setzero_pd());
1325}
1326
1327static __inline__ __m128 __DEFAULT_FN_ATTRS128
1328_mm_mask_fmaddsub_ps(__m128 __A__mmask8 __U__m128 __B__m128 __C)
1329{
1330  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1331                    __builtin_ia32_vfmaddsubps ((__v4sf__A,
1332                                                (__v4sf__B,
1333                                                (__v4sf__C),
1334                    (__v4sf__A);
1335}
1336
1337static __inline__ __m128 __DEFAULT_FN_ATTRS128
1338_mm_mask3_fmaddsub_ps(__m128 __A__m128 __B__m128 __C__mmask8 __U)
1339{
1340  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1341                    __builtin_ia32_vfmaddsubps ((__v4sf__A,
1342                                                (__v4sf__B,
1343                                                (__v4sf__C),
1344                    (__v4sf__C);
1345}
1346
1347static __inline__ __m128 __DEFAULT_FN_ATTRS128
1348_mm_maskz_fmaddsub_ps(__mmask8 __U__m128 __A__m128 __B__m128 __C)
1349{
1350  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1351                    __builtin_ia32_vfmaddsubps ((__v4sf__A,
1352                                                (__v4sf__B,
1353                                                (__v4sf__C),
1354                    (__v4sf)_mm_setzero_ps());
1355}
1356
1357static __inline__ __m128 __DEFAULT_FN_ATTRS128
1358_mm_mask_fmsubadd_ps(__m128 __A__mmask8 __U__m128 __B__m128 __C)
1359{
1360  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1361                    __builtin_ia32_vfmaddsubps ((__v4sf__A,
1362                                                (__v4sf__B,
1363                                                -(__v4sf__C),
1364                    (__v4sf__A);
1365}
1366
1367static __inline__ __m128 __DEFAULT_FN_ATTRS128
1368_mm_maskz_fmsubadd_ps(__mmask8 __U__m128 __A__m128 __B__m128 __C)
1369{
1370  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1371                    __builtin_ia32_vfmaddsubps ((__v4sf__A,
1372                                                (__v4sf__B,
1373                                                -(__v4sf__C),
1374                    (__v4sf)_mm_setzero_ps());
1375}
1376
1377static __inline__ __m256 __DEFAULT_FN_ATTRS256
1378_mm256_mask_fmaddsub_ps(__m256 __A__mmask8 __U__m256 __B,
1379                         __m256 __C)
1380{
1381  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1382                    __builtin_ia32_vfmaddsubps256 ((__v8sf__A,
1383                                                   (__v8sf__B,
1384                                                   (__v8sf__C),
1385                    (__v8sf__A);
1386}
1387
1388static __inline__ __m256 __DEFAULT_FN_ATTRS256
1389_mm256_mask3_fmaddsub_ps(__m256 __A__m256 __B__m256 __C__mmask8 __U)
1390{
1391  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1392                    __builtin_ia32_vfmaddsubps256 ((__v8sf__A,
1393                                                   (__v8sf__B,
1394                                                   (__v8sf__C),
1395                    (__v8sf__C);
1396}
1397
1398static __inline__ __m256 __DEFAULT_FN_ATTRS256
1399_mm256_maskz_fmaddsub_ps(__mmask8 __U__m256 __A__m256 __B__m256 __C)
1400{
1401  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1402                    __builtin_ia32_vfmaddsubps256 ((__v8sf__A,
1403                                                   (__v8sf__B,
1404                                                   (__v8sf__C),
1405                    (__v8sf)_mm256_setzero_ps());
1406}
1407
1408static __inline__ __m256 __DEFAULT_FN_ATTRS256
1409_mm256_mask_fmsubadd_ps(__m256 __A__mmask8 __U__m256 __B__m256 __C)
1410{
1411  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1412                    __builtin_ia32_vfmaddsubps256 ((__v8sf__A,
1413                                                   (__v8sf__B,
1414                                                   -(__v8sf__C),
1415                    (__v8sf__A);
1416}
1417
1418static __inline__ __m256 __DEFAULT_FN_ATTRS256
1419_mm256_maskz_fmsubadd_ps(__mmask8 __U__m256 __A__m256 __B__m256 __C)
1420{
1421  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1422                    __builtin_ia32_vfmaddsubps256 ((__v8sf__A,
1423                                                   (__v8sf__B,
1424                                                   -(__v8sf__C),
1425                    (__v8sf)_mm256_setzero_ps());
1426}
1427
1428static __inline__ __m128d __DEFAULT_FN_ATTRS128
1429_mm_mask3_fmsub_pd(__m128d __A__m128d __B__m128d __C__mmask8 __U)
1430{
1431  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1432                    __builtin_ia32_vfmaddpd ((__v2df__A,
1433                                             (__v2df__B,
1434                                             -(__v2df__C),
1435                    (__v2df__C);
1436}
1437
1438static __inline__ __m256d __DEFAULT_FN_ATTRS256
1439_mm256_mask3_fmsub_pd(__m256d __A__m256d __B__m256d __C__mmask8 __U)
1440{
1441  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1442                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1443                                                (__v4df__B,
1444                                                -(__v4df__C),
1445                    (__v4df__C);
1446}
1447
1448static __inline__ __m128 __DEFAULT_FN_ATTRS128
1449_mm_mask3_fmsub_ps(__m128 __A__m128 __B__m128 __C__mmask8 __U)
1450{
1451  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1452                    __builtin_ia32_vfmaddps ((__v4sf__A,
1453                                             (__v4sf__B,
1454                                             -(__v4sf__C),
1455                    (__v4sf__C);
1456}
1457
1458static __inline__ __m256 __DEFAULT_FN_ATTRS256
1459_mm256_mask3_fmsub_ps(__m256 __A__m256 __B__m256 __C__mmask8 __U)
1460{
1461  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1462                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1463                                                (__v8sf__B,
1464                                                -(__v8sf__C),
1465                    (__v8sf__C);
1466}
1467
1468static __inline__ __m128d __DEFAULT_FN_ATTRS128
1469_mm_mask3_fmsubadd_pd(__m128d __A__m128d __B__m128d __C__mmask8 __U)
1470{
1471  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1472                    __builtin_ia32_vfmaddsubpd ((__v2df__A,
1473                                                (__v2df__B,
1474                                                -(__v2df__C),
1475                    (__v2df__C);
1476}
1477
1478static __inline__ __m256d __DEFAULT_FN_ATTRS256
1479_mm256_mask3_fmsubadd_pd(__m256d __A__m256d __B__m256d __C__mmask8 __U)
1480{
1481  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1482                    __builtin_ia32_vfmaddsubpd256 ((__v4df__A,
1483                                                   (__v4df__B,
1484                                                   -(__v4df__C),
1485                    (__v4df__C);
1486}
1487
1488static __inline__ __m128 __DEFAULT_FN_ATTRS128
1489_mm_mask3_fmsubadd_ps(__m128 __A__m128 __B__m128 __C__mmask8 __U)
1490{
1491  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1492                    __builtin_ia32_vfmaddsubps ((__v4sf__A,
1493                                                (__v4sf__B,
1494                                                -(__v4sf__C),
1495                    (__v4sf__C);
1496}
1497
1498static __inline__ __m256 __DEFAULT_FN_ATTRS256
1499_mm256_mask3_fmsubadd_ps(__m256 __A__m256 __B__m256 __C__mmask8 __U)
1500{
1501  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1502                    __builtin_ia32_vfmaddsubps256 ((__v8sf__A,
1503                                                   (__v8sf__B,
1504                                                   -(__v8sf__C),
1505                    (__v8sf__C);
1506}
1507
1508static __inline__ __m128d __DEFAULT_FN_ATTRS128
1509_mm_mask_fnmadd_pd(__m128d __A__mmask8 __U__m128d __B__m128d __C)
1510{
1511  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1512                    __builtin_ia32_vfmaddpd ((__v2df__A,
1513                                             -(__v2df__B,
1514                                             (__v2df__C),
1515                    (__v2df__A);
1516}
1517
1518static __inline__ __m256d __DEFAULT_FN_ATTRS256
1519_mm256_mask_fnmadd_pd(__m256d __A__mmask8 __U__m256d __B__m256d __C)
1520{
1521  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1522                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1523                                                -(__v4df__B,
1524                                                (__v4df__C),
1525                    (__v4df__A);
1526}
1527
1528static __inline__ __m128 __DEFAULT_FN_ATTRS128
1529_mm_mask_fnmadd_ps(__m128 __A__mmask8 __U__m128 __B__m128 __C)
1530{
1531  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1532                    __builtin_ia32_vfmaddps ((__v4sf__A,
1533                                             -(__v4sf__B,
1534                                             (__v4sf__C),
1535                    (__v4sf__A);
1536}
1537
1538static __inline__ __m256 __DEFAULT_FN_ATTRS256
1539_mm256_mask_fnmadd_ps(__m256 __A__mmask8 __U__m256 __B__m256 __C)
1540{
1541  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1542                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1543                                                -(__v8sf__B,
1544                                                (__v8sf__C),
1545                    (__v8sf__A);
1546}
1547
1548static __inline__ __m128d __DEFAULT_FN_ATTRS128
1549_mm_mask_fnmsub_pd(__m128d __A__mmask8 __U__m128d __B__m128d __C)
1550{
1551  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1552                    __builtin_ia32_vfmaddpd ((__v2df__A,
1553                                             -(__v2df__B,
1554                                             -(__v2df__C),
1555                    (__v2df__A);
1556}
1557
1558static __inline__ __m128d __DEFAULT_FN_ATTRS128
1559_mm_mask3_fnmsub_pd(__m128d __A__m128d __B__m128d __C__mmask8 __U)
1560{
1561  return (__m128d__builtin_ia32_selectpd_128((__mmask8__U,
1562                    __builtin_ia32_vfmaddpd ((__v2df__A,
1563                                             -(__v2df__B,
1564                                             -(__v2df__C),
1565                    (__v2df__C);
1566}
1567
1568static __inline__ __m256d __DEFAULT_FN_ATTRS256
1569_mm256_mask_fnmsub_pd(__m256d __A__mmask8 __U__m256d __B__m256d __C)
1570{
1571  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1572                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1573                                                -(__v4df__B,
1574                                                -(__v4df__C),
1575                    (__v4df__A);
1576}
1577
1578static __inline__ __m256d __DEFAULT_FN_ATTRS256
1579_mm256_mask3_fnmsub_pd(__m256d __A__m256d __B__m256d __C__mmask8 __U)
1580{
1581  return (__m256d__builtin_ia32_selectpd_256((__mmask8__U,
1582                    __builtin_ia32_vfmaddpd256 ((__v4df__A,
1583                                                -(__v4df__B,
1584                                                -(__v4df__C),
1585                    (__v4df__C);
1586}
1587
1588static __inline__ __m128 __DEFAULT_FN_ATTRS128
1589_mm_mask_fnmsub_ps(__m128 __A__mmask8 __U__m128 __B__m128 __C)
1590{
1591  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1592                    __builtin_ia32_vfmaddps ((__v4sf__A,
1593                                             -(__v4sf__B,
1594                                             -(__v4sf__C),
1595                    (__v4sf__A);
1596}
1597
1598static __inline__ __m128 __DEFAULT_FN_ATTRS128
1599_mm_mask3_fnmsub_ps(__m128 __A__m128 __B__m128 __C__mmask8 __U)
1600{
1601  return (__m128__builtin_ia32_selectps_128((__mmask8__U,
1602                    __builtin_ia32_vfmaddps ((__v4sf__A,
1603                                             -(__v4sf__B,
1604                                             -(__v4sf__C),
1605                    (__v4sf__C);
1606}
1607
1608static __inline__ __m256 __DEFAULT_FN_ATTRS256
1609_mm256_mask_fnmsub_ps(__m256 __A__mmask8 __U__m256 __B__m256 __C)
1610{
1611  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1612                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1613                                                -(__v8sf__B,
1614                                                -(__v8sf__C),
1615                    (__v8sf__A);
1616}
1617
1618static __inline__ __m256 __DEFAULT_FN_ATTRS256
1619_mm256_mask3_fnmsub_ps(__m256 __A__m256 __B__m256 __C__mmask8 __U)
1620{
1621  return (__m256__builtin_ia32_selectps_256((__mmask8__U,
1622                    __builtin_ia32_vfmaddps256 ((__v8sf__A,
1623                                                -(__v8sf__B,
1624                                                -(__v8sf__C),
1625                    (__v8sf__C);
1626}
1627
1628static __inline__ __m128d __DEFAULT_FN_ATTRS128
1629_mm_mask_add_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
1630  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1631                                              (__v2df)_mm_add_pd(__A__B),
1632                                              (__v2df)__W);
1633}
1634
1635static __inline__ __m128d __DEFAULT_FN_ATTRS128
1636_mm_maskz_add_pd(__mmask8 __U__m128d __A__m128d __B) {
1637  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1638                                              (__v2df)_mm_add_pd(__A__B),
1639                                              (__v2df)_mm_setzero_pd());
1640}
1641
1642static __inline__ __m256d __DEFAULT_FN_ATTRS256
1643_mm256_mask_add_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
1644  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1645                                              (__v4df)_mm256_add_pd(__A__B),
1646                                              (__v4df)__W);
1647}
1648
1649static __inline__ __m256d __DEFAULT_FN_ATTRS256
1650_mm256_maskz_add_pd(__mmask8 __U__m256d __A__m256d __B) {
1651  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1652                                              (__v4df)_mm256_add_pd(__A__B),
1653                                              (__v4df)_mm256_setzero_pd());
1654}
1655
1656static __inline__ __m128 __DEFAULT_FN_ATTRS128
1657_mm_mask_add_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
1658  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1659                                             (__v4sf)_mm_add_ps(__A__B),
1660                                             (__v4sf)__W);
1661}
1662
1663static __inline__ __m128 __DEFAULT_FN_ATTRS128
1664_mm_maskz_add_ps(__mmask8 __U__m128 __A__m128 __B) {
1665  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1666                                             (__v4sf)_mm_add_ps(__A__B),
1667                                             (__v4sf)_mm_setzero_ps());
1668}
1669
1670static __inline__ __m256 __DEFAULT_FN_ATTRS256
1671_mm256_mask_add_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
1672  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1673                                             (__v8sf)_mm256_add_ps(__A__B),
1674                                             (__v8sf)__W);
1675}
1676
1677static __inline__ __m256 __DEFAULT_FN_ATTRS256
1678_mm256_maskz_add_ps(__mmask8 __U__m256 __A__m256 __B) {
1679  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1680                                             (__v8sf)_mm256_add_ps(__A__B),
1681                                             (__v8sf)_mm256_setzero_ps());
1682}
1683
1684static __inline__ __m128i __DEFAULT_FN_ATTRS128
1685_mm_mask_blend_epi32 (__mmask8 __U__m128i __A__m128i __W) {
1686  return (__m128i__builtin_ia32_selectd_128 ((__mmask8__U,
1687                (__v4si__W,
1688                (__v4si__A);
1689}
1690
1691static __inline__ __m256i __DEFAULT_FN_ATTRS256
1692_mm256_mask_blend_epi32 (__mmask8 __U__m256i __A__m256i __W) {
1693  return (__m256i__builtin_ia32_selectd_256 ((__mmask8__U,
1694                (__v8si__W,
1695                (__v8si__A);
1696}
1697
1698static __inline__ __m128d __DEFAULT_FN_ATTRS128
1699_mm_mask_blend_pd (__mmask8 __U__m128d __A__m128d __W) {
1700  return (__m128d__builtin_ia32_selectpd_128 ((__mmask8__U,
1701                 (__v2df__W,
1702                 (__v2df__A);
1703}
1704
1705static __inline__ __m256d __DEFAULT_FN_ATTRS256
1706_mm256_mask_blend_pd (__mmask8 __U__m256d __A__m256d __W) {
1707  return (__m256d__builtin_ia32_selectpd_256 ((__mmask8__U,
1708                 (__v4df__W,
1709                 (__v4df__A);
1710}
1711
1712static __inline__ __m128 __DEFAULT_FN_ATTRS128
1713_mm_mask_blend_ps (__mmask8 __U__m128 __A__m128 __W) {
1714  return (__m128__builtin_ia32_selectps_128 ((__mmask8__U,
1715                (__v4sf__W,
1716                (__v4sf__A);
1717}
1718
1719static __inline__ __m256 __DEFAULT_FN_ATTRS256
1720_mm256_mask_blend_ps (__mmask8 __U__m256 __A__m256 __W) {
1721  return (__m256__builtin_ia32_selectps_256 ((__mmask8__U,
1722                (__v8sf__W,
1723                (__v8sf__A);
1724}
1725
1726static __inline__ __m128i __DEFAULT_FN_ATTRS128
1727_mm_mask_blend_epi64 (__mmask8 __U__m128i __A__m128i __W) {
1728  return (__m128i__builtin_ia32_selectq_128 ((__mmask8__U,
1729                (__v2di__W,
1730                (__v2di__A);
1731}
1732
1733static __inline__ __m256i __DEFAULT_FN_ATTRS256
1734_mm256_mask_blend_epi64 (__mmask8 __U__m256i __A__m256i __W) {
1735  return (__m256i__builtin_ia32_selectq_256 ((__mmask8__U,
1736                (__v4di__W,
1737                (__v4di__A);
1738}
1739
1740static __inline__ __m128d __DEFAULT_FN_ATTRS128
1741_mm_mask_compress_pd (__m128d __W__mmask8 __U__m128d __A) {
1742  return (__m128d__builtin_ia32_compressdf128_mask ((__v2df__A,
1743                  (__v2df__W,
1744                  (__mmask8__U);
1745}
1746
1747static __inline__ __m128d __DEFAULT_FN_ATTRS128
1748_mm_maskz_compress_pd (__mmask8 __U__m128d __A) {
1749  return (__m128d__builtin_ia32_compressdf128_mask ((__v2df__A,
1750                  (__v2df)
1751                  _mm_setzero_pd (),
1752                  (__mmask8__U);
1753}
1754
1755static __inline__ __m256d __DEFAULT_FN_ATTRS256
1756_mm256_mask_compress_pd (__m256d __W__mmask8 __U__m256d __A) {
1757  return (__m256d__builtin_ia32_compressdf256_mask ((__v4df__A,
1758                  (__v4df__W,
1759                  (__mmask8__U);
1760}
1761
1762static __inline__ __m256d __DEFAULT_FN_ATTRS256
1763_mm256_maskz_compress_pd (__mmask8 __U__m256d __A) {
1764  return (__m256d__builtin_ia32_compressdf256_mask ((__v4df__A,
1765                  (__v4df)
1766                  _mm256_setzero_pd (),
1767                  (__mmask8__U);
1768}
1769
1770static __inline__ __m128i __DEFAULT_FN_ATTRS128
1771_mm_mask_compress_epi64 (__m128i __W__mmask8 __U__m128i __A) {
1772  return (__m128i__builtin_ia32_compressdi128_mask ((__v2di__A,
1773                  (__v2di__W,
1774                  (__mmask8__U);
1775}
1776
1777static __inline__ __m128i __DEFAULT_FN_ATTRS128
1778_mm_maskz_compress_epi64 (__mmask8 __U__m128i __A) {
1779  return (__m128i__builtin_ia32_compressdi128_mask ((__v2di__A,
1780                  (__v2di)
1781                  _mm_setzero_si128 (),
1782                  (__mmask8__U);
1783}
1784
1785static __inline__ __m256i __DEFAULT_FN_ATTRS256
1786_mm256_mask_compress_epi64 (__m256i __W__mmask8 __U__m256i __A) {
1787  return (__m256i__builtin_ia32_compressdi256_mask ((__v4di__A,
1788                  (__v4di__W,
1789                  (__mmask8__U);
1790}
1791
1792static __inline__ __m256i __DEFAULT_FN_ATTRS256
1793_mm256_maskz_compress_epi64 (__mmask8 __U__m256i __A) {
1794  return (__m256i__builtin_ia32_compressdi256_mask ((__v4di__A,
1795                  (__v4di)
1796                  _mm256_setzero_si256 (),
1797                  (__mmask8__U);
1798}
1799
1800static __inline__ __m128 __DEFAULT_FN_ATTRS128
1801_mm_mask_compress_ps (__m128 __W__mmask8 __U__m128 __A) {
1802  return (__m128__builtin_ia32_compresssf128_mask ((__v4sf__A,
1803                 (__v4sf__W,
1804                 (__mmask8__U);
1805}
1806
1807static __inline__ __m128 __DEFAULT_FN_ATTRS128
1808_mm_maskz_compress_ps (__mmask8 __U__m128 __A) {
1809  return (__m128__builtin_ia32_compresssf128_mask ((__v4sf__A,
1810                 (__v4sf)
1811                 _mm_setzero_ps (),
1812                 (__mmask8__U);
1813}
1814
1815static __inline__ __m256 __DEFAULT_FN_ATTRS256
1816_mm256_mask_compress_ps (__m256 __W__mmask8 __U__m256 __A) {
1817  return (__m256__builtin_ia32_compresssf256_mask ((__v8sf__A,
1818                 (__v8sf__W,
1819                 (__mmask8__U);
1820}
1821
1822static __inline__ __m256 __DEFAULT_FN_ATTRS256
1823_mm256_maskz_compress_ps (__mmask8 __U__m256 __A) {
1824  return (__m256__builtin_ia32_compresssf256_mask ((__v8sf__A,
1825                 (__v8sf)
1826                 _mm256_setzero_ps (),
1827                 (__mmask8__U);
1828}
1829
1830static __inline__ __m128i __DEFAULT_FN_ATTRS128
1831_mm_mask_compress_epi32 (__m128i __W__mmask8 __U__m128i __A) {
1832  return (__m128i__builtin_ia32_compresssi128_mask ((__v4si__A,
1833                  (__v4si__W,
1834                  (__mmask8__U);
1835}
1836
1837static __inline__ __m128i __DEFAULT_FN_ATTRS128
1838_mm_maskz_compress_epi32 (__mmask8 __U__m128i __A) {
1839  return (__m128i__builtin_ia32_compresssi128_mask ((__v4si__A,
1840                  (__v4si)
1841                  _mm_setzero_si128 (),
1842                  (__mmask8__U);
1843}
1844
1845static __inline__ __m256i __DEFAULT_FN_ATTRS256
1846_mm256_mask_compress_epi32 (__m256i __W__mmask8 __U__m256i __A) {
1847  return (__m256i__builtin_ia32_compresssi256_mask ((__v8si__A,
1848                  (__v8si__W,
1849                  (__mmask8__U);
1850}
1851
1852static __inline__ __m256i __DEFAULT_FN_ATTRS256
1853_mm256_maskz_compress_epi32 (__mmask8 __U__m256i __A) {
1854  return (__m256i__builtin_ia32_compresssi256_mask ((__v8si__A,
1855                  (__v8si)
1856                  _mm256_setzero_si256 (),
1857                  (__mmask8__U);
1858}
1859
1860static __inline__ void __DEFAULT_FN_ATTRS128
1861_mm_mask_compressstoreu_pd (void *__P__mmask8 __U__m128d __A) {
1862  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1863            (__v2df__A,
1864            (__mmask8__U);
1865}
1866
1867static __inline__ void __DEFAULT_FN_ATTRS256
1868_mm256_mask_compressstoreu_pd (void *__P__mmask8 __U__m256d __A) {
1869  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1870            (__v4df__A,
1871            (__mmask8__U);
1872}
1873
1874static __inline__ void __DEFAULT_FN_ATTRS128
1875_mm_mask_compressstoreu_epi64 (void *__P__mmask8 __U__m128i __A) {
1876  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1877            (__v2di__A,
1878            (__mmask8__U);
1879}
1880
1881static __inline__ void __DEFAULT_FN_ATTRS256
1882_mm256_mask_compressstoreu_epi64 (void *__P__mmask8 __U__m256i __A) {
1883  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1884            (__v4di__A,
1885            (__mmask8__U);
1886}
1887
1888static __inline__ void __DEFAULT_FN_ATTRS128
1889_mm_mask_compressstoreu_ps (void *__P__mmask8 __U__m128 __A) {
1890  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1891            (__v4sf__A,
1892            (__mmask8__U);
1893}
1894
1895static __inline__ void __DEFAULT_FN_ATTRS256
1896_mm256_mask_compressstoreu_ps (void *__P__mmask8 __U__m256 __A) {
1897  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1898            (__v8sf__A,
1899            (__mmask8__U);
1900}
1901
1902static __inline__ void __DEFAULT_FN_ATTRS128
1903_mm_mask_compressstoreu_epi32 (void *__P__mmask8 __U__m128i __A) {
1904  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1905            (__v4si__A,
1906            (__mmask8__U);
1907}
1908
1909static __inline__ void __DEFAULT_FN_ATTRS256
1910_mm256_mask_compressstoreu_epi32 (void *__P__mmask8 __U__m256i __A) {
1911  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1912            (__v8si__A,
1913            (__mmask8__U);
1914}
1915
1916static __inline__ __m128d __DEFAULT_FN_ATTRS128
1917_mm_mask_cvtepi32_pd (__m128d __W__mmask8 __U__m128i __A) {
1918  return (__m128d)__builtin_ia32_selectpd_128((__mmask8__U,
1919                                              (__v2df)_mm_cvtepi32_pd(__A),
1920                                              (__v2df)__W);
1921}
1922
1923static __inline__ __m128d __DEFAULT_FN_ATTRS128
1924_mm_maskz_cvtepi32_pd (__mmask8 __U__m128i __A) {
1925  return (__m128d)__builtin_ia32_selectpd_128((__mmask8__U,
1926                                              (__v2df)_mm_cvtepi32_pd(__A),
1927                                              (__v2df)_mm_setzero_pd());
1928}
1929
1930static __inline__ __m256d __DEFAULT_FN_ATTRS256
1931_mm256_mask_cvtepi32_pd (__m256d __W__mmask8 __U__m128i __A) {
1932  return (__m256d)__builtin_ia32_selectpd_256((__mmask8__U,
1933                                              (__v4df)_mm256_cvtepi32_pd(__A),
1934                                              (__v4df)__W);
1935}
1936
1937static __inline__ __m256d __DEFAULT_FN_ATTRS256
1938_mm256_maskz_cvtepi32_pd (__mmask8 __U__m128i __A) {
1939  return (__m256d)__builtin_ia32_selectpd_256((__mmask8__U,
1940                                              (__v4df)_mm256_cvtepi32_pd(__A),
1941                                              (__v4df)_mm256_setzero_pd());
1942}
1943
1944static __inline__ __m128 __DEFAULT_FN_ATTRS128
1945_mm_mask_cvtepi32_ps (__m128 __W__mmask8 __U__m128i __A) {
1946  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1947                                             (__v4sf)_mm_cvtepi32_ps(__A),
1948                                             (__v4sf)__W);
1949}
1950
1951static __inline__ __m128 __DEFAULT_FN_ATTRS128
1952_mm_maskz_cvtepi32_ps (__mmask8 __U__m128i __A) {
1953  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1954                                             (__v4sf)_mm_cvtepi32_ps(__A),
1955                                             (__v4sf)_mm_setzero_ps());
1956}
1957
1958static __inline__ __m256 __DEFAULT_FN_ATTRS256
1959_mm256_mask_cvtepi32_ps (__m256 __W__mmask8 __U__m256i __A) {
1960  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1961                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1962                                             (__v8sf)__W);
1963}
1964
1965static __inline__ __m256 __DEFAULT_FN_ATTRS256
1966_mm256_maskz_cvtepi32_ps (__mmask8 __U__m256i __A) {
1967  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1968                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1969                                             (__v8sf)_mm256_setzero_ps());
1970}
1971
1972static __inline__ __m128i __DEFAULT_FN_ATTRS128
1973_mm_mask_cvtpd_epi32 (__m128i __W__mmask8 __U__m128d __A) {
1974  return (__m128i__builtin_ia32_cvtpd2dq128_mask ((__v2df__A,
1975                (__v4si__W,
1976                (__mmask8__U);
1977}
1978
1979static __inline__ __m128i __DEFAULT_FN_ATTRS128
1980_mm_maskz_cvtpd_epi32 (__mmask8 __U__m128d __A) {
1981  return (__m128i__builtin_ia32_cvtpd2dq128_mask ((__v2df__A,
1982                (__v4si)
1983                _mm_setzero_si128 (),
1984                (__mmask8__U);
1985}
1986
1987static __inline__ __m128i __DEFAULT_FN_ATTRS256
1988_mm256_mask_cvtpd_epi32 (__m128i __W__mmask8 __U__m256d __A) {
1989  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1990                                             (__v4si)_mm256_cvtpd_epi32(__A),
1991                                             (__v4si)__W);
1992}
1993
1994static __inline__ __m128i __DEFAULT_FN_ATTRS256
1995_mm256_maskz_cvtpd_epi32 (__mmask8 __U__m256d __A) {
1996  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1997                                             (__v4si)_mm256_cvtpd_epi32(__A),
1998                                             (__v4si)_mm_setzero_si128());
1999}
2000
2001static __inline__ __m128 __DEFAULT_FN_ATTRS128
2002_mm_mask_cvtpd_ps (__m128 __W__mmask8 __U__m128d __A) {
2003  return (__m128__builtin_ia32_cvtpd2ps_mask ((__v2df__A,
2004            (__v4sf__W,
2005            (__mmask8__U);
2006}
2007
2008static __inline__ __m128 __DEFAULT_FN_ATTRS128
2009_mm_maskz_cvtpd_ps (__mmask8 __U__m128d __A) {
2010  return (__m128__builtin_ia32_cvtpd2ps_mask ((__v2df__A,
2011            (__v4sf)
2012            _mm_setzero_ps (),
2013            (__mmask8__U);
2014}
2015
2016static __inline__ __m128 __DEFAULT_FN_ATTRS256
2017_mm256_mask_cvtpd_ps (__m128 __W__mmask8 __U__m256d __A) {
2018  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2019                                             (__v4sf)_mm256_cvtpd_ps(__A),
2020                                             (__v4sf)__W);
2021}
2022
2023static __inline__ __m128 __DEFAULT_FN_ATTRS256
2024_mm256_maskz_cvtpd_ps (__mmask8 __U__m256d __A) {
2025  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2026                                             (__v4sf)_mm256_cvtpd_ps(__A),
2027                                             (__v4sf)_mm_setzero_ps());
2028}
2029
2030static __inline__ __m128i __DEFAULT_FN_ATTRS128
2031_mm_cvtpd_epu32 (__m128d __A) {
2032  return (__m128i__builtin_ia32_cvtpd2udq128_mask ((__v2df__A,
2033                 (__v4si)
2034                 _mm_setzero_si128 (),
2035                 (__mmask8) -1);
2036}
2037
2038static __inline__ __m128i __DEFAULT_FN_ATTRS128
2039_mm_mask_cvtpd_epu32 (__m128i __W__mmask8 __U__m128d __A) {
2040  return (__m128i__builtin_ia32_cvtpd2udq128_mask ((__v2df__A,
2041                 (__v4si__W,
2042                 (__mmask8__U);
2043}
2044
2045static __inline__ __m128i __DEFAULT_FN_ATTRS128
2046_mm_maskz_cvtpd_epu32 (__mmask8 __U__m128d __A) {
2047  return (__m128i__builtin_ia32_cvtpd2udq128_mask ((__v2df__A,
2048                 (__v4si)
2049                 _mm_setzero_si128 (),
2050                 (__mmask8__U);
2051}
2052
2053static __inline__ __m128i __DEFAULT_FN_ATTRS256
2054_mm256_cvtpd_epu32 (__m256d __A) {
2055  return (__m128i__builtin_ia32_cvtpd2udq256_mask ((__v4df__A,
2056                 (__v4si)
2057                 _mm_setzero_si128 (),
2058                 (__mmask8) -1);
2059}
2060
2061static __inline__ __m128i __DEFAULT_FN_ATTRS256
2062_mm256_mask_cvtpd_epu32 (__m128i __W__mmask8 __U__m256d __A) {
2063  return (__m128i__builtin_ia32_cvtpd2udq256_mask ((__v4df__A,
2064                 (__v4si__W,
2065                 (__mmask8__U);
2066}
2067
2068static __inline__ __m128i __DEFAULT_FN_ATTRS256
2069_mm256_maskz_cvtpd_epu32 (__mmask8 __U__m256d __A) {
2070  return (__m128i__builtin_ia32_cvtpd2udq256_mask ((__v4df__A,
2071                 (__v4si)
2072                 _mm_setzero_si128 (),
2073                 (__mmask8__U);
2074}
2075
2076static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077_mm_mask_cvtps_epi32 (__m128i __W__mmask8 __U__m128 __A) {
2078  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2079                                             (__v4si)_mm_cvtps_epi32(__A),
2080                                             (__v4si)__W);
2081}
2082
2083static __inline__ __m128i __DEFAULT_FN_ATTRS128
2084_mm_maskz_cvtps_epi32 (__mmask8 __U__m128 __A) {
2085  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2086                                             (__v4si)_mm_cvtps_epi32(__A),
2087                                             (__v4si)_mm_setzero_si128());
2088}
2089
2090static __inline__ __m256i __DEFAULT_FN_ATTRS256
2091_mm256_mask_cvtps_epi32 (__m256i __W__mmask8 __U__m256 __A) {
2092  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2093                                             (__v8si)_mm256_cvtps_epi32(__A),
2094                                             (__v8si)__W);
2095}
2096
2097static __inline__ __m256i __DEFAULT_FN_ATTRS256
2098_mm256_maskz_cvtps_epi32 (__mmask8 __U__m256 __A) {
2099  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2100                                             (__v8si)_mm256_cvtps_epi32(__A),
2101                                             (__v8si)_mm256_setzero_si256());
2102}
2103
2104static __inline__ __m128d __DEFAULT_FN_ATTRS128
2105_mm_mask_cvtps_pd (__m128d __W__mmask8 __U__m128 __A) {
2106  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2107                                              (__v2df)_mm_cvtps_pd(__A),
2108                                              (__v2df)__W);
2109}
2110
2111static __inline__ __m128d __DEFAULT_FN_ATTRS128
2112_mm_maskz_cvtps_pd (__mmask8 __U__m128 __A) {
2113  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2114                                              (__v2df)_mm_cvtps_pd(__A),
2115                                              (__v2df)_mm_setzero_pd());
2116}
2117
2118static __inline__ __m256d __DEFAULT_FN_ATTRS256
2119_mm256_mask_cvtps_pd (__m256d __W__mmask8 __U__m128 __A) {
2120  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2121                                              (__v4df)_mm256_cvtps_pd(__A),
2122                                              (__v4df)__W);
2123}
2124
2125static __inline__ __m256d __DEFAULT_FN_ATTRS256
2126_mm256_maskz_cvtps_pd (__mmask8 __U__m128 __A) {
2127  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2128                                              (__v4df)_mm256_cvtps_pd(__A),
2129                                              (__v4df)_mm256_setzero_pd());
2130}
2131
2132static __inline__ __m128i __DEFAULT_FN_ATTRS128
2133_mm_cvtps_epu32 (__m128 __A) {
2134  return (__m128i__builtin_ia32_cvtps2udq128_mask ((__v4sf__A,
2135                 (__v4si)
2136                 _mm_setzero_si128 (),
2137                 (__mmask8) -1);
2138}
2139
2140static __inline__ __m128i __DEFAULT_FN_ATTRS128
2141_mm_mask_cvtps_epu32 (__m128i __W__mmask8 __U__m128 __A) {
2142  return (__m128i__builtin_ia32_cvtps2udq128_mask ((__v4sf__A,
2143                 (__v4si__W,
2144                 (__mmask8__U);
2145}
2146
2147static __inline__ __m128i __DEFAULT_FN_ATTRS128
2148_mm_maskz_cvtps_epu32 (__mmask8 __U__m128 __A) {
2149  return (__m128i__builtin_ia32_cvtps2udq128_mask ((__v4sf__A,
2150                 (__v4si)
2151                 _mm_setzero_si128 (),
2152                 (__mmask8__U);
2153}
2154
2155static __inline__ __m256i __DEFAULT_FN_ATTRS256
2156_mm256_cvtps_epu32 (__m256 __A) {
2157  return (__m256i__builtin_ia32_cvtps2udq256_mask ((__v8sf__A,
2158                 (__v8si)
2159                 _mm256_setzero_si256 (),
2160                 (__mmask8) -1);
2161}
2162
2163static __inline__ __m256i __DEFAULT_FN_ATTRS256
2164_mm256_mask_cvtps_epu32 (__m256i __W__mmask8 __U__m256 __A) {
2165  return (__m256i__builtin_ia32_cvtps2udq256_mask ((__v8sf__A,
2166                 (__v8si__W,
2167                 (__mmask8__U);
2168}
2169
2170static __inline__ __m256i __DEFAULT_FN_ATTRS256
2171_mm256_maskz_cvtps_epu32 (__mmask8 __U__m256 __A) {
2172  return (__m256i__builtin_ia32_cvtps2udq256_mask ((__v8sf__A,
2173                 (__v8si)
2174                 _mm256_setzero_si256 (),
2175                 (__mmask8__U);
2176}
2177
2178static __inline__ __m128i __DEFAULT_FN_ATTRS128
2179_mm_mask_cvttpd_epi32 (__m128i __W__mmask8 __U__m128d __A) {
2180  return (__m128i__builtin_ia32_cvttpd2dq128_mask ((__v2df__A,
2181                 (__v4si__W,
2182                 (__mmask8__U);
2183}
2184
2185static __inline__ __m128i __DEFAULT_FN_ATTRS128
2186_mm_maskz_cvttpd_epi32 (__mmask8 __U__m128d __A) {
2187  return (__m128i__builtin_ia32_cvttpd2dq128_mask ((__v2df__A,
2188                 (__v4si)
2189                 _mm_setzero_si128 (),
2190                 (__mmask8__U);
2191}
2192
2193static __inline__ __m128i __DEFAULT_FN_ATTRS256
2194_mm256_mask_cvttpd_epi32 (__m128i __W__mmask8 __U__m256d __A) {
2195  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2196                                             (__v4si)_mm256_cvttpd_epi32(__A),
2197                                             (__v4si)__W);
2198}
2199
2200static __inline__ __m128i __DEFAULT_FN_ATTRS256
2201_mm256_maskz_cvttpd_epi32 (__mmask8 __U__m256d __A) {
2202  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2203                                             (__v4si)_mm256_cvttpd_epi32(__A),
2204                                             (__v4si)_mm_setzero_si128());
2205}
2206
2207static __inline__ __m128i __DEFAULT_FN_ATTRS128
2208_mm_cvttpd_epu32 (__m128d __A) {
2209  return (__m128i__builtin_ia32_cvttpd2udq128_mask ((__v2df__A,
2210                  (__v4si)
2211                  _mm_setzero_si128 (),
2212                  (__mmask8) -1);
2213}
2214
2215static __inline__ __m128i __DEFAULT_FN_ATTRS128
2216_mm_mask_cvttpd_epu32 (__m128i __W__mmask8 __U__m128d __A) {
2217  return (__m128i__builtin_ia32_cvttpd2udq128_mask ((__v2df__A,
2218                  (__v4si__W,
2219                  (__mmask8__U);
2220}
2221
2222static __inline__ __m128i __DEFAULT_FN_ATTRS128
2223_mm_maskz_cvttpd_epu32 (__mmask8 __U__m128d __A) {
2224  return (__m128i__builtin_ia32_cvttpd2udq128_mask ((__v2df__A,
2225                  (__v4si)
2226                  _mm_setzero_si128 (),
2227                  (__mmask8__U);
2228}
2229
2230static __inline__ __m128i __DEFAULT_FN_ATTRS256
2231_mm256_cvttpd_epu32 (__m256d __A) {
2232  return (__m128i__builtin_ia32_cvttpd2udq256_mask ((__v4df__A,
2233                  (__v4si)
2234                  _mm_setzero_si128 (),
2235                  (__mmask8) -1);
2236}
2237
2238static __inline__ __m128i __DEFAULT_FN_ATTRS256
2239_mm256_mask_cvttpd_epu32 (__m128i __W__mmask8 __U__m256d __A) {
2240  return (__m128i__builtin_ia32_cvttpd2udq256_mask ((__v4df__A,
2241                  (__v4si__W,
2242                  (__mmask8__U);
2243}
2244
2245static __inline__ __m128i __DEFAULT_FN_ATTRS256
2246_mm256_maskz_cvttpd_epu32 (__mmask8 __U__m256d __A) {
2247  return (__m128i__builtin_ia32_cvttpd2udq256_mask ((__v4df__A,
2248                  (__v4si)
2249                  _mm_setzero_si128 (),
2250                  (__mmask8__U);
2251}
2252
2253static __inline__ __m128i __DEFAULT_FN_ATTRS128
2254_mm_mask_cvttps_epi32 (__m128i __W__mmask8 __U__m128 __A) {
2255  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2256                                             (__v4si)_mm_cvttps_epi32(__A),
2257                                             (__v4si)__W);
2258}
2259
2260static __inline__ __m128i __DEFAULT_FN_ATTRS128
2261_mm_maskz_cvttps_epi32 (__mmask8 __U__m128 __A) {
2262  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2263                                             (__v4si)_mm_cvttps_epi32(__A),
2264                                             (__v4si)_mm_setzero_si128());
2265}
2266
2267static __inline__ __m256i __DEFAULT_FN_ATTRS256
2268_mm256_mask_cvttps_epi32 (__m256i __W__mmask8 __U__m256 __A) {
2269  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2270                                             (__v8si)_mm256_cvttps_epi32(__A),
2271                                             (__v8si)__W);
2272}
2273
2274static __inline__ __m256i __DEFAULT_FN_ATTRS256
2275_mm256_maskz_cvttps_epi32 (__mmask8 __U__m256 __A) {
2276  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2277                                             (__v8si)_mm256_cvttps_epi32(__A),
2278                                             (__v8si)_mm256_setzero_si256());
2279}
2280
2281static __inline__ __m128i __DEFAULT_FN_ATTRS128
2282_mm_cvttps_epu32 (__m128 __A) {
2283  return (__m128i__builtin_ia32_cvttps2udq128_mask ((__v4sf__A,
2284                  (__v4si)
2285                  _mm_setzero_si128 (),
2286                  (__mmask8) -1);
2287}
2288
2289static __inline__ __m128i __DEFAULT_FN_ATTRS128
2290_mm_mask_cvttps_epu32 (__m128i __W__mmask8 __U__m128 __A) {
2291  return (__m128i__builtin_ia32_cvttps2udq128_mask ((__v4sf__A,
2292                  (__v4si__W,
2293                  (__mmask8__U);
2294}
2295
2296static __inline__ __m128i __DEFAULT_FN_ATTRS128
2297_mm_maskz_cvttps_epu32 (__mmask8 __U__m128 __A) {
2298  return (__m128i__builtin_ia32_cvttps2udq128_mask ((__v4sf__A,
2299                  (__v4si)
2300                  _mm_setzero_si128 (),
2301                  (__mmask8__U);
2302}
2303
2304static __inline__ __m256i __DEFAULT_FN_ATTRS256
2305_mm256_cvttps_epu32 (__m256 __A) {
2306  return (__m256i__builtin_ia32_cvttps2udq256_mask ((__v8sf__A,
2307                  (__v8si)
2308                  _mm256_setzero_si256 (),
2309                  (__mmask8) -1);
2310}
2311
2312static __inline__ __m256i __DEFAULT_FN_ATTRS256
2313_mm256_mask_cvttps_epu32 (__m256i __W__mmask8 __U__m256 __A) {
2314  return (__m256i__builtin_ia32_cvttps2udq256_mask ((__v8sf__A,
2315                  (__v8si__W,
2316                  (__mmask8__U);
2317}
2318
2319static __inline__ __m256i __DEFAULT_FN_ATTRS256
2320_mm256_maskz_cvttps_epu32 (__mmask8 __U__m256 __A) {
2321  return (__m256i__builtin_ia32_cvttps2udq256_mask ((__v8sf__A,
2322                  (__v8si)
2323                  _mm256_setzero_si256 (),
2324                  (__mmask8__U);
2325}
2326
2327static __inline__ __m128d __DEFAULT_FN_ATTRS128
2328_mm_cvtepu32_pd (__m128i __A) {
2329  return (__m128d__builtin_convertvector(
2330      __builtin_shufflevector((__v4su)__A, (__v4su)__A01), __v2df);
2331}
2332
2333static __inline__ __m128d __DEFAULT_FN_ATTRS128
2334_mm_mask_cvtepu32_pd (__m128d __W__mmask8 __U__m128i __A) {
2335  return (__m128d)__builtin_ia32_selectpd_128((__mmask8__U,
2336                                              (__v2df)_mm_cvtepu32_pd(__A),
2337                                              (__v2df)__W);
2338}
2339
2340static __inline__ __m128d __DEFAULT_FN_ATTRS128
2341_mm_maskz_cvtepu32_pd (__mmask8 __U__m128i __A) {
2342  return (__m128d)__builtin_ia32_selectpd_128((__mmask8__U,
2343                                              (__v2df)_mm_cvtepu32_pd(__A),
2344                                              (__v2df)_mm_setzero_pd());
2345}
2346
2347static __inline__ __m256d __DEFAULT_FN_ATTRS256
2348_mm256_cvtepu32_pd (__m128i __A) {
2349  return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2350}
2351
2352static __inline__ __m256d __DEFAULT_FN_ATTRS256
2353_mm256_mask_cvtepu32_pd (__m256d __W__mmask8 __U__m128i __A) {
2354  return (__m256d)__builtin_ia32_selectpd_256((__mmask8__U,
2355                                              (__v4df)_mm256_cvtepu32_pd(__A),
2356                                              (__v4df)__W);
2357}
2358
2359static __inline__ __m256d __DEFAULT_FN_ATTRS256
2360_mm256_maskz_cvtepu32_pd (__mmask8 __U__m128i __A) {
2361  return (__m256d)__builtin_ia32_selectpd_256((__mmask8__U,
2362                                              (__v4df)_mm256_cvtepu32_pd(__A),
2363                                              (__v4df)_mm256_setzero_pd());
2364}
2365
2366static __inline__ __m128 __DEFAULT_FN_ATTRS128
2367_mm_cvtepu32_ps (__m128i __A) {
2368  return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2369}
2370
2371static __inline__ __m128 __DEFAULT_FN_ATTRS128
2372_mm_mask_cvtepu32_ps (__m128 __W__mmask8 __U__m128i __A) {
2373  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2374                                             (__v4sf)_mm_cvtepu32_ps(__A),
2375                                             (__v4sf)__W);
2376}
2377
2378static __inline__ __m128 __DEFAULT_FN_ATTRS128
2379_mm_maskz_cvtepu32_ps (__mmask8 __U__m128i __A) {
2380  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2381                                             (__v4sf)_mm_cvtepu32_ps(__A),
2382                                             (__v4sf)_mm_setzero_ps());
2383}
2384
2385static __inline__ __m256 __DEFAULT_FN_ATTRS256
2386_mm256_cvtepu32_ps (__m256i __A) {
2387  return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2388}
2389
2390static __inline__ __m256 __DEFAULT_FN_ATTRS256
2391_mm256_mask_cvtepu32_ps (__m256 __W__mmask8 __U__m256i __A) {
2392  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2393                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2394                                             (__v8sf)__W);
2395}
2396
2397static __inline__ __m256 __DEFAULT_FN_ATTRS256
2398_mm256_maskz_cvtepu32_ps (__mmask8 __U__m256i __A) {
2399  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2400                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2401                                             (__v8sf)_mm256_setzero_ps());
2402}
2403
2404static __inline__ __m128d __DEFAULT_FN_ATTRS128
2405_mm_mask_div_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
2406  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2407                                              (__v2df)_mm_div_pd(__A__B),
2408                                              (__v2df)__W);
2409}
2410
2411static __inline__ __m128d __DEFAULT_FN_ATTRS128
2412_mm_maskz_div_pd(__mmask8 __U__m128d __A__m128d __B) {
2413  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2414                                              (__v2df)_mm_div_pd(__A__B),
2415                                              (__v2df)_mm_setzero_pd());
2416}
2417
2418static __inline__ __m256d __DEFAULT_FN_ATTRS256
2419_mm256_mask_div_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
2420  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2421                                              (__v4df)_mm256_div_pd(__A__B),
2422                                              (__v4df)__W);
2423}
2424
2425static __inline__ __m256d __DEFAULT_FN_ATTRS256
2426_mm256_maskz_div_pd(__mmask8 __U__m256d __A__m256d __B) {
2427  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2428                                              (__v4df)_mm256_div_pd(__A__B),
2429                                              (__v4df)_mm256_setzero_pd());
2430}
2431
2432static __inline__ __m128 __DEFAULT_FN_ATTRS128
2433_mm_mask_div_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
2434  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2435                                             (__v4sf)_mm_div_ps(__A__B),
2436                                             (__v4sf)__W);
2437}
2438
2439static __inline__ __m128 __DEFAULT_FN_ATTRS128
2440_mm_maskz_div_ps(__mmask8 __U__m128 __A__m128 __B) {
2441  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2442                                             (__v4sf)_mm_div_ps(__A__B),
2443                                             (__v4sf)_mm_setzero_ps());
2444}
2445
2446static __inline__ __m256 __DEFAULT_FN_ATTRS256
2447_mm256_mask_div_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
2448  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2449                                             (__v8sf)_mm256_div_ps(__A__B),
2450                                             (__v8sf)__W);
2451}
2452
2453static __inline__ __m256 __DEFAULT_FN_ATTRS256
2454_mm256_maskz_div_ps(__mmask8 __U__m256 __A__m256 __B) {
2455  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2456                                             (__v8sf)_mm256_div_ps(__A__B),
2457                                             (__v8sf)_mm256_setzero_ps());
2458}
2459
2460static __inline__ __m128d __DEFAULT_FN_ATTRS128
2461_mm_mask_expand_pd (__m128d __W__mmask8 __U__m128d __A) {
2462  return (__m128d__builtin_ia32_expanddf128_mask ((__v2df__A,
2463                (__v2df__W,
2464                (__mmask8__U);
2465}
2466
2467static __inline__ __m128d __DEFAULT_FN_ATTRS128
2468_mm_maskz_expand_pd (__mmask8 __U__m128d __A) {
2469  return (__m128d__builtin_ia32_expanddf128_mask ((__v2df__A,
2470                 (__v2df)
2471                 _mm_setzero_pd (),
2472                 (__mmask8__U);
2473}
2474
2475static __inline__ __m256d __DEFAULT_FN_ATTRS256
2476_mm256_mask_expand_pd (__m256d __W__mmask8 __U__m256d __A) {
2477  return (__m256d__builtin_ia32_expanddf256_mask ((__v4df__A,
2478                (__v4df__W,
2479                (__mmask8__U);
2480}
2481
2482static __inline__ __m256d __DEFAULT_FN_ATTRS256
2483_mm256_maskz_expand_pd (__mmask8 __U__m256d __A) {
2484  return (__m256d__builtin_ia32_expanddf256_mask ((__v4df__A,
2485                 (__v4df)
2486                 _mm256_setzero_pd (),
2487                 (__mmask8__U);
2488}
2489
2490static __inline__ __m128i __DEFAULT_FN_ATTRS128
2491_mm_mask_expand_epi64 (__m128i __W__mmask8 __U__m128i __A) {
2492  return (__m128i__builtin_ia32_expanddi128_mask ((__v2di__A,
2493                (__v2di__W,
2494                (__mmask8__U);
2495}
2496
2497static __inline__ __m128i __DEFAULT_FN_ATTRS128
2498_mm_maskz_expand_epi64 (__mmask8 __U__m128i __A) {
2499  return (__m128i__builtin_ia32_expanddi128_mask ((__v2di__A,
2500                 (__v2di)
2501                 _mm_setzero_si128 (),
2502                 (__mmask8__U);
2503}
2504
2505static __inline__ __m256i __DEFAULT_FN_ATTRS256
2506_mm256_mask_expand_epi64 (__m256i __W__mmask8 __U__m256i __A) {
2507  return (__m256i__builtin_ia32_expanddi256_mask ((__v4di__A,
2508                (__v4di__W,
2509                (__mmask8__U);
2510}
2511
2512static __inline__ __m256i __DEFAULT_FN_ATTRS256
2513_mm256_maskz_expand_epi64 (__mmask8 __U__m256i __A) {
2514  return (__m256i__builtin_ia32_expanddi256_mask ((__v4di__A,
2515                 (__v4di)
2516                 _mm256_setzero_si256 (),
2517                 (__mmask8__U);
2518}
2519
2520static __inline__ __m128d __DEFAULT_FN_ATTRS128
2521_mm_mask_expandloadu_pd (__m128d __W__mmask8 __Uvoid const *__P) {
2522  return (__m128d__builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2523              (__v2df__W,
2524              (__mmask8)
2525              __U);
2526}
2527
2528static __inline__ __m128d __DEFAULT_FN_ATTRS128
2529_mm_maskz_expandloadu_pd (__mmask8 __Uvoid const *__P) {
2530  return (__m128d__builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2531               (__v2df)
2532               _mm_setzero_pd (),
2533               (__mmask8)
2534               __U);
2535}
2536
2537static __inline__ __m256d __DEFAULT_FN_ATTRS256
2538_mm256_mask_expandloadu_pd (__m256d __W__mmask8 __Uvoid const *__P) {
2539  return (__m256d__builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2540              (__v4df__W,
2541              (__mmask8)
2542              __U);
2543}
2544
2545static __inline__ __m256d __DEFAULT_FN_ATTRS256
2546_mm256_maskz_expandloadu_pd (__mmask8 __Uvoid const *__P) {
2547  return (__m256d__builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2548               (__v4df)
2549               _mm256_setzero_pd (),
2550               (__mmask8)
2551               __U);
2552}
2553
2554static __inline__ __m128i __DEFAULT_FN_ATTRS128
2555_mm_mask_expandloadu_epi64 (__m128i __W__mmask8 __Uvoid const *__P) {
2556  return (__m128i__builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2557              (__v2di__W,
2558              (__mmask8)
2559              __U);
2560}
2561
2562static __inline__ __m128i __DEFAULT_FN_ATTRS128
2563_mm_maskz_expandloadu_epi64 (__mmask8 __Uvoid const *__P) {
2564  return (__m128i__builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2565               (__v2di)
2566               _mm_setzero_si128 (),
2567               (__mmask8)
2568               __U);
2569}
2570
2571static __inline__ __m256i __DEFAULT_FN_ATTRS256
2572_mm256_mask_expandloadu_epi64 (__m256i __W__mmask8 __U,
2573             void const *__P) {
2574  return (__m256i__builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2575              (__v4di__W,
2576              (__mmask8)
2577              __U);
2578}
2579
2580static __inline__ __m256i __DEFAULT_FN_ATTRS256
2581_mm256_maskz_expandloadu_epi64 (__mmask8 __Uvoid const *__P) {
2582  return (__m256i__builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2583               (__v4di)
2584               _mm256_setzero_si256 (),
2585               (__mmask8)
2586               __U);
2587}
2588
2589static __inline__ __m128 __DEFAULT_FN_ATTRS128
2590_mm_mask_expandloadu_ps (__m128 __W__mmask8 __Uvoid const *__P) {
2591  return (__m128__builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2592                   (__v4sf__W,
2593                   (__mmask8__U);
2594}
2595
2596static __inline__ __m128 __DEFAULT_FN_ATTRS128
2597_mm_maskz_expandloadu_ps (__mmask8 __Uvoid const *__P) {
2598  return (__m128__builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2599              (__v4sf)
2600              _mm_setzero_ps (),
2601              (__mmask8)
2602              __U);
2603}
2604
2605static __inline__ __m256 __DEFAULT_FN_ATTRS256
2606_mm256_mask_expandloadu_ps (__m256 __W__mmask8 __Uvoid const *__P) {
2607  return (__m256__builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2608                   (__v8sf__W,
2609                   (__mmask8__U);
2610}
2611
2612static __inline__ __m256 __DEFAULT_FN_ATTRS256
2613_mm256_maskz_expandloadu_ps (__mmask8 __Uvoid const *__P) {
2614  return (__m256__builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2615              (__v8sf)
2616              _mm256_setzero_ps (),
2617              (__mmask8)
2618              __U);
2619}
2620
2621static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622_mm_mask_expandloadu_epi32 (__m128i __W__mmask8 __Uvoid const *__P) {
2623  return (__m128i__builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2624              (__v4si__W,
2625              (__mmask8)
2626              __U);
2627}
2628
2629static __inline__ __m128i __DEFAULT_FN_ATTRS128
2630_mm_maskz_expandloadu_epi32 (__mmask8 __Uvoid const *__P) {
2631  return (__m128i__builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2632               (__v4si)
2633               _mm_setzero_si128 (),
2634               (__mmask8)     __U);
2635}
2636
2637static __inline__ __m256i __DEFAULT_FN_ATTRS256
2638_mm256_mask_expandloadu_epi32 (__m256i __W__mmask8 __U,
2639             void const *__P) {
2640  return (__m256i__builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2641              (__v8si__W,
2642              (__mmask8)
2643              __U);
2644}
2645
2646static __inline__ __m256i __DEFAULT_FN_ATTRS256
2647_mm256_maskz_expandloadu_epi32 (__mmask8 __Uvoid const *__P) {
2648  return (__m256i__builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2649               (__v8si)
2650               _mm256_setzero_si256 (),
2651               (__mmask8)
2652               __U);
2653}
2654
2655static __inline__ __m128 __DEFAULT_FN_ATTRS128
2656_mm_mask_expand_ps (__m128 __W__mmask8 __U__m128 __A) {
2657  return (__m128__builtin_ia32_expandsf128_mask ((__v4sf__A,
2658               (__v4sf__W,
2659               (__mmask8__U);
2660}
2661
2662static __inline__ __m128 __DEFAULT_FN_ATTRS128
2663_mm_maskz_expand_ps (__mmask8 __U__m128 __A) {
2664  return (__m128__builtin_ia32_expandsf128_mask ((__v4sf__A,
2665                (__v4sf)
2666                _mm_setzero_ps (),
2667                (__mmask8__U);
2668}
2669
2670static __inline__ __m256 __DEFAULT_FN_ATTRS256
2671_mm256_mask_expand_ps (__m256 __W__mmask8 __U__m256 __A) {
2672  return (__m256__builtin_ia32_expandsf256_mask ((__v8sf__A,
2673               (__v8sf__W,
2674               (__mmask8__U);
2675}
2676
2677static __inline__ __m256 __DEFAULT_FN_ATTRS256
2678_mm256_maskz_expand_ps (__mmask8 __U__m256 __A) {
2679  return (__m256__builtin_ia32_expandsf256_mask ((__v8sf__A,
2680                (__v8sf)
2681                _mm256_setzero_ps (),
2682                (__mmask8__U);
2683}
2684
2685static __inline__ __m128i __DEFAULT_FN_ATTRS128
2686_mm_mask_expand_epi32 (__m128i __W__mmask8 __U__m128i __A) {
2687  return (__m128i__builtin_ia32_expandsi128_mask ((__v4si__A,
2688                (__v4si__W,
2689                (__mmask8__U);
2690}
2691
2692static __inline__ __m128i __DEFAULT_FN_ATTRS128
2693_mm_maskz_expand_epi32 (__mmask8 __U__m128i __A) {
2694  return (__m128i__builtin_ia32_expandsi128_mask ((__v4si__A,
2695                 (__v4si)
2696                 _mm_setzero_si128 (),
2697                 (__mmask8__U);
2698}
2699
2700static __inline__ __m256i __DEFAULT_FN_ATTRS256
2701_mm256_mask_expand_epi32 (__m256i __W__mmask8 __U__m256i __A) {
2702  return (__m256i__builtin_ia32_expandsi256_mask ((__v8si__A,
2703                (__v8si__W,
2704                (__mmask8__U);
2705}
2706
2707static __inline__ __m256i __DEFAULT_FN_ATTRS256
2708_mm256_maskz_expand_epi32 (__mmask8 __U__m256i __A) {
2709  return (__m256i__builtin_ia32_expandsi256_mask ((__v8si__A,
2710                 (__v8si)
2711                 _mm256_setzero_si256 (),
2712                 (__mmask8__U);
2713}
2714
2715static __inline__ __m128d __DEFAULT_FN_ATTRS128
2716_mm_getexp_pd (__m128d __A) {
2717  return (__m128d__builtin_ia32_getexppd128_mask ((__v2df__A,
2718                (__v2df)
2719                _mm_setzero_pd (),
2720                (__mmask8) -1);
2721}
2722
2723static __inline__ __m128d __DEFAULT_FN_ATTRS128
2724_mm_mask_getexp_pd (__m128d __W__mmask8 __U__m128d __A) {
2725  return (__m128d__builtin_ia32_getexppd128_mask ((__v2df__A,
2726                (__v2df__W,
2727                (__mmask8__U);
2728}
2729
2730static __inline__ __m128d __DEFAULT_FN_ATTRS128
2731_mm_maskz_getexp_pd (__mmask8 __U__m128d __A) {
2732  return (__m128d__builtin_ia32_getexppd128_mask ((__v2df__A,
2733                (__v2df)
2734                _mm_setzero_pd (),
2735                (__mmask8__U);
2736}
2737
2738static __inline__ __m256d __DEFAULT_FN_ATTRS256
2739_mm256_getexp_pd (__m256d __A) {
2740  return (__m256d__builtin_ia32_getexppd256_mask ((__v4df__A,
2741                (__v4df)
2742                _mm256_setzero_pd (),
2743                (__mmask8) -1);
2744}
2745
2746static __inline__ __m256d __DEFAULT_FN_ATTRS256
2747_mm256_mask_getexp_pd (__m256d __W__mmask8 __U__m256d __A) {
2748  return (__m256d__builtin_ia32_getexppd256_mask ((__v4df__A,
2749                (__v4df__W,
2750                (__mmask8__U);
2751}
2752
2753static __inline__ __m256d __DEFAULT_FN_ATTRS256
2754_mm256_maskz_getexp_pd (__mmask8 __U__m256d __A) {
2755  return (__m256d__builtin_ia32_getexppd256_mask ((__v4df__A,
2756                (__v4df)
2757                _mm256_setzero_pd (),
2758                (__mmask8__U);
2759}
2760
2761static __inline__ __m128 __DEFAULT_FN_ATTRS128
2762_mm_getexp_ps (__m128 __A) {
2763  return (__m128__builtin_ia32_getexpps128_mask ((__v4sf__A,
2764               (__v4sf)
2765               _mm_setzero_ps (),
2766               (__mmask8) -1);
2767}
2768
2769static __inline__ __m128 __DEFAULT_FN_ATTRS128
2770_mm_mask_getexp_ps (__m128 __W__mmask8 __U__m128 __A) {
2771  return (__m128__builtin_ia32_getexpps128_mask ((__v4sf__A,
2772               (__v4sf__W,
2773               (__mmask8__U);
2774}
2775
2776static __inline__ __m128 __DEFAULT_FN_ATTRS128
2777_mm_maskz_getexp_ps (__mmask8 __U__m128 __A) {
2778  return (__m128__builtin_ia32_getexpps128_mask ((__v4sf__A,
2779               (__v4sf)
2780               _mm_setzero_ps (),
2781               (__mmask8__U);
2782}
2783
2784static __inline__ __m256 __DEFAULT_FN_ATTRS256
2785_mm256_getexp_ps (__m256 __A) {
2786  return (__m256__builtin_ia32_getexpps256_mask ((__v8sf__A,
2787               (__v8sf)
2788               _mm256_setzero_ps (),
2789               (__mmask8) -1);
2790}
2791
2792static __inline__ __m256 __DEFAULT_FN_ATTRS256
2793_mm256_mask_getexp_ps (__m256 __W__mmask8 __U__m256 __A) {
2794  return (__m256__builtin_ia32_getexpps256_mask ((__v8sf__A,
2795               (__v8sf__W,
2796               (__mmask8__U);
2797}
2798
2799static __inline__ __m256 __DEFAULT_FN_ATTRS256
2800_mm256_maskz_getexp_ps (__mmask8 __U__m256 __A) {
2801  return (__m256__builtin_ia32_getexpps256_mask ((__v8sf__A,
2802               (__v8sf)
2803               _mm256_setzero_ps (),
2804               (__mmask8__U);
2805}
2806
2807static __inline__ __m128d __DEFAULT_FN_ATTRS128
2808_mm_mask_max_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
2809  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2810                                              (__v2df)_mm_max_pd(__A__B),
2811                                              (__v2df)__W);
2812}
2813
2814static __inline__ __m128d __DEFAULT_FN_ATTRS128
2815_mm_maskz_max_pd(__mmask8 __U__m128d __A__m128d __B) {
2816  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2817                                              (__v2df)_mm_max_pd(__A__B),
2818                                              (__v2df)_mm_setzero_pd());
2819}
2820
2821static __inline__ __m256d __DEFAULT_FN_ATTRS256
2822_mm256_mask_max_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
2823  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2824                                              (__v4df)_mm256_max_pd(__A__B),
2825                                              (__v4df)__W);
2826}
2827
2828static __inline__ __m256d __DEFAULT_FN_ATTRS256
2829_mm256_maskz_max_pd(__mmask8 __U__m256d __A__m256d __B) {
2830  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2831                                              (__v4df)_mm256_max_pd(__A__B),
2832                                              (__v4df)_mm256_setzero_pd());
2833}
2834
2835static __inline__ __m128 __DEFAULT_FN_ATTRS128
2836_mm_mask_max_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
2837  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2838                                             (__v4sf)_mm_max_ps(__A__B),
2839                                             (__v4sf)__W);
2840}
2841
2842static __inline__ __m128 __DEFAULT_FN_ATTRS128
2843_mm_maskz_max_ps(__mmask8 __U__m128 __A__m128 __B) {
2844  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2845                                             (__v4sf)_mm_max_ps(__A__B),
2846                                             (__v4sf)_mm_setzero_ps());
2847}
2848
2849static __inline__ __m256 __DEFAULT_FN_ATTRS256
2850_mm256_mask_max_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
2851  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2852                                             (__v8sf)_mm256_max_ps(__A__B),
2853                                             (__v8sf)__W);
2854}
2855
2856static __inline__ __m256 __DEFAULT_FN_ATTRS256
2857_mm256_maskz_max_ps(__mmask8 __U__m256 __A__m256 __B) {
2858  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2859                                             (__v8sf)_mm256_max_ps(__A__B),
2860                                             (__v8sf)_mm256_setzero_ps());
2861}
2862
2863static __inline__ __m128d __DEFAULT_FN_ATTRS128
2864_mm_mask_min_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
2865  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2866                                              (__v2df)_mm_min_pd(__A__B),
2867                                              (__v2df)__W);
2868}
2869
2870static __inline__ __m128d __DEFAULT_FN_ATTRS128
2871_mm_maskz_min_pd(__mmask8 __U__m128d __A__m128d __B) {
2872  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2873                                              (__v2df)_mm_min_pd(__A__B),
2874                                              (__v2df)_mm_setzero_pd());
2875}
2876
2877static __inline__ __m256d __DEFAULT_FN_ATTRS256
2878_mm256_mask_min_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
2879  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2880                                              (__v4df)_mm256_min_pd(__A__B),
2881                                              (__v4df)__W);
2882}
2883
2884static __inline__ __m256d __DEFAULT_FN_ATTRS256
2885_mm256_maskz_min_pd(__mmask8 __U__m256d __A__m256d __B) {
2886  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2887                                              (__v4df)_mm256_min_pd(__A__B),
2888                                              (__v4df)_mm256_setzero_pd());
2889}
2890
2891static __inline__ __m128 __DEFAULT_FN_ATTRS128
2892_mm_mask_min_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
2893  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2894                                             (__v4sf)_mm_min_ps(__A__B),
2895                                             (__v4sf)__W);
2896}
2897
2898static __inline__ __m128 __DEFAULT_FN_ATTRS128
2899_mm_maskz_min_ps(__mmask8 __U__m128 __A__m128 __B) {
2900  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2901                                             (__v4sf)_mm_min_ps(__A__B),
2902                                             (__v4sf)_mm_setzero_ps());
2903}
2904
2905static __inline__ __m256 __DEFAULT_FN_ATTRS256
2906_mm256_mask_min_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
2907  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2908                                             (__v8sf)_mm256_min_ps(__A__B),
2909                                             (__v8sf)__W);
2910}
2911
2912static __inline__ __m256 __DEFAULT_FN_ATTRS256
2913_mm256_maskz_min_ps(__mmask8 __U__m256 __A__m256 __B) {
2914  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2915                                             (__v8sf)_mm256_min_ps(__A__B),
2916                                             (__v8sf)_mm256_setzero_ps());
2917}
2918
2919static __inline__ __m128d __DEFAULT_FN_ATTRS128
2920_mm_mask_mul_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
2921  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2922                                              (__v2df)_mm_mul_pd(__A__B),
2923                                              (__v2df)__W);
2924}
2925
2926static __inline__ __m128d __DEFAULT_FN_ATTRS128
2927_mm_maskz_mul_pd(__mmask8 __U__m128d __A__m128d __B) {
2928  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2929                                              (__v2df)_mm_mul_pd(__A__B),
2930                                              (__v2df)_mm_setzero_pd());
2931}
2932
2933static __inline__ __m256d __DEFAULT_FN_ATTRS256
2934_mm256_mask_mul_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
2935  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2936                                              (__v4df)_mm256_mul_pd(__A__B),
2937                                              (__v4df)__W);
2938}
2939
2940static __inline__ __m256d __DEFAULT_FN_ATTRS256
2941_mm256_maskz_mul_pd(__mmask8 __U__m256d __A__m256d __B) {
2942  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2943                                              (__v4df)_mm256_mul_pd(__A__B),
2944                                              (__v4df)_mm256_setzero_pd());
2945}
2946
2947static __inline__ __m128 __DEFAULT_FN_ATTRS128
2948_mm_mask_mul_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
2949  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2950                                             (__v4sf)_mm_mul_ps(__A__B),
2951                                             (__v4sf)__W);
2952}
2953
2954static __inline__ __m128 __DEFAULT_FN_ATTRS128
2955_mm_maskz_mul_ps(__mmask8 __U__m128 __A__m128 __B) {
2956  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2957                                             (__v4sf)_mm_mul_ps(__A__B),
2958                                             (__v4sf)_mm_setzero_ps());
2959}
2960
2961static __inline__ __m256 __DEFAULT_FN_ATTRS256
2962_mm256_mask_mul_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
2963  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2964                                             (__v8sf)_mm256_mul_ps(__A__B),
2965                                             (__v8sf)__W);
2966}
2967
2968static __inline__ __m256 __DEFAULT_FN_ATTRS256
2969_mm256_maskz_mul_ps(__mmask8 __U__m256 __A__m256 __B) {
2970  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2971                                             (__v8sf)_mm256_mul_ps(__A__B),
2972                                             (__v8sf)_mm256_setzero_ps());
2973}
2974
2975static __inline__ __m128i __DEFAULT_FN_ATTRS128
2976_mm_mask_abs_epi32(__m128i __W__mmask8 __U__m128i __A) {
2977  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2978                                             (__v4si)_mm_abs_epi32(__A),
2979                                             (__v4si)__W);
2980}
2981
2982static __inline__ __m128i __DEFAULT_FN_ATTRS128
2983_mm_maskz_abs_epi32(__mmask8 __U__m128i __A) {
2984  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2985                                             (__v4si)_mm_abs_epi32(__A),
2986                                             (__v4si)_mm_setzero_si128());
2987}
2988
2989static __inline__ __m256i __DEFAULT_FN_ATTRS256
2990_mm256_mask_abs_epi32(__m256i __W__mmask8 __U__m256i __A) {
2991  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2992                                             (__v8si)_mm256_abs_epi32(__A),
2993                                             (__v8si)__W);
2994}
2995
2996static __inline__ __m256i __DEFAULT_FN_ATTRS256
2997_mm256_maskz_abs_epi32(__mmask8 __U__m256i __A) {
2998  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2999                                             (__v8si)_mm256_abs_epi32(__A),
3000                                             (__v8si)_mm256_setzero_si256());
3001}
3002
3003static __inline__ __m128i __DEFAULT_FN_ATTRS128
3004_mm_abs_epi64 (__m128i __A) {
3005  return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
3006}
3007
3008static __inline__ __m128i __DEFAULT_FN_ATTRS128
3009_mm_mask_abs_epi64 (__m128i __W__mmask8 __U__m128i __A) {
3010  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3011                                             (__v2di)_mm_abs_epi64(__A),
3012                                             (__v2di)__W);
3013}
3014
3015static __inline__ __m128i __DEFAULT_FN_ATTRS128
3016_mm_maskz_abs_epi64 (__mmask8 __U__m128i __A) {
3017  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3018                                             (__v2di)_mm_abs_epi64(__A),
3019                                             (__v2di)_mm_setzero_si128());
3020}
3021
3022static __inline__ __m256i __DEFAULT_FN_ATTRS256
3023_mm256_abs_epi64 (__m256i __A) {
3024  return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3025}
3026
3027static __inline__ __m256i __DEFAULT_FN_ATTRS256
3028_mm256_mask_abs_epi64 (__m256i __W__mmask8 __U__m256i __A) {
3029  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3030                                             (__v4di)_mm256_abs_epi64(__A),
3031                                             (__v4di)__W);
3032}
3033
3034static __inline__ __m256i __DEFAULT_FN_ATTRS256
3035_mm256_maskz_abs_epi64 (__mmask8 __U__m256i __A) {
3036  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3037                                             (__v4di)_mm256_abs_epi64(__A),
3038                                             (__v4di)_mm256_setzero_si256());
3039}
3040
3041static __inline__ __m128i __DEFAULT_FN_ATTRS128
3042_mm_maskz_max_epi32(__mmask8 __M__m128i __A__m128i __B) {
3043  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3044                                             (__v4si)_mm_max_epi32(__A__B),
3045                                             (__v4si)_mm_setzero_si128());
3046}
3047
3048static __inline__ __m128i __DEFAULT_FN_ATTRS128
3049_mm_mask_max_epi32(__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3050  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3051                                             (__v4si)_mm_max_epi32(__A__B),
3052                                             (__v4si)__W);
3053}
3054
3055static __inline__ __m256i __DEFAULT_FN_ATTRS256
3056_mm256_maskz_max_epi32(__mmask8 __M__m256i __A__m256i __B) {
3057  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3058                                             (__v8si)_mm256_max_epi32(__A__B),
3059                                             (__v8si)_mm256_setzero_si256());
3060}
3061
3062static __inline__ __m256i __DEFAULT_FN_ATTRS256
3063_mm256_mask_max_epi32(__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3064  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3065                                             (__v8si)_mm256_max_epi32(__A__B),
3066                                             (__v8si)__W);
3067}
3068
3069static __inline__ __m128i __DEFAULT_FN_ATTRS128
3070_mm_max_epi64 (__m128i __A__m128i __B) {
3071  return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
3072}
3073
3074static __inline__ __m128i __DEFAULT_FN_ATTRS128
3075_mm_maskz_max_epi64 (__mmask8 __M__m128i __A__m128i __B) {
3076  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3077                                             (__v2di)_mm_max_epi64(__A__B),
3078                                             (__v2di)_mm_setzero_si128());
3079}
3080
3081static __inline__ __m128i __DEFAULT_FN_ATTRS128
3082_mm_mask_max_epi64 (__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3083  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3084                                             (__v2di)_mm_max_epi64(__A__B),
3085                                             (__v2di)__W);
3086}
3087
3088static __inline__ __m256i __DEFAULT_FN_ATTRS256
3089_mm256_max_epi64 (__m256i __A__m256i __B) {
3090  return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
3091}
3092
3093static __inline__ __m256i __DEFAULT_FN_ATTRS256
3094_mm256_maskz_max_epi64 (__mmask8 __M__m256i __A__m256i __B) {
3095  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3096                                             (__v4di)_mm256_max_epi64(__A__B),
3097                                             (__v4di)_mm256_setzero_si256());
3098}
3099
3100static __inline__ __m256i __DEFAULT_FN_ATTRS256
3101_mm256_mask_max_epi64 (__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3102  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3103                                             (__v4di)_mm256_max_epi64(__A__B),
3104                                             (__v4di)__W);
3105}
3106
3107static __inline__ __m128i __DEFAULT_FN_ATTRS128
3108_mm_maskz_max_epu32(__mmask8 __M__m128i __A__m128i __B) {
3109  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3110                                             (__v4si)_mm_max_epu32(__A__B),
3111                                             (__v4si)_mm_setzero_si128());
3112}
3113
3114static __inline__ __m128i __DEFAULT_FN_ATTRS128
3115_mm_mask_max_epu32(__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3116  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3117                                             (__v4si)_mm_max_epu32(__A__B),
3118                                             (__v4si)__W);
3119}
3120
3121static __inline__ __m256i __DEFAULT_FN_ATTRS256
3122_mm256_maskz_max_epu32(__mmask8 __M__m256i __A__m256i __B) {
3123  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3124                                             (__v8si)_mm256_max_epu32(__A__B),
3125                                             (__v8si)_mm256_setzero_si256());
3126}
3127
3128static __inline__ __m256i __DEFAULT_FN_ATTRS256
3129_mm256_mask_max_epu32(__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3130  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3131                                             (__v8si)_mm256_max_epu32(__A__B),
3132                                             (__v8si)__W);
3133}
3134
3135static __inline__ __m128i __DEFAULT_FN_ATTRS128
3136_mm_max_epu64 (__m128i __A__m128i __B) {
3137  return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3138}
3139
3140static __inline__ __m128i __DEFAULT_FN_ATTRS128
3141_mm_maskz_max_epu64 (__mmask8 __M__m128i __A__m128i __B) {
3142  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3143                                             (__v2di)_mm_max_epu64(__A__B),
3144                                             (__v2di)_mm_setzero_si128());
3145}
3146
3147static __inline__ __m128i __DEFAULT_FN_ATTRS128
3148_mm_mask_max_epu64 (__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3149  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3150                                             (__v2di)_mm_max_epu64(__A__B),
3151                                             (__v2di)__W);
3152}
3153
3154static __inline__ __m256i __DEFAULT_FN_ATTRS256
3155_mm256_max_epu64 (__m256i __A__m256i __B) {
3156  return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3157}
3158
3159static __inline__ __m256i __DEFAULT_FN_ATTRS256
3160_mm256_maskz_max_epu64 (__mmask8 __M__m256i __A__m256i __B) {
3161  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3162                                             (__v4di)_mm256_max_epu64(__A__B),
3163                                             (__v4di)_mm256_setzero_si256());
3164}
3165
3166static __inline__ __m256i __DEFAULT_FN_ATTRS256
3167_mm256_mask_max_epu64 (__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3168  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3169                                             (__v4di)_mm256_max_epu64(__A__B),
3170                                             (__v4di)__W);
3171}
3172
3173static __inline__ __m128i __DEFAULT_FN_ATTRS128
3174_mm_maskz_min_epi32(__mmask8 __M__m128i __A__m128i __B) {
3175  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3176                                             (__v4si)_mm_min_epi32(__A__B),
3177                                             (__v4si)_mm_setzero_si128());
3178}
3179
3180static __inline__ __m128i __DEFAULT_FN_ATTRS128
3181_mm_mask_min_epi32(__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3182  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3183                                             (__v4si)_mm_min_epi32(__A__B),
3184                                             (__v4si)__W);
3185}
3186
3187static __inline__ __m256i __DEFAULT_FN_ATTRS256
3188_mm256_maskz_min_epi32(__mmask8 __M__m256i __A__m256i __B) {
3189  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3190                                             (__v8si)_mm256_min_epi32(__A__B),
3191                                             (__v8si)_mm256_setzero_si256());
3192}
3193
3194static __inline__ __m256i __DEFAULT_FN_ATTRS256
3195_mm256_mask_min_epi32(__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3196  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3197                                             (__v8si)_mm256_min_epi32(__A__B),
3198                                             (__v8si)__W);
3199}
3200
3201static __inline__ __m128i __DEFAULT_FN_ATTRS128
3202_mm_min_epi64 (__m128i __A__m128i __B) {
3203  return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
3204}
3205
3206static __inline__ __m128i __DEFAULT_FN_ATTRS128
3207_mm_mask_min_epi64 (__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3208  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3209                                             (__v2di)_mm_min_epi64(__A__B),
3210                                             (__v2di)__W);
3211}
3212
3213static __inline__ __m128i __DEFAULT_FN_ATTRS128
3214_mm_maskz_min_epi64 (__mmask8 __M__m128i __A__m128i __B) {
3215  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3216                                             (__v2di)_mm_min_epi64(__A__B),
3217                                             (__v2di)_mm_setzero_si128());
3218}
3219
3220static __inline__ __m256i __DEFAULT_FN_ATTRS256
3221_mm256_min_epi64 (__m256i __A__m256i __B) {
3222  return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
3223}
3224
3225static __inline__ __m256i __DEFAULT_FN_ATTRS256
3226_mm256_mask_min_epi64 (__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3227  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3228                                             (__v4di)_mm256_min_epi64(__A__B),
3229                                             (__v4di)__W);
3230}
3231
3232static __inline__ __m256i __DEFAULT_FN_ATTRS256
3233_mm256_maskz_min_epi64 (__mmask8 __M__m256i __A__m256i __B) {
3234  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3235                                             (__v4di)_mm256_min_epi64(__A__B),
3236                                             (__v4di)_mm256_setzero_si256());
3237}
3238
3239static __inline__ __m128i __DEFAULT_FN_ATTRS128
3240_mm_maskz_min_epu32(__mmask8 __M__m128i __A__m128i __B) {
3241  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3242                                             (__v4si)_mm_min_epu32(__A__B),
3243                                             (__v4si)_mm_setzero_si128());
3244}
3245
3246static __inline__ __m128i __DEFAULT_FN_ATTRS128
3247_mm_mask_min_epu32(__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3248  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3249                                             (__v4si)_mm_min_epu32(__A__B),
3250                                             (__v4si)__W);
3251}
3252
3253static __inline__ __m256i __DEFAULT_FN_ATTRS256
3254_mm256_maskz_min_epu32(__mmask8 __M__m256i __A__m256i __B) {
3255  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3256                                             (__v8si)_mm256_min_epu32(__A__B),
3257                                             (__v8si)_mm256_setzero_si256());
3258}
3259
3260static __inline__ __m256i __DEFAULT_FN_ATTRS256
3261_mm256_mask_min_epu32(__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3262  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3263                                             (__v8si)_mm256_min_epu32(__A__B),
3264                                             (__v8si)__W);
3265}
3266
3267static __inline__ __m128i __DEFAULT_FN_ATTRS128
3268_mm_min_epu64 (__m128i __A__m128i __B) {
3269  return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
3270}
3271
3272static __inline__ __m128i __DEFAULT_FN_ATTRS128
3273_mm_mask_min_epu64 (__m128i __W__mmask8 __M__m128i __A__m128i __B) {
3274  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3275                                             (__v2di)_mm_min_epu64(__A__B),
3276                                             (__v2di)__W);
3277}
3278
3279static __inline__ __m128i __DEFAULT_FN_ATTRS128
3280_mm_maskz_min_epu64 (__mmask8 __M__m128i __A__m128i __B) {
3281  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3282                                             (__v2di)_mm_min_epu64(__A__B),
3283                                             (__v2di)_mm_setzero_si128());
3284}
3285
3286static __inline__ __m256i __DEFAULT_FN_ATTRS256
3287_mm256_min_epu64 (__m256i __A__m256i __B) {
3288  return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
3289}
3290
3291static __inline__ __m256i __DEFAULT_FN_ATTRS256
3292_mm256_mask_min_epu64 (__m256i __W__mmask8 __M__m256i __A__m256i __B) {
3293  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3294                                             (__v4di)_mm256_min_epu64(__A__B),
3295                                             (__v4di)__W);
3296}
3297
3298static __inline__ __m256i __DEFAULT_FN_ATTRS256
3299_mm256_maskz_min_epu64 (__mmask8 __M__m256i __A__m256i __B) {
3300  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3301                                             (__v4di)_mm256_min_epu64(__A__B),
3302                                             (__v4di)_mm256_setzero_si256());
3303}
3304
/* Expands to __builtin_ia32_rndscalepd_128_mask(A, imm, _mm_setzero_pd(),
 * (__mmask8)-1), cast to __m128d. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))
3310
3311
/* Expands to __builtin_ia32_rndscalepd_128_mask(A, imm, W, U), cast to
 * __m128d. The whole expansion is parenthesized so a postfix operator applied
 * to the macro result binds to the cast value. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))
3317
3318
/* Expands to __builtin_ia32_rndscalepd_128_mask(A, imm, _mm_setzero_pd(), U),
 * cast to __m128d. The whole expansion is parenthesized so a postfix operator
 * applied to the macro result binds to the cast value. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))
3324
3325
/* Expands to __builtin_ia32_rndscalepd_256_mask(A, imm, _mm256_setzero_pd(),
 * (__mmask8)-1), cast to __m256d. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))
3331
3332
/* Expands to __builtin_ia32_rndscalepd_256_mask(A, imm, W, U), cast to
 * __m256d. The whole expansion is parenthesized so a postfix operator applied
 * to the macro result binds to the cast value. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))
3338
3339
/* Expands to __builtin_ia32_rndscalepd_256_mask(A, imm, _mm256_setzero_pd(),
 * U), cast to __m256d. The whole expansion is parenthesized so a postfix
 * operator applied to the macro result binds to the cast value. */
#define _mm256_maskz_roundscale_pd(U, A, imm)  \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
3345
/* Expands to __builtin_ia32_rndscaleps_128_mask(A, imm, _mm_setzero_ps(),
 * (__mmask8)-1), cast to __m128. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value. */
#define _mm_roundscale_ps(A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))
3350
3351
/* Expands to __builtin_ia32_rndscaleps_128_mask(A, imm, W, U), cast to __m128.
 * The whole expansion is parenthesized so a postfix operator applied to the
 * macro result binds to the cast value. */
#define _mm_mask_roundscale_ps(W, U, A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))
3356
3357
/* Expands to __builtin_ia32_rndscaleps_128_mask(A, imm, _mm_setzero_ps(), U),
 * cast to __m128. The whole expansion is parenthesized so a postfix operator
 * applied to the macro result binds to the cast value. */
#define _mm_maskz_roundscale_ps(U, A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))
3362
/* Expands to __builtin_ia32_rndscaleps_256_mask(A, imm, _mm256_setzero_ps(),
 * (__mmask8)-1), cast to __m256. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value. */
#define _mm256_roundscale_ps(A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))
3367
/* Expands to __builtin_ia32_rndscaleps_256_mask(A, imm, W, U), cast to __m256.
 * The whole expansion is parenthesized so a postfix operator applied to the
 * macro result binds to the cast value. */
#define _mm256_mask_roundscale_ps(W, U, A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))
3372
3373
/* Expands to __builtin_ia32_rndscaleps_256_mask(A, imm, _mm256_setzero_ps(),
 * U), cast to __m256. The whole expansion is parenthesized so a postfix
 * operator applied to the macro result binds to the cast value. */
#define _mm256_maskz_roundscale_ps(U, A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
3378
3379static __inline__ __m128d __DEFAULT_FN_ATTRS128
3380_mm_scalef_pd (__m128d __A__m128d __B) {
3381  return (__m128d__builtin_ia32_scalefpd128_mask ((__v2df__A,
3382                (__v2df__B,
3383                (__v2df)
3384                _mm_setzero_pd (),
3385                (__mmask8) -1);
3386}
3387
3388static __inline__ __m128d __DEFAULT_FN_ATTRS128
3389_mm_mask_scalef_pd (__m128d __W__mmask8 __U__m128d __A,
3390        __m128d __B) {
3391  return (__m128d__builtin_ia32_scalefpd128_mask ((__v2df__A,
3392                (__v2df__B,
3393                (__v2df__W,
3394                (__mmask8__U);
3395}
3396
3397static __inline__ __m128d __DEFAULT_FN_ATTRS128
3398_mm_maskz_scalef_pd (__mmask8 __U__m128d __A__m128d __B) {
3399  return (__m128d__builtin_ia32_scalefpd128_mask ((__v2df__A,
3400                (__v2df__B,
3401                (__v2df)
3402                _mm_setzero_pd (),
3403                (__mmask8__U);
3404}
3405
3406static __inline__ __m256d __DEFAULT_FN_ATTRS256
3407_mm256_scalef_pd (__m256d __A__m256d __B) {
3408  return (__m256d__builtin_ia32_scalefpd256_mask ((__v4df__A,
3409                (__v4df__B,
3410                (__v4df)
3411                _mm256_setzero_pd (),
3412                (__mmask8) -1);
3413}
3414
3415static __inline__ __m256d __DEFAULT_FN_ATTRS256
3416_mm256_mask_scalef_pd (__m256d __W__mmask8 __U__m256d __A,
3417           __m256d __B) {
3418  return (__m256d__builtin_ia32_scalefpd256_mask ((__v4df__A,
3419                (__v4df__B,
3420                (__v4df__W,
3421                (__mmask8__U);
3422}
3423
3424static __inline__ __m256d __DEFAULT_FN_ATTRS256
3425_mm256_maskz_scalef_pd (__mmask8 __U__m256d __A__m256d __B) {
3426  return (__m256d__builtin_ia32_scalefpd256_mask ((__v4df__A,
3427                (__v4df__B,
3428                (__v4df)
3429                _mm256_setzero_pd (),
3430                (__mmask8__U);
3431}
3432
3433static __inline__ __m128 __DEFAULT_FN_ATTRS128
3434_mm_scalef_ps (__m128 __A__m128 __B) {
3435  return (__m128__builtin_ia32_scalefps128_mask ((__v4sf__A,
3436               (__v4sf__B,
3437               (__v4sf)
3438               _mm_setzero_ps (),
3439               (__mmask8) -1);
3440}
3441
3442static __inline__ __m128 __DEFAULT_FN_ATTRS128
3443_mm_mask_scalef_ps (__m128 __W__mmask8 __U__m128 __A__m128 __B) {
3444  return (__m128__builtin_ia32_scalefps128_mask ((__v4sf__A,
3445               (__v4sf__B,
3446               (__v4sf__W,
3447               (__mmask8__U);
3448}
3449
3450static __inline__ __m128 __DEFAULT_FN_ATTRS128
3451_mm_maskz_scalef_ps (__mmask8 __U__m128 __A__m128 __B) {
3452  return (__m128__builtin_ia32_scalefps128_mask ((__v4sf__A,
3453               (__v4sf__B,
3454               (__v4sf)
3455               _mm_setzero_ps (),
3456               (__mmask8__U);
3457}
3458
3459static __inline__ __m256 __DEFAULT_FN_ATTRS256
3460_mm256_scalef_ps (__m256 __A__m256 __B) {
3461  return (__m256__builtin_ia32_scalefps256_mask ((__v8sf__A,
3462               (__v8sf__B,
3463               (__v8sf)
3464               _mm256_setzero_ps (),
3465               (__mmask8) -1);
3466}
3467
3468static __inline__ __m256 __DEFAULT_FN_ATTRS256
3469_mm256_mask_scalef_ps (__m256 __W__mmask8 __U__m256 __A,
3470           __m256 __B) {
3471  return (__m256__builtin_ia32_scalefps256_mask ((__v8sf__A,
3472               (__v8sf__B,
3473               (__v8sf__W,
3474               (__mmask8__U);
3475}
3476
3477static __inline__ __m256 __DEFAULT_FN_ATTRS256
3478_mm256_maskz_scalef_ps (__mmask8 __U__m256 __A__m256 __B) {
3479  return (__m256__builtin_ia32_scalefps256_mask ((__v8sf__A,
3480               (__v8sf__B,
3481               (__v8sf)
3482               _mm256_setzero_ps (),
3483               (__mmask8__U);
3484}
3485
/* Forwards to __builtin_ia32_scatterdiv2df with a constant (__mmask8)-1 mask;
 * index is viewed as __v2di and v1 as __v2df. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3490
/* Forwards to __builtin_ia32_scatterdiv2df with the caller's mask; index is
 * viewed as __v2di and v1 as __v2df. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3495
/* Forwards to __builtin_ia32_scatterdiv2di with a constant (__mmask8)-1 mask;
 * index and v1 are both viewed as __v2di. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3500
/* Forwards to __builtin_ia32_scatterdiv2di with the caller's mask; index and
 * v1 are both viewed as __v2di. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3505
/* Forwards to __builtin_ia32_scatterdiv4df with a constant (__mmask8)-1 mask;
 * index is viewed as __v4di and v1 as __v4df. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3510
/* Forwards to __builtin_ia32_scatterdiv4df with the caller's mask; index is
 * viewed as __v4di and v1 as __v4df. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3515
/* Forwards to __builtin_ia32_scatterdiv4di with a constant (__mmask8)-1 mask;
 * index and v1 are both viewed as __v4di. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3520
/* Forwards to __builtin_ia32_scatterdiv4di with the caller's mask; index and
 * v1 are both viewed as __v4di. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3525
/* Forwards to __builtin_ia32_scatterdiv4sf with a constant (__mmask8)-1 mask;
 * index is viewed as __v2di and v1 as __v4sf. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3530
/* Forwards to __builtin_ia32_scatterdiv4sf with the caller's mask; index is
 * viewed as __v2di and v1 as __v4sf. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3535
/* Forwards to __builtin_ia32_scatterdiv4si with a constant (__mmask8)-1 mask;
 * index is viewed as __v2di and v1 as __v4si. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3540
/* Forwards to __builtin_ia32_scatterdiv4si with the caller's mask; index is
 * viewed as __v2di and v1 as __v4si. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3545
/* Forwards to __builtin_ia32_scatterdiv8sf with a constant (__mmask8)-1 mask;
 * index is viewed as __v4di while v1 is a 128-bit value viewed as __v4sf. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3550
/* Forwards to __builtin_ia32_scatterdiv8sf with the caller's mask; index is
 * viewed as __v4di while v1 is a 128-bit value viewed as __v4sf. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3555
/* Forwards to __builtin_ia32_scatterdiv8si with a constant (__mmask8)-1 mask;
 * index is viewed as __v4di while v1 is a 128-bit value viewed as __v4si. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3560
/* Forwards to __builtin_ia32_scatterdiv8si with the caller's mask; index is
 * viewed as __v4di while v1 is a 128-bit value viewed as __v4si. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3565
/* Forwards to __builtin_ia32_scattersiv2df with a constant (__mmask8)-1 mask;
 * index is viewed as __v4si and v1 as __v2df. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3570
/* Forwards to __builtin_ia32_scattersiv2df with the caller's mask; index is
 * viewed as __v4si and v1 as __v2df. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3575
/* Forwards to __builtin_ia32_scattersiv2di with a constant (__mmask8)-1 mask;
 * index is viewed as __v4si and v1 as __v2di. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3580
/* Forwards to __builtin_ia32_scattersiv2di with the caller's mask; index is
 * viewed as __v4si and v1 as __v2di. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3585
/* Forwards to __builtin_ia32_scattersiv4df with a constant (__mmask8)-1 mask;
 * index is a 128-bit value viewed as __v4si and v1 is viewed as __v4df. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3590
/* Forwards to __builtin_ia32_scattersiv4df with the caller's mask; index is a
 * 128-bit value viewed as __v4si and v1 is viewed as __v4df. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3595
/* Forwards to __builtin_ia32_scattersiv4di with a constant (__mmask8)-1 mask;
 * index is a 128-bit value viewed as __v4si and v1 is viewed as __v4di. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3600
/* Forwards to __builtin_ia32_scattersiv4di with the caller's mask; index is a
 * 128-bit value viewed as __v4si and v1 is viewed as __v4di. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3605
/* Forwards to __builtin_ia32_scattersiv4sf with a constant (__mmask8)-1 mask;
 * index is viewed as __v4si and v1 as __v4sf. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3610
/* Forwards to __builtin_ia32_scattersiv4sf with the caller's mask; index is
 * viewed as __v4si and v1 as __v4sf. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3615
/* Forwards to __builtin_ia32_scattersiv4si with a constant (__mmask8)-1 mask;
 * index and v1 are both viewed as __v4si. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3620
/* Forwards to __builtin_ia32_scattersiv4si with the caller's mask; index and
 * v1 are both viewed as __v4si. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3625
/* Forwards to __builtin_ia32_scattersiv8sf with a constant (__mmask8)-1 mask;
 * index is viewed as __v8si and v1 as __v8sf. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
3630
/* Forwards to __builtin_ia32_scattersiv8sf with the caller's mask; index is
 * viewed as __v8si and v1 as __v8sf. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
3635
/* Forwards to __builtin_ia32_scattersiv8si with a constant (__mmask8)-1 mask;
 * index and v1 are both viewed as __v8si. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
3640
/* Forwards to __builtin_ia32_scattersiv8si with the caller's mask; index and
 * v1 are both viewed as __v8si. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
3645
3646  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3647  _mm_mask_sqrt_pd(__m128d __W__mmask8 __U__m128d __A) {
3648    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3649                                                (__v2df)_mm_sqrt_pd(__A),
3650                                                (__v2df)__W);
3651  }
3652
3653  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3654  _mm_maskz_sqrt_pd(__mmask8 __U__m128d __A) {
3655    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3656                                                (__v2df)_mm_sqrt_pd(__A),
3657                                                (__v2df)_mm_setzero_pd());
3658  }
3659
3660  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3661  _mm256_mask_sqrt_pd(__m256d __W__mmask8 __U__m256d __A) {
3662    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3663                                                (__v4df)_mm256_sqrt_pd(__A),
3664                                                (__v4df)__W);
3665  }
3666
3667  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3668  _mm256_maskz_sqrt_pd(__mmask8 __U__m256d __A) {
3669    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3670                                                (__v4df)_mm256_sqrt_pd(__A),
3671                                                (__v4df)_mm256_setzero_pd());
3672  }
3673
3674  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3675  _mm_mask_sqrt_ps(__m128 __W__mmask8 __U__m128 __A) {
3676    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3677                                               (__v4sf)_mm_sqrt_ps(__A),
3678                                               (__v4sf)__W);
3679  }
3680
3681  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3682  _mm_maskz_sqrt_ps(__mmask8 __U__m128 __A) {
3683    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3684                                               (__v4sf)_mm_sqrt_ps(__A),
3685                                               (__v4sf)_mm_setzero_ps());
3686  }
3687
3688  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3689  _mm256_mask_sqrt_ps(__m256 __W__mmask8 __U__m256 __A) {
3690    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3691                                               (__v8sf)_mm256_sqrt_ps(__A),
3692                                               (__v8sf)__W);
3693  }
3694
3695  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3696  _mm256_maskz_sqrt_ps(__mmask8 __U__m256 __A) {
3697    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3698                                               (__v8sf)_mm256_sqrt_ps(__A),
3699                                               (__v8sf)_mm256_setzero_ps());
3700  }
3701
3702  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3703  _mm_mask_sub_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
3704    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3705                                                (__v2df)_mm_sub_pd(__A__B),
3706                                                (__v2df)__W);
3707  }
3708
3709  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3710  _mm_maskz_sub_pd(__mmask8 __U__m128d __A__m128d __B) {
3711    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3712                                                (__v2df)_mm_sub_pd(__A__B),
3713                                                (__v2df)_mm_setzero_pd());
3714  }
3715
3716  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3717  _mm256_mask_sub_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
3718    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3719                                                (__v4df)_mm256_sub_pd(__A__B),
3720                                                (__v4df)__W);
3721  }
3722
3723  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3724  _mm256_maskz_sub_pd(__mmask8 __U__m256d __A__m256d __B) {
3725    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3726                                                (__v4df)_mm256_sub_pd(__A__B),
3727                                                (__v4df)_mm256_setzero_pd());
3728  }
3729
3730  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3731  _mm_mask_sub_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
3732    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3733                                               (__v4sf)_mm_sub_ps(__A__B),
3734                                               (__v4sf)__W);
3735  }
3736
3737  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3738  _mm_maskz_sub_ps(__mmask8 __U__m128 __A__m128 __B) {
3739    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3740                                               (__v4sf)_mm_sub_ps(__A__B),
3741                                               (__v4sf)_mm_setzero_ps());
3742  }
3743
3744  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3745  _mm256_mask_sub_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
3746    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3747                                               (__v8sf)_mm256_sub_ps(__A__B),
3748                                               (__v8sf)__W);
3749  }
3750
3751  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3752  _mm256_maskz_sub_ps(__mmask8 __U__m256 __A__m256 __B) {
3753    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3754                                               (__v8sf)_mm256_sub_ps(__A__B),
3755                                               (__v8sf)_mm256_setzero_ps());
3756  }
3757
3758  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3759  _mm_permutex2var_epi32(__m128i __A__m128i __I__m128i __B) {
3760    return (__m128i)__builtin_ia32_vpermi2vard128((__v4si__A, (__v4si)__I,
3761                                                  (__v4si)__B);
3762  }
3763
3764  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3765  _mm_mask_permutex2var_epi32(__m128i __A__mmask8 __U__m128i __I,
3766                              __m128i __B) {
3767    return (__m128i)__builtin_ia32_selectd_128(__U,
3768                                    (__v4si)_mm_permutex2var_epi32(__A__I__B),
3769                                    (__v4si)__A);
3770  }
3771
3772  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3773  _mm_mask2_permutex2var_epi32(__m128i __A__m128i __I__mmask8 __U,
3774                               __m128i __B) {
3775    return (__m128i)__builtin_ia32_selectd_128(__U,
3776                                    (__v4si)_mm_permutex2var_epi32(__A__I__B),
3777                                    (__v4si)__I);
3778  }
3779
3780  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3781  _mm_maskz_permutex2var_epi32(__mmask8 __U__m128i __A__m128i __I,
3782                               __m128i __B) {
3783    return (__m128i)__builtin_ia32_selectd_128(__U,
3784                                    (__v4si)_mm_permutex2var_epi32(__A__I__B),
3785                                    (__v4si)_mm_setzero_si128());
3786  }
3787
3788  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3789  _mm256_permutex2var_epi32(__m256i __A__m256i __I__m256i __B) {
3790    return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si__I,
3791                                                  (__v8si__B);
3792  }
3793
3794  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3795  _mm256_mask_permutex2var_epi32(__m256i __A__mmask8 __U__m256i __I,
3796                                 __m256i __B) {
3797    return (__m256i)__builtin_ia32_selectd_256(__U,
3798                                 (__v8si)_mm256_permutex2var_epi32(__A__I__B),
3799                                 (__v8si)__A);
3800  }
3801
3802  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3803  _mm256_mask2_permutex2var_epi32(__m256i __A__m256i __I__mmask8 __U,
3804                                  __m256i __B) {
3805    return (__m256i)__builtin_ia32_selectd_256(__U,
3806                                 (__v8si)_mm256_permutex2var_epi32(__A__I__B),
3807                                 (__v8si)__I);
3808  }
3809
3810  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3811  _mm256_maskz_permutex2var_epi32(__mmask8 __U__m256i __A__m256i __I,
3812                                  __m256i __B) {
3813    return (__m256i)__builtin_ia32_selectd_256(__U,
3814                                 (__v8si)_mm256_permutex2var_epi32(__A__I__B),
3815                                 (__v8si)_mm256_setzero_si256());
3816  }
3817
3818  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3819  _mm_permutex2var_pd(__m128d __A__m128i __I__m128d __B) {
3820    return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3821                                                   (__v2df)__B);
3822  }
3823
3824  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3825  _mm_mask_permutex2var_pd(__m128d __A__mmask8 __U__m128i __I__m128d __B) {
3826    return (__m128d)__builtin_ia32_selectpd_128(__U,
3827                                       (__v2df)_mm_permutex2var_pd(__A__I__B),
3828                                       (__v2df)__A);
3829  }
3830
3831  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3832  _mm_mask2_permutex2var_pd(__m128d __A__m128i __I__mmask8 __U__m128d __B) {
3833    return (__m128d)__builtin_ia32_selectpd_128(__U,
3834                                       (__v2df)_mm_permutex2var_pd(__A__I__B),
3835                                       (__v2df)(__m128d)__I);
3836  }
3837
3838  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3839  _mm_maskz_permutex2var_pd(__mmask8 __U__m128d __A__m128i __I__m128d __B) {
3840    return (__m128d)__builtin_ia32_selectpd_128(__U,
3841                                       (__v2df)_mm_permutex2var_pd(__A__I__B),
3842                                       (__v2df)_mm_setzero_pd());
3843  }
3844
3845  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3846  _mm256_permutex2var_pd(__m256d __A__m256i __I__m256d __B) {
3847    return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3848                                                   (__v4df)__B);
3849  }
3850
3851  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3852  _mm256_mask_permutex2var_pd(__m256d __A__mmask8 __U__m256i __I,
3853                              __m256d __B) {
3854    return (__m256d)__builtin_ia32_selectpd_256(__U,
3855                                    (__v4df)_mm256_permutex2var_pd(__A__I__B),
3856                                    (__v4df)__A);
3857  }
3858
3859  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3860  _mm256_mask2_permutex2var_pd(__m256d __A__m256i __I__mmask8 __U,
3861                               __m256d __B) {
3862    return (__m256d)__builtin_ia32_selectpd_256(__U,
3863                                    (__v4df)_mm256_permutex2var_pd(__A__I__B),
3864                                    (__v4df)(__m256d)__I);
3865  }
3866
3867  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3868  _mm256_maskz_permutex2var_pd(__mmask8 __U__m256d __A__m256i __I,
3869                               __m256d __B) {
3870    return (__m256d)__builtin_ia32_selectpd_256(__U,
3871                                    (__v4df)_mm256_permutex2var_pd(__A__I__B),
3872                                    (__v4df)_mm256_setzero_pd());
3873  }
3874
3875  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3876  _mm_permutex2var_ps(__m128 __A__m128i __I__m128 __B) {
3877    return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3878                                                  (__v4sf)__B);
3879  }
3880
3881  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3882  _mm_mask_permutex2var_ps(__m128 __A__mmask8 __U__m128i __I__m128 __B) {
3883    return (__m128)__builtin_ia32_selectps_128(__U,
3884                                       (__v4sf)_mm_permutex2var_ps(__A__I__B),
3885                                       (__v4sf)__A);
3886  }
3887
3888  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3889  _mm_mask2_permutex2var_ps(__m128 __A__m128i __I__mmask8 __U__m128 __B) {
3890    return (__m128)__builtin_ia32_selectps_128(__U,
3891                                       (__v4sf)_mm_permutex2var_ps(__A__I__B),
3892                                       (__v4sf)(__m128)__I);
3893  }
3894
3895  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3896  _mm_maskz_permutex2var_ps(__mmask8 __U__m128 __A__m128i __I__m128 __B) {
3897    return (__m128)__builtin_ia32_selectps_128(__U,
3898                                       (__v4sf)_mm_permutex2var_ps(__A__I__B),
3899                                       (__v4sf)_mm_setzero_ps());
3900  }
3901
3902  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3903  _mm256_permutex2var_ps(__m256 __A__m256i __I__m256 __B) {
3904    return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3905                                                  (__v8sf__B);
3906  }
3907
3908  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3909  _mm256_mask_permutex2var_ps(__m256 __A__mmask8 __U__m256i __I__m256 __B) {
3910    return (__m256)__builtin_ia32_selectps_256(__U,
3911                                    (__v8sf)_mm256_permutex2var_ps(__A__I__B),
3912                                    (__v8sf)__A);
3913  }
3914
3915  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3916  _mm256_mask2_permutex2var_ps(__m256 __A__m256i __I__mmask8 __U,
3917                               __m256 __B) {
3918    return (__m256)__builtin_ia32_selectps_256(__U,
3919                                    (__v8sf)_mm256_permutex2var_ps(__A__I__B),
3920                                    (__v8sf)(__m256)__I);
3921  }
3922
3923  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3924  _mm256_maskz_permutex2var_ps(__mmask8 __U__m256 __A__m256i __I,
3925                               __m256 __B) {
3926    return (__m256)__builtin_ia32_selectps_256(__U,
3927                                    (__v8sf)_mm256_permutex2var_ps(__A__I__B),
3928                                    (__v8sf)_mm256_setzero_ps());
3929  }
3930
3931  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3932  _mm_permutex2var_epi64(__m128i __A__m128i __I__m128i __B) {
3933    return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3934                                                  (__v2di)__B);
3935  }
3936
3937  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3938  _mm_mask_permutex2var_epi64(__m128i __A__mmask8 __U__m128i __I,
3939                              __m128i __B) {
3940    return (__m128i)__builtin_ia32_selectq_128(__U,
3941                                    (__v2di)_mm_permutex2var_epi64(__A__I__B),
3942                                    (__v2di)__A);
3943  }
3944
3945  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3946  _mm_mask2_permutex2var_epi64(__m128i __A__m128i __I__mmask8 __U,
3947                               __m128i __B) {
3948    return (__m128i)__builtin_ia32_selectq_128(__U,
3949                                    (__v2di)_mm_permutex2var_epi64(__A__I__B),
3950                                    (__v2di)__I);
3951  }
3952
3953  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3954  _mm_maskz_permutex2var_epi64(__mmask8 __U__m128i __A__m128i __I,
3955                               __m128i __B) {
3956    return (__m128i)__builtin_ia32_selectq_128(__U,
3957                                    (__v2di)_mm_permutex2var_epi64(__A__I__B),
3958                                    (__v2di)_mm_setzero_si128());
3959  }
3960
3961
3962  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3963  _mm256_permutex2var_epi64(__m256i __A__m256i __I__m256i __B) {
3964    return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di__I,
3965                                                  (__v4di__B);
3966  }
3967
3968  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3969  _mm256_mask_permutex2var_epi64(__m256i __A__mmask8 __U__m256i __I,
3970                                 __m256i __B) {
3971    return (__m256i)__builtin_ia32_selectq_256(__U,
3972                                 (__v4di)_mm256_permutex2var_epi64(__A__I__B),
3973                                 (__v4di)__A);
3974  }
3975
3976  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3977  _mm256_mask2_permutex2var_epi64(__m256i __A__m256i __I__mmask8 __U,
3978                                  __m256i __B) {
3979    return (__m256i)__builtin_ia32_selectq_256(__U,
3980                                 (__v4di)_mm256_permutex2var_epi64(__A__I__B),
3981                                 (__v4di)__I);
3982  }
3983
3984  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3985  _mm256_maskz_permutex2var_epi64(__mmask8 __U__m256i __A__m256i __I,
3986                                  __m256i __B) {
3987    return (__m256i)__builtin_ia32_selectq_256(__U,
3988                                 (__v4di)_mm256_permutex2var_epi64(__A__I__B),
3989                                 (__v4di)_mm256_setzero_si256());
3990  }
3991
3992  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3993  _mm_mask_cvtepi8_epi32(__m128i __W__mmask8 __U__m128i __A)
3994  {
3995    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3996                                               (__v4si)_mm_cvtepi8_epi32(__A),
3997                                               (__v4si)__W);
3998  }
3999
4000  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4001  _mm_maskz_cvtepi8_epi32(__mmask8 __U__m128i __A)
4002  {
4003    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4004                                               (__v4si)_mm_cvtepi8_epi32(__A),
4005                                               (__v4si)_mm_setzero_si128());
4006  }
4007
4008  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4009  _mm256_mask_cvtepi8_epi32 (__m256i __W__mmask8 __U__m128i __A)
4010  {
4011    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4012                                               (__v8si)_mm256_cvtepi8_epi32(__A),
4013                                               (__v8si)__W);
4014  }
4015
4016  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4017  _mm256_maskz_cvtepi8_epi32 (__mmask8 __U__m128i __A)
4018  {
4019    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4020                                               (__v8si)_mm256_cvtepi8_epi32(__A),
4021                                               (__v8si)_mm256_setzero_si256());
4022  }
4023
4024  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4025  _mm_mask_cvtepi8_epi64(__m128i __W__mmask8 __U__m128i __A)
4026  {
4027    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4028                                               (__v2di)_mm_cvtepi8_epi64(__A),
4029                                               (__v2di)__W);
4030  }
4031
4032  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4033  _mm_maskz_cvtepi8_epi64(__mmask8 __U__m128i __A)
4034  {
4035    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4036                                               (__v2di)_mm_cvtepi8_epi64(__A),
4037                                               (__v2di)_mm_setzero_si128());
4038  }
4039
4040  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4041  _mm256_mask_cvtepi8_epi64(__m256i __W__mmask8 __U__m128i __A)
4042  {
4043    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4044                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4045                                               (__v4di)__W);
4046  }
4047
4048  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4049  _mm256_maskz_cvtepi8_epi64(__mmask8 __U__m128i __A)
4050  {
4051    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4052                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4053                                               (__v4di)_mm256_setzero_si256());
4054  }
4055
4056  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4057  _mm_mask_cvtepi32_epi64(__m128i __W__mmask8 __U__m128i __X)
4058  {
4059    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4060                                               (__v2di)_mm_cvtepi32_epi64(__X),
4061                                               (__v2di)__W);
4062  }
4063
4064  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4065  _mm_maskz_cvtepi32_epi64(__mmask8 __U__m128i __X)
4066  {
4067    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4068                                               (__v2di)_mm_cvtepi32_epi64(__X),
4069                                               (__v2di)_mm_setzero_si128());
4070  }
4071
4072  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4073  _mm256_mask_cvtepi32_epi64(__m256i __W__mmask8 __U__m128i __X)
4074  {
4075    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4076                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4077                                               (__v4di)__W);
4078  }
4079
4080  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4081  _mm256_maskz_cvtepi32_epi64(__mmask8 __U__m128i __X)
4082  {
4083    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4084                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4085                                               (__v4di)_mm256_setzero_si256());
4086  }
4087
4088  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4089  _mm_mask_cvtepi16_epi32(__m128i __W__mmask8 __U__m128i __A)
4090  {
4091    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4092                                               (__v4si)_mm_cvtepi16_epi32(__A),
4093                                               (__v4si)__W);
4094  }
4095
4096  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4097  _mm_maskz_cvtepi16_epi32(__mmask8 __U__m128i __A)
4098  {
4099    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4100                                               (__v4si)_mm_cvtepi16_epi32(__A),
4101                                               (__v4si)_mm_setzero_si128());
4102  }
4103
4104  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4105  _mm256_mask_cvtepi16_epi32(__m256i __W__mmask8 __U__m128i __A)
4106  {
4107    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4108                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4109                                               (__v8si)__W);
4110  }
4111
4112  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4113  _mm256_maskz_cvtepi16_epi32 (__mmask8 __U__m128i __A)
4114  {
4115    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4116                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4117                                               (__v8si)_mm256_setzero_si256());
4118  }
4119
4120  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4121  _mm_mask_cvtepi16_epi64(__m128i __W__mmask8 __U__m128i __A)
4122  {
4123    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4124                                               (__v2di)_mm_cvtepi16_epi64(__A),
4125                                               (__v2di)__W);
4126  }
4127
4128  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4129  _mm_maskz_cvtepi16_epi64(__mmask8 __U__m128i __A)
4130  {
4131    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4132                                               (__v2di)_mm_cvtepi16_epi64(__A),
4133                                               (__v2di)_mm_setzero_si128());
4134  }
4135
4136  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4137  _mm256_mask_cvtepi16_epi64(__m256i __W__mmask8 __U__m128i __A)
4138  {
4139    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4140                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4141                                               (__v4di)__W);
4142  }
4143
4144  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4145  _mm256_maskz_cvtepi16_epi64(__mmask8 __U__m128i __A)
4146  {
4147    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4148                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4149                                               (__v4di)_mm256_setzero_si256());
4150  }
4151
4152
4153  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4154  _mm_mask_cvtepu8_epi32(__m128i __W__mmask8 __U__m128i __A)
4155  {
4156    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4157                                               (__v4si)_mm_cvtepu8_epi32(__A),
4158                                               (__v4si)__W);
4159  }
4160
4161  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4162  _mm_maskz_cvtepu8_epi32(__mmask8 __U__m128i __A)
4163  {
4164    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4165                                               (__v4si)_mm_cvtepu8_epi32(__A),
4166                                               (__v4si)_mm_setzero_si128());
4167  }
4168
4169  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4170  _mm256_mask_cvtepu8_epi32(__m256i __W__mmask8 __U__m128i __A)
4171  {
4172    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4173                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4174                                               (__v8si)__W);
4175  }
4176
4177  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4178  _mm256_maskz_cvtepu8_epi32(__mmask8 __U__m128i __A)
4179  {
4180    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4181                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4182                                               (__v8si)_mm256_setzero_si256());
4183  }
4184
4185  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4186  _mm_mask_cvtepu8_epi64(__m128i __W__mmask8 __U__m128i __A)
4187  {
4188    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4189                                               (__v2di)_mm_cvtepu8_epi64(__A),
4190                                               (__v2di)__W);
4191  }
4192
4193  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4194  _mm_maskz_cvtepu8_epi64(__mmask8 __U__m128i __A)
4195  {
4196    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4197                                               (__v2di)_mm_cvtepu8_epi64(__A),
4198                                               (__v2di)_mm_setzero_si128());
4199  }
4200
4201  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4202  _mm256_mask_cvtepu8_epi64(__m256i __W__mmask8 __U__m128i __A)
4203  {
4204    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4205                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4206                                               (__v4di)__W);
4207  }
4208
4209  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4210  _mm256_maskz_cvtepu8_epi64 (__mmask8 __U__m128i __A)
4211  {
4212    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4213                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4214                                               (__v4di)_mm256_setzero_si256());
4215  }
4216
4217  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4218  _mm_mask_cvtepu32_epi64(__m128i __W__mmask8 __U__m128i __X)
4219  {
4220    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4221                                               (__v2di)_mm_cvtepu32_epi64(__X),
4222                                               (__v2di)__W);
4223  }
4224
4225  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4226  _mm_maskz_cvtepu32_epi64(__mmask8 __U__m128i __X)
4227  {
4228    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4229                                               (__v2di)_mm_cvtepu32_epi64(__X),
4230                                               (__v2di)_mm_setzero_si128());
4231  }
4232
4233  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4234  _mm256_mask_cvtepu32_epi64(__m256i __W__mmask8 __U__m128i __X)
4235  {
4236    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4237                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4238                                               (__v4di)__W);
4239  }
4240
4241  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4242  _mm256_maskz_cvtepu32_epi64(__mmask8 __U__m128i __X)
4243  {
4244    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4245                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4246                                               (__v4di)_mm256_setzero_si256());
4247  }
4248
4249  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4250  _mm_mask_cvtepu16_epi32(__m128i __W__mmask8 __U__m128i __A)
4251  {
4252    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4253                                               (__v4si)_mm_cvtepu16_epi32(__A),
4254                                               (__v4si)__W);
4255  }
4256
4257  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4258  _mm_maskz_cvtepu16_epi32(__mmask8 __U__m128i __A)
4259  {
4260    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4261                                               (__v4si)_mm_cvtepu16_epi32(__A),
4262                                               (__v4si)_mm_setzero_si128());
4263  }
4264
4265  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4266  _mm256_mask_cvtepu16_epi32(__m256i __W__mmask8 __U__m128i __A)
4267  {
4268    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4269                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4270                                               (__v8si)__W);
4271  }
4272
4273  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4274  _mm256_maskz_cvtepu16_epi32(__mmask8 __U__m128i __A)
4275  {
4276    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4277                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4278                                               (__v8si)_mm256_setzero_si256());
4279  }
4280
4281  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4282  _mm_mask_cvtepu16_epi64(__m128i __W__mmask8 __U__m128i __A)
4283  {
4284    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4285                                               (__v2di)_mm_cvtepu16_epi64(__A),
4286                                               (__v2di)__W);
4287  }
4288
4289  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4290  _mm_maskz_cvtepu16_epi64(__mmask8 __U__m128i __A)
4291  {
4292    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4293                                               (__v2di)_mm_cvtepu16_epi64(__A),
4294                                               (__v2di)_mm_setzero_si128());
4295  }
4296
4297  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4298  _mm256_mask_cvtepu16_epi64(__m256i __W__mmask8 __U__m128i __A)
4299  {
4300    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4301                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4302                                               (__v4di)__W);
4303  }
4304
4305  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4306  _mm256_maskz_cvtepu16_epi64(__mmask8 __U__m128i __A)
4307  {
4308    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4309                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4310                                               (__v4di)_mm256_setzero_si256());
4311  }
4312
4313
/* Expands to __builtin_ia32_prold128 applied to (a) viewed as __v4si with the
 * count (b) converted to int.  The whole expansion is parenthesized so the
 * leading cast cannot be split from its operand by operators at the use
 * site. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
4316
/* Merge-masked _mm_rol_epi32: the select builtin chooses, per element under
 * mask (u), between _mm_rol_epi32((a), (b)) and the pass-through vector (w).
 * The whole expansion is parenthesized so it acts as one expression. */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4321
/* Zero-masked _mm_rol_epi32: the select builtin chooses, per element under
 * mask (u), between _mm_rol_epi32((a), (b)) and _mm_setzero_si128().
 * The whole expansion is parenthesized so it acts as one expression. */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4326
/* Expands to __builtin_ia32_prold256 applied to (a) viewed as __v8si with the
 * count (b) converted to int.  The whole expansion is parenthesized so the
 * leading cast cannot be split from its operand by operators at the use
 * site. */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
4329
/* Merge-masked _mm256_rol_epi32: the select builtin chooses, per element
 * under mask (u), between _mm256_rol_epi32((a), (b)) and the pass-through
 * vector (w).  The whole expansion is parenthesized so it acts as one
 * expression. */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4334
/* Zero-masked _mm256_rol_epi32: the select builtin chooses, per element under
 * mask (u), between _mm256_rol_epi32((a), (b)) and _mm256_setzero_si256().
 * The whole expansion is parenthesized so it acts as one expression. */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4339
/* Expands to __builtin_ia32_prolq128 applied to (a) viewed as __v2di with the
 * count (b) converted to int.  The whole expansion is parenthesized so the
 * leading cast cannot be split from its operand by operators at the use
 * site. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
4342
/* Merge-masked _mm_rol_epi64: the select builtin chooses, per element under
 * mask (u), between _mm_rol_epi64((a), (b)) and the pass-through vector (w).
 * The whole expansion is parenthesized so it acts as one expression. */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4347
/* Zero-masked _mm_rol_epi64: the select builtin chooses, per element under
 * mask (u), between _mm_rol_epi64((a), (b)) and _mm_setzero_si128().
 * The whole expansion is parenthesized so it acts as one expression. */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4352
/* Expands to __builtin_ia32_prolq256 applied to (a) viewed as __v4di with the
 * count (b) converted to int.  The whole expansion is parenthesized so the
 * leading cast cannot be split from its operand by operators at the use
 * site. */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
4355
/* Merge-masked _mm256_rol_epi64: the select builtin chooses, per element
 * under mask (u), between _mm256_rol_epi64((a), (b)) and the pass-through
 * vector (w).  The whole expansion is parenthesized so it acts as one
 * expression. */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4360
/* Zero-masked _mm256_rol_epi64: the select builtin chooses, per element under
 * mask (u), between _mm256_rol_epi64((a), (b)) and _mm256_setzero_si256().
 * The whole expansion is parenthesized so it acts as one expression. */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4365
4366static __inline__ __m128i __DEFAULT_FN_ATTRS128
4367_mm_rolv_epi32 (__m128i __A__m128i __B)
4368{
4369  return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4370}
4371
4372static __inline__ __m128i __DEFAULT_FN_ATTRS128
4373_mm_mask_rolv_epi32 (__m128i __W__mmask8 __U__m128i __A__m128i __B)
4374{
4375  return (__m128i)__builtin_ia32_selectd_128(__U,
4376                                             (__v4si)_mm_rolv_epi32(__A__B),
4377                                             (__v4si)__W);
4378}
4379
4380static __inline__ __m128i __DEFAULT_FN_ATTRS128
4381_mm_maskz_rolv_epi32 (__mmask8 __U__m128i __A__m128i __B)
4382{
4383  return (__m128i)__builtin_ia32_selectd_128(__U,
4384                                             (__v4si)_mm_rolv_epi32(__A__B),
4385                                             (__v4si)_mm_setzero_si128());
4386}
4387
4388static __inline__ __m256i __DEFAULT_FN_ATTRS256
4389_mm256_rolv_epi32 (__m256i __A__m256i __B)
4390{
4391  return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4392}
4393
4394static __inline__ __m256i __DEFAULT_FN_ATTRS256
4395_mm256_mask_rolv_epi32 (__m256i __W__mmask8 __U__m256i __A__m256i __B)
4396{
4397  return (__m256i)__builtin_ia32_selectd_256(__U,
4398                                            (__v8si)_mm256_rolv_epi32(__A__B),
4399                                            (__v8si)__W);
4400}
4401
4402static __inline__ __m256i __DEFAULT_FN_ATTRS256
4403_mm256_maskz_rolv_epi32 (__mmask8 __U__m256i __A__m256i __B)
4404{
4405  return (__m256i)__builtin_ia32_selectd_256(__U,
4406                                            (__v8si)_mm256_rolv_epi32(__A__B),
4407                                            (__v8si)_mm256_setzero_si256());
4408}
4409
4410static __inline__ __m128i __DEFAULT_FN_ATTRS128
4411_mm_rolv_epi64 (__m128i __A__m128i __B)
4412{
4413  return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4414}
4415
4416static __inline__ __m128i __DEFAULT_FN_ATTRS128
4417_mm_mask_rolv_epi64 (__m128i __W__mmask8 __U__m128i __A__m128i __B)
4418{
4419  return (__m128i)__builtin_ia32_selectq_128(__U,
4420                                             (__v2di)_mm_rolv_epi64(__A__B),
4421                                             (__v2di)__W);
4422}
4423
4424static __inline__ __m128i __DEFAULT_FN_ATTRS128
4425_mm_maskz_rolv_epi64 (__mmask8 __U__m128i __A__m128i __B)
4426{
4427  return (__m128i)__builtin_ia32_selectq_128(__U,
4428                                             (__v2di)_mm_rolv_epi64(__A__B),
4429                                             (__v2di)_mm_setzero_si128());
4430}
4431
4432static __inline__ __m256i __DEFAULT_FN_ATTRS256
4433_mm256_rolv_epi64 (__m256i __A__m256i __B)
4434{
4435  return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4436}
4437
4438static __inline__ __m256i __DEFAULT_FN_ATTRS256
4439_mm256_mask_rolv_epi64 (__m256i __W__mmask8 __U__m256i __A__m256i __B)
4440{
4441  return (__m256i)__builtin_ia32_selectq_256(__U,
4442                                            (__v4di)_mm256_rolv_epi64(__A__B),
4443                                            (__v4di)__W);
4444}
4445
4446static __inline__ __m256i __DEFAULT_FN_ATTRS256
4447_mm256_maskz_rolv_epi64 (__mmask8 __U__m256i __A__m256i __B)
4448{
4449  return (__m256i)__builtin_ia32_selectq_256(__U,
4450                                            (__v4di)_mm256_rolv_epi64(__A__B),
4451                                            (__v4di)_mm256_setzero_si256());
4452}
4453
/* __builtin_ia32_prord128 on (a) viewed as __v4si with (b) cast to int.
   The expansion is fully parenthesized so it behaves as one operand. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
4456
/* Selects, under mask (u), between _mm_ror_epi32((a), (b)) and (w) via
   __builtin_ia32_selectd_128.  Fully parenthesized expansion. */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4461
/* Selects, under mask (u), between _mm_ror_epi32((a), (b)) and
   _mm_setzero_si128() via __builtin_ia32_selectd_128.  Fully parenthesized. */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4466
/* __builtin_ia32_prord256 on (a) viewed as __v8si with (b) cast to int.
   The expansion is fully parenthesized so it behaves as one operand. */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
4469
/* Selects, under mask (u), between _mm256_ror_epi32((a), (b)) and (w) via
   __builtin_ia32_selectd_256.  Fully parenthesized expansion. */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4474
/* Selects, under mask (u), between _mm256_ror_epi32((a), (b)) and
   _mm256_setzero_si256() via __builtin_ia32_selectd_256.  Fully
   parenthesized expansion. */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4479
/* __builtin_ia32_prorq128 on (a) viewed as __v2di with (b) cast to int.
   The expansion is fully parenthesized so it behaves as one operand. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
4482
/* Selects, under mask (u), between _mm_ror_epi64((a), (b)) and (w) via
   __builtin_ia32_selectq_128.  Fully parenthesized expansion. */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4487
/* Selects, under mask (u), between _mm_ror_epi64((a), (b)) and
   _mm_setzero_si128() via __builtin_ia32_selectq_128.  Fully parenthesized. */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4492
/* __builtin_ia32_prorq256 on (a) viewed as __v4di with (b) cast to int.
   The expansion is fully parenthesized so it behaves as one operand. */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
4495
/* Selects, under mask (u), between _mm256_ror_epi64((a), (b)) and (w) via
   __builtin_ia32_selectq_256.  Fully parenthesized expansion. */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4500
/* Selects, under mask (u), between _mm256_ror_epi64((a), (b)) and
   _mm256_setzero_si256() via __builtin_ia32_selectq_256.  Fully
   parenthesized expansion. */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4505
4506static __inline__ __m128i __DEFAULT_FN_ATTRS128
4507_mm_mask_sll_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
4508{
4509  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4510                                             (__v4si)_mm_sll_epi32(__A__B),
4511                                             (__v4si)__W);
4512}
4513
4514static __inline__ __m128i __DEFAULT_FN_ATTRS128
4515_mm_maskz_sll_epi32(__mmask8 __U__m128i __A__m128i __B)
4516{
4517  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4518                                             (__v4si)_mm_sll_epi32(__A__B),
4519                                             (__v4si)_mm_setzero_si128());
4520}
4521
4522static __inline__ __m256i __DEFAULT_FN_ATTRS256
4523_mm256_mask_sll_epi32(__m256i __W__mmask8 __U__m256i __A__m128i __B)
4524{
4525  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4526                                             (__v8si)_mm256_sll_epi32(__A__B),
4527                                             (__v8si)__W);
4528}
4529
4530static __inline__ __m256i __DEFAULT_FN_ATTRS256
4531_mm256_maskz_sll_epi32(__mmask8 __U__m256i __A__m128i __B)
4532{
4533  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4534                                             (__v8si)_mm256_sll_epi32(__A__B),
4535                                             (__v8si)_mm256_setzero_si256());
4536}
4537
4538static __inline__ __m128i __DEFAULT_FN_ATTRS128
4539_mm_mask_slli_epi32(__m128i __W__mmask8 __U__m128i __Aint __B)
4540{
4541  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4542                                             (__v4si)_mm_slli_epi32(__A__B),
4543                                             (__v4si)__W);
4544}
4545
4546static __inline__ __m128i __DEFAULT_FN_ATTRS128
4547_mm_maskz_slli_epi32(__mmask8 __U__m128i __Aint __B)
4548{
4549  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4550                                             (__v4si)_mm_slli_epi32(__A__B),
4551                                             (__v4si)_mm_setzero_si128());
4552}
4553
4554static __inline__ __m256i __DEFAULT_FN_ATTRS256
4555_mm256_mask_slli_epi32(__m256i __W__mmask8 __U__m256i __Aint __B)
4556{
4557  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4558                                             (__v8si)_mm256_slli_epi32(__A__B),
4559                                             (__v8si)__W);
4560}
4561
4562static __inline__ __m256i __DEFAULT_FN_ATTRS256
4563_mm256_maskz_slli_epi32(__mmask8 __U__m256i __Aint __B)
4564{
4565  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4566                                             (__v8si)_mm256_slli_epi32(__A__B),
4567                                             (__v8si)_mm256_setzero_si256());
4568}
4569
4570static __inline__ __m128i __DEFAULT_FN_ATTRS128
4571_mm_mask_sll_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
4572{
4573  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4574                                             (__v2di)_mm_sll_epi64(__A__B),
4575                                             (__v2di)__W);
4576}
4577
4578static __inline__ __m128i __DEFAULT_FN_ATTRS128
4579_mm_maskz_sll_epi64(__mmask8 __U__m128i __A__m128i __B)
4580{
4581  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4582                                             (__v2di)_mm_sll_epi64(__A__B),
4583                                             (__v2di)_mm_setzero_si128());
4584}
4585
4586static __inline__ __m256i __DEFAULT_FN_ATTRS256
4587_mm256_mask_sll_epi64(__m256i __W__mmask8 __U__m256i __A__m128i __B)
4588{
4589  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4590                                             (__v4di)_mm256_sll_epi64(__A__B),
4591                                             (__v4di)__W);
4592}
4593
4594static __inline__ __m256i __DEFAULT_FN_ATTRS256
4595_mm256_maskz_sll_epi64(__mmask8 __U__m256i __A__m128i __B)
4596{
4597  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4598                                             (__v4di)_mm256_sll_epi64(__A__B),
4599                                             (__v4di)_mm256_setzero_si256());
4600}
4601
4602static __inline__ __m128i __DEFAULT_FN_ATTRS128
4603_mm_mask_slli_epi64(__m128i __W__mmask8 __U__m128i __Aint __B)
4604{
4605  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4606                                             (__v2di)_mm_slli_epi64(__A__B),
4607                                             (__v2di)__W);
4608}
4609
4610static __inline__ __m128i __DEFAULT_FN_ATTRS128
4611_mm_maskz_slli_epi64(__mmask8 __U__m128i __Aint __B)
4612{
4613  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4614                                             (__v2di)_mm_slli_epi64(__A__B),
4615                                             (__v2di)_mm_setzero_si128());
4616}
4617
4618static __inline__ __m256i __DEFAULT_FN_ATTRS256
4619_mm256_mask_slli_epi64(__m256i __W__mmask8 __U__m256i __Aint __B)
4620{
4621  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4622                                             (__v4di)_mm256_slli_epi64(__A__B),
4623                                             (__v4di)__W);
4624}
4625
4626static __inline__ __m256i __DEFAULT_FN_ATTRS256
4627_mm256_maskz_slli_epi64(__mmask8 __U__m256i __Aint __B)
4628{
4629  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4630                                             (__v4di)_mm256_slli_epi64(__A__B),
4631                                             (__v4di)_mm256_setzero_si256());
4632}
4633
4634static __inline__ __m128i __DEFAULT_FN_ATTRS128
4635_mm_rorv_epi32 (__m128i __A__m128i __B)
4636{
4637  return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4638}
4639
4640static __inline__ __m128i __DEFAULT_FN_ATTRS128
4641_mm_mask_rorv_epi32 (__m128i __W__mmask8 __U__m128i __A__m128i __B)
4642{
4643  return (__m128i)__builtin_ia32_selectd_128(__U,
4644                                             (__v4si)_mm_rorv_epi32(__A__B),
4645                                             (__v4si)__W);
4646}
4647
4648static __inline__ __m128i __DEFAULT_FN_ATTRS128
4649_mm_maskz_rorv_epi32 (__mmask8 __U__m128i __A__m128i __B)
4650{
4651  return (__m128i)__builtin_ia32_selectd_128(__U,
4652                                             (__v4si)_mm_rorv_epi32(__A__B),
4653                                             (__v4si)_mm_setzero_si128());
4654}
4655
4656static __inline__ __m256i __DEFAULT_FN_ATTRS256
4657_mm256_rorv_epi32 (__m256i __A__m256i __B)
4658{
4659  return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4660}
4661
4662static __inline__ __m256i __DEFAULT_FN_ATTRS256
4663_mm256_mask_rorv_epi32 (__m256i __W__mmask8 __U__m256i __A__m256i __B)
4664{
4665  return (__m256i)__builtin_ia32_selectd_256(__U,
4666                                            (__v8si)_mm256_rorv_epi32(__A__B),
4667                                            (__v8si)__W);
4668}
4669
4670static __inline__ __m256i __DEFAULT_FN_ATTRS256
4671_mm256_maskz_rorv_epi32 (__mmask8 __U__m256i __A__m256i __B)
4672{
4673  return (__m256i)__builtin_ia32_selectd_256(__U,
4674                                            (__v8si)_mm256_rorv_epi32(__A__B),
4675                                            (__v8si)_mm256_setzero_si256());
4676}
4677
4678static __inline__ __m128i __DEFAULT_FN_ATTRS128
4679_mm_rorv_epi64 (__m128i __A__m128i __B)
4680{
4681  return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4682}
4683
4684static __inline__ __m128i __DEFAULT_FN_ATTRS128
4685_mm_mask_rorv_epi64 (__m128i __W__mmask8 __U__m128i __A__m128i __B)
4686{
4687  return (__m128i)__builtin_ia32_selectq_128(__U,
4688                                             (__v2di)_mm_rorv_epi64(__A__B),
4689                                             (__v2di)__W);
4690}
4691
4692static __inline__ __m128i __DEFAULT_FN_ATTRS128
4693_mm_maskz_rorv_epi64 (__mmask8 __U__m128i __A__m128i __B)
4694{
4695  return (__m128i)__builtin_ia32_selectq_128(__U,
4696                                             (__v2di)_mm_rorv_epi64(__A__B),
4697                                             (__v2di)_mm_setzero_si128());
4698}
4699
4700static __inline__ __m256i __DEFAULT_FN_ATTRS256
4701_mm256_rorv_epi64 (__m256i __A__m256i __B)
4702{
4703  return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4704}
4705
4706static __inline__ __m256i __DEFAULT_FN_ATTRS256
4707_mm256_mask_rorv_epi64 (__m256i __W__mmask8 __U__m256i __A__m256i __B)
4708{
4709  return (__m256i)__builtin_ia32_selectq_256(__U,
4710                                            (__v4di)_mm256_rorv_epi64(__A__B),
4711                                            (__v4di)__W);
4712}
4713
4714static __inline__ __m256i __DEFAULT_FN_ATTRS256
4715_mm256_maskz_rorv_epi64 (__mmask8 __U__m256i __A__m256i __B)
4716{
4717  return (__m256i)__builtin_ia32_selectq_256(__U,
4718                                            (__v4di)_mm256_rorv_epi64(__A__B),
4719                                            (__v4di)_mm256_setzero_si256());
4720}
4721
4722static __inline__ __m128i __DEFAULT_FN_ATTRS128
4723_mm_mask_sllv_epi64(__m128i __W__mmask8 __U__m128i __X__m128i __Y)
4724{
4725  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4726                                             (__v2di)_mm_sllv_epi64(__X__Y),
4727                                             (__v2di)__W);
4728}
4729
4730static __inline__ __m128i __DEFAULT_FN_ATTRS128
4731_mm_maskz_sllv_epi64(__mmask8 __U__m128i __X__m128i __Y)
4732{
4733  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4734                                             (__v2di)_mm_sllv_epi64(__X__Y),
4735                                             (__v2di)_mm_setzero_si128());
4736}
4737
4738static __inline__ __m256i __DEFAULT_FN_ATTRS256
4739_mm256_mask_sllv_epi64(__m256i __W__mmask8 __U__m256i __X__m256i __Y)
4740{
4741  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4742                                            (__v4di)_mm256_sllv_epi64(__X__Y),
4743                                            (__v4di)__W);
4744}
4745
4746static __inline__ __m256i __DEFAULT_FN_ATTRS256
4747_mm256_maskz_sllv_epi64(__mmask8 __U__m256i __X__m256i __Y)
4748{
4749  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4750                                            (__v4di)_mm256_sllv_epi64(__X__Y),
4751                                            (__v4di)_mm256_setzero_si256());
4752}
4753
4754static __inline__ __m128i __DEFAULT_FN_ATTRS128
4755_mm_mask_sllv_epi32(__m128i __W__mmask8 __U__m128i __X__m128i __Y)
4756{
4757  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4758                                             (__v4si)_mm_sllv_epi32(__X__Y),
4759                                             (__v4si)__W);
4760}
4761
4762static __inline__ __m128i __DEFAULT_FN_ATTRS128
4763_mm_maskz_sllv_epi32(__mmask8 __U__m128i __X__m128i __Y)
4764{
4765  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4766                                             (__v4si)_mm_sllv_epi32(__X__Y),
4767                                             (__v4si)_mm_setzero_si128());
4768}
4769
4770static __inline__ __m256i __DEFAULT_FN_ATTRS256
4771_mm256_mask_sllv_epi32(__m256i __W__mmask8 __U__m256i __X__m256i __Y)
4772{
4773  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4774                                            (__v8si)_mm256_sllv_epi32(__X__Y),
4775                                            (__v8si)__W);
4776}
4777
4778static __inline__ __m256i __DEFAULT_FN_ATTRS256
4779_mm256_maskz_sllv_epi32(__mmask8 __U__m256i __X__m256i __Y)
4780{
4781  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4782                                            (__v8si)_mm256_sllv_epi32(__X__Y),
4783                                            (__v8si)_mm256_setzero_si256());
4784}
4785
4786static __inline__ __m128i __DEFAULT_FN_ATTRS128
4787_mm_mask_srlv_epi64(__m128i __W__mmask8 __U__m128i __X__m128i __Y)
4788{
4789  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4790                                             (__v2di)_mm_srlv_epi64(__X__Y),
4791                                             (__v2di)__W);
4792}
4793
4794static __inline__ __m128i __DEFAULT_FN_ATTRS128
4795_mm_maskz_srlv_epi64(__mmask8 __U__m128i __X__m128i __Y)
4796{
4797  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4798                                             (__v2di)_mm_srlv_epi64(__X__Y),
4799                                             (__v2di)_mm_setzero_si128());
4800}
4801
4802static __inline__ __m256i __DEFAULT_FN_ATTRS256
4803_mm256_mask_srlv_epi64(__m256i __W__mmask8 __U__m256i __X__m256i __Y)
4804{
4805  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4806                                            (__v4di)_mm256_srlv_epi64(__X__Y),
4807                                            (__v4di)__W);
4808}
4809
4810static __inline__ __m256i __DEFAULT_FN_ATTRS256
4811_mm256_maskz_srlv_epi64(__mmask8 __U__m256i __X__m256i __Y)
4812{
4813  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4814                                            (__v4di)_mm256_srlv_epi64(__X__Y),
4815                                            (__v4di)_mm256_setzero_si256());
4816}
4817
4818static __inline__ __m128i __DEFAULT_FN_ATTRS128
4819_mm_mask_srlv_epi32(__m128i __W__mmask8 __U__m128i __X__m128i __Y)
4820{
4821  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4822                                            (__v4si)_mm_srlv_epi32(__X__Y),
4823                                            (__v4si)__W);
4824}
4825
4826static __inline__ __m128i __DEFAULT_FN_ATTRS128
4827_mm_maskz_srlv_epi32(__mmask8 __U__m128i __X__m128i __Y)
4828{
4829  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4830                                            (__v4si)_mm_srlv_epi32(__X__Y),
4831                                            (__v4si)_mm_setzero_si128());
4832}
4833
4834static __inline__ __m256i __DEFAULT_FN_ATTRS256
4835_mm256_mask_srlv_epi32(__m256i __W__mmask8 __U__m256i __X__m256i __Y)
4836{
4837  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4838                                            (__v8si)_mm256_srlv_epi32(__X__Y),
4839                                            (__v8si)__W);
4840}
4841
4842static __inline__ __m256i __DEFAULT_FN_ATTRS256
4843_mm256_maskz_srlv_epi32(__mmask8 __U__m256i __X__m256i __Y)
4844{
4845  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4846                                            (__v8si)_mm256_srlv_epi32(__X__Y),
4847                                            (__v8si)_mm256_setzero_si256());
4848}
4849
4850static __inline__ __m128i __DEFAULT_FN_ATTRS128
4851_mm_mask_srl_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
4852{
4853  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4854                                             (__v4si)_mm_srl_epi32(__A__B),
4855                                             (__v4si)__W);
4856}
4857
4858static __inline__ __m128i __DEFAULT_FN_ATTRS128
4859_mm_maskz_srl_epi32(__mmask8 __U__m128i __A__m128i __B)
4860{
4861  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4862                                             (__v4si)_mm_srl_epi32(__A__B),
4863                                             (__v4si)_mm_setzero_si128());
4864}
4865
4866static __inline__ __m256i __DEFAULT_FN_ATTRS256
4867_mm256_mask_srl_epi32(__m256i __W__mmask8 __U__m256i __A__m128i __B)
4868{
4869  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4870                                             (__v8si)_mm256_srl_epi32(__A__B),
4871                                             (__v8si)__W);
4872}
4873
4874static __inline__ __m256i __DEFAULT_FN_ATTRS256
4875_mm256_maskz_srl_epi32(__mmask8 __U__m256i __A__m128i __B)
4876{
4877  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4878                                             (__v8si)_mm256_srl_epi32(__A__B),
4879                                             (__v8si)_mm256_setzero_si256());
4880}
4881
4882static __inline__ __m128i __DEFAULT_FN_ATTRS128
4883_mm_mask_srli_epi32(__m128i __W__mmask8 __U__m128i __Aint __B)
4884{
4885  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4886                                             (__v4si)_mm_srli_epi32(__A__B),
4887                                             (__v4si)__W);
4888}
4889
4890static __inline__ __m128i __DEFAULT_FN_ATTRS128
4891_mm_maskz_srli_epi32(__mmask8 __U__m128i __Aint __B)
4892{
4893  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4894                                             (__v4si)_mm_srli_epi32(__A__B),
4895                                             (__v4si)_mm_setzero_si128());
4896}
4897
4898static __inline__ __m256i __DEFAULT_FN_ATTRS256
4899_mm256_mask_srli_epi32(__m256i __W__mmask8 __U__m256i __Aint __B)
4900{
4901  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4902                                             (__v8si)_mm256_srli_epi32(__A__B),
4903                                             (__v8si)__W);
4904}
4905
4906static __inline__ __m256i __DEFAULT_FN_ATTRS256
4907_mm256_maskz_srli_epi32(__mmask8 __U__m256i __Aint __B)
4908{
4909  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4910                                             (__v8si)_mm256_srli_epi32(__A__B),
4911                                             (__v8si)_mm256_setzero_si256());
4912}
4913
4914static __inline__ __m128i __DEFAULT_FN_ATTRS128
4915_mm_mask_srl_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
4916{
4917  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4918                                             (__v2di)_mm_srl_epi64(__A__B),
4919                                             (__v2di)__W);
4920}
4921
4922static __inline__ __m128i __DEFAULT_FN_ATTRS128
4923_mm_maskz_srl_epi64(__mmask8 __U__m128i __A__m128i __B)
4924{
4925  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4926                                             (__v2di)_mm_srl_epi64(__A__B),
4927                                             (__v2di)_mm_setzero_si128());
4928}
4929
4930static __inline__ __m256i __DEFAULT_FN_ATTRS256
4931_mm256_mask_srl_epi64(__m256i __W__mmask8 __U__m256i __A__m128i __B)
4932{
4933  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4934                                             (__v4di)_mm256_srl_epi64(__A__B),
4935                                             (__v4di)__W);
4936}
4937
4938static __inline__ __m256i __DEFAULT_FN_ATTRS256
4939_mm256_maskz_srl_epi64(__mmask8 __U__m256i __A__m128i __B)
4940{
4941  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4942                                             (__v4di)_mm256_srl_epi64(__A__B),
4943                                             (__v4di)_mm256_setzero_si256());
4944}
4945
4946static __inline__ __m128i __DEFAULT_FN_ATTRS128
4947_mm_mask_srli_epi64(__m128i __W__mmask8 __U__m128i __Aint __B)
4948{
4949  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4950                                             (__v2di)_mm_srli_epi64(__A__B),
4951                                             (__v2di)__W);
4952}
4953
4954static __inline__ __m128i __DEFAULT_FN_ATTRS128
4955_mm_maskz_srli_epi64(__mmask8 __U__m128i __Aint __B)
4956{
4957  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4958                                             (__v2di)_mm_srli_epi64(__A__B),
4959                                             (__v2di)_mm_setzero_si128());
4960}
4961
4962static __inline__ __m256i __DEFAULT_FN_ATTRS256
4963_mm256_mask_srli_epi64(__m256i __W__mmask8 __U__m256i __Aint __B)
4964{
4965  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4966                                             (__v4di)_mm256_srli_epi64(__A__B),
4967                                             (__v4di)__W);
4968}
4969
4970static __inline__ __m256i __DEFAULT_FN_ATTRS256
4971_mm256_maskz_srli_epi64(__mmask8 __U__m256i __Aint __B)
4972{
4973  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4974                                             (__v4di)_mm256_srli_epi64(__A__B),
4975                                             (__v4di)_mm256_setzero_si256());
4976}
4977
4978static __inline__ __m128i __DEFAULT_FN_ATTRS128
4979_mm_mask_srav_epi32(__m128i __W__mmask8 __U__m128i __X__m128i __Y)
4980{
4981  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4982                                            (__v4si)_mm_srav_epi32(__X__Y),
4983                                            (__v4si)__W);
4984}
4985
4986static __inline__ __m128i __DEFAULT_FN_ATTRS128
4987_mm_maskz_srav_epi32(__mmask8 __U__m128i __X__m128i __Y)
4988{
4989  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4990                                            (__v4si)_mm_srav_epi32(__X__Y),
4991                                            (__v4si)_mm_setzero_si128());
4992}
4993
4994static __inline__ __m256i __DEFAULT_FN_ATTRS256
4995_mm256_mask_srav_epi32(__m256i __W__mmask8 __U__m256i __X__m256i __Y)
4996{
4997  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4998                                            (__v8si)_mm256_srav_epi32(__X__Y),
4999                                            (__v8si)__W);
5000}
5001
5002static __inline__ __m256i __DEFAULT_FN_ATTRS256
5003_mm256_maskz_srav_epi32(__mmask8 __U__m256i __X__m256i __Y)
5004{
5005  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5006                                            (__v8si)_mm256_srav_epi32(__X__Y),
5007                                            (__v8si)_mm256_setzero_si256());
5008}
5009
5010static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011_mm_srav_epi64(__m128i __X__m128i __Y)
5012{
5013  return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5014}
5015
5016static __inline__ __m128i __DEFAULT_FN_ATTRS128
5017_mm_mask_srav_epi64(__m128i __W__mmask8 __U__m128i __X__m128i __Y)
5018{
5019  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5020                                             (__v2di)_mm_srav_epi64(__X__Y),
5021                                             (__v2di)__W);
5022}
5023
5024static __inline__ __m128i __DEFAULT_FN_ATTRS128
5025_mm_maskz_srav_epi64(__mmask8 __U__m128i __X__m128i __Y)
5026{
5027  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5028                                             (__v2di)_mm_srav_epi64(__X__Y),
5029                                             (__v2di)_mm_setzero_si128());
5030}
5031
5032static __inline__ __m256i __DEFAULT_FN_ATTRS256
5033_mm256_srav_epi64(__m256i __X__m256i __Y)
5034{
5035  return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di__Y);
5036}
5037
5038static __inline__ __m256i __DEFAULT_FN_ATTRS256
5039_mm256_mask_srav_epi64(__m256i __W__mmask8 __U__m256i __X__m256i __Y)
5040{
5041  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5042                                             (__v4di)_mm256_srav_epi64(__X__Y),
5043                                             (__v4di)__W);
5044}
5045
5046static __inline__ __m256i __DEFAULT_FN_ATTRS256
5047_mm256_maskz_srav_epi64 (__mmask8 __U__m256i __X__m256i __Y)
5048{
5049  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5050                                             (__v4di)_mm256_srav_epi64(__X__Y),
5051                                             (__v4di)_mm256_setzero_si256());
5052}
5053
5054static __inline__ __m128i __DEFAULT_FN_ATTRS128
5055_mm_mask_mov_epi32 (__m128i __W__mmask8 __U__m128i __A)
5056{
5057  return (__m128i__builtin_ia32_selectd_128 ((__mmask8__U,
5058                 (__v4si__A,
5059                 (__v4si__W);
5060}
5061
5062static __inline__ __m128i __DEFAULT_FN_ATTRS128
5063_mm_maskz_mov_epi32 (__mmask8 __U__m128i __A)
5064{
5065  return (__m128i__builtin_ia32_selectd_128 ((__mmask8__U,
5066                 (__v4si__A,
5067                 (__v4si_mm_setzero_si128 ());
5068}
5069
5070
5071static __inline__ __m256i __DEFAULT_FN_ATTRS256
5072_mm256_mask_mov_epi32 (__m256i __W__mmask8 __U__m256i __A)
5073{
5074  return (__m256i__builtin_ia32_selectd_256 ((__mmask8__U,
5075                 (__v8si__A,
5076                 (__v8si__W);
5077}
5078
5079static __inline__ __m256i __DEFAULT_FN_ATTRS256
5080_mm256_maskz_mov_epi32 (__mmask8 __U__m256i __A)
5081{
5082  return (__m256i__builtin_ia32_selectd_256 ((__mmask8__U,
5083                 (__v8si__A,
5084                 (__v8si_mm256_setzero_si256 ());
5085}
5086
5087static __inline __m128i __DEFAULT_FN_ATTRS128
5088_mm_load_epi32 (void const *__P)
5089{
5090  return *(__m128i *) __P;
5091}
5092
5093static __inline__ __m128i __DEFAULT_FN_ATTRS128
5094_mm_mask_load_epi32 (__m128i __W__mmask8 __Uvoid const *__P)
5095{
5096  return (__m128i__builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5097              (__v4si__W,
5098              (__mmask8)
5099              __U);
5100}
5101
5102static __inline__ __m128i __DEFAULT_FN_ATTRS128
5103_mm_maskz_load_epi32 (__mmask8 __Uvoid const *__P)
5104{
5105  return (__m128i__builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5106              (__v4si)
5107              _mm_setzero_si128 (),
5108              (__mmask8)
5109              __U);
5110}
5111
5112static __inline __m256i __DEFAULT_FN_ATTRS256
5113_mm256_load_epi32 (void const *__P)
5114{
5115  return *(__m256i *) __P;
5116}
5117
5118static __inline__ __m256i __DEFAULT_FN_ATTRS256
5119_mm256_mask_load_epi32 (__m256i __W__mmask8 __Uvoid const *__P)
5120{
5121  return (__m256i__builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5122              (__v8si__W,
5123              (__mmask8)
5124              __U);
5125}
5126
5127static __inline__ __m256i __DEFAULT_FN_ATTRS256
5128_mm256_maskz_load_epi32 (__mmask8 __Uvoid const *__P)
5129{
5130  return (__m256i__builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5131              (__v8si)
5132              _mm256_setzero_si256 (),
5133              (__mmask8)
5134              __U);
5135}
5136
5137static __inline void __DEFAULT_FN_ATTRS128
5138_mm_store_epi32 (void *__P__m128i __A)
5139{
5140  *(__m128i *) __P = __A;
5141}
5142
5143static __inline__ void __DEFAULT_FN_ATTRS128
5144_mm_mask_store_epi32 (void *__P__mmask8 __U__m128i __A)
5145{
5146  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5147          (__v4si__A,
5148          (__mmask8__U);
5149}
5150
5151static __inline void __DEFAULT_FN_ATTRS256
5152_mm256_store_epi32 (void *__P__m256i __A)
5153{
5154  *(__m256i *) __P = __A;
5155}
5156
5157static __inline__ void __DEFAULT_FN_ATTRS256
5158_mm256_mask_store_epi32 (void *__P__mmask8 __U__m256i __A)
5159{
5160  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5161          (__v8si__A,
5162          (__mmask8__U);
5163}
5164
5165static __inline__ __m128i __DEFAULT_FN_ATTRS128
5166_mm_mask_mov_epi64 (__m128i __W__mmask8 __U__m128i __A)
5167{
5168  return (__m128i__builtin_ia32_selectq_128 ((__mmask8__U,
5169                 (__v2di__A,
5170                 (__v2di__W);
5171}
5172
5173static __inline__ __m128i __DEFAULT_FN_ATTRS128
5174_mm_maskz_mov_epi64 (__mmask8 __U__m128i __A)
5175{
5176  return (__m128i__builtin_ia32_selectq_128 ((__mmask8__U,
5177                 (__v2di__A,
5178                 (__v2di_mm_setzero_si128 ());
5179}
5180
5181static __inline__ __m256i __DEFAULT_FN_ATTRS256
5182_mm256_mask_mov_epi64 (__m256i __W__mmask8 __U__m256i __A)
5183{
5184  return (__m256i__builtin_ia32_selectq_256 ((__mmask8__U,
5185                 (__v4di__A,
5186                 (__v4di__W);
5187}
5188
5189static __inline__ __m256i __DEFAULT_FN_ATTRS256
5190_mm256_maskz_mov_epi64 (__mmask8 __U__m256i __A)
5191{
5192  return (__m256i__builtin_ia32_selectq_256 ((__mmask8__U,
5193                 (__v4di__A,
5194                 (__v4di_mm256_setzero_si256 ());
5195}
5196
5197static __inline __m128i __DEFAULT_FN_ATTRS128
5198_mm_load_epi64 (void const *__P)
5199{
5200  return *(__m128i *) __P;
5201}
5202
5203static __inline__ __m128i __DEFAULT_FN_ATTRS128
5204_mm_mask_load_epi64 (__m128i __W__mmask8 __Uvoid const *__P)
5205{
5206  return (__m128i__builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5207              (__v2di__W,
5208              (__mmask8)
5209              __U);
5210}
5211
5212static __inline__ __m128i __DEFAULT_FN_ATTRS128
5213_mm_maskz_load_epi64 (__mmask8 __Uvoid const *__P)
5214{
5215  return (__m128i__builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5216              (__v2di)
5217              _mm_setzero_si128 (),
5218              (__mmask8)
5219              __U);
5220}
5221
5222static __inline __m256i __DEFAULT_FN_ATTRS256
5223_mm256_load_epi64 (void const *__P)
5224{
5225  return *(__m256i *) __P;
5226}
5227
5228static __inline__ __m256i __DEFAULT_FN_ATTRS256
5229_mm256_mask_load_epi64 (__m256i __W__mmask8 __Uvoid const *__P)
5230{
5231  return (__m256i__builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5232              (__v4di__W,
5233              (__mmask8)
5234              __U);
5235}
5236
5237static __inline__ __m256i __DEFAULT_FN_ATTRS256
5238_mm256_maskz_load_epi64 (__mmask8 __Uvoid const *__P)
5239{
5240  return (__m256i__builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5241              (__v4di)
5242              _mm256_setzero_si256 (),
5243              (__mmask8)
5244              __U);
5245}
5246
5247static __inline void __DEFAULT_FN_ATTRS128
5248_mm_store_epi64 (void *__P__m128i __A)
5249{
5250  *(__m128i *) __P = __A;
5251}
5252
5253static __inline__ void __DEFAULT_FN_ATTRS128
5254_mm_mask_store_epi64 (void *__P__mmask8 __U__m128i __A)
5255{
5256  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5257          (__v2di__A,
5258          (__mmask8__U);
5259}
5260
5261static __inline void __DEFAULT_FN_ATTRS256
5262_mm256_store_epi64 (void *__P__m256i __A)
5263{
5264  *(__m256i *) __P = __A;
5265}
5266
5267static __inline__ void __DEFAULT_FN_ATTRS256
5268_mm256_mask_store_epi64 (void *__P__mmask8 __U__m256i __A)
5269{
5270  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5271          (__v4di__A,
5272          (__mmask8__U);
5273}
5274
5275static __inline__ __m128d __DEFAULT_FN_ATTRS128
5276_mm_mask_movedup_pd (__m128d __W__mmask8 __U__m128d __A)
5277{
5278  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5279                                              (__v2df)_mm_movedup_pd(__A),
5280                                              (__v2df)__W);
5281}
5282
5283static __inline__ __m128d __DEFAULT_FN_ATTRS128
5284_mm_maskz_movedup_pd (__mmask8 __U__m128d __A)
5285{
5286  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5287                                              (__v2df)_mm_movedup_pd(__A),
5288                                              (__v2df)_mm_setzero_pd());
5289}
5290
5291static __inline__ __m256d __DEFAULT_FN_ATTRS256
5292_mm256_mask_movedup_pd (__m256d __W__mmask8 __U__m256d __A)
5293{
5294  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5295                                              (__v4df)_mm256_movedup_pd(__A),
5296                                              (__v4df)__W);
5297}
5298
5299static __inline__ __m256d __DEFAULT_FN_ATTRS256
5300_mm256_maskz_movedup_pd (__mmask8 __U__m256d __A)
5301{
5302  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5303                                              (__v4df)_mm256_movedup_pd(__A),
5304                                              (__v4df)_mm256_setzero_pd());
5305}
5306
5307static __inline__ __m128i __DEFAULT_FN_ATTRS128
5308_mm_mask_set1_epi32(__m128i __O__mmask8 __Mint __A)
5309{
5310   return (__m128i)__builtin_ia32_selectd_128(__M,
5311                                              (__v4si_mm_set1_epi32(__A),
5312                                              (__v4si)__O);
5313}
5314
5315static __inline__ __m128i __DEFAULT_FN_ATTRS128
5316_mm_maskz_set1_epi32__mmask8 __Mint __A)
5317{
5318   return (__m128i)__builtin_ia32_selectd_128(__M,
5319                                              (__v4si_mm_set1_epi32(__A),
5320                                              (__v4si)_mm_setzero_si128());
5321}
5322
5323static __inline__ __m256i __DEFAULT_FN_ATTRS256
5324_mm256_mask_set1_epi32(__m256i __O__mmask8 __Mint __A)
5325{
5326   return (__m256i)__builtin_ia32_selectd_256(__M,
5327                                              (__v8si_mm256_set1_epi32(__A),
5328                                              (__v8si)__O);
5329}
5330
5331static __inline__ __m256i __DEFAULT_FN_ATTRS256
5332_mm256_maskz_set1_epi32__mmask8 __Mint __A)
5333{
5334   return (__m256i)__builtin_ia32_selectd_256(__M,
5335                                              (__v8si_mm256_set1_epi32(__A),
5336                                              (__v8si)_mm256_setzero_si256());
5337}
5338
5339
5340static __inline__ __m128i __DEFAULT_FN_ATTRS128
5341_mm_mask_set1_epi64 (__m128i __O__mmask8 __Mlong long __A)
5342{
5343  return (__m128i__builtin_ia32_selectq_128(__M,
5344                                              (__v2di_mm_set1_epi64x(__A),
5345                                              (__v2di__O);
5346}
5347
5348static __inline__ __m128i __DEFAULT_FN_ATTRS128
5349_mm_maskz_set1_epi64 (__mmask8 __Mlong long __A)
5350{
5351  return (__m128i__builtin_ia32_selectq_128(__M,
5352                                              (__v2di_mm_set1_epi64x(__A),
5353                                              (__v2di_mm_setzero_si128());
5354}
5355
5356static __inline__ __m256i __DEFAULT_FN_ATTRS256
5357_mm256_mask_set1_epi64 (__m256i __O__mmask8 __Mlong long __A)
5358{
5359  return (__m256i__builtin_ia32_selectq_256(__M,
5360                                              (__v4di_mm256_set1_epi64x(__A),
5361                                              (__v4di__O) ;
5362}
5363
5364static __inline__ __m256i __DEFAULT_FN_ATTRS256
5365_mm256_maskz_set1_epi64 (__mmask8 __Mlong long __A)
5366{
5367   return (__m256i__builtin_ia32_selectq_256(__M,
5368                                               (__v4di_mm256_set1_epi64x(__A),
5369                                               (__v4di_mm256_setzero_si256());
5370}
5371
/* Expands to __builtin_ia32_fixupimmpd128_mask on A, B, C and imm with an
 * all-ones mask.  The expansion is fully parenthesized so the leading cast
 * stays attached to the call wherever the macro is used. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)-1))
5377
/* Expands to __builtin_ia32_fixupimmpd128_mask on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5383
/* Expands to __builtin_ia32_fixupimmpd128_maskz on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5389
/* Expands to __builtin_ia32_fixupimmpd256_mask on A, B, C and imm with an
 * all-ones mask.  The expansion is fully parenthesized so the leading cast
 * stays attached to the call wherever the macro is used. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)-1))
5395
/* Expands to __builtin_ia32_fixupimmpd256_mask on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5401
/* Expands to __builtin_ia32_fixupimmpd256_maskz on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5407
/* Expands to __builtin_ia32_fixupimmps128_mask on A, B, C and imm with an
 * all-ones mask.  The expansion is fully parenthesized so the leading cast
 * stays attached to the call wherever the macro is used. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
5413
/* Expands to __builtin_ia32_fixupimmps128_mask on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
5419
/* Expands to __builtin_ia32_fixupimmps128_maskz on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5425
/* Expands to __builtin_ia32_fixupimmps256_mask on A, B, C and imm with an
 * all-ones mask.  The expansion is fully parenthesized so the leading cast
 * stays attached to the call wherever the macro is used. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
5431
/* Expands to __builtin_ia32_fixupimmps256_mask on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
5437
/* Expands to __builtin_ia32_fixupimmps256_maskz on A, B, C and imm with mask
 * U.  The expansion is fully parenthesized so the leading cast stays
 * attached to the call wherever the macro is used. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                              (__v8sf)(__m256)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5443
5444static __inline__ __m128d __DEFAULT_FN_ATTRS128
5445_mm_mask_load_pd (__m128d __W__mmask8 __Uvoid const *__P)
5446{
5447  return (__m128d__builtin_ia32_loadapd128_mask ((__v2df *) __P,
5448               (__v2df__W,
5449               (__mmask8__U);
5450}
5451
5452static __inline__ __m128d __DEFAULT_FN_ATTRS128
5453_mm_maskz_load_pd (__mmask8 __Uvoid const *__P)
5454{
5455  return (__m128d__builtin_ia32_loadapd128_mask ((__v2df *) __P,
5456               (__v2df)
5457               _mm_setzero_pd (),
5458               (__mmask8__U);
5459}
5460
5461static __inline__ __m256d __DEFAULT_FN_ATTRS256
5462_mm256_mask_load_pd (__m256d __W__mmask8 __Uvoid const *__P)
5463{
5464  return (__m256d__builtin_ia32_loadapd256_mask ((__v4df *) __P,
5465               (__v4df__W,
5466               (__mmask8__U);
5467}
5468
5469static __inline__ __m256d __DEFAULT_FN_ATTRS256
5470_mm256_maskz_load_pd (__mmask8 __Uvoid const *__P)
5471{
5472  return (__m256d__builtin_ia32_loadapd256_mask ((__v4df *) __P,
5473               (__v4df)
5474               _mm256_setzero_pd (),
5475               (__mmask8__U);
5476}
5477
5478static __inline__ __m128 __DEFAULT_FN_ATTRS128
5479_mm_mask_load_ps (__m128 __W__mmask8 __Uvoid const *__P)
5480{
5481  return (__m128__builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5482              (__v4sf__W,
5483              (__mmask8__U);
5484}
5485
5486static __inline__ __m128 __DEFAULT_FN_ATTRS128
5487_mm_maskz_load_ps (__mmask8 __Uvoid const *__P)
5488{
5489  return (__m128__builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5490              (__v4sf)
5491              _mm_setzero_ps (),
5492              (__mmask8__U);
5493}
5494
5495static __inline__ __m256 __DEFAULT_FN_ATTRS256
5496_mm256_mask_load_ps (__m256 __W__mmask8 __Uvoid const *__P)
5497{
5498  return (__m256__builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5499              (__v8sf__W,
5500              (__mmask8__U);
5501}
5502
5503static __inline__ __m256 __DEFAULT_FN_ATTRS256
5504_mm256_maskz_load_ps (__mmask8 __Uvoid const *__P)
5505{
5506  return (__m256__builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5507              (__v8sf)
5508              _mm256_setzero_ps (),
5509              (__mmask8__U);
5510}
5511
5512static __inline __m128i __DEFAULT_FN_ATTRS128
5513_mm_loadu_epi64 (void const *__P)
5514{
5515  struct __loadu_epi64 {
5516    __m128i_u __v;
5517  } __attribute__((__packed__, __may_alias__));
5518  return ((struct __loadu_epi64*)__P)->__v;
5519}
5520
5521static __inline__ __m128i __DEFAULT_FN_ATTRS128
5522_mm_mask_loadu_epi64 (__m128i __W__mmask8 __Uvoid const *__P)
5523{
5524  return (__m128i__builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5525                 (__v2di__W,
5526                 (__mmask8__U);
5527}
5528
5529static __inline__ __m128i __DEFAULT_FN_ATTRS128
5530_mm_maskz_loadu_epi64 (__mmask8 __Uvoid const *__P)
5531{
5532  return (__m128i__builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5533                 (__v2di)
5534                 _mm_setzero_si128 (),
5535                 (__mmask8__U);
5536}
5537
5538static __inline __m256i __DEFAULT_FN_ATTRS256
5539_mm256_loadu_epi64 (void const *__P)
5540{
5541  struct __loadu_epi64 {
5542    __m256i_u __v;
5543  } __attribute__((__packed__, __may_alias__));
5544  return ((struct __loadu_epi64*)__P)->__v;
5545}
5546
5547static __inline__ __m256i __DEFAULT_FN_ATTRS256
5548_mm256_mask_loadu_epi64 (__m256i __W__mmask8 __Uvoid const *__P)
5549{
5550  return (__m256i__builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5551                 (__v4di__W,
5552                 (__mmask8__U);
5553}
5554
5555static __inline__ __m256i __DEFAULT_FN_ATTRS256
5556_mm256_maskz_loadu_epi64 (__mmask8 __Uvoid const *__P)
5557{
5558  return (__m256i__builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5559                 (__v4di)
5560                 _mm256_setzero_si256 (),
5561                 (__mmask8__U);
5562}
5563
5564static __inline __m128i __DEFAULT_FN_ATTRS128
5565_mm_loadu_epi32 (void const *__P)
5566{
5567  struct __loadu_epi32 {
5568    __m128i_u __v;
5569  } __attribute__((__packed__, __may_alias__));
5570  return ((struct __loadu_epi32*)__P)->__v;
5571}
5572
5573static __inline__ __m128i __DEFAULT_FN_ATTRS128
5574_mm_mask_loadu_epi32 (__m128i __W__mmask8 __Uvoid const *__P)
5575{
5576  return (__m128i__builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5577                 (__v4si__W,
5578                 (__mmask8__U);
5579}
5580
5581static __inline__ __m128i __DEFAULT_FN_ATTRS128
5582_mm_maskz_loadu_epi32 (__mmask8 __Uvoid const *__P)
5583{
5584  return (__m128i__builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5585                 (__v4si)
5586                 _mm_setzero_si128 (),
5587                 (__mmask8__U);
5588}
5589
5590static __inline __m256i __DEFAULT_FN_ATTRS256
5591_mm256_loadu_epi32 (void const *__P)
5592{
5593  struct __loadu_epi32 {
5594    __m256i_u __v;
5595  } __attribute__((__packed__, __may_alias__));
5596  return ((struct __loadu_epi32*)__P)->__v;
5597}
5598
5599static __inline__ __m256i __DEFAULT_FN_ATTRS256
5600_mm256_mask_loadu_epi32 (__m256i __W__mmask8 __Uvoid const *__P)
5601{
5602  return (__m256i__builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5603                 (__v8si__W,
5604                 (__mmask8__U);
5605}
5606
5607static __inline__ __m256i __DEFAULT_FN_ATTRS256
5608_mm256_maskz_loadu_epi32 (__mmask8 __Uvoid const *__P)
5609{
5610  return (__m256i__builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5611                 (__v8si)
5612                 _mm256_setzero_si256 (),
5613                 (__mmask8__U);
5614}
5615
5616static __inline__ __m128d __DEFAULT_FN_ATTRS128
5617_mm_mask_loadu_pd (__m128d __W__mmask8 __Uvoid const *__P)
5618{
5619  return (__m128d__builtin_ia32_loadupd128_mask ((__v2df *) __P,
5620               (__v2df__W,
5621               (__mmask8__U);
5622}
5623
5624static __inline__ __m128d __DEFAULT_FN_ATTRS128
5625_mm_maskz_loadu_pd (__mmask8 __Uvoid const *__P)
5626{
5627  return (__m128d__builtin_ia32_loadupd128_mask ((__v2df *) __P,
5628               (__v2df)
5629               _mm_setzero_pd (),
5630               (__mmask8__U);
5631}
5632
5633static __inline__ __m256d __DEFAULT_FN_ATTRS256
5634_mm256_mask_loadu_pd (__m256d __W__mmask8 __Uvoid const *__P)
5635{
5636  return (__m256d__builtin_ia32_loadupd256_mask ((__v4df *) __P,
5637               (__v4df__W,
5638               (__mmask8__U);
5639}
5640
5641static __inline__ __m256d __DEFAULT_FN_ATTRS256
5642_mm256_maskz_loadu_pd (__mmask8 __Uvoid const *__P)
5643{
5644  return (__m256d__builtin_ia32_loadupd256_mask ((__v4df *) __P,
5645               (__v4df)
5646               _mm256_setzero_pd (),
5647               (__mmask8__U);
5648}
5649
5650static __inline__ __m128 __DEFAULT_FN_ATTRS128
5651_mm_mask_loadu_ps (__m128 __W__mmask8 __Uvoid const *__P)
5652{
5653  return (__m128__builtin_ia32_loadups128_mask ((__v4sf *) __P,
5654              (__v4sf__W,
5655              (__mmask8__U);
5656}
5657
5658static __inline__ __m128 __DEFAULT_FN_ATTRS128
5659_mm_maskz_loadu_ps (__mmask8 __Uvoid const *__P)
5660{
5661  return (__m128__builtin_ia32_loadups128_mask ((__v4sf *) __P,
5662              (__v4sf)
5663              _mm_setzero_ps (),
5664              (__mmask8__U);
5665}
5666
5667static __inline__ __m256 __DEFAULT_FN_ATTRS256
5668_mm256_mask_loadu_ps (__m256 __W__mmask8 __Uvoid const *__P)
5669{
5670  return (__m256__builtin_ia32_loadups256_mask ((__v8sf *) __P,
5671              (__v8sf__W,
5672              (__mmask8__U);
5673}
5674
5675static __inline__ __m256 __DEFAULT_FN_ATTRS256
5676_mm256_maskz_loadu_ps (__mmask8 __Uvoid const *__P)
5677{
5678  return (__m256__builtin_ia32_loadups256_mask ((__v8sf *) __P,
5679              (__v8sf)
5680              _mm256_setzero_ps (),
5681              (__mmask8__U);
5682}
5683
5684static __inline__ void __DEFAULT_FN_ATTRS128
5685_mm_mask_store_pd (void *__P__mmask8 __U__m128d __A)
5686{
5687  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5688           (__v2df__A,
5689           (__mmask8__U);
5690}
5691
5692static __inline__ void __DEFAULT_FN_ATTRS256
5693_mm256_mask_store_pd (void *__P__mmask8 __U__m256d __A)
5694{
5695  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5696           (__v4df__A,
5697           (__mmask8__U);
5698}
5699
5700static __inline__ void __DEFAULT_FN_ATTRS128
5701_mm_mask_store_ps (void *__P__mmask8 __U__m128 __A)
5702{
5703  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5704           (__v4sf__A,
5705           (__mmask8__U);
5706}
5707
5708static __inline__ void __DEFAULT_FN_ATTRS256
5709_mm256_mask_store_ps (void *__P__mmask8 __U__m256 __A)
5710{
5711  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5712           (__v8sf__A,
5713           (__mmask8__U);
5714}
5715
5716static __inline void __DEFAULT_FN_ATTRS128
5717_mm_storeu_epi64 (void *__P__m128i __A)
5718{
5719  struct __storeu_epi64 {
5720    __m128i_u __v;
5721  } __attribute__((__packed__, __may_alias__));
5722  ((struct __storeu_epi64*)__P)->__v = __A;
5723}
5724
5725static __inline__ void __DEFAULT_FN_ATTRS128
5726_mm_mask_storeu_epi64 (void *__P__mmask8 __U__m128i __A)
5727{
5728  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5729             (__v2di__A,
5730             (__mmask8__U);
5731}
5732
5733static __inline void __DEFAULT_FN_ATTRS256
5734_mm256_storeu_epi64 (void *__P__m256i __A)
5735{
5736  struct __storeu_epi64 {
5737    __m256i_u __v;
5738  } __attribute__((__packed__, __may_alias__));
5739  ((struct __storeu_epi64*)__P)->__v = __A;
5740}
5741
5742static __inline__ void __DEFAULT_FN_ATTRS256
5743_mm256_mask_storeu_epi64 (void *__P__mmask8 __U__m256i __A)
5744{
5745  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5746             (__v4di__A,
5747             (__mmask8__U);
5748}
5749
5750static __inline void __DEFAULT_FN_ATTRS128
5751_mm_storeu_epi32 (void *__P__m128i __A)
5752{
5753  struct __storeu_epi32 {
5754    __m128i_u __v;
5755  } __attribute__((__packed__, __may_alias__));
5756  ((struct __storeu_epi32*)__P)->__v = __A;
5757}
5758
5759static __inline__ void __DEFAULT_FN_ATTRS128
5760_mm_mask_storeu_epi32 (void *__P__mmask8 __U__m128i __A)
5761{
5762  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5763             (__v4si__A,
5764             (__mmask8__U);
5765}
5766
5767static __inline void __DEFAULT_FN_ATTRS256
5768_mm256_storeu_epi32 (void *__P__m256i __A)
5769{
5770  struct __storeu_epi32 {
5771    __m256i_u __v;
5772  } __attribute__((__packed__, __may_alias__));
5773  ((struct __storeu_epi32*)__P)->__v = __A;
5774}
5775
5776static __inline__ void __DEFAULT_FN_ATTRS256
5777_mm256_mask_storeu_epi32 (void *__P__mmask8 __U__m256i __A)
5778{
5779  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5780             (__v8si__A,
5781             (__mmask8__U);
5782}
5783
5784static __inline__ void __DEFAULT_FN_ATTRS128
5785_mm_mask_storeu_pd (void *__P__mmask8 __U__m128d __A)
5786{
5787  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5788           (__v2df__A,
5789           (__mmask8__U);
5790}
5791
5792static __inline__ void __DEFAULT_FN_ATTRS256
5793_mm256_mask_storeu_pd (void *__P__mmask8 __U__m256d __A)
5794{
5795  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5796           (__v4df__A,
5797           (__mmask8__U);
5798}
5799
5800static __inline__ void __DEFAULT_FN_ATTRS128
5801_mm_mask_storeu_ps (void *__P__mmask8 __U__m128 __A)
5802{
5803  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5804           (__v4sf__A,
5805           (__mmask8__U);
5806}
5807
5808static __inline__ void __DEFAULT_FN_ATTRS256
5809_mm256_mask_storeu_ps (void *__P__mmask8 __U__m256 __A)
5810{
5811  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5812           (__v8sf__A,
5813           (__mmask8__U);
5814}
5815
5816
5817static __inline__ __m128d __DEFAULT_FN_ATTRS128
5818_mm_mask_unpackhi_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B)
5819{
5820  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5821                                              (__v2df)_mm_unpackhi_pd(__A__B),
5822                                              (__v2df)__W);
5823}
5824
5825static __inline__ __m128d __DEFAULT_FN_ATTRS128
5826_mm_maskz_unpackhi_pd(__mmask8 __U__m128d __A__m128d __B)
5827{
5828  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5829                                              (__v2df)_mm_unpackhi_pd(__A__B),
5830                                              (__v2df)_mm_setzero_pd());
5831}
5832
5833static __inline__ __m256d __DEFAULT_FN_ATTRS256
5834_mm256_mask_unpackhi_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B)
5835{
5836  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5837                                           (__v4df)_mm256_unpackhi_pd(__A__B),
5838                                           (__v4df)__W);
5839}
5840
5841static __inline__ __m256d __DEFAULT_FN_ATTRS256
5842_mm256_maskz_unpackhi_pd(__mmask8 __U__m256d __A__m256d __B)
5843{
5844  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5845                                           (__v4df)_mm256_unpackhi_pd(__A__B),
5846                                           (__v4df)_mm256_setzero_pd());
5847}
5848
5849static __inline__ __m128 __DEFAULT_FN_ATTRS128
5850_mm_mask_unpackhi_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B)
5851{
5852  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5853                                             (__v4sf)_mm_unpackhi_ps(__A__B),
5854                                             (__v4sf)__W);
5855}
5856
5857static __inline__ __m128 __DEFAULT_FN_ATTRS128
5858_mm_maskz_unpackhi_ps(__mmask8 __U__m128 __A__m128 __B)
5859{
5860  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5861                                             (__v4sf)_mm_unpackhi_ps(__A__B),
5862                                             (__v4sf)_mm_setzero_ps());
5863}
5864
5865static __inline__ __m256 __DEFAULT_FN_ATTRS256
5866_mm256_mask_unpackhi_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B)
5867{
5868  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5869                                           (__v8sf)_mm256_unpackhi_ps(__A__B),
5870                                           (__v8sf)__W);
5871}
5872
5873static __inline__ __m256 __DEFAULT_FN_ATTRS256
5874_mm256_maskz_unpackhi_ps(__mmask8 __U__m256 __A__m256 __B)
5875{
5876  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5877                                           (__v8sf)_mm256_unpackhi_ps(__A__B),
5878                                           (__v8sf)_mm256_setzero_ps());
5879}
5880
5881static __inline__ __m128d __DEFAULT_FN_ATTRS128
5882_mm_mask_unpacklo_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B)
5883{
5884  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5885                                              (__v2df)_mm_unpacklo_pd(__A__B),
5886                                              (__v2df)__W);
5887}
5888
5889static __inline__ __m128d __DEFAULT_FN_ATTRS128
5890_mm_maskz_unpacklo_pd(__mmask8 __U__m128d __A__m128d __B)
5891{
5892  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5893                                              (__v2df)_mm_unpacklo_pd(__A__B),
5894                                              (__v2df)_mm_setzero_pd());
5895}
5896
5897static __inline__ __m256d __DEFAULT_FN_ATTRS256
5898_mm256_mask_unpacklo_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B)
5899{
5900  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5901                                           (__v4df)_mm256_unpacklo_pd(__A__B),
5902                                           (__v4df)__W);
5903}
5904
5905static __inline__ __m256d __DEFAULT_FN_ATTRS256
5906_mm256_maskz_unpacklo_pd(__mmask8 __U__m256d __A__m256d __B)
5907{
5908  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5909                                           (__v4df)_mm256_unpacklo_pd(__A__B),
5910                                           (__v4df)_mm256_setzero_pd());
5911}
5912
5913static __inline__ __m128 __DEFAULT_FN_ATTRS128
5914_mm_mask_unpacklo_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B)
5915{
5916  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5917                                             (__v4sf)_mm_unpacklo_ps(__A__B),
5918                                             (__v4sf)__W);
5919}
5920
5921static __inline__ __m128 __DEFAULT_FN_ATTRS128
5922_mm_maskz_unpacklo_ps(__mmask8 __U__m128 __A__m128 __B)
5923{
5924  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5925                                             (__v4sf)_mm_unpacklo_ps(__A__B),
5926                                             (__v4sf)_mm_setzero_ps());
5927}
5928
5929static __inline__ __m256 __DEFAULT_FN_ATTRS256
5930_mm256_mask_unpacklo_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B)
5931{
5932  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5933                                           (__v8sf)_mm256_unpacklo_ps(__A__B),
5934                                           (__v8sf)__W);
5935}
5936
5937static __inline__ __m256 __DEFAULT_FN_ATTRS256
5938_mm256_maskz_unpacklo_ps(__mmask8 __U__m256 __A__m256 __B)
5939{
5940  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5941                                           (__v8sf)_mm256_unpacklo_ps(__A__B),
5942                                           (__v8sf)_mm256_setzero_ps());
5943}
5944
5945static __inline__ __m128d __DEFAULT_FN_ATTRS128
5946_mm_rcp14_pd (__m128d __A)
5947{
5948  return (__m128d__builtin_ia32_rcp14pd128_mask ((__v2df__A,
5949                (__v2df)
5950                _mm_setzero_pd (),
5951                (__mmask8) -1);
5952}
5953
5954static __inline__ __m128d __DEFAULT_FN_ATTRS128
5955_mm_mask_rcp14_pd (__m128d __W__mmask8 __U__m128d __A)
5956{
5957  return (__m128d__builtin_ia32_rcp14pd128_mask ((__v2df__A,
5958                (__v2df__W,
5959                (__mmask8__U);
5960}
5961
5962static __inline__ __m128d __DEFAULT_FN_ATTRS128
5963_mm_maskz_rcp14_pd (__mmask8 __U__m128d __A)
5964{
5965  return (__m128d__builtin_ia32_rcp14pd128_mask ((__v2df__A,
5966                (__v2df)
5967                _mm_setzero_pd (),
5968                (__mmask8__U);
5969}
5970
5971static __inline__ __m256d __DEFAULT_FN_ATTRS256
5972_mm256_rcp14_pd (__m256d __A)
5973{
5974  return (__m256d__builtin_ia32_rcp14pd256_mask ((__v4df__A,
5975                (__v4df)
5976                _mm256_setzero_pd (),
5977                (__mmask8) -1);
5978}
5979
5980static __inline__ __m256d __DEFAULT_FN_ATTRS256
5981_mm256_mask_rcp14_pd (__m256d __W__mmask8 __U__m256d __A)
5982{
5983  return (__m256d__builtin_ia32_rcp14pd256_mask ((__v4df__A,
5984                (__v4df__W,
5985                (__mmask8__U);
5986}
5987
5988static __inline__ __m256d __DEFAULT_FN_ATTRS256
5989_mm256_maskz_rcp14_pd (__mmask8 __U__m256d __A)
5990{
5991  return (__m256d__builtin_ia32_rcp14pd256_mask ((__v4df__A,
5992                (__v4df)
5993                _mm256_setzero_pd (),
5994                (__mmask8__U);
5995}
5996
5997static __inline__ __m128 __DEFAULT_FN_ATTRS128
5998_mm_rcp14_ps (__m128 __A)
5999{
6000  return (__m128__builtin_ia32_rcp14ps128_mask ((__v4sf__A,
6001               (__v4sf)
6002               _mm_setzero_ps (),
6003               (__mmask8) -1);
6004}
6005
6006static __inline__ __m128 __DEFAULT_FN_ATTRS128
6007_mm_mask_rcp14_ps (__m128 __W__mmask8 __U__m128 __A)
6008{
6009  return (__m128__builtin_ia32_rcp14ps128_mask ((__v4sf__A,
6010               (__v4sf__W,
6011               (__mmask8__U);
6012}
6013
6014static __inline__ __m128 __DEFAULT_FN_ATTRS128
6015_mm_maskz_rcp14_ps (__mmask8 __U__m128 __A)
6016{
6017  return (__m128__builtin_ia32_rcp14ps128_mask ((__v4sf__A,
6018               (__v4sf)
6019               _mm_setzero_ps (),
6020               (__mmask8__U);
6021}
6022
6023static __inline__ __m256 __DEFAULT_FN_ATTRS256
6024_mm256_rcp14_ps (__m256 __A)
6025{
6026  return (__m256__builtin_ia32_rcp14ps256_mask ((__v8sf__A,
6027               (__v8sf)
6028               _mm256_setzero_ps (),
6029               (__mmask8) -1);
6030}
6031
6032static __inline__ __m256 __DEFAULT_FN_ATTRS256
6033_mm256_mask_rcp14_ps (__m256 __W__mmask8 __U__m256 __A)
6034{
6035  return (__m256__builtin_ia32_rcp14ps256_mask ((__v8sf__A,
6036               (__v8sf__W,
6037               (__mmask8__U);
6038}
6039
6040static __inline__ __m256 __DEFAULT_FN_ATTRS256
6041_mm256_maskz_rcp14_ps (__mmask8 __U__m256 __A)
6042{
6043  return (__m256__builtin_ia32_rcp14ps256_mask ((__v8sf__A,
6044               (__v8sf)
6045               _mm256_setzero_ps (),
6046               (__mmask8__U);
6047}
6048
/* Blends _mm_permute_pd(X, C) with W under mask U via the selectpd_128
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)(__m128d)(W)))
6053
/* Blends _mm_permute_pd(X, C) with a zero vector under mask U via the
 * selectpd_128 builtin.  The expansion is fully parenthesized. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)_mm_setzero_pd()))
6058
/* Blends _mm256_permute_pd(X, C) with W under mask U via the selectpd_256
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
6063
/* Blends _mm256_permute_pd(X, C) with a zero vector under mask U via the
 * selectpd_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
6068
/* Blends _mm_permute_ps(X, C) with W under mask U via the selectps_128
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)(__m128)(W)))
6073
/* Blends _mm_permute_ps(X, C) with a zero vector under mask U via the
 * selectps_128 builtin.  The expansion is fully parenthesized. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)_mm_setzero_ps()))
6078
/* Blends _mm256_permute_ps(X, C) with W under mask U via the selectps_256
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)(__m256)(W)))
6083
/* Blends _mm256_permute_ps(X, C) with a zero vector under mask U via the
 * selectps_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps()))
6088
6089static __inline__ __m128d __DEFAULT_FN_ATTRS128
6090_mm_mask_permutevar_pd(__m128d __W__mmask8 __U__m128d __A__m128i __C)
6091{
6092  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6093                                            (__v2df)_mm_permutevar_pd(__A__C),
6094                                            (__v2df)__W);
6095}
6096
6097static __inline__ __m128d __DEFAULT_FN_ATTRS128
6098_mm_maskz_permutevar_pd(__mmask8 __U__m128d __A__m128i __C)
6099{
6100  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6101                                            (__v2df)_mm_permutevar_pd(__A__C),
6102                                            (__v2df)_mm_setzero_pd());
6103}
6104
6105static __inline__ __m256d __DEFAULT_FN_ATTRS256
6106_mm256_mask_permutevar_pd(__m256d __W__mmask8 __U__m256d __A__m256i __C)
6107{
6108  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6109                                         (__v4df)_mm256_permutevar_pd(__A__C),
6110                                         (__v4df)__W);
6111}
6112
6113static __inline__ __m256d __DEFAULT_FN_ATTRS256
6114_mm256_maskz_permutevar_pd(__mmask8 __U__m256d __A__m256i __C)
6115{
6116  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6117                                         (__v4df)_mm256_permutevar_pd(__A__C),
6118                                         (__v4df)_mm256_setzero_pd());
6119}
6120
6121static __inline__ __m128 __DEFAULT_FN_ATTRS128
6122_mm_mask_permutevar_ps(__m128 __W__mmask8 __U__m128 __A__m128i __C)
6123{
6124  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6125                                            (__v4sf)_mm_permutevar_ps(__A__C),
6126                                            (__v4sf)__W);
6127}
6128
6129static __inline__ __m128 __DEFAULT_FN_ATTRS128
6130_mm_maskz_permutevar_ps(__mmask8 __U__m128 __A__m128i __C)
6131{
6132  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6133                                            (__v4sf)_mm_permutevar_ps(__A__C),
6134                                            (__v4sf)_mm_setzero_ps());
6135}
6136
6137static __inline__ __m256 __DEFAULT_FN_ATTRS256
6138_mm256_mask_permutevar_ps(__m256 __W__mmask8 __U__m256 __A__m256i __C)
6139{
6140  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6141                                          (__v8sf)_mm256_permutevar_ps(__A__C),
6142                                          (__v8sf)__W);
6143}
6144
6145static __inline__ __m256 __DEFAULT_FN_ATTRS256
6146_mm256_maskz_permutevar_ps(__mmask8 __U__m256 __A__m256i __C)
6147{
6148  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6149                                          (__v8sf)_mm256_permutevar_ps(__A__C),
6150                                          (__v8sf)_mm256_setzero_ps());
6151}
6152
6153static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6154_mm_test_epi32_mask (__m128i __A__m128i __B)
6155{
6156  return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6157}
6158
6159static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6160_mm_mask_test_epi32_mask (__mmask8 __U__m128i __A__m128i __B)
6161{
6162  return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6163                                     _mm_setzero_si128());
6164}
6165
6166static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6167_mm256_test_epi32_mask (__m256i __A__m256i __B)
6168{
6169  return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6170                                   _mm256_setzero_si256());
6171}
6172
6173static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6174_mm256_mask_test_epi32_mask (__mmask8 __U__m256i __A__m256i __B)
6175{
6176  return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6177                                        _mm256_setzero_si256());
6178}
6179
6180static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6181_mm_test_epi64_mask (__m128i __A__m128i __B)
6182{
6183  return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6184}
6185
6186static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6187_mm_mask_test_epi64_mask (__mmask8 __U__m128i __A__m128i __B)
6188{
6189  return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6190                                     _mm_setzero_si128());
6191}
6192
6193static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6194_mm256_test_epi64_mask (__m256i __A__m256i __B)
6195{
6196  return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6197                                   _mm256_setzero_si256());
6198}
6199
6200static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6201_mm256_mask_test_epi64_mask (__mmask8 __U__m256i __A__m256i __B)
6202{
6203  return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6204                                        _mm256_setzero_si256());
6205}
6206
6207static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6208_mm_testn_epi32_mask (__m128i __A__m128i __B)
6209{
6210  return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6211}
6212
6213static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6214_mm_mask_testn_epi32_mask (__mmask8 __U__m128i __A__m128i __B)
6215{
6216  return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6217                                    _mm_setzero_si128());
6218}
6219
6220static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6221_mm256_testn_epi32_mask (__m256i __A__m256i __B)
6222{
6223  return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6224                                  _mm256_setzero_si256());
6225}
6226
6227static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6228_mm256_mask_testn_epi32_mask (__mmask8 __U__m256i __A__m256i __B)
6229{
6230  return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6231                                       _mm256_setzero_si256());
6232}
6233
6234static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6235_mm_testn_epi64_mask (__m128i __A__m128i __B)
6236{
6237  return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6238}
6239
6240static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6241_mm_mask_testn_epi64_mask (__mmask8 __U__m128i __A__m128i __B)
6242{
6243  return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6244                                    _mm_setzero_si128());
6245}
6246
6247static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6248_mm256_testn_epi64_mask (__m256i __A__m256i __B)
6249{
6250  return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6251                                  _mm256_setzero_si256());
6252}
6253
6254static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6255_mm256_mask_testn_epi64_mask (__mmask8 __U__m256i __A__m256i __B)
6256{
6257  return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6258                                       _mm256_setzero_si256());
6259}
6260
6261static __inline__ __m128i __DEFAULT_FN_ATTRS128
6262_mm_mask_unpackhi_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
6263{
6264  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6265                                           (__v4si)_mm_unpackhi_epi32(__A__B),
6266                                           (__v4si)__W);
6267}
6268
6269static __inline__ __m128i __DEFAULT_FN_ATTRS128
6270_mm_maskz_unpackhi_epi32(__mmask8 __U__m128i __A__m128i __B)
6271{
6272  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6273                                           (__v4si)_mm_unpackhi_epi32(__A__B),
6274                                           (__v4si)_mm_setzero_si128());
6275}
6276
6277static __inline__ __m256i __DEFAULT_FN_ATTRS256
6278_mm256_mask_unpackhi_epi32(__m256i __W__mmask8 __U__m256i __A__m256i __B)
6279{
6280  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6281                                        (__v8si)_mm256_unpackhi_epi32(__A__B),
6282                                        (__v8si)__W);
6283}
6284
6285static __inline__ __m256i __DEFAULT_FN_ATTRS256
6286_mm256_maskz_unpackhi_epi32(__mmask8 __U__m256i __A__m256i __B)
6287{
6288  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6289                                        (__v8si)_mm256_unpackhi_epi32(__A__B),
6290                                        (__v8si)_mm256_setzero_si256());
6291}
6292
6293static __inline__ __m128i __DEFAULT_FN_ATTRS128
6294_mm_mask_unpackhi_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
6295{
6296  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6297                                           (__v2di)_mm_unpackhi_epi64(__A__B),
6298                                           (__v2di)__W);
6299}
6300
6301static __inline__ __m128i __DEFAULT_FN_ATTRS128
6302_mm_maskz_unpackhi_epi64(__mmask8 __U__m128i __A__m128i __B)
6303{
6304  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6305                                           (__v2di)_mm_unpackhi_epi64(__A__B),
6306                                           (__v2di)_mm_setzero_si128());
6307}
6308
6309static __inline__ __m256i __DEFAULT_FN_ATTRS256
6310_mm256_mask_unpackhi_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
6311{
6312  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6313                                        (__v4di)_mm256_unpackhi_epi64(__A__B),
6314                                        (__v4di)__W);
6315}
6316
6317static __inline__ __m256i __DEFAULT_FN_ATTRS256
6318_mm256_maskz_unpackhi_epi64(__mmask8 __U__m256i __A__m256i __B)
6319{
6320  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6321                                        (__v4di)_mm256_unpackhi_epi64(__A__B),
6322                                        (__v4di)_mm256_setzero_si256());
6323}
6324
6325static __inline__ __m128i __DEFAULT_FN_ATTRS128
6326_mm_mask_unpacklo_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
6327{
6328  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6329                                           (__v4si)_mm_unpacklo_epi32(__A__B),
6330                                           (__v4si)__W);
6331}
6332
6333static __inline__ __m128i __DEFAULT_FN_ATTRS128
6334_mm_maskz_unpacklo_epi32(__mmask8 __U__m128i __A__m128i __B)
6335{
6336  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6337                                           (__v4si)_mm_unpacklo_epi32(__A__B),
6338                                           (__v4si)_mm_setzero_si128());
6339}
6340
6341static __inline__ __m256i __DEFAULT_FN_ATTRS256
6342_mm256_mask_unpacklo_epi32(__m256i __W__mmask8 __U__m256i __A__m256i __B)
6343{
6344  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6345                                        (__v8si)_mm256_unpacklo_epi32(__A__B),
6346                                        (__v8si)__W);
6347}
6348
6349static __inline__ __m256i __DEFAULT_FN_ATTRS256
6350_mm256_maskz_unpacklo_epi32(__mmask8 __U__m256i __A__m256i __B)
6351{
6352  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6353                                        (__v8si)_mm256_unpacklo_epi32(__A__B),
6354                                        (__v8si)_mm256_setzero_si256());
6355}
6356
6357static __inline__ __m128i __DEFAULT_FN_ATTRS128
6358_mm_mask_unpacklo_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
6359{
6360  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6361                                           (__v2di)_mm_unpacklo_epi64(__A__B),
6362                                           (__v2di)__W);
6363}
6364
6365static __inline__ __m128i __DEFAULT_FN_ATTRS128
6366_mm_maskz_unpacklo_epi64(__mmask8 __U__m128i __A__m128i __B)
6367{
6368  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6369                                           (__v2di)_mm_unpacklo_epi64(__A__B),
6370                                           (__v2di)_mm_setzero_si128());
6371}
6372
6373static __inline__ __m256i __DEFAULT_FN_ATTRS256
6374_mm256_mask_unpacklo_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B)
6375{
6376  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6377                                        (__v4di)_mm256_unpacklo_epi64(__A__B),
6378                                        (__v4di)__W);
6379}
6380
6381static __inline__ __m256i __DEFAULT_FN_ATTRS256
6382_mm256_maskz_unpacklo_epi64(__mmask8 __U__m256i __A__m256i __B)
6383{
6384  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6385                                        (__v4di)_mm256_unpacklo_epi64(__A__B),
6386                                        (__v4di)_mm256_setzero_si256());
6387}
6388
6389static __inline__ __m128i __DEFAULT_FN_ATTRS128
6390_mm_mask_sra_epi32(__m128i __W__mmask8 __U__m128i __A__m128i __B)
6391{
6392  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6393                                             (__v4si)_mm_sra_epi32(__A__B),
6394                                             (__v4si)__W);
6395}
6396
6397static __inline__ __m128i __DEFAULT_FN_ATTRS128
6398_mm_maskz_sra_epi32(__mmask8 __U__m128i __A__m128i __B)
6399{
6400  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6401                                             (__v4si)_mm_sra_epi32(__A__B),
6402                                             (__v4si)_mm_setzero_si128());
6403}
6404
6405static __inline__ __m256i __DEFAULT_FN_ATTRS256
6406_mm256_mask_sra_epi32(__m256i __W__mmask8 __U__m256i __A__m128i __B)
6407{
6408  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6409                                             (__v8si)_mm256_sra_epi32(__A__B),
6410                                             (__v8si)__W);
6411}
6412
6413static __inline__ __m256i __DEFAULT_FN_ATTRS256
6414_mm256_maskz_sra_epi32(__mmask8 __U__m256i __A__m128i __B)
6415{
6416  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6417                                             (__v8si)_mm256_sra_epi32(__A__B),
6418                                             (__v8si)_mm256_setzero_si256());
6419}
6420
6421static __inline__ __m128i __DEFAULT_FN_ATTRS128
6422_mm_mask_srai_epi32(__m128i __W__mmask8 __U__m128i __Aint __B)
6423{
6424  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6425                                             (__v4si)_mm_srai_epi32(__A__B),
6426                                             (__v4si)__W);
6427}
6428
6429static __inline__ __m128i __DEFAULT_FN_ATTRS128
6430_mm_maskz_srai_epi32(__mmask8 __U__m128i __Aint __B)
6431{
6432  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6433                                             (__v4si)_mm_srai_epi32(__A__B),
6434                                             (__v4si)_mm_setzero_si128());
6435}
6436
6437static __inline__ __m256i __DEFAULT_FN_ATTRS256
6438_mm256_mask_srai_epi32(__m256i __W__mmask8 __U__m256i __Aint __B)
6439{
6440  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6441                                             (__v8si)_mm256_srai_epi32(__A__B),
6442                                             (__v8si)__W);
6443}
6444
6445static __inline__ __m256i __DEFAULT_FN_ATTRS256
6446_mm256_maskz_srai_epi32(__mmask8 __U__m256i __Aint __B)
6447{
6448  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6449                                             (__v8si)_mm256_srai_epi32(__A__B),
6450                                             (__v8si)_mm256_setzero_si256());
6451}
6452
6453static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454_mm_sra_epi64(__m128i __A__m128i __B)
6455{
6456  return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6457}
6458
6459static __inline__ __m128i __DEFAULT_FN_ATTRS128
6460_mm_mask_sra_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B)
6461{
6462  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6463                                             (__v2di)_mm_sra_epi64(__A__B), \
6464                                             (__v2di)__W);
6465}
6466
6467static __inline__ __m128i __DEFAULT_FN_ATTRS128
6468_mm_maskz_sra_epi64(__mmask8 __U__m128i __A__m128i __B)
6469{
6470  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6471                                             (__v2di)_mm_sra_epi64(__A__B), \
6472                                             (__v2di)_mm_setzero_si128());
6473}
6474
6475static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476_mm256_sra_epi64(__m256i __A__m128i __B)
6477{
6478  return (__m256i)__builtin_ia32_psraq256((__v4di__A, (__v2di__B);
6479}
6480
6481static __inline__ __m256i __DEFAULT_FN_ATTRS256
6482_mm256_mask_sra_epi64(__m256i __W__mmask8 __U__m256i __A__m128i __B)
6483{
6484  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6485                                           (__v4di)_mm256_sra_epi64(__A__B), \
6486                                           (__v4di)__W);
6487}
6488
6489static __inline__ __m256i __DEFAULT_FN_ATTRS256
6490_mm256_maskz_sra_epi64(__mmask8 __U__m256i __A__m128i __B)
6491{
6492  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6493                                           (__v4di)_mm256_sra_epi64(__A__B), \
6494                                           (__v4di)_mm256_setzero_si256());
6495}
6496
6497static __inline__ __m128i __DEFAULT_FN_ATTRS128
6498_mm_srai_epi64(__m128i __Aint __imm)
6499{
6500  return (__m128i)__builtin_ia32_psraqi128((__v2di)__A__imm);
6501}
6502
6503static __inline__ __m128i __DEFAULT_FN_ATTRS128
6504_mm_mask_srai_epi64(__m128i __W__mmask8 __U__m128i __Aint __imm)
6505{
6506  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6507                                           (__v2di)_mm_srai_epi64(__A__imm), \
6508                                           (__v2di)__W);
6509}
6510
6511static __inline__ __m128i __DEFAULT_FN_ATTRS128
6512_mm_maskz_srai_epi64(__mmask8 __U__m128i __Aint __imm)
6513{
6514  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6515                                           (__v2di)_mm_srai_epi64(__A__imm), \
6516                                           (__v2di)_mm_setzero_si128());
6517}
6518
6519static __inline__ __m256i __DEFAULT_FN_ATTRS256
6520_mm256_srai_epi64(__m256i __Aint __imm)
6521{
6522  return (__m256i)__builtin_ia32_psraqi256((__v4di)__A__imm);
6523}
6524
6525static __inline__ __m256i __DEFAULT_FN_ATTRS256
6526_mm256_mask_srai_epi64(__m256i __W__mmask8 __U__m256i __Aint __imm)
6527{
6528  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6529                                        (__v4di)_mm256_srai_epi64(__A__imm), \
6530                                        (__v4di)__W);
6531}
6532
6533static __inline__ __m256i __DEFAULT_FN_ATTRS256
6534_mm256_maskz_srai_epi64(__mmask8 __U__m256i __Aint __imm)
6535{
6536  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6537                                        (__v4di)_mm256_srai_epi64(__A__imm), \
6538                                        (__v4di)_mm256_setzero_si256());
6539}
6540
/* Expands to the pternlogd128_mask builtin on A, B, C with immediate imm and
 * an all-ones mask.  The expansion is fully parenthesized. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6546
/* Expands to the pternlogd128_mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6552
/* Expands to the pternlogd128_maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6558
/* Expands to the pternlogd256_mask builtin on A, B, C with immediate imm and
 * an all-ones mask.  The expansion is fully parenthesized. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6564
/* Expands to the pternlogd256_mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6570
/* Expands to the pternlogd256_maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6576
/* Expands to the pternlogq128_mask builtin on A, B, C with immediate imm and
 * an all-ones mask.  The expansion is fully parenthesized. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6582
/* Expands to the pternlogq128_mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6588
/* Expands to the pternlogq128_maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6594
/* Expands to the pternlogq256_mask builtin on A, B, C with immediate imm and
 * an all-ones mask.  The expansion is fully parenthesized. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6600
/* Expands to the pternlogq256_mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6606
/* Expands to the pternlogq256_maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6612
6613
6614
/* Expands to the shuf_f32x4_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so it composes safely. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(imm)))
6618
/* Blends _mm256_shuffle_f32x4(A, B, imm) with W under mask U via the
 * selectps_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)(__m256)(W)))
6623
/* Blends _mm256_shuffle_f32x4(A, B, imm) with a zero vector under mask U via
 * the selectps_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)_mm256_setzero_ps()))
6628
/* Expands to the shuf_f64x2_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so it composes safely. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))
6632
/* Blends _mm256_shuffle_f64x2(A, B, imm) with W under mask U via the
 * selectpd_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))
6637
/* Blends _mm256_shuffle_f64x2(A, B, imm) with a zero vector under mask U via
 * the selectpd_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))
6642
/* Expands to the shuf_i32x4_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so it composes safely. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))
6646
/* Blends _mm256_shuffle_i32x4(A, B, imm) with W under mask U via the
 * selectd_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)(__m256i)(W)))
6651
/* Blends _mm256_shuffle_i32x4(A, B, imm) with a zero vector under mask U via
 * the selectd_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)_mm256_setzero_si256()))
6656
/* Expands to the shuf_i64x2_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so it composes safely. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))
6660
/* Blends _mm256_shuffle_i64x2(A, B, imm) with W under mask U via the
 * selectq_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))
6665
6666
/* Blends _mm256_shuffle_i64x2(A, B, imm) with a zero vector under mask U via
 * the selectq_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
6671
/* Blends _mm_shuffle_pd(A, B, M) with W under mask U via the selectpd_128
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))
6676
/* Blends _mm_shuffle_pd(A, B, M) with a zero vector under mask U via the
 * selectpd_128 builtin.  The expansion is fully parenthesized. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))
6681
/* Blends _mm256_shuffle_pd(A, B, M) with W under mask U via the selectpd_256
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))
6686
/* Blends _mm256_shuffle_pd(A, B, M) with a zero vector under mask U via the
 * selectpd_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))
6691
/* Blends _mm_shuffle_ps(A, B, M) with W under mask U via the selectps_128
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))
6696
/* Blends _mm_shuffle_ps(A, B, M) with a zero vector under mask U via the
 * selectps_128 builtin.  The expansion is fully parenthesized. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))
6701
/* Blends _mm256_shuffle_ps(A, B, M) with W under mask U via the selectps_256
 * builtin.  The expansion is fully parenthesized so it composes safely. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))
6706
/* Blends _mm256_shuffle_ps(A, B, M) with a zero vector under mask U via the
 * selectps_256 builtin.  The expansion is fully parenthesized. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
6711
6712static __inline__ __m128d __DEFAULT_FN_ATTRS128
6713_mm_rsqrt14_pd (__m128d __A)
6714{
6715  return (__m128d__builtin_ia32_rsqrt14pd128_mask ((__v2df__A,
6716                 (__v2df)
6717                 _mm_setzero_pd (),
6718                 (__mmask8) -1);
6719}
6720
6721static __inline__ __m128d __DEFAULT_FN_ATTRS128
6722_mm_mask_rsqrt14_pd (__m128d __W__mmask8 __U__m128d __A)
6723{
6724  return (__m128d__builtin_ia32_rsqrt14pd128_mask ((__v2df__A,
6725                 (__v2df__W,
6726                 (__mmask8__U);
6727}
6728
6729static __inline__ __m128d __DEFAULT_FN_ATTRS128
6730_mm_maskz_rsqrt14_pd (__mmask8 __U__m128d __A)
6731{
6732  return (__m128d__builtin_ia32_rsqrt14pd128_mask ((__v2df__A,
6733                 (__v2df)
6734                 _mm_setzero_pd (),
6735                 (__mmask8__U);
6736}
6737
6738static __inline__ __m256d __DEFAULT_FN_ATTRS256
6739_mm256_rsqrt14_pd (__m256d __A)
6740{
6741  return (__m256d__builtin_ia32_rsqrt14pd256_mask ((__v4df__A,
6742                 (__v4df)
6743                 _mm256_setzero_pd (),
6744                 (__mmask8) -1);
6745}
6746
6747static __inline__ __m256d __DEFAULT_FN_ATTRS256
6748_mm256_mask_rsqrt14_pd (__m256d __W__mmask8 __U__m256d __A)
6749{
6750  return (__m256d__builtin_ia32_rsqrt14pd256_mask ((__v4df__A,
6751                 (__v4df__W,
6752                 (__mmask8__U);
6753}
6754
6755static __inline__ __m256d __DEFAULT_FN_ATTRS256
6756_mm256_maskz_rsqrt14_pd (__mmask8 __U__m256d __A)
6757{
6758  return (__m256d__builtin_ia32_rsqrt14pd256_mask ((__v4df__A,
6759                 (__v4df)
6760                 _mm256_setzero_pd (),
6761                 (__mmask8__U);
6762}
6763
6764static __inline__ __m128 __DEFAULT_FN_ATTRS128
6765_mm_rsqrt14_ps (__m128 __A)
6766{
6767  return (__m128__builtin_ia32_rsqrt14ps128_mask ((__v4sf__A,
6768                (__v4sf)
6769                _mm_setzero_ps (),
6770                (__mmask8) -1);
6771}
6772
6773static __inline__ __m128 __DEFAULT_FN_ATTRS128
6774_mm_mask_rsqrt14_ps (__m128 __W__mmask8 __U__m128 __A)
6775{
6776  return (__m128__builtin_ia32_rsqrt14ps128_mask ((__v4sf__A,
6777                (__v4sf__W,
6778                (__mmask8__U);
6779}
6780
6781static __inline__ __m128 __DEFAULT_FN_ATTRS128
6782_mm_maskz_rsqrt14_ps (__mmask8 __U__m128 __A)
6783{
6784  return (__m128__builtin_ia32_rsqrt14ps128_mask ((__v4sf__A,
6785                (__v4sf)
6786                _mm_setzero_ps (),
6787                (__mmask8__U);
6788}
6789
6790static __inline__ __m256 __DEFAULT_FN_ATTRS256
6791_mm256_rsqrt14_ps (__m256 __A)
6792{
6793  return (__m256__builtin_ia32_rsqrt14ps256_mask ((__v8sf__A,
6794                (__v8sf)
6795                _mm256_setzero_ps (),
6796                (__mmask8) -1);
6797}
6798
6799static __inline__ __m256 __DEFAULT_FN_ATTRS256
6800_mm256_mask_rsqrt14_ps (__m256 __W__mmask8 __U__m256 __A)
6801{
6802  return (__m256__builtin_ia32_rsqrt14ps256_mask ((__v8sf__A,
6803                (__v8sf__W,
6804                (__mmask8__U);
6805}
6806
6807static __inline__ __m256 __DEFAULT_FN_ATTRS256
6808_mm256_maskz_rsqrt14_ps (__mmask8 __U__m256 __A)
6809{
6810  return (__m256__builtin_ia32_rsqrt14ps256_mask ((__v8sf__A,
6811                (__v8sf)
6812                _mm256_setzero_ps (),
6813                (__mmask8__U);
6814}
6815
6816static __inline__ __m256 __DEFAULT_FN_ATTRS256
6817_mm256_broadcast_f32x4(__m128 __A)
6818{
6819  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6820                                         01230123);
6821}
6822
6823static __inline__ __m256 __DEFAULT_FN_ATTRS256
6824_mm256_mask_broadcast_f32x4(__m256 __O__mmask8 __M__m128 __A)
6825{
6826  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6827                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6828                                            (__v8sf)__O);
6829}
6830
6831static __inline__ __m256 __DEFAULT_FN_ATTRS256
6832_mm256_maskz_broadcast_f32x4 (__mmask8 __M__m128 __A)
6833{
6834  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6835                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6836                                            (__v8sf)_mm256_setzero_ps());
6837}
6838
6839static __inline__ __m256i __DEFAULT_FN_ATTRS256
6840_mm256_broadcast_i32x4(__m128i __A)
6841{
6842  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6843                                          01230123);
6844}
6845
6846static __inline__ __m256i __DEFAULT_FN_ATTRS256
6847_mm256_mask_broadcast_i32x4(__m256i __O__mmask8 __M__m128i __A)
6848{
6849  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6850                                            (__v8si)_mm256_broadcast_i32x4(__A),
6851                                            (__v8si)__O);
6852}
6853
6854static __inline__ __m256i __DEFAULT_FN_ATTRS256
6855_mm256_maskz_broadcast_i32x4(__mmask8 __M__m128i __A)
6856{
6857  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6858                                            (__v8si)_mm256_broadcast_i32x4(__A),
6859                                            (__v8si)_mm256_setzero_si256());
6860}
6861
6862static __inline__ __m256d __DEFAULT_FN_ATTRS256
6863_mm256_mask_broadcastsd_pd (__m256d __O__mmask8 __M__m128d __A)
6864{
6865  return (__m256d)__builtin_ia32_selectpd_256(__M,
6866                                              (__v4df_mm256_broadcastsd_pd(__A),
6867                                              (__v4df__O);
6868}
6869
6870static __inline__ __m256d __DEFAULT_FN_ATTRS256
6871_mm256_maskz_broadcastsd_pd (__mmask8 __M__m128d __A)
6872{
6873  return (__m256d)__builtin_ia32_selectpd_256(__M,
6874                                              (__v4df_mm256_broadcastsd_pd(__A),
6875                                              (__v4df_mm256_setzero_pd());
6876}
6877
6878static __inline__ __m128 __DEFAULT_FN_ATTRS128
6879_mm_mask_broadcastss_ps (__m128 __O__mmask8 __M__m128 __A)
6880{
6881  return (__m128)__builtin_ia32_selectps_128(__M,
6882                                             (__v4sf_mm_broadcastss_ps(__A),
6883                                             (__v4sf__O);
6884}
6885
6886static __inline__ __m128 __DEFAULT_FN_ATTRS128
6887_mm_maskz_broadcastss_ps (__mmask8 __M__m128 __A)
6888{
6889  return (__m128)__builtin_ia32_selectps_128(__M,
6890                                             (__v4sf_mm_broadcastss_ps(__A),
6891                                             (__v4sf_mm_setzero_ps());
6892}
6893
6894static __inline__ __m256 __DEFAULT_FN_ATTRS256
6895_mm256_mask_broadcastss_ps (__m256 __O__mmask8 __M__m128 __A)
6896{
6897  return (__m256)__builtin_ia32_selectps_256(__M,
6898                                             (__v8sf_mm256_broadcastss_ps(__A),
6899                                             (__v8sf__O);
6900}
6901
6902static __inline__ __m256 __DEFAULT_FN_ATTRS256
6903_mm256_maskz_broadcastss_ps (__mmask8 __M__m128 __A)
6904{
6905  return (__m256)__builtin_ia32_selectps_256(__M,
6906                                             (__v8sf_mm256_broadcastss_ps(__A),
6907                                             (__v8sf_mm256_setzero_ps());
6908}
6909
6910static __inline__ __m128i __DEFAULT_FN_ATTRS128
6911_mm_mask_broadcastd_epi32 (__m128i __O__mmask8 __M__m128i __A)
6912{
6913  return (__m128i)__builtin_ia32_selectd_128(__M,
6914                                             (__v4si_mm_broadcastd_epi32(__A),
6915                                             (__v4si__O);
6916}
6917
6918static __inline__ __m128i __DEFAULT_FN_ATTRS128
6919_mm_maskz_broadcastd_epi32 (__mmask8 __M__m128i __A)
6920{
6921  return (__m128i)__builtin_ia32_selectd_128(__M,
6922                                             (__v4si_mm_broadcastd_epi32(__A),
6923                                             (__v4si_mm_setzero_si128());
6924}
6925
6926static __inline__ __m256i __DEFAULT_FN_ATTRS256
6927_mm256_mask_broadcastd_epi32 (__m256i __O__mmask8 __M__m128i __A)
6928{
6929  return (__m256i)__builtin_ia32_selectd_256(__M,
6930                                             (__v8si_mm256_broadcastd_epi32(__A),
6931                                             (__v8si__O);
6932}
6933
6934static __inline__ __m256i __DEFAULT_FN_ATTRS256
6935_mm256_maskz_broadcastd_epi32 (__mmask8 __M__m128i __A)
6936{
6937  return (__m256i)__builtin_ia32_selectd_256(__M,
6938                                             (__v8si_mm256_broadcastd_epi32(__A),
6939                                             (__v8si_mm256_setzero_si256());
6940}
6941
6942static __inline__ __m128i __DEFAULT_FN_ATTRS128
6943_mm_mask_broadcastq_epi64 (__m128i __O__mmask8 __M__m128i __A)
6944{
6945  return (__m128i)__builtin_ia32_selectq_128(__M,
6946                                             (__v2di_mm_broadcastq_epi64(__A),
6947                                             (__v2di__O);
6948}
6949
6950static __inline__ __m128i __DEFAULT_FN_ATTRS128
6951_mm_maskz_broadcastq_epi64 (__mmask8 __M__m128i __A)
6952{
6953  return (__m128i)__builtin_ia32_selectq_128(__M,
6954                                             (__v2di_mm_broadcastq_epi64(__A),
6955                                             (__v2di_mm_setzero_si128());
6956}
6957
6958static __inline__ __m256i __DEFAULT_FN_ATTRS256
6959_mm256_mask_broadcastq_epi64 (__m256i __O__mmask8 __M__m128i __A)
6960{
6961  return (__m256i)__builtin_ia32_selectq_256(__M,
6962                                             (__v4di_mm256_broadcastq_epi64(__A),
6963                                             (__v4di__O);
6964}
6965
6966static __inline__ __m256i __DEFAULT_FN_ATTRS256
6967_mm256_maskz_broadcastq_epi64 (__mmask8 __M__m128i __A)
6968{
6969  return (__m256i)__builtin_ia32_selectq_256(__M,
6970                                             (__v4di_mm256_broadcastq_epi64(__A),
6971                                             (__v4di_mm256_setzero_si256());
6972}
6973
6974static __inline__ __m128i __DEFAULT_FN_ATTRS128
6975_mm_cvtsepi32_epi8 (__m128i __A)
6976{
6977  return (__m128i__builtin_ia32_pmovsdb128_mask ((__v4si__A,
6978               (__v16qi)_mm_undefined_si128(),
6979               (__mmask8) -1);
6980}
6981
6982static __inline__ __m128i __DEFAULT_FN_ATTRS128
6983_mm_mask_cvtsepi32_epi8 (__m128i __O__mmask8 __M__m128i __A)
6984{
6985  return (__m128i__builtin_ia32_pmovsdb128_mask ((__v4si__A,
6986               (__v16qi__O__M);
6987}
6988
6989static __inline__ __m128i __DEFAULT_FN_ATTRS128
6990_mm_maskz_cvtsepi32_epi8 (__mmask8 __M__m128i __A)
6991{
6992  return (__m128i__builtin_ia32_pmovsdb128_mask ((__v4si__A,
6993               (__v16qi_mm_setzero_si128 (),
6994               __M);
6995}
6996
6997static __inline__ void __DEFAULT_FN_ATTRS128
6998_mm_mask_cvtsepi32_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
6999{
7000  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si__A__M);
7001}
7002
7003static __inline__ __m128i __DEFAULT_FN_ATTRS128
7004_mm256_cvtsepi32_epi8 (__m256i __A)
7005{
7006  return (__m128i__builtin_ia32_pmovsdb256_mask ((__v8si__A,
7007               (__v16qi)_mm_undefined_si128(),
7008               (__mmask8) -1);
7009}
7010
7011static __inline__ __m128i __DEFAULT_FN_ATTRS256
7012_mm256_mask_cvtsepi32_epi8 (__m128i __O__mmask8 __M__m256i __A)
7013{
7014  return (__m128i__builtin_ia32_pmovsdb256_mask ((__v8si__A,
7015               (__v16qi__O__M);
7016}
7017
7018static __inline__ __m128i __DEFAULT_FN_ATTRS256
7019_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M__m256i __A)
7020{
7021  return (__m128i__builtin_ia32_pmovsdb256_mask ((__v8si__A,
7022               (__v16qi_mm_setzero_si128 (),
7023               __M);
7024}
7025
7026static __inline__ void __DEFAULT_FN_ATTRS128
7027_mm256_mask_cvtsepi32_storeu_epi8 (void * __P__mmask8 __M__m256i __A)
7028{
7029  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si__A__M);
7030}
7031
7032static __inline__ __m128i __DEFAULT_FN_ATTRS128
7033_mm_cvtsepi32_epi16 (__m128i __A)
7034{
7035  return (__m128i__builtin_ia32_pmovsdw128_mask ((__v4si__A,
7036               (__v8hi)_mm_setzero_si128 (),
7037               (__mmask8) -1);
7038}
7039
7040static __inline__ __m128i __DEFAULT_FN_ATTRS128
7041_mm_mask_cvtsepi32_epi16 (__m128i __O__mmask8 __M__m128i __A)
7042{
7043  return (__m128i__builtin_ia32_pmovsdw128_mask ((__v4si__A,
7044               (__v8hi)__O,
7045               __M);
7046}
7047
7048static __inline__ __m128i __DEFAULT_FN_ATTRS128
7049_mm_maskz_cvtsepi32_epi16 (__mmask8 __M__m128i __A)
7050{
7051  return (__m128i__builtin_ia32_pmovsdw128_mask ((__v4si__A,
7052               (__v8hi_mm_setzero_si128 (),
7053               __M);
7054}
7055
7056static __inline__ void __DEFAULT_FN_ATTRS128
7057_mm_mask_cvtsepi32_storeu_epi16 (void * __P__mmask8 __M__m128i __A)
7058{
7059  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si__A__M);
7060}
7061
7062static __inline__ __m128i __DEFAULT_FN_ATTRS256
7063_mm256_cvtsepi32_epi16 (__m256i __A)
7064{
7065  return (__m128i__builtin_ia32_pmovsdw256_mask ((__v8si__A,
7066               (__v8hi)_mm_undefined_si128(),
7067               (__mmask8) -1);
7068}
7069
7070static __inline__ __m128i __DEFAULT_FN_ATTRS256
7071_mm256_mask_cvtsepi32_epi16 (__m128i __O__mmask8 __M__m256i __A)
7072{
7073  return (__m128i__builtin_ia32_pmovsdw256_mask ((__v8si__A,
7074               (__v8hi__O__M);
7075}
7076
7077static __inline__ __m128i __DEFAULT_FN_ATTRS256
7078_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M__m256i __A)
7079{
7080  return (__m128i__builtin_ia32_pmovsdw256_mask ((__v8si__A,
7081               (__v8hi_mm_setzero_si128 (),
7082               __M);
7083}
7084
7085static __inline__ void __DEFAULT_FN_ATTRS256
7086_mm256_mask_cvtsepi32_storeu_epi16 (void * __P__mmask8 __M__m256i __A)
7087{
7088  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si__A__M);
7089}
7090
7091static __inline__ __m128i __DEFAULT_FN_ATTRS128
7092_mm_cvtsepi64_epi8 (__m128i __A)
7093{
7094  return (__m128i__builtin_ia32_pmovsqb128_mask ((__v2di__A,
7095               (__v16qi)_mm_undefined_si128(),
7096               (__mmask8) -1);
7097}
7098
7099static __inline__ __m128i __DEFAULT_FN_ATTRS128
7100_mm_mask_cvtsepi64_epi8 (__m128i __O__mmask8 __M__m128i __A)
7101{
7102  return (__m128i__builtin_ia32_pmovsqb128_mask ((__v2di__A,
7103               (__v16qi__O__M);
7104}
7105
7106static __inline__ __m128i __DEFAULT_FN_ATTRS128
7107_mm_maskz_cvtsepi64_epi8 (__mmask8 __M__m128i __A)
7108{
7109  return (__m128i__builtin_ia32_pmovsqb128_mask ((__v2di__A,
7110               (__v16qi_mm_setzero_si128 (),
7111               __M);
7112}
7113
7114static __inline__ void __DEFAULT_FN_ATTRS128
7115_mm_mask_cvtsepi64_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
7116{
7117  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di__A__M);
7118}
7119
7120static __inline__ __m128i __DEFAULT_FN_ATTRS256
7121_mm256_cvtsepi64_epi8 (__m256i __A)
7122{
7123  return (__m128i__builtin_ia32_pmovsqb256_mask ((__v4di__A,
7124               (__v16qi)_mm_undefined_si128(),
7125               (__mmask8) -1);
7126}
7127
7128static __inline__ __m128i __DEFAULT_FN_ATTRS256
7129_mm256_mask_cvtsepi64_epi8 (__m128i __O__mmask8 __M__m256i __A)
7130{
7131  return (__m128i__builtin_ia32_pmovsqb256_mask ((__v4di__A,
7132               (__v16qi__O__M);
7133}
7134
7135static __inline__ __m128i __DEFAULT_FN_ATTRS256
7136_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M__m256i __A)
7137{
7138  return (__m128i__builtin_ia32_pmovsqb256_mask ((__v4di__A,
7139               (__v16qi_mm_setzero_si128 (),
7140               __M);
7141}
7142
7143static __inline__ void __DEFAULT_FN_ATTRS256
7144_mm256_mask_cvtsepi64_storeu_epi8 (void * __P__mmask8 __M__m256i __A)
7145{
7146  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di__A__M);
7147}
7148
7149static __inline__ __m128i __DEFAULT_FN_ATTRS128
7150_mm_cvtsepi64_epi32 (__m128i __A)
7151{
7152  return (__m128i__builtin_ia32_pmovsqd128_mask ((__v2di__A,
7153               (__v4si)_mm_undefined_si128(),
7154               (__mmask8) -1);
7155}
7156
7157static __inline__ __m128i __DEFAULT_FN_ATTRS128
7158_mm_mask_cvtsepi64_epi32 (__m128i __O__mmask8 __M__m128i __A)
7159{
7160  return (__m128i__builtin_ia32_pmovsqd128_mask ((__v2di__A,
7161               (__v4si__O__M);
7162}
7163
7164static __inline__ __m128i __DEFAULT_FN_ATTRS128
7165_mm_maskz_cvtsepi64_epi32 (__mmask8 __M__m128i __A)
7166{
7167  return (__m128i__builtin_ia32_pmovsqd128_mask ((__v2di__A,
7168               (__v4si_mm_setzero_si128 (),
7169               __M);
7170}
7171
7172static __inline__ void __DEFAULT_FN_ATTRS128
7173_mm_mask_cvtsepi64_storeu_epi32 (void * __P__mmask8 __M__m128i __A)
7174{
7175  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di__A__M);
7176}
7177
7178static __inline__ __m128i __DEFAULT_FN_ATTRS256
7179_mm256_cvtsepi64_epi32 (__m256i __A)
7180{
7181  return (__m128i__builtin_ia32_pmovsqd256_mask ((__v4di__A,
7182               (__v4si)_mm_undefined_si128(),
7183               (__mmask8) -1);
7184}
7185
7186static __inline__ __m128i __DEFAULT_FN_ATTRS256
7187_mm256_mask_cvtsepi64_epi32 (__m128i __O__mmask8 __M__m256i __A)
7188{
7189  return (__m128i__builtin_ia32_pmovsqd256_mask ((__v4di__A,
7190               (__v4si)__O,
7191               __M);
7192}
7193
7194static __inline__ __m128i __DEFAULT_FN_ATTRS256
7195_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M__m256i __A)
7196{
7197  return (__m128i__builtin_ia32_pmovsqd256_mask ((__v4di__A,
7198               (__v4si_mm_setzero_si128 (),
7199               __M);
7200}
7201
7202static __inline__ void __DEFAULT_FN_ATTRS256
7203_mm256_mask_cvtsepi64_storeu_epi32 (void * __P__mmask8 __M__m256i __A)
7204{
7205  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di__A__M);
7206}
7207
7208static __inline__ __m128i __DEFAULT_FN_ATTRS128
7209_mm_cvtsepi64_epi16 (__m128i __A)
7210{
7211  return (__m128i__builtin_ia32_pmovsqw128_mask ((__v2di__A,
7212               (__v8hi)_mm_undefined_si128(),
7213               (__mmask8) -1);
7214}
7215
7216static __inline__ __m128i __DEFAULT_FN_ATTRS128
7217_mm_mask_cvtsepi64_epi16 (__m128i __O__mmask8 __M__m128i __A)
7218{
7219  return (__m128i__builtin_ia32_pmovsqw128_mask ((__v2di__A,
7220               (__v8hi__O__M);
7221}
7222
7223static __inline__ __m128i __DEFAULT_FN_ATTRS128
7224_mm_maskz_cvtsepi64_epi16 (__mmask8 __M__m128i __A)
7225{
7226  return (__m128i__builtin_ia32_pmovsqw128_mask ((__v2di__A,
7227               (__v8hi_mm_setzero_si128 (),
7228               __M);
7229}
7230
7231static __inline__ void __DEFAULT_FN_ATTRS128
7232_mm_mask_cvtsepi64_storeu_epi16 (void * __P__mmask8 __M__m128i __A)
7233{
7234  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di__A__M);
7235}
7236
7237static __inline__ __m128i __DEFAULT_FN_ATTRS256
7238_mm256_cvtsepi64_epi16 (__m256i __A)
7239{
7240  return (__m128i__builtin_ia32_pmovsqw256_mask ((__v4di__A,
7241               (__v8hi)_mm_undefined_si128(),
7242               (__mmask8) -1);
7243}
7244
7245static __inline__ __m128i __DEFAULT_FN_ATTRS256
7246_mm256_mask_cvtsepi64_epi16 (__m128i __O__mmask8 __M__m256i __A)
7247{
7248  return (__m128i__builtin_ia32_pmovsqw256_mask ((__v4di__A,
7249               (__v8hi__O__M);
7250}
7251
7252static __inline__ __m128i __DEFAULT_FN_ATTRS256
7253_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M__m256i __A)
7254{
7255  return (__m128i__builtin_ia32_pmovsqw256_mask ((__v4di__A,
7256               (__v8hi_mm_setzero_si128 (),
7257               __M);
7258}
7259
7260static __inline__ void __DEFAULT_FN_ATTRS256
7261_mm256_mask_cvtsepi64_storeu_epi16 (void * __P__mmask8 __M__m256i __A)
7262{
7263  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di__A__M);
7264}
7265
7266static __inline__ __m128i __DEFAULT_FN_ATTRS128
7267_mm_cvtusepi32_epi8 (__m128i __A)
7268{
7269  return (__m128i__builtin_ia32_pmovusdb128_mask ((__v4si__A,
7270                (__v16qi)_mm_undefined_si128(),
7271                (__mmask8) -1);
7272}
7273
7274static __inline__ __m128i __DEFAULT_FN_ATTRS128
7275_mm_mask_cvtusepi32_epi8 (__m128i __O__mmask8 __M__m128i __A)
7276{
7277  return (__m128i__builtin_ia32_pmovusdb128_mask ((__v4si__A,
7278                (__v16qi__O,
7279                __M);
7280}
7281
7282static __inline__ __m128i __DEFAULT_FN_ATTRS128
7283_mm_maskz_cvtusepi32_epi8 (__mmask8 __M__m128i __A)
7284{
7285  return (__m128i__builtin_ia32_pmovusdb128_mask ((__v4si__A,
7286                (__v16qi_mm_setzero_si128 (),
7287                __M);
7288}
7289
7290static __inline__ void __DEFAULT_FN_ATTRS128
7291_mm_mask_cvtusepi32_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
7292{
7293  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si__A__M);
7294}
7295
7296static __inline__ __m128i __DEFAULT_FN_ATTRS256
7297_mm256_cvtusepi32_epi8 (__m256i __A)
7298{
7299  return (__m128i__builtin_ia32_pmovusdb256_mask ((__v8si__A,
7300                (__v16qi)_mm_undefined_si128(),
7301                (__mmask8) -1);
7302}
7303
7304static __inline__ __m128i __DEFAULT_FN_ATTRS256
7305_mm256_mask_cvtusepi32_epi8 (__m128i __O__mmask8 __M__m256i __A)
7306{
7307  return (__m128i__builtin_ia32_pmovusdb256_mask ((__v8si__A,
7308                (__v16qi__O,
7309                __M);
7310}
7311
7312static __inline__ __m128i __DEFAULT_FN_ATTRS256
7313_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M__m256i __A)
7314{
7315  return (__m128i__builtin_ia32_pmovusdb256_mask ((__v8si__A,
7316                (__v16qi_mm_setzero_si128 (),
7317                __M);
7318}
7319
7320static __inline__ void __DEFAULT_FN_ATTRS256
7321_mm256_mask_cvtusepi32_storeu_epi8 (void * __P__mmask8 __M__m256i __A)
7322{
7323  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si__A__M);
7324}
7325
7326static __inline__ __m128i __DEFAULT_FN_ATTRS128
7327_mm_cvtusepi32_epi16 (__m128i __A)
7328{
7329  return (__m128i__builtin_ia32_pmovusdw128_mask ((__v4si__A,
7330                (__v8hi)_mm_undefined_si128(),
7331                (__mmask8) -1);
7332}
7333
7334static __inline__ __m128i __DEFAULT_FN_ATTRS128
7335_mm_mask_cvtusepi32_epi16 (__m128i __O__mmask8 __M__m128i __A)
7336{
7337  return (__m128i__builtin_ia32_pmovusdw128_mask ((__v4si__A,
7338                (__v8hi__O__M);
7339}
7340
7341static __inline__ __m128i __DEFAULT_FN_ATTRS128
7342_mm_maskz_cvtusepi32_epi16 (__mmask8 __M__m128i __A)
7343{
7344  return (__m128i__builtin_ia32_pmovusdw128_mask ((__v4si__A,
7345                (__v8hi_mm_setzero_si128 (),
7346                __M);
7347}
7348
7349static __inline__ void __DEFAULT_FN_ATTRS128
7350_mm_mask_cvtusepi32_storeu_epi16 (void * __P__mmask8 __M__m128i __A)
7351{
7352  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si__A__M);
7353}
7354
7355static __inline__ __m128i __DEFAULT_FN_ATTRS256
7356_mm256_cvtusepi32_epi16 (__m256i __A)
7357{
7358  return (__m128i__builtin_ia32_pmovusdw256_mask ((__v8si__A,
7359                (__v8hi_mm_undefined_si128(),
7360                (__mmask8) -1);
7361}
7362
7363static __inline__ __m128i __DEFAULT_FN_ATTRS256
7364_mm256_mask_cvtusepi32_epi16 (__m128i __O__mmask8 __M__m256i __A)
7365{
7366  return (__m128i__builtin_ia32_pmovusdw256_mask ((__v8si__A,
7367                (__v8hi__O__M);
7368}
7369
7370static __inline__ __m128i __DEFAULT_FN_ATTRS256
7371_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M__m256i __A)
7372{
7373  return (__m128i__builtin_ia32_pmovusdw256_mask ((__v8si__A,
7374                (__v8hi_mm_setzero_si128 (),
7375                __M);
7376}
7377
7378static __inline__ void __DEFAULT_FN_ATTRS256
7379_mm256_mask_cvtusepi32_storeu_epi16 (void * __P__mmask8 __M__m256i __A)
7380{
7381  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si__A__M);
7382}
7383
7384static __inline__ __m128i __DEFAULT_FN_ATTRS128
7385_mm_cvtusepi64_epi8 (__m128i __A)
7386{
7387  return (__m128i__builtin_ia32_pmovusqb128_mask ((__v2di__A,
7388                (__v16qi)_mm_undefined_si128(),
7389                (__mmask8) -1);
7390}
7391
7392static __inline__ __m128i __DEFAULT_FN_ATTRS128
7393_mm_mask_cvtusepi64_epi8 (__m128i __O__mmask8 __M__m128i __A)
7394{
7395  return (__m128i__builtin_ia32_pmovusqb128_mask ((__v2di__A,
7396                (__v16qi__O,
7397                __M);
7398}
7399
7400static __inline__ __m128i __DEFAULT_FN_ATTRS128
7401_mm_maskz_cvtusepi64_epi8 (__mmask8 __M__m128i __A)
7402{
7403  return (__m128i__builtin_ia32_pmovusqb128_mask ((__v2di__A,
7404                (__v16qi_mm_setzero_si128 (),
7405                __M);
7406}
7407
7408static __inline__ void __DEFAULT_FN_ATTRS128
7409_mm_mask_cvtusepi64_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
7410{
7411  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di__A__M);
7412}
7413
7414static __inline__ __m128i __DEFAULT_FN_ATTRS256
7415_mm256_cvtusepi64_epi8 (__m256i __A)
7416{
7417  return (__m128i__builtin_ia32_pmovusqb256_mask ((__v4di__A,
7418                (__v16qi)_mm_undefined_si128(),
7419                (__mmask8) -1);
7420}
7421
7422static __inline__ __m128i __DEFAULT_FN_ATTRS256
7423_mm256_mask_cvtusepi64_epi8 (__m128i __O__mmask8 __M__m256i __A)
7424{
7425  return (__m128i__builtin_ia32_pmovusqb256_mask ((__v4di__A,
7426                (__v16qi__O,
7427                __M);
7428}
7429
7430static __inline__ __m128i __DEFAULT_FN_ATTRS256
7431_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M__m256i __A)
7432{
7433  return (__m128i__builtin_ia32_pmovusqb256_mask ((__v4di__A,
7434                (__v16qi_mm_setzero_si128 (),
7435                __M);
7436}
7437
7438static __inline__ void __DEFAULT_FN_ATTRS256
7439_mm256_mask_cvtusepi64_storeu_epi8 (void * __P__mmask8 __M__m256i __A)
7440{
7441  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di__A__M);
7442}
7443
7444static __inline__ __m128i __DEFAULT_FN_ATTRS128
7445_mm_cvtusepi64_epi32 (__m128i __A)
7446{
7447  return (__m128i__builtin_ia32_pmovusqd128_mask ((__v2di__A,
7448                (__v4si)_mm_undefined_si128(),
7449                (__mmask8) -1);
7450}
7451
7452static __inline__ __m128i __DEFAULT_FN_ATTRS128
7453_mm_mask_cvtusepi64_epi32 (__m128i __O__mmask8 __M__m128i __A)
7454{
7455  return (__m128i__builtin_ia32_pmovusqd128_mask ((__v2di__A,
7456                (__v4si__O__M);
7457}
7458
7459static __inline__ __m128i __DEFAULT_FN_ATTRS128
7460_mm_maskz_cvtusepi64_epi32 (__mmask8 __M__m128i __A)
7461{
7462  return (__m128i__builtin_ia32_pmovusqd128_mask ((__v2di__A,
7463                (__v4si_mm_setzero_si128 (),
7464                __M);
7465}
7466
7467static __inline__ void __DEFAULT_FN_ATTRS128
7468_mm_mask_cvtusepi64_storeu_epi32 (void * __P__mmask8 __M__m128i __A)
7469{
7470  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di__A__M);
7471}
7472
7473static __inline__ __m128i __DEFAULT_FN_ATTRS256
7474_mm256_cvtusepi64_epi32 (__m256i __A)
7475{
7476  return (__m128i__builtin_ia32_pmovusqd256_mask ((__v4di__A,
7477                (__v4si)_mm_undefined_si128(),
7478                (__mmask8) -1);
7479}
7480
7481static __inline__ __m128i __DEFAULT_FN_ATTRS256
7482_mm256_mask_cvtusepi64_epi32 (__m128i __O__mmask8 __M__m256i __A)
7483{
7484  return (__m128i__builtin_ia32_pmovusqd256_mask ((__v4di__A,
7485                (__v4si__O__M);
7486}
7487
7488static __inline__ __m128i __DEFAULT_FN_ATTRS256
7489_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M__m256i __A)
7490{
7491  return (__m128i__builtin_ia32_pmovusqd256_mask ((__v4di__A,
7492                (__v4si_mm_setzero_si128 (),
7493                __M);
7494}
7495
7496static __inline__ void __DEFAULT_FN_ATTRS256
7497_mm256_mask_cvtusepi64_storeu_epi32 (void * __P__mmask8 __M__m256i __A)
7498{
7499  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di__A__M);
7500}
7501
7502static __inline__ __m128i __DEFAULT_FN_ATTRS128
7503_mm_cvtusepi64_epi16 (__m128i __A)
7504{
7505  return (__m128i__builtin_ia32_pmovusqw128_mask ((__v2di__A,
7506                (__v8hi)_mm_undefined_si128(),
7507                (__mmask8) -1);
7508}
7509
7510static __inline__ __m128i __DEFAULT_FN_ATTRS128
7511_mm_mask_cvtusepi64_epi16 (__m128i __O__mmask8 __M__m128i __A)
7512{
7513  return (__m128i__builtin_ia32_pmovusqw128_mask ((__v2di__A,
7514                (__v8hi__O__M);
7515}
7516
7517static __inline__ __m128i __DEFAULT_FN_ATTRS128
7518_mm_maskz_cvtusepi64_epi16 (__mmask8 __M__m128i __A)
7519{
7520  return (__m128i__builtin_ia32_pmovusqw128_mask ((__v2di__A,
7521                (__v8hi_mm_setzero_si128 (),
7522                __M);
7523}
7524
7525static __inline__ void __DEFAULT_FN_ATTRS128
7526_mm_mask_cvtusepi64_storeu_epi16 (void * __P__mmask8 __M__m128i __A)
7527{
7528  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di__A__M);
7529}
7530
7531static __inline__ __m128i __DEFAULT_FN_ATTRS256
7532_mm256_cvtusepi64_epi16 (__m256i __A)
7533{
7534  return (__m128i__builtin_ia32_pmovusqw256_mask ((__v4di__A,
7535                (__v8hi)_mm_undefined_si128(),
7536                (__mmask8) -1);
7537}
7538
7539static __inline__ __m128i __DEFAULT_FN_ATTRS256
7540_mm256_mask_cvtusepi64_epi16 (__m128i __O__mmask8 __M__m256i __A)
7541{
7542  return (__m128i__builtin_ia32_pmovusqw256_mask ((__v4di__A,
7543                (__v8hi__O__M);
7544}
7545
7546static __inline__ __m128i __DEFAULT_FN_ATTRS256
7547_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M__m256i __A)
7548{
7549  return (__m128i__builtin_ia32_pmovusqw256_mask ((__v4di__A,
7550                (__v8hi_mm_setzero_si128 (),
7551                __M);
7552}
7553
7554static __inline__ void __DEFAULT_FN_ATTRS256
7555_mm256_mask_cvtusepi64_storeu_epi16 (void * __P__mmask8 __M__m256i __A)
7556{
7557  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di__A__M);
7558}
7559
7560static __inline__ __m128i __DEFAULT_FN_ATTRS128
7561_mm_cvtepi32_epi8 (__m128i __A)
7562{
7563  return (__m128i)__builtin_shufflevector(
7564      __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0000}, 01,
7565      23456777777777);
7566}
7567
7568static __inline__ __m128i __DEFAULT_FN_ATTRS128
7569_mm_mask_cvtepi32_epi8 (__m128i __O__mmask8 __M__m128i __A)
7570{
7571  return (__m128i__builtin_ia32_pmovdb128_mask ((__v4si__A,
7572              (__v16qi__O__M);
7573}
7574
7575static __inline__ __m128i __DEFAULT_FN_ATTRS128
7576_mm_maskz_cvtepi32_epi8 (__mmask8 __M__m128i __A)
7577{
7578  return (__m128i__builtin_ia32_pmovdb128_mask ((__v4si__A,
7579              (__v16qi)
7580              _mm_setzero_si128 (),
7581              __M);
7582}
7583
7584static __inline__ void __DEFAULT_FN_ATTRS256
7585_mm_mask_cvtepi32_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
7586{
7587  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si__A__M);
7588}
7589
7590static __inline__ __m128i __DEFAULT_FN_ATTRS256
7591_mm256_cvtepi32_epi8 (__m256i __A)
7592{
7593  return (__m128i)__builtin_shufflevector(
7594      __builtin_convertvector((__v8si)__A, __v8qi),
7595      (__v8qi){00000000}, 01234567891011,
7596      12131415);
7597}
7598
7599static __inline__ __m128i __DEFAULT_FN_ATTRS256
7600_mm256_mask_cvtepi32_epi8 (__m128i __O__mmask8 __M__m256i __A)
7601{
7602  return (__m128i__builtin_ia32_pmovdb256_mask ((__v8si__A,
7603              (__v16qi__O__M);
7604}
7605
7606static __inline__ __m128i __DEFAULT_FN_ATTRS256
7607_mm256_maskz_cvtepi32_epi8 (__mmask8 __M__m256i __A)
7608{
7609  return (__m128i__builtin_ia32_pmovdb256_mask ((__v8si__A,
7610              (__v16qi_mm_setzero_si128 (),
7611              __M);
7612}
7613
7614static __inline__ void __DEFAULT_FN_ATTRS256
7615_mm256_mask_cvtepi32_storeu_epi8 (void * __P__mmask8 __M__m256i __A)
7616{
7617  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si__A__M);
7618}
7619
7620static __inline__ __m128i __DEFAULT_FN_ATTRS128
7621_mm_cvtepi32_epi16 (__m128i __A)
7622{
7623  return (__m128i)__builtin_shufflevector(
7624      __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0000}, 01,
7625      234567);
7626}
7627
7628static __inline__ __m128i __DEFAULT_FN_ATTRS128
7629_mm_mask_cvtepi32_epi16 (__m128i __O__mmask8 __M__m128i __A)
7630{
7631  return (__m128i__builtin_ia32_pmovdw128_mask ((__v4si__A,
7632              (__v8hi__O__M);
7633}
7634
7635static __inline__ __m128i __DEFAULT_FN_ATTRS128
7636_mm_maskz_cvtepi32_epi16 (__mmask8 __M__m128i __A)
7637{
7638  return (__m128i__builtin_ia32_pmovdw128_mask ((__v4si__A,
7639              (__v8hi_mm_setzero_si128 (),
7640              __M);
7641}
7642
7643static __inline__ void __DEFAULT_FN_ATTRS128
7644_mm_mask_cvtepi32_storeu_epi16 (void * __P__mmask8 __M__m128i __A)
7645{
7646  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si__A__M);
7647}
7648
7649static __inline__ __m128i __DEFAULT_FN_ATTRS256
7650_mm256_cvtepi32_epi16 (__m256i __A)
7651{
7652  return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7653}
7654
7655static __inline__ __m128i __DEFAULT_FN_ATTRS256
7656_mm256_mask_cvtepi32_epi16 (__m128i __O__mmask8 __M__m256i __A)
7657{
7658  return (__m128i__builtin_ia32_pmovdw256_mask ((__v8si__A,
7659              (__v8hi__O__M);
7660}
7661
7662static __inline__ __m128i __DEFAULT_FN_ATTRS256
7663_mm256_maskz_cvtepi32_epi16 (__mmask8 __M__m256i __A)
7664{
7665  return (__m128i__builtin_ia32_pmovdw256_mask ((__v8si__A,
7666              (__v8hi_mm_setzero_si128 (),
7667              __M);
7668}
7669
7670static __inline__ void __DEFAULT_FN_ATTRS256
7671_mm256_mask_cvtepi32_storeu_epi16 (void *  __P__mmask8 __M__m256i __A)
7672{
7673  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si__A__M);
7674}
7675
7676static __inline__ __m128i __DEFAULT_FN_ATTRS128
7677_mm_cvtepi64_epi8 (__m128i __A)
7678{
7679  return (__m128i)__builtin_shufflevector(
7680      __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){00}, 0123,
7681      333333333333);
7682}
7683
7684static __inline__ __m128i __DEFAULT_FN_ATTRS128
7685_mm_mask_cvtepi64_epi8 (__m128i __O__mmask8 __M__m128i __A)
7686{
7687  return (__m128i__builtin_ia32_pmovqb128_mask ((__v2di__A,
7688              (__v16qi__O__M);
7689}
7690
7691static __inline__ __m128i __DEFAULT_FN_ATTRS128
7692_mm_maskz_cvtepi64_epi8 (__mmask8 __M__m128i __A)
7693{
7694  return (__m128i__builtin_ia32_pmovqb128_mask ((__v2di__A,
7695              (__v16qi_mm_setzero_si128 (),
7696              __M);
7697}
7698
7699static __inline__ void __DEFAULT_FN_ATTRS128
7700_mm_mask_cvtepi64_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
7701{
7702  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di__A__M);
7703}
7704
7705static __inline__ __m128i __DEFAULT_FN_ATTRS256
7706_mm256_cvtepi64_epi8 (__m256i __A)
7707{
7708  return (__m128i)__builtin_shufflevector(
7709      __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0000}, 01,
7710      23456777777777);
7711}
7712
7713static __inline__ __m128i __DEFAULT_FN_ATTRS256
7714_mm256_mask_cvtepi64_epi8 (__m128i __O__mmask8 __M__m256i __A)
7715{
7716  return (__m128i__builtin_ia32_pmovqb256_mask ((__v4di__A,
7717              (__v16qi__O__M);
7718}
7719
7720static __inline__ __m128i __DEFAULT_FN_ATTRS256
7721_mm256_maskz_cvtepi64_epi8 (__mmask8 __M__m256i __A)
7722{
7723  return (__m128i__builtin_ia32_pmovqb256_mask ((__v4di__A,
7724              (__v16qi_mm_setzero_si128 (),
7725              __M);
7726}
7727
7728static __inline__ void __DEFAULT_FN_ATTRS256
7729_mm256_mask_cvtepi64_storeu_epi8 (void * __P__mmask8 __M__m256i __A)
7730{
7731  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di__A__M);
7732}
7733
7734static __inline__ __m128i __DEFAULT_FN_ATTRS128
7735_mm_cvtepi64_epi32 (__m128i __A)
7736{
7737  return (__m128i)__builtin_shufflevector(
7738      __builtin_convertvector((__v2di)__A, __v2si), (__v2si){00}, 0123);
7739}
7740
7741static __inline__ __m128i __DEFAULT_FN_ATTRS128
7742_mm_mask_cvtepi64_epi32 (__m128i __O__mmask8 __M__m128i __A)
7743{
7744  return (__m128i__builtin_ia32_pmovqd128_mask ((__v2di__A,
7745              (__v4si__O__M);
7746}
7747
7748static __inline__ __m128i __DEFAULT_FN_ATTRS128
7749_mm_maskz_cvtepi64_epi32 (__mmask8 __M__m128i __A)
7750{
7751  return (__m128i__builtin_ia32_pmovqd128_mask ((__v2di__A,
7752              (__v4si_mm_setzero_si128 (),
7753              __M);
7754}
7755
7756static __inline__ void __DEFAULT_FN_ATTRS128
7757_mm_mask_cvtepi64_storeu_epi32 (void * __P__mmask8 __M__m128i __A)
7758{
7759  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di__A__M);
7760}
7761
7762static __inline__ __m128i __DEFAULT_FN_ATTRS256
7763_mm256_cvtepi64_epi32 (__m256i __A)
7764{
7765  return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7766}
7767
7768static __inline__ __m128i __DEFAULT_FN_ATTRS256
7769_mm256_mask_cvtepi64_epi32 (__m128i __O__mmask8 __M__m256i __A)
7770{
7771  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7772                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7773                                             (__v4si)__O);
7774}
7775
7776static __inline__ __m128i __DEFAULT_FN_ATTRS256
7777_mm256_maskz_cvtepi64_epi32 (__mmask8 __M__m256i __A)
7778{
7779  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7780                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7781                                             (__v4si)_mm_setzero_si128());
7782}
7783
7784static __inline__ void __DEFAULT_FN_ATTRS256
7785_mm256_mask_cvtepi64_storeu_epi32 (void * __P__mmask8 __M__m256i __A)
7786{
7787  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di__A__M);
7788}
7789
7790static __inline__ __m128i __DEFAULT_FN_ATTRS128
7791_mm_cvtepi64_epi16 (__m128i __A)
7792{
7793  return (__m128i)__builtin_shufflevector(
7794      __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){00}, 0123,
7795      3333);
7796}
7797
7798static __inline__ __m128i __DEFAULT_FN_ATTRS128
7799_mm_mask_cvtepi64_epi16 (__m128i __O__mmask8 __M__m128i __A)
7800{
7801  return (__m128i__builtin_ia32_pmovqw128_mask ((__v2di__A,
7802              (__v8hi)__O,
7803              __M);
7804}
7805
7806static __inline__ __m128i __DEFAULT_FN_ATTRS128
7807_mm_maskz_cvtepi64_epi16 (__mmask8 __M__m128i __A)
7808{
7809  return (__m128i__builtin_ia32_pmovqw128_mask ((__v2di__A,
7810              (__v8hi_mm_setzero_si128 (),
7811              __M);
7812}
7813
7814static __inline__ void __DEFAULT_FN_ATTRS128
7815_mm_mask_cvtepi64_storeu_epi16 (void * __P__mmask8 __M__m128i __A)
7816{
7817  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di__A__M);
7818}
7819
7820static __inline__ __m128i __DEFAULT_FN_ATTRS256
7821_mm256_cvtepi64_epi16 (__m256i __A)
7822{
7823  return (__m128i)__builtin_shufflevector(
7824      __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0000}, 01,
7825      234567);
7826}
7827
7828static __inline__ __m128i __DEFAULT_FN_ATTRS256
7829_mm256_mask_cvtepi64_epi16 (__m128i __O__mmask8 __M__m256i __A)
7830{
7831  return (__m128i__builtin_ia32_pmovqw256_mask ((__v4di__A,
7832              (__v8hi__O__M);
7833}
7834
7835static __inline__ __m128i __DEFAULT_FN_ATTRS256
7836_mm256_maskz_cvtepi64_epi16 (__mmask8 __M__m256i __A)
7837{
7838  return (__m128i__builtin_ia32_pmovqw256_mask ((__v4di__A,
7839              (__v8hi_mm_setzero_si128 (),
7840              __M);
7841}
7842
7843static __inline__ void __DEFAULT_FN_ATTRS256
7844_mm256_mask_cvtepi64_storeu_epi16 (void * __P__mmask8 __M__m256i __A)
7845{
7846  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di__A__M);
7847}
7848
/* Unmasked form: calls __builtin_ia32_extractf32x4_256_mask on A with
 * immediate imm, an undefined pass-through vector and an all-ones mask.
 * The expansion is parenthesized as a whole so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)-1))
7854
/* Merge-masked form: same builtin with W as the pass-through vector and U
 * as the mask. The expansion is parenthesized as a whole. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U)))
7860
/* Zero-masked form: same builtin with an all-zero pass-through vector and U
 * as the mask. The expansion is parenthesized as a whole. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U)))
7866
/* Unmasked form: calls __builtin_ia32_extracti32x4_256_mask on A with
 * immediate imm, an undefined pass-through vector and an all-ones mask.
 * The expansion is parenthesized as a whole. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
7872
/* Merge-masked form: same builtin with W as the pass-through vector and U
 * as the mask. The expansion is parenthesized as a whole. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)(__m128i)(W), \
                                                 (__mmask8)(U)))
7878
/* Zero-masked form: same builtin with an all-zero pass-through vector and U
 * as the mask. The expansion is parenthesized as a whole. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
7884
/* Calls __builtin_ia32_insertf32x4_256 with the 256-bit vector A, the
 * 128-bit vector B and immediate imm. The expansion is parenthesized as a
 * whole so that postfix operators apply to the cast result. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))
7888
/* Merge-masked form: __builtin_ia32_selectps_256 chooses, under mask U,
 * between _mm256_insertf32x4(A, B, imm) and W. The expansion is
 * parenthesized as a whole. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)(__m256)(W)))
7893
/* Zero-masked form: __builtin_ia32_selectps_256 chooses, under mask U,
 * between _mm256_insertf32x4(A, B, imm) and an all-zero vector. The
 * expansion is parenthesized as a whole. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)_mm256_setzero_ps()))
7898
/* Calls __builtin_ia32_inserti32x4_256 with the 256-bit vector A, the
 * 128-bit vector B and immediate imm. The expansion is parenthesized as a
 * whole so that postfix operators apply to the cast result. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))
7902
/* Merge-masked form: __builtin_ia32_selectd_256 chooses, under mask U,
 * between _mm256_inserti32x4(A, B, imm) and W. The expansion is
 * parenthesized as a whole. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)(__m256i)(W)))
7907
/* Zero-masked form: __builtin_ia32_selectd_256 chooses, under mask U,
 * between _mm256_inserti32x4(A, B, imm) and an all-zero vector. The
 * expansion is parenthesized as a whole. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)_mm256_setzero_si256()))
7912
/* Unmasked form: calls __builtin_ia32_getmantpd128_mask on A with an
 * all-zero pass-through vector and an all-ones mask. The immediate is built
 * as (C << 2) | B. The expansion is parenthesized as a whole so that
 * postfix operators apply to the cast result. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1))
7918
/* Merge-masked form: same builtin with W as the pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U)))
7924
/* Zero-masked form: same builtin with an all-zero pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U)))
7930
/* Unmasked form: calls __builtin_ia32_getmantpd256_mask on A with an
 * all-zero pass-through vector and an all-ones mask. The immediate is built
 * as (C << 2) | B. The expansion is parenthesized as a whole. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))
7936
/* Merge-masked form: same builtin with W as the pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))
7942
/* Zero-masked form: same builtin with an all-zero pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
7948
/* Unmasked form: calls __builtin_ia32_getmantps128_mask on A with an
 * all-zero pass-through vector and an all-ones mask. The immediate is built
 * as (C << 2) | B. The expansion is parenthesized as a whole. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))
7954
/* Merge-masked form: same builtin with W as the pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))
7960
/* Zero-masked form: same builtin with an all-zero pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
7966
/* Unmasked form: calls __builtin_ia32_getmantps256_mask on A with an
 * all-zero pass-through vector and an all-ones mask. The immediate is built
 * as (C << 2) | B. The expansion is parenthesized as a whole. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1))
7972
/* Merge-masked form: same builtin with W as the pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))
7978
/* Zero-masked form: same builtin with an all-zero pass-through vector and U
 * as the mask; the immediate is (C << 2) | B. The expansion is
 * parenthesized as a whole. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
7984
/* Calls __builtin_ia32_gather3div2df with v1_old (as __v2df), addr, the
 * 64-bit index vector (__v2di), mask and scale. The expansion is
 * parenthesized as a whole so that postfix operators apply to the cast
 * result. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7990
/* Calls __builtin_ia32_gather3div2di with v1_old (as __v2di), addr, the
 * 64-bit index vector (__v2di), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7996
/* Calls __builtin_ia32_gather3div4df with v1_old (as __v4df), addr, the
 * 64-bit index vector (__v4di), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8002
/* Calls __builtin_ia32_gather3div4di with v1_old (as __v4di), addr, the
 * 64-bit index vector (__v4di), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8008
/* Calls __builtin_ia32_gather3div4sf with v1_old (as __v4sf), addr, the
 * 64-bit index vector (__v2di), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8014
/* Calls __builtin_ia32_gather3div4si with v1_old (as __v4si), addr, the
 * 64-bit index vector (__v2di), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8020
/* Calls __builtin_ia32_gather3div8sf with v1_old (as __v4sf), addr, the
 * 64-bit index vector (__v4di), mask and scale. The index vector is 256 bits
 * wide while v1_old and the result are 128-bit (__m128). The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8026
/* Calls __builtin_ia32_gather3div8si with v1_old (as __v4si), addr, the
 * 64-bit index vector (__v4di), mask and scale. The index vector is 256 bits
 * wide while v1_old and the result are 128-bit (__m128i). The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8032
/* Calls __builtin_ia32_gather3siv2df with v1_old (as __v2df), addr, the
 * 32-bit index vector (__v4si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8038
/* Calls __builtin_ia32_gather3siv2di with v1_old (as __v2di), addr, the
 * 32-bit index vector (__v4si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8044
/* Calls __builtin_ia32_gather3siv4df with v1_old (as __v4df), addr, the
 * 32-bit index vector (__v4si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8050
/* Calls __builtin_ia32_gather3siv4di with v1_old (as __v4di), addr, the
 * 32-bit index vector (__v4si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8056
/* Calls __builtin_ia32_gather3siv4sf with v1_old (as __v4sf), addr, the
 * 32-bit index vector (__v4si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8062
/* Calls __builtin_ia32_gather3siv4si with v1_old (as __v4si), addr, the
 * 32-bit index vector (__v4si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8068
/* Calls __builtin_ia32_gather3siv8sf with v1_old (as __v8sf), addr, the
 * 32-bit index vector (__v8si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8074
/* Calls __builtin_ia32_gather3siv8si with v1_old (as __v8si), addr, the
 * 32-bit index vector (__v8si), mask and scale. The expansion is
 * parenthesized as a whole. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8080
/* Calls __builtin_ia32_permdf256 on X with the immediate control C. The
 * expansion is parenthesized as a whole so that postfix operators apply to
 * the cast result. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
8083
/* Merge-masked form: __builtin_ia32_selectpd_256 chooses, under mask U,
 * between _mm256_permutex_pd(X, C) and W. The expansion is parenthesized
 * as a whole. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
8088
/* Zero-masked form: __builtin_ia32_selectpd_256 chooses, under mask U,
 * between _mm256_permutex_pd(X, C) and an all-zero vector. The expansion is
 * parenthesized as a whole. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
8093
/* Calls __builtin_ia32_permdi256 on X with the immediate control C. The
 * expansion is parenthesized as a whole so that postfix operators apply to
 * the cast result. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
8096
/* Merge-masked form: __builtin_ia32_selectq_256 chooses, under mask U,
 * between _mm256_permutex_epi64(X, C) and W. The expansion is parenthesized
 * as a whole. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)(__m256i)(W)))
8101
/* Zero-masked form: __builtin_ia32_selectq_256 chooses, under mask U,
 * between _mm256_permutex_epi64(X, C) and an all-zero vector. The expansion
 * is parenthesized as a whole. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)_mm256_setzero_si256()))
8106
8107static __inline__ __m256d __DEFAULT_FN_ATTRS256
8108_mm256_permutexvar_pd (__m256i __X__m256d __Y)
8109{
8110  return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8111}
8112
8113static __inline__ __m256d __DEFAULT_FN_ATTRS256
8114_mm256_mask_permutexvar_pd (__m256d __W__mmask8 __U__m256i __X,
8115          __m256d __Y)
8116{
8117  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8118                                        (__v4df)_mm256_permutexvar_pd(__X__Y),
8119                                        (__v4df)__W);
8120}
8121
8122static __inline__ __m256d __DEFAULT_FN_ATTRS256
8123_mm256_maskz_permutexvar_pd (__mmask8 __U__m256i __X__m256d __Y)
8124{
8125  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8126                                        (__v4df)_mm256_permutexvar_pd(__X__Y),
8127                                        (__v4df)_mm256_setzero_pd());
8128}
8129
8130static __inline__ __m256i __DEFAULT_FN_ATTRS256
8131_mm256_permutexvar_epi64 ( __m256i __X__m256i __Y)
8132{
8133  return (__m256i)__builtin_ia32_permvardi256((__v4di__Y, (__v4di__X);
8134}
8135
8136static __inline__ __m256i __DEFAULT_FN_ATTRS256
8137_mm256_maskz_permutexvar_epi64 (__mmask8 __M__m256i __X__m256i __Y)
8138{
8139  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8140                                     (__v4di)_mm256_permutexvar_epi64(__X__Y),
8141                                     (__v4di)_mm256_setzero_si256());
8142}
8143
8144static __inline__ __m256i __DEFAULT_FN_ATTRS256
8145_mm256_mask_permutexvar_epi64 (__m256i __W__mmask8 __M__m256i __X,
8146             __m256i __Y)
8147{
8148  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8149                                     (__v4di)_mm256_permutexvar_epi64(__X__Y),
8150                                     (__v4di)__W);
8151}
8152
/* Alias for _mm256_permutevar8x32_ps with the two operands swapped. */
#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
8154
8155static __inline__ __m256 __DEFAULT_FN_ATTRS256
8156_mm256_mask_permutexvar_ps(__m256 __W__mmask8 __U__m256i __X__m256 __Y)
8157{
8158  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8159                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8160                                        (__v8sf)__W);
8161}
8162
8163static __inline__ __m256 __DEFAULT_FN_ATTRS256
8164_mm256_maskz_permutexvar_ps(__mmask8 __U__m256i __X__m256 __Y)
8165{
8166  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8167                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8168                                        (__v8sf)_mm256_setzero_ps());
8169}
8170
/* Alias for _mm256_permutevar8x32_epi32 with the two operands swapped. */
#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
8172
8173static __inline__ __m256i __DEFAULT_FN_ATTRS256
8174_mm256_mask_permutexvar_epi32(__m256i __W__mmask8 __M__m256i __X,
8175                              __m256i __Y)
8176{
8177  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8178                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8179                                     (__v8si)__W);
8180}
8181
8182static __inline__ __m256i __DEFAULT_FN_ATTRS256
8183_mm256_maskz_permutexvar_epi32(__mmask8 __M__m256i __X__m256i __Y)
8184{
8185  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8186                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8187                                     (__v8si)_mm256_setzero_si256());
8188}
8189
/* Calls __builtin_ia32_alignd128 on A and B (as __v4si) with immediate imm.
 * The expansion is parenthesized as a whole so that postfix operators apply
 * to the cast result. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))
8193
/* Merge-masked form: __builtin_ia32_selectd_128 chooses, under mask U,
 * between _mm_alignr_epi32(A, B, imm) and W. The expansion is parenthesized
 * as a whole. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)(__m128i)(W)))
8198
/* Zero-masked form: __builtin_ia32_selectd_128 chooses, under mask U,
 * between _mm_alignr_epi32(A, B, imm) and an all-zero vector. The expansion
 * is parenthesized as a whole. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)_mm_setzero_si128()))
8203
/* Calls __builtin_ia32_alignd256 on A and B (as __v8si) with immediate imm.
 * The expansion is parenthesized as a whole so that postfix operators apply
 * to the cast result. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))
8207
/* Merge-masked form: __builtin_ia32_selectd_256 chooses, under mask U,
 * between _mm256_alignr_epi32(A, B, imm) and W. The expansion is
 * parenthesized as a whole. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))
8212
/* Zero-masked form: __builtin_ia32_selectd_256 chooses, under mask U,
 * between _mm256_alignr_epi32(A, B, imm) and an all-zero vector. The
 * expansion is parenthesized as a whole. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
8217
/* Calls __builtin_ia32_alignq128 on A and B (as __v2di) with immediate imm.
 * The expansion is parenthesized as a whole so that postfix operators apply
 * to the cast result. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))
8221
/* Merge-masked form: __builtin_ia32_selectq_128 chooses, under mask U,
 * between _mm_alignr_epi64(A, B, imm) and W. The expansion is parenthesized
 * as a whole. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)(__m128i)(W)))
8226
/* Zero-masked form: __builtin_ia32_selectq_128 chooses, under mask U,
 * between _mm_alignr_epi64(A, B, imm) and an all-zero vector. The expansion
 * is parenthesized as a whole. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)_mm_setzero_si128()))
8231
/* Calls __builtin_ia32_alignq256 on A and B (as __v4di) with immediate imm.
 * The expansion is parenthesized as a whole so that postfix operators apply
 * to the cast result. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))
8235
/* Merge-masked form: __builtin_ia32_selectq_256 chooses, under mask U,
 * between _mm256_alignr_epi64(A, B, imm) and W. The expansion is
 * parenthesized as a whole. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))
8240
/* Zero-masked form: __builtin_ia32_selectq_256 chooses, under mask U,
 * between _mm256_alignr_epi64(A, B, imm) and an all-zero vector. The
 * expansion is parenthesized as a whole. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
8245
8246static __inline__ __m128 __DEFAULT_FN_ATTRS128
8247_mm_mask_movehdup_ps (__m128 __W__mmask8 __U__m128 __A)
8248{
8249  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8250                                             (__v4sf)_mm_movehdup_ps(__A),
8251                                             (__v4sf)__W);
8252}
8253
8254static __inline__ __m128 __DEFAULT_FN_ATTRS128
8255_mm_maskz_movehdup_ps (__mmask8 __U__m128 __A)
8256{
8257  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8258                                             (__v4sf)_mm_movehdup_ps(__A),
8259                                             (__v4sf)_mm_setzero_ps());
8260}
8261
8262static __inline__ __m256 __DEFAULT_FN_ATTRS256
8263_mm256_mask_movehdup_ps (__m256 __W__mmask8 __U__m256 __A)
8264{
8265  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8266                                             (__v8sf)_mm256_movehdup_ps(__A),
8267                                             (__v8sf)__W);
8268}
8269
8270static __inline__ __m256 __DEFAULT_FN_ATTRS256
8271_mm256_maskz_movehdup_ps (__mmask8 __U__m256 __A)
8272{
8273  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8274                                             (__v8sf)_mm256_movehdup_ps(__A),
8275                                             (__v8sf)_mm256_setzero_ps());
8276}
8277
8278static __inline__ __m128 __DEFAULT_FN_ATTRS128
8279_mm_mask_moveldup_ps (__m128 __W__mmask8 __U__m128 __A)
8280{
8281  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8282                                             (__v4sf)_mm_moveldup_ps(__A),
8283                                             (__v4sf)__W);
8284}
8285
8286static __inline__ __m128 __DEFAULT_FN_ATTRS128
8287_mm_maskz_moveldup_ps (__mmask8 __U__m128 __A)
8288{
8289  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8290                                             (__v4sf)_mm_moveldup_ps(__A),
8291                                             (__v4sf)_mm_setzero_ps());
8292}
8293
8294static __inline__ __m256 __DEFAULT_FN_ATTRS256
8295_mm256_mask_moveldup_ps (__m256 __W__mmask8 __U__m256 __A)
8296{
8297  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8298                                             (__v8sf)_mm256_moveldup_ps(__A),
8299                                             (__v8sf)__W);
8300}
8301
8302static __inline__ __m256 __DEFAULT_FN_ATTRS256
8303_mm256_maskz_moveldup_ps (__mmask8 __U__m256 __A)
8304{
8305  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8306                                             (__v8sf)_mm256_moveldup_ps(__A),
8307                                             (__v8sf)_mm256_setzero_ps());
8308}
8309
/* Merge-masked form: __builtin_ia32_selectd_256 chooses, under mask U,
 * between _mm256_shuffle_epi32(A, I) and W. The expansion is parenthesized
 * as a whole so that postfix operators apply to the cast result. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))
8314
/* Zero-masked form: __builtin_ia32_selectd_256 chooses, under mask U,
 * between _mm256_shuffle_epi32(A, I) and an all-zero vector. The expansion
 * is parenthesized as a whole. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
8319
/* Merge-masked form: __builtin_ia32_selectd_128 chooses, under mask U,
 * between _mm_shuffle_epi32(A, I) and W. The expansion is parenthesized as
 * a whole so that postfix operators apply to the cast result. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))
8324
/* Zero-masked form: __builtin_ia32_selectd_128 chooses, under mask U,
 * between _mm_shuffle_epi32(A, I) and an all-zero vector. The expansion is
 * parenthesized as a whole. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
8329
8330static __inline__ __m128d __DEFAULT_FN_ATTRS128
8331_mm_mask_mov_pd (__m128d __W__mmask8 __U__m128d __A)
8332{
8333  return (__m128d__builtin_ia32_selectpd_128 ((__mmask8__U,
8334              (__v2df__A,
8335              (__v2df__W);
8336}
8337
8338static __inline__ __m128d __DEFAULT_FN_ATTRS128
8339_mm_maskz_mov_pd (__mmask8 __U__m128d __A)
8340{
8341  return (__m128d__builtin_ia32_selectpd_128 ((__mmask8__U,
8342              (__v2df__A,
8343              (__v2df_mm_setzero_pd ());
8344}
8345
8346static __inline__ __m256d __DEFAULT_FN_ATTRS256
8347_mm256_mask_mov_pd (__m256d __W__mmask8 __U__m256d __A)
8348{
8349  return (__m256d__builtin_ia32_selectpd_256 ((__mmask8__U,
8350              (__v4df__A,
8351              (__v4df__W);
8352}
8353
8354static __inline__ __m256d __DEFAULT_FN_ATTRS256
8355_mm256_maskz_mov_pd (__mmask8 __U__m256d __A)
8356{
8357  return (__m256d__builtin_ia32_selectpd_256 ((__mmask8__U,
8358              (__v4df__A,
8359              (__v4df_mm256_setzero_pd ());
8360}
8361
8362static __inline__ __m128 __DEFAULT_FN_ATTRS128
8363_mm_mask_mov_ps (__m128 __W__mmask8 __U__m128 __A)
8364{
8365  return (__m128__builtin_ia32_selectps_128 ((__mmask8__U,
8366             (__v4sf__A,
8367             (__v4sf__W);
8368}
8369
8370static __inline__ __m128 __DEFAULT_FN_ATTRS128
8371_mm_maskz_mov_ps (__mmask8 __U__m128 __A)
8372{
8373  return (__m128__builtin_ia32_selectps_128 ((__mmask8__U,
8374             (__v4sf__A,
8375             (__v4sf_mm_setzero_ps ());
8376}
8377
8378static __inline__ __m256 __DEFAULT_FN_ATTRS256
8379_mm256_mask_mov_ps (__m256 __W__mmask8 __U__m256 __A)
8380{
8381  return (__m256__builtin_ia32_selectps_256 ((__mmask8__U,
8382             (__v8sf__A,
8383             (__v8sf__W);
8384}
8385
8386static __inline__ __m256 __DEFAULT_FN_ATTRS256
8387_mm256_maskz_mov_ps (__mmask8 __U__m256 __A)
8388{
8389  return (__m256__builtin_ia32_selectps_256 ((__mmask8__U,
8390             (__v8sf__A,
8391             (__v8sf_mm256_setzero_ps ());
8392}
8393
8394static __inline__ __m128 __DEFAULT_FN_ATTRS128
8395_mm_mask_cvtph_ps (__m128 __W__mmask8 __U__m128i __A)
8396{
8397  return (__m128__builtin_ia32_vcvtph2ps_mask ((__v8hi__A,
8398             (__v4sf__W,
8399             (__mmask8__U);
8400}
8401
8402static __inline__ __m128 __DEFAULT_FN_ATTRS128
8403_mm_maskz_cvtph_ps (__mmask8 __U__m128i __A)
8404{
8405  return (__m128__builtin_ia32_vcvtph2ps_mask ((__v8hi__A,
8406             (__v4sf)
8407             _mm_setzero_ps (),
8408             (__mmask8__U);
8409}
8410
8411static __inline__ __m256 __DEFAULT_FN_ATTRS256
8412_mm256_mask_cvtph_ps (__m256 __W__mmask8 __U__m128i __A)
8413{
8414  return (__m256__builtin_ia32_vcvtph2ps256_mask ((__v8hi__A,
8415                (__v8sf__W,
8416                (__mmask8__U);
8417}
8418
8419static __inline__ __m256 __DEFAULT_FN_ATTRS256
8420_mm256_maskz_cvtph_ps (__mmask8 __U__m128i __A)
8421{
8422  return (__m256__builtin_ia32_vcvtph2ps256_mask ((__v8hi__A,
8423                (__v8sf)
8424                _mm256_setzero_ps (),
8425                (__mmask8__U);
8426}
8427
8428static __inline __m128i __DEFAULT_FN_ATTRS128
8429_mm_mask_cvtps_ph (__m128i __W__mmask8 __U__m128 __A)
8430{
8431  return (__m128i__builtin_ia32_vcvtps2ph_mask ((__v4sf__A_MM_FROUND_CUR_DIRECTION,
8432                                                  (__v8hi__W,
8433                                                  (__mmask8__U);
8434}
8435
8436static __inline __m128i __DEFAULT_FN_ATTRS128
8437_mm_maskz_cvtps_ph (__mmask8 __U__m128 __A)
8438{
8439  return (__m128i__builtin_ia32_vcvtps2ph_mask ((__v4sf__A_MM_FROUND_CUR_DIRECTION,
8440                                                  (__v8hi_mm_setzero_si128 (),
8441                                                  (__mmask8__U);
8442}
8443
/* Merge-masked single-to-half conversion of the 128-bit float vector A with
 * an explicit rounding-control immediate I; converted elements whose bit in
 * mask U is clear are taken from W.
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro applies to the cast result, not to the builtin call. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))
8448
/* Zero-masked single-to-half conversion of the 128-bit float vector A with
 * an explicit rounding-control immediate I; converted elements whose bit in
 * mask U is clear are zeroed.
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro applies to the cast result, not to the builtin call. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))
8453
8454static __inline __m128i __DEFAULT_FN_ATTRS256
8455_mm256_mask_cvtps_ph (__m128i __W__mmask8 __U__m256 __A)
8456{
8457  return (__m128i__builtin_ia32_vcvtps2ph256_mask ((__v8sf__A_MM_FROUND_CUR_DIRECTION,
8458                                                      (__v8hi__W,
8459                                                      (__mmask8__U);
8460}
8461
8462static __inline __m128i __DEFAULT_FN_ATTRS256
8463_mm256_maskz_cvtps_ph ( __mmask8 __U__m256 __A)
8464{
8465  return (__m128i__builtin_ia32_vcvtps2ph256_mask ((__v8sf__A_MM_FROUND_CUR_DIRECTION,
8466                                                      (__v8hi_mm_setzero_si128(),
8467                                                      (__mmask8__U);
8468}
/* Merge-masked single-to-half conversion of the 256-bit float vector A with
 * an explicit rounding-control immediate I; converted elements whose bit in
 * mask U is clear are taken from W.
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro applies to the cast result, not to the builtin call. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))
8473
/* Zero-masked single-to-half conversion of the 256-bit float vector A with
 * an explicit rounding-control immediate I; converted elements whose bit in
 * mask U is clear are zeroed.
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro applies to the cast result, not to the builtin call. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
8478
8479
8480#undef __DEFAULT_FN_ATTRS128
8481#undef __DEFAULT_FN_ATTRS256
8482
8483#endif /* __AVX512VLINTRIN_H */
8484