Clang Project

clang_source_code/lib/Headers/avx512vldqintrin.h
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLDQINTRIN_H
29#define __AVX512VLDQINTRIN_H
30
31/* Define the default attributes for the functions in this file. */
32#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
33#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
34
35static __inline__ __m256i __DEFAULT_FN_ATTRS256
36_mm256_mullo_epi64 (__m256i __A__m256i __B) {
37  return (__m256i) ((__v4du__A * (__v4du__B);
38}
39
40static __inline__ __m256i __DEFAULT_FN_ATTRS256
41_mm256_mask_mullo_epi64(__m256i __W__mmask8 __U__m256i __A__m256i __B) {
42  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
43                                             (__v4di)_mm256_mullo_epi64(__A__B),
44                                             (__v4di)__W);
45}
46
47static __inline__ __m256i __DEFAULT_FN_ATTRS256
48_mm256_maskz_mullo_epi64(__mmask8 __U__m256i __A__m256i __B) {
49  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
50                                             (__v4di)_mm256_mullo_epi64(__A__B),
51                                             (__v4di)_mm256_setzero_si256());
52}
53
54static __inline__ __m128i __DEFAULT_FN_ATTRS128
55_mm_mullo_epi64 (__m128i __A__m128i __B) {
56  return (__m128i) ((__v2du__A * (__v2du__B);
57}
58
59static __inline__ __m128i __DEFAULT_FN_ATTRS128
60_mm_mask_mullo_epi64(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
61  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
62                                             (__v2di)_mm_mullo_epi64(__A__B),
63                                             (__v2di)__W);
64}
65
66static __inline__ __m128i __DEFAULT_FN_ATTRS128
67_mm_maskz_mullo_epi64(__mmask8 __U__m128i __A__m128i __B) {
68  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
69                                             (__v2di)_mm_mullo_epi64(__A__B),
70                                             (__v2di)_mm_setzero_si128());
71}
72
73static __inline__ __m256d __DEFAULT_FN_ATTRS256
74_mm256_mask_andnot_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
75  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
76                                              (__v4df)_mm256_andnot_pd(__A__B),
77                                              (__v4df)__W);
78}
79
80static __inline__ __m256d __DEFAULT_FN_ATTRS256
81_mm256_maskz_andnot_pd(__mmask8 __U__m256d __A__m256d __B) {
82  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
83                                              (__v4df)_mm256_andnot_pd(__A__B),
84                                              (__v4df)_mm256_setzero_pd());
85}
86
87static __inline__ __m128d __DEFAULT_FN_ATTRS128
88_mm_mask_andnot_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
89  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
90                                              (__v2df)_mm_andnot_pd(__A__B),
91                                              (__v2df)__W);
92}
93
94static __inline__ __m128d __DEFAULT_FN_ATTRS128
95_mm_maskz_andnot_pd(__mmask8 __U__m128d __A__m128d __B) {
96  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
97                                              (__v2df)_mm_andnot_pd(__A__B),
98                                              (__v2df)_mm_setzero_pd());
99}
100
101static __inline__ __m256 __DEFAULT_FN_ATTRS256
102_mm256_mask_andnot_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
103  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
104                                             (__v8sf)_mm256_andnot_ps(__A__B),
105                                             (__v8sf)__W);
106}
107
108static __inline__ __m256 __DEFAULT_FN_ATTRS256
109_mm256_maskz_andnot_ps(__mmask8 __U__m256 __A__m256 __B) {
110  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
111                                             (__v8sf)_mm256_andnot_ps(__A__B),
112                                             (__v8sf)_mm256_setzero_ps());
113}
114
115static __inline__ __m128 __DEFAULT_FN_ATTRS128
116_mm_mask_andnot_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
117  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
118                                             (__v4sf)_mm_andnot_ps(__A__B),
119                                             (__v4sf)__W);
120}
121
122static __inline__ __m128 __DEFAULT_FN_ATTRS128
123_mm_maskz_andnot_ps(__mmask8 __U__m128 __A__m128 __B) {
124  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
125                                             (__v4sf)_mm_andnot_ps(__A__B),
126                                             (__v4sf)_mm_setzero_ps());
127}
128
129static __inline__ __m256d __DEFAULT_FN_ATTRS256
130_mm256_mask_and_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
131  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
132                                              (__v4df)_mm256_and_pd(__A__B),
133                                              (__v4df)__W);
134}
135
136static __inline__ __m256d __DEFAULT_FN_ATTRS256
137_mm256_maskz_and_pd(__mmask8 __U__m256d __A__m256d __B) {
138  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
139                                              (__v4df)_mm256_and_pd(__A__B),
140                                              (__v4df)_mm256_setzero_pd());
141}
142
143static __inline__ __m128d __DEFAULT_FN_ATTRS128
144_mm_mask_and_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
145  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
146                                              (__v2df)_mm_and_pd(__A__B),
147                                              (__v2df)__W);
148}
149
150static __inline__ __m128d __DEFAULT_FN_ATTRS128
151_mm_maskz_and_pd(__mmask8 __U__m128d __A__m128d __B) {
152  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
153                                              (__v2df)_mm_and_pd(__A__B),
154                                              (__v2df)_mm_setzero_pd());
155}
156
157static __inline__ __m256 __DEFAULT_FN_ATTRS256
158_mm256_mask_and_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
159  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
160                                             (__v8sf)_mm256_and_ps(__A__B),
161                                             (__v8sf)__W);
162}
163
164static __inline__ __m256 __DEFAULT_FN_ATTRS256
165_mm256_maskz_and_ps(__mmask8 __U__m256 __A__m256 __B) {
166  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
167                                             (__v8sf)_mm256_and_ps(__A__B),
168                                             (__v8sf)_mm256_setzero_ps());
169}
170
171static __inline__ __m128 __DEFAULT_FN_ATTRS128
172_mm_mask_and_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
173  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
174                                             (__v4sf)_mm_and_ps(__A__B),
175                                             (__v4sf)__W);
176}
177
178static __inline__ __m128 __DEFAULT_FN_ATTRS128
179_mm_maskz_and_ps(__mmask8 __U__m128 __A__m128 __B) {
180  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
181                                             (__v4sf)_mm_and_ps(__A__B),
182                                             (__v4sf)_mm_setzero_ps());
183}
184
185static __inline__ __m256d __DEFAULT_FN_ATTRS256
186_mm256_mask_xor_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
187  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
188                                              (__v4df)_mm256_xor_pd(__A__B),
189                                              (__v4df)__W);
190}
191
192static __inline__ __m256d __DEFAULT_FN_ATTRS256
193_mm256_maskz_xor_pd(__mmask8 __U__m256d __A__m256d __B) {
194  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
195                                              (__v4df)_mm256_xor_pd(__A__B),
196                                              (__v4df)_mm256_setzero_pd());
197}
198
199static __inline__ __m128d __DEFAULT_FN_ATTRS128
200_mm_mask_xor_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
201  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
202                                              (__v2df)_mm_xor_pd(__A__B),
203                                              (__v2df)__W);
204}
205
206static __inline__ __m128d __DEFAULT_FN_ATTRS128
207_mm_maskz_xor_pd (__mmask8 __U__m128d __A__m128d __B) {
208  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
209                                              (__v2df)_mm_xor_pd(__A__B),
210                                              (__v2df)_mm_setzero_pd());
211}
212
213static __inline__ __m256 __DEFAULT_FN_ATTRS256
214_mm256_mask_xor_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
215  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
216                                             (__v8sf)_mm256_xor_ps(__A__B),
217                                             (__v8sf)__W);
218}
219
220static __inline__ __m256 __DEFAULT_FN_ATTRS256
221_mm256_maskz_xor_ps(__mmask8 __U__m256 __A__m256 __B) {
222  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
223                                             (__v8sf)_mm256_xor_ps(__A__B),
224                                             (__v8sf)_mm256_setzero_ps());
225}
226
227static __inline__ __m128 __DEFAULT_FN_ATTRS128
228_mm_mask_xor_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
229  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
230                                             (__v4sf)_mm_xor_ps(__A__B),
231                                             (__v4sf)__W);
232}
233
234static __inline__ __m128 __DEFAULT_FN_ATTRS128
235_mm_maskz_xor_ps(__mmask8 __U__m128 __A__m128 __B) {
236  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
237                                             (__v4sf)_mm_xor_ps(__A__B),
238                                             (__v4sf)_mm_setzero_ps());
239}
240
241static __inline__ __m256d __DEFAULT_FN_ATTRS256
242_mm256_mask_or_pd(__m256d __W__mmask8 __U__m256d __A__m256d __B) {
243  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
244                                              (__v4df)_mm256_or_pd(__A__B),
245                                              (__v4df)__W);
246}
247
248static __inline__ __m256d __DEFAULT_FN_ATTRS256
249_mm256_maskz_or_pd(__mmask8 __U__m256d __A__m256d __B) {
250  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
251                                              (__v4df)_mm256_or_pd(__A__B),
252                                              (__v4df)_mm256_setzero_pd());
253}
254
255static __inline__ __m128d __DEFAULT_FN_ATTRS128
256_mm_mask_or_pd(__m128d __W__mmask8 __U__m128d __A__m128d __B) {
257  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
258                                              (__v2df)_mm_or_pd(__A__B),
259                                              (__v2df)__W);
260}
261
262static __inline__ __m128d __DEFAULT_FN_ATTRS128
263_mm_maskz_or_pd(__mmask8 __U__m128d __A__m128d __B) {
264  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
265                                              (__v2df)_mm_or_pd(__A__B),
266                                              (__v2df)_mm_setzero_pd());
267}
268
269static __inline__ __m256 __DEFAULT_FN_ATTRS256
270_mm256_mask_or_ps(__m256 __W__mmask8 __U__m256 __A__m256 __B) {
271  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
272                                             (__v8sf)_mm256_or_ps(__A__B),
273                                             (__v8sf)__W);
274}
275
276static __inline__ __m256 __DEFAULT_FN_ATTRS256
277_mm256_maskz_or_ps(__mmask8 __U__m256 __A__m256 __B) {
278  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
279                                             (__v8sf)_mm256_or_ps(__A__B),
280                                             (__v8sf)_mm256_setzero_ps());
281}
282
283static __inline__ __m128 __DEFAULT_FN_ATTRS128
284_mm_mask_or_ps(__m128 __W__mmask8 __U__m128 __A__m128 __B) {
285  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
286                                             (__v4sf)_mm_or_ps(__A__B),
287                                             (__v4sf)__W);
288}
289
290static __inline__ __m128 __DEFAULT_FN_ATTRS128
291_mm_maskz_or_ps(__mmask8 __U__m128 __A__m128 __B) {
292  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
293                                             (__v4sf)_mm_or_ps(__A__B),
294                                             (__v4sf)_mm_setzero_ps());
295}
296
297static __inline__ __m128i __DEFAULT_FN_ATTRS128
298_mm_cvtpd_epi64 (__m128d __A) {
299  return (__m128i__builtin_ia32_cvtpd2qq128_mask ((__v2df__A,
300                (__v2di_mm_setzero_si128(),
301                (__mmask8) -1);
302}
303
304static __inline__ __m128i __DEFAULT_FN_ATTRS128
305_mm_mask_cvtpd_epi64 (__m128i __W__mmask8 __U__m128d __A) {
306  return (__m128i__builtin_ia32_cvtpd2qq128_mask ((__v2df__A,
307                (__v2di__W,
308                (__mmask8__U);
309}
310
311static __inline__ __m128i __DEFAULT_FN_ATTRS128
312_mm_maskz_cvtpd_epi64 (__mmask8 __U__m128d __A) {
313  return (__m128i__builtin_ia32_cvtpd2qq128_mask ((__v2df__A,
314                (__v2di_mm_setzero_si128(),
315                (__mmask8__U);
316}
317
318static __inline__ __m256i __DEFAULT_FN_ATTRS256
319_mm256_cvtpd_epi64 (__m256d __A) {
320  return (__m256i__builtin_ia32_cvtpd2qq256_mask ((__v4df__A,
321                (__v4di_mm256_setzero_si256(),
322                (__mmask8) -1);
323}
324
325static __inline__ __m256i __DEFAULT_FN_ATTRS256
326_mm256_mask_cvtpd_epi64 (__m256i __W__mmask8 __U__m256d __A) {
327  return (__m256i__builtin_ia32_cvtpd2qq256_mask ((__v4df__A,
328                (__v4di__W,
329                (__mmask8__U);
330}
331
332static __inline__ __m256i __DEFAULT_FN_ATTRS256
333_mm256_maskz_cvtpd_epi64 (__mmask8 __U__m256d __A) {
334  return (__m256i__builtin_ia32_cvtpd2qq256_mask ((__v4df__A,
335                (__v4di_mm256_setzero_si256(),
336                (__mmask8__U);
337}
338
339static __inline__ __m128i __DEFAULT_FN_ATTRS128
340_mm_cvtpd_epu64 (__m128d __A) {
341  return (__m128i__builtin_ia32_cvtpd2uqq128_mask ((__v2df__A,
342                (__v2di_mm_setzero_si128(),
343                (__mmask8) -1);
344}
345
346static __inline__ __m128i __DEFAULT_FN_ATTRS128
347_mm_mask_cvtpd_epu64 (__m128i __W__mmask8 __U__m128d __A) {
348  return (__m128i__builtin_ia32_cvtpd2uqq128_mask ((__v2df__A,
349                (__v2di__W,
350                (__mmask8__U);
351}
352
353static __inline__ __m128i __DEFAULT_FN_ATTRS128
354_mm_maskz_cvtpd_epu64 (__mmask8 __U__m128d __A) {
355  return (__m128i__builtin_ia32_cvtpd2uqq128_mask ((__v2df__A,
356                (__v2di_mm_setzero_si128(),
357                (__mmask8__U);
358}
359
360static __inline__ __m256i __DEFAULT_FN_ATTRS256
361_mm256_cvtpd_epu64 (__m256d __A) {
362  return (__m256i__builtin_ia32_cvtpd2uqq256_mask ((__v4df__A,
363                (__v4di_mm256_setzero_si256(),
364                (__mmask8) -1);
365}
366
367static __inline__ __m256i __DEFAULT_FN_ATTRS256
368_mm256_mask_cvtpd_epu64 (__m256i __W__mmask8 __U__m256d __A) {
369  return (__m256i__builtin_ia32_cvtpd2uqq256_mask ((__v4df__A,
370                (__v4di__W,
371                (__mmask8__U);
372}
373
374static __inline__ __m256i __DEFAULT_FN_ATTRS256
375_mm256_maskz_cvtpd_epu64 (__mmask8 __U__m256d __A) {
376  return (__m256i__builtin_ia32_cvtpd2uqq256_mask ((__v4df__A,
377                (__v4di_mm256_setzero_si256(),
378                (__mmask8__U);
379}
380
381static __inline__ __m128i __DEFAULT_FN_ATTRS128
382_mm_cvtps_epi64 (__m128 __A) {
383  return (__m128i__builtin_ia32_cvtps2qq128_mask ((__v4sf__A,
384                (__v2di_mm_setzero_si128(),
385                (__mmask8) -1);
386}
387
388static __inline__ __m128i __DEFAULT_FN_ATTRS128
389_mm_mask_cvtps_epi64 (__m128i __W__mmask8 __U__m128 __A) {
390  return (__m128i__builtin_ia32_cvtps2qq128_mask ((__v4sf__A,
391                (__v2di__W,
392                (__mmask8__U);
393}
394
395static __inline__ __m128i __DEFAULT_FN_ATTRS128
396_mm_maskz_cvtps_epi64 (__mmask8 __U__m128 __A) {
397  return (__m128i__builtin_ia32_cvtps2qq128_mask ((__v4sf__A,
398                (__v2di_mm_setzero_si128(),
399                (__mmask8__U);
400}
401
402static __inline__ __m256i __DEFAULT_FN_ATTRS256
403_mm256_cvtps_epi64 (__m128 __A) {
404  return (__m256i__builtin_ia32_cvtps2qq256_mask ((__v4sf__A,
405                (__v4di_mm256_setzero_si256(),
406                (__mmask8) -1);
407}
408
409static __inline__ __m256i __DEFAULT_FN_ATTRS256
410_mm256_mask_cvtps_epi64 (__m256i __W__mmask8 __U__m128 __A) {
411  return (__m256i__builtin_ia32_cvtps2qq256_mask ((__v4sf__A,
412                (__v4di__W,
413                (__mmask8__U);
414}
415
416static __inline__ __m256i __DEFAULT_FN_ATTRS256
417_mm256_maskz_cvtps_epi64 (__mmask8 __U__m128 __A) {
418  return (__m256i__builtin_ia32_cvtps2qq256_mask ((__v4sf__A,
419                (__v4di_mm256_setzero_si256(),
420                (__mmask8__U);
421}
422
423static __inline__ __m128i __DEFAULT_FN_ATTRS128
424_mm_cvtps_epu64 (__m128 __A) {
425  return (__m128i__builtin_ia32_cvtps2uqq128_mask ((__v4sf__A,
426                (__v2di_mm_setzero_si128(),
427                (__mmask8) -1);
428}
429
430static __inline__ __m128i __DEFAULT_FN_ATTRS128
431_mm_mask_cvtps_epu64 (__m128i __W__mmask8 __U__m128 __A) {
432  return (__m128i__builtin_ia32_cvtps2uqq128_mask ((__v4sf__A,
433                (__v2di__W,
434                (__mmask8__U);
435}
436
437static __inline__ __m128i __DEFAULT_FN_ATTRS128
438_mm_maskz_cvtps_epu64 (__mmask8 __U__m128 __A) {
439  return (__m128i__builtin_ia32_cvtps2uqq128_mask ((__v4sf__A,
440                (__v2di_mm_setzero_si128(),
441                (__mmask8__U);
442}
443
444static __inline__ __m256i __DEFAULT_FN_ATTRS256
445_mm256_cvtps_epu64 (__m128 __A) {
446  return (__m256i__builtin_ia32_cvtps2uqq256_mask ((__v4sf__A,
447                (__v4di_mm256_setzero_si256(),
448                (__mmask8) -1);
449}
450
451static __inline__ __m256i __DEFAULT_FN_ATTRS256
452_mm256_mask_cvtps_epu64 (__m256i __W__mmask8 __U__m128 __A) {
453  return (__m256i__builtin_ia32_cvtps2uqq256_mask ((__v4sf__A,
454                (__v4di__W,
455                (__mmask8__U);
456}
457
458static __inline__ __m256i __DEFAULT_FN_ATTRS256
459_mm256_maskz_cvtps_epu64 (__mmask8 __U__m128 __A) {
460  return (__m256i__builtin_ia32_cvtps2uqq256_mask ((__v4sf__A,
461                (__v4di_mm256_setzero_si256(),
462                (__mmask8__U);
463}
464
465static __inline__ __m128d __DEFAULT_FN_ATTRS128
466_mm_cvtepi64_pd (__m128i __A) {
467  return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
468}
469
470static __inline__ __m128d __DEFAULT_FN_ATTRS128
471_mm_mask_cvtepi64_pd (__m128d __W__mmask8 __U__m128i __A) {
472  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
473                                              (__v2df)_mm_cvtepi64_pd(__A),
474                                              (__v2df)__W);
475}
476
477static __inline__ __m128d __DEFAULT_FN_ATTRS128
478_mm_maskz_cvtepi64_pd (__mmask8 __U__m128i __A) {
479  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
480                                              (__v2df)_mm_cvtepi64_pd(__A),
481                                              (__v2df)_mm_setzero_pd());
482}
483
484static __inline__ __m256d __DEFAULT_FN_ATTRS256
485_mm256_cvtepi64_pd (__m256i __A) {
486  return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
487}
488
489static __inline__ __m256d __DEFAULT_FN_ATTRS256
490_mm256_mask_cvtepi64_pd (__m256d __W__mmask8 __U__m256i __A) {
491  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
492                                              (__v4df)_mm256_cvtepi64_pd(__A),
493                                              (__v4df)__W);
494}
495
496static __inline__ __m256d __DEFAULT_FN_ATTRS256
497_mm256_maskz_cvtepi64_pd (__mmask8 __U__m256i __A) {
498  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
499                                              (__v4df)_mm256_cvtepi64_pd(__A),
500                                              (__v4df)_mm256_setzero_pd());
501}
502
503static __inline__ __m128 __DEFAULT_FN_ATTRS128
504_mm_cvtepi64_ps (__m128i __A) {
505  return (__m128__builtin_ia32_cvtqq2ps128_mask ((__v2di__A,
506                (__v4sf_mm_setzero_ps(),
507                (__mmask8) -1);
508}
509
510static __inline__ __m128 __DEFAULT_FN_ATTRS128
511_mm_mask_cvtepi64_ps (__m128 __W__mmask8 __U__m128i __A) {
512  return (__m128__builtin_ia32_cvtqq2ps128_mask ((__v2di__A,
513                (__v4sf__W,
514                (__mmask8__U);
515}
516
517static __inline__ __m128 __DEFAULT_FN_ATTRS128
518_mm_maskz_cvtepi64_ps (__mmask8 __U__m128i __A) {
519  return (__m128__builtin_ia32_cvtqq2ps128_mask ((__v2di__A,
520                (__v4sf_mm_setzero_ps(),
521                (__mmask8__U);
522}
523
524static __inline__ __m128 __DEFAULT_FN_ATTRS256
525_mm256_cvtepi64_ps (__m256i __A) {
526  return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
527}
528
529static __inline__ __m128 __DEFAULT_FN_ATTRS256
530_mm256_mask_cvtepi64_ps (__m128 __W__mmask8 __U__m256i __A) {
531  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
532                                             (__v4sf)_mm256_cvtepi64_ps(__A),
533                                             (__v4sf)__W);
534}
535
536static __inline__ __m128 __DEFAULT_FN_ATTRS256
537_mm256_maskz_cvtepi64_ps (__mmask8 __U__m256i __A) {
538  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
539                                             (__v4sf)_mm256_cvtepi64_ps(__A),
540                                             (__v4sf)_mm_setzero_ps());
541}
542
543static __inline__ __m128i __DEFAULT_FN_ATTRS128
544_mm_cvttpd_epi64 (__m128d __A) {
545  return (__m128i__builtin_ia32_cvttpd2qq128_mask ((__v2df__A,
546                (__v2di_mm_setzero_si128(),
547                (__mmask8) -1);
548}
549
550static __inline__ __m128i __DEFAULT_FN_ATTRS128
551_mm_mask_cvttpd_epi64 (__m128i __W__mmask8 __U__m128d __A) {
552  return (__m128i__builtin_ia32_cvttpd2qq128_mask ((__v2df__A,
553                (__v2di__W,
554                (__mmask8__U);
555}
556
557static __inline__ __m128i __DEFAULT_FN_ATTRS128
558_mm_maskz_cvttpd_epi64 (__mmask8 __U__m128d __A) {
559  return (__m128i__builtin_ia32_cvttpd2qq128_mask ((__v2df__A,
560                (__v2di_mm_setzero_si128(),
561                (__mmask8__U);
562}
563
564static __inline__ __m256i __DEFAULT_FN_ATTRS256
565_mm256_cvttpd_epi64 (__m256d __A) {
566  return (__m256i__builtin_ia32_cvttpd2qq256_mask ((__v4df__A,
567                (__v4di_mm256_setzero_si256(),
568                (__mmask8) -1);
569}
570
571static __inline__ __m256i __DEFAULT_FN_ATTRS256
572_mm256_mask_cvttpd_epi64 (__m256i __W__mmask8 __U__m256d __A) {
573  return (__m256i__builtin_ia32_cvttpd2qq256_mask ((__v4df__A,
574                (__v4di__W,
575                (__mmask8__U);
576}
577
578static __inline__ __m256i __DEFAULT_FN_ATTRS256
579_mm256_maskz_cvttpd_epi64 (__mmask8 __U__m256d __A) {
580  return (__m256i__builtin_ia32_cvttpd2qq256_mask ((__v4df__A,
581                (__v4di_mm256_setzero_si256(),
582                (__mmask8__U);
583}
584
585static __inline__ __m128i __DEFAULT_FN_ATTRS128
586_mm_cvttpd_epu64 (__m128d __A) {
587  return (__m128i__builtin_ia32_cvttpd2uqq128_mask ((__v2df__A,
588                (__v2di_mm_setzero_si128(),
589                (__mmask8) -1);
590}
591
592static __inline__ __m128i __DEFAULT_FN_ATTRS128
593_mm_mask_cvttpd_epu64 (__m128i __W__mmask8 __U__m128d __A) {
594  return (__m128i__builtin_ia32_cvttpd2uqq128_mask ((__v2df__A,
595                (__v2di__W,
596                (__mmask8__U);
597}
598
599static __inline__ __m128i __DEFAULT_FN_ATTRS128
600_mm_maskz_cvttpd_epu64 (__mmask8 __U__m128d __A) {
601  return (__m128i__builtin_ia32_cvttpd2uqq128_mask ((__v2df__A,
602                (__v2di_mm_setzero_si128(),
603                (__mmask8__U);
604}
605
606static __inline__ __m256i __DEFAULT_FN_ATTRS256
607_mm256_cvttpd_epu64 (__m256d __A) {
608  return (__m256i__builtin_ia32_cvttpd2uqq256_mask ((__v4df__A,
609                (__v4di_mm256_setzero_si256(),
610                (__mmask8) -1);
611}
612
613static __inline__ __m256i __DEFAULT_FN_ATTRS256
614_mm256_mask_cvttpd_epu64 (__m256i __W__mmask8 __U__m256d __A) {
615  return (__m256i__builtin_ia32_cvttpd2uqq256_mask ((__v4df__A,
616                (__v4di__W,
617                (__mmask8__U);
618}
619
620static __inline__ __m256i __DEFAULT_FN_ATTRS256
621_mm256_maskz_cvttpd_epu64 (__mmask8 __U__m256d __A) {
622  return (__m256i__builtin_ia32_cvttpd2uqq256_mask ((__v4df__A,
623                (__v4di_mm256_setzero_si256(),
624                (__mmask8__U);
625}
626
627static __inline__ __m128i __DEFAULT_FN_ATTRS128
628_mm_cvttps_epi64 (__m128 __A) {
629  return (__m128i__builtin_ia32_cvttps2qq128_mask ((__v4sf__A,
630                (__v2di_mm_setzero_si128(),
631                (__mmask8) -1);
632}
633
634static __inline__ __m128i __DEFAULT_FN_ATTRS128
635_mm_mask_cvttps_epi64 (__m128i __W__mmask8 __U__m128 __A) {
636  return (__m128i__builtin_ia32_cvttps2qq128_mask ((__v4sf__A,
637                (__v2di__W,
638                (__mmask8__U);
639}
640
641static __inline__ __m128i __DEFAULT_FN_ATTRS128
642_mm_maskz_cvttps_epi64 (__mmask8 __U__m128 __A) {
643  return (__m128i__builtin_ia32_cvttps2qq128_mask ((__v4sf__A,
644                (__v2di_mm_setzero_si128(),
645                (__mmask8__U);
646}
647
648static __inline__ __m256i __DEFAULT_FN_ATTRS256
649_mm256_cvttps_epi64 (__m128 __A) {
650  return (__m256i__builtin_ia32_cvttps2qq256_mask ((__v4sf__A,
651                (__v4di_mm256_setzero_si256(),
652                (__mmask8) -1);
653}
654
655static __inline__ __m256i __DEFAULT_FN_ATTRS256
656_mm256_mask_cvttps_epi64 (__m256i __W__mmask8 __U__m128 __A) {
657  return (__m256i__builtin_ia32_cvttps2qq256_mask ((__v4sf__A,
658                (__v4di__W,
659                (__mmask8__U);
660}
661
662static __inline__ __m256i __DEFAULT_FN_ATTRS256
663_mm256_maskz_cvttps_epi64 (__mmask8 __U__m128 __A) {
664  return (__m256i__builtin_ia32_cvttps2qq256_mask ((__v4sf__A,
665                (__v4di_mm256_setzero_si256(),
666                (__mmask8__U);
667}
668
669static __inline__ __m128i __DEFAULT_FN_ATTRS128
670_mm_cvttps_epu64 (__m128 __A) {
671  return (__m128i__builtin_ia32_cvttps2uqq128_mask ((__v4sf__A,
672                (__v2di_mm_setzero_si128(),
673                (__mmask8) -1);
674}
675
676static __inline__ __m128i __DEFAULT_FN_ATTRS128
677_mm_mask_cvttps_epu64 (__m128i __W__mmask8 __U__m128 __A) {
678  return (__m128i__builtin_ia32_cvttps2uqq128_mask ((__v4sf__A,
679                (__v2di__W,
680                (__mmask8__U);
681}
682
683static __inline__ __m128i __DEFAULT_FN_ATTRS128
684_mm_maskz_cvttps_epu64 (__mmask8 __U__m128 __A) {
685  return (__m128i__builtin_ia32_cvttps2uqq128_mask ((__v4sf__A,
686                (__v2di_mm_setzero_si128(),
687                (__mmask8__U);
688}
689
690static __inline__ __m256i __DEFAULT_FN_ATTRS256
691_mm256_cvttps_epu64 (__m128 __A) {
692  return (__m256i__builtin_ia32_cvttps2uqq256_mask ((__v4sf__A,
693                (__v4di_mm256_setzero_si256(),
694                (__mmask8) -1);
695}
696
697static __inline__ __m256i __DEFAULT_FN_ATTRS256
698_mm256_mask_cvttps_epu64 (__m256i __W__mmask8 __U__m128 __A) {
699  return (__m256i__builtin_ia32_cvttps2uqq256_mask ((__v4sf__A,
700                (__v4di__W,
701                (__mmask8__U);
702}
703
704static __inline__ __m256i __DEFAULT_FN_ATTRS256
705_mm256_maskz_cvttps_epu64 (__mmask8 __U__m128 __A) {
706  return (__m256i__builtin_ia32_cvttps2uqq256_mask ((__v4sf__A,
707                (__v4di_mm256_setzero_si256(),
708                (__mmask8__U);
709}
710
711static __inline__ __m128d __DEFAULT_FN_ATTRS128
712_mm_cvtepu64_pd (__m128i __A) {
713  return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
714}
715
716static __inline__ __m128d __DEFAULT_FN_ATTRS128
717_mm_mask_cvtepu64_pd (__m128d __W__mmask8 __U__m128i __A) {
718  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
719                                              (__v2df)_mm_cvtepu64_pd(__A),
720                                              (__v2df)__W);
721}
722
723static __inline__ __m128d __DEFAULT_FN_ATTRS128
724_mm_maskz_cvtepu64_pd (__mmask8 __U__m128i __A) {
725  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
726                                              (__v2df)_mm_cvtepu64_pd(__A),
727                                              (__v2df)_mm_setzero_pd());
728}
729
730static __inline__ __m256d __DEFAULT_FN_ATTRS256
731_mm256_cvtepu64_pd (__m256i __A) {
732  return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
733}
734
735static __inline__ __m256d __DEFAULT_FN_ATTRS256
736_mm256_mask_cvtepu64_pd (__m256d __W__mmask8 __U__m256i __A) {
737  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
738                                              (__v4df)_mm256_cvtepu64_pd(__A),
739                                              (__v4df)__W);
740}
741
742static __inline__ __m256d __DEFAULT_FN_ATTRS256
743_mm256_maskz_cvtepu64_pd (__mmask8 __U__m256i __A) {
744  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
745                                              (__v4df)_mm256_cvtepu64_pd(__A),
746                                              (__v4df)_mm256_setzero_pd());
747}
748
749static __inline__ __m128 __DEFAULT_FN_ATTRS128
750_mm_cvtepu64_ps (__m128i __A) {
751  return (__m128__builtin_ia32_cvtuqq2ps128_mask ((__v2di__A,
752                (__v4sf_mm_setzero_ps(),
753                (__mmask8) -1);
754}
755
756static __inline__ __m128 __DEFAULT_FN_ATTRS128
757_mm_mask_cvtepu64_ps (__m128 __W__mmask8 __U__m128i __A) {
758  return (__m128__builtin_ia32_cvtuqq2ps128_mask ((__v2di__A,
759                (__v4sf__W,
760                (__mmask8__U);
761}
762
763static __inline__ __m128 __DEFAULT_FN_ATTRS128
764_mm_maskz_cvtepu64_ps (__mmask8 __U__m128i __A) {
765  return (__m128__builtin_ia32_cvtuqq2ps128_mask ((__v2di__A,
766                (__v4sf_mm_setzero_ps(),
767                (__mmask8__U);
768}
769
770static __inline__ __m128 __DEFAULT_FN_ATTRS256
771_mm256_cvtepu64_ps (__m256i __A) {
772  return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
773}
774
775static __inline__ __m128 __DEFAULT_FN_ATTRS256
776_mm256_mask_cvtepu64_ps (__m128 __W__mmask8 __U__m256i __A) {
777  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
778                                             (__v4sf)_mm256_cvtepu64_ps(__A),
779                                             (__v4sf)__W);
780}
781
782static __inline__ __m128 __DEFAULT_FN_ATTRS256
783_mm256_maskz_cvtepu64_ps (__mmask8 __U__m256i __A) {
784  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
785                                             (__v4sf)_mm256_cvtepu64_ps(__A),
786                                             (__v4sf)_mm_setzero_ps());
787}
788
/* Expands to __builtin_ia32_rangepd128_mask on A and B with control C (cast
 * to int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))
794
/* Expands to __builtin_ia32_rangepd128_mask on A and B with control C (cast
 * to int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))
800
/* Expands to __builtin_ia32_rangepd128_mask on A and B with control C (cast
 * to int), a zero vector and mask U. The whole expansion is parenthesized so
 * it composes safely with surrounding operators. */
#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)))
806
/* Expands to __builtin_ia32_rangepd256_mask on A and B with control C (cast
 * to int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1))
812
/* Expands to __builtin_ia32_rangepd256_mask on A and B with control C (cast
 * to int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)))
818
/* Expands to __builtin_ia32_rangepd256_mask on A and B with control C (cast
 * to int), a zero vector and mask U. The whole expansion is parenthesized so
 * it composes safely with surrounding operators. */
#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)))
824
/* Expands to __builtin_ia32_rangeps128_mask on A and B with control C (cast
 * to int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1))
830
/* Expands to __builtin_ia32_rangeps128_mask on A and B with control C (cast
 * to int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U)))
835
/* Expands to __builtin_ia32_rangeps128_mask on A and B with control C (cast
 * to int), a zero vector and mask U. The whole expansion is parenthesized so
 * it composes safely with surrounding operators. */
#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)))
841
/* Expands to __builtin_ia32_rangeps256_mask on A and B with control C (cast
 * to int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1))
847
/* Expands to __builtin_ia32_rangeps256_mask on A and B with control C (cast
 * to int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U)))
852
/* Expands to __builtin_ia32_rangeps256_mask on A and B with control C (cast
 * to int), a zero vector and mask U. The whole expansion is parenthesized so
 * it composes safely with surrounding operators. */
#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)))
858
/* Expands to __builtin_ia32_reducepd128_mask on A with control B (cast to
 * int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1))
863
/* Expands to __builtin_ia32_reducepd128_mask on A with control B (cast to
 * int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)))
868
/* Expands to __builtin_ia32_reducepd128_mask on A with control B (cast to
 * int), a zero vector and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)))
873
/* Expands to __builtin_ia32_reducepd256_mask on A with control B (cast to
 * int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1))
878
/* Expands to __builtin_ia32_reducepd256_mask on A with control B (cast to
 * int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)))
883
/* Expands to __builtin_ia32_reducepd256_mask on A with control B (cast to
 * int), a zero vector and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)))
888
/* Expands to __builtin_ia32_reduceps128_mask on A with control B (cast to
 * int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1))
893
/* Expands to __builtin_ia32_reduceps128_mask on A with control B (cast to
 * int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)))
898
/* Expands to __builtin_ia32_reduceps128_mask on A with control B (cast to
 * int), a zero vector and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)))
903
/* Expands to __builtin_ia32_reduceps256_mask on A with control B (cast to
 * int), a zero vector and an all-ones mask. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1))
908
/* Expands to __builtin_ia32_reduceps256_mask on A with control B (cast to
 * int), vector W and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)))
913
/* Expands to __builtin_ia32_reduceps256_mask on A with control B (cast to
 * int), a zero vector and mask U. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)))
918
919static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
920_mm_movepi32_mask (__m128i __A)
921{
922  return (__mmask8__builtin_ia32_cvtd2mask128 ((__v4si__A);
923}
924
925static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
926_mm256_movepi32_mask (__m256i __A)
927{
928  return (__mmask8__builtin_ia32_cvtd2mask256 ((__v8si__A);
929}
930
931static __inline__ __m128i __DEFAULT_FN_ATTRS128
932_mm_movm_epi32 (__mmask8 __A)
933{
934  return (__m128i__builtin_ia32_cvtmask2d128 (__A);
935}
936
937static __inline__ __m256i __DEFAULT_FN_ATTRS256
938_mm256_movm_epi32 (__mmask8 __A)
939{
940  return (__m256i__builtin_ia32_cvtmask2d256 (__A);
941}
942
943static __inline__ __m128i __DEFAULT_FN_ATTRS128
944_mm_movm_epi64 (__mmask8 __A)
945{
946  return (__m128i__builtin_ia32_cvtmask2q128 (__A);
947}
948
949static __inline__ __m256i __DEFAULT_FN_ATTRS256
950_mm256_movm_epi64 (__mmask8 __A)
951{
952  return (__m256i__builtin_ia32_cvtmask2q256 (__A);
953}
954
955static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
956_mm_movepi64_mask (__m128i __A)
957{
958  return (__mmask8__builtin_ia32_cvtq2mask128 ((__v2di__A);
959}
960
961static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
962_mm256_movepi64_mask (__m256i __A)
963{
964  return (__mmask8__builtin_ia32_cvtq2mask256 ((__v4di__A);
965}
966
967static __inline__ __m256 __DEFAULT_FN_ATTRS256
968_mm256_broadcast_f32x2 (__m128 __A)
969{
970  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
971                                         01010101);
972}
973
974static __inline__ __m256 __DEFAULT_FN_ATTRS256
975_mm256_mask_broadcast_f32x2 (__m256 __O__mmask8 __M__m128 __A)
976{
977  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
978                                             (__v8sf)_mm256_broadcast_f32x2(__A),
979                                             (__v8sf)__O);
980}
981
982static __inline__ __m256 __DEFAULT_FN_ATTRS256
983_mm256_maskz_broadcast_f32x2 (__mmask8 __M__m128 __A)
984{
985  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
986                                             (__v8sf)_mm256_broadcast_f32x2(__A),
987                                             (__v8sf)_mm256_setzero_ps());
988}
989
990static __inline__ __m256d __DEFAULT_FN_ATTRS256
991_mm256_broadcast_f64x2(__m128d __A)
992{
993  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
994                                          0101);
995}
996
997static __inline__ __m256d __DEFAULT_FN_ATTRS256
998_mm256_mask_broadcast_f64x2(__m256d __O__mmask8 __M__m128d __A)
999{
1000  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1001                                            (__v4df)_mm256_broadcast_f64x2(__A),
1002                                            (__v4df)__O);
1003}
1004
1005static __inline__ __m256d __DEFAULT_FN_ATTRS256
1006_mm256_maskz_broadcast_f64x2 (__mmask8 __M__m128d __A)
1007{
1008  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1009                                            (__v4df)_mm256_broadcast_f64x2(__A),
1010                                            (__v4df)_mm256_setzero_pd());
1011}
1012
1013static __inline__ __m128i __DEFAULT_FN_ATTRS128
1014_mm_broadcast_i32x2 (__m128i __A)
1015{
1016  return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1017                                          0101);
1018}
1019
1020static __inline__ __m128i __DEFAULT_FN_ATTRS128
1021_mm_mask_broadcast_i32x2 (__m128i __O__mmask8 __M__m128i __A)
1022{
1023  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1024                                             (__v4si)_mm_broadcast_i32x2(__A),
1025                                             (__v4si)__O);
1026}
1027
1028static __inline__ __m128i __DEFAULT_FN_ATTRS128
1029_mm_maskz_broadcast_i32x2 (__mmask8 __M__m128i __A)
1030{
1031  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1032                                             (__v4si)_mm_broadcast_i32x2(__A),
1033                                             (__v4si)_mm_setzero_si128());
1034}
1035
1036static __inline__ __m256i __DEFAULT_FN_ATTRS256
1037_mm256_broadcast_i32x2 (__m128i __A)
1038{
1039  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1040                                          01010101);
1041}
1042
1043static __inline__ __m256i __DEFAULT_FN_ATTRS256
1044_mm256_mask_broadcast_i32x2 (__m256i __O__mmask8 __M__m128i __A)
1045{
1046  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1047                                             (__v8si)_mm256_broadcast_i32x2(__A),
1048                                             (__v8si)__O);
1049}
1050
1051static __inline__ __m256i __DEFAULT_FN_ATTRS256
1052_mm256_maskz_broadcast_i32x2 (__mmask8 __M__m128i __A)
1053{
1054  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1055                                             (__v8si)_mm256_broadcast_i32x2(__A),
1056                                             (__v8si)_mm256_setzero_si256());
1057}
1058
1059static __inline__ __m256i __DEFAULT_FN_ATTRS256
1060_mm256_broadcast_i64x2(__m128i __A)
1061{
1062  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1063                                          0101);
1064}
1065
1066static __inline__ __m256i __DEFAULT_FN_ATTRS256
1067_mm256_mask_broadcast_i64x2(__m256i __O__mmask8 __M__m128i __A)
1068{
1069  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1070                                            (__v4di)_mm256_broadcast_i64x2(__A),
1071                                            (__v4di)__O);
1072}
1073
1074static __inline__ __m256i __DEFAULT_FN_ATTRS256
1075_mm256_maskz_broadcast_i64x2 (__mmask8 __M__m128i __A)
1076{
1077  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1078                                            (__v4di)_mm256_broadcast_i64x2(__A),
1079                                            (__v4di)_mm256_setzero_si256());
1080}
1081
/* Expands to __builtin_ia32_extractf64x2_256_mask on A with selector imm
 * (cast to int), _mm_undefined_pd() and an all-ones mask. The whole
 * expansion is parenthesized so it composes safely with surrounding
 * operators. */
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))
1087
/* Expands to __builtin_ia32_extractf64x2_256_mask on A with selector imm
 * (cast to int), vector W and mask U. The whole expansion is parenthesized
 * so it composes safely with surrounding operators. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))
1093
/* Expands to __builtin_ia32_extractf64x2_256_mask on A with selector imm
 * (cast to int), a zero vector and mask U. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))
1099
/* Expands to __builtin_ia32_extracti64x2_256_mask on A with selector imm
 * (cast to int), _mm_undefined_si128() and an all-ones mask. The whole
 * expansion is parenthesized so it composes safely with surrounding
 * operators. */
#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
1105
/* Expands to __builtin_ia32_extracti64x2_256_mask on A with selector imm
 * (cast to int), vector W and mask U. The whole expansion is parenthesized
 * so it composes safely with surrounding operators. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))
1111
/* Expands to __builtin_ia32_extracti64x2_256_mask on A with selector imm
 * (cast to int), a zero vector and mask U. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
1117
/* Expands to __builtin_ia32_insertf64x2_256 on 256-bit vector A, 128-bit
 * vector B and selector imm (cast to int). The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))
1121
/* Merge-masking form of _mm256_insertf64x2: blends
 * _mm256_insertf64x2(A, B, imm) with W under mask U via
 * __builtin_ia32_selectpd_256. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)(__m256d)(W)))
1126
/* Zero-masking form of _mm256_insertf64x2: blends
 * _mm256_insertf64x2(A, B, imm) with _mm256_setzero_pd() under mask U via
 * __builtin_ia32_selectpd_256. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)_mm256_setzero_pd()))
1131
/* Expands to __builtin_ia32_inserti64x2_256 on 256-bit vector A, 128-bit
 * vector B and selector imm (cast to int). The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))
1135
/* Merge-masking form of _mm256_inserti64x2: blends
 * _mm256_inserti64x2(A, B, imm) with W under mask U via
 * __builtin_ia32_selectq_256. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                  (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                  (__v4di)(__m256i)(W)))
1140
/* Zero-masking form of _mm256_inserti64x2: blends
 * _mm256_inserti64x2(A, B, imm) with _mm256_setzero_si256() under mask U via
 * __builtin_ia32_selectq_256. The whole expansion is parenthesized so it
 * composes safely with surrounding operators. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                  (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                  (__v4di)_mm256_setzero_si256()))
1145
/* Expands to __builtin_ia32_fpclasspd128_mask on A with category selector
 * imm (cast to int) and mask U, yielding an __mmask8. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)(U)))
1149
/* Expands to __builtin_ia32_fpclasspd128_mask on A with category selector
 * imm (cast to int) and an all-ones mask, yielding an __mmask8. The whole
 * expansion is parenthesized so it composes safely with surrounding
 * operators. */
#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)-1))
1153
/* Expands to __builtin_ia32_fpclasspd256_mask on A with category selector
 * imm (cast to int) and mask U, yielding an __mmask8. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)(U)))
1157
/* Expands to __builtin_ia32_fpclasspd256_mask on A with category selector
 * imm (cast to int) and an all-ones mask, yielding an __mmask8. The whole
 * expansion is parenthesized so it composes safely with surrounding
 * operators. */
#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)-1))
1161
/* Expands to __builtin_ia32_fpclassps128_mask on A with category selector
 * imm (cast to int) and mask U, yielding an __mmask8. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)(U)))
1165
/* Expands to __builtin_ia32_fpclassps128_mask on A with category selector
 * imm (cast to int) and an all-ones mask, yielding an __mmask8. The whole
 * expansion is parenthesized so it composes safely with surrounding
 * operators. */
#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)-1))
1169
/* Expands to __builtin_ia32_fpclassps256_mask on A with category selector
 * imm (cast to int) and mask U, yielding an __mmask8. The whole expansion is
 * parenthesized so it composes safely with surrounding operators. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)(U)))
1173
/* Expands to __builtin_ia32_fpclassps256_mask on A with category selector
 * imm (cast to int) and an all-ones mask, yielding an __mmask8. The whole
 * expansion is parenthesized so it composes safely with surrounding
 * operators. */
#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)-1))
1177
1178#undef __DEFAULT_FN_ATTRS128
1179#undef __DEFAULT_FN_ATTRS256
1180
1181#endif
1182