/* Clang Project: clang_source_code/lib/Headers/avx512vlvbmi2intrin.h */
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLVBMI2INTRIN_H
29#define __AVX512VLVBMI2INTRIN_H
30
/* Define the default attributes for the functions in this file.
 * Both variants force inlining, suppress debug info and enable the
 * "avx512vl,avx512vbmi2" target features; they differ only in the
 * __min_vector_width__ value (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))

35static __inline__ __m128i __DEFAULT_FN_ATTRS128
36_mm_mask_compress_epi16(__m128i __S__mmask8 __U__m128i __D)
37{
38  return (__m128i__builtin_ia32_compresshi128_mask ((__v8hi__D,
39              (__v8hi__S,
40              __U);
41}
42
43static __inline__ __m128i __DEFAULT_FN_ATTRS128
44_mm_maskz_compress_epi16(__mmask8 __U__m128i __D)
45{
46  return (__m128i__builtin_ia32_compresshi128_mask ((__v8hi__D,
47              (__v8hi_mm_setzero_si128(),
48              __U);
49}
50
51static __inline__ __m128i __DEFAULT_FN_ATTRS128
52_mm_mask_compress_epi8(__m128i __S__mmask16 __U__m128i __D)
53{
54  return (__m128i__builtin_ia32_compressqi128_mask ((__v16qi__D,
55              (__v16qi__S,
56              __U);
57}
58
59static __inline__ __m128i __DEFAULT_FN_ATTRS128
60_mm_maskz_compress_epi8(__mmask16 __U__m128i __D)
61{
62  return (__m128i__builtin_ia32_compressqi128_mask ((__v16qi__D,
63              (__v16qi_mm_setzero_si128(),
64              __U);
65}
66
67static __inline__ void __DEFAULT_FN_ATTRS128
68_mm_mask_compressstoreu_epi16(void *__P__mmask8 __U__m128i __D)
69{
70  __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi__D,
71              __U);
72}
73
74static __inline__ void __DEFAULT_FN_ATTRS128
75_mm_mask_compressstoreu_epi8(void *__P__mmask16 __U__m128i __D)
76{
77  __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi__D,
78              __U);
79}
80
81static __inline__ __m128i __DEFAULT_FN_ATTRS128
82_mm_mask_expand_epi16(__m128i __S__mmask8 __U__m128i __D)
83{
84  return (__m128i__builtin_ia32_expandhi128_mask ((__v8hi__D,
85              (__v8hi__S,
86              __U);
87}
88
89static __inline__ __m128i __DEFAULT_FN_ATTRS128
90_mm_maskz_expand_epi16(__mmask8 __U__m128i __D)
91{
92  return (__m128i__builtin_ia32_expandhi128_mask ((__v8hi__D,
93              (__v8hi_mm_setzero_si128(),
94              __U);
95}
96
97static __inline__ __m128i __DEFAULT_FN_ATTRS128
98_mm_mask_expand_epi8(__m128i __S__mmask16 __U__m128i __D)
99{
100  return (__m128i__builtin_ia32_expandqi128_mask ((__v16qi__D,
101              (__v16qi__S,
102              __U);
103}
104
105static __inline__ __m128i __DEFAULT_FN_ATTRS128
106_mm_maskz_expand_epi8(__mmask16 __U__m128i __D)
107{
108  return (__m128i__builtin_ia32_expandqi128_mask ((__v16qi__D,
109              (__v16qi_mm_setzero_si128(),
110              __U);
111}
112
113static __inline__ __m128i __DEFAULT_FN_ATTRS128
114_mm_mask_expandloadu_epi16(__m128i __S__mmask8 __Uvoid const *__P)
115{
116  return (__m128i__builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
117              (__v8hi__S,
118              __U);
119}
120
121static __inline__ __m128i __DEFAULT_FN_ATTRS128
122_mm_maskz_expandloadu_epi16(__mmask8 __Uvoid const *__P)
123{
124  return (__m128i__builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
125              (__v8hi_mm_setzero_si128(),
126              __U);
127}
128
129static __inline__ __m128i __DEFAULT_FN_ATTRS128
130_mm_mask_expandloadu_epi8(__m128i __S__mmask16 __Uvoid const *__P)
131{
132  return (__m128i__builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
133              (__v16qi__S,
134              __U);
135}
136
137static __inline__ __m128i __DEFAULT_FN_ATTRS128
138_mm_maskz_expandloadu_epi8(__mmask16 __Uvoid const *__P)
139{
140  return (__m128i__builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
141              (__v16qi_mm_setzero_si128(),
142              __U);
143}
144
145static __inline__ __m256i __DEFAULT_FN_ATTRS256
146_mm256_mask_compress_epi16(__m256i __S__mmask16 __U__m256i __D)
147{
148  return (__m256i__builtin_ia32_compresshi256_mask ((__v16hi__D,
149              (__v16hi__S,
150              __U);
151}
152
153static __inline__ __m256i __DEFAULT_FN_ATTRS256
154_mm256_maskz_compress_epi16(__mmask16 __U__m256i __D)
155{
156  return (__m256i__builtin_ia32_compresshi256_mask ((__v16hi__D,
157              (__v16hi_mm256_setzero_si256(),
158              __U);
159}
160
161static __inline__ __m256i __DEFAULT_FN_ATTRS256
162_mm256_mask_compress_epi8(__m256i __S__mmask32 __U__m256i __D)
163{
164  return (__m256i__builtin_ia32_compressqi256_mask ((__v32qi__D,
165              (__v32qi__S,
166              __U);
167}
168
169static __inline__ __m256i __DEFAULT_FN_ATTRS256
170_mm256_maskz_compress_epi8(__mmask32 __U__m256i __D)
171{
172  return (__m256i__builtin_ia32_compressqi256_mask ((__v32qi__D,
173              (__v32qi_mm256_setzero_si256(),
174              __U);
175}
176
177static __inline__ void __DEFAULT_FN_ATTRS256
178_mm256_mask_compressstoreu_epi16(void *__P__mmask16 __U__m256i __D)
179{
180  __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi__D,
181              __U);
182}
183
184static __inline__ void __DEFAULT_FN_ATTRS256
185_mm256_mask_compressstoreu_epi8(void *__P__mmask32 __U__m256i __D)
186{
187  __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi__D,
188              __U);
189}
190
191static __inline__ __m256i __DEFAULT_FN_ATTRS256
192_mm256_mask_expand_epi16(__m256i __S__mmask16 __U__m256i __D)
193{
194  return (__m256i__builtin_ia32_expandhi256_mask ((__v16hi__D,
195              (__v16hi__S,
196              __U);
197}
198
199static __inline__ __m256i __DEFAULT_FN_ATTRS256
200_mm256_maskz_expand_epi16(__mmask16 __U__m256i __D)
201{
202  return (__m256i__builtin_ia32_expandhi256_mask ((__v16hi__D,
203              (__v16hi_mm256_setzero_si256(),
204              __U);
205}
206
207static __inline__ __m256i __DEFAULT_FN_ATTRS256
208_mm256_mask_expand_epi8(__m256i __S__mmask32 __U__m256i __D)
209{
210  return (__m256i__builtin_ia32_expandqi256_mask ((__v32qi__D,
211              (__v32qi__S,
212              __U);
213}
214
215static __inline__ __m256i __DEFAULT_FN_ATTRS256
216_mm256_maskz_expand_epi8(__mmask32 __U__m256i __D)
217{
218  return (__m256i__builtin_ia32_expandqi256_mask ((__v32qi__D,
219              (__v32qi_mm256_setzero_si256(),
220              __U);
221}
222
223static __inline__ __m256i __DEFAULT_FN_ATTRS256
224_mm256_mask_expandloadu_epi16(__m256i __S__mmask16 __Uvoid const *__P)
225{
226  return (__m256i__builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
227              (__v16hi__S,
228              __U);
229}
230
231static __inline__ __m256i __DEFAULT_FN_ATTRS256
232_mm256_maskz_expandloadu_epi16(__mmask16 __Uvoid const *__P)
233{
234  return (__m256i__builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
235              (__v16hi_mm256_setzero_si256(),
236              __U);
237}
238
239static __inline__ __m256i __DEFAULT_FN_ATTRS256
240_mm256_mask_expandloadu_epi8(__m256i __S__mmask32 __Uvoid const *__P)
241{
242  return (__m256i__builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
243              (__v32qi__S,
244              __U);
245}
246
247static __inline__ __m256i __DEFAULT_FN_ATTRS256
248_mm256_maskz_expandloadu_epi8(__mmask32 __Uvoid const *__P)
249{
250  return (__m256i__builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
251              (__v32qi_mm256_setzero_si256(),
252              __U);
253}
254
/* Immediate double shift left on four 64-bit lanes: expands to the
 * vpshldq256 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm256_shldi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \
                                      (__v4di)(__m256i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm256_shldi_epi64(A, B, I)
 * and S as the fallback vector to the selectq_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_mask_shldi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                    (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
                                    (__v4di)(__m256i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm256_shldi_epi64(A, B, I)
 * and an all-zero fallback vector to the selectq_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                    (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
                                    (__v4di)_mm256_setzero_si256()))

/* Immediate double shift left on two 64-bit lanes: expands to the
 * vpshldq128 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm_shldi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \
                                      (__v2di)(__m128i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm_shldi_epi64(A, B, I)
 * and S as the fallback vector to the selectq_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_mask_shldi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shldi_epi64((A), (B), (I)), \
                                      (__v2di)(__m128i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm_shldi_epi64(A, B, I)
 * and an all-zero fallback vector to the selectq_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_maskz_shldi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shldi_epi64((A), (B), (I)), \
                                      (__v2di)_mm_setzero_si128()))

/* Immediate double shift left on eight 32-bit lanes: expands to the
 * vpshldd256 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm256_shldi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \
                                      (__v8si)(__m256i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm256_shldi_epi32(A, B, I)
 * and S as the fallback vector to the selectd_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_mask_shldi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                    (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                    (__v8si)(__m256i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm256_shldi_epi32(A, B, I)
 * and an all-zero fallback vector to the selectd_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                    (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                    (__v8si)_mm256_setzero_si256()))

/* Immediate double shift left on four 32-bit lanes: expands to the
 * vpshldd128 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm_shldi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \
                                      (__v4si)(__m128i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm_shldi_epi32(A, B, I)
 * and S as the fallback vector to the selectd_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_mask_shldi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                      (__v4si)(__m128i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm_shldi_epi32(A, B, I)
 * and an all-zero fallback vector to the selectd_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_maskz_shldi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                      (__v4si)_mm_setzero_si128()))

/* Immediate double shift left on sixteen 16-bit lanes: expands to the
 * vpshldw256 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm256_shldi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \
                                      (__v16hi)(__m256i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm256_shldi_epi16(A, B, I)
 * and S as the fallback vector to the selectw_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                   (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                   (__v16hi)(__m256i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm256_shldi_epi16(A, B, I)
 * and an all-zero fallback vector to the selectw_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                   (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                   (__v16hi)_mm256_setzero_si256()))

/* Immediate double shift left on eight 16-bit lanes: expands to the
 * vpshldw128 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm_shldi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm_shldi_epi16(A, B, I)
 * and S as the fallback vector to the selectw_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_mask_shldi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)(__m128i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm_shldi_epi16(A, B, I)
 * and an all-zero fallback vector to the selectw_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_maskz_shldi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)_mm_setzero_si128()))

/* Immediate double shift right on four 64-bit lanes: expands to the
 * vpshrdq256 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm256_shrdi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
                                      (__v4di)(__m256i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm256_shrdi_epi64(A, B, I)
 * and S as the fallback vector to the selectq_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                    (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                    (__v4di)(__m256i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm256_shrdi_epi64(A, B, I)
 * and an all-zero fallback vector to the selectq_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                    (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                    (__v4di)_mm256_setzero_si256()))

/* Immediate double shift right on two 64-bit lanes: expands to the
 * vpshrdq128 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm_shrdi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
                                      (__v2di)(__m128i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm_shrdi_epi64(A, B, I)
 * and S as the fallback vector to the selectq_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)(__m128i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm_shrdi_epi64(A, B, I)
 * and an all-zero fallback vector to the selectq_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_maskz_shrdi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)_mm_setzero_si128()))

/* Immediate double shift right on eight 32-bit lanes: expands to the
 * vpshrdd256 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm256_shrdi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
                                      (__v8si)(__m256i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm256_shrdi_epi32(A, B, I)
 * and S as the fallback vector to the selectd_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                    (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                    (__v8si)(__m256i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm256_shrdi_epi32(A, B, I)
 * and an all-zero fallback vector to the selectd_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                    (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                    (__v8si)_mm256_setzero_si256()))

/* Immediate double shift right on four 32-bit lanes: expands to the
 * vpshrdd128 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm_shrdi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
                                      (__v4si)(__m128i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm_shrdi_epi32(A, B, I)
 * and S as the fallback vector to the selectd_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)(__m128i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm_shrdi_epi32(A, B, I)
 * and an all-zero fallback vector to the selectd_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_maskz_shrdi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)_mm_setzero_si128()))

/* Immediate double shift right on sixteen 16-bit lanes: expands to the
 * vpshrdw256 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm256_shrdi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
                                      (__v16hi)(__m256i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm256_shrdi_epi16(A, B, I)
 * and S as the fallback vector to the selectw_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                   (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                   (__v16hi)(__m256i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm256_shrdi_epi16(A, B, I)
 * and an all-zero fallback vector to the selectw_256 builtin. Expansion is
 * fully parenthesized. */
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                   (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                   (__v16hi)_mm256_setzero_si256()))

/* Immediate double shift right on eight 16-bit lanes: expands to the
 * vpshrdw128 builtin on A and B with the count I cast to int. The whole
 * expansion is parenthesized so the leading cast cannot mis-bind when the
 * macro is followed by a postfix operator. */
#define _mm_shrdi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

/* Merge-masked form: feeds mask U, the result of _mm_shrdi_epi16(A, B, I)
 * and S as the fallback vector to the selectw_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                      (__v8hi)(__m128i)(S)))

/* Zero-masked form: feeds mask U, the result of _mm_shrdi_epi16(A, B, I)
 * and an all-zero fallback vector to the selectw_128 builtin. Expansion is
 * fully parenthesized. */
#define _mm_maskz_shrdi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                      (__v8hi)_mm_setzero_si128()))

423static __inline__ __m256i __DEFAULT_FN_ATTRS256
424_mm256_shldv_epi64(__m256i __A__m256i __B__m256i __C)
425{
426  return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
427                                             (__v4di)__C);
428}
429
430static __inline__ __m256i __DEFAULT_FN_ATTRS256
431_mm256_mask_shldv_epi64(__m256i __A__mmask8 __U__m256i __B__m256i __C)
432{
433  return (__m256i)__builtin_ia32_selectq_256(__U,
434                                      (__v4di)_mm256_shldv_epi64(__A__B__C),
435                                      (__v4di)__A);
436}
437
438static __inline__ __m256i __DEFAULT_FN_ATTRS256
439_mm256_maskz_shldv_epi64(__mmask8 __U__m256i __A__m256i __B__m256i __C)
440{
441  return (__m256i)__builtin_ia32_selectq_256(__U,
442                                      (__v4di)_mm256_shldv_epi64(__A__B__C),
443                                      (__v4di)_mm256_setzero_si256());
444}
445
446static __inline__ __m128i __DEFAULT_FN_ATTRS128
447_mm_shldv_epi64(__m128i __A__m128i __B__m128i __C)
448{
449  return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
450                                             (__v2di)__C);
451}
452
453static __inline__ __m128i __DEFAULT_FN_ATTRS128
454_mm_mask_shldv_epi64(__m128i __A__mmask8 __U__m128i __B__m128i __C)
455{
456  return (__m128i)__builtin_ia32_selectq_128(__U,
457                                         (__v2di)_mm_shldv_epi64(__A__B__C),
458                                         (__v2di)__A);
459}
460
461static __inline__ __m128i __DEFAULT_FN_ATTRS128
462_mm_maskz_shldv_epi64(__mmask8 __U__m128i __A__m128i __B__m128i __C)
463{
464  return (__m128i)__builtin_ia32_selectq_128(__U,
465                                         (__v2di)_mm_shldv_epi64(__A__B__C),
466                                         (__v2di)_mm_setzero_si128());
467}
468
469static __inline__ __m256i __DEFAULT_FN_ATTRS256
470_mm256_shldv_epi32(__m256i __A__m256i __B__m256i __C)
471{
472  return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
473                                             (__v8si)__C);
474}
475
476static __inline__ __m256i __DEFAULT_FN_ATTRS256
477_mm256_mask_shldv_epi32(__m256i __A__mmask8 __U__m256i __B__m256i __C)
478{
479  return (__m256i)__builtin_ia32_selectd_256(__U,
480                                      (__v8si)_mm256_shldv_epi32(__A__B__C),
481                                      (__v8si)__A);
482}
483
484static __inline__ __m256i __DEFAULT_FN_ATTRS256
485_mm256_maskz_shldv_epi32(__mmask8 __U__m256i __A__m256i __B__m256i __C)
486{
487  return (__m256i)__builtin_ia32_selectd_256(__U,
488                                      (__v8si)_mm256_shldv_epi32(__A__B__C),
489                                      (__v8si)_mm256_setzero_si256());
490}
491
492static __inline__ __m128i __DEFAULT_FN_ATTRS128
493_mm_shldv_epi32(__m128i __A__m128i __B__m128i __C)
494{
495  return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
496                                             (__v4si)__C);
497}
498
499static __inline__ __m128i __DEFAULT_FN_ATTRS128
500_mm_mask_shldv_epi32(__m128i __A__mmask8 __U__m128i __B__m128i __C)
501{
502  return (__m128i)__builtin_ia32_selectd_128(__U,
503                                         (__v4si)_mm_shldv_epi32(__A__B__C),
504                                         (__v4si)__A);
505}
506
507static __inline__ __m128i __DEFAULT_FN_ATTRS128
508_mm_maskz_shldv_epi32(__mmask8 __U__m128i __A__m128i __B__m128i __C)
509{
510  return (__m128i)__builtin_ia32_selectd_128(__U,
511                                         (__v4si)_mm_shldv_epi32(__A__B__C),
512                                         (__v4si)_mm_setzero_si128());
513}
514
515static __inline__ __m256i __DEFAULT_FN_ATTRS256
516_mm256_shldv_epi16(__m256i __A__m256i __B__m256i __C)
517{
518  return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
519                                             (__v16hi)__C);
520}
521
522static __inline__ __m256i __DEFAULT_FN_ATTRS256
523_mm256_mask_shldv_epi16(__m256i __A__mmask16 __U__m256i __B__m256i __C)
524{
525  return (__m256i)__builtin_ia32_selectw_256(__U,
526                                      (__v16hi)_mm256_shldv_epi16(__A__B__C),
527                                      (__v16hi)__A);
528}
529
530static __inline__ __m256i __DEFAULT_FN_ATTRS256
531_mm256_maskz_shldv_epi16(__mmask16 __U__m256i __A__m256i __B__m256i __C)
532{
533  return (__m256i)__builtin_ia32_selectw_256(__U,
534                                      (__v16hi)_mm256_shldv_epi16(__A__B__C),
535                                      (__v16hi)_mm256_setzero_si256());
536}
537
538static __inline__ __m128i __DEFAULT_FN_ATTRS128
539_mm_shldv_epi16(__m128i __A__m128i __B__m128i __C)
540{
541  return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
542                                             (__v8hi)__C);
543}
544
545static __inline__ __m128i __DEFAULT_FN_ATTRS128
546_mm_mask_shldv_epi16(__m128i __A__mmask8 __U__m128i __B__m128i __C)
547{
548  return (__m128i)__builtin_ia32_selectw_128(__U,
549                                         (__v8hi)_mm_shldv_epi16(__A__B__C),
550                                         (__v8hi)__A);
551}
552
553static __inline__ __m128i __DEFAULT_FN_ATTRS128
554_mm_maskz_shldv_epi16(__mmask8 __U__m128i __A__m128i __B__m128i __C)
555{
556  return (__m128i)__builtin_ia32_selectw_128(__U,
557                                         (__v8hi)_mm_shldv_epi16(__A__B__C),
558                                         (__v8hi)_mm_setzero_si128());
559}
560
561static __inline__ __m256i __DEFAULT_FN_ATTRS256
562_mm256_shrdv_epi64(__m256i __A__m256i __B__m256i __C)
563{
564  return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
565                                             (__v4di)__C);
566}
567
568static __inline__ __m256i __DEFAULT_FN_ATTRS256
569_mm256_mask_shrdv_epi64(__m256i __A__mmask8 __U__m256i __B__m256i __C)
570{
571  return (__m256i)__builtin_ia32_selectq_256(__U,
572                                      (__v4di)_mm256_shrdv_epi64(__A__B__C),
573                                      (__v4di)__A);
574}
575
576static __inline__ __m256i __DEFAULT_FN_ATTRS256
577_mm256_maskz_shrdv_epi64(__mmask8 __U__m256i __A__m256i __B__m256i __C)
578{
579  return (__m256i)__builtin_ia32_selectq_256(__U,
580                                      (__v4di)_mm256_shrdv_epi64(__A__B__C),
581                                      (__v4di)_mm256_setzero_si256());
582}
583
584static __inline__ __m128i __DEFAULT_FN_ATTRS128
585_mm_shrdv_epi64(__m128i __A__m128i __B__m128i __C)
586{
587  return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
588                                             (__v2di)__C);
589}
590
591static __inline__ __m128i __DEFAULT_FN_ATTRS128
592_mm_mask_shrdv_epi64(__m128i __A__mmask8 __U__m128i __B__m128i __C)
593{
594  return (__m128i)__builtin_ia32_selectq_128(__U,
595                                         (__v2di)_mm_shrdv_epi64(__A__B__C),
596                                         (__v2di)__A);
597}
598
599static __inline__ __m128i __DEFAULT_FN_ATTRS128
600_mm_maskz_shrdv_epi64(__mmask8 __U__m128i __A__m128i __B__m128i __C)
601{
602  return (__m128i)__builtin_ia32_selectq_128(__U,
603                                         (__v2di)_mm_shrdv_epi64(__A__B__C),
604                                         (__v2di)_mm_setzero_si128());
605}
606
607static __inline__ __m256i __DEFAULT_FN_ATTRS256
608_mm256_shrdv_epi32(__m256i __A__m256i __B__m256i __C)
609{
610  return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
611                                             (__v8si)__C);
612}
613
614static __inline__ __m256i __DEFAULT_FN_ATTRS256
615_mm256_mask_shrdv_epi32(__m256i __A__mmask8 __U__m256i __B__m256i __C)
616{
617  return (__m256i)__builtin_ia32_selectd_256(__U,
618                                      (__v8si)_mm256_shrdv_epi32(__A__B__C),
619                                      (__v8si)__A);
620}
621
622static __inline__ __m256i __DEFAULT_FN_ATTRS256
623_mm256_maskz_shrdv_epi32(__mmask8 __U__m256i __A__m256i __B__m256i __C)
624{
625  return (__m256i)__builtin_ia32_selectd_256(__U,
626                                      (__v8si)_mm256_shrdv_epi32(__A__B__C),
627                                      (__v8si)_mm256_setzero_si256());
628}
629
630static __inline__ __m128i __DEFAULT_FN_ATTRS128
631_mm_shrdv_epi32(__m128i __A__m128i __B__m128i __C)
632{
633  return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
634                                             (__v4si)__C);
635}
636
637static __inline__ __m128i __DEFAULT_FN_ATTRS128
638_mm_mask_shrdv_epi32(__m128i __A__mmask8 __U__m128i __B__m128i __C)
639{
640  return (__m128i)__builtin_ia32_selectd_128(__U,
641                                         (__v4si)_mm_shrdv_epi32(__A__B__C),
642                                         (__v4si)__A);
643}
644
645static __inline__ __m128i __DEFAULT_FN_ATTRS128
646_mm_maskz_shrdv_epi32(__mmask8 __U__m128i __A__m128i __B__m128i __C)
647{
648  return (__m128i)__builtin_ia32_selectd_128(__U,
649                                         (__v4si)_mm_shrdv_epi32(__A__B__C),
650                                         (__v4si)_mm_setzero_si128());
651}
652
653static __inline__ __m256i __DEFAULT_FN_ATTRS256
654_mm256_shrdv_epi16(__m256i __A__m256i __B__m256i __C)
655{
656  return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
657                                             (__v16hi)__C);
658}
659
660static __inline__ __m256i __DEFAULT_FN_ATTRS256
661_mm256_mask_shrdv_epi16(__m256i __A__mmask16 __U__m256i __B__m256i __C)
662{
663  return (__m256i)__builtin_ia32_selectw_256(__U,
664                                     (__v16hi)_mm256_shrdv_epi16(__A__B__C),
665                                     (__v16hi)__A);
666}
667
668static __inline__ __m256i __DEFAULT_FN_ATTRS256
669_mm256_maskz_shrdv_epi16(__mmask16 __U__m256i __A__m256i __B__m256i __C)
670{
671  return (__m256i)__builtin_ia32_selectw_256(__U,
672                                     (__v16hi)_mm256_shrdv_epi16(__A__B__C),
673                                     (__v16hi)_mm256_setzero_si256());
674}
675
676static __inline__ __m128i __DEFAULT_FN_ATTRS128
677_mm_shrdv_epi16(__m128i __A__m128i __B__m128i __C)
678{
679  return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
680                                             (__v8hi)__C);
681}
682
683static __inline__ __m128i __DEFAULT_FN_ATTRS128
684_mm_mask_shrdv_epi16(__m128i __A__mmask8 __U__m128i __B__m128i __C)
685{
686  return (__m128i)__builtin_ia32_selectw_128(__U,
687                                         (__v8hi)_mm_shrdv_epi16(__A__B__C),
688                                         (__v8hi)__A);
689}
690
691static __inline__ __m128i __DEFAULT_FN_ATTRS128
692_mm_maskz_shrdv_epi16(__mmask8 __U__m128i __A__m128i __B__m128i __C)
693{
694  return (__m128i)__builtin_ia32_selectw_128(__U,
695                                         (__v8hi)_mm_shrdv_epi16(__A__B__C),
696                                         (__v8hi)_mm_setzero_si128());
697}
698
699
700#undef __DEFAULT_FN_ATTRS128
701#undef __DEFAULT_FN_ATTRS256
702
703#endif
704