Clang Project

clang_source_code/lib/Headers/avx512vlbwintrin.h
/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLBWINTRIN_H
29#define __AVX512VLBWINTRIN_H
30
/* Define the default attributes for the functions in this file. The two
 * variants differ only in the __min_vector_width__ value (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
34
/* Integer compare */
36
/* 128-bit byte compares yielding a __mmask16. a, b and the predicate p are
 * forwarded to the cmpb128 (epi8) or ucmpb128 (epu8) builtin. The unmasked
 * forms pass an all-ones mask, (__mmask16)-1; the _mask_ forms pass m.
 * Each expansion is fully parenthesized so it behaves as a single operand. */
#define _mm_cmp_epi8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm_cmp_epu8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)(m)))
56
/* 256-bit byte compares yielding a __mmask32. a, b and the predicate p are
 * forwarded to the cmpb256 (epi8) or ucmpb256 (epu8) builtin. The unmasked
 * forms pass an all-ones mask, (__mmask32)-1; the _mask_ forms pass m.
 * Each expansion is fully parenthesized so it behaves as a single operand. */
#define _mm256_cmp_epi8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm256_cmp_epu8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)(m)))
76
/* 128-bit word compares yielding a __mmask8. a, b and the predicate p are
 * forwarded to the cmpw128 (epi16) or ucmpw128 (epu16) builtin. The unmasked
 * forms pass an all-ones mask, (__mmask8)-1; the _mask_ forms pass m.
 * Each expansion is fully parenthesized so it behaves as a single operand. */
#define _mm_cmp_epi16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
96
/* 256-bit word compares yielding a __mmask16. a, b and the predicate p are
 * forwarded to the cmpw256 (epi16) or ucmpw256 (epu16) builtin. The unmasked
 * forms pass an all-ones mask, (__mmask16)-1; the _mask_ forms pass m.
 * Each expansion is fully parenthesized so it behaves as a single operand. */
#define _mm256_cmp_epi16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm256_cmp_epu16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)(m)))
116
/* Predicate shorthands for _mm_cmp_epi8_mask / _mm_mask_cmp_epi8_mask: each
 * macro fixes the predicate argument to the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
141
/* Predicate shorthands for _mm256_cmp_epi8_mask / _mm256_mask_cmp_epi8_mask:
 * each macro fixes the predicate argument to the matching _MM_CMPINT_*
 * constant (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
166
/* Predicate shorthands for _mm_cmp_epu8_mask / _mm_mask_cmp_epu8_mask: each
 * macro fixes the predicate argument to the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
191
/* Predicate shorthands for _mm256_cmp_epu8_mask / _mm256_mask_cmp_epu8_mask:
 * each macro fixes the predicate argument to the matching _MM_CMPINT_*
 * constant (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
216
/* Predicate shorthands for _mm_cmp_epi16_mask / _mm_mask_cmp_epi16_mask: each
 * macro fixes the predicate argument to the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
241
/* Predicate shorthands for _mm256_cmp_epi16_mask / _mm256_mask_cmp_epi16_mask:
 * each macro fixes the predicate argument to the matching _MM_CMPINT_*
 * constant (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
266
/* Predicate shorthands for _mm_cmp_epu16_mask / _mm_mask_cmp_epu16_mask: each
 * macro fixes the predicate argument to the matching _MM_CMPINT_* constant
 * (EQ, GE, GT, LE, LT, NE). */
#define _mm_cmpeq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
291
/* Predicate shorthands for _mm256_cmp_epu16_mask / _mm256_mask_cmp_epu16_mask:
 * each macro fixes the predicate argument to the matching _MM_CMPINT_*
 * constant (EQ, GE, GT, LE, LT, NE). */
#define _mm256_cmpeq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
316
317static __inline__ __m256i __DEFAULT_FN_ATTRS256
318_mm256_mask_add_epi8(__m256i __W__mmask32 __U__m256i __A__m256i __B){
319  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
320                                             (__v32qi)_mm256_add_epi8(__A__B),
321                                             (__v32qi)__W);
322}
323
324static __inline__ __m256i __DEFAULT_FN_ATTRS256
325_mm256_maskz_add_epi8(__mmask32 __U__m256i __A__m256i __B) {
326  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
327                                             (__v32qi)_mm256_add_epi8(__A__B),
328                                             (__v32qi)_mm256_setzero_si256());
329}
330
331static __inline__ __m256i __DEFAULT_FN_ATTRS256
332_mm256_mask_add_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B) {
333  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
334                                             (__v16hi)_mm256_add_epi16(__A__B),
335                                             (__v16hi)__W);
336}
337
338static __inline__ __m256i __DEFAULT_FN_ATTRS256
339_mm256_maskz_add_epi16(__mmask16 __U__m256i __A__m256i __B) {
340  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
341                                             (__v16hi)_mm256_add_epi16(__A__B),
342                                             (__v16hi)_mm256_setzero_si256());
343}
344
345static __inline__ __m256i __DEFAULT_FN_ATTRS256
346_mm256_mask_sub_epi8(__m256i __W__mmask32 __U__m256i __A__m256i __B) {
347  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
348                                             (__v32qi)_mm256_sub_epi8(__A__B),
349                                             (__v32qi)__W);
350}
351
352static __inline__ __m256i __DEFAULT_FN_ATTRS256
353_mm256_maskz_sub_epi8(__mmask32 __U__m256i __A__m256i __B) {
354  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
355                                             (__v32qi)_mm256_sub_epi8(__A__B),
356                                             (__v32qi)_mm256_setzero_si256());
357}
358
359static __inline__ __m256i __DEFAULT_FN_ATTRS256
360_mm256_mask_sub_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B) {
361  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
362                                             (__v16hi)_mm256_sub_epi16(__A__B),
363                                             (__v16hi)__W);
364}
365
366static __inline__ __m256i __DEFAULT_FN_ATTRS256
367_mm256_maskz_sub_epi16(__mmask16 __U__m256i __A__m256i __B) {
368  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
369                                             (__v16hi)_mm256_sub_epi16(__A__B),
370                                             (__v16hi)_mm256_setzero_si256());
371}
372
373static __inline__ __m128i __DEFAULT_FN_ATTRS128
374_mm_mask_add_epi8(__m128i __W__mmask16 __U__m128i __A__m128i __B) {
375  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
376                                             (__v16qi)_mm_add_epi8(__A__B),
377                                             (__v16qi)__W);
378}
379
380static __inline__ __m128i __DEFAULT_FN_ATTRS128
381_mm_maskz_add_epi8(__mmask16 __U__m128i __A__m128i __B) {
382  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
383                                             (__v16qi)_mm_add_epi8(__A__B),
384                                             (__v16qi)_mm_setzero_si128());
385}
386
387static __inline__ __m128i __DEFAULT_FN_ATTRS128
388_mm_mask_add_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
389  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
390                                             (__v8hi)_mm_add_epi16(__A__B),
391                                             (__v8hi)__W);
392}
393
394static __inline__ __m128i __DEFAULT_FN_ATTRS128
395_mm_maskz_add_epi16(__mmask8 __U__m128i __A__m128i __B) {
396  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
397                                             (__v8hi)_mm_add_epi16(__A__B),
398                                             (__v8hi)_mm_setzero_si128());
399}
400
401static __inline__ __m128i __DEFAULT_FN_ATTRS128
402_mm_mask_sub_epi8(__m128i __W__mmask16 __U__m128i __A__m128i __B) {
403  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
404                                             (__v16qi)_mm_sub_epi8(__A__B),
405                                             (__v16qi)__W);
406}
407
408static __inline__ __m128i __DEFAULT_FN_ATTRS128
409_mm_maskz_sub_epi8(__mmask16 __U__m128i __A__m128i __B) {
410  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
411                                             (__v16qi)_mm_sub_epi8(__A__B),
412                                             (__v16qi)_mm_setzero_si128());
413}
414
415static __inline__ __m128i __DEFAULT_FN_ATTRS128
416_mm_mask_sub_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
417  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
418                                             (__v8hi)_mm_sub_epi16(__A__B),
419                                             (__v8hi)__W);
420}
421
422static __inline__ __m128i __DEFAULT_FN_ATTRS128
423_mm_maskz_sub_epi16(__mmask8 __U__m128i __A__m128i __B) {
424  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
425                                             (__v8hi)_mm_sub_epi16(__A__B),
426                                             (__v8hi)_mm_setzero_si128());
427}
428
429static __inline__ __m256i __DEFAULT_FN_ATTRS256
430_mm256_mask_mullo_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B) {
431  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
432                                             (__v16hi)_mm256_mullo_epi16(__A__B),
433                                             (__v16hi)__W);
434}
435
436static __inline__ __m256i __DEFAULT_FN_ATTRS256
437_mm256_maskz_mullo_epi16(__mmask16 __U__m256i __A__m256i __B) {
438  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
439                                             (__v16hi)_mm256_mullo_epi16(__A__B),
440                                             (__v16hi)_mm256_setzero_si256());
441}
442
443static __inline__ __m128i __DEFAULT_FN_ATTRS128
444_mm_mask_mullo_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
445  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
446                                             (__v8hi)_mm_mullo_epi16(__A__B),
447                                             (__v8hi)__W);
448}
449
450static __inline__ __m128i __DEFAULT_FN_ATTRS128
451_mm_maskz_mullo_epi16(__mmask8 __U__m128i __A__m128i __B) {
452  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
453                                             (__v8hi)_mm_mullo_epi16(__A__B),
454                                             (__v8hi)_mm_setzero_si128());
455}
456
457static __inline__ __m128i __DEFAULT_FN_ATTRS128
458_mm_mask_blend_epi8 (__mmask16 __U__m128i __A__m128i __W)
459{
460  return (__m128i__builtin_ia32_selectb_128 ((__mmask16__U,
461              (__v16qi__W,
462              (__v16qi__A);
463}
464
465static __inline__ __m256i __DEFAULT_FN_ATTRS256
466_mm256_mask_blend_epi8 (__mmask32 __U__m256i __A__m256i __W)
467{
468  return (__m256i__builtin_ia32_selectb_256 ((__mmask32__U,
469               (__v32qi__W,
470               (__v32qi__A);
471}
472
473static __inline__ __m128i __DEFAULT_FN_ATTRS128
474_mm_mask_blend_epi16 (__mmask8 __U__m128i __A__m128i __W)
475{
476  return (__m128i__builtin_ia32_selectw_128 ((__mmask8__U,
477               (__v8hi__W,
478               (__v8hi__A);
479}
480
481static __inline__ __m256i __DEFAULT_FN_ATTRS256
482_mm256_mask_blend_epi16 (__mmask16 __U__m256i __A__m256i __W)
483{
484  return (__m256i__builtin_ia32_selectw_256 ((__mmask16__U,
485               (__v16hi__W,
486               (__v16hi__A);
487}
488
489static __inline__ __m128i __DEFAULT_FN_ATTRS128
490_mm_mask_abs_epi8(__m128i __W__mmask16 __U__m128i __A)
491{
492  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
493                                             (__v16qi)_mm_abs_epi8(__A),
494                                             (__v16qi)__W);
495}
496
497static __inline__ __m128i __DEFAULT_FN_ATTRS128
498_mm_maskz_abs_epi8(__mmask16 __U__m128i __A)
499{
500  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
501                                             (__v16qi)_mm_abs_epi8(__A),
502                                             (__v16qi)_mm_setzero_si128());
503}
504
505static __inline__ __m256i __DEFAULT_FN_ATTRS256
506_mm256_mask_abs_epi8(__m256i __W__mmask32 __U__m256i __A)
507{
508  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
509                                             (__v32qi)_mm256_abs_epi8(__A),
510                                             (__v32qi)__W);
511}
512
513static __inline__ __m256i __DEFAULT_FN_ATTRS256
514_mm256_maskz_abs_epi8 (__mmask32 __U__m256i __A)
515{
516  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
517                                             (__v32qi)_mm256_abs_epi8(__A),
518                                             (__v32qi)_mm256_setzero_si256());
519}
520
521static __inline__ __m128i __DEFAULT_FN_ATTRS128
522_mm_mask_abs_epi16(__m128i __W__mmask8 __U__m128i __A)
523{
524  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
525                                             (__v8hi)_mm_abs_epi16(__A),
526                                             (__v8hi)__W);
527}
528
529static __inline__ __m128i __DEFAULT_FN_ATTRS128
530_mm_maskz_abs_epi16(__mmask8 __U__m128i __A)
531{
532  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
533                                             (__v8hi)_mm_abs_epi16(__A),
534                                             (__v8hi)_mm_setzero_si128());
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
538_mm256_mask_abs_epi16(__m256i __W__mmask16 __U__m256i __A)
539{
540  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
541                                             (__v16hi)_mm256_abs_epi16(__A),
542                                             (__v16hi)__W);
543}
544
545static __inline__ __m256i __DEFAULT_FN_ATTRS256
546_mm256_maskz_abs_epi16(__mmask16 __U__m256i __A)
547{
548  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
549                                             (__v16hi)_mm256_abs_epi16(__A),
550                                             (__v16hi)_mm256_setzero_si256());
551}
552
553static __inline__ __m128i __DEFAULT_FN_ATTRS128
554_mm_maskz_packs_epi32(__mmask8 __M__m128i __A__m128i __B) {
555  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
556                                             (__v8hi)_mm_packs_epi32(__A__B),
557                                             (__v8hi)_mm_setzero_si128());
558}
559
560static __inline__ __m128i __DEFAULT_FN_ATTRS128
561_mm_mask_packs_epi32(__m128i __W__mmask8 __M__m128i __A__m128i __B)
562{
563  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
564                                             (__v8hi)_mm_packs_epi32(__A__B),
565                                             (__v8hi)__W);
566}
567
568static __inline__ __m256i __DEFAULT_FN_ATTRS256
569_mm256_maskz_packs_epi32(__mmask16 __M__m256i __A__m256i __B)
570{
571  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
572                                          (__v16hi)_mm256_packs_epi32(__A__B),
573                                          (__v16hi)_mm256_setzero_si256());
574}
575
576static __inline__ __m256i __DEFAULT_FN_ATTRS256
577_mm256_mask_packs_epi32(__m256i __W__mmask16 __M__m256i __A__m256i __B)
578{
579  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
580                                          (__v16hi)_mm256_packs_epi32(__A__B),
581                                          (__v16hi)__W);
582}
583
584static __inline__ __m128i __DEFAULT_FN_ATTRS128
585_mm_maskz_packs_epi16(__mmask16 __M__m128i __A__m128i __B)
586{
587  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
588                                             (__v16qi)_mm_packs_epi16(__A__B),
589                                             (__v16qi)_mm_setzero_si128());
590}
591
592static __inline__ __m128i __DEFAULT_FN_ATTRS128
593_mm_mask_packs_epi16(__m128i __W__mmask16 __M__m128i __A__m128i __B)
594{
595  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
596                                             (__v16qi)_mm_packs_epi16(__A__B),
597                                             (__v16qi)__W);
598}
599
600static __inline__ __m256i __DEFAULT_FN_ATTRS256
601_mm256_maskz_packs_epi16(__mmask32 __M__m256i __A__m256i __B)
602{
603  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
604                                          (__v32qi)_mm256_packs_epi16(__A__B),
605                                          (__v32qi)_mm256_setzero_si256());
606}
607
608static __inline__ __m256i __DEFAULT_FN_ATTRS256
609_mm256_mask_packs_epi16(__m256i __W__mmask32 __M__m256i __A__m256i __B)
610{
611  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
612                                          (__v32qi)_mm256_packs_epi16(__A__B),
613                                          (__v32qi)__W);
614}
615
616static __inline__ __m128i __DEFAULT_FN_ATTRS128
617_mm_maskz_packus_epi32(__mmask8 __M__m128i __A__m128i __B)
618{
619  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
620                                             (__v8hi)_mm_packus_epi32(__A__B),
621                                             (__v8hi)_mm_setzero_si128());
622}
623
624static __inline__ __m128i __DEFAULT_FN_ATTRS128
625_mm_mask_packus_epi32(__m128i __W__mmask8 __M__m128i __A__m128i __B)
626{
627  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
628                                             (__v8hi)_mm_packus_epi32(__A__B),
629                                             (__v8hi)__W);
630}
631
632static __inline__ __m256i __DEFAULT_FN_ATTRS256
633_mm256_maskz_packus_epi32(__mmask16 __M__m256i __A__m256i __B)
634{
635  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
636                                         (__v16hi)_mm256_packus_epi32(__A__B),
637                                         (__v16hi)_mm256_setzero_si256());
638}
639
640static __inline__ __m256i __DEFAULT_FN_ATTRS256
641_mm256_mask_packus_epi32(__m256i __W__mmask16 __M__m256i __A__m256i __B)
642{
643  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
644                                         (__v16hi)_mm256_packus_epi32(__A__B),
645                                         (__v16hi)__W);
646}
647
648static __inline__ __m128i __DEFAULT_FN_ATTRS128
649_mm_maskz_packus_epi16(__mmask16 __M__m128i __A__m128i __B)
650{
651  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
652                                            (__v16qi)_mm_packus_epi16(__A__B),
653                                            (__v16qi)_mm_setzero_si128());
654}
655
656static __inline__ __m128i __DEFAULT_FN_ATTRS128
657_mm_mask_packus_epi16(__m128i __W__mmask16 __M__m128i __A__m128i __B)
658{
659  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
660                                            (__v16qi)_mm_packus_epi16(__A__B),
661                                            (__v16qi)__W);
662}
663
664static __inline__ __m256i __DEFAULT_FN_ATTRS256
665_mm256_maskz_packus_epi16(__mmask32 __M__m256i __A__m256i __B)
666{
667  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
668                                         (__v32qi)_mm256_packus_epi16(__A__B),
669                                         (__v32qi)_mm256_setzero_si256());
670}
671
672static __inline__ __m256i __DEFAULT_FN_ATTRS256
673_mm256_mask_packus_epi16(__m256i __W__mmask32 __M__m256i __A__m256i __B)
674{
675  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
676                                         (__v32qi)_mm256_packus_epi16(__A__B),
677                                         (__v32qi)__W);
678}
679
680static __inline__ __m128i __DEFAULT_FN_ATTRS128
681_mm_mask_adds_epi8(__m128i __W__mmask16 __U__m128i __A__m128i __B)
682{
683  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
684                                             (__v16qi)_mm_adds_epi8(__A__B),
685                                             (__v16qi)__W);
686}
687
688static __inline__ __m128i __DEFAULT_FN_ATTRS128
689_mm_maskz_adds_epi8(__mmask16 __U__m128i __A__m128i __B)
690{
691  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
692                                             (__v16qi)_mm_adds_epi8(__A__B),
693                                             (__v16qi)_mm_setzero_si128());
694}
695
696static __inline__ __m256i __DEFAULT_FN_ATTRS256
697_mm256_mask_adds_epi8(__m256i __W__mmask32 __U__m256i __A__m256i __B)
698{
699  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
700                                            (__v32qi)_mm256_adds_epi8(__A__B),
701                                            (__v32qi)__W);
702}
703
704static __inline__ __m256i __DEFAULT_FN_ATTRS256
705_mm256_maskz_adds_epi8(__mmask32 __U__m256i __A__m256i __B)
706{
707  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
708                                            (__v32qi)_mm256_adds_epi8(__A__B),
709                                            (__v32qi)_mm256_setzero_si256());
710}
711
712static __inline__ __m128i __DEFAULT_FN_ATTRS128
713_mm_mask_adds_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
714{
715  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
716                                             (__v8hi)_mm_adds_epi16(__A__B),
717                                             (__v8hi)__W);
718}
719
720static __inline__ __m128i __DEFAULT_FN_ATTRS128
721_mm_maskz_adds_epi16(__mmask8 __U__m128i __A__m128i __B)
722{
723  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
724                                             (__v8hi)_mm_adds_epi16(__A__B),
725                                             (__v8hi)_mm_setzero_si128());
726}
727
728static __inline__ __m256i __DEFAULT_FN_ATTRS256
729_mm256_mask_adds_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B)
730{
731  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
732                                           (__v16hi)_mm256_adds_epi16(__A__B),
733                                           (__v16hi)__W);
734}
735
736static __inline__ __m256i __DEFAULT_FN_ATTRS256
737_mm256_maskz_adds_epi16(__mmask16 __U__m256i __A__m256i __B)
738{
739  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
740                                           (__v16hi)_mm256_adds_epi16(__A__B),
741                                           (__v16hi)_mm256_setzero_si256());
742}
743
744static __inline__ __m128i __DEFAULT_FN_ATTRS128
745_mm_mask_adds_epu8(__m128i __W__mmask16 __U__m128i __A__m128i __B)
746{
747  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
748                                             (__v16qi)_mm_adds_epu8(__A__B),
749                                             (__v16qi)__W);
750}
751
752static __inline__ __m128i __DEFAULT_FN_ATTRS128
753_mm_maskz_adds_epu8(__mmask16 __U__m128i __A__m128i __B)
754{
755  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
756                                             (__v16qi)_mm_adds_epu8(__A__B),
757                                             (__v16qi)_mm_setzero_si128());
758}
759
760static __inline__ __m256i __DEFAULT_FN_ATTRS256
761_mm256_mask_adds_epu8(__m256i __W__mmask32 __U__m256i __A__m256i __B)
762{
763  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
764                                            (__v32qi)_mm256_adds_epu8(__A__B),
765                                            (__v32qi)__W);
766}
767
768static __inline__ __m256i __DEFAULT_FN_ATTRS256
769_mm256_maskz_adds_epu8(__mmask32 __U__m256i __A__m256i __B)
770{
771  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
772                                            (__v32qi)_mm256_adds_epu8(__A__B),
773                                            (__v32qi)_mm256_setzero_si256());
774}
775
776static __inline__ __m128i __DEFAULT_FN_ATTRS128
777_mm_mask_adds_epu16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
778{
779  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
780                                             (__v8hi)_mm_adds_epu16(__A__B),
781                                             (__v8hi)__W);
782}
783
784static __inline__ __m128i __DEFAULT_FN_ATTRS128
785_mm_maskz_adds_epu16(__mmask8 __U__m128i __A__m128i __B)
786{
787  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
788                                             (__v8hi)_mm_adds_epu16(__A__B),
789                                             (__v8hi)_mm_setzero_si128());
790}
791
792static __inline__ __m256i __DEFAULT_FN_ATTRS256
793_mm256_mask_adds_epu16(__m256i __W__mmask16 __U__m256i __A__m256i __B)
794{
795  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
796                                           (__v16hi)_mm256_adds_epu16(__A__B),
797                                           (__v16hi)__W);
798}
799
800static __inline__ __m256i __DEFAULT_FN_ATTRS256
801_mm256_maskz_adds_epu16(__mmask16 __U__m256i __A__m256i __B)
802{
803  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
804                                           (__v16hi)_mm256_adds_epu16(__A__B),
805                                           (__v16hi)_mm256_setzero_si256());
806}
807
808static __inline__ __m128i __DEFAULT_FN_ATTRS128
809_mm_mask_avg_epu8(__m128i __W__mmask16 __U__m128i __A__m128i __B)
810{
811  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
812                                             (__v16qi)_mm_avg_epu8(__A__B),
813                                             (__v16qi)__W);
814}
815
816static __inline__ __m128i __DEFAULT_FN_ATTRS128
817_mm_maskz_avg_epu8(__mmask16 __U__m128i __A__m128i __B)
818{
819  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
820                                             (__v16qi)_mm_avg_epu8(__A__B),
821                                             (__v16qi)_mm_setzero_si128());
822}
823
824static __inline__ __m256i __DEFAULT_FN_ATTRS256
825_mm256_mask_avg_epu8(__m256i __W__mmask32 __U__m256i __A__m256i __B)
826{
827  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
828                                             (__v32qi)_mm256_avg_epu8(__A__B),
829                                             (__v32qi)__W);
830}
831
832static __inline__ __m256i __DEFAULT_FN_ATTRS256
833_mm256_maskz_avg_epu8(__mmask32 __U__m256i __A__m256i __B)
834{
835  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
836                                             (__v32qi)_mm256_avg_epu8(__A__B),
837                                             (__v32qi)_mm256_setzero_si256());
838}
839
840static __inline__ __m128i __DEFAULT_FN_ATTRS128
841_mm_mask_avg_epu16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
842{
843  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
844                                             (__v8hi)_mm_avg_epu16(__A__B),
845                                             (__v8hi)__W);
846}
847
848static __inline__ __m128i __DEFAULT_FN_ATTRS128
849_mm_maskz_avg_epu16(__mmask8 __U__m128i __A__m128i __B)
850{
851  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
852                                             (__v8hi)_mm_avg_epu16(__A__B),
853                                             (__v8hi)_mm_setzero_si128());
854}
855
856static __inline__ __m256i __DEFAULT_FN_ATTRS256
857_mm256_mask_avg_epu16(__m256i __W__mmask16 __U__m256i __A__m256i __B)
858{
859  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
860                                            (__v16hi)_mm256_avg_epu16(__A__B),
861                                            (__v16hi)__W);
862}
863
864static __inline__ __m256i __DEFAULT_FN_ATTRS256
865_mm256_maskz_avg_epu16(__mmask16 __U__m256i __A__m256i __B)
866{
867  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
868                                            (__v16hi)_mm256_avg_epu16(__A__B),
869                                            (__v16hi)_mm256_setzero_si256());
870}
871
872static __inline__ __m128i __DEFAULT_FN_ATTRS128
873_mm_maskz_max_epi8(__mmask16 __M__m128i __A__m128i __B)
874{
875  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
876                                             (__v16qi)_mm_max_epi8(__A__B),
877                                             (__v16qi)_mm_setzero_si128());
878}
879
880static __inline__ __m128i __DEFAULT_FN_ATTRS128
881_mm_mask_max_epi8(__m128i __W__mmask16 __M__m128i __A__m128i __B)
882{
883  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
884                                             (__v16qi)_mm_max_epi8(__A__B),
885                                             (__v16qi)__W);
886}
887
888static __inline__ __m256i __DEFAULT_FN_ATTRS256
889_mm256_maskz_max_epi8(__mmask32 __M__m256i __A__m256i __B)
890{
891  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
892                                             (__v32qi)_mm256_max_epi8(__A__B),
893                                             (__v32qi)_mm256_setzero_si256());
894}
895
896static __inline__ __m256i __DEFAULT_FN_ATTRS256
897_mm256_mask_max_epi8(__m256i __W__mmask32 __M__m256i __A__m256i __B)
898{
899  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
900                                             (__v32qi)_mm256_max_epi8(__A__B),
901                                             (__v32qi)__W);
902}
903
904static __inline__ __m128i __DEFAULT_FN_ATTRS128
905_mm_maskz_max_epi16(__mmask8 __M__m128i __A__m128i __B)
906{
907  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
908                                             (__v8hi)_mm_max_epi16(__A__B),
909                                             (__v8hi)_mm_setzero_si128());
910}
911
912static __inline__ __m128i __DEFAULT_FN_ATTRS128
913_mm_mask_max_epi16(__m128i __W__mmask8 __M__m128i __A__m128i __B)
914{
915  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
916                                             (__v8hi)_mm_max_epi16(__A__B),
917                                             (__v8hi)__W);
918}
919
920static __inline__ __m256i __DEFAULT_FN_ATTRS256
921_mm256_maskz_max_epi16(__mmask16 __M__m256i __A__m256i __B)
922{
923  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
924                                            (__v16hi)_mm256_max_epi16(__A__B),
925                                            (__v16hi)_mm256_setzero_si256());
926}
927
928static __inline__ __m256i __DEFAULT_FN_ATTRS256
929_mm256_mask_max_epi16(__m256i __W__mmask16 __M__m256i __A__m256i __B)
930{
931  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
932                                            (__v16hi)_mm256_max_epi16(__A__B),
933                                            (__v16hi)__W);
934}
935
936static __inline__ __m128i __DEFAULT_FN_ATTRS128
937_mm_maskz_max_epu8(__mmask16 __M__m128i __A__m128i __B)
938{
939  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
940                                             (__v16qi)_mm_max_epu8(__A__B),
941                                             (__v16qi)_mm_setzero_si128());
942}
943
944static __inline__ __m128i __DEFAULT_FN_ATTRS128
945_mm_mask_max_epu8(__m128i __W__mmask16 __M__m128i __A__m128i __B)
946{
947  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
948                                             (__v16qi)_mm_max_epu8(__A__B),
949                                             (__v16qi)__W);
950}
951
952static __inline__ __m256i __DEFAULT_FN_ATTRS256
953_mm256_maskz_max_epu8 (__mmask32 __M__m256i __A__m256i __B)
954{
955  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
956                                             (__v32qi)_mm256_max_epu8(__A__B),
957                                             (__v32qi)_mm256_setzero_si256());
958}
959
960static __inline__ __m256i __DEFAULT_FN_ATTRS256
961_mm256_mask_max_epu8(__m256i __W__mmask32 __M__m256i __A__m256i __B)
962{
963  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
964                                             (__v32qi)_mm256_max_epu8(__A__B),
965                                             (__v32qi)__W);
966}
967
968static __inline__ __m128i __DEFAULT_FN_ATTRS128
969_mm_maskz_max_epu16(__mmask8 __M__m128i __A__m128i __B)
970{
971  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
972                                             (__v8hi)_mm_max_epu16(__A__B),
973                                             (__v8hi)_mm_setzero_si128());
974}
975
976static __inline__ __m128i __DEFAULT_FN_ATTRS128
977_mm_mask_max_epu16(__m128i __W__mmask8 __M__m128i __A__m128i __B)
978{
979  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
980                                             (__v8hi)_mm_max_epu16(__A__B),
981                                             (__v8hi)__W);
982}
983
984static __inline__ __m256i __DEFAULT_FN_ATTRS256
985_mm256_maskz_max_epu16(__mmask16 __M__m256i __A__m256i __B)
986{
987  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
988                                            (__v16hi)_mm256_max_epu16(__A__B),
989                                            (__v16hi)_mm256_setzero_si256());
990}
991
992static __inline__ __m256i __DEFAULT_FN_ATTRS256
993_mm256_mask_max_epu16(__m256i __W__mmask16 __M__m256i __A__m256i __B)
994{
995  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
996                                            (__v16hi)_mm256_max_epu16(__A__B),
997                                            (__v16hi)__W);
998}
999
1000static __inline__ __m128i __DEFAULT_FN_ATTRS128
1001_mm_maskz_min_epi8(__mmask16 __M__m128i __A__m128i __B)
1002{
1003  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1004                                             (__v16qi)_mm_min_epi8(__A__B),
1005                                             (__v16qi)_mm_setzero_si128());
1006}
1007
1008static __inline__ __m128i __DEFAULT_FN_ATTRS128
1009_mm_mask_min_epi8(__m128i __W__mmask16 __M__m128i __A__m128i __B)
1010{
1011  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1012                                             (__v16qi)_mm_min_epi8(__A__B),
1013                                             (__v16qi)__W);
1014}
1015
1016static __inline__ __m256i __DEFAULT_FN_ATTRS256
1017_mm256_maskz_min_epi8(__mmask32 __M__m256i __A__m256i __B)
1018{
1019  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1020                                             (__v32qi)_mm256_min_epi8(__A__B),
1021                                             (__v32qi)_mm256_setzero_si256());
1022}
1023
1024static __inline__ __m256i __DEFAULT_FN_ATTRS256
1025_mm256_mask_min_epi8(__m256i __W__mmask32 __M__m256i __A__m256i __B)
1026{
1027  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1028                                             (__v32qi)_mm256_min_epi8(__A__B),
1029                                             (__v32qi)__W);
1030}
1031
1032static __inline__ __m128i __DEFAULT_FN_ATTRS128
1033_mm_maskz_min_epi16(__mmask8 __M__m128i __A__m128i __B)
1034{
1035  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1036                                             (__v8hi)_mm_min_epi16(__A__B),
1037                                             (__v8hi)_mm_setzero_si128());
1038}
1039
1040static __inline__ __m128i __DEFAULT_FN_ATTRS128
1041_mm_mask_min_epi16(__m128i __W__mmask8 __M__m128i __A__m128i __B)
1042{
1043  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1044                                             (__v8hi)_mm_min_epi16(__A__B),
1045                                             (__v8hi)__W);
1046}
1047
1048static __inline__ __m256i __DEFAULT_FN_ATTRS256
1049_mm256_maskz_min_epi16(__mmask16 __M__m256i __A__m256i __B)
1050{
1051  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1052                                            (__v16hi)_mm256_min_epi16(__A__B),
1053                                            (__v16hi)_mm256_setzero_si256());
1054}
1055
1056static __inline__ __m256i __DEFAULT_FN_ATTRS256
1057_mm256_mask_min_epi16(__m256i __W__mmask16 __M__m256i __A__m256i __B)
1058{
1059  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1060                                            (__v16hi)_mm256_min_epi16(__A__B),
1061                                            (__v16hi)__W);
1062}
1063
1064static __inline__ __m128i __DEFAULT_FN_ATTRS128
1065_mm_maskz_min_epu8(__mmask16 __M__m128i __A__m128i __B)
1066{
1067  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1068                                             (__v16qi)_mm_min_epu8(__A__B),
1069                                             (__v16qi)_mm_setzero_si128());
1070}
1071
1072static __inline__ __m128i __DEFAULT_FN_ATTRS128
1073_mm_mask_min_epu8(__m128i __W__mmask16 __M__m128i __A__m128i __B)
1074{
1075  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1076                                             (__v16qi)_mm_min_epu8(__A__B),
1077                                             (__v16qi)__W);
1078}
1079
1080static __inline__ __m256i __DEFAULT_FN_ATTRS256
1081_mm256_maskz_min_epu8 (__mmask32 __M__m256i __A__m256i __B)
1082{
1083  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1084                                             (__v32qi)_mm256_min_epu8(__A__B),
1085                                             (__v32qi)_mm256_setzero_si256());
1086}
1087
1088static __inline__ __m256i __DEFAULT_FN_ATTRS256
1089_mm256_mask_min_epu8(__m256i __W__mmask32 __M__m256i __A__m256i __B)
1090{
1091  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1092                                             (__v32qi)_mm256_min_epu8(__A__B),
1093                                             (__v32qi)__W);
1094}
1095
1096static __inline__ __m128i __DEFAULT_FN_ATTRS128
1097_mm_maskz_min_epu16(__mmask8 __M__m128i __A__m128i __B)
1098{
1099  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1100                                             (__v8hi)_mm_min_epu16(__A__B),
1101                                             (__v8hi)_mm_setzero_si128());
1102}
1103
1104static __inline__ __m128i __DEFAULT_FN_ATTRS128
1105_mm_mask_min_epu16(__m128i __W__mmask8 __M__m128i __A__m128i __B)
1106{
1107  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1108                                             (__v8hi)_mm_min_epu16(__A__B),
1109                                             (__v8hi)__W);
1110}
1111
1112static __inline__ __m256i __DEFAULT_FN_ATTRS256
1113_mm256_maskz_min_epu16(__mmask16 __M__m256i __A__m256i __B)
1114{
1115  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1116                                            (__v16hi)_mm256_min_epu16(__A__B),
1117                                            (__v16hi)_mm256_setzero_si256());
1118}
1119
1120static __inline__ __m256i __DEFAULT_FN_ATTRS256
1121_mm256_mask_min_epu16(__m256i __W__mmask16 __M__m256i __A__m256i __B)
1122{
1123  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1124                                            (__v16hi)_mm256_min_epu16(__A__B),
1125                                            (__v16hi)__W);
1126}
1127
1128static __inline__ __m128i __DEFAULT_FN_ATTRS128
1129_mm_mask_shuffle_epi8(__m128i __W__mmask16 __U__m128i __A__m128i __B)
1130{
1131  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1132                                            (__v16qi)_mm_shuffle_epi8(__A__B),
1133                                            (__v16qi)__W);
1134}
1135
1136static __inline__ __m128i __DEFAULT_FN_ATTRS128
1137_mm_maskz_shuffle_epi8(__mmask16 __U__m128i __A__m128i __B)
1138{
1139  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1140                                            (__v16qi)_mm_shuffle_epi8(__A__B),
1141                                            (__v16qi)_mm_setzero_si128());
1142}
1143
1144static __inline__ __m256i __DEFAULT_FN_ATTRS256
1145_mm256_mask_shuffle_epi8(__m256i __W__mmask32 __U__m256i __A__m256i __B)
1146{
1147  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1148                                         (__v32qi)_mm256_shuffle_epi8(__A__B),
1149                                         (__v32qi)__W);
1150}
1151
1152static __inline__ __m256i __DEFAULT_FN_ATTRS256
1153_mm256_maskz_shuffle_epi8(__mmask32 __U__m256i __A__m256i __B)
1154{
1155  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1156                                         (__v32qi)_mm256_shuffle_epi8(__A__B),
1157                                         (__v32qi)_mm256_setzero_si256());
1158}
1159
1160static __inline__ __m128i __DEFAULT_FN_ATTRS128
1161_mm_mask_subs_epi8(__m128i __W__mmask16 __U__m128i __A__m128i __B)
1162{
1163  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1164                                             (__v16qi)_mm_subs_epi8(__A__B),
1165                                             (__v16qi)__W);
1166}
1167
1168static __inline__ __m128i __DEFAULT_FN_ATTRS128
1169_mm_maskz_subs_epi8(__mmask16 __U__m128i __A__m128i __B)
1170{
1171  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1172                                             (__v16qi)_mm_subs_epi8(__A__B),
1173                                             (__v16qi)_mm_setzero_si128());
1174}
1175
1176static __inline__ __m256i __DEFAULT_FN_ATTRS256
1177_mm256_mask_subs_epi8(__m256i __W__mmask32 __U__m256i __A__m256i __B)
1178{
1179  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1180                                            (__v32qi)_mm256_subs_epi8(__A__B),
1181                                            (__v32qi)__W);
1182}
1183
1184static __inline__ __m256i __DEFAULT_FN_ATTRS256
1185_mm256_maskz_subs_epi8(__mmask32 __U__m256i __A__m256i __B)
1186{
1187  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1188                                            (__v32qi)_mm256_subs_epi8(__A__B),
1189                                            (__v32qi)_mm256_setzero_si256());
1190}
1191
1192static __inline__ __m128i __DEFAULT_FN_ATTRS128
1193_mm_mask_subs_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
1194{
1195  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1196                                             (__v8hi)_mm_subs_epi16(__A__B),
1197                                             (__v8hi)__W);
1198}
1199
1200static __inline__ __m128i __DEFAULT_FN_ATTRS128
1201_mm_maskz_subs_epi16(__mmask8 __U__m128i __A__m128i __B)
1202{
1203  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1204                                             (__v8hi)_mm_subs_epi16(__A__B),
1205                                             (__v8hi)_mm_setzero_si128());
1206}
1207
1208static __inline__ __m256i __DEFAULT_FN_ATTRS256
1209_mm256_mask_subs_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B)
1210{
1211  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1212                                           (__v16hi)_mm256_subs_epi16(__A__B),
1213                                           (__v16hi)__W);
1214}
1215
1216static __inline__ __m256i __DEFAULT_FN_ATTRS256
1217_mm256_maskz_subs_epi16(__mmask16 __U__m256i __A__m256i __B)
1218{
1219  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1220                                           (__v16hi)_mm256_subs_epi16(__A__B),
1221                                           (__v16hi)_mm256_setzero_si256());
1222}
1223
1224static __inline__ __m128i __DEFAULT_FN_ATTRS128
1225_mm_mask_subs_epu8(__m128i __W__mmask16 __U__m128i __A__m128i __B)
1226{
1227  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1228                                             (__v16qi)_mm_subs_epu8(__A__B),
1229                                             (__v16qi)__W);
1230}
1231
1232static __inline__ __m128i __DEFAULT_FN_ATTRS128
1233_mm_maskz_subs_epu8(__mmask16 __U__m128i __A__m128i __B)
1234{
1235  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1236                                             (__v16qi)_mm_subs_epu8(__A__B),
1237                                             (__v16qi)_mm_setzero_si128());
1238}
1239
1240static __inline__ __m256i __DEFAULT_FN_ATTRS256
1241_mm256_mask_subs_epu8(__m256i __W__mmask32 __U__m256i __A__m256i __B)
1242{
1243  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1244                                            (__v32qi)_mm256_subs_epu8(__A__B),
1245                                            (__v32qi)__W);
1246}
1247
1248static __inline__ __m256i __DEFAULT_FN_ATTRS256
1249_mm256_maskz_subs_epu8(__mmask32 __U__m256i __A__m256i __B)
1250{
1251  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1252                                            (__v32qi)_mm256_subs_epu8(__A__B),
1253                                            (__v32qi)_mm256_setzero_si256());
1254}
1255
1256static __inline__ __m128i __DEFAULT_FN_ATTRS128
1257_mm_mask_subs_epu16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
1258{
1259  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1260                                             (__v8hi)_mm_subs_epu16(__A__B),
1261                                             (__v8hi)__W);
1262}
1263
1264static __inline__ __m128i __DEFAULT_FN_ATTRS128
1265_mm_maskz_subs_epu16(__mmask8 __U__m128i __A__m128i __B)
1266{
1267  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1268                                             (__v8hi)_mm_subs_epu16(__A__B),
1269                                             (__v8hi)_mm_setzero_si128());
1270}
1271
1272static __inline__ __m256i __DEFAULT_FN_ATTRS256
1273_mm256_mask_subs_epu16(__m256i __W__mmask16 __U__m256i __A,
1274      __m256i __B) {
1275  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1276                                           (__v16hi)_mm256_subs_epu16(__A__B),
1277                                           (__v16hi)__W);
1278}
1279
1280static __inline__ __m256i __DEFAULT_FN_ATTRS256
1281_mm256_maskz_subs_epu16(__mmask16 __U__m256i __A__m256i __B)
1282{
1283  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1284                                           (__v16hi)_mm256_subs_epu16(__A__B),
1285                                           (__v16hi)_mm256_setzero_si256());
1286}
1287
1288static __inline__ __m128i __DEFAULT_FN_ATTRS128
1289_mm_permutex2var_epi16(__m128i __A__m128i __I__m128i __B)
1290{
1291  return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1292                                                 (__v8hi__B);
1293}
1294
1295static __inline__ __m128i __DEFAULT_FN_ATTRS128
1296_mm_mask_permutex2var_epi16(__m128i __A__mmask8 __U__m128i __I,
1297                            __m128i __B)
1298{
1299  return (__m128i)__builtin_ia32_selectw_128(__U,
1300                                  (__v8hi)_mm_permutex2var_epi16(__A__I__B),
1301                                  (__v8hi)__A);
1302}
1303
1304static __inline__ __m128i __DEFAULT_FN_ATTRS128
1305_mm_mask2_permutex2var_epi16(__m128i __A__m128i __I__mmask8 __U,
1306                             __m128i __B)
1307{
1308  return (__m128i)__builtin_ia32_selectw_128(__U,
1309                                  (__v8hi)_mm_permutex2var_epi16(__A__I__B),
1310                                  (__v8hi)__I);
1311}
1312
1313static __inline__ __m128i __DEFAULT_FN_ATTRS128
1314_mm_maskz_permutex2var_epi16 (__mmask8 __U__m128i __A__m128i __I,
1315            __m128i __B)
1316{
1317  return (__m128i)__builtin_ia32_selectw_128(__U,
1318                                  (__v8hi)_mm_permutex2var_epi16(__A__I__B),
1319                                  (__v8hi)_mm_setzero_si128());
1320}
1321
1322static __inline__ __m256i __DEFAULT_FN_ATTRS256
1323_mm256_permutex2var_epi16(__m256i __A__m256i __I__m256i __B)
1324{
1325  return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1326                                                 (__v16hi)__B);
1327}
1328
1329static __inline__ __m256i __DEFAULT_FN_ATTRS256
1330_mm256_mask_permutex2var_epi16(__m256i __A__mmask16 __U__m256i __I,
1331                               __m256i __B)
1332{
1333  return (__m256i)__builtin_ia32_selectw_256(__U,
1334                              (__v16hi)_mm256_permutex2var_epi16(__A__I__B),
1335                              (__v16hi)__A);
1336}
1337
1338static __inline__ __m256i __DEFAULT_FN_ATTRS256
1339_mm256_mask2_permutex2var_epi16(__m256i __A__m256i __I__mmask16 __U,
1340                                __m256i __B)
1341{
1342  return (__m256i)__builtin_ia32_selectw_256(__U,
1343                              (__v16hi)_mm256_permutex2var_epi16(__A__I__B),
1344                              (__v16hi)__I);
1345}
1346
1347static __inline__ __m256i __DEFAULT_FN_ATTRS256
1348_mm256_maskz_permutex2var_epi16 (__mmask16 __U__m256i __A__m256i __I,
1349                                 __m256i __B)
1350{
1351  return (__m256i)__builtin_ia32_selectw_256(__U,
1352                              (__v16hi)_mm256_permutex2var_epi16(__A__I__B),
1353                              (__v16hi)_mm256_setzero_si256());
1354}
1355
1356static __inline__ __m128i __DEFAULT_FN_ATTRS128
1357_mm_mask_maddubs_epi16(__m128i __W__mmask8 __U__m128i __X__m128i __Y) {
1358  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1359                                            (__v8hi)_mm_maddubs_epi16(__X__Y),
1360                                            (__v8hi)__W);
1361}
1362
1363static __inline__ __m128i __DEFAULT_FN_ATTRS128
1364_mm_maskz_maddubs_epi16(__mmask8 __U__m128i __X__m128i __Y) {
1365  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1366                                            (__v8hi)_mm_maddubs_epi16(__X__Y),
1367                                            (__v8hi)_mm_setzero_si128());
1368}
1369
1370static __inline__ __m256i __DEFAULT_FN_ATTRS256
1371_mm256_mask_maddubs_epi16(__m256i __W__mmask16 __U__m256i __X,
1372                          __m256i __Y) {
1373  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1374                                        (__v16hi)_mm256_maddubs_epi16(__X__Y),
1375                                        (__v16hi)__W);
1376}
1377
1378static __inline__ __m256i __DEFAULT_FN_ATTRS256
1379_mm256_maskz_maddubs_epi16(__mmask16 __U__m256i __X__m256i __Y) {
1380  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1381                                        (__v16hi)_mm256_maddubs_epi16(__X__Y),
1382                                        (__v16hi)_mm256_setzero_si256());
1383}
1384
1385static __inline__ __m128i __DEFAULT_FN_ATTRS128
1386_mm_mask_madd_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
1387  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1388                                             (__v4si)_mm_madd_epi16(__A__B),
1389                                             (__v4si)__W);
1390}
1391
1392static __inline__ __m128i __DEFAULT_FN_ATTRS128
1393_mm_maskz_madd_epi16(__mmask8 __U__m128i __A__m128i __B) {
1394  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1395                                             (__v4si)_mm_madd_epi16(__A__B),
1396                                             (__v4si)_mm_setzero_si128());
1397}
1398
1399static __inline__ __m256i __DEFAULT_FN_ATTRS256
1400_mm256_mask_madd_epi16(__m256i __W__mmask8 __U__m256i __A__m256i __B) {
1401  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1402                                            (__v8si)_mm256_madd_epi16(__A__B),
1403                                            (__v8si)__W);
1404}
1405
1406static __inline__ __m256i __DEFAULT_FN_ATTRS256
1407_mm256_maskz_madd_epi16(__mmask8 __U__m256i __A__m256i __B) {
1408  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1409                                            (__v8si)_mm256_madd_epi16(__A__B),
1410                                            (__v8si)_mm256_setzero_si256());
1411}
1412
1413static __inline__ __m128i __DEFAULT_FN_ATTRS128
1414_mm_cvtsepi16_epi8 (__m128i __A) {
1415  return (__m128i__builtin_ia32_pmovswb128_mask ((__v8hi__A,
1416               (__v16qi_mm_setzero_si128(),
1417               (__mmask8) -1);
1418}
1419
1420static __inline__ __m128i __DEFAULT_FN_ATTRS128
1421_mm_mask_cvtsepi16_epi8 (__m128i __O__mmask8 __M__m128i __A) {
1422  return (__m128i__builtin_ia32_pmovswb128_mask ((__v8hi__A,
1423               (__v16qi__O,
1424                __M);
1425}
1426
1427static __inline__ __m128i __DEFAULT_FN_ATTRS128
1428_mm_maskz_cvtsepi16_epi8 (__mmask8 __M__m128i __A) {
1429  return (__m128i__builtin_ia32_pmovswb128_mask ((__v8hi__A,
1430               (__v16qi_mm_setzero_si128(),
1431               __M);
1432}
1433
1434static __inline__ __m128i __DEFAULT_FN_ATTRS256
1435_mm256_cvtsepi16_epi8 (__m256i __A) {
1436  return (__m128i__builtin_ia32_pmovswb256_mask ((__v16hi__A,
1437               (__v16qi_mm_setzero_si128(),
1438               (__mmask16) -1);
1439}
1440
1441static __inline__ __m128i __DEFAULT_FN_ATTRS256
1442_mm256_mask_cvtsepi16_epi8 (__m128i __O__mmask16 __M__m256i __A) {
1443  return (__m128i__builtin_ia32_pmovswb256_mask ((__v16hi__A,
1444               (__v16qi__O,
1445                __M);
1446}
1447
1448static __inline__ __m128i __DEFAULT_FN_ATTRS256
1449_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M__m256i __A) {
1450  return (__m128i__builtin_ia32_pmovswb256_mask ((__v16hi__A,
1451               (__v16qi_mm_setzero_si128(),
1452               __M);
1453}
1454
1455static __inline__ __m128i __DEFAULT_FN_ATTRS128
1456_mm_cvtusepi16_epi8 (__m128i __A) {
1457  return (__m128i__builtin_ia32_pmovuswb128_mask ((__v8hi__A,
1458                (__v16qi_mm_setzero_si128(),
1459                (__mmask8) -1);
1460}
1461
1462static __inline__ __m128i __DEFAULT_FN_ATTRS128
1463_mm_mask_cvtusepi16_epi8 (__m128i __O__mmask8 __M__m128i __A) {
1464  return (__m128i__builtin_ia32_pmovuswb128_mask ((__v8hi__A,
1465                (__v16qi__O,
1466                __M);
1467}
1468
1469static __inline__ __m128i __DEFAULT_FN_ATTRS128
1470_mm_maskz_cvtusepi16_epi8 (__mmask8 __M__m128i __A) {
1471  return (__m128i__builtin_ia32_pmovuswb128_mask ((__v8hi__A,
1472                (__v16qi_mm_setzero_si128(),
1473                __M);
1474}
1475
1476static __inline__ __m128i __DEFAULT_FN_ATTRS256
1477_mm256_cvtusepi16_epi8 (__m256i __A) {
1478  return (__m128i__builtin_ia32_pmovuswb256_mask ((__v16hi__A,
1479                (__v16qi_mm_setzero_si128(),
1480                (__mmask16) -1);
1481}
1482
1483static __inline__ __m128i __DEFAULT_FN_ATTRS256
1484_mm256_mask_cvtusepi16_epi8 (__m128i __O__mmask16 __M__m256i __A) {
1485  return (__m128i__builtin_ia32_pmovuswb256_mask ((__v16hi__A,
1486                (__v16qi__O,
1487                __M);
1488}
1489
1490static __inline__ __m128i __DEFAULT_FN_ATTRS256
1491_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M__m256i __A) {
1492  return (__m128i__builtin_ia32_pmovuswb256_mask ((__v16hi__A,
1493                (__v16qi_mm_setzero_si128(),
1494                __M);
1495}
1496
1497static __inline__ __m128i __DEFAULT_FN_ATTRS128
1498_mm_cvtepi16_epi8 (__m128i __A) {
1499  return (__m128i)__builtin_shufflevector(
1500      __builtin_convertvector((__v8hi)__A, __v8qi),
1501      (__v8qi){00000000}, 01234567891011,
1502      12131415);
1503}
1504
1505static __inline__ __m128i __DEFAULT_FN_ATTRS128
1506_mm_mask_cvtepi16_epi8 (__m128i __O__mmask8 __M__m128i __A) {
1507  return (__m128i__builtin_ia32_pmovwb128_mask ((__v8hi__A,
1508               (__v16qi__O,
1509               __M);
1510}
1511
1512static __inline__ __m128i __DEFAULT_FN_ATTRS128
1513_mm_maskz_cvtepi16_epi8 (__mmask8 __M__m128i __A) {
1514  return (__m128i__builtin_ia32_pmovwb128_mask ((__v8hi__A,
1515               (__v16qi_mm_setzero_si128(),
1516               __M);
1517}
1518
1519static __inline__ void __DEFAULT_FN_ATTRS128
1520_mm_mask_cvtepi16_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
1521{
1522  __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi__A__M);
1523}
1524
1525
1526static __inline__ void __DEFAULT_FN_ATTRS128
1527_mm_mask_cvtsepi16_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
1528{
1529  __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi__A__M);
1530}
1531
1532static __inline__ void __DEFAULT_FN_ATTRS128
1533_mm_mask_cvtusepi16_storeu_epi8 (void * __P__mmask8 __M__m128i __A)
1534{
1535  __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi__A__M);
1536}
1537
1538static __inline__ __m128i __DEFAULT_FN_ATTRS256
1539_mm256_cvtepi16_epi8 (__m256i __A) {
1540  return (__m128i)__builtin_convertvector((__v16hi__A, __v16qi);
1541}
1542
1543static __inline__ __m128i __DEFAULT_FN_ATTRS256
1544_mm256_mask_cvtepi16_epi8 (__m128i __O__mmask16 __M__m256i __A) {
1545  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1546                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1547                                             (__v16qi)__O);
1548}
1549
1550static __inline__ __m128i __DEFAULT_FN_ATTRS256
1551_mm256_maskz_cvtepi16_epi8 (__mmask16 __M__m256i __A) {
1552  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1553                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1554                                             (__v16qi)_mm_setzero_si128());
1555}
1556
1557static __inline__ void __DEFAULT_FN_ATTRS256
1558_mm256_mask_cvtepi16_storeu_epi8 (void * __P__mmask16 __M__m256i __A)
1559{
1560  __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi__A__M);
1561}
1562
1563static __inline__ void __DEFAULT_FN_ATTRS256
1564_mm256_mask_cvtsepi16_storeu_epi8 (void * __P__mmask16 __M__m256i __A)
1565{
1566  __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi__A__M);
1567}
1568
1569static __inline__ void __DEFAULT_FN_ATTRS256
1570_mm256_mask_cvtusepi16_storeu_epi8 (void * __P__mmask16 __M__m256i __A)
1571{
1572  __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi__A__M);
1573}
1574
1575static __inline__ __m128i __DEFAULT_FN_ATTRS128
1576_mm_mask_mulhrs_epi16(__m128i __W__mmask8 __U__m128i __X__m128i __Y) {
1577  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1578                                             (__v8hi)_mm_mulhrs_epi16(__X__Y),
1579                                             (__v8hi)__W);
1580}
1581
1582static __inline__ __m128i __DEFAULT_FN_ATTRS128
1583_mm_maskz_mulhrs_epi16(__mmask8 __U__m128i __X__m128i __Y) {
1584  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1585                                             (__v8hi)_mm_mulhrs_epi16(__X__Y),
1586                                             (__v8hi)_mm_setzero_si128());
1587}
1588
1589static __inline__ __m256i __DEFAULT_FN_ATTRS256
1590_mm256_mask_mulhrs_epi16(__m256i __W__mmask16 __U__m256i __X__m256i __Y) {
1591  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1592                                         (__v16hi)_mm256_mulhrs_epi16(__X__Y),
1593                                         (__v16hi)__W);
1594}
1595
1596static __inline__ __m256i __DEFAULT_FN_ATTRS256
1597_mm256_maskz_mulhrs_epi16(__mmask16 __U__m256i __X__m256i __Y) {
1598  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1599                                         (__v16hi)_mm256_mulhrs_epi16(__X__Y),
1600                                         (__v16hi)_mm256_setzero_si256());
1601}
1602
1603static __inline__ __m128i __DEFAULT_FN_ATTRS128
1604_mm_mask_mulhi_epu16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
1605  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1606                                             (__v8hi)_mm_mulhi_epu16(__A__B),
1607                                             (__v8hi)__W);
1608}
1609
1610static __inline__ __m128i __DEFAULT_FN_ATTRS128
1611_mm_maskz_mulhi_epu16(__mmask8 __U__m128i __A__m128i __B) {
1612  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1613                                             (__v8hi)_mm_mulhi_epu16(__A__B),
1614                                             (__v8hi)_mm_setzero_si128());
1615}
1616
1617static __inline__ __m256i __DEFAULT_FN_ATTRS256
1618_mm256_mask_mulhi_epu16(__m256i __W__mmask16 __U__m256i __A__m256i __B) {
1619  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1620                                          (__v16hi)_mm256_mulhi_epu16(__A__B),
1621                                          (__v16hi)__W);
1622}
1623
1624static __inline__ __m256i __DEFAULT_FN_ATTRS256
1625_mm256_maskz_mulhi_epu16(__mmask16 __U__m256i __A__m256i __B) {
1626  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1627                                          (__v16hi)_mm256_mulhi_epu16(__A__B),
1628                                          (__v16hi)_mm256_setzero_si256());
1629}
1630
1631static __inline__ __m128i __DEFAULT_FN_ATTRS128
1632_mm_mask_mulhi_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
1633  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1634                                             (__v8hi)_mm_mulhi_epi16(__A__B),
1635                                             (__v8hi)__W);
1636}
1637
1638static __inline__ __m128i __DEFAULT_FN_ATTRS128
1639_mm_maskz_mulhi_epi16(__mmask8 __U__m128i __A__m128i __B) {
1640  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1641                                             (__v8hi)_mm_mulhi_epi16(__A__B),
1642                                             (__v8hi)_mm_setzero_si128());
1643}
1644
1645static __inline__ __m256i __DEFAULT_FN_ATTRS256
1646_mm256_mask_mulhi_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B) {
1647  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1648                                          (__v16hi)_mm256_mulhi_epi16(__A__B),
1649                                          (__v16hi)__W);
1650}
1651
1652static __inline__ __m256i __DEFAULT_FN_ATTRS256
1653_mm256_maskz_mulhi_epi16(__mmask16 __U__m256i __A__m256i __B) {
1654  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1655                                          (__v16hi)_mm256_mulhi_epi16(__A__B),
1656                                          (__v16hi)_mm256_setzero_si256());
1657}
1658
1659static __inline__ __m128i __DEFAULT_FN_ATTRS128
1660_mm_mask_unpackhi_epi8(__m128i __W__mmask16 __U__m128i __A__m128i __B) {
1661  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1662                                           (__v16qi)_mm_unpackhi_epi8(__A__B),
1663                                           (__v16qi)__W);
1664}
1665
1666static __inline__ __m128i __DEFAULT_FN_ATTRS128
1667_mm_maskz_unpackhi_epi8(__mmask16 __U__m128i __A__m128i __B) {
1668  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1669                                           (__v16qi)_mm_unpackhi_epi8(__A__B),
1670                                           (__v16qi)_mm_setzero_si128());
1671}
1672
1673static __inline__ __m256i __DEFAULT_FN_ATTRS256
1674_mm256_mask_unpackhi_epi8(__m256i __W__mmask32 __U__m256i __A__m256i __B) {
1675  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1676                                        (__v32qi)_mm256_unpackhi_epi8(__A__B),
1677                                        (__v32qi)__W);
1678}
1679
1680static __inline__ __m256i __DEFAULT_FN_ATTRS256
1681_mm256_maskz_unpackhi_epi8(__mmask32 __U__m256i __A__m256i __B) {
1682  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1683                                        (__v32qi)_mm256_unpackhi_epi8(__A__B),
1684                                        (__v32qi)_mm256_setzero_si256());
1685}
1686
1687static __inline__ __m128i __DEFAULT_FN_ATTRS128
1688_mm_mask_unpackhi_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
1689  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1690                                           (__v8hi)_mm_unpackhi_epi16(__A__B),
1691                                           (__v8hi)__W);
1692}
1693
1694static __inline__ __m128i __DEFAULT_FN_ATTRS128
1695_mm_maskz_unpackhi_epi16(__mmask8 __U__m128i __A__m128i __B) {
1696  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1697                                           (__v8hi)_mm_unpackhi_epi16(__A__B),
1698                                           (__v8hi_mm_setzero_si128());
1699}
1700
1701static __inline__ __m256i __DEFAULT_FN_ATTRS256
1702_mm256_mask_unpackhi_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B) {
1703  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1704                                       (__v16hi)_mm256_unpackhi_epi16(__A__B),
1705                                       (__v16hi)__W);
1706}
1707
1708static __inline__ __m256i __DEFAULT_FN_ATTRS256
1709_mm256_maskz_unpackhi_epi16(__mmask16 __U__m256i __A__m256i __B) {
1710  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1711                                       (__v16hi)_mm256_unpackhi_epi16(__A__B),
1712                                       (__v16hi)_mm256_setzero_si256());
1713}
1714
1715static __inline__ __m128i __DEFAULT_FN_ATTRS128
1716_mm_mask_unpacklo_epi8(__m128i __W__mmask16 __U__m128i __A__m128i __B) {
1717  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1718                                           (__v16qi)_mm_unpacklo_epi8(__A__B),
1719                                           (__v16qi)__W);
1720}
1721
1722static __inline__ __m128i __DEFAULT_FN_ATTRS128
1723_mm_maskz_unpacklo_epi8(__mmask16 __U__m128i __A__m128i __B) {
1724  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1725                                           (__v16qi)_mm_unpacklo_epi8(__A__B),
1726                                           (__v16qi)_mm_setzero_si128());
1727}
1728
1729static __inline__ __m256i __DEFAULT_FN_ATTRS256
1730_mm256_mask_unpacklo_epi8(__m256i __W__mmask32 __U__m256i __A__m256i __B) {
1731  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1732                                        (__v32qi)_mm256_unpacklo_epi8(__A__B),
1733                                        (__v32qi)__W);
1734}
1735
1736static __inline__ __m256i __DEFAULT_FN_ATTRS256
1737_mm256_maskz_unpacklo_epi8(__mmask32 __U__m256i __A__m256i __B) {
1738  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1739                                        (__v32qi)_mm256_unpacklo_epi8(__A__B),
1740                                        (__v32qi)_mm256_setzero_si256());
1741}
1742
1743static __inline__ __m128i __DEFAULT_FN_ATTRS128
1744_mm_mask_unpacklo_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B) {
1745  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1746                                           (__v8hi)_mm_unpacklo_epi16(__A__B),
1747                                           (__v8hi)__W);
1748}
1749
1750static __inline__ __m128i __DEFAULT_FN_ATTRS128
1751_mm_maskz_unpacklo_epi16(__mmask8 __U__m128i __A__m128i __B) {
1752  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1753                                           (__v8hi)_mm_unpacklo_epi16(__A__B),
1754                                           (__v8hi_mm_setzero_si128());
1755}
1756
1757static __inline__ __m256i __DEFAULT_FN_ATTRS256
1758_mm256_mask_unpacklo_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B) {
1759  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1760                                       (__v16hi)_mm256_unpacklo_epi16(__A__B),
1761                                       (__v16hi)__W);
1762}
1763
1764static __inline__ __m256i __DEFAULT_FN_ATTRS256
1765_mm256_maskz_unpacklo_epi16(__mmask16 __U__m256i __A__m256i __B) {
1766  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1767                                       (__v16hi)_mm256_unpacklo_epi16(__A__B),
1768                                       (__v16hi)_mm256_setzero_si256());
1769}
1770
1771static __inline__ __m128i __DEFAULT_FN_ATTRS128
1772_mm_mask_cvtepi8_epi16(__m128i __W__mmask8 __U__m128i __A)
1773{
1774  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1775                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1776                                             (__v8hi)__W);
1777}
1778
1779static __inline__ __m128i __DEFAULT_FN_ATTRS128
1780_mm_maskz_cvtepi8_epi16(__mmask8 __U__m128i __A)
1781{
1782  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1783                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1784                                             (__v8hi)_mm_setzero_si128());
1785}
1786
1787static __inline__ __m256i __DEFAULT_FN_ATTRS256
1788_mm256_mask_cvtepi8_epi16(__m256i __W__mmask16 __U__m128i __A)
1789{
1790  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1791                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1792                                             (__v16hi)__W);
1793}
1794
1795static __inline__ __m256i __DEFAULT_FN_ATTRS256
1796_mm256_maskz_cvtepi8_epi16(__mmask16 __U__m128i __A)
1797{
1798  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1799                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1800                                             (__v16hi)_mm256_setzero_si256());
1801}
1802
1803
1804static __inline__ __m128i __DEFAULT_FN_ATTRS128
1805_mm_mask_cvtepu8_epi16(__m128i __W__mmask8 __U__m128i __A)
1806{
1807  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1808                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1809                                             (__v8hi)__W);
1810}
1811
1812static __inline__ __m128i __DEFAULT_FN_ATTRS128
1813_mm_maskz_cvtepu8_epi16(__mmask8 __U__m128i __A)
1814{
1815  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1816                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1817                                             (__v8hi)_mm_setzero_si128());
1818}
1819
1820static __inline__ __m256i __DEFAULT_FN_ATTRS256
1821_mm256_mask_cvtepu8_epi16(__m256i __W__mmask16 __U__m128i __A)
1822{
1823  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1824                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1825                                             (__v16hi)__W);
1826}
1827
1828static __inline__ __m256i __DEFAULT_FN_ATTRS256
1829_mm256_maskz_cvtepu8_epi16 (__mmask16 __U__m128i __A)
1830{
1831  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1832                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1833                                             (__v16hi)_mm256_setzero_si256());
1834}
1835
1836
/* Masked _mm_shufflehi_epi16: __builtin_ia32_selectw_128 picks, under mask U,
   between _mm_shufflehi_epi16(A, imm) and W.  The whole expansion is
   parenthesized so the leading cast cannot be split from the call by a
   postfix operator applied to the macro result. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
                                       (__v8hi)(__m128i)(W)))
1841
/* Zeroing _mm_shufflehi_epi16: __builtin_ia32_selectw_128 picks, under mask U,
   between _mm_shufflehi_epi16(A, imm) and an all-zero vector.  The whole
   expansion is parenthesized so the leading cast cannot be split from the
   call by a postfix operator applied to the macro result. */
#define _mm_maskz_shufflehi_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
                                       (__v8hi)_mm_setzero_si128()))
1846
/* Masked _mm256_shufflehi_epi16: __builtin_ia32_selectw_256 picks, under mask
   U, between _mm256_shufflehi_epi16(A, imm) and W.  The whole expansion is
   parenthesized so the leading cast cannot be split from the call by a
   postfix operator applied to the macro result. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
                                       (__v16hi)(__m256i)(W)))
1851
/* Zeroing _mm256_shufflehi_epi16: __builtin_ia32_selectw_256 picks, under mask
   U, between _mm256_shufflehi_epi16(A, imm) and an all-zero vector.  The
   whole expansion is parenthesized so the leading cast cannot be split from
   the call by a postfix operator applied to the macro result. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
                                       (__v16hi)_mm256_setzero_si256()))
1856
/* Masked _mm_shufflelo_epi16: __builtin_ia32_selectw_128 picks, under mask U,
   between _mm_shufflelo_epi16(A, imm) and W.  The whole expansion is
   parenthesized so the leading cast cannot be split from the call by a
   postfix operator applied to the macro result. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
                                       (__v8hi)(__m128i)(W)))
1861
/* Zeroing _mm_shufflelo_epi16: __builtin_ia32_selectw_128 picks, under mask U,
   between _mm_shufflelo_epi16(A, imm) and an all-zero vector.  The whole
   expansion is parenthesized so the leading cast cannot be split from the
   call by a postfix operator applied to the macro result. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
                                       (__v8hi)_mm_setzero_si128()))
1866
/* Masked _mm256_shufflelo_epi16: __builtin_ia32_selectw_256 picks, under mask
   U, between _mm256_shufflelo_epi16(A, imm) and W.  The whole expansion is
   parenthesized so the leading cast cannot be split from the call by a
   postfix operator applied to the macro result. */
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v16hi)(__m256i)(W)))
1872
/* Zeroing _mm256_shufflelo_epi16: __builtin_ia32_selectw_256 picks, under mask
   U, between _mm256_shufflelo_epi16(A, imm) and an all-zero vector.  The
   whole expansion is parenthesized so the leading cast cannot be split from
   the call by a postfix operator applied to the macro result. */
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v16hi)_mm256_setzero_si256()))
1878
1879static __inline__ __m256i __DEFAULT_FN_ATTRS256
1880_mm256_sllv_epi16(__m256i __A__m256i __B)
1881{
1882  return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1883}
1884
1885static __inline__ __m256i __DEFAULT_FN_ATTRS256
1886_mm256_mask_sllv_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B)
1887{
1888  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1889                                           (__v16hi)_mm256_sllv_epi16(__A__B),
1890                                           (__v16hi)__W);
1891}
1892
1893static __inline__ __m256i __DEFAULT_FN_ATTRS256
1894_mm256_maskz_sllv_epi16(__mmask16 __U__m256i __A__m256i __B)
1895{
1896  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1897                                           (__v16hi)_mm256_sllv_epi16(__A__B),
1898                                           (__v16hi)_mm256_setzero_si256());
1899}
1900
1901static __inline__ __m128i __DEFAULT_FN_ATTRS128
1902_mm_sllv_epi16(__m128i __A__m128i __B)
1903{
1904  return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1905}
1906
1907static __inline__ __m128i __DEFAULT_FN_ATTRS128
1908_mm_mask_sllv_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
1909{
1910  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1911                                             (__v8hi)_mm_sllv_epi16(__A__B),
1912                                             (__v8hi)__W);
1913}
1914
1915static __inline__ __m128i __DEFAULT_FN_ATTRS128
1916_mm_maskz_sllv_epi16(__mmask8 __U__m128i __A__m128i __B)
1917{
1918  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1919                                             (__v8hi)_mm_sllv_epi16(__A__B),
1920                                             (__v8hi)_mm_setzero_si128());
1921}
1922
1923static __inline__ __m128i __DEFAULT_FN_ATTRS128
1924_mm_mask_sll_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
1925{
1926  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1927                                             (__v8hi)_mm_sll_epi16(__A__B),
1928                                             (__v8hi)__W);
1929}
1930
1931static __inline__ __m128i __DEFAULT_FN_ATTRS128
1932_mm_maskz_sll_epi16 (__mmask8 __U__m128i __A__m128i __B)
1933{
1934  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1935                                             (__v8hi)_mm_sll_epi16(__A__B),
1936                                             (__v8hi)_mm_setzero_si128());
1937}
1938
1939static __inline__ __m256i __DEFAULT_FN_ATTRS256
1940_mm256_mask_sll_epi16(__m256i __W__mmask16 __U__m256i __A__m128i __B)
1941{
1942  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1943                                          (__v16hi)_mm256_sll_epi16(__A__B),
1944                                          (__v16hi)__W);
1945}
1946
1947static __inline__ __m256i __DEFAULT_FN_ATTRS256
1948_mm256_maskz_sll_epi16(__mmask16 __U__m256i __A__m128i __B)
1949{
1950  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1951                                          (__v16hi)_mm256_sll_epi16(__A__B),
1952                                          (__v16hi)_mm256_setzero_si256());
1953}
1954
1955static __inline__ __m128i __DEFAULT_FN_ATTRS128
1956_mm_mask_slli_epi16(__m128i __W__mmask8 __U__m128i __Aint __B)
1957{
1958  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1959                                             (__v8hi)_mm_slli_epi16(__A__B),
1960                                             (__v8hi)__W);
1961}
1962
1963static __inline__ __m128i __DEFAULT_FN_ATTRS128
1964_mm_maskz_slli_epi16 (__mmask8 __U__m128i __Aint __B)
1965{
1966  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1967                                             (__v8hi)_mm_slli_epi16(__A__B),
1968                                             (__v8hi)_mm_setzero_si128());
1969}
1970
1971static __inline__ __m256i __DEFAULT_FN_ATTRS256
1972_mm256_mask_slli_epi16(__m256i __W__mmask16 __U__m256i __Aint __B)
1973{
1974  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1975                                         (__v16hi)_mm256_slli_epi16(__A__B),
1976                                         (__v16hi)__W);
1977}
1978
1979static __inline__ __m256i __DEFAULT_FN_ATTRS256
1980_mm256_maskz_slli_epi16(__mmask16 __U__m256i __Aint __B)
1981{
1982  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1983                                         (__v16hi)_mm256_slli_epi16(__A__B),
1984                                         (__v16hi)_mm256_setzero_si256());
1985}
1986
1987static __inline__ __m256i __DEFAULT_FN_ATTRS256
1988_mm256_srlv_epi16(__m256i __A__m256i __B)
1989{
1990  return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1991}
1992
1993static __inline__ __m256i __DEFAULT_FN_ATTRS256
1994_mm256_mask_srlv_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B)
1995{
1996  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1997                                           (__v16hi)_mm256_srlv_epi16(__A__B),
1998                                           (__v16hi)__W);
1999}
2000
2001static __inline__ __m256i __DEFAULT_FN_ATTRS256
2002_mm256_maskz_srlv_epi16(__mmask16 __U__m256i __A__m256i __B)
2003{
2004  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2005                                           (__v16hi)_mm256_srlv_epi16(__A__B),
2006                                           (__v16hi)_mm256_setzero_si256());
2007}
2008
2009static __inline__ __m128i __DEFAULT_FN_ATTRS128
2010_mm_srlv_epi16(__m128i __A__m128i __B)
2011{
2012  return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
2013}
2014
2015static __inline__ __m128i __DEFAULT_FN_ATTRS128
2016_mm_mask_srlv_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
2017{
2018  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2019                                             (__v8hi)_mm_srlv_epi16(__A__B),
2020                                             (__v8hi)__W);
2021}
2022
2023static __inline__ __m128i __DEFAULT_FN_ATTRS128
2024_mm_maskz_srlv_epi16(__mmask8 __U__m128i __A__m128i __B)
2025{
2026  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2027                                             (__v8hi)_mm_srlv_epi16(__A__B),
2028                                             (__v8hi)_mm_setzero_si128());
2029}
2030
2031static __inline__ __m256i __DEFAULT_FN_ATTRS256
2032_mm256_srav_epi16(__m256i __A__m256i __B)
2033{
2034  return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2035}
2036
2037static __inline__ __m256i __DEFAULT_FN_ATTRS256
2038_mm256_mask_srav_epi16(__m256i __W__mmask16 __U__m256i __A__m256i __B)
2039{
2040  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2041                                           (__v16hi)_mm256_srav_epi16(__A__B),
2042                                           (__v16hi)__W);
2043}
2044
2045static __inline__ __m256i __DEFAULT_FN_ATTRS256
2046_mm256_maskz_srav_epi16(__mmask16 __U__m256i __A__m256i __B)
2047{
2048  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2049                                           (__v16hi)_mm256_srav_epi16(__A__B),
2050                                           (__v16hi)_mm256_setzero_si256());
2051}
2052
2053static __inline__ __m128i __DEFAULT_FN_ATTRS128
2054_mm_srav_epi16(__m128i __A__m128i __B)
2055{
2056  return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
2057}
2058
2059static __inline__ __m128i __DEFAULT_FN_ATTRS128
2060_mm_mask_srav_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
2061{
2062  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2063                                             (__v8hi)_mm_srav_epi16(__A__B),
2064                                             (__v8hi)__W);
2065}
2066
2067static __inline__ __m128i __DEFAULT_FN_ATTRS128
2068_mm_maskz_srav_epi16(__mmask8 __U__m128i __A__m128i __B)
2069{
2070  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2071                                             (__v8hi)_mm_srav_epi16(__A__B),
2072                                             (__v8hi)_mm_setzero_si128());
2073}
2074
2075static __inline__ __m128i __DEFAULT_FN_ATTRS128
2076_mm_mask_sra_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
2077{
2078  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2079                                             (__v8hi)_mm_sra_epi16(__A__B),
2080                                             (__v8hi)__W);
2081}
2082
2083static __inline__ __m128i __DEFAULT_FN_ATTRS128
2084_mm_maskz_sra_epi16(__mmask8 __U__m128i __A__m128i __B)
2085{
2086  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2087                                             (__v8hi)_mm_sra_epi16(__A__B),
2088                                             (__v8hi)_mm_setzero_si128());
2089}
2090
2091static __inline__ __m256i __DEFAULT_FN_ATTRS256
2092_mm256_mask_sra_epi16(__m256i __W__mmask16 __U__m256i __A__m128i __B)
2093{
2094  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2095                                          (__v16hi)_mm256_sra_epi16(__A__B),
2096                                          (__v16hi)__W);
2097}
2098
2099static __inline__ __m256i __DEFAULT_FN_ATTRS256
2100_mm256_maskz_sra_epi16(__mmask16 __U__m256i __A__m128i __B)
2101{
2102  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2103                                          (__v16hi)_mm256_sra_epi16(__A__B),
2104                                          (__v16hi)_mm256_setzero_si256());
2105}
2106
2107static __inline__ __m128i __DEFAULT_FN_ATTRS128
2108_mm_mask_srai_epi16(__m128i __W__mmask8 __U__m128i __Aint __B)
2109{
2110  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2111                                             (__v8hi)_mm_srai_epi16(__A__B),
2112                                             (__v8hi)__W);
2113}
2114
2115static __inline__ __m128i __DEFAULT_FN_ATTRS128
2116_mm_maskz_srai_epi16(__mmask8 __U__m128i __Aint __B)
2117{
2118  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2119                                             (__v8hi)_mm_srai_epi16(__A__B),
2120                                             (__v8hi)_mm_setzero_si128());
2121}
2122
2123static __inline__ __m256i __DEFAULT_FN_ATTRS256
2124_mm256_mask_srai_epi16(__m256i __W__mmask16 __U__m256i __Aint __B)
2125{
2126  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2127                                         (__v16hi)_mm256_srai_epi16(__A__B),
2128                                         (__v16hi)__W);
2129}
2130
2131static __inline__ __m256i __DEFAULT_FN_ATTRS256
2132_mm256_maskz_srai_epi16(__mmask16 __U__m256i __Aint __B)
2133{
2134  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2135                                         (__v16hi)_mm256_srai_epi16(__A__B),
2136                                         (__v16hi)_mm256_setzero_si256());
2137}
2138
2139static __inline__ __m128i __DEFAULT_FN_ATTRS128
2140_mm_mask_srl_epi16(__m128i __W__mmask8 __U__m128i __A__m128i __B)
2141{
2142  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2143                                             (__v8hi)_mm_srl_epi16(__A__B),
2144                                             (__v8hi)__W);
2145}
2146
2147static __inline__ __m128i __DEFAULT_FN_ATTRS128
2148_mm_maskz_srl_epi16 (__mmask8 __U__m128i __A__m128i __B)
2149{
2150  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2151                                             (__v8hi)_mm_srl_epi16(__A__B),
2152                                             (__v8hi)_mm_setzero_si128());
2153}
2154
2155static __inline__ __m256i __DEFAULT_FN_ATTRS256
2156_mm256_mask_srl_epi16(__m256i __W__mmask16 __U__m256i __A__m128i __B)
2157{
2158  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2159                                          (__v16hi)_mm256_srl_epi16(__A__B),
2160                                          (__v16hi)__W);
2161}
2162
2163static __inline__ __m256i __DEFAULT_FN_ATTRS256
2164_mm256_maskz_srl_epi16(__mmask16 __U__m256i __A__m128i __B)
2165{
2166  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2167                                          (__v16hi)_mm256_srl_epi16(__A__B),
2168                                          (__v16hi)_mm256_setzero_si256());
2169}
2170
2171static __inline__ __m128i __DEFAULT_FN_ATTRS128
2172_mm_mask_srli_epi16(__m128i __W__mmask8 __U__m128i __Aint __B)
2173{
2174  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2175                                             (__v8hi)_mm_srli_epi16(__A__B),
2176                                             (__v8hi)__W);
2177}
2178
2179static __inline__ __m128i __DEFAULT_FN_ATTRS128
2180_mm_maskz_srli_epi16 (__mmask8 __U__m128i __Aint __B)
2181{
2182  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2183                                             (__v8hi)_mm_srli_epi16(__A__B),
2184                                             (__v8hi)_mm_setzero_si128());
2185}
2186
2187static __inline__ __m256i __DEFAULT_FN_ATTRS256
2188_mm256_mask_srli_epi16(__m256i __W__mmask16 __U__m256i __Aint __B)
2189{
2190  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2191                                         (__v16hi)_mm256_srli_epi16(__A__B),
2192                                         (__v16hi)__W);
2193}
2194
2195static __inline__ __m256i __DEFAULT_FN_ATTRS256
2196_mm256_maskz_srli_epi16(__mmask16 __U__m256i __Aint __B)
2197{
2198  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2199                                         (__v16hi)_mm256_srli_epi16(__A__B),
2200                                         (__v16hi)_mm256_setzero_si256());
2201}
2202
2203static __inline__ __m128i __DEFAULT_FN_ATTRS128
2204_mm_mask_mov_epi16 (__m128i __W__mmask8 __U__m128i __A)
2205{
2206  return (__m128i__builtin_ia32_selectw_128 ((__mmask8__U,
2207                (__v8hi__A,
2208                (__v8hi__W);
2209}
2210
2211static __inline__ __m128i __DEFAULT_FN_ATTRS128
2212_mm_maskz_mov_epi16 (__mmask8 __U__m128i __A)
2213{
2214  return (__m128i__builtin_ia32_selectw_128 ((__mmask8__U,
2215                (__v8hi__A,
2216                (__v8hi_mm_setzero_si128 ());
2217}
2218
2219static __inline__ __m256i __DEFAULT_FN_ATTRS256
2220_mm256_mask_mov_epi16 (__m256i __W__mmask16 __U__m256i __A)
2221{
2222  return (__m256i__builtin_ia32_selectw_256 ((__mmask16__U,
2223                (__v16hi__A,
2224                (__v16hi__W);
2225}
2226
2227static __inline__ __m256i __DEFAULT_FN_ATTRS256
2228_mm256_maskz_mov_epi16 (__mmask16 __U__m256i __A)
2229{
2230  return (__m256i__builtin_ia32_selectw_256 ((__mmask16__U,
2231                (__v16hi__A,
2232                (__v16hi_mm256_setzero_si256 ());
2233}
2234
2235static __inline__ __m128i __DEFAULT_FN_ATTRS128
2236_mm_mask_mov_epi8 (__m128i __W__mmask16 __U__m128i __A)
2237{
2238  return (__m128i__builtin_ia32_selectb_128 ((__mmask16__U,
2239                (__v16qi__A,
2240                (__v16qi__W);
2241}
2242
2243static __inline__ __m128i __DEFAULT_FN_ATTRS128
2244_mm_maskz_mov_epi8 (__mmask16 __U__m128i __A)
2245{
2246  return (__m128i__builtin_ia32_selectb_128 ((__mmask16__U,
2247                (__v16qi__A,
2248                (__v16qi_mm_setzero_si128 ());
2249}
2250
2251static __inline__ __m256i __DEFAULT_FN_ATTRS256
2252_mm256_mask_mov_epi8 (__m256i __W__mmask32 __U__m256i __A)
2253{
2254  return (__m256i__builtin_ia32_selectb_256 ((__mmask32__U,
2255                (__v32qi__A,
2256                (__v32qi__W);
2257}
2258
2259static __inline__ __m256i __DEFAULT_FN_ATTRS256
2260_mm256_maskz_mov_epi8 (__mmask32 __U__m256i __A)
2261{
2262  return (__m256i__builtin_ia32_selectb_256 ((__mmask32__U,
2263                (__v32qi__A,
2264                (__v32qi_mm256_setzero_si256 ());
2265}
2266
2267
2268static __inline__ __m128i __DEFAULT_FN_ATTRS128
2269_mm_mask_set1_epi8 (__m128i __O__mmask16 __Mchar __A)
2270{
2271  return (__m128i__builtin_ia32_selectb_128(__M,
2272                                              (__v16qi_mm_set1_epi8(__A),
2273                                              (__v16qi__O);
2274}
2275
2276static __inline__ __m128i __DEFAULT_FN_ATTRS128
2277_mm_maskz_set1_epi8 (__mmask16 __Mchar __A)
2278{
2279 return (__m128i__builtin_ia32_selectb_128(__M,
2280                                             (__v16qi_mm_set1_epi8(__A),
2281                                             (__v16qi_mm_setzero_si128());
2282}
2283
2284static __inline__ __m256i __DEFAULT_FN_ATTRS256
2285_mm256_mask_set1_epi8 (__m256i __O__mmask32 __Mchar __A)
2286{
2287  return (__m256i__builtin_ia32_selectb_256(__M,
2288                                              (__v32qi_mm256_set1_epi8(__A),
2289                                              (__v32qi__O);
2290}
2291
2292static __inline__ __m256i __DEFAULT_FN_ATTRS256
2293_mm256_maskz_set1_epi8 (__mmask32 __Mchar __A)
2294{
2295  return (__m256i__builtin_ia32_selectb_256(__M,
2296                                              (__v32qi_mm256_set1_epi8(__A),
2297                                              (__v32qi_mm256_setzero_si256());
2298}
2299
2300static __inline __m128i __DEFAULT_FN_ATTRS128
2301_mm_loadu_epi16 (void const *__P)
2302{
2303  struct __loadu_epi16 {
2304    __m128i_u __v;
2305  } __attribute__((__packed__, __may_alias__));
2306  return ((struct __loadu_epi16*)__P)->__v;
2307}
2308
2309static __inline__ __m128i __DEFAULT_FN_ATTRS128
2310_mm_mask_loadu_epi16 (__m128i __W__mmask8 __Uvoid const *__P)
2311{
2312  return (__m128i__builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
2313                 (__v8hi__W,
2314                 (__mmask8__U);
2315}
2316
2317static __inline__ __m128i __DEFAULT_FN_ATTRS128
2318_mm_maskz_loadu_epi16 (__mmask8 __Uvoid const *__P)
2319{
2320  return (__m128i__builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
2321                 (__v8hi)
2322                 _mm_setzero_si128 (),
2323                 (__mmask8__U);
2324}
2325
2326static __inline __m256i __DEFAULT_FN_ATTRS256
2327_mm256_loadu_epi16 (void const *__P)
2328{
2329  struct __loadu_epi16 {
2330    __m256i_u __v;
2331  } __attribute__((__packed__, __may_alias__));
2332  return ((struct __loadu_epi16*)__P)->__v;
2333}
2334
2335static __inline__ __m256i __DEFAULT_FN_ATTRS256
2336_mm256_mask_loadu_epi16 (__m256i __W__mmask16 __Uvoid const *__P)
2337{
2338  return (__m256i__builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2339                 (__v16hi__W,
2340                 (__mmask16__U);
2341}
2342
2343static __inline__ __m256i __DEFAULT_FN_ATTRS256
2344_mm256_maskz_loadu_epi16 (__mmask16 __Uvoid const *__P)
2345{
2346  return (__m256i__builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2347                 (__v16hi)
2348                 _mm256_setzero_si256 (),
2349                 (__mmask16__U);
2350}
2351
2352static __inline __m128i __DEFAULT_FN_ATTRS128
2353_mm_loadu_epi8 (void const *__P)
2354{
2355  struct __loadu_epi8 {
2356    __m128i_u __v;
2357  } __attribute__((__packed__, __may_alias__));
2358  return ((struct __loadu_epi8*)__P)->__v;
2359}
2360
2361static __inline__ __m128i __DEFAULT_FN_ATTRS128
2362_mm_mask_loadu_epi8 (__m128i __W__mmask16 __Uvoid const *__P)
2363{
2364  return (__m128i__builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
2365                 (__v16qi__W,
2366                 (__mmask16__U);
2367}
2368
2369static __inline__ __m128i __DEFAULT_FN_ATTRS128
2370_mm_maskz_loadu_epi8 (__mmask16 __Uvoid const *__P)
2371{
2372  return (__m128i__builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
2373                 (__v16qi)
2374                 _mm_setzero_si128 (),
2375                 (__mmask16__U);
2376}
2377
2378static __inline __m256i __DEFAULT_FN_ATTRS256
2379_mm256_loadu_epi8 (void const *__P)
2380{
2381  struct __loadu_epi8 {
2382    __m256i_u __v;
2383  } __attribute__((__packed__, __may_alias__));
2384  return ((struct __loadu_epi8*)__P)->__v;
2385}
2386
2387static __inline__ __m256i __DEFAULT_FN_ATTRS256
2388_mm256_mask_loadu_epi8 (__m256i __W__mmask32 __Uvoid const *__P)
2389{
2390  return (__m256i__builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2391                 (__v32qi__W,
2392                 (__mmask32__U);
2393}
2394
2395static __inline__ __m256i __DEFAULT_FN_ATTRS256
2396_mm256_maskz_loadu_epi8 (__mmask32 __Uvoid const *__P)
2397{
2398  return (__m256i__builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2399                 (__v32qi)
2400                 _mm256_setzero_si256 (),
2401                 (__mmask32__U);
2402}
2403
2404static __inline void __DEFAULT_FN_ATTRS128
2405_mm_storeu_epi16 (void *__P__m128i __A)
2406{
2407  struct __storeu_epi16 {
2408    __m128i_u __v;
2409  } __attribute__((__packed__, __may_alias__));
2410  ((struct __storeu_epi16*)__P)->__v = __A;
2411}
2412
2413static __inline__ void __DEFAULT_FN_ATTRS128
2414_mm_mask_storeu_epi16 (void *__P__mmask8 __U__m128i __A)
2415{
2416  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2417             (__v8hi__A,
2418             (__mmask8__U);
2419}
2420
2421static __inline void __DEFAULT_FN_ATTRS256
2422_mm256_storeu_epi16 (void *__P__m256i __A)
2423{
2424  struct __storeu_epi16 {
2425    __m256i_u __v;
2426  } __attribute__((__packed__, __may_alias__));
2427  ((struct __storeu_epi16*)__P)->__v = __A;
2428}
2429
2430static __inline__ void __DEFAULT_FN_ATTRS256
2431_mm256_mask_storeu_epi16 (void *__P__mmask16 __U__m256i __A)
2432{
2433  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2434             (__v16hi__A,
2435             (__mmask16__U);
2436}
2437
2438static __inline void __DEFAULT_FN_ATTRS128
2439_mm_storeu_epi8 (void *__P__m128i __A)
2440{
2441  struct __storeu_epi8 {
2442    __m128i_u __v;
2443  } __attribute__((__packed__, __may_alias__));
2444  ((struct __storeu_epi8*)__P)->__v = __A;
2445}
2446
2447static __inline__ void __DEFAULT_FN_ATTRS128
2448_mm_mask_storeu_epi8 (void *__P__mmask16 __U__m128i __A)
2449{
2450  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2451             (__v16qi__A,
2452             (__mmask16__U);
2453}
2454
2455static __inline void __DEFAULT_FN_ATTRS256
2456_mm256_storeu_epi8 (void *__P__m256i __A)
2457{
2458  struct __storeu_epi8 {
2459    __m256i_u __v;
2460  } __attribute__((__packed__, __may_alias__));
2461  ((struct __storeu_epi8*)__P)->__v = __A;
2462}
2463
2464static __inline__ void __DEFAULT_FN_ATTRS256
2465_mm256_mask_storeu_epi8 (void *__P__mmask32 __U__m256i __A)
2466{
2467  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2468             (__v32qi__A,
2469             (__mmask32__U);
2470}
2471
2472static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2473_mm_test_epi8_mask (__m128i __A__m128i __B)
2474{
2475  return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
2476}
2477
2478static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2479_mm_mask_test_epi8_mask (__mmask16 __U__m128i __A__m128i __B)
2480{
2481  return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2482                                    _mm_setzero_si128());
2483}
2484
2485static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2486_mm256_test_epi8_mask (__m256i __A__m256i __B)
2487{
2488  return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
2489                                  _mm256_setzero_si256());
2490}
2491
2492static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2493_mm256_mask_test_epi8_mask (__mmask32 __U__m256i __A__m256i __B)
2494{
2495  return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
2496                                       _mm256_setzero_si256());
2497}
2498
2499static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2500_mm_test_epi16_mask (__m128i __A__m128i __B)
2501{
2502  return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2503}
2504
2505static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2506_mm_mask_test_epi16_mask (__mmask8 __U__m128i __A__m128i __B)
2507{
2508  return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
2509                                     _mm_setzero_si128());
2510}
2511
2512static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2513_mm256_test_epi16_mask (__m256i __A__m256i __B)
2514{
2515  return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
2516                                   _mm256_setzero_si256 ());
2517}
2518
2519static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2520_mm256_mask_test_epi16_mask (__mmask16 __U__m256i __A__m256i __B)
2521{
2522  return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
2523                                        _mm256_setzero_si256());
2524}
2525
2526static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2527_mm_testn_epi8_mask (__m128i __A__m128i __B)
2528{
2529  return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2530}
2531
2532static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2533_mm_mask_testn_epi8_mask (__mmask16 __U__m128i __A__m128i __B)
2534{
2535  return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2536                                  _mm_setzero_si128());
2537}
2538
2539static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2540_mm256_testn_epi8_mask (__m256i __A__m256i __B)
2541{
2542  return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
2543                                 _mm256_setzero_si256());
2544}
2545
2546static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2547_mm256_mask_testn_epi8_mask (__mmask32 __U__m256i __A__m256i __B)
2548{
2549  return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
2550                                      _mm256_setzero_si256());
2551}
2552
2553static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2554_mm_testn_epi16_mask (__m128i __A__m128i __B)
2555{
2556  return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2557}
2558
2559static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2560_mm_mask_testn_epi16_mask (__mmask8 __U__m128i __A__m128i __B)
2561{
2562  return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128());
2563}
2564
2565static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2566_mm256_testn_epi16_mask (__m256i __A__m256i __B)
2567{
2568  return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
2569                                  _mm256_setzero_si256());
2570}
2571
2572static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2573_mm256_mask_testn_epi16_mask (__mmask16 __U__m256i __A__m256i __B)
2574{
2575  return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
2576                                       _mm256_setzero_si256());
2577}
2578
2579static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2580_mm_movepi8_mask (__m128i __A)
2581{
2582  return (__mmask16__builtin_ia32_cvtb2mask128 ((__v16qi__A);
2583}
2584
2585static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2586_mm256_movepi8_mask (__m256i __A)
2587{
2588  return (__mmask32__builtin_ia32_cvtb2mask256 ((__v32qi__A);
2589}
2590
2591static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2592_mm_movepi16_mask (__m128i __A)
2593{
2594  return (__mmask8__builtin_ia32_cvtw2mask128 ((__v8hi__A);
2595}
2596
2597static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2598_mm256_movepi16_mask (__m256i __A)
2599{
2600  return (__mmask16__builtin_ia32_cvtw2mask256 ((__v16hi__A);
2601}
2602
2603static __inline__ __m128i __DEFAULT_FN_ATTRS128
2604_mm_movm_epi8 (__mmask16 __A)
2605{
2606  return (__m128i__builtin_ia32_cvtmask2b128 (__A);
2607}
2608
2609static __inline__ __m256i __DEFAULT_FN_ATTRS256
2610_mm256_movm_epi8 (__mmask32 __A)
2611{
2612  return (__m256i__builtin_ia32_cvtmask2b256 (__A);
2613}
2614
2615static __inline__ __m128i __DEFAULT_FN_ATTRS128
2616_mm_movm_epi16 (__mmask8 __A)
2617{
2618  return (__m128i__builtin_ia32_cvtmask2w128 (__A);
2619}
2620
2621static __inline__ __m256i __DEFAULT_FN_ATTRS256
2622_mm256_movm_epi16 (__mmask16 __A)
2623{
2624  return (__m256i__builtin_ia32_cvtmask2w256 (__A);
2625}
2626
2627static __inline__ __m128i __DEFAULT_FN_ATTRS128
2628_mm_mask_broadcastb_epi8 (__m128i __O__mmask16 __M__m128i __A)
2629{
2630  return (__m128i)__builtin_ia32_selectb_128(__M,
2631                                             (__v16qi_mm_broadcastb_epi8(__A),
2632                                             (__v16qi__O);
2633}
2634
2635static __inline__ __m128i __DEFAULT_FN_ATTRS128
2636_mm_maskz_broadcastb_epi8 (__mmask16 __M__m128i __A)
2637{
2638  return (__m128i)__builtin_ia32_selectb_128(__M,
2639                                             (__v16qi_mm_broadcastb_epi8(__A),
2640                                             (__v16qi_mm_setzero_si128());
2641}
2642
2643static __inline__ __m256i __DEFAULT_FN_ATTRS256
2644_mm256_mask_broadcastb_epi8 (__m256i __O__mmask32 __M__m128i __A)
2645{
2646  return (__m256i)__builtin_ia32_selectb_256(__M,
2647                                             (__v32qi_mm256_broadcastb_epi8(__A),
2648                                             (__v32qi__O);
2649}
2650
2651static __inline__ __m256i __DEFAULT_FN_ATTRS256
2652_mm256_maskz_broadcastb_epi8 (__mmask32 __M__m128i __A)
2653{
2654  return (__m256i)__builtin_ia32_selectb_256(__M,
2655                                             (__v32qi_mm256_broadcastb_epi8(__A),
2656                                             (__v32qi_mm256_setzero_si256());
2657}
2658
2659static __inline__ __m128i __DEFAULT_FN_ATTRS128
2660_mm_mask_broadcastw_epi16 (__m128i __O__mmask8 __M__m128i __A)
2661{
2662  return (__m128i)__builtin_ia32_selectw_128(__M,
2663                                             (__v8hi_mm_broadcastw_epi16(__A),
2664                                             (__v8hi__O);
2665}
2666
2667static __inline__ __m128i __DEFAULT_FN_ATTRS128
2668_mm_maskz_broadcastw_epi16 (__mmask8 __M__m128i __A)
2669{
2670  return (__m128i)__builtin_ia32_selectw_128(__M,
2671                                             (__v8hi_mm_broadcastw_epi16(__A),
2672                                             (__v8hi_mm_setzero_si128());
2673}
2674
2675static __inline__ __m256i __DEFAULT_FN_ATTRS256
2676_mm256_mask_broadcastw_epi16 (__m256i __O__mmask16 __M__m128i __A)
2677{
2678  return (__m256i)__builtin_ia32_selectw_256(__M,
2679                                             (__v16hi_mm256_broadcastw_epi16(__A),
2680                                             (__v16hi__O);
2681}
2682
2683static __inline__ __m256i __DEFAULT_FN_ATTRS256
2684_mm256_maskz_broadcastw_epi16 (__mmask16 __M__m128i __A)
2685{
2686  return (__m256i)__builtin_ia32_selectw_256(__M,
2687                                             (__v16hi_mm256_broadcastw_epi16(__A),
2688                                             (__v16hi_mm256_setzero_si256());
2689}
2690
2691static __inline__ __m256i __DEFAULT_FN_ATTRS256
2692_mm256_mask_set1_epi16 (__m256i __O__mmask16 __Mshort __A)
2693{
2694  return (__m256i__builtin_ia32_selectw_256 (__M,
2695                                               (__v16hi_mm256_set1_epi16(__A),
2696                                               (__v16hi__O);
2697}
2698
2699static __inline__ __m256i __DEFAULT_FN_ATTRS256
2700_mm256_maskz_set1_epi16 (__mmask16 __Mshort __A)
2701{
2702  return (__m256i__builtin_ia32_selectw_256(__M,
2703                                              (__v16hi)_mm256_set1_epi16(__A),
2704                                              (__v16hi_mm256_setzero_si256());
2705}
2706
2707static __inline__ __m128i __DEFAULT_FN_ATTRS128
2708_mm_mask_set1_epi16 (__m128i __O__mmask8 __Mshort __A)
2709{
2710  return (__m128i__builtin_ia32_selectw_128(__M,
2711                                              (__v8hi_mm_set1_epi16(__A),
2712                                              (__v8hi__O);
2713}
2714
2715static __inline__ __m128i __DEFAULT_FN_ATTRS128
2716_mm_maskz_set1_epi16 (__mmask8 __Mshort __A)
2717{
2718  return (__m128i__builtin_ia32_selectw_128(__M,
2719                                              (__v8hi_mm_set1_epi16(__A),
2720                                              (__v8hi_mm_setzero_si128());
2721}
2722
2723static __inline__ __m128i __DEFAULT_FN_ATTRS128
2724_mm_permutexvar_epi16 (__m128i __A__m128i __B)
2725{
2726  return (__m128i)__builtin_ia32_permvarhi128((__v8hi__B, (__v8hi__A);
2727}
2728
2729static __inline__ __m128i __DEFAULT_FN_ATTRS128
2730_mm_maskz_permutexvar_epi16 (__mmask8 __M__m128i __A__m128i __B)
2731{
2732  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2733                                        (__v8hi)_mm_permutexvar_epi16(__A__B),
2734                                        (__v8hi_mm_setzero_si128());
2735}
2736
2737static __inline__ __m128i __DEFAULT_FN_ATTRS128
2738_mm_mask_permutexvar_epi16 (__m128i __W__mmask8 __M__m128i __A,
2739          __m128i __B)
2740{
2741  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2742                                        (__v8hi)_mm_permutexvar_epi16(__A__B),
2743                                        (__v8hi)__W);
2744}
2745
2746static __inline__ __m256i __DEFAULT_FN_ATTRS256
2747_mm256_permutexvar_epi16 (__m256i __A__m256i __B)
2748{
2749  return (__m256i)__builtin_ia32_permvarhi256((__v16hi__B, (__v16hi__A);
2750}
2751
2752static __inline__ __m256i __DEFAULT_FN_ATTRS256
2753_mm256_maskz_permutexvar_epi16 (__mmask16 __M__m256i __A,
2754        __m256i __B)
2755{
2756  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2757                                    (__v16hi)_mm256_permutexvar_epi16(__A__B),
2758                                    (__v16hi)_mm256_setzero_si256());
2759}
2760
2761static __inline__ __m256i __DEFAULT_FN_ATTRS256
2762_mm256_mask_permutexvar_epi16 (__m256i __W__mmask16 __M__m256i __A,
2763             __m256i __B)
2764{
2765  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2766                                    (__v16hi)_mm256_permutexvar_epi16(__A__B),
2767                                    (__v16hi)__W);
2768}
2769
/* Masked _mm_alignr_epi8: blends the alignr result with W per byte under
   mask U.  N is cast to int before being forwarded.  The expansion is fully
   parenthesized so it composes safely with postfix operators. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)(__m128i)(W)))

/* Zero-masked _mm_alignr_epi8: blends the alignr result with an all-zero
   vector per byte under mask U.  The expansion is fully parenthesized so it
   composes safely with postfix operators. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)_mm_setzero_si128()))

/* Masked _mm256_alignr_epi8: blends the alignr result with W per byte under
   mask U.  The expansion is fully parenthesized so it composes safely with
   postfix operators. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)(__m256i)(W)))

/* Zero-masked _mm256_alignr_epi8: blends the alignr result with an all-zero
   vector per byte under mask U.  The expansion is fully parenthesized so it
   composes safely with postfix operators. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)_mm256_setzero_si256()))

/* Forwards byte vectors A and B plus the int-cast immediate imm to
   __builtin_ia32_dbpsadbw128.  The expansion is fully parenthesized so it
   composes safely with postfix operators. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \
                                       (__v16qi)(__m128i)(B), (int)(imm)))

/* Masked _mm_dbsad_epu8: blends its result with W per 16-bit element under
   mask U.  The expansion is fully parenthesized so it composes safely with
   postfix operators. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                      (__v8hi)(__m128i)(W)))

/* Zero-masked _mm_dbsad_epu8: blends its result with an all-zero vector per
   16-bit element under mask U.  The expansion is fully parenthesized so it
   composes safely with postfix operators. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                      (__v8hi)_mm_setzero_si128()))

/* Forwards byte vectors A and B plus the int-cast immediate imm to
   __builtin_ia32_dbpsadbw256.  The expansion is fully parenthesized so it
   composes safely with postfix operators. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \
                                       (__v32qi)(__m256i)(B), (int)(imm)))

/* Masked _mm256_dbsad_epu8: blends its result with W per 16-bit element
   under mask U.  The expansion is fully parenthesized so it composes safely
   with postfix operators. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                  (__v16hi)(__m256i)(W)))

/* Zero-masked _mm256_dbsad_epu8: blends its result with an all-zero vector
   per 16-bit element under mask U.  The expansion is fully parenthesized so
   it composes safely with postfix operators. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                  (__v16hi)_mm256_setzero_si256()))

2818#undef __DEFAULT_FN_ATTRS128
2819#undef __DEFAULT_FN_ATTRS256
2820
2821#endif /* __AVX512VLBWINTRIN_H */
2822