1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | #ifndef __IMMINTRIN_H |
25 | #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." |
26 | #endif |
27 | |
28 | #ifndef __AVX512BWINTRIN_H |
29 | #define __AVX512BWINTRIN_H |
30 | |
/* Mask value types used by the intrinsics below: __mmask32 is an unsigned int
 * and __mmask64 is an unsigned long long. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;


/* Attribute sets applied to every intrinsic defined in this header. Both force
 * inlining, suppress debug info and enable the "avx512bw" target; the 512
 * variant additionally requests a minimum vector width of 512. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
37 | |
38 | static __inline __mmask32 __DEFAULT_FN_ATTRS |
39 | _knot_mask32(__mmask32 __M) |
40 | { |
41 | return __builtin_ia32_knotsi(__M); |
42 | } |
43 | |
44 | static __inline __mmask64 __DEFAULT_FN_ATTRS |
45 | _knot_mask64(__mmask64 __M) |
46 | { |
47 | return __builtin_ia32_knotdi(__M); |
48 | } |
49 | |
50 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
51 | _kand_mask32(__mmask32 __A, __mmask32 __B) |
52 | { |
53 | return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); |
54 | } |
55 | |
56 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
57 | _kand_mask64(__mmask64 __A, __mmask64 __B) |
58 | { |
59 | return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); |
60 | } |
61 | |
62 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
63 | _kandn_mask32(__mmask32 __A, __mmask32 __B) |
64 | { |
65 | return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); |
66 | } |
67 | |
68 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
69 | _kandn_mask64(__mmask64 __A, __mmask64 __B) |
70 | { |
71 | return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); |
72 | } |
73 | |
74 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
75 | _kor_mask32(__mmask32 __A, __mmask32 __B) |
76 | { |
77 | return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); |
78 | } |
79 | |
80 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
81 | _kor_mask64(__mmask64 __A, __mmask64 __B) |
82 | { |
83 | return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); |
84 | } |
85 | |
86 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
87 | _kxnor_mask32(__mmask32 __A, __mmask32 __B) |
88 | { |
89 | return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); |
90 | } |
91 | |
92 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
93 | _kxnor_mask64(__mmask64 __A, __mmask64 __B) |
94 | { |
95 | return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); |
96 | } |
97 | |
98 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
99 | _kxor_mask32(__mmask32 __A, __mmask32 __B) |
100 | { |
101 | return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); |
102 | } |
103 | |
104 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
105 | _kxor_mask64(__mmask64 __A, __mmask64 __B) |
106 | { |
107 | return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); |
108 | } |
109 | |
110 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
111 | _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) |
112 | { |
113 | return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); |
114 | } |
115 | |
116 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
117 | _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) |
118 | { |
119 | return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); |
120 | } |
121 | |
122 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
123 | _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { |
124 | *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); |
125 | return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); |
126 | } |
127 | |
128 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
129 | _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) |
130 | { |
131 | return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); |
132 | } |
133 | |
134 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
135 | _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) |
136 | { |
137 | return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); |
138 | } |
139 | |
140 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
141 | _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { |
142 | *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); |
143 | return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); |
144 | } |
145 | |
146 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
147 | _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) |
148 | { |
149 | return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); |
150 | } |
151 | |
152 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
153 | _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) |
154 | { |
155 | return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); |
156 | } |
157 | |
158 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
159 | _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { |
160 | *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); |
161 | return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); |
162 | } |
163 | |
164 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
165 | _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) |
166 | { |
167 | return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); |
168 | } |
169 | |
170 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
171 | _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) |
172 | { |
173 | return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); |
174 | } |
175 | |
176 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
177 | _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { |
178 | *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); |
179 | return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); |
180 | } |
181 | |
182 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
183 | _kadd_mask32(__mmask32 __A, __mmask32 __B) |
184 | { |
185 | return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); |
186 | } |
187 | |
188 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
189 | _kadd_mask64(__mmask64 __A, __mmask64 __B) |
190 | { |
191 | return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); |
192 | } |
193 | |
/* Mask shift macros. A is converted to the mask type and I to unsigned int
 * before being handed to the matching kshift{l,r}i{si,di} builtin.
 *
 * Each replacement list is wrapped in an outer pair of parentheses so the
 * leading cast always applies to the builtin call as a unit, whatever
 * expression the macro is used in (for example as the operand of sizeof). */
#define _kshiftli_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))

#define _kshiftri_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))

#define _kshiftli_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))

#define _kshiftri_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
205 | |
206 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
207 | _cvtmask32_u32(__mmask32 __A) { |
208 | return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); |
209 | } |
210 | |
211 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
212 | _cvtmask64_u64(__mmask64 __A) { |
213 | return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); |
214 | } |
215 | |
216 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
217 | _cvtu32_mask32(unsigned int __A) { |
218 | return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); |
219 | } |
220 | |
221 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
222 | _cvtu64_mask64(unsigned long long __A) { |
223 | return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); |
224 | } |
225 | |
226 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS |
227 | _load_mask32(__mmask32 *__A) { |
228 | return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); |
229 | } |
230 | |
231 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS |
232 | _load_mask64(__mmask64 *__A) { |
233 | return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); |
234 | } |
235 | |
236 | static __inline__ void __DEFAULT_FN_ATTRS |
237 | _store_mask32(__mmask32 *__A, __mmask32 __B) { |
238 | *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); |
239 | } |
240 | |
241 | static __inline__ void __DEFAULT_FN_ATTRS |
242 | _store_mask64(__mmask64 *__A, __mmask64 __B) { |
243 | *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); |
244 | } |
245 | |
246 | |
247 | |
/* 8-bit lane comparisons producing a 64-bit mask.
 *   a, b : operands, converted to __m512i and then viewed as __v64qi
 *   p    : comparison predicate, converted to int
 *   m    : (mask forms only) mask handed to the builtin; the unmasked forms
 *          pass an all-ones mask instead
 * The epi8 forms call the cmpb512_mask builtin, the epu8 forms call
 * ucmpb512_mask.
 *
 * Each replacement list is wrapped in an outer pair of parentheses so the
 * leading (__mmask64) cast always applies to the builtin call as a unit,
 * whatever expression the macro is used in. */
#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))

#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))

#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))

#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))
267 | |
/* 16-bit lane comparisons producing a 32-bit mask.
 *   a, b : operands, converted to __m512i and then viewed as __v32hi
 *   p    : comparison predicate, converted to int
 *   m    : (mask forms only) mask handed to the builtin; the unmasked forms
 *          pass an all-ones mask instead
 * The epi16 forms call the cmpw512_mask builtin, the epu16 forms call
 * ucmpw512_mask.
 *
 * Each replacement list is wrapped in an outer pair of parentheses so the
 * leading (__mmask32) cast always applies to the builtin call as a unit,
 * whatever expression the macro is used in. */
#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))
287 | |
/* epi8 comparison shorthands. Each one expands to _mm512_cmp_epi8_mask (or
 * its mask form) with the predicate fixed to one _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
312 | |
/* epu8 comparison shorthands. Each one expands to _mm512_cmp_epu8_mask (or
 * its mask form) with the predicate fixed to one _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
337 | |
/* epi16 comparison shorthands. Each one expands to _mm512_cmp_epi16_mask (or
 * its mask form) with the predicate fixed to one _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
362 | |
/* epu16 comparison shorthands. Each one expands to _mm512_cmp_epu16_mask (or
 * its mask form) with the predicate fixed to one _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
387 | |
388 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
389 | _mm512_add_epi8 (__m512i __A, __m512i __B) { |
390 | return (__m512i) ((__v64qu) __A + (__v64qu) __B); |
391 | } |
392 | |
393 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
394 | _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { |
395 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
396 | (__v64qi)_mm512_add_epi8(__A, __B), |
397 | (__v64qi)__W); |
398 | } |
399 | |
400 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
401 | _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { |
402 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
403 | (__v64qi)_mm512_add_epi8(__A, __B), |
404 | (__v64qi)_mm512_setzero_si512()); |
405 | } |
406 | |
407 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
408 | _mm512_sub_epi8 (__m512i __A, __m512i __B) { |
409 | return (__m512i) ((__v64qu) __A - (__v64qu) __B); |
410 | } |
411 | |
412 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
413 | _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { |
414 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
415 | (__v64qi)_mm512_sub_epi8(__A, __B), |
416 | (__v64qi)__W); |
417 | } |
418 | |
419 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
420 | _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { |
421 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
422 | (__v64qi)_mm512_sub_epi8(__A, __B), |
423 | (__v64qi)_mm512_setzero_si512()); |
424 | } |
425 | |
426 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
427 | _mm512_add_epi16 (__m512i __A, __m512i __B) { |
428 | return (__m512i) ((__v32hu) __A + (__v32hu) __B); |
429 | } |
430 | |
431 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
432 | _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { |
433 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
434 | (__v32hi)_mm512_add_epi16(__A, __B), |
435 | (__v32hi)__W); |
436 | } |
437 | |
438 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
439 | _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { |
440 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
441 | (__v32hi)_mm512_add_epi16(__A, __B), |
442 | (__v32hi)_mm512_setzero_si512()); |
443 | } |
444 | |
445 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
446 | _mm512_sub_epi16 (__m512i __A, __m512i __B) { |
447 | return (__m512i) ((__v32hu) __A - (__v32hu) __B); |
448 | } |
449 | |
450 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
451 | _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { |
452 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
453 | (__v32hi)_mm512_sub_epi16(__A, __B), |
454 | (__v32hi)__W); |
455 | } |
456 | |
457 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
458 | _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { |
459 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
460 | (__v32hi)_mm512_sub_epi16(__A, __B), |
461 | (__v32hi)_mm512_setzero_si512()); |
462 | } |
463 | |
464 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
465 | _mm512_mullo_epi16 (__m512i __A, __m512i __B) { |
466 | return (__m512i) ((__v32hu) __A * (__v32hu) __B); |
467 | } |
468 | |
469 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
470 | _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { |
471 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
472 | (__v32hi)_mm512_mullo_epi16(__A, __B), |
473 | (__v32hi)__W); |
474 | } |
475 | |
476 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
477 | _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { |
478 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
479 | (__v32hi)_mm512_mullo_epi16(__A, __B), |
480 | (__v32hi)_mm512_setzero_si512()); |
481 | } |
482 | |
483 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
484 | _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) |
485 | { |
486 | return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, |
487 | (__v64qi) __W, |
488 | (__v64qi) __A); |
489 | } |
490 | |
491 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
492 | _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) |
493 | { |
494 | return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, |
495 | (__v32hi) __W, |
496 | (__v32hi) __A); |
497 | } |
498 | |
499 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
500 | _mm512_abs_epi8 (__m512i __A) |
501 | { |
502 | return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); |
503 | } |
504 | |
505 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
506 | _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) |
507 | { |
508 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
509 | (__v64qi)_mm512_abs_epi8(__A), |
510 | (__v64qi)__W); |
511 | } |
512 | |
513 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
514 | _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) |
515 | { |
516 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
517 | (__v64qi)_mm512_abs_epi8(__A), |
518 | (__v64qi)_mm512_setzero_si512()); |
519 | } |
520 | |
521 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
522 | _mm512_abs_epi16 (__m512i __A) |
523 | { |
524 | return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); |
525 | } |
526 | |
527 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
528 | _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) |
529 | { |
530 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
531 | (__v32hi)_mm512_abs_epi16(__A), |
532 | (__v32hi)__W); |
533 | } |
534 | |
535 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
536 | _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) |
537 | { |
538 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
539 | (__v32hi)_mm512_abs_epi16(__A), |
540 | (__v32hi)_mm512_setzero_si512()); |
541 | } |
542 | |
543 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
544 | _mm512_packs_epi32(__m512i __A, __m512i __B) |
545 | { |
546 | return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); |
547 | } |
548 | |
549 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
550 | _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) |
551 | { |
552 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
553 | (__v32hi)_mm512_packs_epi32(__A, __B), |
554 | (__v32hi)_mm512_setzero_si512()); |
555 | } |
556 | |
557 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
558 | _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) |
559 | { |
560 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
561 | (__v32hi)_mm512_packs_epi32(__A, __B), |
562 | (__v32hi)__W); |
563 | } |
564 | |
565 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
566 | _mm512_packs_epi16(__m512i __A, __m512i __B) |
567 | { |
568 | return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); |
569 | } |
570 | |
571 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
572 | _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) |
573 | { |
574 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
575 | (__v64qi)_mm512_packs_epi16(__A, __B), |
576 | (__v64qi)__W); |
577 | } |
578 | |
579 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
580 | _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) |
581 | { |
582 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
583 | (__v64qi)_mm512_packs_epi16(__A, __B), |
584 | (__v64qi)_mm512_setzero_si512()); |
585 | } |
586 | |
587 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
588 | _mm512_packus_epi32(__m512i __A, __m512i __B) |
589 | { |
590 | return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); |
591 | } |
592 | |
593 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
594 | _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) |
595 | { |
596 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
597 | (__v32hi)_mm512_packus_epi32(__A, __B), |
598 | (__v32hi)_mm512_setzero_si512()); |
599 | } |
600 | |
601 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
602 | _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) |
603 | { |
604 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
605 | (__v32hi)_mm512_packus_epi32(__A, __B), |
606 | (__v32hi)__W); |
607 | } |
608 | |
609 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
610 | _mm512_packus_epi16(__m512i __A, __m512i __B) |
611 | { |
612 | return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); |
613 | } |
614 | |
615 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
616 | _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) |
617 | { |
618 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
619 | (__v64qi)_mm512_packus_epi16(__A, __B), |
620 | (__v64qi)__W); |
621 | } |
622 | |
623 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
624 | _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) |
625 | { |
626 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
627 | (__v64qi)_mm512_packus_epi16(__A, __B), |
628 | (__v64qi)_mm512_setzero_si512()); |
629 | } |
630 | |
631 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
632 | _mm512_adds_epi8 (__m512i __A, __m512i __B) |
633 | { |
634 | return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); |
635 | } |
636 | |
637 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
638 | _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) |
639 | { |
640 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
641 | (__v64qi)_mm512_adds_epi8(__A, __B), |
642 | (__v64qi)__W); |
643 | } |
644 | |
645 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
646 | _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) |
647 | { |
648 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
649 | (__v64qi)_mm512_adds_epi8(__A, __B), |
650 | (__v64qi)_mm512_setzero_si512()); |
651 | } |
652 | |
653 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
654 | _mm512_adds_epi16 (__m512i __A, __m512i __B) |
655 | { |
656 | return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); |
657 | } |
658 | |
659 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
660 | _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
661 | { |
662 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
663 | (__v32hi)_mm512_adds_epi16(__A, __B), |
664 | (__v32hi)__W); |
665 | } |
666 | |
667 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
668 | _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) |
669 | { |
670 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
671 | (__v32hi)_mm512_adds_epi16(__A, __B), |
672 | (__v32hi)_mm512_setzero_si512()); |
673 | } |
674 | |
675 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
676 | _mm512_adds_epu8 (__m512i __A, __m512i __B) |
677 | { |
678 | return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); |
679 | } |
680 | |
681 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
682 | _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) |
683 | { |
684 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
685 | (__v64qi)_mm512_adds_epu8(__A, __B), |
686 | (__v64qi)__W); |
687 | } |
688 | |
689 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
690 | _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) |
691 | { |
692 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
693 | (__v64qi)_mm512_adds_epu8(__A, __B), |
694 | (__v64qi)_mm512_setzero_si512()); |
695 | } |
696 | |
697 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
698 | _mm512_adds_epu16 (__m512i __A, __m512i __B) |
699 | { |
700 | return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); |
701 | } |
702 | |
703 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
704 | _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
705 | { |
706 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
707 | (__v32hi)_mm512_adds_epu16(__A, __B), |
708 | (__v32hi)__W); |
709 | } |
710 | |
711 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
712 | _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) |
713 | { |
714 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
715 | (__v32hi)_mm512_adds_epu16(__A, __B), |
716 | (__v32hi)_mm512_setzero_si512()); |
717 | } |
718 | |
/* Per-element (a + b + 1) >> 1 of __A and __B viewed as __v64qu vectors.
 * The arithmetic is carried out on unsigned short elements, and the result
 * is converted back to __v64qu before being returned as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_avg_epu8 (__m512i __A, __m512i __B)
{
/* Local widened type: unsigned short elements in a 128-byte vector. */
typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
/* Convert both inputs to the wide type, add them, add 1, shift right by one,
 * then convert the result back to __v64qu. */
return (__m512i)__builtin_convertvector(
((__builtin_convertvector((__v64qu) __A, __v64hu) +
__builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
>> 1, __v64qu);
}
728 | |
729 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
730 | _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, |
731 | __m512i __B) |
732 | { |
733 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
734 | (__v64qi)_mm512_avg_epu8(__A, __B), |
735 | (__v64qi)__W); |
736 | } |
737 | |
738 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
739 | _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) |
740 | { |
741 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
742 | (__v64qi)_mm512_avg_epu8(__A, __B), |
743 | (__v64qi)_mm512_setzero_si512()); |
744 | } |
745 | |
/* Per-element (a + b + 1) >> 1 of __A and __B viewed as __v32hu vectors.
 * The arithmetic is carried out on unsigned int elements, and the result is
 * converted back to __v32hu before being returned as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_avg_epu16 (__m512i __A, __m512i __B)
{
/* Local widened type: unsigned int elements in a 128-byte vector. */
typedef unsigned int __v32su __attribute__((__vector_size__(128)));
/* Convert both inputs to the wide type, add them, add 1, shift right by one,
 * then convert the result back to __v32hu. */
return (__m512i)__builtin_convertvector(
((__builtin_convertvector((__v32hu) __A, __v32su) +
__builtin_convertvector((__v32hu) __B, __v32su)) + 1)
>> 1, __v32hu);
}
755 | |
756 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
757 | _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, |
758 | __m512i __B) |
759 | { |
760 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
761 | (__v32hi)_mm512_avg_epu16(__A, __B), |
762 | (__v32hi)__W); |
763 | } |
764 | |
765 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
766 | _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) |
767 | { |
768 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
769 | (__v32hi)_mm512_avg_epu16(__A, __B), |
770 | (__v32hi) _mm512_setzero_si512()); |
771 | } |
772 | |
773 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
774 | _mm512_max_epi8 (__m512i __A, __m512i __B) |
775 | { |
776 | return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); |
777 | } |
778 | |
779 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
780 | _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) |
781 | { |
782 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
783 | (__v64qi)_mm512_max_epi8(__A, __B), |
784 | (__v64qi)_mm512_setzero_si512()); |
785 | } |
786 | |
787 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
788 | _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) |
789 | { |
790 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
791 | (__v64qi)_mm512_max_epi8(__A, __B), |
792 | (__v64qi)__W); |
793 | } |
794 | |
795 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
796 | _mm512_max_epi16 (__m512i __A, __m512i __B) |
797 | { |
798 | return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); |
799 | } |
800 | |
801 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
802 | _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) |
803 | { |
804 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
805 | (__v32hi)_mm512_max_epi16(__A, __B), |
806 | (__v32hi)_mm512_setzero_si512()); |
807 | } |
808 | |
809 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
810 | _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, |
811 | __m512i __B) |
812 | { |
813 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
814 | (__v32hi)_mm512_max_epi16(__A, __B), |
815 | (__v32hi)__W); |
816 | } |
817 | |
818 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
819 | _mm512_max_epu8 (__m512i __A, __m512i __B) |
820 | { |
821 | return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); |
822 | } |
823 | |
824 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
825 | _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) |
826 | { |
827 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
828 | (__v64qi)_mm512_max_epu8(__A, __B), |
829 | (__v64qi)_mm512_setzero_si512()); |
830 | } |
831 | |
832 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
833 | _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) |
834 | { |
835 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
836 | (__v64qi)_mm512_max_epu8(__A, __B), |
837 | (__v64qi)__W); |
838 | } |
839 | |
840 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
841 | _mm512_max_epu16 (__m512i __A, __m512i __B) |
842 | { |
843 | return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); |
844 | } |
845 | |
846 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
847 | _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) |
848 | { |
849 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
850 | (__v32hi)_mm512_max_epu16(__A, __B), |
851 | (__v32hi)_mm512_setzero_si512()); |
852 | } |
853 | |
854 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
855 | _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) |
856 | { |
857 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
858 | (__v32hi)_mm512_max_epu16(__A, __B), |
859 | (__v32hi)__W); |
860 | } |
861 | |
862 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
863 | _mm512_min_epi8 (__m512i __A, __m512i __B) |
864 | { |
865 | return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); |
866 | } |
867 | |
868 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
869 | _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) |
870 | { |
871 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
872 | (__v64qi)_mm512_min_epi8(__A, __B), |
873 | (__v64qi)_mm512_setzero_si512()); |
874 | } |
875 | |
876 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
877 | _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) |
878 | { |
879 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
880 | (__v64qi)_mm512_min_epi8(__A, __B), |
881 | (__v64qi)__W); |
882 | } |
883 | |
884 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
885 | _mm512_min_epi16 (__m512i __A, __m512i __B) |
886 | { |
887 | return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); |
888 | } |
889 | |
890 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
891 | _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) |
892 | { |
893 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
894 | (__v32hi)_mm512_min_epi16(__A, __B), |
895 | (__v32hi)_mm512_setzero_si512()); |
896 | } |
897 | |
898 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
899 | _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) |
900 | { |
901 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
902 | (__v32hi)_mm512_min_epi16(__A, __B), |
903 | (__v32hi)__W); |
904 | } |
905 | |
906 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
907 | _mm512_min_epu8 (__m512i __A, __m512i __B) |
908 | { |
909 | return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); |
910 | } |
911 | |
912 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
913 | _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) |
914 | { |
915 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
916 | (__v64qi)_mm512_min_epu8(__A, __B), |
917 | (__v64qi)_mm512_setzero_si512()); |
918 | } |
919 | |
920 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
921 | _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) |
922 | { |
923 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, |
924 | (__v64qi)_mm512_min_epu8(__A, __B), |
925 | (__v64qi)__W); |
926 | } |
927 | |
928 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
929 | _mm512_min_epu16 (__m512i __A, __m512i __B) |
930 | { |
931 | return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); |
932 | } |
933 | |
934 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
935 | _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) |
936 | { |
937 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
938 | (__v32hi)_mm512_min_epu16(__A, __B), |
939 | (__v32hi)_mm512_setzero_si512()); |
940 | } |
941 | |
942 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
943 | _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) |
944 | { |
945 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
946 | (__v32hi)_mm512_min_epu16(__A, __B), |
947 | (__v32hi)__W); |
948 | } |
949 | |
950 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
951 | _mm512_shuffle_epi8(__m512i __A, __m512i __B) |
952 | { |
953 | return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); |
954 | } |
955 | |
956 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
957 | _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) |
958 | { |
959 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
960 | (__v64qi)_mm512_shuffle_epi8(__A, __B), |
961 | (__v64qi)__W); |
962 | } |
963 | |
964 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
965 | _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) |
966 | { |
967 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
968 | (__v64qi)_mm512_shuffle_epi8(__A, __B), |
969 | (__v64qi)_mm512_setzero_si512()); |
970 | } |
971 | |
972 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
973 | _mm512_subs_epi8 (__m512i __A, __m512i __B) |
974 | { |
975 | return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); |
976 | } |
977 | |
978 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
979 | _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) |
980 | { |
981 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
982 | (__v64qi)_mm512_subs_epi8(__A, __B), |
983 | (__v64qi)__W); |
984 | } |
985 | |
986 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
987 | _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) |
988 | { |
989 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
990 | (__v64qi)_mm512_subs_epi8(__A, __B), |
991 | (__v64qi)_mm512_setzero_si512()); |
992 | } |
993 | |
994 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
995 | _mm512_subs_epi16 (__m512i __A, __m512i __B) |
996 | { |
997 | return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); |
998 | } |
999 | |
1000 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1001 | _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
1002 | { |
1003 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1004 | (__v32hi)_mm512_subs_epi16(__A, __B), |
1005 | (__v32hi)__W); |
1006 | } |
1007 | |
1008 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1009 | _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) |
1010 | { |
1011 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1012 | (__v32hi)_mm512_subs_epi16(__A, __B), |
1013 | (__v32hi)_mm512_setzero_si512()); |
1014 | } |
1015 | |
1016 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1017 | _mm512_subs_epu8 (__m512i __A, __m512i __B) |
1018 | { |
1019 | return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); |
1020 | } |
1021 | |
1022 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1023 | _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) |
1024 | { |
1025 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
1026 | (__v64qi)_mm512_subs_epu8(__A, __B), |
1027 | (__v64qi)__W); |
1028 | } |
1029 | |
1030 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1031 | _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) |
1032 | { |
1033 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
1034 | (__v64qi)_mm512_subs_epu8(__A, __B), |
1035 | (__v64qi)_mm512_setzero_si512()); |
1036 | } |
1037 | |
1038 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1039 | _mm512_subs_epu16 (__m512i __A, __m512i __B) |
1040 | { |
1041 | return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); |
1042 | } |
1043 | |
1044 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1045 | _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
1046 | { |
1047 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1048 | (__v32hi)_mm512_subs_epu16(__A, __B), |
1049 | (__v32hi)__W); |
1050 | } |
1051 | |
1052 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1053 | _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) |
1054 | { |
1055 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1056 | (__v32hi)_mm512_subs_epu16(__A, __B), |
1057 | (__v32hi)_mm512_setzero_si512()); |
1058 | } |
1059 | |
1060 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1061 | _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) |
1062 | { |
1063 | return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, |
1064 | (__v32hi)__B); |
1065 | } |
1066 | |
1067 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1068 | _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, |
1069 | __m512i __B) |
1070 | { |
1071 | return (__m512i)__builtin_ia32_selectw_512(__U, |
1072 | (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), |
1073 | (__v32hi)__A); |
1074 | } |
1075 | |
1076 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1077 | _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, |
1078 | __m512i __B) |
1079 | { |
1080 | return (__m512i)__builtin_ia32_selectw_512(__U, |
1081 | (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), |
1082 | (__v32hi)__I); |
1083 | } |
1084 | |
1085 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1086 | _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, |
1087 | __m512i __B) |
1088 | { |
1089 | return (__m512i)__builtin_ia32_selectw_512(__U, |
1090 | (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), |
1091 | (__v32hi)_mm512_setzero_si512()); |
1092 | } |
1093 | |
1094 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1095 | _mm512_mulhrs_epi16(__m512i __A, __m512i __B) |
1096 | { |
1097 | return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); |
1098 | } |
1099 | |
1100 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1101 | _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
1102 | { |
1103 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1104 | (__v32hi)_mm512_mulhrs_epi16(__A, __B), |
1105 | (__v32hi)__W); |
1106 | } |
1107 | |
1108 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1109 | _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) |
1110 | { |
1111 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1112 | (__v32hi)_mm512_mulhrs_epi16(__A, __B), |
1113 | (__v32hi)_mm512_setzero_si512()); |
1114 | } |
1115 | |
1116 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1117 | _mm512_mulhi_epi16(__m512i __A, __m512i __B) |
1118 | { |
1119 | return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); |
1120 | } |
1121 | |
1122 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1123 | _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, |
1124 | __m512i __B) |
1125 | { |
1126 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1127 | (__v32hi)_mm512_mulhi_epi16(__A, __B), |
1128 | (__v32hi)__W); |
1129 | } |
1130 | |
1131 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1132 | _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) |
1133 | { |
1134 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1135 | (__v32hi)_mm512_mulhi_epi16(__A, __B), |
1136 | (__v32hi)_mm512_setzero_si512()); |
1137 | } |
1138 | |
1139 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1140 | _mm512_mulhi_epu16(__m512i __A, __m512i __B) |
1141 | { |
1142 | return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); |
1143 | } |
1144 | |
1145 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1146 | _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
1147 | { |
1148 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1149 | (__v32hi)_mm512_mulhi_epu16(__A, __B), |
1150 | (__v32hi)__W); |
1151 | } |
1152 | |
1153 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1154 | _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) |
1155 | { |
1156 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1157 | (__v32hi)_mm512_mulhi_epu16(__A, __B), |
1158 | (__v32hi)_mm512_setzero_si512()); |
1159 | } |
1160 | |
1161 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1162 | _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { |
1163 | return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); |
1164 | } |
1165 | |
1166 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1167 | _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, |
1168 | __m512i __Y) { |
1169 | return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, |
1170 | (__v32hi)_mm512_maddubs_epi16(__X, __Y), |
1171 | (__v32hi)__W); |
1172 | } |
1173 | |
1174 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1175 | _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { |
1176 | return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, |
1177 | (__v32hi)_mm512_maddubs_epi16(__X, __Y), |
1178 | (__v32hi)_mm512_setzero_si512()); |
1179 | } |
1180 | |
1181 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1182 | _mm512_madd_epi16(__m512i __A, __m512i __B) { |
1183 | return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); |
1184 | } |
1185 | |
1186 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1187 | _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { |
1188 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
1189 | (__v16si)_mm512_madd_epi16(__A, __B), |
1190 | (__v16si)__W); |
1191 | } |
1192 | |
1193 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1194 | _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { |
1195 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
1196 | (__v16si)_mm512_madd_epi16(__A, __B), |
1197 | (__v16si)_mm512_setzero_si512()); |
1198 | } |
1199 | |
1200 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1201 | _mm512_cvtsepi16_epi8 (__m512i __A) { |
1202 | return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, |
1203 | (__v32qi)_mm256_setzero_si256(), |
1204 | (__mmask32) -1); |
1205 | } |
1206 | |
1207 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1208 | _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { |
1209 | return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, |
1210 | (__v32qi)__O, |
1211 | __M); |
1212 | } |
1213 | |
1214 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1215 | _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { |
1216 | return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, |
1217 | (__v32qi) _mm256_setzero_si256(), |
1218 | __M); |
1219 | } |
1220 | |
1221 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1222 | _mm512_cvtusepi16_epi8 (__m512i __A) { |
1223 | return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, |
1224 | (__v32qi) _mm256_setzero_si256(), |
1225 | (__mmask32) -1); |
1226 | } |
1227 | |
1228 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1229 | _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { |
1230 | return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, |
1231 | (__v32qi) __O, |
1232 | __M); |
1233 | } |
1234 | |
1235 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1236 | _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { |
1237 | return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, |
1238 | (__v32qi) _mm256_setzero_si256(), |
1239 | __M); |
1240 | } |
1241 | |
1242 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1243 | _mm512_cvtepi16_epi8 (__m512i __A) { |
1244 | return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, |
1245 | (__v32qi) _mm256_undefined_si256(), |
1246 | (__mmask32) -1); |
1247 | } |
1248 | |
1249 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1250 | _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { |
1251 | return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, |
1252 | (__v32qi) __O, |
1253 | __M); |
1254 | } |
1255 | |
1256 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
1257 | _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { |
1258 | return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, |
1259 | (__v32qi) _mm256_setzero_si256(), |
1260 | __M); |
1261 | } |
1262 | |
1263 | static __inline__ void __DEFAULT_FN_ATTRS512 |
1264 | _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) |
1265 | { |
1266 | __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); |
1267 | } |
1268 | |
1269 | static __inline__ void __DEFAULT_FN_ATTRS512 |
1270 | _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) |
1271 | { |
1272 | __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); |
1273 | } |
1274 | |
1275 | static __inline__ void __DEFAULT_FN_ATTRS512 |
1276 | _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) |
1277 | { |
1278 | __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); |
1279 | } |
1280 | |
1281 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1282 | _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { |
1283 | return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, |
1284 | 8, 64+8, 9, 64+9, |
1285 | 10, 64+10, 11, 64+11, |
1286 | 12, 64+12, 13, 64+13, |
1287 | 14, 64+14, 15, 64+15, |
1288 | 24, 64+24, 25, 64+25, |
1289 | 26, 64+26, 27, 64+27, |
1290 | 28, 64+28, 29, 64+29, |
1291 | 30, 64+30, 31, 64+31, |
1292 | 40, 64+40, 41, 64+41, |
1293 | 42, 64+42, 43, 64+43, |
1294 | 44, 64+44, 45, 64+45, |
1295 | 46, 64+46, 47, 64+47, |
1296 | 56, 64+56, 57, 64+57, |
1297 | 58, 64+58, 59, 64+59, |
1298 | 60, 64+60, 61, 64+61, |
1299 | 62, 64+62, 63, 64+63); |
1300 | } |
1301 | |
1302 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1303 | _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { |
1304 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
1305 | (__v64qi)_mm512_unpackhi_epi8(__A, __B), |
1306 | (__v64qi)__W); |
1307 | } |
1308 | |
1309 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1310 | _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { |
1311 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
1312 | (__v64qi)_mm512_unpackhi_epi8(__A, __B), |
1313 | (__v64qi)_mm512_setzero_si512()); |
1314 | } |
1315 | |
1316 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1317 | _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { |
1318 | return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, |
1319 | 4, 32+4, 5, 32+5, |
1320 | 6, 32+6, 7, 32+7, |
1321 | 12, 32+12, 13, 32+13, |
1322 | 14, 32+14, 15, 32+15, |
1323 | 20, 32+20, 21, 32+21, |
1324 | 22, 32+22, 23, 32+23, |
1325 | 28, 32+28, 29, 32+29, |
1326 | 30, 32+30, 31, 32+31); |
1327 | } |
1328 | |
1329 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1330 | _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { |
1331 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1332 | (__v32hi)_mm512_unpackhi_epi16(__A, __B), |
1333 | (__v32hi)__W); |
1334 | } |
1335 | |
1336 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1337 | _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { |
1338 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1339 | (__v32hi)_mm512_unpackhi_epi16(__A, __B), |
1340 | (__v32hi)_mm512_setzero_si512()); |
1341 | } |
1342 | |
1343 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1344 | _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { |
1345 | return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, |
1346 | 0, 64+0, 1, 64+1, |
1347 | 2, 64+2, 3, 64+3, |
1348 | 4, 64+4, 5, 64+5, |
1349 | 6, 64+6, 7, 64+7, |
1350 | 16, 64+16, 17, 64+17, |
1351 | 18, 64+18, 19, 64+19, |
1352 | 20, 64+20, 21, 64+21, |
1353 | 22, 64+22, 23, 64+23, |
1354 | 32, 64+32, 33, 64+33, |
1355 | 34, 64+34, 35, 64+35, |
1356 | 36, 64+36, 37, 64+37, |
1357 | 38, 64+38, 39, 64+39, |
1358 | 48, 64+48, 49, 64+49, |
1359 | 50, 64+50, 51, 64+51, |
1360 | 52, 64+52, 53, 64+53, |
1361 | 54, 64+54, 55, 64+55); |
1362 | } |
1363 | |
1364 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1365 | _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { |
1366 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
1367 | (__v64qi)_mm512_unpacklo_epi8(__A, __B), |
1368 | (__v64qi)__W); |
1369 | } |
1370 | |
1371 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1372 | _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { |
1373 | return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, |
1374 | (__v64qi)_mm512_unpacklo_epi8(__A, __B), |
1375 | (__v64qi)_mm512_setzero_si512()); |
1376 | } |
1377 | |
1378 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1379 | _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { |
1380 | return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, |
1381 | 0, 32+0, 1, 32+1, |
1382 | 2, 32+2, 3, 32+3, |
1383 | 8, 32+8, 9, 32+9, |
1384 | 10, 32+10, 11, 32+11, |
1385 | 16, 32+16, 17, 32+17, |
1386 | 18, 32+18, 19, 32+19, |
1387 | 24, 32+24, 25, 32+25, |
1388 | 26, 32+26, 27, 32+27); |
1389 | } |
1390 | |
1391 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1392 | _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { |
1393 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1394 | (__v32hi)_mm512_unpacklo_epi16(__A, __B), |
1395 | (__v32hi)__W); |
1396 | } |
1397 | |
1398 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1399 | _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { |
1400 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1401 | (__v32hi)_mm512_unpacklo_epi16(__A, __B), |
1402 | (__v32hi)_mm512_setzero_si512()); |
1403 | } |
1404 | |
1405 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1406 | _mm512_cvtepi8_epi16(__m256i __A) |
1407 | { |
1408 | |
1409 | |
1410 | return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); |
1411 | } |
1412 | |
1413 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1414 | _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) |
1415 | { |
1416 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1417 | (__v32hi)_mm512_cvtepi8_epi16(__A), |
1418 | (__v32hi)__W); |
1419 | } |
1420 | |
1421 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1422 | _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) |
1423 | { |
1424 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1425 | (__v32hi)_mm512_cvtepi8_epi16(__A), |
1426 | (__v32hi)_mm512_setzero_si512()); |
1427 | } |
1428 | |
1429 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1430 | _mm512_cvtepu8_epi16(__m256i __A) |
1431 | { |
1432 | return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); |
1433 | } |
1434 | |
1435 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1436 | _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) |
1437 | { |
1438 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1439 | (__v32hi)_mm512_cvtepu8_epi16(__A), |
1440 | (__v32hi)__W); |
1441 | } |
1442 | |
1443 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1444 | _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) |
1445 | { |
1446 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1447 | (__v32hi)_mm512_cvtepu8_epi16(__A), |
1448 | (__v32hi)_mm512_setzero_si512()); |
1449 | } |
1450 | |
1451 | |
/* Passes A (viewed as __v32hi) and imm (converted to int) to the pshufhw512
   builtin and casts the result to __m512i.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro (for example a subscript) applies to the __m512i value
   rather than binding to the builtin call ahead of the cast.
   NOTE(review): imm presumably has to be a compile-time constant; confirm
   against the builtin's definition. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
1454 | |
/* Masked _mm512_shufflehi_epi16: the selectw_512 builtin receives U, the
   unmasked result, and W as the alternative operand.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the __m512i value rather than binding to the
   builtin call ahead of the cast. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1460 | |
/* Masked _mm512_shufflehi_epi16: the selectw_512 builtin receives U, the
   unmasked result, and the _mm512_setzero_si512() vector as the alternative
   operand.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the __m512i value rather than binding to the
   builtin call ahead of the cast. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1466 | |
/* Passes A (viewed as __v32hi) and imm (converted to int) to the pshuflw512
   builtin and casts the result to __m512i.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro (for example a subscript) applies to the __m512i value
   rather than binding to the builtin call ahead of the cast.
   NOTE(review): imm presumably has to be a compile-time constant; confirm
   against the builtin's definition. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
1469 | |
1470 | |
/* Masked _mm512_shufflelo_epi16: the selectw_512 builtin receives U, the
   unmasked result, and W as the alternative operand.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the __m512i value rather than binding to the
   builtin call ahead of the cast. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1476 | |
1477 | |
/* Masked _mm512_shufflelo_epi16: the selectw_512 builtin receives U, the
   unmasked result, and the _mm512_setzero_si512() vector as the alternative
   operand.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the __m512i value rather than binding to the
   builtin call ahead of the cast. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1483 | |
1484 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1485 | _mm512_sllv_epi16(__m512i __A, __m512i __B) |
1486 | { |
1487 | return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); |
1488 | } |
1489 | |
1490 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1491 | _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
1492 | { |
1493 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1494 | (__v32hi)_mm512_sllv_epi16(__A, __B), |
1495 | (__v32hi)__W); |
1496 | } |
1497 | |
1498 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1499 | _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) |
1500 | { |
1501 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1502 | (__v32hi)_mm512_sllv_epi16(__A, __B), |
1503 | (__v32hi)_mm512_setzero_si512()); |
1504 | } |
1505 | |
1506 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1507 | _mm512_sll_epi16(__m512i __A, __m128i __B) |
1508 | { |
1509 | return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); |
1510 | } |
1511 | |
1512 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1513 | _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) |
1514 | { |
1515 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1516 | (__v32hi)_mm512_sll_epi16(__A, __B), |
1517 | (__v32hi)__W); |
1518 | } |
1519 | |
1520 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1521 | _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) |
1522 | { |
1523 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1524 | (__v32hi)_mm512_sll_epi16(__A, __B), |
1525 | (__v32hi)_mm512_setzero_si512()); |
1526 | } |
1527 | |
1528 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1529 | _mm512_slli_epi16(__m512i __A, int __B) |
1530 | { |
1531 | return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); |
1532 | } |
1533 | |
1534 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1535 | _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) |
1536 | { |
1537 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1538 | (__v32hi)_mm512_slli_epi16(__A, __B), |
1539 | (__v32hi)__W); |
1540 | } |
1541 | |
1542 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1543 | _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) |
1544 | { |
1545 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1546 | (__v32hi)_mm512_slli_epi16(__A, __B), |
1547 | (__v32hi)_mm512_setzero_si512()); |
1548 | } |
1549 | |
/* Passes a (viewed as __v8di) and imm (converted to int) to the
   pslldqi512_byteshift builtin and casts the result to __m512i.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro (for example a subscript) applies to the __m512i value
   rather than binding to the builtin call ahead of the cast.
   NOTE(review): imm presumably has to be a compile-time constant; confirm
   against the builtin's definition. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1552 | |
1553 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1554 | _mm512_srlv_epi16(__m512i __A, __m512i __B) |
1555 | { |
1556 | return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); |
1557 | } |
1558 | |
1559 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1560 | _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
1561 | { |
1562 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1563 | (__v32hi)_mm512_srlv_epi16(__A, __B), |
1564 | (__v32hi)__W); |
1565 | } |
1566 | |
1567 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1568 | _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) |
1569 | { |
1570 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1571 | (__v32hi)_mm512_srlv_epi16(__A, __B), |
1572 | (__v32hi)_mm512_setzero_si512()); |
1573 | } |
1574 | |
1575 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1576 | _mm512_srav_epi16(__m512i __A, __m512i __B) |
1577 | { |
1578 | return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); |
1579 | } |
1580 | |
1581 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1582 | _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) |
1583 | { |
1584 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1585 | (__v32hi)_mm512_srav_epi16(__A, __B), |
1586 | (__v32hi)__W); |
1587 | } |
1588 | |
1589 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1590 | _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) |
1591 | { |
1592 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1593 | (__v32hi)_mm512_srav_epi16(__A, __B), |
1594 | (__v32hi)_mm512_setzero_si512()); |
1595 | } |
1596 | |
1597 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1598 | _mm512_sra_epi16(__m512i __A, __m128i __B) |
1599 | { |
1600 | return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); |
1601 | } |
1602 | |
1603 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1604 | _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) |
1605 | { |
1606 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1607 | (__v32hi)_mm512_sra_epi16(__A, __B), |
1608 | (__v32hi)__W); |
1609 | } |
1610 | |
1611 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1612 | _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) |
1613 | { |
1614 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1615 | (__v32hi)_mm512_sra_epi16(__A, __B), |
1616 | (__v32hi)_mm512_setzero_si512()); |
1617 | } |
1618 | |
1619 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1620 | _mm512_srai_epi16(__m512i __A, int __B) |
1621 | { |
1622 | return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); |
1623 | } |
1624 | |
1625 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1626 | _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) |
1627 | { |
1628 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1629 | (__v32hi)_mm512_srai_epi16(__A, __B), |
1630 | (__v32hi)__W); |
1631 | } |
1632 | |
1633 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1634 | _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) |
1635 | { |
1636 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1637 | (__v32hi)_mm512_srai_epi16(__A, __B), |
1638 | (__v32hi)_mm512_setzero_si512()); |
1639 | } |
1640 | |
1641 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1642 | _mm512_srl_epi16(__m512i __A, __m128i __B) |
1643 | { |
1644 | return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); |
1645 | } |
1646 | |
1647 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1648 | _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) |
1649 | { |
1650 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1651 | (__v32hi)_mm512_srl_epi16(__A, __B), |
1652 | (__v32hi)__W); |
1653 | } |
1654 | |
1655 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1656 | _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) |
1657 | { |
1658 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1659 | (__v32hi)_mm512_srl_epi16(__A, __B), |
1660 | (__v32hi)_mm512_setzero_si512()); |
1661 | } |
1662 | |
1663 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1664 | _mm512_srli_epi16(__m512i __A, int __B) |
1665 | { |
1666 | return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); |
1667 | } |
1668 | |
1669 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1670 | _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) |
1671 | { |
1672 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1673 | (__v32hi)_mm512_srli_epi16(__A, __B), |
1674 | (__v32hi)__W); |
1675 | } |
1676 | |
1677 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1678 | _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) |
1679 | { |
1680 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, |
1681 | (__v32hi)_mm512_srli_epi16(__A, __B), |
1682 | (__v32hi)_mm512_setzero_si512()); |
1683 | } |
1684 | |
/* Wraps __builtin_ia32_psrldqi512_byteshift with (a) viewed as __v8di and
   (imm) converted to int; yields an __m512i.
   NOTE(review): presumably a macro rather than a function because the builtin
   wants (imm) as a compile-time constant -- confirm against the builtin's
   definition.
   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses something written
   directly after the invocation (a subscript, for instance) would attach to
   the builtin call instead of to the __m512i result. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), \
                                                (int)(imm)))
1687 | |
1688 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1689 | _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) |
1690 | { |
1691 | return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, |
1692 | (__v32hi) __A, |
1693 | (__v32hi) __W); |
1694 | } |
1695 | |
1696 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1697 | _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) |
1698 | { |
1699 | return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, |
1700 | (__v32hi) __A, |
1701 | (__v32hi) _mm512_setzero_si512 ()); |
1702 | } |
1703 | |
1704 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1705 | _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) |
1706 | { |
1707 | return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, |
1708 | (__v64qi) __A, |
1709 | (__v64qi) __W); |
1710 | } |
1711 | |
1712 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1713 | _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) |
1714 | { |
1715 | return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, |
1716 | (__v64qi) __A, |
1717 | (__v64qi) _mm512_setzero_si512 ()); |
1718 | } |
1719 | |
1720 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1721 | _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) |
1722 | { |
1723 | return (__m512i) __builtin_ia32_selectb_512(__M, |
1724 | (__v64qi)_mm512_set1_epi8(__A), |
1725 | (__v64qi) __O); |
1726 | } |
1727 | |
1728 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1729 | _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) |
1730 | { |
1731 | return (__m512i) __builtin_ia32_selectb_512(__M, |
1732 | (__v64qi) _mm512_set1_epi8(__A), |
1733 | (__v64qi) _mm512_setzero_si512()); |
1734 | } |
1735 | |
1736 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 |
1737 | _mm512_kunpackd (__mmask64 __A, __mmask64 __B) |
1738 | { |
1739 | return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, |
1740 | (__mmask64) __B); |
1741 | } |
1742 | |
1743 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 |
1744 | _mm512_kunpackw (__mmask32 __A, __mmask32 __B) |
1745 | { |
1746 | return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, |
1747 | (__mmask32) __B); |
1748 | } |
1749 | |
1750 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
1751 | _mm512_loadu_epi16 (void const *__P) |
1752 | { |
1753 | struct __loadu_epi16 { |
1754 | __m512i_u __v; |
1755 | } __attribute__((__packed__, __may_alias__)); |
1756 | return ((struct __loadu_epi16*)__P)->__v; |
1757 | } |
1758 | |
1759 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1760 | _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) |
1761 | { |
1762 | return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, |
1763 | (__v32hi) __W, |
1764 | (__mmask32) __U); |
1765 | } |
1766 | |
1767 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1768 | _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) |
1769 | { |
1770 | return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, |
1771 | (__v32hi) |
1772 | _mm512_setzero_si512 (), |
1773 | (__mmask32) __U); |
1774 | } |
1775 | |
1776 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
1777 | _mm512_loadu_epi8 (void const *__P) |
1778 | { |
1779 | struct __loadu_epi8 { |
1780 | __m512i_u __v; |
1781 | } __attribute__((__packed__, __may_alias__)); |
1782 | return ((struct __loadu_epi8*)__P)->__v; |
1783 | } |
1784 | |
1785 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1786 | _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) |
1787 | { |
1788 | return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, |
1789 | (__v64qi) __W, |
1790 | (__mmask64) __U); |
1791 | } |
1792 | |
1793 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1794 | _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) |
1795 | { |
1796 | return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, |
1797 | (__v64qi) |
1798 | _mm512_setzero_si512 (), |
1799 | (__mmask64) __U); |
1800 | } |
1801 | |
1802 | static __inline void __DEFAULT_FN_ATTRS512 |
1803 | _mm512_storeu_epi16 (void *__P, __m512i __A) |
1804 | { |
1805 | struct __storeu_epi16 { |
1806 | __m512i_u __v; |
1807 | } __attribute__((__packed__, __may_alias__)); |
1808 | ((struct __storeu_epi16*)__P)->__v = __A; |
1809 | } |
1810 | |
1811 | static __inline__ void __DEFAULT_FN_ATTRS512 |
1812 | _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) |
1813 | { |
1814 | __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, |
1815 | (__v32hi) __A, |
1816 | (__mmask32) __U); |
1817 | } |
1818 | |
1819 | static __inline void __DEFAULT_FN_ATTRS512 |
1820 | _mm512_storeu_epi8 (void *__P, __m512i __A) |
1821 | { |
1822 | struct __storeu_epi8 { |
1823 | __m512i_u __v; |
1824 | } __attribute__((__packed__, __may_alias__)); |
1825 | ((struct __storeu_epi8*)__P)->__v = __A; |
1826 | } |
1827 | |
1828 | static __inline__ void __DEFAULT_FN_ATTRS512 |
1829 | _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) |
1830 | { |
1831 | __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, |
1832 | (__v64qi) __A, |
1833 | (__mmask64) __U); |
1834 | } |
1835 | |
1836 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 |
1837 | _mm512_test_epi8_mask (__m512i __A, __m512i __B) |
1838 | { |
1839 | return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), |
1840 | _mm512_setzero_si512()); |
1841 | } |
1842 | |
1843 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 |
1844 | _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) |
1845 | { |
1846 | return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), |
1847 | _mm512_setzero_si512()); |
1848 | } |
1849 | |
1850 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 |
1851 | _mm512_test_epi16_mask (__m512i __A, __m512i __B) |
1852 | { |
1853 | return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), |
1854 | _mm512_setzero_si512()); |
1855 | } |
1856 | |
1857 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 |
1858 | _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) |
1859 | { |
1860 | return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), |
1861 | _mm512_setzero_si512()); |
1862 | } |
1863 | |
1864 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 |
1865 | _mm512_testn_epi8_mask (__m512i __A, __m512i __B) |
1866 | { |
1867 | return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); |
1868 | } |
1869 | |
1870 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 |
1871 | _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) |
1872 | { |
1873 | return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), |
1874 | _mm512_setzero_si512()); |
1875 | } |
1876 | |
1877 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 |
1878 | _mm512_testn_epi16_mask (__m512i __A, __m512i __B) |
1879 | { |
1880 | return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), |
1881 | _mm512_setzero_si512()); |
1882 | } |
1883 | |
1884 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 |
1885 | _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) |
1886 | { |
1887 | return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), |
1888 | _mm512_setzero_si512()); |
1889 | } |
1890 | |
1891 | static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 |
1892 | _mm512_movepi8_mask (__m512i __A) |
1893 | { |
1894 | return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); |
1895 | } |
1896 | |
1897 | static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 |
1898 | _mm512_movepi16_mask (__m512i __A) |
1899 | { |
1900 | return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); |
1901 | } |
1902 | |
1903 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1904 | _mm512_movm_epi8 (__mmask64 __A) |
1905 | { |
1906 | return (__m512i) __builtin_ia32_cvtmask2b512 (__A); |
1907 | } |
1908 | |
1909 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1910 | _mm512_movm_epi16 (__mmask32 __A) |
1911 | { |
1912 | return (__m512i) __builtin_ia32_cvtmask2w512 (__A); |
1913 | } |
1914 | |
1915 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1916 | _mm512_broadcastb_epi8 (__m128i __A) |
1917 | { |
1918 | return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, |
1919 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
1920 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
1921 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
1922 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
1923 | } |
1924 | |
1925 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1926 | _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) |
1927 | { |
1928 | return (__m512i)__builtin_ia32_selectb_512(__M, |
1929 | (__v64qi) _mm512_broadcastb_epi8(__A), |
1930 | (__v64qi) __O); |
1931 | } |
1932 | |
1933 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1934 | _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) |
1935 | { |
1936 | return (__m512i)__builtin_ia32_selectb_512(__M, |
1937 | (__v64qi) _mm512_broadcastb_epi8(__A), |
1938 | (__v64qi) _mm512_setzero_si512()); |
1939 | } |
1940 | |
1941 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1942 | _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) |
1943 | { |
1944 | return (__m512i) __builtin_ia32_selectw_512(__M, |
1945 | (__v32hi) _mm512_set1_epi16(__A), |
1946 | (__v32hi) __O); |
1947 | } |
1948 | |
1949 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1950 | _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) |
1951 | { |
1952 | return (__m512i) __builtin_ia32_selectw_512(__M, |
1953 | (__v32hi) _mm512_set1_epi16(__A), |
1954 | (__v32hi) _mm512_setzero_si512()); |
1955 | } |
1956 | |
1957 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1958 | _mm512_broadcastw_epi16 (__m128i __A) |
1959 | { |
1960 | return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, |
1961 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
1962 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
1963 | } |
1964 | |
1965 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1966 | _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) |
1967 | { |
1968 | return (__m512i)__builtin_ia32_selectw_512(__M, |
1969 | (__v32hi) _mm512_broadcastw_epi16(__A), |
1970 | (__v32hi) __O); |
1971 | } |
1972 | |
1973 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1974 | _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) |
1975 | { |
1976 | return (__m512i)__builtin_ia32_selectw_512(__M, |
1977 | (__v32hi) _mm512_broadcastw_epi16(__A), |
1978 | (__v32hi) _mm512_setzero_si512()); |
1979 | } |
1980 | |
1981 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1982 | _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) |
1983 | { |
1984 | return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); |
1985 | } |
1986 | |
1987 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1988 | _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, |
1989 | __m512i __B) |
1990 | { |
1991 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
1992 | (__v32hi)_mm512_permutexvar_epi16(__A, __B), |
1993 | (__v32hi)_mm512_setzero_si512()); |
1994 | } |
1995 | |
1996 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
1997 | _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, |
1998 | __m512i __B) |
1999 | { |
2000 | return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, |
2001 | (__v32hi)_mm512_permutexvar_epi16(__A, __B), |
2002 | (__v32hi)__W); |
2003 | } |
2004 | |
/* Wraps __builtin_ia32_palignr512 with (A) and (B) viewed as __v64qi and (N)
   converted to int; yields an __m512i.
   NOTE(review): presumably a macro rather than a function because the builtin
   wants (N) as a compile-time constant -- confirm against the builtin's
   definition.
   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses something written
   directly after the invocation (a subscript, for instance) would attach to
   the builtin call instead of to the __m512i result. */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
2008 | |
/* Merge-masking form of _mm512_alignr_epi8: the select builtin chooses, under
   control of (U), between the alignr result for (A), (B), (N) and the
   pass-through vector (W), both viewed as __v64qi.
   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses something written
   directly after the invocation would attach to the builtin call instead of
   to the __m512i result. */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                             (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                             (__v64qi)(__m512i)(W)))
2013 | |
/* Zero-masking form of _mm512_alignr_epi8: the select builtin chooses, under
   control of (U), between the alignr result for (A), (B), (N) and the vector
   returned by _mm512_setzero_si512(), both viewed as __v64qi.
   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses something written
   directly after the invocation would attach to the builtin call instead of
   to the __m512i result. */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                             (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                             (__v64qi)(__m512i)_mm512_setzero_si512()))
2018 | |
/* Wraps __builtin_ia32_dbpsadbw512 with (A) and (B) viewed as __v64qi and
   (imm) converted to int; yields an __m512i.
   NOTE(review): presumably a macro rather than a function because the builtin
   wants (imm) as a compile-time constant -- confirm against the builtin's
   definition.
   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses something written
   directly after the invocation (a subscript, for instance) would attach to
   the builtin call instead of to the __m512i result. */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
2022 | |
/* Merge-masking form of _mm512_dbsad_epu8: the select builtin chooses, under
   control of (U), between the dbsad result for (A), (B), (imm) and the
   pass-through vector (W), both viewed as __v32hi.
   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses something written
   directly after the invocation would attach to the builtin call instead of
   to the __m512i result. */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)(__m512i)(W)))
2027 | |
/* Zero-masking form of _mm512_dbsad_epu8: the select builtin chooses, under
   control of (U), between the dbsad result for (A), (B), (imm) and the vector
   returned by _mm512_setzero_si512(), both viewed as __v32hi.
   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses something written
   directly after the invocation would attach to the builtin call instead of
   to the __m512i result. */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)_mm512_setzero_si512()))
2032 | |
2033 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
2034 | _mm512_sad_epu8 (__m512i __A, __m512i __B) |
2035 | { |
2036 | return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, |
2037 | (__v64qi) __B); |
2038 | } |
2039 | |
2040 | #undef __DEFAULT_FN_ATTRS512 |
2041 | #undef __DEFAULT_FN_ATTRS |
2042 | |
2043 | #endif |
2044 | |