1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
/* Refuse direct inclusion: compilation stops with an error unless
 * __X86INTRIN_H is already defined (expected to come from <x86intrin.h>). */
#ifndef __X86INTRIN_H
#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
#endif
27 | |
28 | #ifndef __XOPINTRIN_H |
29 | #define __XOPINTRIN_H |
30 | |
31 | #include <fma4intrin.h> |
32 | |
33 | |
/* Attribute sets applied to the intrinsic functions defined below: always
 * inlined, no debug info, built for the "xop" target, with a minimum vector
 * width of 128 (default) or 256 (the ...256 variant). */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("xop"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("xop"), \
                 __min_vector_width__(256)))
36 | |
37 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
38 | _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) |
39 | { |
40 | return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); |
41 | } |
42 | |
43 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
44 | _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) |
45 | { |
46 | return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); |
47 | } |
48 | |
49 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
50 | _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) |
51 | { |
52 | return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); |
53 | } |
54 | |
55 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
56 | _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) |
57 | { |
58 | return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); |
59 | } |
60 | |
61 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
62 | _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) |
63 | { |
64 | return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); |
65 | } |
66 | |
67 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
68 | _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) |
69 | { |
70 | return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); |
71 | } |
72 | |
73 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
74 | _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) |
75 | { |
76 | return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); |
77 | } |
78 | |
79 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
80 | _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) |
81 | { |
82 | return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); |
83 | } |
84 | |
85 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
86 | _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) |
87 | { |
88 | return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); |
89 | } |
90 | |
91 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
92 | _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) |
93 | { |
94 | return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); |
95 | } |
96 | |
97 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
98 | _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) |
99 | { |
100 | return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); |
101 | } |
102 | |
103 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
104 | _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) |
105 | { |
106 | return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); |
107 | } |
108 | |
109 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
110 | _mm_haddw_epi8(__m128i __A) |
111 | { |
112 | return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); |
113 | } |
114 | |
115 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
116 | _mm_haddd_epi8(__m128i __A) |
117 | { |
118 | return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); |
119 | } |
120 | |
121 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
122 | _mm_haddq_epi8(__m128i __A) |
123 | { |
124 | return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); |
125 | } |
126 | |
127 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
128 | _mm_haddd_epi16(__m128i __A) |
129 | { |
130 | return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); |
131 | } |
132 | |
133 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
134 | _mm_haddq_epi16(__m128i __A) |
135 | { |
136 | return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); |
137 | } |
138 | |
139 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
140 | _mm_haddq_epi32(__m128i __A) |
141 | { |
142 | return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); |
143 | } |
144 | |
145 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
146 | _mm_haddw_epu8(__m128i __A) |
147 | { |
148 | return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); |
149 | } |
150 | |
151 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
152 | _mm_haddd_epu8(__m128i __A) |
153 | { |
154 | return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); |
155 | } |
156 | |
157 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
158 | _mm_haddq_epu8(__m128i __A) |
159 | { |
160 | return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); |
161 | } |
162 | |
163 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
164 | _mm_haddd_epu16(__m128i __A) |
165 | { |
166 | return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); |
167 | } |
168 | |
169 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
170 | _mm_haddq_epu16(__m128i __A) |
171 | { |
172 | return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); |
173 | } |
174 | |
175 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
176 | _mm_haddq_epu32(__m128i __A) |
177 | { |
178 | return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); |
179 | } |
180 | |
181 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
182 | _mm_hsubw_epi8(__m128i __A) |
183 | { |
184 | return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); |
185 | } |
186 | |
187 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
188 | _mm_hsubd_epi16(__m128i __A) |
189 | { |
190 | return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); |
191 | } |
192 | |
193 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
194 | _mm_hsubq_epi32(__m128i __A) |
195 | { |
196 | return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); |
197 | } |
198 | |
199 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
200 | _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) |
201 | { |
202 | return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); |
203 | } |
204 | |
205 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
206 | _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) |
207 | { |
208 | return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); |
209 | } |
210 | |
211 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
212 | _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) |
213 | { |
214 | return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); |
215 | } |
216 | |
217 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
218 | _mm_rot_epi8(__m128i __A, __m128i __B) |
219 | { |
220 | return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); |
221 | } |
222 | |
223 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
224 | _mm_rot_epi16(__m128i __A, __m128i __B) |
225 | { |
226 | return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); |
227 | } |
228 | |
229 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
230 | _mm_rot_epi32(__m128i __A, __m128i __B) |
231 | { |
232 | return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); |
233 | } |
234 | |
235 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
236 | _mm_rot_epi64(__m128i __A, __m128i __B) |
237 | { |
238 | return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); |
239 | } |
240 | |
/* Expands to __builtin_ia32_vprotbi on A (viewed as __v16qi) with N passed
 * through, cast to __m128i. The whole expansion is parenthesized so that a
 * postfix operator applied to the macro result (for example a subscript)
 * cannot bind to the builtin call before the cast is applied.
 * NOTE(review): N is presumably required to be a constant - confirm. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
243 | |
/* Expands to __builtin_ia32_vprotwi on A (viewed as __v8hi) with N passed
 * through, cast to __m128i. The whole expansion is parenthesized so that a
 * postfix operator applied to the macro result cannot bind to the builtin
 * call before the cast is applied.
 * NOTE(review): N is presumably required to be a constant - confirm. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
246 | |
/* Expands to __builtin_ia32_vprotdi on A (viewed as __v4si) with N passed
 * through, cast to __m128i. The whole expansion is parenthesized so that a
 * postfix operator applied to the macro result cannot bind to the builtin
 * call before the cast is applied.
 * NOTE(review): N is presumably required to be a constant - confirm. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
249 | |
/* Expands to __builtin_ia32_vprotqi on A (viewed as __v2di) with N passed
 * through, cast to __m128i. The whole expansion is parenthesized so that a
 * postfix operator applied to the macro result cannot bind to the builtin
 * call before the cast is applied.
 * NOTE(review): N is presumably required to be a constant - confirm. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
252 | |
253 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
254 | _mm_shl_epi8(__m128i __A, __m128i __B) |
255 | { |
256 | return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); |
257 | } |
258 | |
259 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
260 | _mm_shl_epi16(__m128i __A, __m128i __B) |
261 | { |
262 | return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); |
263 | } |
264 | |
265 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
266 | _mm_shl_epi32(__m128i __A, __m128i __B) |
267 | { |
268 | return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); |
269 | } |
270 | |
271 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
272 | _mm_shl_epi64(__m128i __A, __m128i __B) |
273 | { |
274 | return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); |
275 | } |
276 | |
277 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
278 | _mm_sha_epi8(__m128i __A, __m128i __B) |
279 | { |
280 | return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); |
281 | } |
282 | |
283 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
284 | _mm_sha_epi16(__m128i __A, __m128i __B) |
285 | { |
286 | return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); |
287 | } |
288 | |
289 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
290 | _mm_sha_epi32(__m128i __A, __m128i __B) |
291 | { |
292 | return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); |
293 | } |
294 | |
295 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
296 | _mm_sha_epi64(__m128i __A, __m128i __B) |
297 | { |
298 | return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); |
299 | } |
300 | |
/* Expands to __builtin_ia32_vpcomub on A and B (viewed as __v16qi) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
304 | |
/* Expands to __builtin_ia32_vpcomuw on A and B (viewed as __v8hi) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
308 | |
/* Expands to __builtin_ia32_vpcomud on A and B (viewed as __v4si) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
312 | |
/* Expands to __builtin_ia32_vpcomuq on A and B (viewed as __v2di) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
316 | |
/* Expands to __builtin_ia32_vpcomb on A and B (viewed as __v16qi) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
320 | |
/* Expands to __builtin_ia32_vpcomw on A and B (viewed as __v8hi) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
324 | |
/* Expands to __builtin_ia32_vpcomd on A and B (viewed as __v4si) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
328 | |
/* Expands to __builtin_ia32_vpcomq on A and B (viewed as __v2di) with the
 * selector N passed through, cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro result
 * cannot bind to the builtin call before the cast is applied. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
332 | |
/* Selector values passed as the last argument of the _mm_com_* macros; each
 * one is used by the matching _mm_com<name>_* wrapper in this header. */
#define _MM_PCOMCTRL_LT    0 /* used by _mm_comlt_*    */
#define _MM_PCOMCTRL_LE    1 /* used by _mm_comle_*    */
#define _MM_PCOMCTRL_GT    2 /* used by _mm_comgt_*    */
#define _MM_PCOMCTRL_GE    3 /* used by _mm_comge_*    */
#define _MM_PCOMCTRL_EQ    4 /* used by _mm_comeq_*    */
#define _MM_PCOMCTRL_NEQ   5 /* used by _mm_comneq_*   */
#define _MM_PCOMCTRL_FALSE 6 /* used by _mm_comfalse_* */
#define _MM_PCOMCTRL_TRUE  7 /* used by _mm_comtrue_*  */
341 | |
342 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
343 | _mm_comlt_epu8(__m128i __A, __m128i __B) |
344 | { |
345 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); |
346 | } |
347 | |
348 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
349 | _mm_comle_epu8(__m128i __A, __m128i __B) |
350 | { |
351 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); |
352 | } |
353 | |
354 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
355 | _mm_comgt_epu8(__m128i __A, __m128i __B) |
356 | { |
357 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); |
358 | } |
359 | |
360 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
361 | _mm_comge_epu8(__m128i __A, __m128i __B) |
362 | { |
363 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); |
364 | } |
365 | |
366 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
367 | _mm_comeq_epu8(__m128i __A, __m128i __B) |
368 | { |
369 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); |
370 | } |
371 | |
372 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
373 | _mm_comneq_epu8(__m128i __A, __m128i __B) |
374 | { |
375 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); |
376 | } |
377 | |
378 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
379 | _mm_comfalse_epu8(__m128i __A, __m128i __B) |
380 | { |
381 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); |
382 | } |
383 | |
384 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
385 | _mm_comtrue_epu8(__m128i __A, __m128i __B) |
386 | { |
387 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); |
388 | } |
389 | |
390 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
391 | _mm_comlt_epu16(__m128i __A, __m128i __B) |
392 | { |
393 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); |
394 | } |
395 | |
396 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
397 | _mm_comle_epu16(__m128i __A, __m128i __B) |
398 | { |
399 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); |
400 | } |
401 | |
402 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
403 | _mm_comgt_epu16(__m128i __A, __m128i __B) |
404 | { |
405 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); |
406 | } |
407 | |
408 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
409 | _mm_comge_epu16(__m128i __A, __m128i __B) |
410 | { |
411 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); |
412 | } |
413 | |
414 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
415 | _mm_comeq_epu16(__m128i __A, __m128i __B) |
416 | { |
417 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); |
418 | } |
419 | |
420 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
421 | _mm_comneq_epu16(__m128i __A, __m128i __B) |
422 | { |
423 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); |
424 | } |
425 | |
426 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
427 | _mm_comfalse_epu16(__m128i __A, __m128i __B) |
428 | { |
429 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); |
430 | } |
431 | |
432 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
433 | _mm_comtrue_epu16(__m128i __A, __m128i __B) |
434 | { |
435 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); |
436 | } |
437 | |
438 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
439 | _mm_comlt_epu32(__m128i __A, __m128i __B) |
440 | { |
441 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); |
442 | } |
443 | |
444 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
445 | _mm_comle_epu32(__m128i __A, __m128i __B) |
446 | { |
447 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); |
448 | } |
449 | |
450 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
451 | _mm_comgt_epu32(__m128i __A, __m128i __B) |
452 | { |
453 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); |
454 | } |
455 | |
456 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
457 | _mm_comge_epu32(__m128i __A, __m128i __B) |
458 | { |
459 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); |
460 | } |
461 | |
462 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
463 | _mm_comeq_epu32(__m128i __A, __m128i __B) |
464 | { |
465 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); |
466 | } |
467 | |
468 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
469 | _mm_comneq_epu32(__m128i __A, __m128i __B) |
470 | { |
471 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); |
472 | } |
473 | |
474 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
475 | _mm_comfalse_epu32(__m128i __A, __m128i __B) |
476 | { |
477 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); |
478 | } |
479 | |
480 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
481 | _mm_comtrue_epu32(__m128i __A, __m128i __B) |
482 | { |
483 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); |
484 | } |
485 | |
486 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
487 | _mm_comlt_epu64(__m128i __A, __m128i __B) |
488 | { |
489 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); |
490 | } |
491 | |
492 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
493 | _mm_comle_epu64(__m128i __A, __m128i __B) |
494 | { |
495 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); |
496 | } |
497 | |
498 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
499 | _mm_comgt_epu64(__m128i __A, __m128i __B) |
500 | { |
501 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); |
502 | } |
503 | |
504 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
505 | _mm_comge_epu64(__m128i __A, __m128i __B) |
506 | { |
507 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); |
508 | } |
509 | |
510 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
511 | _mm_comeq_epu64(__m128i __A, __m128i __B) |
512 | { |
513 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); |
514 | } |
515 | |
516 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
517 | _mm_comneq_epu64(__m128i __A, __m128i __B) |
518 | { |
519 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); |
520 | } |
521 | |
522 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
523 | _mm_comfalse_epu64(__m128i __A, __m128i __B) |
524 | { |
525 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); |
526 | } |
527 | |
528 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
529 | _mm_comtrue_epu64(__m128i __A, __m128i __B) |
530 | { |
531 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); |
532 | } |
533 | |
534 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
535 | _mm_comlt_epi8(__m128i __A, __m128i __B) |
536 | { |
537 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); |
538 | } |
539 | |
540 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
541 | _mm_comle_epi8(__m128i __A, __m128i __B) |
542 | { |
543 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); |
544 | } |
545 | |
546 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
547 | _mm_comgt_epi8(__m128i __A, __m128i __B) |
548 | { |
549 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); |
550 | } |
551 | |
552 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
553 | _mm_comge_epi8(__m128i __A, __m128i __B) |
554 | { |
555 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); |
556 | } |
557 | |
558 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
559 | _mm_comeq_epi8(__m128i __A, __m128i __B) |
560 | { |
561 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); |
562 | } |
563 | |
564 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
565 | _mm_comneq_epi8(__m128i __A, __m128i __B) |
566 | { |
567 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); |
568 | } |
569 | |
570 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
571 | _mm_comfalse_epi8(__m128i __A, __m128i __B) |
572 | { |
573 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); |
574 | } |
575 | |
576 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
577 | _mm_comtrue_epi8(__m128i __A, __m128i __B) |
578 | { |
579 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); |
580 | } |
581 | |
582 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
583 | _mm_comlt_epi16(__m128i __A, __m128i __B) |
584 | { |
585 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); |
586 | } |
587 | |
588 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
589 | _mm_comle_epi16(__m128i __A, __m128i __B) |
590 | { |
591 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); |
592 | } |
593 | |
594 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
595 | _mm_comgt_epi16(__m128i __A, __m128i __B) |
596 | { |
597 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); |
598 | } |
599 | |
600 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
601 | _mm_comge_epi16(__m128i __A, __m128i __B) |
602 | { |
603 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); |
604 | } |
605 | |
606 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
607 | _mm_comeq_epi16(__m128i __A, __m128i __B) |
608 | { |
609 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); |
610 | } |
611 | |
612 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
613 | _mm_comneq_epi16(__m128i __A, __m128i __B) |
614 | { |
615 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); |
616 | } |
617 | |
618 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
619 | _mm_comfalse_epi16(__m128i __A, __m128i __B) |
620 | { |
621 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); |
622 | } |
623 | |
624 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
625 | _mm_comtrue_epi16(__m128i __A, __m128i __B) |
626 | { |
627 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); |
628 | } |
629 | |
630 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
631 | _mm_comlt_epi32(__m128i __A, __m128i __B) |
632 | { |
633 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); |
634 | } |
635 | |
636 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
637 | _mm_comle_epi32(__m128i __A, __m128i __B) |
638 | { |
639 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); |
640 | } |
641 | |
642 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
643 | _mm_comgt_epi32(__m128i __A, __m128i __B) |
644 | { |
645 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); |
646 | } |
647 | |
648 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
649 | _mm_comge_epi32(__m128i __A, __m128i __B) |
650 | { |
651 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); |
652 | } |
653 | |
654 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
655 | _mm_comeq_epi32(__m128i __A, __m128i __B) |
656 | { |
657 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); |
658 | } |
659 | |
660 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
661 | _mm_comneq_epi32(__m128i __A, __m128i __B) |
662 | { |
663 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); |
664 | } |
665 | |
666 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
667 | _mm_comfalse_epi32(__m128i __A, __m128i __B) |
668 | { |
669 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); |
670 | } |
671 | |
672 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
673 | _mm_comtrue_epi32(__m128i __A, __m128i __B) |
674 | { |
675 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); |
676 | } |
677 | |
678 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
679 | _mm_comlt_epi64(__m128i __A, __m128i __B) |
680 | { |
681 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); |
682 | } |
683 | |
684 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
685 | _mm_comle_epi64(__m128i __A, __m128i __B) |
686 | { |
687 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); |
688 | } |
689 | |
690 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
691 | _mm_comgt_epi64(__m128i __A, __m128i __B) |
692 | { |
693 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); |
694 | } |
695 | |
696 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
697 | _mm_comge_epi64(__m128i __A, __m128i __B) |
698 | { |
699 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); |
700 | } |
701 | |
702 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
703 | _mm_comeq_epi64(__m128i __A, __m128i __B) |
704 | { |
705 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); |
706 | } |
707 | |
708 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
709 | _mm_comneq_epi64(__m128i __A, __m128i __B) |
710 | { |
711 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); |
712 | } |
713 | |
714 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
715 | _mm_comfalse_epi64(__m128i __A, __m128i __B) |
716 | { |
717 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); |
718 | } |
719 | |
720 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
721 | _mm_comtrue_epi64(__m128i __A, __m128i __B) |
722 | { |
723 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); |
724 | } |
725 | |
/* Expands to __builtin_ia32_vpermil2pd on X and Y (viewed as __v2df), C
 * (viewed as __v2di) and I passed through, cast to __m128d. The whole
 * expansion is parenthesized so that a postfix operator applied to the macro
 * result cannot bind to the builtin call before the cast is applied. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))
730 | |
/* Expands to __builtin_ia32_vpermil2pd256 on X and Y (viewed as __v4df), C
 * (viewed as __v4di) and I passed through, cast to __m256d. The whole
 * expansion is parenthesized so that a postfix operator applied to the macro
 * result cannot bind to the builtin call before the cast is applied. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))
735 | |
/* Expands to __builtin_ia32_vpermil2ps on X and Y (viewed as __v4sf), C
 * (viewed as __v4si) and I passed through, cast to __m128. The whole
 * expansion is parenthesized so that a postfix operator applied to the macro
 * result cannot bind to the builtin call before the cast is applied. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), \
                                     (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))
739 | |
/* Expands to __builtin_ia32_vpermil2ps256 on X and Y (viewed as __v8sf), C
 * (viewed as __v8si) and I passed through, cast to __m256. The whole
 * expansion is parenthesized so that a postfix operator applied to the macro
 * result cannot bind to the builtin call before the cast is applied. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))
744 | |
745 | static __inline__ __m128 __DEFAULT_FN_ATTRS |
746 | _mm_frcz_ss(__m128 __A) |
747 | { |
748 | return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); |
749 | } |
750 | |
751 | static __inline__ __m128d __DEFAULT_FN_ATTRS |
752 | _mm_frcz_sd(__m128d __A) |
753 | { |
754 | return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); |
755 | } |
756 | |
757 | static __inline__ __m128 __DEFAULT_FN_ATTRS |
758 | _mm_frcz_ps(__m128 __A) |
759 | { |
760 | return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); |
761 | } |
762 | |
763 | static __inline__ __m128d __DEFAULT_FN_ATTRS |
764 | _mm_frcz_pd(__m128d __A) |
765 | { |
766 | return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); |
767 | } |
768 | |
769 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
770 | _mm256_frcz_ps(__m256 __A) |
771 | { |
772 | return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); |
773 | } |
774 | |
775 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
776 | _mm256_frcz_pd(__m256d __A) |
777 | { |
778 | return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); |
779 | } |
780 | |
/* Drop the attribute helper macros so they do not remain defined after this
 * header has been processed. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS256
783 | |
784 | #endif |
785 | |