1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | #ifndef __IMMINTRIN_H |
25 | #define __IMMINTRIN_H |
26 | |
27 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) |
28 | #include <mmintrin.h> |
29 | #endif |
30 | |
31 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) |
32 | #include <xmmintrin.h> |
33 | #endif |
34 | |
35 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) |
36 | #include <emmintrin.h> |
37 | #endif |
38 | |
39 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) |
40 | #include <pmmintrin.h> |
41 | #endif |
42 | |
43 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) |
44 | #include <tmmintrin.h> |
45 | #endif |
46 | |
47 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
48 | (defined(__SSE4_2__) || defined(__SSE4_1__)) |
49 | #include <smmintrin.h> |
50 | #endif |
51 | |
52 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
53 | (defined(__AES__) || defined(__PCLMUL__)) |
54 | #include <wmmintrin.h> |
55 | #endif |
56 | |
57 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) |
58 | #include <clflushoptintrin.h> |
59 | #endif |
60 | |
61 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__) |
62 | #include <clwbintrin.h> |
63 | #endif |
64 | |
65 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) |
66 | #include <avxintrin.h> |
67 | #endif |
68 | |
69 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) |
70 | #include <avx2intrin.h> |
71 | #endif |
72 | |
73 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) |
74 | #include <f16cintrin.h> |
75 | #endif |
76 | |
77 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) |
78 | #include <vpclmulqdqintrin.h> |
79 | #endif |
80 | |
81 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) |
82 | #include <bmiintrin.h> |
83 | #endif |
84 | |
85 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) |
86 | #include <bmi2intrin.h> |
87 | #endif |
88 | |
89 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) |
90 | #include <lzcntintrin.h> |
91 | #endif |
92 | |
93 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__) |
94 | #include <popcntintrin.h> |
95 | #endif |
96 | |
97 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) |
98 | #include <fmaintrin.h> |
99 | #endif |
100 | |
101 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) |
102 | #include <avx512fintrin.h> |
103 | #endif |
104 | |
105 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) |
106 | #include <avx512vlintrin.h> |
107 | #endif |
108 | |
109 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) |
110 | #include <avx512bwintrin.h> |
111 | #endif |
112 | |
113 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) |
114 | #include <avx512bitalgintrin.h> |
115 | #endif |
116 | |
117 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) |
118 | #include <avx512cdintrin.h> |
119 | #endif |
120 | |
121 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) |
122 | #include <avx512vpopcntdqintrin.h> |
123 | #endif |
124 | |
125 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
126 | (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) |
127 | #include <avx512vpopcntdqvlintrin.h> |
128 | #endif |
129 | |
130 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) |
131 | #include <avx512vnniintrin.h> |
132 | #endif |
133 | |
134 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
135 | (defined(__AVX512VL__) && defined(__AVX512VNNI__)) |
136 | #include <avx512vlvnniintrin.h> |
137 | #endif |
138 | |
139 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) |
140 | #include <avx512dqintrin.h> |
141 | #endif |
142 | |
143 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
144 | (defined(__AVX512VL__) && defined(__AVX512BITALG__)) |
145 | #include <avx512vlbitalgintrin.h> |
146 | #endif |
147 | |
148 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
149 | (defined(__AVX512VL__) && defined(__AVX512BW__)) |
150 | #include <avx512vlbwintrin.h> |
151 | #endif |
152 | |
153 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
154 | (defined(__AVX512VL__) && defined(__AVX512CD__)) |
155 | #include <avx512vlcdintrin.h> |
156 | #endif |
157 | |
158 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
159 | (defined(__AVX512VL__) && defined(__AVX512DQ__)) |
160 | #include <avx512vldqintrin.h> |
161 | #endif |
162 | |
163 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) |
164 | #include <avx512erintrin.h> |
165 | #endif |
166 | |
167 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) |
168 | #include <avx512ifmaintrin.h> |
169 | #endif |
170 | |
171 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
172 | (defined(__AVX512IFMA__) && defined(__AVX512VL__)) |
173 | #include <avx512ifmavlintrin.h> |
174 | #endif |
175 | |
176 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__) |
177 | #include <avx512vbmiintrin.h> |
178 | #endif |
179 | |
180 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
181 | (defined(__AVX512VBMI__) && defined(__AVX512VL__)) |
182 | #include <avx512vbmivlintrin.h> |
183 | #endif |
184 | |
185 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) |
186 | #include <avx512vbmi2intrin.h> |
187 | #endif |
188 | |
189 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
190 | (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) |
191 | #include <avx512vlvbmi2intrin.h> |
192 | #endif |
193 | |
194 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) |
195 | #include <avx512pfintrin.h> |
196 | #endif |
197 | |
198 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) |
199 | #include <pkuintrin.h> |
200 | #endif |
201 | |
202 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) |
203 | #include <vaesintrin.h> |
204 | #endif |
205 | |
206 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) |
207 | #include <gfniintrin.h> |
208 | #endif |
209 | |
210 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__) |
211 | |
212 | |
213 | |
214 | |
215 | |
/* Wrapper around the RDPID builtin.
 *
 * Returns the unsigned 32-bit value produced by __builtin_ia32_rdpid().
 * The definition carries the "rdpid" target attribute, so the builtin is
 * usable here even when the translation unit is not built with that feature.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __id = __builtin_ia32_rdpid();
  return __id;
}
220 | #endif |
221 | |
222 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) |
/* Wrapper around the 16-bit RDRAND step builtin.
 *
 * __p: destination handed straight to __builtin_ia32_rdrand16_step().
 *
 * Returns the builtin's status converted to int (Intel documents RDRAND as
 * reporting 1 when a random value was delivered and 0 otherwise).
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  int __status = __builtin_ia32_rdrand16_step(__p);
  return __status;
}
228 | |
/* Wrapper around the 32-bit RDRAND step builtin.
 *
 * __p: destination handed straight to __builtin_ia32_rdrand32_step().
 *
 * Returns the builtin's status converted to int (Intel documents RDRAND as
 * reporting 1 when a random value was delivered and 0 otherwise).
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  int __status = __builtin_ia32_rdrand32_step(__p);
  return __status;
}
234 | |
235 | #ifdef __x86_64__ |
/* Wrapper around the 64-bit RDRAND step builtin (x86-64 builds only; the
 * surrounding #ifdef __x86_64__ guards this definition).
 *
 * __p: destination handed straight to __builtin_ia32_rdrand64_step().
 *
 * Returns the builtin's status converted to int (Intel documents RDRAND as
 * reporting 1 when a random value was delivered and 0 otherwise).
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  int __status = __builtin_ia32_rdrand64_step(__p);
  return __status;
}
241 | #endif |
242 | #endif |
243 | |
244 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) |
245 | #ifdef __x86_64__ |
/* Wrapper around the 32-bit RDFSBASE builtin.
 *
 * Returns the unsigned 32-bit result of __builtin_ia32_rdfsbase32().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
251 | |
/* Wrapper around the 64-bit RDFSBASE builtin.
 *
 * Returns the unsigned 64-bit result of __builtin_ia32_rdfsbase64().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
257 | |
/* Wrapper around the 32-bit RDGSBASE builtin.
 *
 * Returns the unsigned 32-bit result of __builtin_ia32_rdgsbase32().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
263 | |
/* Wrapper around the 64-bit RDGSBASE builtin.
 *
 * Returns the unsigned 64-bit result of __builtin_ia32_rdgsbase64().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
269 | |
/* Wrapper around the 32-bit WRFSBASE builtin.
 *
 * __V: value forwarded unchanged to __builtin_ia32_wrfsbase32().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
275 | |
/* Wrapper around the 64-bit WRFSBASE builtin.
 *
 * __V: value forwarded unchanged to __builtin_ia32_wrfsbase64().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
281 | |
/* Wrapper around the 32-bit WRGSBASE builtin.
 *
 * __V: value forwarded unchanged to __builtin_ia32_wrgsbase32().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
287 | |
/* Wrapper around the 64-bit WRGSBASE builtin.
 *
 * __V: value forwarded unchanged to __builtin_ia32_wrgsbase64().
 * Compiled with the "fsgsbase" target feature enabled for this function.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrgsbase64(__V);
}
293 | |
294 | #endif |
295 | #endif |
296 | |
297 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) |
298 | |
299 | |
300 | |
301 | |
302 | |
303 | |
304 | |
/* Loads a 16-bit value from __P and returns it with its bytes swapped.
 *
 * __P: source address. The read goes through a packed, may_alias wrapper
 *      struct, so no alignment or effective-type assumption is made about
 *      the pointee.
 *
 * Returns __builtin_bswap16() of the loaded value, converted to short.
 *
 * The wrapper pointer is const-qualified so the cast no longer strips the
 * const qualifier that __P carries.
 */
static __inline__ short
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P)
{
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i16 *__src = (const struct __loadu_i16 *)__P;
  return __builtin_bswap16(__src->__v);
}
312 | |
/* Stores the byte-swapped form of a 16-bit value at __P.
 *
 * __P: destination address. The write goes through a packed, may_alias
 *      wrapper struct, so no alignment or effective-type assumption is made.
 * __D: value whose __builtin_bswap16() result is written.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D)
{
  struct __be_slot_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_slot_i16 *__dst = (struct __be_slot_i16 *)__P;
  __dst->__v = __builtin_bswap16(__D);
}
320 | |
/* Loads a 32-bit value from __P and returns it with its bytes swapped.
 *
 * __P: source address. The read goes through a packed, may_alias wrapper
 *      struct, so no alignment or effective-type assumption is made about
 *      the pointee.
 *
 * Returns __builtin_bswap32() of the loaded value, converted to int.
 *
 * The wrapper pointer is const-qualified so the cast no longer strips the
 * const qualifier that __P carries.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P)
{
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i32 *__src = (const struct __loadu_i32 *)__P;
  return __builtin_bswap32(__src->__v);
}
328 | |
/* Stores the byte-swapped form of a 32-bit value at __P.
 *
 * __P: destination address. The write goes through a packed, may_alias
 *      wrapper struct, so no alignment or effective-type assumption is made.
 * __D: value whose __builtin_bswap32() result is written.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D)
{
  struct __be_slot_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_slot_i32 *__dst = (struct __be_slot_i32 *)__P;
  __dst->__v = __builtin_bswap32(__D);
}
336 | |
337 | #ifdef __x86_64__ |
/* Loads a 64-bit value from __P and returns it with its bytes swapped.
 *
 * __P: source address. The read goes through a packed, may_alias wrapper
 *      struct, so no alignment or effective-type assumption is made about
 *      the pointee.
 *
 * Returns __builtin_bswap64() of the loaded value, converted to long long.
 *
 * The wrapper pointer is const-qualified so the cast no longer strips the
 * const qualifier that __P carries.
 */
static __inline__ long long
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P)
{
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  const struct __loadu_i64 *__src = (const struct __loadu_i64 *)__P;
  return __builtin_bswap64(__src->__v);
}
345 | |
/* Stores the byte-swapped form of a 64-bit value at __P.
 *
 * __P: destination address. The write goes through a packed, may_alias
 *      wrapper struct, so no alignment or effective-type assumption is made.
 * __D: value whose __builtin_bswap64() result is written.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D)
{
  struct __be_slot_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __be_slot_i64 *__dst = (struct __be_slot_i64 *)__P;
  __dst->__v = __builtin_bswap64(__D);
}
353 | #endif |
354 | #endif |
355 | |
356 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) |
357 | #include <rtmintrin.h> |
358 | #include <xtestintrin.h> |
359 | #endif |
360 | |
361 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) |
362 | #include <shaintrin.h> |
363 | #endif |
364 | |
365 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) |
366 | #include <fxsrintrin.h> |
367 | #endif |
368 | |
369 | |
370 | #include <xsaveintrin.h> |
371 | |
372 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) |
373 | #include <xsaveoptintrin.h> |
374 | #endif |
375 | |
376 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) |
377 | #include <xsavecintrin.h> |
378 | #endif |
379 | |
380 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) |
381 | #include <xsavesintrin.h> |
382 | #endif |
383 | |
384 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__) |
385 | #include <cetintrin.h> |
386 | #endif |
387 | |
388 | |
389 | |
390 | #include <adxintrin.h> |
391 | |
392 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__) |
393 | #include <rdseedintrin.h> |
394 | #endif |
395 | |
396 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) |
397 | #include <wbnoinvdintrin.h> |
398 | #endif |
399 | |
400 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__) |
401 | #include <cldemoteintrin.h> |
402 | #endif |
403 | |
404 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__) |
405 | #include <waitpkgintrin.h> |
406 | #endif |
407 | |
408 | #if !defined(_MSC_VER) || __has_feature(modules) || \ |
409 | defined(__MOVDIRI__) || defined(__MOVDIR64B__) |
410 | #include <movdirintrin.h> |
411 | #endif |
412 | |
413 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__) |
414 | #include <pconfigintrin.h> |
415 | #endif |
416 | |
417 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__) |
418 | #include <sgxintrin.h> |
419 | #endif |
420 | |
421 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__) |
422 | #include <ptwriteintrin.h> |
423 | #endif |
424 | |
425 | #if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__) |
426 | #include <invpcidintrin.h> |
427 | #endif |
428 | |
429 | #ifdef _MSC_VER |
430 | |
431 | #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) |
432 | #ifdef __cplusplus |
433 | extern "C" { |
434 | #endif |
435 | |
436 | |
437 | |
438 | #if defined(__i386__) || defined(__x86_64__) |
439 | static __inline__ long __DEFAULT_FN_ATTRS |
440 | _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { |
441 | __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" |
442 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
443 | return _Value; |
444 | } |
445 | static __inline__ long __DEFAULT_FN_ATTRS |
446 | _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { |
447 | __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" |
448 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
449 | return _Value; |
450 | } |
451 | #endif |
452 | #if defined(__x86_64__) |
453 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
454 | _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { |
455 | __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" |
456 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
457 | return _Value; |
458 | } |
459 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
460 | _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { |
461 | __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" |
462 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
463 | return _Value; |
464 | } |
465 | #endif |
466 | |
467 | |
468 | |
469 | #if defined(__i386__) || defined(__x86_64__) |
470 | static __inline__ long __DEFAULT_FN_ATTRS |
471 | _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, |
472 | long _Exchange, long _Comparand) { |
473 | __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" |
474 | : "+a" (_Comparand), "+m" (*_Destination) |
475 | : "r" (_Exchange) : "memory"); |
476 | return _Comparand; |
477 | } |
478 | static __inline__ long __DEFAULT_FN_ATTRS |
479 | _InterlockedCompareExchange_HLERelease(long volatile *_Destination, |
480 | long _Exchange, long _Comparand) { |
481 | __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" |
482 | : "+a" (_Comparand), "+m" (*_Destination) |
483 | : "r" (_Exchange) : "memory"); |
484 | return _Comparand; |
485 | } |
486 | #endif |
487 | #if defined(__x86_64__) |
488 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
489 | _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, |
490 | __int64 _Exchange, __int64 _Comparand) { |
491 | __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" |
492 | : "+a" (_Comparand), "+m" (*_Destination) |
493 | : "r" (_Exchange) : "memory"); |
494 | return _Comparand; |
495 | } |
496 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
497 | _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, |
498 | __int64 _Exchange, __int64 _Comparand) { |
499 | __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" |
500 | : "+a" (_Comparand), "+m" (*_Destination) |
501 | : "r" (_Exchange) : "memory"); |
502 | return _Comparand; |
503 | } |
504 | #endif |
505 | #ifdef __cplusplus |
506 | } |
507 | #endif |
508 | |
509 | #undef __DEFAULT_FN_ATTRS |
510 | |
511 | #endif |
512 | |
513 | #endif |
514 | |