1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | #ifndef __IMMINTRIN_H |
25 | #error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." |
26 | #endif |
27 | |
28 | #ifndef __VBMIVLINTRIN_H |
29 | #define __VBMIVLINTRIN_H |
30 | |
31 | |
/* Attribute sets applied to every function in this header.  Both force
 * inlining, suppress debug info and enable the "avx512vbmi,avx512vl" target
 * features; they differ only in the __min_vector_width__ argument
 * (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vbmi,avx512vl"),                            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vbmi,avx512vl"),                            \
                 __min_vector_width__(256)))
34 | |
35 | |
36 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
37 | _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) |
38 | { |
39 | return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, |
40 | (__v16qi)__I, |
41 | (__v16qi)__B); |
42 | } |
43 | |
44 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
45 | _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, |
46 | __m128i __B) |
47 | { |
48 | return (__m128i)__builtin_ia32_selectb_128(__U, |
49 | (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), |
50 | (__v16qi)__A); |
51 | } |
52 | |
53 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
54 | _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, |
55 | __m128i __B) |
56 | { |
57 | return (__m128i)__builtin_ia32_selectb_128(__U, |
58 | (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), |
59 | (__v16qi)__I); |
60 | } |
61 | |
62 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
63 | _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, |
64 | __m128i __B) |
65 | { |
66 | return (__m128i)__builtin_ia32_selectb_128(__U, |
67 | (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), |
68 | (__v16qi)_mm_setzero_si128()); |
69 | } |
70 | |
71 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
72 | _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) |
73 | { |
74 | return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, |
75 | (__v32qi)__B); |
76 | } |
77 | |
78 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
79 | _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, |
80 | __m256i __B) |
81 | { |
82 | return (__m256i)__builtin_ia32_selectb_256(__U, |
83 | (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), |
84 | (__v32qi)__A); |
85 | } |
86 | |
87 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
88 | _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, |
89 | __m256i __B) |
90 | { |
91 | return (__m256i)__builtin_ia32_selectb_256(__U, |
92 | (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), |
93 | (__v32qi)__I); |
94 | } |
95 | |
96 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
97 | _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, |
98 | __m256i __B) |
99 | { |
100 | return (__m256i)__builtin_ia32_selectb_256(__U, |
101 | (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), |
102 | (__v32qi)_mm256_setzero_si256()); |
103 | } |
104 | |
105 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
106 | _mm_permutexvar_epi8 (__m128i __A, __m128i __B) |
107 | { |
108 | return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); |
109 | } |
110 | |
111 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
112 | _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) |
113 | { |
114 | return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, |
115 | (__v16qi)_mm_permutexvar_epi8(__A, __B), |
116 | (__v16qi)_mm_setzero_si128()); |
117 | } |
118 | |
119 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
120 | _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, |
121 | __m128i __B) |
122 | { |
123 | return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, |
124 | (__v16qi)_mm_permutexvar_epi8(__A, __B), |
125 | (__v16qi)__W); |
126 | } |
127 | |
128 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
129 | _mm256_permutexvar_epi8 (__m256i __A, __m256i __B) |
130 | { |
131 | return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); |
132 | } |
133 | |
134 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
135 | _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, |
136 | __m256i __B) |
137 | { |
138 | return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, |
139 | (__v32qi)_mm256_permutexvar_epi8(__A, __B), |
140 | (__v32qi)_mm256_setzero_si256()); |
141 | } |
142 | |
143 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
144 | _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, |
145 | __m256i __B) |
146 | { |
147 | return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, |
148 | (__v32qi)_mm256_permutexvar_epi8(__A, __B), |
149 | (__v32qi)__W); |
150 | } |
151 | |
152 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
153 | _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) |
154 | { |
155 | return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y); |
156 | } |
157 | |
158 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
159 | _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, |
160 | __m128i __Y) |
161 | { |
162 | return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, |
163 | (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), |
164 | (__v16qi)__W); |
165 | } |
166 | |
167 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
168 | _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) |
169 | { |
170 | return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, |
171 | (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), |
172 | (__v16qi)_mm_setzero_si128()); |
173 | } |
174 | |
175 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
176 | _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) |
177 | { |
178 | return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y); |
179 | } |
180 | |
181 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
182 | _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, |
183 | __m256i __Y) |
184 | { |
185 | return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, |
186 | (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), |
187 | (__v32qi)__W); |
188 | } |
189 | |
190 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
191 | _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) |
192 | { |
193 | return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, |
194 | (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), |
195 | (__v32qi)_mm256_setzero_si256()); |
196 | } |
197 | |
198 | |
/* The attribute helper macros are private to this header; drop them so they
 * do not remain defined for code that follows. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
201 | |
202 | #endif |
203 | |