1 | /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== |
---|---|
2 | * |
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | * of this software and associated documentation files (the "Software"), to deal |
5 | * in the Software without restriction, including without limitation the rights |
6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | * copies of the Software, and to permit persons to whom the Software is |
8 | * furnished to do so, subject to the following conditions: |
9 | * |
10 | * The above copyright notice and this permission notice shall be included in |
11 | * all copies or substantial portions of the Software. |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
19 | * THE SOFTWARE. |
20 | * |
21 | *===-----------------------------------------------------------------------=== |
22 | */ |
23 | |
24 | #ifndef __MMINTRIN_H |
25 | #define __MMINTRIN_H |
26 | |
/* Public 64-bit MMX vector type; explicitly aligned to 8 bytes. */
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));

/* Internal 8-byte vector views used for casts, narrowest element first. */
typedef char __v8qi __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef long long __v1di __attribute__((__vector_size__(8)));
33 | |
/* Attribute set shared by the intrinsics in this header: __always_inline__,
 * __nodebug__, the "mmx" target feature, and a minimum vector width of 64. */
#define __DEFAULT_FN_ATTRS                                                     \
    __attribute__((__always_inline__, __nodebug__, __target__("mmx"),          \
                   __min_vector_width__(64)))
36 | |
/// Empties the MMX state by marking the x87 stack registers as empty.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
///
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("mmx")))
_mm_empty(void)
{
    /* No operands and no result: the builtin is called only for its effect. */
    __builtin_ia32_emms();
}
49 | |
50 | /// Constructs a 64-bit integer vector, setting the lower 32 bits to the |
51 | /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. |
52 | /// |
53 | /// \headerfile <x86intrin.h> |
54 | /// |
55 | /// This intrinsic corresponds to the <c> MOVD </c> instruction. |
56 | /// |
57 | /// \param __i |
58 | /// A 32-bit integer value. |
59 | /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the |
60 | /// parameter. The upper 32 bits are set to 0. |
61 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
62 | _mm_cvtsi32_si64(int __i) |
63 | { |
64 | return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); |
65 | } |
66 | |
67 | /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit |
68 | /// signed integer. |
69 | /// |
70 | /// \headerfile <x86intrin.h> |
71 | /// |
72 | /// This intrinsic corresponds to the <c> MOVD </c> instruction. |
73 | /// |
74 | /// \param __m |
75 | /// A 64-bit integer vector. |
76 | /// \returns A 32-bit signed integer value containing the lower 32 bits of the |
77 | /// parameter. |
78 | static __inline__ int __DEFAULT_FN_ATTRS |
79 | _mm_cvtsi64_si32(__m64 __m) |
80 | { |
81 | return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); |
82 | } |
83 | |
84 | /// Casts a 64-bit signed integer value into a 64-bit integer vector. |
85 | /// |
86 | /// \headerfile <x86intrin.h> |
87 | /// |
88 | /// This intrinsic corresponds to the <c> MOVQ </c> instruction. |
89 | /// |
90 | /// \param __i |
91 | /// A 64-bit signed integer. |
92 | /// \returns A 64-bit integer vector containing the same bitwise pattern as the |
93 | /// parameter. |
94 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
95 | _mm_cvtsi64_m64(long long __i) |
96 | { |
97 | return (__m64)__i; |
98 | } |
99 | |
100 | /// Casts a 64-bit integer vector into a 64-bit signed integer value. |
101 | /// |
102 | /// \headerfile <x86intrin.h> |
103 | /// |
104 | /// This intrinsic corresponds to the <c> MOVQ </c> instruction. |
105 | /// |
106 | /// \param __m |
107 | /// A 64-bit integer vector. |
108 | /// \returns A 64-bit signed integer containing the same bitwise pattern as the |
109 | /// parameter. |
110 | static __inline__ long long __DEFAULT_FN_ATTRS |
111 | _mm_cvtm64_si64(__m64 __m) |
112 | { |
113 | return (long long)__m; |
114 | } |
115 | |
116 | /// Converts 16-bit signed integers from both 64-bit integer vector |
117 | /// parameters of [4 x i16] into 8-bit signed integer values, and constructs |
118 | /// a 64-bit integer vector of [8 x i8] as the result. Positive values |
119 | /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 |
120 | /// are saturated to 0x80. |
121 | /// |
122 | /// \headerfile <x86intrin.h> |
123 | /// |
124 | /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. |
125 | /// |
126 | /// \param __m1 |
127 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
128 | /// 16-bit signed integer and is converted to an 8-bit signed integer with |
129 | /// saturation. Positive values greater than 0x7F are saturated to 0x7F. |
130 | /// Negative values less than 0x80 are saturated to 0x80. The converted |
131 | /// [4 x i8] values are written to the lower 32 bits of the result. |
132 | /// \param __m2 |
133 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
134 | /// 16-bit signed integer and is converted to an 8-bit signed integer with |
135 | /// saturation. Positive values greater than 0x7F are saturated to 0x7F. |
136 | /// Negative values less than 0x80 are saturated to 0x80. The converted |
137 | /// [4 x i8] values are written to the upper 32 bits of the result. |
138 | /// \returns A 64-bit integer vector of [8 x i8] containing the converted |
139 | /// values. |
140 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
141 | _mm_packs_pi16(__m64 __m1, __m64 __m2) |
142 | { |
143 | return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); |
144 | } |
145 | |
146 | /// Converts 32-bit signed integers from both 64-bit integer vector |
147 | /// parameters of [2 x i32] into 16-bit signed integer values, and constructs |
148 | /// a 64-bit integer vector of [4 x i16] as the result. Positive values |
149 | /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than |
150 | /// 0x8000 are saturated to 0x8000. |
151 | /// |
152 | /// \headerfile <x86intrin.h> |
153 | /// |
154 | /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. |
155 | /// |
156 | /// \param __m1 |
157 | /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a |
158 | /// 32-bit signed integer and is converted to a 16-bit signed integer with |
159 | /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. |
160 | /// Negative values less than 0x8000 are saturated to 0x8000. The converted |
161 | /// [2 x i16] values are written to the lower 32 bits of the result. |
162 | /// \param __m2 |
163 | /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a |
164 | /// 32-bit signed integer and is converted to a 16-bit signed integer with |
165 | /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. |
166 | /// Negative values less than 0x8000 are saturated to 0x8000. The converted |
167 | /// [2 x i16] values are written to the upper 32 bits of the result. |
168 | /// \returns A 64-bit integer vector of [4 x i16] containing the converted |
169 | /// values. |
170 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
171 | _mm_packs_pi32(__m64 __m1, __m64 __m2) |
172 | { |
173 | return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); |
174 | } |
175 | |
176 | /// Converts 16-bit signed integers from both 64-bit integer vector |
177 | /// parameters of [4 x i16] into 8-bit unsigned integer values, and |
178 | /// constructs a 64-bit integer vector of [8 x i8] as the result. Values |
179 | /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated |
180 | /// to 0. |
181 | /// |
182 | /// \headerfile <x86intrin.h> |
183 | /// |
184 | /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. |
185 | /// |
186 | /// \param __m1 |
187 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
188 | /// 16-bit signed integer and is converted to an 8-bit unsigned integer with |
189 | /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less |
190 | /// than 0 are saturated to 0. The converted [4 x i8] values are written to |
191 | /// the lower 32 bits of the result. |
192 | /// \param __m2 |
193 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
194 | /// 16-bit signed integer and is converted to an 8-bit unsigned integer with |
195 | /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less |
196 | /// than 0 are saturated to 0. The converted [4 x i8] values are written to |
197 | /// the upper 32 bits of the result. |
198 | /// \returns A 64-bit integer vector of [8 x i8] containing the converted |
199 | /// values. |
200 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
201 | _mm_packs_pu16(__m64 __m1, __m64 __m2) |
202 | { |
203 | return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); |
204 | } |
205 | |
206 | /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] |
207 | /// and interleaves them into a 64-bit integer vector of [8 x i8]. |
208 | /// |
209 | /// \headerfile <x86intrin.h> |
210 | /// |
211 | /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. |
212 | /// |
213 | /// \param __m1 |
214 | /// A 64-bit integer vector of [8 x i8]. \n |
215 | /// Bits [39:32] are written to bits [7:0] of the result. \n |
216 | /// Bits [47:40] are written to bits [23:16] of the result. \n |
217 | /// Bits [55:48] are written to bits [39:32] of the result. \n |
218 | /// Bits [63:56] are written to bits [55:48] of the result. |
219 | /// \param __m2 |
220 | /// A 64-bit integer vector of [8 x i8]. |
221 | /// Bits [39:32] are written to bits [15:8] of the result. \n |
222 | /// Bits [47:40] are written to bits [31:24] of the result. \n |
223 | /// Bits [55:48] are written to bits [47:40] of the result. \n |
224 | /// Bits [63:56] are written to bits [63:56] of the result. |
225 | /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved |
226 | /// values. |
227 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
228 | _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) |
229 | { |
230 | return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); |
231 | } |
232 | |
233 | /// Unpacks the upper 32 bits from two 64-bit integer vectors of |
234 | /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. |
235 | /// |
236 | /// \headerfile <x86intrin.h> |
237 | /// |
238 | /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. |
239 | /// |
240 | /// \param __m1 |
241 | /// A 64-bit integer vector of [4 x i16]. |
242 | /// Bits [47:32] are written to bits [15:0] of the result. \n |
243 | /// Bits [63:48] are written to bits [47:32] of the result. |
244 | /// \param __m2 |
245 | /// A 64-bit integer vector of [4 x i16]. |
246 | /// Bits [47:32] are written to bits [31:16] of the result. \n |
247 | /// Bits [63:48] are written to bits [63:48] of the result. |
248 | /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved |
249 | /// values. |
250 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
251 | _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) |
252 | { |
253 | return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); |
254 | } |
255 | |
256 | /// Unpacks the upper 32 bits from two 64-bit integer vectors of |
257 | /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. |
258 | /// |
259 | /// \headerfile <x86intrin.h> |
260 | /// |
261 | /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. |
262 | /// |
263 | /// \param __m1 |
264 | /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to |
265 | /// the lower 32 bits of the result. |
266 | /// \param __m2 |
267 | /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to |
268 | /// the upper 32 bits of the result. |
269 | /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved |
270 | /// values. |
271 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
272 | _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) |
273 | { |
274 | return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); |
275 | } |
276 | |
277 | /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] |
278 | /// and interleaves them into a 64-bit integer vector of [8 x i8]. |
279 | /// |
280 | /// \headerfile <x86intrin.h> |
281 | /// |
282 | /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. |
283 | /// |
284 | /// \param __m1 |
285 | /// A 64-bit integer vector of [8 x i8]. |
286 | /// Bits [7:0] are written to bits [7:0] of the result. \n |
287 | /// Bits [15:8] are written to bits [23:16] of the result. \n |
288 | /// Bits [23:16] are written to bits [39:32] of the result. \n |
289 | /// Bits [31:24] are written to bits [55:48] of the result. |
290 | /// \param __m2 |
291 | /// A 64-bit integer vector of [8 x i8]. |
292 | /// Bits [7:0] are written to bits [15:8] of the result. \n |
293 | /// Bits [15:8] are written to bits [31:24] of the result. \n |
294 | /// Bits [23:16] are written to bits [47:40] of the result. \n |
295 | /// Bits [31:24] are written to bits [63:56] of the result. |
296 | /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved |
297 | /// values. |
298 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
299 | _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) |
300 | { |
301 | return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); |
302 | } |
303 | |
304 | /// Unpacks the lower 32 bits from two 64-bit integer vectors of |
305 | /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. |
306 | /// |
307 | /// \headerfile <x86intrin.h> |
308 | /// |
309 | /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. |
310 | /// |
311 | /// \param __m1 |
312 | /// A 64-bit integer vector of [4 x i16]. |
313 | /// Bits [15:0] are written to bits [15:0] of the result. \n |
314 | /// Bits [31:16] are written to bits [47:32] of the result. |
315 | /// \param __m2 |
316 | /// A 64-bit integer vector of [4 x i16]. |
317 | /// Bits [15:0] are written to bits [31:16] of the result. \n |
318 | /// Bits [31:16] are written to bits [63:48] of the result. |
319 | /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved |
320 | /// values. |
321 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
322 | _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) |
323 | { |
324 | return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); |
325 | } |
326 | |
327 | /// Unpacks the lower 32 bits from two 64-bit integer vectors of |
328 | /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. |
329 | /// |
330 | /// \headerfile <x86intrin.h> |
331 | /// |
332 | /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. |
333 | /// |
334 | /// \param __m1 |
335 | /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to |
336 | /// the lower 32 bits of the result. |
337 | /// \param __m2 |
338 | /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to |
339 | /// the upper 32 bits of the result. |
340 | /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved |
341 | /// values. |
342 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
343 | _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) |
344 | { |
345 | return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); |
346 | } |
347 | |
348 | /// Adds each 8-bit integer element of the first 64-bit integer vector |
349 | /// of [8 x i8] to the corresponding 8-bit integer element of the second |
350 | /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are |
351 | /// packed into a 64-bit integer vector of [8 x i8]. |
352 | /// |
353 | /// \headerfile <x86intrin.h> |
354 | /// |
355 | /// This intrinsic corresponds to the <c> PADDB </c> instruction. |
356 | /// |
357 | /// \param __m1 |
358 | /// A 64-bit integer vector of [8 x i8]. |
359 | /// \param __m2 |
360 | /// A 64-bit integer vector of [8 x i8]. |
361 | /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both |
362 | /// parameters. |
363 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
364 | _mm_add_pi8(__m64 __m1, __m64 __m2) |
365 | { |
366 | return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); |
367 | } |
368 | |
369 | /// Adds each 16-bit integer element of the first 64-bit integer vector |
370 | /// of [4 x i16] to the corresponding 16-bit integer element of the second |
371 | /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are |
372 | /// packed into a 64-bit integer vector of [4 x i16]. |
373 | /// |
374 | /// \headerfile <x86intrin.h> |
375 | /// |
376 | /// This intrinsic corresponds to the <c> PADDW </c> instruction. |
377 | /// |
378 | /// \param __m1 |
379 | /// A 64-bit integer vector of [4 x i16]. |
380 | /// \param __m2 |
381 | /// A 64-bit integer vector of [4 x i16]. |
382 | /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both |
383 | /// parameters. |
384 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
385 | _mm_add_pi16(__m64 __m1, __m64 __m2) |
386 | { |
387 | return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); |
388 | } |
389 | |
390 | /// Adds each 32-bit integer element of the first 64-bit integer vector |
391 | /// of [2 x i32] to the corresponding 32-bit integer element of the second |
392 | /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are |
393 | /// packed into a 64-bit integer vector of [2 x i32]. |
394 | /// |
395 | /// \headerfile <x86intrin.h> |
396 | /// |
397 | /// This intrinsic corresponds to the <c> PADDD </c> instruction. |
398 | /// |
399 | /// \param __m1 |
400 | /// A 64-bit integer vector of [2 x i32]. |
401 | /// \param __m2 |
402 | /// A 64-bit integer vector of [2 x i32]. |
403 | /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both |
404 | /// parameters. |
405 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
406 | _mm_add_pi32(__m64 __m1, __m64 __m2) |
407 | { |
408 | return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); |
409 | } |
410 | |
411 | /// Adds each 8-bit signed integer element of the first 64-bit integer |
412 | /// vector of [8 x i8] to the corresponding 8-bit signed integer element of |
413 | /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than |
414 | /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to |
415 | /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. |
416 | /// |
417 | /// \headerfile <x86intrin.h> |
418 | /// |
419 | /// This intrinsic corresponds to the <c> PADDSB </c> instruction. |
420 | /// |
421 | /// \param __m1 |
422 | /// A 64-bit integer vector of [8 x i8]. |
423 | /// \param __m2 |
424 | /// A 64-bit integer vector of [8 x i8]. |
425 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums |
426 | /// of both parameters. |
427 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
428 | _mm_adds_pi8(__m64 __m1, __m64 __m2) |
429 | { |
430 | return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); |
431 | } |
432 | |
433 | /// Adds each 16-bit signed integer element of the first 64-bit integer |
434 | /// vector of [4 x i16] to the corresponding 16-bit signed integer element of |
435 | /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than |
436 | /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are |
437 | /// saturated to 0x8000. The results are packed into a 64-bit integer vector |
438 | /// of [4 x i16]. |
439 | /// |
440 | /// \headerfile <x86intrin.h> |
441 | /// |
442 | /// This intrinsic corresponds to the <c> PADDSW </c> instruction. |
443 | /// |
444 | /// \param __m1 |
445 | /// A 64-bit integer vector of [4 x i16]. |
446 | /// \param __m2 |
447 | /// A 64-bit integer vector of [4 x i16]. |
448 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums |
449 | /// of both parameters. |
450 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
451 | _mm_adds_pi16(__m64 __m1, __m64 __m2) |
452 | { |
453 | return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); |
454 | } |
455 | |
456 | /// Adds each 8-bit unsigned integer element of the first 64-bit integer |
457 | /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of |
458 | /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are |
459 | /// saturated to 0xFF. The results are packed into a 64-bit integer vector of |
460 | /// [8 x i8]. |
461 | /// |
462 | /// \headerfile <x86intrin.h> |
463 | /// |
464 | /// This intrinsic corresponds to the <c> PADDUSB </c> instruction. |
465 | /// |
466 | /// \param __m1 |
467 | /// A 64-bit integer vector of [8 x i8]. |
468 | /// \param __m2 |
469 | /// A 64-bit integer vector of [8 x i8]. |
470 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated |
471 | /// unsigned sums of both parameters. |
472 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
473 | _mm_adds_pu8(__m64 __m1, __m64 __m2) |
474 | { |
475 | return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); |
476 | } |
477 | |
478 | /// Adds each 16-bit unsigned integer element of the first 64-bit integer |
479 | /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element |
480 | /// of the second 64-bit integer vector of [4 x i16]. Sums greater than |
481 | /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit |
482 | /// integer vector of [4 x i16]. |
483 | /// |
484 | /// \headerfile <x86intrin.h> |
485 | /// |
486 | /// This intrinsic corresponds to the <c> PADDUSW </c> instruction. |
487 | /// |
488 | /// \param __m1 |
489 | /// A 64-bit integer vector of [4 x i16]. |
490 | /// \param __m2 |
491 | /// A 64-bit integer vector of [4 x i16]. |
492 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated |
493 | /// unsigned sums of both parameters. |
494 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
495 | _mm_adds_pu16(__m64 __m1, __m64 __m2) |
496 | { |
497 | return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); |
498 | } |
499 | |
500 | /// Subtracts each 8-bit integer element of the second 64-bit integer |
501 | /// vector of [8 x i8] from the corresponding 8-bit integer element of the |
502 | /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results |
503 | /// are packed into a 64-bit integer vector of [8 x i8]. |
504 | /// |
505 | /// \headerfile <x86intrin.h> |
506 | /// |
507 | /// This intrinsic corresponds to the <c> PSUBB </c> instruction. |
508 | /// |
509 | /// \param __m1 |
510 | /// A 64-bit integer vector of [8 x i8] containing the minuends. |
511 | /// \param __m2 |
512 | /// A 64-bit integer vector of [8 x i8] containing the subtrahends. |
513 | /// \returns A 64-bit integer vector of [8 x i8] containing the differences of |
514 | /// both parameters. |
515 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
516 | _mm_sub_pi8(__m64 __m1, __m64 __m2) |
517 | { |
518 | return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); |
519 | } |
520 | |
521 | /// Subtracts each 16-bit integer element of the second 64-bit integer |
522 | /// vector of [4 x i16] from the corresponding 16-bit integer element of the |
523 | /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the |
524 | /// results are packed into a 64-bit integer vector of [4 x i16]. |
525 | /// |
526 | /// \headerfile <x86intrin.h> |
527 | /// |
528 | /// This intrinsic corresponds to the <c> PSUBW </c> instruction. |
529 | /// |
530 | /// \param __m1 |
531 | /// A 64-bit integer vector of [4 x i16] containing the minuends. |
532 | /// \param __m2 |
533 | /// A 64-bit integer vector of [4 x i16] containing the subtrahends. |
534 | /// \returns A 64-bit integer vector of [4 x i16] containing the differences of |
535 | /// both parameters. |
536 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
537 | _mm_sub_pi16(__m64 __m1, __m64 __m2) |
538 | { |
539 | return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); |
540 | } |
541 | |
542 | /// Subtracts each 32-bit integer element of the second 64-bit integer |
543 | /// vector of [2 x i32] from the corresponding 32-bit integer element of the |
544 | /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the |
545 | /// results are packed into a 64-bit integer vector of [2 x i32]. |
546 | /// |
547 | /// \headerfile <x86intrin.h> |
548 | /// |
549 | /// This intrinsic corresponds to the <c> PSUBD </c> instruction. |
550 | /// |
551 | /// \param __m1 |
552 | /// A 64-bit integer vector of [2 x i32] containing the minuends. |
553 | /// \param __m2 |
554 | /// A 64-bit integer vector of [2 x i32] containing the subtrahends. |
555 | /// \returns A 64-bit integer vector of [2 x i32] containing the differences of |
556 | /// both parameters. |
557 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
558 | _mm_sub_pi32(__m64 __m1, __m64 __m2) |
559 | { |
560 | return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); |
561 | } |
562 | |
563 | /// Subtracts each 8-bit signed integer element of the second 64-bit |
564 | /// integer vector of [8 x i8] from the corresponding 8-bit signed integer |
565 | /// element of the first 64-bit integer vector of [8 x i8]. Positive results |
566 | /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 |
567 | /// are saturated to 0x80. The results are packed into a 64-bit integer |
568 | /// vector of [8 x i8]. |
569 | /// |
570 | /// \headerfile <x86intrin.h> |
571 | /// |
572 | /// This intrinsic corresponds to the <c> PSUBSB </c> instruction. |
573 | /// |
574 | /// \param __m1 |
575 | /// A 64-bit integer vector of [8 x i8] containing the minuends. |
576 | /// \param __m2 |
577 | /// A 64-bit integer vector of [8 x i8] containing the subtrahends. |
578 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated |
579 | /// differences of both parameters. |
580 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
581 | _mm_subs_pi8(__m64 __m1, __m64 __m2) |
582 | { |
583 | return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); |
584 | } |
585 | |
586 | /// Subtracts each 16-bit signed integer element of the second 64-bit |
587 | /// integer vector of [4 x i16] from the corresponding 16-bit signed integer |
588 | /// element of the first 64-bit integer vector of [4 x i16]. Positive results |
589 | /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than |
590 | /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit |
591 | /// integer vector of [4 x i16]. |
592 | /// |
593 | /// \headerfile <x86intrin.h> |
594 | /// |
595 | /// This intrinsic corresponds to the <c> PSUBSW </c> instruction. |
596 | /// |
597 | /// \param __m1 |
598 | /// A 64-bit integer vector of [4 x i16] containing the minuends. |
599 | /// \param __m2 |
600 | /// A 64-bit integer vector of [4 x i16] containing the subtrahends. |
601 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated |
602 | /// differences of both parameters. |
603 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
604 | _mm_subs_pi16(__m64 __m1, __m64 __m2) |
605 | { |
606 | return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); |
607 | } |
608 | |
609 | /// Subtracts each 8-bit unsigned integer element of the second 64-bit |
610 | /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer |
611 | /// element of the first 64-bit integer vector of [8 x i8]. |
612 | /// |
613 | /// If an element of the first vector is less than the corresponding element |
614 | /// of the second vector, the result is saturated to 0. The results are |
615 | /// packed into a 64-bit integer vector of [8 x i8]. |
616 | /// |
617 | /// \headerfile <x86intrin.h> |
618 | /// |
619 | /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. |
620 | /// |
621 | /// \param __m1 |
622 | /// A 64-bit integer vector of [8 x i8] containing the minuends. |
623 | /// \param __m2 |
624 | /// A 64-bit integer vector of [8 x i8] containing the subtrahends. |
625 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated |
626 | /// differences of both parameters. |
627 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
628 | _mm_subs_pu8(__m64 __m1, __m64 __m2) |
629 | { |
630 | return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); |
631 | } |
632 | |
633 | /// Subtracts each 16-bit unsigned integer element of the second 64-bit |
634 | /// integer vector of [4 x i16] from the corresponding 16-bit unsigned |
635 | /// integer element of the first 64-bit integer vector of [4 x i16]. |
636 | /// |
637 | /// If an element of the first vector is less than the corresponding element |
638 | /// of the second vector, the result is saturated to 0. The results are |
639 | /// packed into a 64-bit integer vector of [4 x i16]. |
640 | /// |
641 | /// \headerfile <x86intrin.h> |
642 | /// |
643 | /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. |
644 | /// |
645 | /// \param __m1 |
646 | /// A 64-bit integer vector of [4 x i16] containing the minuends. |
647 | /// \param __m2 |
648 | /// A 64-bit integer vector of [4 x i16] containing the subtrahends. |
649 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated |
650 | /// differences of both parameters. |
651 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
652 | _mm_subs_pu16(__m64 __m1, __m64 __m2) |
653 | { |
654 | return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); |
655 | } |
656 | |
657 | /// Multiplies each 16-bit signed integer element of the first 64-bit |
658 | /// integer vector of [4 x i16] by the corresponding 16-bit signed integer |
659 | /// element of the second 64-bit integer vector of [4 x i16] and get four |
660 | /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. |
661 | /// The lower 32 bits of these two sums are packed into a 64-bit integer |
662 | /// vector of [2 x i32]. |
663 | /// |
664 | /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] |
665 | /// of both parameters are multiplied, and the sum of both results is written |
666 | /// to bits [31:0] of the result. |
667 | /// |
668 | /// \headerfile <x86intrin.h> |
669 | /// |
670 | /// This intrinsic corresponds to the <c> PMADDWD </c> instruction. |
671 | /// |
672 | /// \param __m1 |
673 | /// A 64-bit integer vector of [4 x i16]. |
674 | /// \param __m2 |
675 | /// A 64-bit integer vector of [4 x i16]. |
676 | /// \returns A 64-bit integer vector of [2 x i32] containing the sums of |
677 | /// products of both parameters. |
678 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
679 | _mm_madd_pi16(__m64 __m1, __m64 __m2) |
680 | { |
681 | return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); |
682 | } |
683 | |
684 | /// Multiplies each 16-bit signed integer element of the first 64-bit |
685 | /// integer vector of [4 x i16] by the corresponding 16-bit signed integer |
686 | /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper |
687 | /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. |
688 | /// |
689 | /// \headerfile <x86intrin.h> |
690 | /// |
691 | /// This intrinsic corresponds to the <c> PMULHW </c> instruction. |
692 | /// |
693 | /// \param __m1 |
694 | /// A 64-bit integer vector of [4 x i16]. |
695 | /// \param __m2 |
696 | /// A 64-bit integer vector of [4 x i16]. |
697 | /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits |
698 | /// of the products of both parameters. |
699 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
700 | _mm_mulhi_pi16(__m64 __m1, __m64 __m2) |
701 | { |
702 | return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); |
703 | } |
704 | |
705 | /// Multiplies each 16-bit signed integer element of the first 64-bit |
706 | /// integer vector of [4 x i16] by the corresponding 16-bit signed integer |
707 | /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower |
708 | /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. |
709 | /// |
710 | /// \headerfile <x86intrin.h> |
711 | /// |
712 | /// This intrinsic corresponds to the <c> PMULLW </c> instruction. |
713 | /// |
714 | /// \param __m1 |
715 | /// A 64-bit integer vector of [4 x i16]. |
716 | /// \param __m2 |
717 | /// A 64-bit integer vector of [4 x i16]. |
718 | /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits |
719 | /// of the products of both parameters. |
720 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
721 | _mm_mullo_pi16(__m64 __m1, __m64 __m2) |
722 | { |
723 | return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); |
724 | } |
725 | |
726 | /// Left-shifts each 16-bit signed integer element of the first |
727 | /// parameter, which is a 64-bit integer vector of [4 x i16], by the number |
728 | /// of bits specified by the second parameter, which is a 64-bit integer. The |
729 | /// lower 16 bits of the results are packed into a 64-bit integer vector of |
730 | /// [4 x i16]. |
731 | /// |
732 | /// \headerfile <x86intrin.h> |
733 | /// |
734 | /// This intrinsic corresponds to the <c> PSLLW </c> instruction. |
735 | /// |
736 | /// \param __m |
737 | /// A 64-bit integer vector of [4 x i16]. |
738 | /// \param __count |
739 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
740 | /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted |
741 | /// values. If \a __count is greater or equal to 16, the result is set to all |
742 | /// 0. |
743 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
744 | _mm_sll_pi16(__m64 __m, __m64 __count) |
745 | { |
746 | return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); |
747 | } |
748 | |
749 | /// Left-shifts each 16-bit signed integer element of a 64-bit integer |
750 | /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. |
751 | /// The lower 16 bits of the results are packed into a 64-bit integer vector |
752 | /// of [4 x i16]. |
753 | /// |
754 | /// \headerfile <x86intrin.h> |
755 | /// |
756 | /// This intrinsic corresponds to the <c> PSLLW </c> instruction. |
757 | /// |
758 | /// \param __m |
759 | /// A 64-bit integer vector of [4 x i16]. |
760 | /// \param __count |
761 | /// A 32-bit integer value. |
762 | /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted |
763 | /// values. If \a __count is greater or equal to 16, the result is set to all |
764 | /// 0. |
765 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
766 | _mm_slli_pi16(__m64 __m, int __count) |
767 | { |
768 | return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); |
769 | } |
770 | |
771 | /// Left-shifts each 32-bit signed integer element of the first |
772 | /// parameter, which is a 64-bit integer vector of [2 x i32], by the number |
773 | /// of bits specified by the second parameter, which is a 64-bit integer. The |
774 | /// lower 32 bits of the results are packed into a 64-bit integer vector of |
775 | /// [2 x i32]. |
776 | /// |
777 | /// \headerfile <x86intrin.h> |
778 | /// |
779 | /// This intrinsic corresponds to the <c> PSLLD </c> instruction. |
780 | /// |
781 | /// \param __m |
782 | /// A 64-bit integer vector of [2 x i32]. |
783 | /// \param __count |
784 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
785 | /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted |
786 | /// values. If \a __count is greater or equal to 32, the result is set to all |
787 | /// 0. |
788 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
789 | _mm_sll_pi32(__m64 __m, __m64 __count) |
790 | { |
791 | return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); |
792 | } |
793 | |
794 | /// Left-shifts each 32-bit signed integer element of a 64-bit integer |
795 | /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. |
796 | /// The lower 32 bits of the results are packed into a 64-bit integer vector |
797 | /// of [2 x i32]. |
798 | /// |
799 | /// \headerfile <x86intrin.h> |
800 | /// |
801 | /// This intrinsic corresponds to the <c> PSLLD </c> instruction. |
802 | /// |
803 | /// \param __m |
804 | /// A 64-bit integer vector of [2 x i32]. |
805 | /// \param __count |
806 | /// A 32-bit integer value. |
807 | /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted |
808 | /// values. If \a __count is greater or equal to 32, the result is set to all |
809 | /// 0. |
810 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
811 | _mm_slli_pi32(__m64 __m, int __count) |
812 | { |
813 | return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); |
814 | } |
815 | |
816 | /// Left-shifts the first 64-bit integer parameter by the number of bits |
817 | /// specified by the second 64-bit integer parameter. The lower 64 bits of |
818 | /// result are returned. |
819 | /// |
820 | /// \headerfile <x86intrin.h> |
821 | /// |
822 | /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. |
823 | /// |
824 | /// \param __m |
825 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
826 | /// \param __count |
827 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
828 | /// \returns A 64-bit integer vector containing the left-shifted value. If |
829 | /// \a __count is greater or equal to 64, the result is set to 0. |
830 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
831 | _mm_sll_si64(__m64 __m, __m64 __count) |
832 | { |
833 | return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); |
834 | } |
835 | |
836 | /// Left-shifts the first parameter, which is a 64-bit integer, by the |
837 | /// number of bits specified by the second parameter, which is a 32-bit |
838 | /// integer. The lower 64 bits of result are returned. |
839 | /// |
840 | /// \headerfile <x86intrin.h> |
841 | /// |
842 | /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. |
843 | /// |
844 | /// \param __m |
845 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
846 | /// \param __count |
847 | /// A 32-bit integer value. |
848 | /// \returns A 64-bit integer vector containing the left-shifted value. If |
849 | /// \a __count is greater or equal to 64, the result is set to 0. |
850 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
851 | _mm_slli_si64(__m64 __m, int __count) |
852 | { |
853 | return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); |
854 | } |
855 | |
856 | /// Right-shifts each 16-bit integer element of the first parameter, |
857 | /// which is a 64-bit integer vector of [4 x i16], by the number of bits |
858 | /// specified by the second parameter, which is a 64-bit integer. |
859 | /// |
860 | /// High-order bits are filled with the sign bit of the initial value of each |
861 | /// 16-bit element. The 16-bit results are packed into a 64-bit integer |
862 | /// vector of [4 x i16]. |
863 | /// |
864 | /// \headerfile <x86intrin.h> |
865 | /// |
866 | /// This intrinsic corresponds to the <c> PSRAW </c> instruction. |
867 | /// |
868 | /// \param __m |
869 | /// A 64-bit integer vector of [4 x i16]. |
870 | /// \param __count |
871 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
872 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
873 | /// values. |
874 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
875 | _mm_sra_pi16(__m64 __m, __m64 __count) |
876 | { |
877 | return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); |
878 | } |
879 | |
880 | /// Right-shifts each 16-bit integer element of a 64-bit integer vector |
881 | /// of [4 x i16] by the number of bits specified by a 32-bit integer. |
882 | /// |
883 | /// High-order bits are filled with the sign bit of the initial value of each |
884 | /// 16-bit element. The 16-bit results are packed into a 64-bit integer |
885 | /// vector of [4 x i16]. |
886 | /// |
887 | /// \headerfile <x86intrin.h> |
888 | /// |
889 | /// This intrinsic corresponds to the <c> PSRAW </c> instruction. |
890 | /// |
891 | /// \param __m |
892 | /// A 64-bit integer vector of [4 x i16]. |
893 | /// \param __count |
894 | /// A 32-bit integer value. |
895 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
896 | /// values. |
897 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
898 | _mm_srai_pi16(__m64 __m, int __count) |
899 | { |
900 | return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); |
901 | } |
902 | |
903 | /// Right-shifts each 32-bit integer element of the first parameter, |
904 | /// which is a 64-bit integer vector of [2 x i32], by the number of bits |
905 | /// specified by the second parameter, which is a 64-bit integer. |
906 | /// |
907 | /// High-order bits are filled with the sign bit of the initial value of each |
908 | /// 32-bit element. The 32-bit results are packed into a 64-bit integer |
909 | /// vector of [2 x i32]. |
910 | /// |
911 | /// \headerfile <x86intrin.h> |
912 | /// |
913 | /// This intrinsic corresponds to the <c> PSRAD </c> instruction. |
914 | /// |
915 | /// \param __m |
916 | /// A 64-bit integer vector of [2 x i32]. |
917 | /// \param __count |
918 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
919 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
920 | /// values. |
921 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
922 | _mm_sra_pi32(__m64 __m, __m64 __count) |
923 | { |
924 | return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); |
925 | } |
926 | |
927 | /// Right-shifts each 32-bit integer element of a 64-bit integer vector |
928 | /// of [2 x i32] by the number of bits specified by a 32-bit integer. |
929 | /// |
930 | /// High-order bits are filled with the sign bit of the initial value of each |
931 | /// 32-bit element. The 32-bit results are packed into a 64-bit integer |
932 | /// vector of [2 x i32]. |
933 | /// |
934 | /// \headerfile <x86intrin.h> |
935 | /// |
936 | /// This intrinsic corresponds to the <c> PSRAD </c> instruction. |
937 | /// |
938 | /// \param __m |
939 | /// A 64-bit integer vector of [2 x i32]. |
940 | /// \param __count |
941 | /// A 32-bit integer value. |
942 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
943 | /// values. |
944 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
945 | _mm_srai_pi32(__m64 __m, int __count) |
946 | { |
947 | return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); |
948 | } |
949 | |
950 | /// Right-shifts each 16-bit integer element of the first parameter, |
951 | /// which is a 64-bit integer vector of [4 x i16], by the number of bits |
952 | /// specified by the second parameter, which is a 64-bit integer. |
953 | /// |
954 | /// High-order bits are cleared. The 16-bit results are packed into a 64-bit |
955 | /// integer vector of [4 x i16]. |
956 | /// |
957 | /// \headerfile <x86intrin.h> |
958 | /// |
959 | /// This intrinsic corresponds to the <c> PSRLW </c> instruction. |
960 | /// |
961 | /// \param __m |
962 | /// A 64-bit integer vector of [4 x i16]. |
963 | /// \param __count |
964 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
965 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
966 | /// values. |
967 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
968 | _mm_srl_pi16(__m64 __m, __m64 __count) |
969 | { |
970 | return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); |
971 | } |
972 | |
973 | /// Right-shifts each 16-bit integer element of a 64-bit integer vector |
974 | /// of [4 x i16] by the number of bits specified by a 32-bit integer. |
975 | /// |
976 | /// High-order bits are cleared. The 16-bit results are packed into a 64-bit |
977 | /// integer vector of [4 x i16]. |
978 | /// |
979 | /// \headerfile <x86intrin.h> |
980 | /// |
981 | /// This intrinsic corresponds to the <c> PSRLW </c> instruction. |
982 | /// |
983 | /// \param __m |
984 | /// A 64-bit integer vector of [4 x i16]. |
985 | /// \param __count |
986 | /// A 32-bit integer value. |
987 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
988 | /// values. |
989 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
990 | _mm_srli_pi16(__m64 __m, int __count) |
991 | { |
992 | return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); |
993 | } |
994 | |
995 | /// Right-shifts each 32-bit integer element of the first parameter, |
996 | /// which is a 64-bit integer vector of [2 x i32], by the number of bits |
997 | /// specified by the second parameter, which is a 64-bit integer. |
998 | /// |
999 | /// High-order bits are cleared. The 32-bit results are packed into a 64-bit |
1000 | /// integer vector of [2 x i32]. |
1001 | /// |
1002 | /// \headerfile <x86intrin.h> |
1003 | /// |
1004 | /// This intrinsic corresponds to the <c> PSRLD </c> instruction. |
1005 | /// |
1006 | /// \param __m |
1007 | /// A 64-bit integer vector of [2 x i32]. |
1008 | /// \param __count |
1009 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1010 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
1011 | /// values. |
1012 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1013 | _mm_srl_pi32(__m64 __m, __m64 __count) |
1014 | { |
1015 | return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); |
1016 | } |
1017 | |
1018 | /// Right-shifts each 32-bit integer element of a 64-bit integer vector |
1019 | /// of [2 x i32] by the number of bits specified by a 32-bit integer. |
1020 | /// |
1021 | /// High-order bits are cleared. The 32-bit results are packed into a 64-bit |
1022 | /// integer vector of [2 x i32]. |
1023 | /// |
1024 | /// \headerfile <x86intrin.h> |
1025 | /// |
1026 | /// This intrinsic corresponds to the <c> PSRLD </c> instruction. |
1027 | /// |
1028 | /// \param __m |
1029 | /// A 64-bit integer vector of [2 x i32]. |
1030 | /// \param __count |
1031 | /// A 32-bit integer value. |
1032 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
1033 | /// values. |
1034 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1035 | _mm_srli_pi32(__m64 __m, int __count) |
1036 | { |
1037 | return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); |
1038 | } |
1039 | |
1040 | /// Right-shifts the first 64-bit integer parameter by the number of bits |
1041 | /// specified by the second 64-bit integer parameter. |
1042 | /// |
1043 | /// High-order bits are cleared. |
1044 | /// |
1045 | /// \headerfile <x86intrin.h> |
1046 | /// |
1047 | /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. |
1048 | /// |
1049 | /// \param __m |
1050 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1051 | /// \param __count |
1052 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1053 | /// \returns A 64-bit integer vector containing the right-shifted value. |
1054 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1055 | _mm_srl_si64(__m64 __m, __m64 __count) |
1056 | { |
1057 | return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); |
1058 | } |
1059 | |
1060 | /// Right-shifts the first parameter, which is a 64-bit integer, by the |
1061 | /// number of bits specified by the second parameter, which is a 32-bit |
1062 | /// integer. |
1063 | /// |
1064 | /// High-order bits are cleared. |
1065 | /// |
1066 | /// \headerfile <x86intrin.h> |
1067 | /// |
1068 | /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. |
1069 | /// |
1070 | /// \param __m |
1071 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1072 | /// \param __count |
1073 | /// A 32-bit integer value. |
1074 | /// \returns A 64-bit integer vector containing the right-shifted value. |
1075 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1076 | _mm_srli_si64(__m64 __m, int __count) |
1077 | { |
1078 | return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); |
1079 | } |
1080 | |
1081 | /// Performs a bitwise AND of two 64-bit integer vectors. |
1082 | /// |
1083 | /// \headerfile <x86intrin.h> |
1084 | /// |
1085 | /// This intrinsic corresponds to the <c> PAND </c> instruction. |
1086 | /// |
1087 | /// \param __m1 |
1088 | /// A 64-bit integer vector. |
1089 | /// \param __m2 |
1090 | /// A 64-bit integer vector. |
1091 | /// \returns A 64-bit integer vector containing the bitwise AND of both |
1092 | /// parameters. |
1093 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1094 | _mm_and_si64(__m64 __m1, __m64 __m2) |
1095 | { |
1096 | return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); |
1097 | } |
1098 | |
1099 | /// Performs a bitwise NOT of the first 64-bit integer vector, and then |
1100 | /// performs a bitwise AND of the intermediate result and the second 64-bit |
1101 | /// integer vector. |
1102 | /// |
1103 | /// \headerfile <x86intrin.h> |
1104 | /// |
1105 | /// This intrinsic corresponds to the <c> PANDN </c> instruction. |
1106 | /// |
1107 | /// \param __m1 |
1108 | /// A 64-bit integer vector. The one's complement of this parameter is used |
1109 | /// in the bitwise AND. |
1110 | /// \param __m2 |
1111 | /// A 64-bit integer vector. |
1112 | /// \returns A 64-bit integer vector containing the bitwise AND of the second |
1113 | /// parameter and the one's complement of the first parameter. |
1114 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1115 | _mm_andnot_si64(__m64 __m1, __m64 __m2) |
1116 | { |
1117 | return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); |
1118 | } |
1119 | |
1120 | /// Performs a bitwise OR of two 64-bit integer vectors. |
1121 | /// |
1122 | /// \headerfile <x86intrin.h> |
1123 | /// |
1124 | /// This intrinsic corresponds to the <c> POR </c> instruction. |
1125 | /// |
1126 | /// \param __m1 |
1127 | /// A 64-bit integer vector. |
1128 | /// \param __m2 |
1129 | /// A 64-bit integer vector. |
1130 | /// \returns A 64-bit integer vector containing the bitwise OR of both |
1131 | /// parameters. |
1132 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1133 | _mm_or_si64(__m64 __m1, __m64 __m2) |
1134 | { |
1135 | return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); |
1136 | } |
1137 | |
1138 | /// Performs a bitwise exclusive OR of two 64-bit integer vectors. |
1139 | /// |
1140 | /// \headerfile <x86intrin.h> |
1141 | /// |
1142 | /// This intrinsic corresponds to the <c> PXOR </c> instruction. |
1143 | /// |
1144 | /// \param __m1 |
1145 | /// A 64-bit integer vector. |
1146 | /// \param __m2 |
1147 | /// A 64-bit integer vector. |
1148 | /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both |
1149 | /// parameters. |
1150 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1151 | _mm_xor_si64(__m64 __m1, __m64 __m2) |
1152 | { |
1153 | return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); |
1154 | } |
1155 | |
1156 | /// Compares the 8-bit integer elements of two 64-bit integer vectors of |
1157 | /// [8 x i8] to determine if the element of the first vector is equal to the |
1158 | /// corresponding element of the second vector. |
1159 | /// |
1160 | /// The comparison yields 0 for false, 0xFF for true. |
1161 | /// |
1162 | /// \headerfile <x86intrin.h> |
1163 | /// |
1164 | /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. |
1165 | /// |
1166 | /// \param __m1 |
1167 | /// A 64-bit integer vector of [8 x i8]. |
1168 | /// \param __m2 |
1169 | /// A 64-bit integer vector of [8 x i8]. |
1170 | /// \returns A 64-bit integer vector of [8 x i8] containing the comparison |
1171 | /// results. |
1172 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1173 | _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) |
1174 | { |
1175 | return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); |
1176 | } |
1177 | |
1178 | /// Compares the 16-bit integer elements of two 64-bit integer vectors of |
1179 | /// [4 x i16] to determine if the element of the first vector is equal to the |
1180 | /// corresponding element of the second vector. |
1181 | /// |
1182 | /// The comparison yields 0 for false, 0xFFFF for true. |
1183 | /// |
1184 | /// \headerfile <x86intrin.h> |
1185 | /// |
1186 | /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. |
1187 | /// |
1188 | /// \param __m1 |
1189 | /// A 64-bit integer vector of [4 x i16]. |
1190 | /// \param __m2 |
1191 | /// A 64-bit integer vector of [4 x i16]. |
1192 | /// \returns A 64-bit integer vector of [4 x i16] containing the comparison |
1193 | /// results. |
1194 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1195 | _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) |
1196 | { |
1197 | return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); |
1198 | } |
1199 | |
1200 | /// Compares the 32-bit integer elements of two 64-bit integer vectors of |
1201 | /// [2 x i32] to determine if the element of the first vector is equal to the |
1202 | /// corresponding element of the second vector. |
1203 | /// |
1204 | /// The comparison yields 0 for false, 0xFFFFFFFF for true. |
1205 | /// |
1206 | /// \headerfile <x86intrin.h> |
1207 | /// |
1208 | /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. |
1209 | /// |
1210 | /// \param __m1 |
1211 | /// A 64-bit integer vector of [2 x i32]. |
1212 | /// \param __m2 |
1213 | /// A 64-bit integer vector of [2 x i32]. |
1214 | /// \returns A 64-bit integer vector of [2 x i32] containing the comparison |
1215 | /// results. |
1216 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1217 | _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) |
1218 | { |
1219 | return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); |
1220 | } |
1221 | |
1222 | /// Compares the 8-bit integer elements of two 64-bit integer vectors of |
1223 | /// [8 x i8] to determine if the element of the first vector is greater than |
1224 | /// the corresponding element of the second vector. |
1225 | /// |
1226 | /// The comparison yields 0 for false, 0xFF for true. |
1227 | /// |
1228 | /// \headerfile <x86intrin.h> |
1229 | /// |
1230 | /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. |
1231 | /// |
1232 | /// \param __m1 |
1233 | /// A 64-bit integer vector of [8 x i8]. |
1234 | /// \param __m2 |
1235 | /// A 64-bit integer vector of [8 x i8]. |
1236 | /// \returns A 64-bit integer vector of [8 x i8] containing the comparison |
1237 | /// results. |
1238 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1239 | _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) |
1240 | { |
1241 | return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); |
1242 | } |
1243 | |
1244 | /// Compares the 16-bit integer elements of two 64-bit integer vectors of |
1245 | /// [4 x i16] to determine if the element of the first vector is greater than |
1246 | /// the corresponding element of the second vector. |
1247 | /// |
1248 | /// The comparison yields 0 for false, 0xFFFF for true. |
1249 | /// |
1250 | /// \headerfile <x86intrin.h> |
1251 | /// |
1252 | /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. |
1253 | /// |
1254 | /// \param __m1 |
1255 | /// A 64-bit integer vector of [4 x i16]. |
1256 | /// \param __m2 |
1257 | /// A 64-bit integer vector of [4 x i16]. |
1258 | /// \returns A 64-bit integer vector of [4 x i16] containing the comparison |
1259 | /// results. |
1260 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1261 | _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) |
1262 | { |
1263 | return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); |
1264 | } |
1265 | |
1266 | /// Compares the 32-bit integer elements of two 64-bit integer vectors of |
1267 | /// [2 x i32] to determine if the element of the first vector is greater than |
1268 | /// the corresponding element of the second vector. |
1269 | /// |
1270 | /// The comparison yields 0 for false, 0xFFFFFFFF for true. |
1271 | /// |
1272 | /// \headerfile <x86intrin.h> |
1273 | /// |
1274 | /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. |
1275 | /// |
1276 | /// \param __m1 |
1277 | /// A 64-bit integer vector of [2 x i32]. |
1278 | /// \param __m2 |
1279 | /// A 64-bit integer vector of [2 x i32]. |
1280 | /// \returns A 64-bit integer vector of [2 x i32] containing the comparison |
1281 | /// results. |
1282 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1283 | _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) |
1284 | { |
1285 | return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); |
1286 | } |
1287 | |
1288 | /// Constructs a 64-bit integer vector initialized to zero. |
1289 | /// |
1290 | /// \headerfile <x86intrin.h> |
1291 | /// |
1292 | /// This intrinsic corresponds to the <c> PXOR </c> instruction. |
1293 | /// |
1294 | /// \returns An initialized 64-bit integer vector with all elements set to zero. |
1295 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1296 | _mm_setzero_si64(void) |
1297 | { |
1298 | return __extension__ (__m64){ 0LL }; |
1299 | } |
1300 | |
1301 | /// Constructs a 64-bit integer vector initialized with the specified |
1302 | /// 32-bit integer values. |
1303 | /// |
1304 | /// \headerfile <x86intrin.h> |
1305 | /// |
1306 | /// This intrinsic is a utility function and does not correspond to a specific |
1307 | /// instruction. |
1308 | /// |
1309 | /// \param __i1 |
1310 | /// A 32-bit integer value used to initialize the upper 32 bits of the |
1311 | /// result. |
1312 | /// \param __i0 |
1313 | /// A 32-bit integer value used to initialize the lower 32 bits of the |
1314 | /// result. |
1315 | /// \returns An initialized 64-bit integer vector. |
1316 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1317 | _mm_set_pi32(int __i1, int __i0) |
1318 | { |
1319 | return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); |
1320 | } |
1321 | |
1322 | /// Constructs a 64-bit integer vector initialized with the specified |
1323 | /// 16-bit integer values. |
1324 | /// |
1325 | /// \headerfile <x86intrin.h> |
1326 | /// |
1327 | /// This intrinsic is a utility function and does not correspond to a specific |
1328 | /// instruction. |
1329 | /// |
1330 | /// \param __s3 |
1331 | /// A 16-bit integer value used to initialize bits [63:48] of the result. |
1332 | /// \param __s2 |
1333 | /// A 16-bit integer value used to initialize bits [47:32] of the result. |
1334 | /// \param __s1 |
1335 | /// A 16-bit integer value used to initialize bits [31:16] of the result. |
1336 | /// \param __s0 |
1337 | /// A 16-bit integer value used to initialize bits [15:0] of the result. |
1338 | /// \returns An initialized 64-bit integer vector. |
1339 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1340 | _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) |
1341 | { |
1342 | return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); |
1343 | } |
1344 | |
1345 | /// Constructs a 64-bit integer vector initialized with the specified |
1346 | /// 8-bit integer values. |
1347 | /// |
1348 | /// \headerfile <x86intrin.h> |
1349 | /// |
1350 | /// This intrinsic is a utility function and does not correspond to a specific |
1351 | /// instruction. |
1352 | /// |
1353 | /// \param __b7 |
1354 | /// An 8-bit integer value used to initialize bits [63:56] of the result. |
1355 | /// \param __b6 |
1356 | /// An 8-bit integer value used to initialize bits [55:48] of the result. |
1357 | /// \param __b5 |
1358 | /// An 8-bit integer value used to initialize bits [47:40] of the result. |
1359 | /// \param __b4 |
1360 | /// An 8-bit integer value used to initialize bits [39:32] of the result. |
1361 | /// \param __b3 |
1362 | /// An 8-bit integer value used to initialize bits [31:24] of the result. |
1363 | /// \param __b2 |
1364 | /// An 8-bit integer value used to initialize bits [23:16] of the result. |
1365 | /// \param __b1 |
1366 | /// An 8-bit integer value used to initialize bits [15:8] of the result. |
1367 | /// \param __b0 |
1368 | /// An 8-bit integer value used to initialize bits [7:0] of the result. |
1369 | /// \returns An initialized 64-bit integer vector. |
1370 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1371 | _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, |
1372 | char __b1, char __b0) |
1373 | { |
1374 | return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, |
1375 | __b4, __b5, __b6, __b7); |
1376 | } |
1377 | |
1378 | /// Constructs a 64-bit integer vector of [2 x i32], with each of the |
1379 | /// 32-bit integer vector elements set to the specified 32-bit integer |
1380 | /// value. |
1381 | /// |
1382 | /// \headerfile <x86intrin.h> |
1383 | /// |
1384 | /// This intrinsic is a utility function and does not correspond to a specific |
1385 | /// instruction. |
1386 | /// |
1387 | /// \param __i |
1388 | /// A 32-bit integer value used to initialize each vector element of the |
1389 | /// result. |
1390 | /// \returns An initialized 64-bit integer vector of [2 x i32]. |
1391 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1392 | _mm_set1_pi32(int __i) |
1393 | { |
1394 | return _mm_set_pi32(__i, __i); |
1395 | } |
1396 | |
1397 | /// Constructs a 64-bit integer vector of [4 x i16], with each of the |
1398 | /// 16-bit integer vector elements set to the specified 16-bit integer |
1399 | /// value. |
1400 | /// |
1401 | /// \headerfile <x86intrin.h> |
1402 | /// |
1403 | /// This intrinsic is a utility function and does not correspond to a specific |
1404 | /// instruction. |
1405 | /// |
1406 | /// \param __w |
1407 | /// A 16-bit integer value used to initialize each vector element of the |
1408 | /// result. |
1409 | /// \returns An initialized 64-bit integer vector of [4 x i16]. |
1410 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1411 | _mm_set1_pi16(short __w) |
1412 | { |
1413 | return _mm_set_pi16(__w, __w, __w, __w); |
1414 | } |
1415 | |
1416 | /// Constructs a 64-bit integer vector of [8 x i8], with each of the |
1417 | /// 8-bit integer vector elements set to the specified 8-bit integer value. |
1418 | /// |
1419 | /// \headerfile <x86intrin.h> |
1420 | /// |
1421 | /// This intrinsic is a utility function and does not correspond to a specific |
1422 | /// instruction. |
1423 | /// |
1424 | /// \param __b |
1425 | /// An 8-bit integer value used to initialize each vector element of the |
1426 | /// result. |
1427 | /// \returns An initialized 64-bit integer vector of [8 x i8]. |
1428 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1429 | _mm_set1_pi8(char __b) |
1430 | { |
1431 | return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); |
1432 | } |
1433 | |
1434 | /// Constructs a 64-bit integer vector, initialized in reverse order with |
1435 | /// the specified 32-bit integer values. |
1436 | /// |
1437 | /// \headerfile <x86intrin.h> |
1438 | /// |
1439 | /// This intrinsic is a utility function and does not correspond to a specific |
1440 | /// instruction. |
1441 | /// |
1442 | /// \param __i0 |
1443 | /// A 32-bit integer value used to initialize the lower 32 bits of the |
1444 | /// result. |
1445 | /// \param __i1 |
1446 | /// A 32-bit integer value used to initialize the upper 32 bits of the |
1447 | /// result. |
1448 | /// \returns An initialized 64-bit integer vector. |
1449 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1450 | _mm_setr_pi32(int __i0, int __i1) |
1451 | { |
1452 | return _mm_set_pi32(__i1, __i0); |
1453 | } |
1454 | |
1455 | /// Constructs a 64-bit integer vector, initialized in reverse order with |
1456 | /// the specified 16-bit integer values. |
1457 | /// |
1458 | /// \headerfile <x86intrin.h> |
1459 | /// |
1460 | /// This intrinsic is a utility function and does not correspond to a specific |
1461 | /// instruction. |
1462 | /// |
1463 | /// \param __w0 |
1464 | /// A 16-bit integer value used to initialize bits [15:0] of the result. |
1465 | /// \param __w1 |
1466 | /// A 16-bit integer value used to initialize bits [31:16] of the result. |
1467 | /// \param __w2 |
1468 | /// A 16-bit integer value used to initialize bits [47:32] of the result. |
1469 | /// \param __w3 |
1470 | /// A 16-bit integer value used to initialize bits [63:48] of the result. |
1471 | /// \returns An initialized 64-bit integer vector. |
1472 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1473 | _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) |
1474 | { |
1475 | return _mm_set_pi16(__w3, __w2, __w1, __w0); |
1476 | } |
1477 | |
1478 | /// Constructs a 64-bit integer vector, initialized in reverse order with |
1479 | /// the specified 8-bit integer values. |
1480 | /// |
1481 | /// \headerfile <x86intrin.h> |
1482 | /// |
1483 | /// This intrinsic is a utility function and does not correspond to a specific |
1484 | /// instruction. |
1485 | /// |
1486 | /// \param __b0 |
1487 | /// An 8-bit integer value used to initialize bits [7:0] of the result. |
1488 | /// \param __b1 |
1489 | /// An 8-bit integer value used to initialize bits [15:8] of the result. |
1490 | /// \param __b2 |
1491 | /// An 8-bit integer value used to initialize bits [23:16] of the result. |
1492 | /// \param __b3 |
1493 | /// An 8-bit integer value used to initialize bits [31:24] of the result. |
1494 | /// \param __b4 |
1495 | /// An 8-bit integer value used to initialize bits [39:32] of the result. |
1496 | /// \param __b5 |
1497 | /// An 8-bit integer value used to initialize bits [47:40] of the result. |
1498 | /// \param __b6 |
1499 | /// An 8-bit integer value used to initialize bits [55:48] of the result. |
1500 | /// \param __b7 |
1501 | /// An 8-bit integer value used to initialize bits [63:56] of the result. |
1502 | /// \returns An initialized 64-bit integer vector. |
1503 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1504 | _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, |
1505 | char __b6, char __b7) |
1506 | { |
1507 | return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); |
1508 | } |
1509 | |
/* The function-attribute helper macro is private to this header. */
#undef __DEFAULT_FN_ATTRS

/*
 * Aliases for compatibility.
 *
 * Each _m_* name below is a plain object-like macro that expands to the
 * _mm_* identifier listed next to it.
 */

/* State and conversion. */
#define _m_empty      _mm_empty
#define _m_from_int   _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int     _mm_cvtsi64_si32
#define _m_to_int64   _mm_cvtm64_si64

/* Pack and unpack. */
#define _m_packsswb   _mm_packs_pi16
#define _m_packssdw   _mm_packs_pi32
#define _m_packuswb   _mm_packs_pu16
#define _m_punpckhbw  _mm_unpackhi_pi8
#define _m_punpckhwd  _mm_unpackhi_pi16
#define _m_punpckhdq  _mm_unpackhi_pi32
#define _m_punpcklbw  _mm_unpacklo_pi8
#define _m_punpcklwd  _mm_unpacklo_pi16
#define _m_punpckldq  _mm_unpacklo_pi32

/* Addition. */
#define _m_paddb      _mm_add_pi8
#define _m_paddw      _mm_add_pi16
#define _m_paddd      _mm_add_pi32
#define _m_paddsb     _mm_adds_pi8
#define _m_paddsw     _mm_adds_pi16
#define _m_paddusb    _mm_adds_pu8
#define _m_paddusw    _mm_adds_pu16

/* Subtraction. */
#define _m_psubb      _mm_sub_pi8
#define _m_psubw      _mm_sub_pi16
#define _m_psubd      _mm_sub_pi32
#define _m_psubsb     _mm_subs_pi8
#define _m_psubsw     _mm_subs_pi16
#define _m_psubusb    _mm_subs_pu8
#define _m_psubusw    _mm_subs_pu16

/* Multiplication. */
#define _m_pmaddwd    _mm_madd_pi16
#define _m_pmulhw     _mm_mulhi_pi16
#define _m_pmullw     _mm_mullo_pi16

/* Shifts. */
#define _m_psllw      _mm_sll_pi16
#define _m_psllwi     _mm_slli_pi16
#define _m_pslld      _mm_sll_pi32
#define _m_pslldi     _mm_slli_pi32
#define _m_psllq      _mm_sll_si64
#define _m_psllqi     _mm_slli_si64
#define _m_psraw      _mm_sra_pi16
#define _m_psrawi     _mm_srai_pi16
#define _m_psrad      _mm_sra_pi32
#define _m_psradi     _mm_srai_pi32
#define _m_psrlw      _mm_srl_pi16
#define _m_psrlwi     _mm_srli_pi16
#define _m_psrld      _mm_srl_pi32
#define _m_psrldi     _mm_srli_pi32
#define _m_psrlq      _mm_srl_si64
#define _m_psrlqi     _mm_srli_si64

/* Bitwise logic. */
#define _m_pand       _mm_and_si64
#define _m_pandn      _mm_andnot_si64
#define _m_por        _mm_or_si64
#define _m_pxor       _mm_xor_si64

/* Comparison. */
#define _m_pcmpeqb    _mm_cmpeq_pi8
#define _m_pcmpeqw    _mm_cmpeq_pi16
#define _m_pcmpeqd    _mm_cmpeq_pi32
#define _m_pcmpgtb    _mm_cmpgt_pi8
#define _m_pcmpgtw    _mm_cmpgt_pi16
#define _m_pcmpgtd    _mm_cmpgt_pi32
1570 | |
1571 | #endif /* __MMINTRIN_H */ |
1572 | |
1573 |