tmmintrin.h source code [clang_source_code/lib/Headers/tmmintrin.h]

1	/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2	*
3	* Permission is hereby granted, free of charge, to any person obtaining a copy
4	* of this software and associated documentation files (the "Software"), to deal
5	* in the Software without restriction, including without limitation the rights
6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	* copies of the Software, and to permit persons to whom the Software is
8	* furnished to do so, subject to the following conditions:
9	*
10	* The above copyright notice and this permission notice shall be included in
11	* all copies or substantial portions of the Software.
12	*
13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	* THE SOFTWARE.
20	*
21	*===-----------------------------------------------------------------------===
22	*/
23
24	#ifndef __TMMINTRIN_H
25	#define __TMMINTRIN_H
26
27	#include <pmmintrin.h>
28
29	/* Define the default attributes for the functions in this file. Both sets force inlining, mark the function nodebug, and request a minimum vector width of 64; __DEFAULT_FN_ATTRS targets "ssse3" while __DEFAULT_FN_ATTRS_MMX targets "mmx,ssse3" for the intrinsics operating on __m64. */
30	#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
31	#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
32
33	/// Computes the absolute value of each of the packed 8-bit signed
34	/// integers in the source operand and stores the 8-bit unsigned integer
35	/// results in the destination.
36	///
37	/// \headerfile <x86intrin.h>
38	///
39	/// This intrinsic corresponds to the \c PABSB instruction.
40	///
41	/// \param __a
42	/// A 64-bit vector of [8 x i8].
43	/// \returns A 64-bit integer vector containing the absolute values of the
44	/// elements in the operand.
45	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
46	_mm_abs_pi8(__m64 __a)
47	{
48	return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
49	}
50
51	/// Computes the absolute value of each of the packed 8-bit signed
52	/// integers in the source operand and stores the 8-bit unsigned integer
53	/// results in the destination.
54	///
55	/// \headerfile <x86intrin.h>
56	///
57	/// This intrinsic corresponds to the \c VPABSB instruction.
58	///
59	/// \param __a
60	/// A 128-bit vector of [16 x i8].
61	/// \returns A 128-bit integer vector containing the absolute values of the
62	/// elements in the operand.
63	static __inline__ __m128i __DEFAULT_FN_ATTRS
64	_mm_abs_epi8(__m128i __a)
65	{
66	return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
67	}
68
69	/// Computes the absolute value of each of the packed 16-bit signed
70	/// integers in the source operand and stores the 16-bit unsigned integer
71	/// results in the destination.
72	///
73	/// \headerfile <x86intrin.h>
74	///
75	/// This intrinsic corresponds to the \c PABSW instruction.
76	///
77	/// \param __a
78	/// A 64-bit vector of [4 x i16].
79	/// \returns A 64-bit integer vector containing the absolute values of the
80	/// elements in the operand.
81	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
82	_mm_abs_pi16(__m64 __a)
83	{
84	return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
85	}
86
87	/// Computes the absolute value of each of the packed 16-bit signed
88	/// integers in the source operand and stores the 16-bit unsigned integer
89	/// results in the destination.
90	///
91	/// \headerfile <x86intrin.h>
92	///
93	/// This intrinsic corresponds to the \c VPABSW instruction.
94	///
95	/// \param __a
96	/// A 128-bit vector of [8 x i16].
97	/// \returns A 128-bit integer vector containing the absolute values of the
98	/// elements in the operand.
99	static __inline__ __m128i __DEFAULT_FN_ATTRS
100	_mm_abs_epi16(__m128i __a)
101	{
102	return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
103	}
104
105	/// Computes the absolute value of each of the packed 32-bit signed
106	/// integers in the source operand and stores the 32-bit unsigned integer
107	/// results in the destination.
108	///
109	/// \headerfile <x86intrin.h>
110	///
111	/// This intrinsic corresponds to the \c PABSD instruction.
112	///
113	/// \param __a
114	/// A 64-bit vector of [2 x i32].
115	/// \returns A 64-bit integer vector containing the absolute values of the
116	/// elements in the operand.
117	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
118	_mm_abs_pi32(__m64 __a)
119	{
120	return (__m64)__builtin_ia32_pabsd((__v2si)__a);
121	}
122
123	/// Computes the absolute value of each of the packed 32-bit signed
124	/// integers in the source operand and stores the 32-bit unsigned integer
125	/// results in the destination.
126	///
127	/// \headerfile <x86intrin.h>
128	///
129	/// This intrinsic corresponds to the \c VPABSD instruction.
130	///
131	/// \param __a
132	/// A 128-bit vector of [4 x i32].
133	/// \returns A 128-bit integer vector containing the absolute values of the
134	/// elements in the operand.
135	static __inline__ __m128i __DEFAULT_FN_ATTRS
136	_mm_abs_epi32(__m128i __a)
137	{
138	return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
139	}
140
/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so the cast to __m128i applies before
   any postfix operator (such as a subscript) written after the macro call. */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
164
/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so the cast to __m64 applies before
   any postfix operator written after the macro call. */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
186
187	/// Horizontally adds the adjacent pairs of values contained in 2 packed
188	/// 128-bit vectors of [8 x i16].
189	///
190	/// \headerfile <x86intrin.h>
191	///
192	/// This intrinsic corresponds to the \c VPHADDW instruction.
193	///
194	/// \param __a
195	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
196	/// horizontal sums of the values are stored in the lower bits of the
197	/// destination.
198	/// \param __b
199	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
200	/// horizontal sums of the values are stored in the upper bits of the
201	/// destination.
202	/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
203	/// both operands.
204	static __inline__ __m128i __DEFAULT_FN_ATTRS
205	_mm_hadd_epi16(__m128i __a, __m128i __b)
206	{
207	return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
208	}
209
210	/// Horizontally adds the adjacent pairs of values contained in 2 packed
211	/// 128-bit vectors of [4 x i32].
212	///
213	/// \headerfile <x86intrin.h>
214	///
215	/// This intrinsic corresponds to the \c VPHADDD instruction.
216	///
217	/// \param __a
218	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
219	/// horizontal sums of the values are stored in the lower bits of the
220	/// destination.
221	/// \param __b
222	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
223	/// horizontal sums of the values are stored in the upper bits of the
224	/// destination.
225	/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
226	/// both operands.
227	static __inline__ __m128i __DEFAULT_FN_ATTRS
228	_mm_hadd_epi32(__m128i __a, __m128i __b)
229	{
230	return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
231	}
232
233	/// Horizontally adds the adjacent pairs of values contained in 2 packed
234	/// 64-bit vectors of [4 x i16].
235	///
236	/// \headerfile <x86intrin.h>
237	///
238	/// This intrinsic corresponds to the \c PHADDW instruction.
239	///
240	/// \param __a
241	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
242	/// horizontal sums of the values are stored in the lower bits of the
243	/// destination.
244	/// \param __b
245	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
246	/// horizontal sums of the values are stored in the upper bits of the
247	/// destination.
248	/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
249	/// operands.
250	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
251	_mm_hadd_pi16(__m64 __a, __m64 __b)
252	{
253	return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
254	}
255
256	/// Horizontally adds the adjacent pairs of values contained in 2 packed
257	/// 64-bit vectors of [2 x i32].
258	///
259	/// \headerfile <x86intrin.h>
260	///
261	/// This intrinsic corresponds to the \c PHADDD instruction.
262	///
263	/// \param __a
264	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
265	/// horizontal sums of the values are stored in the lower bits of the
266	/// destination.
267	/// \param __b
268	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
269	/// horizontal sums of the values are stored in the upper bits of the
270	/// destination.
271	/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
272	/// operands.
273	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
274	_mm_hadd_pi32(__m64 __a, __m64 __b)
275	{
276	return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
277	}
278
279	/// Horizontally adds the adjacent pairs of values contained in 2 packed
280	/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
281	/// saturated to 0x7FFF. Negative sums less than -32768 (0x8000 when read as a
282	/// signed 16-bit value) are saturated to 0x8000.
283	///
284	/// \headerfile <x86intrin.h>
285	///
286	/// This intrinsic corresponds to the \c VPHADDSW instruction.
287	///
288	/// \param __a
289	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
290	/// horizontal sums of the values are stored in the lower bits of the
291	/// destination.
292	/// \param __b
293	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
294	/// horizontal sums of the values are stored in the upper bits of the
295	/// destination.
296	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
297	/// sums of both operands.
298	static __inline__ __m128i __DEFAULT_FN_ATTRS
299	_mm_hadds_epi16(__m128i __a, __m128i __b)
300	{
301	return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
302	}
303
304	/// Horizontally adds the adjacent pairs of values contained in 2 packed
305	/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
306	/// saturated to 0x7FFF. Negative sums less than -32768 (0x8000 when read as a
307	/// signed 16-bit value) are saturated to 0x8000.
308	///
309	/// \headerfile <x86intrin.h>
310	///
311	/// This intrinsic corresponds to the \c PHADDSW instruction.
312	///
313	/// \param __a
314	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
315	/// horizontal sums of the values are stored in the lower bits of the
316	/// destination.
317	/// \param __b
318	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
319	/// horizontal sums of the values are stored in the upper bits of the
320	/// destination.
321	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
322	/// sums of both operands.
323	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
324	_mm_hadds_pi16(__m64 __a, __m64 __b)
325	{
326	return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
327	}
328
329	/// Horizontally subtracts the adjacent pairs of values contained in 2
330	/// packed 128-bit vectors of [8 x i16].
331	///
332	/// \headerfile <x86intrin.h>
333	///
334	/// This intrinsic corresponds to the \c VPHSUBW instruction.
335	///
336	/// \param __a
337	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
338	/// horizontal differences between the values are stored in the lower bits of
339	/// the destination.
340	/// \param __b
341	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
342	/// horizontal differences between the values are stored in the upper bits of
343	/// the destination.
344	/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
345	/// of both operands.
346	static __inline__ __m128i __DEFAULT_FN_ATTRS
347	_mm_hsub_epi16(__m128i __a, __m128i __b)
348	{
349	return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
350	}
351
352	/// Horizontally subtracts the adjacent pairs of values contained in 2
353	/// packed 128-bit vectors of [4 x i32].
354	///
355	/// \headerfile <x86intrin.h>
356	///
357	/// This intrinsic corresponds to the \c VPHSUBD instruction.
358	///
359	/// \param __a
360	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
361	/// horizontal differences between the values are stored in the lower bits of
362	/// the destination.
363	/// \param __b
364	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
365	/// horizontal differences between the values are stored in the upper bits of
366	/// the destination.
367	/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
368	/// of both operands.
369	static __inline__ __m128i __DEFAULT_FN_ATTRS
370	_mm_hsub_epi32(__m128i __a, __m128i __b)
371	{
372	return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
373	}
374
375	/// Horizontally subtracts the adjacent pairs of values contained in 2
376	/// packed 64-bit vectors of [4 x i16].
377	///
378	/// \headerfile <x86intrin.h>
379	///
380	/// This intrinsic corresponds to the \c PHSUBW instruction.
381	///
382	/// \param __a
383	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
384	/// horizontal differences between the values are stored in the lower bits of
385	/// the destination.
386	/// \param __b
387	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
388	/// horizontal differences between the values are stored in the upper bits of
389	/// the destination.
390	/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
391	/// of both operands.
392	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
393	_mm_hsub_pi16(__m64 __a, __m64 __b)
394	{
395	return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
396	}
397
398	/// Horizontally subtracts the adjacent pairs of values contained in 2
399	/// packed 64-bit vectors of [2 x i32].
400	///
401	/// \headerfile <x86intrin.h>
402	///
403	/// This intrinsic corresponds to the \c PHSUBD instruction.
404	///
405	/// \param __a
406	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
407	/// horizontal differences between the values are stored in the lower bits of
408	/// the destination.
409	/// \param __b
410	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
411	/// horizontal differences between the values are stored in the upper bits of
412	/// the destination.
413	/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
414	/// of both operands.
415	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
416	_mm_hsub_pi32(__m64 __a, __m64 __b)
417	{
418	return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
419	}
420
421	/// Horizontally subtracts the adjacent pairs of values contained in 2
422	/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
423	/// 0x7FFF are saturated to 0x7FFF. Negative differences less than -32768
424	/// (0x8000 when read as a signed 16-bit value) are saturated to 0x8000.
425	///
426	/// \headerfile <x86intrin.h>
427	///
428	/// This intrinsic corresponds to the \c VPHSUBSW instruction.
429	///
430	/// \param __a
431	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
432	/// horizontal differences between the values are stored in the lower bits of
433	/// the destination.
434	/// \param __b
435	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
436	/// horizontal differences between the values are stored in the upper bits of
437	/// the destination.
438	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
439	/// differences of both operands.
440	static __inline__ __m128i __DEFAULT_FN_ATTRS
441	_mm_hsubs_epi16(__m128i __a, __m128i __b)
442	{
443	return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
444	}
445
446	/// Horizontally subtracts the adjacent pairs of values contained in 2
447	/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
448	/// 0x7FFF are saturated to 0x7FFF. Negative differences less than -32768
449	/// (0x8000 when read as a signed 16-bit value) are saturated to 0x8000.
450	///
451	/// \headerfile <x86intrin.h>
452	///
453	/// This intrinsic corresponds to the \c PHSUBSW instruction.
454	///
455	/// \param __a
456	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
457	/// horizontal differences between the values are stored in the lower bits of
458	/// the destination.
459	/// \param __b
460	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
461	/// horizontal differences between the values are stored in the upper bits of
462	/// the destination.
463	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
464	/// differences of both operands.
465	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
466	_mm_hsubs_pi16(__m64 __a, __m64 __b)
467	{
468	return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
469	}
470
471	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
472	/// values contained in the first source operand and packed 8-bit signed
473	/// integer values contained in the second source operand, adds pairs of
474	/// contiguous products with signed saturation, and writes the 16-bit sums to
475	/// the corresponding bits in the destination.
476	///
477	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
478	/// both operands are multiplied, and the sum of both results is written to
479	/// bits [15:0] of the destination.
480	///
481	/// \headerfile <x86intrin.h>
482	///
483	/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
484	///
485	/// \param __a
486	/// A 128-bit integer vector containing the first source operand.
487	/// \param __b
488	/// A 128-bit integer vector containing the second source operand.
489	/// \returns A 128-bit integer vector containing the sums of products of both
490	/// operands: \n
491	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
492	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
493	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
494	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
495	/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
496	/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
497	/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
498	/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
499	static __inline__ __m128i __DEFAULT_FN_ATTRS
500	_mm_maddubs_epi16(__m128i __a, __m128i __b)
501	{
502	return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
503	}
504
505	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
506	/// values contained in the first source operand and packed 8-bit signed
507	/// integer values contained in the second source operand, adds pairs of
508	/// contiguous products with signed saturation, and writes the 16-bit sums to
509	/// the corresponding bits in the destination.
510	///
511	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
512	/// both operands are multiplied, and the sum of both results is written to
513	/// bits [15:0] of the destination.
514	///
515	/// \headerfile <x86intrin.h>
516	///
517	/// This intrinsic corresponds to the \c PMADDUBSW instruction.
518	///
519	/// \param __a
520	/// A 64-bit integer vector containing the first source operand.
521	/// \param __b
522	/// A 64-bit integer vector containing the second source operand.
523	/// \returns A 64-bit integer vector containing the sums of products of both
524	/// operands: \n
525	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
526	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
527	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
528	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
529	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
530	_mm_maddubs_pi16(__m64 __a, __m64 __b)
531	{
532	return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
533	}
534
535	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
536	/// products to the 18 most significant bits by right-shifting, rounds the
537	/// truncated value by adding 1, and writes bits [16:1] to the destination.
538	///
539	/// \headerfile <x86intrin.h>
540	///
541	/// This intrinsic corresponds to the \c VPMULHRSW instruction.
542	///
543	/// \param __a
544	/// A 128-bit vector of [8 x i16] containing one of the source operands.
545	/// \param __b
546	/// A 128-bit vector of [8 x i16] containing one of the source operands.
547	/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
548	/// products of both operands.
549	static __inline__ __m128i __DEFAULT_FN_ATTRS
550	_mm_mulhrs_epi16(__m128i __a, __m128i __b)
551	{
552	return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
553	}
554
555	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
556	/// products to the 18 most significant bits by right-shifting, rounds the
557	/// truncated value by adding 1, and writes bits [16:1] to the destination.
558	///
559	/// \headerfile <x86intrin.h>
560	///
561	/// This intrinsic corresponds to the \c PMULHRSW instruction.
562	///
563	/// \param __a
564	/// A 64-bit vector of [4 x i16] containing one of the source operands.
565	/// \param __b
566	/// A 64-bit vector of [4 x i16] containing one of the source operands.
567	/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
568	/// products of both operands.
569	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
570	_mm_mulhrs_pi16(__m64 __a, __m64 __b)
571	{
572	return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
573	}
574
575	/// Copies the 8-bit integers from a 128-bit integer vector to the
576	/// destination or clears 8-bit values in the destination, as specified by
577	/// the second source operand.
578	///
579	/// \headerfile <x86intrin.h>
580	///
581	/// This intrinsic corresponds to the \c VPSHUFB instruction.
582	///
583	/// \param __a
584	/// A 128-bit integer vector containing the values to be copied.
585	/// \param __b
586	/// A 128-bit integer vector containing control bytes corresponding to
587	/// positions in the destination:
588	/// Bit 7: \n
589	/// 1: Clear the corresponding byte in the destination. \n
590	/// 0: Copy the selected source byte to the corresponding byte in the
591	/// destination. \n
592	/// Bits [6:4] Reserved. \n
593	/// Bits [3:0] select the source byte to be copied.
594	/// \returns A 128-bit integer vector containing the copied or cleared values.
595	static __inline__ __m128i __DEFAULT_FN_ATTRS
596	_mm_shuffle_epi8(__m128i __a, __m128i __b)
597	{
598	return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
599	}
600
601	/// Copies the 8-bit integers from a 64-bit integer vector to the
602	/// destination or clears 8-bit values in the destination, as specified by
603	/// the second source operand.
604	///
605	/// \headerfile <x86intrin.h>
606	///
607	/// This intrinsic corresponds to the \c PSHUFB instruction.
608	///
609	/// \param __a
610	/// A 64-bit integer vector containing the values to be copied.
611	/// \param __b
612	/// A 64-bit integer vector containing control bytes corresponding to
613	/// positions in the destination:
614	/// Bit 7: \n
615	/// 1: Clear the corresponding byte in the destination. \n
616	/// 0: Copy the selected source byte to the corresponding byte in the
617	/// destination. \n
618	/// Bits [2:0] select the source byte to be copied.
619	/// \returns A 64-bit integer vector containing the copied or cleared values.
620	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
621	_mm_shuffle_pi8(__m64 __a, __m64 __b)
622	{
623	return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
624	}
625
626	/// For each 8-bit integer in the first source operand, perform one of
627	/// the following actions as specified by the second source operand.
628	///
629	/// If the byte in the second source is negative, calculate the two's
630	/// complement of the corresponding byte in the first source, and write that
631	/// value to the destination. If the byte in the second source is positive,
632	/// copy the corresponding byte from the first source to the destination. If
633	/// the byte in the second source is zero, clear the corresponding byte in
634	/// the destination.
635	///
636	/// \headerfile <x86intrin.h>
637	///
638	/// This intrinsic corresponds to the \c VPSIGNB instruction.
639	///
640	/// \param __a
641	/// A 128-bit integer vector containing the values to be copied.
642	/// \param __b
643	/// A 128-bit integer vector containing control bytes corresponding to
644	/// positions in the destination.
645	/// \returns A 128-bit integer vector containing the resultant values.
646	static __inline__ __m128i __DEFAULT_FN_ATTRS
647	_mm_sign_epi8(__m128i __a, __m128i __b)
648	{
649	return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
650	}
651
652	/// For each 16-bit integer in the first source operand, perform one of
653	/// the following actions as specified by the second source operand.
654	///
655	/// If the word in the second source is negative, calculate the two's
656	/// complement of the corresponding word in the first source, and write that
657	/// value to the destination. If the word in the second source is positive,
658	/// copy the corresponding word from the first source to the destination. If
659	/// the word in the second source is zero, clear the corresponding word in
660	/// the destination.
661	///
662	/// \headerfile <x86intrin.h>
663	///
664	/// This intrinsic corresponds to the \c VPSIGNW instruction.
665	///
666	/// \param __a
667	/// A 128-bit integer vector containing the values to be copied.
668	/// \param __b
669	/// A 128-bit integer vector containing control words corresponding to
670	/// positions in the destination.
671	/// \returns A 128-bit integer vector containing the resultant values.
672	static __inline__ __m128i __DEFAULT_FN_ATTRS
673	_mm_sign_epi16(__m128i __a, __m128i __b)
674	{
675	return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
676	}
677
678	/// For each 32-bit integer in the first source operand, perform one of
679	/// the following actions as specified by the second source operand.
680	///
681	/// If the doubleword in the second source is negative, calculate the two's
682	/// complement of the corresponding doubleword in the first source, and write
683	/// that value to the destination. If the doubleword in the second source is
684	/// positive, copy the corresponding doubleword from the first source to the
685	/// destination. If the doubleword in the second source is zero, clear the
686	/// corresponding doubleword in the destination.
687	///
688	/// \headerfile <x86intrin.h>
689	///
690	/// This intrinsic corresponds to the \c VPSIGND instruction.
691	///
692	/// \param __a
693	/// A 128-bit integer vector containing the values to be copied.
694	/// \param __b
695	/// A 128-bit integer vector containing control doublewords corresponding to
696	/// positions in the destination.
697	/// \returns A 128-bit integer vector containing the resultant values.
698	static __inline__ __m128i __DEFAULT_FN_ATTRS
699	_mm_sign_epi32(__m128i __a, __m128i __b)
700	{
701	return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
702	}
703
704	/// For each 8-bit integer in the first source operand, perform one of
705	/// the following actions as specified by the second source operand.
706	///
707	/// If the byte in the second source is negative, calculate the two's
708	/// complement of the corresponding byte in the first source, and write that
709	/// value to the destination. If the byte in the second source is positive,
710	/// copy the corresponding byte from the first source to the destination. If
711	/// the byte in the second source is zero, clear the corresponding byte in
712	/// the destination.
713	///
714	/// \headerfile <x86intrin.h>
715	///
716	/// This intrinsic corresponds to the \c PSIGNB instruction.
717	///
718	/// \param __a
719	/// A 64-bit integer vector containing the values to be copied.
720	/// \param __b
721	/// A 64-bit integer vector containing control bytes corresponding to
722	/// positions in the destination.
723	/// \returns A 64-bit integer vector containing the resultant values.
724	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
725	_mm_sign_pi8(__m64 __a, __m64 __b)
726	{
727	return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
728	}
729
730	/// For each 16-bit integer in the first source operand, perform one of
731	/// the following actions as specified by the second source operand.
732	///
733	/// If the word in the second source is negative, calculate the two's
734	/// complement of the corresponding word in the first source, and write that
735	/// value to the destination. If the word in the second source is positive,
736	/// copy the corresponding word from the first source to the destination. If
737	/// the word in the second source is zero, clear the corresponding word in
738	/// the destination.
739	///
740	/// \headerfile <x86intrin.h>
741	///
742	/// This intrinsic corresponds to the \c PSIGNW instruction.
743	///
744	/// \param __a
745	/// A 64-bit integer vector containing the values to be copied.
746	/// \param __b
747	/// A 64-bit integer vector containing control words corresponding to
748	/// positions in the destination.
749	/// \returns A 64-bit integer vector containing the resultant values.
750	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
751	_mm_sign_pi16(__m64 __a, __m64 __b)
752	{
753	return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
754	}
755
756	/// For each 32-bit integer in the first source operand, perform one of
757	/// the following actions as specified by the second source operand.
758	///
759	/// If the doubleword in the second source is negative, calculate the two's
760	/// complement of the corresponding doubleword in the first source, and
761	/// write that value to the destination. If the doubleword in the second
762	/// source is positive, copy the corresponding doubleword from the first
763	/// source to the destination. If the doubleword in the second source is
764	/// zero, clear the corresponding doubleword in the destination.
765	///
766	/// \headerfile <x86intrin.h>
767	///
768	/// This intrinsic corresponds to the \c PSIGND instruction.
769	///
770	/// \param __a
771	/// A 64-bit integer vector containing the values to be copied.
772	/// \param __b
773	/// A 64-bit integer vector containing two control doublewords corresponding
774	/// to positions in the destination.
775	/// \returns A 64-bit integer vector containing the resultant values.
776	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
777	_mm_sign_pi32(__m64 __a, __m64 __b)
778	{
779	return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
780	}
781
782	#undef __DEFAULT_FN_ATTRS
783	#undef __DEFAULT_FN_ATTRS_MMX
784
785	#endif /* __TMMINTRIN_H */
786

Clang Project