ammintrin.h source code [clang_source_code/lib/Headers/ammintrin.h]

1	/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
2	*
3	* Permission is hereby granted, free of charge, to any person obtaining a copy
4	* of this software and associated documentation files (the "Software"), to deal
5	* in the Software without restriction, including without limitation the rights
6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	* copies of the Software, and to permit persons to whom the Software is
8	* furnished to do so, subject to the following conditions:
9	*
10	* The above copyright notice and this permission notice shall be included in
11	* all copies or substantial portions of the Software.
12	*
13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	* THE SOFTWARE.
20	*
21	*===-----------------------------------------------------------------------===
22	*/
23
24	#ifndef __AMMINTRIN_H
25	#define __AMMINTRIN_H
26
27	#include <pmmintrin.h>
28
/* Attributes shared by every inline function defined in this header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"),          \
                 __min_vector_width__(128)))
31
/// Extracts a bit field from the lower 64 bits of a 128-bit integer vector.
///    The field starts at bit index \a idx and is \a len bits long.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The source value; bits are taken from its lower 64 bits.
/// \param len
///    The field length, given by bits [5:0]; all other bits are ignored. A
///    value of zero in bits [5:0] is interpreted as a length of 64.
/// \param idx
///    The index of the field's least significant bit, given by bits [5:0];
///    all other bits are ignored. The result is undefined if the sum of the
///    index and the length is greater than 64, or if the length is zero
///    while the index is non-zero. If both the length and the index are
///    zero, bits [63:0] of parameter \a x are extracted.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx)                                         \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), (char)(len),           \
                                  (char)(idx)))
59
60	/// Extracts the specified bits from the lower 64 bits of the 128-bit
61	/// integer vector operand at the index and of the length specified by
62	/// \a __y.
63	///
64	/// \headerfile <x86intrin.h>
65	///
66	/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
67	///
68	/// \param __x
69	/// The value from which bits are extracted.
70	/// \param __y
71	/// Specifies the index of the least significant bit at [13:8] and the
72	/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
73	/// length is interpreted as 64. If the sum of the index and length is
74	/// greater than 64, the result is undefined. If the length and index are
75	/// both zero, bits [63:0] of parameter \a __x are extracted. If the length
76	/// is zero but the index is non-zero, the result is undefined.
77	/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
78	/// from the source operand.
79	static __inline__ __m128i __DEFAULT_FN_ATTRS
80	_mm_extract_si64(__m128i __x, __m128i __y)
81	{
82	return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
83	}
84
/// Inserts the \a len least significant bits of the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x,
///    starting at bit index \a idx.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand that receives the inserted bits. The replaced
///    field is \a len bits long and its least significant bit is at index
///    \a idx.
/// \param y
///    The source operand supplying the bits. The bits used are the \a len
///    least significant bits of operand \a y.
/// \param len
///    The field length, given by bits [5:0]; all other bits are ignored. A
///    value of zero in bits [5:0] is interpreted as a length of 64.
/// \param idx
///    The index of the field's least significant bit, given by bits [5:0];
///    all other bits are ignored. The result is undefined if the sum of the
///    index and the length is greater than 64, or if the length is zero
///    while the index is non-zero. If both the length and the index are
///    zero, bits [63:0] of parameter \a y are inserted into parameter \a x.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return
///    value are undefined.
#define _mm_inserti_si64(x, y, len, idx)                                       \
  ((__m128i)__builtin_ia32_insertqi(                                           \
      (__v2di)(__m128i)(x), (__v2di)(__m128i)(y), (char)(len), (char)(idx)))
122
123	/// Inserts bits of a specified length from the source integer vector
124	/// \a __y into the lower 64 bits of the destination integer vector \a __x
125	/// at the index and of the length specified by \a __y.
126	///
127	/// \headerfile <x86intrin.h>
128	///
129	/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
130	///
131	/// \param __x
132	/// The destination operand where bits will be inserted. The inserted bits
133	/// are defined by the length and by the index of the least significant bit
134	/// specified by operand \a __y.
135	/// \param __y
136	/// The source operand containing the bits to be extracted. The extracted
137	/// bits are the least significant bits of operand \a __y with length
138	/// specified by bits [69:64]. These are inserted into the destination at the
139	/// index specified by bits [77:72]; all other bits are ignored. If bits
140	/// [69:64] are zero, the length is interpreted as 64. If the sum of the
141	/// index and length is greater than 64, the result is undefined. If the
142	/// length and index are both zero, bits [63:0] of parameter \a __y are
143	/// inserted into parameter \a __x. If the length is zero but the index is
144	/// non-zero, the result is undefined.
145	/// \returns A 128-bit integer vector containing the original lower 64-bits of
146	/// destination operand \a __x with the specified bitfields replaced by the
147	/// lower bits of source operand \a __y. The upper 64 bits of the return
148	/// value are undefined.
149	static __inline__ __m128i __DEFAULT_FN_ATTRS
150	_mm_insert_si64(__m128i __x, __m128i __y)
151	{
152	return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
153	}
154
155	/// Stores a 64-bit double-precision value in a 64-bit memory location.
156	/// To minimize caching, the data is flagged as non-temporal (unlikely to be
157	/// used again soon).
158	///
159	/// \headerfile <x86intrin.h>
160	///
161	/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
162	///
163	/// \param __p
164	/// The 64-bit memory location used to store the register value.
165	/// \param __a
166	/// The 64-bit double-precision floating-point register value to be stored.
167	static __inline__ void __DEFAULT_FN_ATTRS
168	_mm_stream_sd(double *__p, __m128d __a)
169	{
170	__builtin_ia32_movntsd(__p, (__v2df)__a);
171	}
172
173	/// Stores a 32-bit single-precision floating-point value in a 32-bit
174	/// memory location. To minimize caching, the data is flagged as
175	/// non-temporal (unlikely to be used again soon).
176	///
177	/// \headerfile <x86intrin.h>
178	///
179	/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
180	///
181	/// \param __p
182	/// The 32-bit memory location used to store the register value.
183	/// \param __a
184	/// The 32-bit single-precision floating-point register value to be stored.
185	static __inline__ void __DEFAULT_FN_ATTRS
186	_mm_stream_ss(float *__p, __m128 __a)
187	{
188	__builtin_ia32_movntss(__p, (__v4sf)__a);
189	}
190
191	#undef __DEFAULT_FN_ATTRS
192
193	#endif /* __AMMINTRIN_H */
194

Clang Project