f16cintrin.h source code [clang_source_code/lib/Headers/f16cintrin.h]

1	/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
2	*
3	* Permission is hereby granted, free of charge, to any person obtaining a copy
4	* of this software and associated documentation files (the "Software"), to deal
5	* in the Software without restriction, including without limitation the rights
6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	* copies of the Software, and to permit persons to whom the Software is
8	* furnished to do so, subject to the following conditions:
9	*
10	* The above copyright notice and this permission notice shall be included in
11	* all copies or substantial portions of the Software.
12	*
13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	* THE SOFTWARE.
20	*
21	*===-----------------------------------------------------------------------===
22	*/
23
24	#if !defined __IMMINTRIN_H
25	#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
26	#endif
27
28	#ifndef __F16CINTRIN_H
29	#define __F16CINTRIN_H
30
/* Attribute sets attached to every inline function defined in this header.
 * Both request always-inline, no-debug code generated with the "f16c" target
 * feature; they differ only in the __min_vector_width__ argument (128 for the
 * functions operating on 128-bit vectors, 256 for the 256-bit ones).
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(256)))
36
37	/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
38	* but that's because icc can emulate these without f16c using a library call.
39	* Since we don't do that let's leave these in f16cintrin.h.
40	*/
41
42	/// Converts a 16-bit half-precision float value into a 32-bit float
43	/// value.
44	///
45	/// \headerfile <x86intrin.h>
46	///
47	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
48	///
49	/// \param __a
50	/// A 16-bit half-precision float value.
51	/// \returns The converted 32-bit float value.
52	static __inline float __DEFAULT_FN_ATTRS128
53	_cvtsh_ss(unsigned short __a)
54	{
55	__v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
56	__v4sf r = __builtin_ia32_vcvtph2ps(v);
57	return r[0];
58	}
59
/// Converts a 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
/* The whole expansion and each argument are parenthesized so the macro behaves
 * as a single primary expression wherever it is used (e.g. under sizeof or
 * followed by a postfix operator).
 */
#define _cvtss_sh(a, imm)                                                      \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph(                         \
      (__v4sf){(a), 0, 0, 0}, (imm)))[0]))
85
/// Converts a 128-bit vector containing 32-bit float values into a
///    128-bit vector containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing converted 16-bit half-precision float
///    values. The lower 64 bits are used to store the converted 16-bit
///    half-precision floating-point values.
/* The outer parentheses keep the (__m128i) cast bound to the builtin call even
 * when the macro is followed by a postfix operator such as a subscript.
 */
#define _mm_cvtps_ph(a, imm)                                                   \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
111
112	/// Converts a 128-bit vector containing 16-bit half-precision float
113	/// values into a 128-bit vector containing 32-bit float values.
114	///
115	/// \headerfile <x86intrin.h>
116	///
117	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
118	///
119	/// \param __a
120	/// A 128-bit vector containing 16-bit half-precision float values. The lower
121	/// 64 bits are used in the conversion.
122	/// \returns A 128-bit vector of [4 x float] containing converted float values.
123	static __inline __m128 __DEFAULT_FN_ATTRS128
124	_mm_cvtph_ps(__m128i __a)
125	{
126	return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
127	}
128
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
/* The outer parentheses keep the (__m128i) cast bound to the builtin call even
 * when the macro is followed by a postfix operator such as a subscript.
 */
#define _mm256_cvtps_ph(a, imm)                                                \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
154
155	/// Converts a 128-bit vector containing 16-bit half-precision float
156	/// values into a 256-bit vector of [8 x float].
157	///
158	/// \headerfile <x86intrin.h>
159	///
160	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
161	///
162	/// \param __a
163	/// A 128-bit vector containing 16-bit half-precision float values to be
164	/// converted to 32-bit single-precision float values.
165	/// \returns A vector of [8 x float] containing the converted 32-bit
166	/// single-precision float values.
167	static __inline __m256 __DEFAULT_FN_ATTRS256
168	_mm256_cvtph_ps(__m128i __a)
169	{
170	return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
171	}
172
/* The attribute helper macros are private to this header; remove them so they
 * do not remain defined for code that follows.
 */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
175
176	#endif /* __F16CINTRIN_H */
177

Clang Project