1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
/* Direct-inclusion guard: unless __IMMINTRIN_H is already defined when this
 * header is processed, compilation stops with the diagnostic below. */
#ifndef __IMMINTRIN_H
#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
#endif
27 | |
/* Include guard: the remainder of the header is processed only once per
 * translation unit. The matching #endif is the last directive in the file. */
#ifndef __FMAINTRIN_H
#define __FMAINTRIN_H
30 | |
31 | |
/* Attribute lists placed on every function defined in this header. Both
 * request __always_inline__, __nodebug__ and __target__("fma"); they differ
 * only in the __min_vector_width__ argument (128 for the functions that take
 * __m128/__m128d operands, 256 for those that take __m256/__m256d operands).
 * Both macros are #undef'd again at the end of the header. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
34 | |
35 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
36 | _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) |
37 | { |
38 | return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
39 | } |
40 | |
41 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
42 | _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) |
43 | { |
44 | return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); |
45 | } |
46 | |
47 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
48 | _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) |
49 | { |
50 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
51 | } |
52 | |
53 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
54 | _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) |
55 | { |
56 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); |
57 | } |
58 | |
59 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
60 | _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) |
61 | { |
62 | return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
63 | } |
64 | |
65 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
66 | _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) |
67 | { |
68 | return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
69 | } |
70 | |
71 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
72 | _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) |
73 | { |
74 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
75 | } |
76 | |
77 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
78 | _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) |
79 | { |
80 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
81 | } |
82 | |
83 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
84 | _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) |
85 | { |
86 | return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
87 | } |
88 | |
89 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
90 | _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) |
91 | { |
92 | return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); |
93 | } |
94 | |
95 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
96 | _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) |
97 | { |
98 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); |
99 | } |
100 | |
101 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
102 | _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) |
103 | { |
104 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); |
105 | } |
106 | |
107 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
108 | _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) |
109 | { |
110 | return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
111 | } |
112 | |
113 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
114 | _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) |
115 | { |
116 | return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); |
117 | } |
118 | |
119 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
120 | _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) |
121 | { |
122 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); |
123 | } |
124 | |
125 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
126 | _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) |
127 | { |
128 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); |
129 | } |
130 | |
131 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
132 | _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) |
133 | { |
134 | return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
135 | } |
136 | |
137 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
138 | _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) |
139 | { |
140 | return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); |
141 | } |
142 | |
143 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
144 | _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) |
145 | { |
146 | return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
147 | } |
148 | |
149 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
150 | _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) |
151 | { |
152 | return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
153 | } |
154 | |
155 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
156 | _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) |
157 | { |
158 | return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
159 | } |
160 | |
161 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
162 | _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) |
163 | { |
164 | return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); |
165 | } |
166 | |
167 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
168 | _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) |
169 | { |
170 | return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
171 | } |
172 | |
173 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
174 | _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) |
175 | { |
176 | return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
177 | } |
178 | |
179 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
180 | _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) |
181 | { |
182 | return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
183 | } |
184 | |
185 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
186 | _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) |
187 | { |
188 | return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); |
189 | } |
190 | |
191 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
192 | _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) |
193 | { |
194 | return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
195 | } |
196 | |
197 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
198 | _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) |
199 | { |
200 | return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); |
201 | } |
202 | |
203 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
204 | _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) |
205 | { |
206 | return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
207 | } |
208 | |
209 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
210 | _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) |
211 | { |
212 | return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); |
213 | } |
214 | |
215 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
216 | _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) |
217 | { |
218 | return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
219 | } |
220 | |
221 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
222 | _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) |
223 | { |
224 | return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
225 | } |
226 | |
/* The attribute helper macros are only needed by the definitions above;
 * remove them so they are not visible to code that follows this header. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif /* __FMAINTRIN_H */
231 | |