1 | // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s |
2 | |
3 | |
4 | #include <x86intrin.h> |
5 | |
6 | __m128 test_mm_macc_ps(__m128 a, __m128 b, __m128 c) { |
7 | // CHECK-LABEL: test_mm_macc_ps |
8 | // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) |
9 | return _mm_macc_ps(a, b, c); |
10 | } |
11 | |
12 | __m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) { |
13 | // CHECK-LABEL: test_mm_macc_pd |
14 | // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) |
15 | return _mm_macc_pd(a, b, c); |
16 | } |
17 | |
18 | __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) { |
19 | // CHECK-LABEL: test_mm_macc_ss |
20 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
21 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
22 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
23 | // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) |
24 | // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 |
25 | return _mm_macc_ss(a, b, c); |
26 | } |
27 | |
28 | __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) { |
29 | // CHECK-LABEL: test_mm_macc_sd |
30 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
31 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
32 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
33 | // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}) |
34 | // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 |
35 | return _mm_macc_sd(a, b, c); |
36 | } |
37 | |
38 | __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) { |
39 | // CHECK-LABEL: test_mm_msub_ps |
40 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
41 | // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) |
42 | return _mm_msub_ps(a, b, c); |
43 | } |
44 | |
45 | __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) { |
46 | // CHECK-LABEL: test_mm_msub_pd |
47 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
48 | // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) |
49 | return _mm_msub_pd(a, b, c); |
50 | } |
51 | |
52 | __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) { |
53 | // CHECK-LABEL: test_mm_msub_ss |
54 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
55 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
56 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
57 | // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 |
58 | // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]]) |
59 | // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 |
60 | return _mm_msub_ss(a, b, c); |
61 | } |
62 | |
63 | __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) { |
64 | // CHECK-LABEL: test_mm_msub_sd |
65 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
66 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
67 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
68 | // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 |
69 | // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]]) |
70 | // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 |
71 | return _mm_msub_sd(a, b, c); |
72 | } |
73 | |
74 | __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) { |
75 | // CHECK-LABEL: test_mm_nmacc_ps |
76 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
77 | // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) |
78 | return _mm_nmacc_ps(a, b, c); |
79 | } |
80 | |
81 | __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) { |
82 | // CHECK-LABEL: test_mm_nmacc_pd |
83 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
84 | // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) |
85 | return _mm_nmacc_pd(a, b, c); |
86 | } |
87 | |
88 | __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) { |
89 | // CHECK-LABEL: test_mm_nmacc_ss |
90 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
91 | // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0 |
92 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
93 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
94 | // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}}) |
95 | // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 |
96 | return _mm_nmacc_ss(a, b, c); |
97 | } |
98 | |
99 | __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) { |
100 | // CHECK-LABEL: test_mm_nmacc_sd |
101 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
102 | // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0 |
103 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
104 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
105 | // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}}) |
106 | // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 |
107 | return _mm_nmacc_sd(a, b, c); |
108 | } |
109 | |
110 | __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) { |
111 | // CHECK-LABEL: test_mm_nmsub_ps |
112 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
113 | // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
114 | // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) |
115 | return _mm_nmsub_ps(a, b, c); |
116 | } |
117 | |
118 | __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) { |
119 | // CHECK-LABEL: test_mm_nmsub_pd |
120 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
121 | // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
122 | // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) |
123 | return _mm_nmsub_pd(a, b, c); |
124 | } |
125 | |
126 | __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) { |
127 | // CHECK-LABEL: test_mm_nmsub_ss |
128 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
129 | // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
130 | // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0 |
131 | // CHECK: extractelement <4 x float> %{{.*}}, i64 0 |
132 | // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 |
133 | // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]]) |
134 | // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 |
135 | return _mm_nmsub_ss(a, b, c); |
136 | } |
137 | |
138 | __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) { |
139 | // CHECK-LABEL: test_mm_nmsub_sd |
140 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
141 | // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
142 | // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0 |
143 | // CHECK: extractelement <2 x double> %{{.*}}, i64 0 |
144 | // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 |
145 | // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]]) |
146 | // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 |
147 | return _mm_nmsub_sd(a, b, c); |
148 | } |
149 | |
150 | __m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) { |
151 | // CHECK-LABEL: test_mm_maddsub_ps |
152 | // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) |
153 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
154 | // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] |
155 | // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
156 | return _mm_maddsub_ps(a, b, c); |
157 | } |
158 | |
159 | __m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) { |
160 | // CHECK-LABEL: test_mm_maddsub_pd |
161 | // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) |
162 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
163 | // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]] |
164 | // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> <i32 0, i32 3> |
165 | return _mm_maddsub_pd(a, b, c); |
166 | } |
167 | |
168 | __m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) { |
169 | // CHECK-LABEL: test_mm_msubadd_ps |
170 | // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} |
171 | // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]] |
172 | // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) |
173 | // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
174 | return _mm_msubadd_ps(a, b, c); |
175 | } |
176 | |
177 | __m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) { |
178 | // CHECK-LABEL: test_mm_msubadd_pd |
179 | // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
180 | // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]] |
181 | // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) |
182 | // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> <i32 0, i32 3> |
183 | return _mm_msubadd_pd(a, b, c); |
184 | } |
185 | |
186 | __m256 test_mm256_macc_ps(__m256 a, __m256 b, __m256 c) { |
187 | // CHECK-LABEL: test_mm256_macc_ps |
188 | // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) |
189 | return _mm256_macc_ps(a, b, c); |
190 | } |
191 | |
192 | __m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) { |
193 | // CHECK-LABEL: test_mm256_macc_pd |
194 | // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) |
195 | return _mm256_macc_pd(a, b, c); |
196 | } |
197 | |
198 | __m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) { |
199 | // CHECK-LABEL: test_mm256_msub_ps |
200 | // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} |
201 | // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) |
202 | return _mm256_msub_ps(a, b, c); |
203 | } |
204 | |
205 | __m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) { |
206 | // CHECK-LABEL: test_mm256_msub_pd |
207 | // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
208 | // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) |
209 | return _mm256_msub_pd(a, b, c); |
210 | } |
211 | |
212 | __m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) { |
213 | // CHECK-LABEL: test_mm256_nmacc_ps |
214 | // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} |
215 | // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) |
216 | return _mm256_nmacc_ps(a, b, c); |
217 | } |
218 | |
219 | __m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) { |
220 | // CHECK-LABEL: test_mm256_nmacc_pd |
221 | // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
222 | // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) |
223 | return _mm256_nmacc_pd(a, b, c); |
224 | } |
225 | |
226 | __m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) { |
227 | // CHECK-LABEL: test_mm256_nmsub_ps |
228 | // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} |
229 | // CHECK: [[NEG2:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} |
230 | // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) |
231 | return _mm256_nmsub_ps(a, b, c); |
232 | } |
233 | |
234 | __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) { |
235 | // CHECK-LABEL: test_mm256_nmsub_pd |
236 | // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
237 | // CHECK: [[NEG2:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
238 | // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) |
239 | return _mm256_nmsub_pd(a, b, c); |
240 | } |
241 | |
242 | __m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) { |
243 | // CHECK-LABEL: test_mm256_maddsub_ps |
244 | // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) |
245 | // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} |
246 | // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] |
247 | // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
248 | return _mm256_maddsub_ps(a, b, c); |
249 | } |
250 | |
251 | __m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) { |
252 | // CHECK-LABEL: test_mm256_maddsub_pd |
253 | // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) |
254 | // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
255 | // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) |
256 | // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
257 | return _mm256_maddsub_pd(a, b, c); |
258 | } |
259 | |
260 | __m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) { |
261 | // CHECK-LABEL: test_mm256_msubadd_ps |
262 | // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} |
263 | // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]] |
264 | // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) |
265 | // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
266 | return _mm256_msubadd_ps(a, b, c); |
267 | } |
268 | |
269 | __m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) { |
270 | // CHECK-LABEL: test_mm256_msubadd_pd |
271 | // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} |
272 | // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]] |
273 | // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) |
274 | // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
275 | return _mm256_msubadd_pd(a, b, c); |
276 | } |
277 | |