1 | // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s |
2 | // RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s |
3 | |
4 | |
5 | #include <immintrin.h> |
6 | |
7 | // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll |
8 | |
// Expects _mm_add_ps to appear in the IR as a <4 x float> fadd.
9 | __m128 test_mm_add_ps(__m128 A, __m128 B) { |
10 | // CHECK-LABEL: test_mm_add_ps |
11 | // CHECK: fadd <4 x float> |
12 | return _mm_add_ps(A, B); |
13 | } |
14 | |
// Expects _mm_add_ss to produce two lane-0 extracts, a scalar float fadd,
// and an insert of the result back into lane 0.
15 | __m128 test_mm_add_ss(__m128 A, __m128 B) { |
16 | // CHECK-LABEL: test_mm_add_ss |
17 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
18 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
19 | // CHECK: fadd float |
20 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 |
21 | return _mm_add_ss(A, B); |
22 | } |
23 | |
// Expects _mm_and_ps to be emitted as an integer `and` on <4 x i32>.
24 | __m128 test_mm_and_ps(__m128 A, __m128 B) { |
25 | // CHECK-LABEL: test_mm_and_ps |
26 | // CHECK: and <4 x i32> |
27 | return _mm_and_ps(A, B); |
28 | } |
29 | |
// Expects _mm_andnot_ps to be emitted as an xor with an all-ones <4 x i32>
// vector followed by an integer `and`.
30 | __m128 test_mm_andnot_ps(__m128 A, __m128 B) { |
31 | // CHECK-LABEL: test_mm_andnot_ps |
32 | // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1> |
33 | // CHECK: and <4 x i32> |
34 | return _mm_andnot_ps(A, B); |
35 | } |
36 | |
// Expects _mm_cmpeq_ps to become `fcmp oeq` on <4 x float>, immediately
// followed by a sign-extension to <4 x i32>, a bitcast back to <4 x float>,
// and the return of that value.
37 | __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { |
38 | // CHECK-LABEL: @test_mm_cmpeq_ps |
39 | // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> |
40 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
41 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
42 | // CHECK-NEXT: ret <4 x float> [[BC]] |
43 | return _mm_cmpeq_ps(__a, __b); |
44 | } |
45 | |
// Expects _mm_cmpeq_ss to call llvm.x86.sse.cmp.ss with predicate immediate 0.
46 | __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) { |
47 | // CHECK-LABEL: @test_mm_cmpeq_ss |
48 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0) |
49 | return _mm_cmpeq_ss(__a, __b); |
50 | } |
51 | |
// Expects _mm_cmpge_ps to become `fcmp ole` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
// NOTE(review): the expected predicate is `ole`, the same as test_mm_cmple_ps;
// presumably the header implements >= by swapping the operands - confirm
// against xmmintrin.h.
52 | __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) { |
53 | // CHECK-LABEL: @test_mm_cmpge_ps |
54 | // CHECK: [[CMP:%.*]] = fcmp ole <4 x float> |
55 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
56 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
57 | // CHECK-NEXT: ret <4 x float> [[BC]] |
58 | return _mm_cmpge_ps(__a, __b); |
59 | } |
60 | |
// Expects _mm_cmpge_ss to call llvm.x86.sse.cmp.ss with predicate immediate 2
// and then shuffle with mask <4, 1, 2, 3>.
// NOTE(review): immediate 2 is also what test_mm_cmple_ss expects; presumably
// the operands are swapped and the shuffle merges lane 0 of the compare with
// the upper lanes of the first argument - confirm against xmmintrin.h.
61 | __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) { |
62 | // CHECK-LABEL: @test_mm_cmpge_ss |
63 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2) |
64 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> |
65 | return _mm_cmpge_ss(__a, __b); |
66 | } |
67 | |
// Expects _mm_cmpgt_ps to become `fcmp olt` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
// NOTE(review): `olt` matches test_mm_cmplt_ps; presumably the header swaps
// the operands - confirm against xmmintrin.h.
68 | __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) { |
69 | // CHECK-LABEL: @test_mm_cmpgt_ps |
70 | // CHECK: [[CMP:%.*]] = fcmp olt <4 x float> |
71 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
72 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
73 | // CHECK-NEXT: ret <4 x float> [[BC]] |
74 | return _mm_cmpgt_ps(__a, __b); |
75 | } |
76 | |
// Expects _mm_cmpgt_ss to call llvm.x86.sse.cmp.ss with predicate immediate 1
// and then shuffle with mask <4, 1, 2, 3>.
// NOTE(review): immediate 1 is also what test_mm_cmplt_ss expects; presumably
// the operands are swapped - confirm against xmmintrin.h.
77 | __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) { |
78 | // CHECK-LABEL: @test_mm_cmpgt_ss |
79 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1) |
80 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> |
81 | return _mm_cmpgt_ss(__a, __b); |
82 | } |
83 | |
// Expects _mm_cmple_ps to become `fcmp ole` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
84 | __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) { |
85 | // CHECK-LABEL: @test_mm_cmple_ps |
86 | // CHECK: [[CMP:%.*]] = fcmp ole <4 x float> |
87 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
88 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
89 | // CHECK-NEXT: ret <4 x float> [[BC]] |
90 | return _mm_cmple_ps(__a, __b); |
91 | } |
92 | |
// Expects _mm_cmple_ss to call llvm.x86.sse.cmp.ss with predicate immediate 2.
93 | __m128 test_mm_cmple_ss(__m128 __a, __m128 __b) { |
94 | // CHECK-LABEL: @test_mm_cmple_ss |
95 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2) |
96 | return _mm_cmple_ss(__a, __b); |
97 | } |
98 | |
// Expects _mm_cmplt_ps to become `fcmp olt` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
99 | __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) { |
100 | // CHECK-LABEL: @test_mm_cmplt_ps |
101 | // CHECK: [[CMP:%.*]] = fcmp olt <4 x float> |
102 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
103 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
104 | // CHECK-NEXT: ret <4 x float> [[BC]] |
105 | return _mm_cmplt_ps(__a, __b); |
106 | } |
107 | |
// Expects _mm_cmplt_ss to call llvm.x86.sse.cmp.ss with predicate immediate 1.
108 | __m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) { |
109 | // CHECK-LABEL: @test_mm_cmplt_ss |
110 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1) |
111 | return _mm_cmplt_ss(__a, __b); |
112 | } |
113 | |
// Expects _mm_cmpneq_ps to become `fcmp une` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
114 | __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) { |
115 | // CHECK-LABEL: @test_mm_cmpneq_ps |
116 | // CHECK: [[CMP:%.*]] = fcmp une <4 x float> |
117 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
118 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
119 | // CHECK-NEXT: ret <4 x float> [[BC]] |
120 | return _mm_cmpneq_ps(__a, __b); |
121 | } |
122 | |
// Expects _mm_cmpneq_ss to call llvm.x86.sse.cmp.ss with predicate immediate 4.
123 | __m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) { |
124 | // CHECK-LABEL: @test_mm_cmpneq_ss |
125 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4) |
126 | return _mm_cmpneq_ss(__a, __b); |
127 | } |
128 | |
// Expects _mm_cmpnge_ps to become `fcmp ugt` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
// NOTE(review): `ugt` matches test_mm_cmpnle_ps; presumably the header swaps
// the operands - confirm against xmmintrin.h.
129 | __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) { |
130 | // CHECK-LABEL: @test_mm_cmpnge_ps |
131 | // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float> |
132 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
133 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
134 | // CHECK-NEXT: ret <4 x float> [[BC]] |
135 | return _mm_cmpnge_ps(__a, __b); |
136 | } |
137 | |
// Expects _mm_cmpnge_ss to call llvm.x86.sse.cmp.ss with predicate immediate 6
// and then shuffle with mask <4, 1, 2, 3>.
// NOTE(review): immediate 6 is also what test_mm_cmpnle_ss expects; presumably
// the operands are swapped - confirm against xmmintrin.h.
138 | __m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) { |
139 | // CHECK-LABEL: @test_mm_cmpnge_ss |
140 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6) |
141 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> |
142 | return _mm_cmpnge_ss(__a, __b); |
143 | } |
144 | |
// Expects _mm_cmpngt_ps to become `fcmp uge` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
// NOTE(review): `uge` matches test_mm_cmpnlt_ps; presumably the header swaps
// the operands - confirm against xmmintrin.h.
145 | __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) { |
146 | // CHECK-LABEL: @test_mm_cmpngt_ps |
147 | // CHECK: [[CMP:%.*]] = fcmp uge <4 x float> |
148 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
149 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
150 | // CHECK-NEXT: ret <4 x float> [[BC]] |
151 | return _mm_cmpngt_ps(__a, __b); |
152 | } |
153 | |
// Expects _mm_cmpngt_ss to call llvm.x86.sse.cmp.ss with predicate immediate 5
// and then shuffle with mask <4, 1, 2, 3>.
// NOTE(review): immediate 5 is also what test_mm_cmpnlt_ss expects; presumably
// the operands are swapped - confirm against xmmintrin.h.
154 | __m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) { |
155 | // CHECK-LABEL: @test_mm_cmpngt_ss |
156 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5) |
157 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> |
158 | return _mm_cmpngt_ss(__a, __b); |
159 | } |
160 | |
// Expects _mm_cmpnle_ps to become `fcmp ugt` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
161 | __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) { |
162 | // CHECK-LABEL: @test_mm_cmpnle_ps |
163 | // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float> |
164 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
165 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
166 | // CHECK-NEXT: ret <4 x float> [[BC]] |
167 | return _mm_cmpnle_ps(__a, __b); |
168 | } |
169 | |
// Expects _mm_cmpnle_ss to call llvm.x86.sse.cmp.ss with predicate immediate 6.
170 | __m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) { |
171 | // CHECK-LABEL: @test_mm_cmpnle_ss |
172 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6) |
173 | return _mm_cmpnle_ss(__a, __b); |
174 | } |
175 | |
// Expects _mm_cmpnlt_ps to become `fcmp uge` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
176 | __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) { |
177 | // CHECK-LABEL: @test_mm_cmpnlt_ps |
178 | // CHECK: [[CMP:%.*]] = fcmp uge <4 x float> |
179 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
180 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
181 | // CHECK-NEXT: ret <4 x float> [[BC]] |
182 | return _mm_cmpnlt_ps(__a, __b); |
183 | } |
184 | |
// Expects _mm_cmpnlt_ss to call llvm.x86.sse.cmp.ss with predicate immediate 5.
185 | __m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) { |
186 | // CHECK-LABEL: @test_mm_cmpnlt_ss |
187 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5) |
188 | return _mm_cmpnlt_ss(__a, __b); |
189 | } |
190 | |
// Expects _mm_cmpord_ps to become `fcmp ord` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
191 | __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) { |
192 | // CHECK-LABEL: @test_mm_cmpord_ps |
193 | // CHECK: [[CMP:%.*]] = fcmp ord <4 x float> |
194 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
195 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
196 | // CHECK-NEXT: ret <4 x float> [[BC]] |
197 | return _mm_cmpord_ps(__a, __b); |
198 | } |
199 | |
// Expects _mm_cmpord_ss to call llvm.x86.sse.cmp.ss with predicate immediate 7.
200 | __m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) { |
201 | // CHECK-LABEL: @test_mm_cmpord_ss |
202 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7) |
203 | return _mm_cmpord_ss(__a, __b); |
204 | } |
205 | |
// Expects _mm_cmpunord_ps to become `fcmp uno` on <4 x float>, then sext,
// bitcast and ret as consecutive instructions.
206 | __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) { |
207 | // CHECK-LABEL: @test_mm_cmpunord_ps |
208 | // CHECK: [[CMP:%.*]] = fcmp uno <4 x float> |
209 | // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> |
210 | // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float> |
211 | // CHECK-NEXT: ret <4 x float> [[BC]] |
212 | return _mm_cmpunord_ps(__a, __b); |
213 | } |
214 | |
// Expects _mm_cmpunord_ss to call llvm.x86.sse.cmp.ss with predicate immediate 3.
215 | __m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) { |
216 | // CHECK-LABEL: @test_mm_cmpunord_ss |
217 | // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3) |
218 | return _mm_cmpunord_ss(__a, __b); |
219 | } |
220 | |
// Expects _mm_comieq_ss to call llvm.x86.sse.comieq.ss, which yields an i32.
221 | int test_mm_comieq_ss(__m128 A, __m128 B) { |
222 | // CHECK-LABEL: test_mm_comieq_ss |
223 | // CHECK: call i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
224 | return _mm_comieq_ss(A, B); |
225 | } |
226 | |
// Expects _mm_comige_ss to call llvm.x86.sse.comige.ss, which yields an i32.
227 | int test_mm_comige_ss(__m128 A, __m128 B) { |
228 | // CHECK-LABEL: test_mm_comige_ss |
229 | // CHECK: call i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
230 | return _mm_comige_ss(A, B); |
231 | } |
232 | |
// Expects _mm_comigt_ss to call llvm.x86.sse.comigt.ss, which yields an i32.
233 | int test_mm_comigt_ss(__m128 A, __m128 B) { |
234 | // CHECK-LABEL: test_mm_comigt_ss |
235 | // CHECK: call i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
236 | return _mm_comigt_ss(A, B); |
237 | } |
238 | |
// Expects _mm_comile_ss to call llvm.x86.sse.comile.ss, which yields an i32.
239 | int test_mm_comile_ss(__m128 A, __m128 B) { |
240 | // CHECK-LABEL: test_mm_comile_ss |
241 | // CHECK: call i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
242 | return _mm_comile_ss(A, B); |
243 | } |
244 | |
// Expects _mm_comilt_ss to call llvm.x86.sse.comilt.ss, which yields an i32.
245 | int test_mm_comilt_ss(__m128 A, __m128 B) { |
246 | // CHECK-LABEL: test_mm_comilt_ss |
247 | // CHECK: call i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
248 | return _mm_comilt_ss(A, B); |
249 | } |
250 | |
// Expects _mm_comineq_ss to call llvm.x86.sse.comineq.ss, which yields an i32.
251 | int test_mm_comineq_ss(__m128 A, __m128 B) { |
252 | // CHECK-LABEL: test_mm_comineq_ss |
253 | // CHECK: call i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
254 | return _mm_comineq_ss(A, B); |
255 | } |
256 | |
// Expects _mm_cvt_ss2si to call llvm.x86.sse.cvtss2si (i32 result), the same
// intrinsic that test_mm_cvtss_si32 expects.
257 | int test_mm_cvt_ss2si(__m128 A) { |
258 | // CHECK-LABEL: test_mm_cvt_ss2si |
259 | // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}}) |
260 | return _mm_cvt_ss2si(A); |
261 | } |
262 | |
// Expects _mm_cvtsi32_ss to convert the i32 with sitofp and insert the
// resulting float into lane 0.
263 | __m128 test_mm_cvtsi32_ss(__m128 A, int B) { |
264 | // CHECK-LABEL: test_mm_cvtsi32_ss |
265 | // CHECK: sitofp i32 %{{.*}} to float |
266 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 |
267 | return _mm_cvtsi32_ss(A, B); |
268 | } |
269 | |
// Expects _mm_cvtsi64_ss to convert the i64 with sitofp and insert the
// resulting float into lane 0.
270 | __m128 test_mm_cvtsi64_ss(__m128 A, long long B) { |
271 | // CHECK-LABEL: test_mm_cvtsi64_ss |
272 | // CHECK: sitofp i64 %{{.*}} to float |
273 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 |
274 | return _mm_cvtsi64_ss(A, B); |
275 | } |
276 | |
// Expects _mm_cvtss_f32 to be a plain extract of lane 0.
277 | float test_mm_cvtss_f32(__m128 A) { |
278 | // CHECK-LABEL: test_mm_cvtss_f32 |
279 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
280 | return _mm_cvtss_f32(A); |
281 | } |
282 | |
// Expects _mm_cvtss_si32 to call llvm.x86.sse.cvtss2si with an i32 result.
283 | int test_mm_cvtss_si32(__m128 A) { |
284 | // CHECK-LABEL: test_mm_cvtss_si32 |
285 | // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}}) |
286 | return _mm_cvtss_si32(A); |
287 | } |
288 | |
// Expects _mm_cvtss_si64 to call llvm.x86.sse.cvtss2si64 with an i64 result.
289 | long long test_mm_cvtss_si64(__m128 A) { |
290 | // CHECK-LABEL: test_mm_cvtss_si64 |
291 | // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}}) |
292 | return _mm_cvtss_si64(A); |
293 | } |
294 | |
// Expects _mm_cvtt_ss2si to call llvm.x86.sse.cvttss2si (i32 result), the
// same intrinsic that test_mm_cvttss_si32 expects.
295 | int test_mm_cvtt_ss2si(__m128 A) { |
296 | // CHECK-LABEL: test_mm_cvtt_ss2si |
297 | // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) |
298 | return _mm_cvtt_ss2si(A); |
299 | } |
300 | |
// Expects _mm_cvttss_si32 to call llvm.x86.sse.cvttss2si with an i32 result.
301 | int test_mm_cvttss_si32(__m128 A) { |
302 | // CHECK-LABEL: test_mm_cvttss_si32 |
303 | // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) |
304 | return _mm_cvttss_si32(A); |
305 | } |
306 | |
// Expects _mm_cvttss_si64 to call llvm.x86.sse.cvttss2si64 with an i64 result.
307 | long long test_mm_cvttss_si64(__m128 A) { |
308 | // CHECK-LABEL: test_mm_cvttss_si64 |
309 | // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}}) |
310 | return _mm_cvttss_si64(A); |
311 | } |
312 | |
// Expects _mm_div_ps to appear in the IR as a <4 x float> fdiv.
313 | __m128 test_mm_div_ps(__m128 A, __m128 B) { |
314 | // CHECK-LABEL: test_mm_div_ps |
315 | // CHECK: fdiv <4 x float> |
316 | return _mm_div_ps(A, B); |
317 | } |
318 | |
// Expects _mm_div_ss to produce two lane-0 extracts, a scalar float fdiv,
// and an insert of the result back into lane 0.
319 | __m128 test_mm_div_ss(__m128 A, __m128 B) { |
320 | // CHECK-LABEL: test_mm_div_ss |
321 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
322 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
323 | // CHECK: fdiv float |
324 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 |
325 | return _mm_div_ss(A, B); |
326 | } |
327 | |
// Expects _MM_GET_EXCEPTION_MASK to call llvm.x86.sse.stmxcsr and mask the
// stored value with 8064 (0x1F80).
// The parameter list is spelled (void) so the definition is a real prototype.
328 | unsigned int test_MM_GET_EXCEPTION_MASK(void) { |
329 | // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK |
330 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}}) |
331 | // CHECK: and i32 %{{.*}}, 8064 |
332 | return _MM_GET_EXCEPTION_MASK(); |
333 | } |
334 | |
// Expects _MM_GET_EXCEPTION_STATE to call llvm.x86.sse.stmxcsr and mask the
// stored value with 63 (0x3F).
// The parameter list is spelled (void) so the definition is a real prototype.
335 | unsigned int test_MM_GET_EXCEPTION_STATE(void) { |
336 | // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE |
337 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}}) |
338 | // CHECK: and i32 %{{.*}}, 63 |
339 | return _MM_GET_EXCEPTION_STATE(); |
340 | } |
341 | |
// Expects _MM_GET_FLUSH_ZERO_MODE to call llvm.x86.sse.stmxcsr and mask the
// stored value with 32768 (0x8000).
// The parameter list is spelled (void) so the definition is a real prototype.
342 | unsigned int test_MM_GET_FLUSH_ZERO_MODE(void) { |
343 | // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE |
344 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}}) |
345 | // CHECK: and i32 %{{.*}}, 32768 |
346 | return _MM_GET_FLUSH_ZERO_MODE(); |
347 | } |
348 | |
// Expects _MM_GET_ROUNDING_MODE to call llvm.x86.sse.stmxcsr and mask the
// stored value with 24576 (0x6000).
// The parameter list is spelled (void) so the definition is a real prototype.
349 | unsigned int test_MM_GET_ROUNDING_MODE(void) { |
350 | // CHECK-LABEL: test_MM_GET_ROUNDING_MODE |
351 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}}) |
352 | // CHECK: and i32 %{{.*}}, 24576 |
353 | return _MM_GET_ROUNDING_MODE(); |
354 | } |
355 | |
// Expects _mm_getcsr to call llvm.x86.sse.stmxcsr and then load the i32 it
// stored.
// The parameter list is spelled (void) so the definition is a real prototype.
356 | unsigned int test_mm_getcsr(void) { |
357 | // CHECK-LABEL: test_mm_getcsr |
358 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}}) |
359 | // CHECK: load i32 |
360 | return _mm_getcsr(); |
361 | } |
362 | |
// Expects _mm_load_ps to be a <4 x float> load with 16-byte alignment.
363 | __m128 test_mm_load_ps(float* y) { |
364 | // CHECK-LABEL: test_mm_load_ps |
365 | // CHECK: load <4 x float>, <4 x float>* {{.*}}, align 16 |
366 | return _mm_load_ps(y); |
367 | } |
368 | |
// Expects _mm_load_ps1 to load a scalar float (align 4) and then fill lanes
// 0 through 3 with four insertelement instructions, starting from undef.
369 | __m128 test_mm_load_ps1(float* y) { |
370 | // CHECK-LABEL: test_mm_load_ps1 |
371 | // CHECK: load float, float* %{{.*}}, align 4 |
372 | // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0 |
373 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1 |
374 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2 |
375 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3 |
376 | return _mm_load_ps1(y); |
377 | } |
378 | |
// Expects _mm_load_ss to load a scalar float with align 1, insert it into
// lane 0, and insert 0.0 into lanes 1 through 3.
379 | __m128 test_mm_load_ss(float* y) { |
380 | // CHECK-LABEL: test_mm_load_ss |
381 | // CHECK: load float, float* {{.*}}, align 1{{$}} |
382 | // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0 |
383 | // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1 |
384 | // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2 |
385 | // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3 |
386 | return _mm_load_ss(y); |
387 | } |
388 | |
// Expects _mm_load1_ps to load a scalar float (align 4) and then fill lanes
// 0 through 3 with four insertelement instructions, starting from undef.
// The expected IR is the same as for test_mm_load_ps1.
389 | __m128 test_mm_load1_ps(float* y) { |
390 | // CHECK-LABEL: test_mm_load1_ps |
391 | // CHECK: load float, float* %{{.*}}, align 4 |
392 | // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0 |
393 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1 |
394 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2 |
395 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3 |
396 | return _mm_load1_ps(y); |
397 | } |
398 | |
// Expects _mm_loadh_pi to load <2 x float> with align 1, run one shuffle
// whose mask begins <0, 1, ...>, and then a shuffle with mask <0, 1, 4, 5>.
399 | __m128 test_mm_loadh_pi(__m128 x, __m64* y) { |
400 | // CHECK-LABEL: test_mm_loadh_pi |
401 | // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}} |
402 | // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1 |
403 | // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5> |
404 | return _mm_loadh_pi(x,y); |
405 | } |
406 | |
// Expects _mm_loadl_pi to load <2 x float> with align 1, run one shuffle
// whose mask begins <0, 1, ...>, and then a shuffle with mask <4, 5, 2, 3>.
407 | __m128 test_mm_loadl_pi(__m128 x, __m64* y) { |
408 | // CHECK-LABEL: test_mm_loadl_pi |
409 | // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}} |
410 | // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1 |
411 | // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3> |
412 | return _mm_loadl_pi(x,y); |
413 | } |
414 | |
// Expects _mm_loadr_ps to be a 16-byte-aligned <4 x float> load followed by
// a lane-reversing shuffle with mask <3, 2, 1, 0>.
415 | __m128 test_mm_loadr_ps(float* A) { |
416 | // CHECK-LABEL: test_mm_loadr_ps |
417 | // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16 |
418 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
419 | return _mm_loadr_ps(A); |
420 | } |
421 | |
// Expects _mm_loadu_ps to be a <4 x float> load with align 1.
422 | __m128 test_mm_loadu_ps(float* A) { |
423 | // CHECK-LABEL: test_mm_loadu_ps |
424 | // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}} |
425 | return _mm_loadu_ps(A); |
426 | } |
427 | |
// Expects _mm_max_ps to call the llvm.x86.sse.max.ps intrinsic.
428 | __m128 test_mm_max_ps(__m128 A, __m128 B) { |
429 | // CHECK-LABEL: test_mm_max_ps |
430 | // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
431 | return _mm_max_ps(A, B); |
432 | } |
433 | |
// Expects _mm_max_ss to call the llvm.x86.sse.max.ss intrinsic.
434 | __m128 test_mm_max_ss(__m128 A, __m128 B) { |
435 | // CHECK-LABEL: test_mm_max_ss |
436 | // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
437 | return _mm_max_ss(A, B); |
438 | } |
439 | |
// Expects _mm_min_ps to call the llvm.x86.sse.min.ps intrinsic.
440 | __m128 test_mm_min_ps(__m128 A, __m128 B) { |
441 | // CHECK-LABEL: test_mm_min_ps |
442 | // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
443 | return _mm_min_ps(A, B); |
444 | } |
445 | |
// Expects _mm_min_ss to call the llvm.x86.sse.min.ss intrinsic.
446 | __m128 test_mm_min_ss(__m128 A, __m128 B) { |
447 | // CHECK-LABEL: test_mm_min_ss |
448 | // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
449 | return _mm_min_ss(A, B); |
450 | } |
451 | |
// Expects _mm_move_ss to extract lane 0 from one vector and insert that
// float into lane 0 of another.
452 | __m128 test_mm_move_ss(__m128 A, __m128 B) { |
453 | // CHECK-LABEL: test_mm_move_ss |
454 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
455 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 |
456 | return _mm_move_ss(A, B); |
457 | } |
458 | |
// Expects _mm_movehl_ps to be a single shuffle with mask <6, 7, 2, 3>.
459 | __m128 test_mm_movehl_ps(__m128 A, __m128 B) { |
460 | // CHECK-LABEL: test_mm_movehl_ps |
461 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3> |
462 | return _mm_movehl_ps(A, B); |
463 | } |
464 | |
// Expects _mm_movelh_ps to be a single shuffle with mask <0, 1, 4, 5>.
465 | __m128 test_mm_movelh_ps(__m128 A, __m128 B) { |
466 | // CHECK-LABEL: test_mm_movelh_ps |
467 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5> |
468 | return _mm_movelh_ps(A, B); |
469 | } |
470 | |
// Expects _mm_movemask_ps to call llvm.x86.sse.movmsk.ps with an i32 result.
471 | int test_mm_movemask_ps(__m128 A) { |
472 | // CHECK-LABEL: test_mm_movemask_ps |
473 | // CHECK: call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}}) |
474 | return _mm_movemask_ps(A); |
475 | } |
476 | |
// Expects _mm_mul_ps to appear in the IR as a <4 x float> fmul.
477 | __m128 test_mm_mul_ps(__m128 A, __m128 B) { |
478 | // CHECK-LABEL: test_mm_mul_ps |
479 | // CHECK: fmul <4 x float> |
480 | return _mm_mul_ps(A, B); |
481 | } |
482 | |
// Expects _mm_mul_ss to produce two lane-0 extracts, a scalar float fmul,
// and an insert of the result back into lane 0.
483 | __m128 test_mm_mul_ss(__m128 A, __m128 B) { |
484 | // CHECK-LABEL: test_mm_mul_ss |
485 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
486 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
487 | // CHECK: fmul float |
488 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 |
489 | return _mm_mul_ss(A, B); |
490 | } |
491 | |
// Expects _mm_or_ps to be emitted as an integer `or` on <4 x i32>.
492 | __m128 test_mm_or_ps(__m128 A, __m128 B) { |
493 | // CHECK-LABEL: test_mm_or_ps |
494 | // CHECK: or <4 x i32> |
495 | return _mm_or_ps(A, B); |
496 | } |
497 | |
// Expects _mm_prefetch with hint argument 0 to call llvm.prefetch with the
// trailing operands i32 0, i32 0, i32 1.
498 | void test_mm_prefetch(char const* p) { |
499 | // CHECK-LABEL: test_mm_prefetch |
500 | // CHECK: call void @llvm.prefetch(i8* {{.*}}, i32 0, i32 0, i32 1) |
501 | _mm_prefetch(p, 0); |
502 | } |
503 | |
// Expects _mm_rcp_ps to call the llvm.x86.sse.rcp.ps intrinsic.
504 | __m128 test_mm_rcp_ps(__m128 x) { |
505 | // CHECK-LABEL: test_mm_rcp_ps |
506 | // CHECK: call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}}) |
507 | return _mm_rcp_ps(x); |
508 | } |
509 | |
// Expects _mm_rcp_ss to call the llvm.x86.sse.rcp.ss intrinsic.
510 | __m128 test_mm_rcp_ss(__m128 x) { |
511 | // CHECK-LABEL: test_mm_rcp_ss |
512 | // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}}) |
513 | return _mm_rcp_ss(x); |
514 | } |
515 | |
// Expects _mm_rsqrt_ps to call the llvm.x86.sse.rsqrt.ps intrinsic.
516 | __m128 test_mm_rsqrt_ps(__m128 x) { |
517 | // CHECK-LABEL: test_mm_rsqrt_ps |
518 | // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}}) |
519 | return _mm_rsqrt_ps(x); |
520 | } |
521 | |
// Expects _mm_rsqrt_ss to call the llvm.x86.sse.rsqrt.ss intrinsic.
522 | __m128 test_mm_rsqrt_ss(__m128 x) { |
523 | // CHECK-LABEL: test_mm_rsqrt_ss |
524 | // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}}) |
525 | return _mm_rsqrt_ss(x); |
526 | } |
527 | |
// Expects _MM_SET_EXCEPTION_MASK to be a read-modify-write sequence:
// stmxcsr, load, `and` with -8065 (the complement of 0x1F80), `or`, store,
// then ldmxcsr.
528 | void test_MM_SET_EXCEPTION_MASK(unsigned int A) { |
529 | // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK |
530 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}}) |
531 | // CHECK: load i32 |
532 | // CHECK: and i32 {{.*}}, -8065 |
533 | // CHECK: or i32 |
534 | // CHECK: store i32 |
535 | // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}}) |
536 | _MM_SET_EXCEPTION_MASK(A); |
537 | } |
538 | |
// Expects _MM_SET_EXCEPTION_STATE to be a read-modify-write sequence:
// stmxcsr, load, `and` with -64 (the complement of 0x3F), `or`, store,
// then ldmxcsr.
539 | void test_MM_SET_EXCEPTION_STATE(unsigned int A) { |
540 | // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE |
541 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}}) |
542 | // CHECK: load i32 |
543 | // CHECK: and i32 {{.*}}, -64 |
544 | // CHECK: or i32 |
545 | // CHECK: store i32 |
546 | // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}}) |
547 | _MM_SET_EXCEPTION_STATE(A); |
548 | } |
549 | |
// Expects _MM_SET_FLUSH_ZERO_MODE to be a read-modify-write sequence:
// stmxcsr, load, `and` with -32769 (the complement of 0x8000), `or`, store,
// then ldmxcsr.
550 | void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) { |
551 | // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE |
552 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}}) |
553 | // CHECK: load i32 |
554 | // CHECK: and i32 {{.*}}, -32769 |
555 | // CHECK: or i32 |
556 | // CHECK: store i32 |
557 | // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}}) |
558 | _MM_SET_FLUSH_ZERO_MODE(A); |
559 | } |
560 | |
// Expects _mm_set_ps to build the vector with four insertelement
// instructions, lanes 0 through 3, starting from undef.
561 | __m128 test_mm_set_ps(float A, float B, float C, float D) { |
562 | // CHECK-LABEL: test_mm_set_ps |
563 | // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 |
564 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 |
565 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 |
566 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 |
567 | return _mm_set_ps(A, B, C, D); |
568 | } |
569 | |
// Expects _mm_set_ps1 to build the vector with four insertelement
// instructions, lanes 0 through 3, starting from undef.
570 | __m128 test_mm_set_ps1(float A) { |
571 | // CHECK-LABEL: test_mm_set_ps1 |
572 | // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 |
573 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 |
574 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 |
575 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 |
576 | return _mm_set_ps1(A); |
577 | } |
578 | |
// Expects _MM_SET_ROUNDING_MODE to be a read-modify-write sequence:
// stmxcsr, load, `and` with -24577 (the complement of 0x6000), `or`, store,
// then ldmxcsr.
579 | void test_MM_SET_ROUNDING_MODE(unsigned int A) { |
580 | // CHECK-LABEL: test_MM_SET_ROUNDING_MODE |
581 | // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}}) |
582 | // CHECK: load i32 |
583 | // CHECK: and i32 {{.*}}, -24577 |
584 | // CHECK: or i32 |
585 | // CHECK: store i32 |
586 | // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}}) |
587 | _MM_SET_ROUNDING_MODE(A); |
588 | } |
589 | |
// Expects _mm_set_ss to insert a float into lane 0 of undef and then insert
// 0.0 into lanes 1 through 3.
590 | __m128 test_mm_set_ss(float A) { |
591 | // CHECK-LABEL: test_mm_set_ss |
592 | // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 |
593 | // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1 |
594 | // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2 |
595 | // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3 |
596 | return _mm_set_ss(A); |
597 | } |
598 | |
// Expects _mm_set1_ps to build the vector with four insertelement
// instructions, lanes 0 through 3, starting from undef.
599 | __m128 test_mm_set1_ps(float A) { |
600 | // CHECK-LABEL: test_mm_set1_ps |
601 | // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 |
602 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 |
603 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 |
604 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 |
605 | return _mm_set1_ps(A); |
606 | } |
607 | |
// Expects _mm_setcsr to store an i32 and then call llvm.x86.sse.ldmxcsr.
608 | void test_mm_setcsr(unsigned int A) { |
609 | // CHECK-LABEL: test_mm_setcsr |
610 | // CHECK: store i32 |
611 | // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}}) |
612 | _mm_setcsr(A); |
613 | } |
614 | |
// Expects _mm_setr_ps to build the vector with four insertelement
// instructions, lanes 0 through 3, starting from undef.
615 | __m128 test_mm_setr_ps(float A, float B, float C, float D) { |
616 | // CHECK-LABEL: test_mm_setr_ps |
617 | // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 |
618 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 |
619 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 |
620 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 |
621 | return _mm_setr_ps(A, B, C, D); |
622 | } |
623 | |
// Expects _mm_setzero_ps to store a <4 x float> zeroinitializer.
// The parameter list is spelled (void) so the definition is a real prototype.
624 | __m128 test_mm_setzero_ps(void) { |
625 | // CHECK-LABEL: test_mm_setzero_ps |
626 | // CHECK: store <4 x float> zeroinitializer |
627 | return _mm_setzero_ps(); |
628 | } |
629 | |
// Expects _mm_sfence to call the llvm.x86.sse.sfence intrinsic.
// The parameter list is spelled (void) so the definition is a real prototype.
630 | void test_mm_sfence(void) { |
631 | // CHECK-LABEL: test_mm_sfence |
632 | // CHECK: call void @llvm.x86.sse.sfence() |
633 | _mm_sfence(); |
634 | } |
635 | |
// Expects _mm_shuffle_ps with immediate 0 to be a shuffle with mask
// <0, 0, 4, 4>.
636 | __m128 test_mm_shuffle_ps(__m128 A, __m128 B) { |
637 | // CHECK-LABEL: test_mm_shuffle_ps |
638 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4> |
639 | return _mm_shuffle_ps(A, B, 0); |
640 | } |
641 | |
// Expects _mm_sqrt_ps to call the generic llvm.sqrt.v4f32 intrinsic.
642 | __m128 test_mm_sqrt_ps(__m128 x) { |
643 | // CHECK-LABEL: test_mm_sqrt_ps |
644 | // CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}}) |
645 | return _mm_sqrt_ps(x); |
646 | } |
647 | |
// Expects _mm_sqrt_ss to extract lane 0, call the generic llvm.sqrt.f32
// intrinsic on it, and insert the result back into lane 0. The lane indices
// here are i64, unlike the i32 indices expected by most other tests.
// The function is anchored with a label directive, like every other test in
// this file, so its expectations cannot match IR from a neighbouring function.
// NOTE(review): the name lacks the `mm_` infix used by the other tests.
648 | __m128 test_sqrt_ss(__m128 x) { |
649 | // CHECK-LABEL: @test_sqrt_ss |
650 | // CHECK: extractelement <4 x float> {{.*}}, i64 0 |
651 | // CHECK: call float @llvm.sqrt.f32(float {{.*}}) |
652 | // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0 |
653 | return _mm_sqrt_ss(x); |
654 | } |
655 | |
// Expects _mm_store_ps to be a <4 x float> store with 16-byte alignment.
656 | void test_mm_store_ps(float* x, __m128 y) { |
657 | // CHECK-LABEL: test_mm_store_ps |
658 | // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16 |
659 | _mm_store_ps(x, y); |
660 | } |
661 | |
// Expects _mm_store_ps1 to shuffle with an all-zero mask (every lane takes
// element 0) and then store the <4 x float> with 16-byte alignment.
662 | void test_mm_store_ps1(float* x, __m128 y) { |
663 | // CHECK-LABEL: test_mm_store_ps1 |
664 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer |
665 | // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16 |
666 | _mm_store_ps1(x, y); |
667 | } |
668 | |
// Expects _mm_store_ss to extract lane 0 and store that float with align 1.
669 | void test_mm_store_ss(float* x, __m128 y) { |
670 | // CHECK-LABEL: test_mm_store_ss |
671 | // CHECK: extractelement <4 x float> {{.*}}, i32 0 |
672 | // CHECK: store float %{{.*}}, float* {{.*}}, align 1{{$}} |
673 | _mm_store_ss(x, y); |
674 | } |
675 | |
// Expects _mm_store1_ps to shuffle with an all-zero mask (every lane takes
// element 0) and then store the <4 x float> with 16-byte alignment.
// The expected IR is the same as for test_mm_store_ps1.
676 | void test_mm_store1_ps(float* x, __m128 y) { |
677 | // CHECK-LABEL: test_mm_store1_ps |
678 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer |
679 | // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16 |
680 | _mm_store1_ps(x, y); |
681 | } |
682 | |
// Expects _mm_storeh_pi to bitcast the vector to <2 x i64>, extract
// element 1, and store that i64.
683 | void test_mm_storeh_pi(__m64* x, __m128 y) { |
684 | // CHECK-LABEL: test_mm_storeh_pi |
685 | // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64> |
686 | // CHECK: extractelement <2 x i64> %{{.*}}, i64 1 |
687 | // CHECK: store i64 %{{.*}}, i64* {{.*}} |
688 | _mm_storeh_pi(x, y); |
689 | } |
690 | |
// Expects _mm_storel_pi to bitcast the vector to <2 x i64>, extract
// element 0, and store that i64.
691 | void test_mm_storel_pi(__m64* x, __m128 y) { |
692 | // CHECK-LABEL: test_mm_storel_pi |
693 | // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64> |
694 | // CHECK: extractelement <2 x i64> %{{.*}}, i64 0 |
695 | // CHECK: store i64 %{{.*}}, i64* {{.*}} |
696 | _mm_storel_pi(x, y); |
697 | } |
698 | |
// Expects _mm_storer_ps to reverse the lanes with a <3, 2, 1, 0> shuffle and
// then store the <4 x float> with 16-byte alignment.
699 | void test_mm_storer_ps(float* x, __m128 y) { |
700 | // CHECK-LABEL: test_mm_storer_ps |
701 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
702 | // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16 |
703 | _mm_storer_ps(x, y); |
704 | } |
705 | |
// Expects _mm_storeu_ps to be a <4 x float> store with align 1, immediately
// followed by `ret void`.
706 | void test_mm_storeu_ps(float* x, __m128 y) { |
707 | // CHECK-LABEL: test_mm_storeu_ps |
708 | // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} |
709 | // CHECK-NEXT: ret void |
710 | _mm_storeu_ps(x, y); |
711 | } |
712 | |
// Expects _mm_stream_ps to be a 16-byte-aligned <4 x float> store carrying
// !nontemporal metadata.
// B is declared __m128 so it matches the <4 x float> value being stored and
// the call does not rely on an implicit vector-type conversion.
713 | void test_mm_stream_ps(float*A, __m128 B) { |
714 | // CHECK-LABEL: test_mm_stream_ps |
715 | // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16, !nontemporal |
716 | _mm_stream_ps(A, B); |
717 | } |
718 | |
// Expects _mm_sub_ps to appear in the IR as a <4 x float> fsub.
719 | __m128 test_mm_sub_ps(__m128 A, __m128 B) { |
720 | // CHECK-LABEL: test_mm_sub_ps |
721 | // CHECK: fsub <4 x float> |
722 | return _mm_sub_ps(A, B); |
723 | } |
724 | |
// Expects _mm_sub_ss to produce two lane-0 extracts, a scalar float fsub,
// and an insert of the result back into lane 0.
725 | __m128 test_mm_sub_ss(__m128 A, __m128 B) { |
726 | // CHECK-LABEL: test_mm_sub_ss |
727 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
728 | // CHECK: extractelement <4 x float> %{{.*}}, i32 0 |
729 | // CHECK: fsub float |
730 | // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 |
731 | return _mm_sub_ss(A, B); |
732 | } |
733 | |
// Expects the _MM_TRANSPOSE4_PS macro to expand to eight shuffles, in order:
// two with mask <0, 4, 1, 5>, two with mask <2, 6, 3, 7>, and then
// <0, 1, 4, 5> / <6, 7, 2, 3> twice over.
// The rows are passed by pointer and dereferenced at the macro call.
734 | void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) { |
735 | // CHECK-LABEL: test_MM_TRANSPOSE4_PS |
736 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
737 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
738 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
739 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
740 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5> |
741 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3> |
742 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5> |
743 | // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3> |
744 | _MM_TRANSPOSE4_PS(*A, *B, *C, *D); |
745 | } |
746 | |
// Expects _mm_ucomieq_ss to call llvm.x86.sse.ucomieq.ss, which yields an i32.
747 | int test_mm_ucomieq_ss(__m128 A, __m128 B) { |
748 | // CHECK-LABEL: test_mm_ucomieq_ss |
749 | // CHECK: call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
750 | return _mm_ucomieq_ss(A, B); |
751 | } |
752 | |
// Expects _mm_ucomige_ss to call llvm.x86.sse.ucomige.ss, which yields an i32.
753 | int test_mm_ucomige_ss(__m128 A, __m128 B) { |
754 | // CHECK-LABEL: test_mm_ucomige_ss |
755 | // CHECK: call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
756 | return _mm_ucomige_ss(A, B); |
757 | } |
758 | |
// Expects _mm_ucomigt_ss to call llvm.x86.sse.ucomigt.ss, which yields an i32.
759 | int test_mm_ucomigt_ss(__m128 A, __m128 B) { |
760 | // CHECK-LABEL: test_mm_ucomigt_ss |
761 | // CHECK: call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
762 | return _mm_ucomigt_ss(A, B); |
763 | } |
764 | |
// Expects _mm_ucomile_ss to call llvm.x86.sse.ucomile.ss, which yields an i32.
765 | int test_mm_ucomile_ss(__m128 A, __m128 B) { |
766 | // CHECK-LABEL: test_mm_ucomile_ss |
767 | // CHECK: call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
768 | return _mm_ucomile_ss(A, B); |
769 | } |
770 | |
// Expects _mm_ucomilt_ss to call llvm.x86.sse.ucomilt.ss, which yields an i32.
771 | int test_mm_ucomilt_ss(__m128 A, __m128 B) { |
772 | // CHECK-LABEL: test_mm_ucomilt_ss |
773 | // CHECK: call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
774 | return _mm_ucomilt_ss(A, B); |
775 | } |
776 | |
// Expects _mm_ucomineq_ss to call llvm.x86.sse.ucomineq.ss, which yields an i32.
777 | int test_mm_ucomineq_ss(__m128 A, __m128 B) { |
778 | // CHECK-LABEL: test_mm_ucomineq_ss |
779 | // CHECK: call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) |
780 | return _mm_ucomineq_ss(A, B); |
781 | } |
782 | |
// Expects _mm_undefined_ps to return a <4 x float> zeroinitializer.
// The parameter list is spelled (void) so the definition is a real prototype.
783 | __m128 test_mm_undefined_ps(void) { |
784 | // CHECK-LABEL: @test_mm_undefined_ps |
785 | // CHECK: ret <4 x float> zeroinitializer |
786 | return _mm_undefined_ps(); |
787 | } |
788 | |
// Expects _mm_unpackhi_ps to be a single shuffle with mask <2, 6, 3, 7>.
789 | __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) { |
790 | // CHECK-LABEL: test_mm_unpackhi_ps |
791 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
792 | return _mm_unpackhi_ps(A, B); |
793 | } |
794 | |
// Expects _mm_unpacklo_ps to be a single shuffle with mask <0, 4, 1, 5>.
795 | __m128 test_mm_unpacklo_ps(__m128 A, __m128 B) { |
796 | // CHECK-LABEL: test_mm_unpacklo_ps |
797 | // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
798 | return _mm_unpacklo_ps(A, B); |
799 | } |
800 | |
// Expects _mm_xor_ps to be emitted as an integer `xor` on <4 x i32>.
801 | __m128 test_mm_xor_ps(__m128 A, __m128 B) { |
802 | // CHECK-LABEL: test_mm_xor_ps |
803 | // CHECK: xor <4 x i32> |
804 | return _mm_xor_ps(A, B); |
805 | } |
806 | |