1 | // RUN: %clang_cc1 -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s |
// FIXME: This test inspects the shuffles that remain after the -O3 pipeline, so it depends on the optimizer; it should be rewritten to check Clang's own (unoptimized) IR generation instead.
3 | |
4 | |
5 | #include <immintrin.h> |
6 | |
7 | // |
8 | // Test LLVM IR codegen of shuffle instructions |
9 | // |
10 | |
// Shuffle two 8 x float vectors with immediate 203 (0xCB) and verify the
// shufflevector mask that results.
__m256 x(__m256 a, __m256 b) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @x(
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
  return _mm256_shuffle_ps(a, b, 203);
}
16 | |
// Permute a 2 x double vector with immediate 1 and verify that the mask swaps
// the two elements.
__m128d test_mm_permute_pd(__m128d a) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @test_mm_permute_pd(
  // CHECK: shufflevector{{.*}}<i32 1, i32 0>
  return _mm_permute_pd(a, 1);
}
22 | |
// Permute a 4 x double vector with immediate 5 (0b0101) and verify that the
// mask swaps the doubles inside each 128-bit half.
__m256d test_mm256_permute_pd(__m256d a) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @test_mm256_permute_pd(
  // CHECK: shufflevector{{.*}}<i32 1, i32 0, i32 3, i32 2>
  return _mm256_permute_pd(a, 5);
}
28 | |
// Permute a 4 x float vector with immediate 0x1b and verify that the mask
// reverses the element order.
__m128 test_mm_permute_ps(__m128 a) {
  // Anchor the mask match to this function's IR. The trailing parenthesis
  // keeps the label from also matching test_mm_permute_ps2.
  // CHECK-LABEL: @test_mm_permute_ps(
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0>
  return _mm_permute_ps(a, 0x1b);
}
34 | |
// Test case for PR12401: permute a 4 x float vector with immediate 0xe6 and
// verify the resulting mask.
__m128 test_mm_permute_ps2(__m128 a) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @test_mm_permute_ps2(
  // CHECK: shufflevector{{.*}}<i32 2, i32 1, i32 2, i32 3>
  return _mm_permute_ps(a, 0xe6);
}
41 | |
// Permute an 8 x float vector with immediate 0x1b and verify that the mask
// reverses the floats inside each 128-bit half.
__m256 test_mm256_permute_ps(__m256 a) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @test_mm256_permute_ps(
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  return _mm256_permute_ps(a, 0x1b);
}
47 | |
// Combine 128-bit halves of two 4 x double vectors with immediate 0x31 and
// verify the resulting mask.
__m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @test_mm256_permute2f128_pd(
  // CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7>
  return _mm256_permute2f128_pd(a, b, 0x31);
}
53 | |
// Combine 128-bit halves of two 8 x float vectors with immediate 0x13 and
// verify the resulting mask.
__m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @test_mm256_permute2f128_ps(
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  return _mm256_permute2f128_ps(a, b, 0x13);
}
59 | |
// Combine 128-bit halves of two 256-bit integer vectors with immediate 0x20
// and verify the resulting mask (matched as an 8 x i32 mask).
__m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
  // Anchor the mask match to this function's IR so that it cannot be
  // satisfied by a shuffle emitted for a different function.
  // CHECK-LABEL: @test_mm256_permute2f128_si256(
  // CHECK: shufflevector{{.*}} <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  return _mm256_permute2f128_si256(a, b, 0x20);
}
65 | |
// Broadcast the float that `a` points to across a 4 x float vector; the IR is
// expected to insert it at element 0 and then splat it with an all-zero mask.
// The parameter is named `a` rather than `__a` because identifiers containing
// a double underscore are reserved for the implementation.
__m128 test_mm_broadcast_ss(float const *a) {
  // CHECK-LABEL: @test_mm_broadcast_ss
  // CHECK: insertelement <4 x float> {{.*}}, i32 0
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
  return _mm_broadcast_ss(a);
}
73 | |
// Broadcast the double that `a` points to across a 4 x double vector; the IR
// is expected to insert it at element 0 and then splat it with an all-zero
// mask. The parameter is named `a` rather than `__a` because identifiers
// containing a double underscore are reserved for the implementation.
__m256d test_mm256_broadcast_sd(double const *a) {
  // CHECK-LABEL: @test_mm256_broadcast_sd
  // CHECK: insertelement <4 x double> {{.*}}, i32 0
  // CHECK: shufflevector <4 x double> {{.*}}, <4 x double> undef, <4 x i32> zeroinitializer
  return _mm256_broadcast_sd(a);
}
81 | |
// Broadcast the float that `a` points to across an 8 x float vector; the IR is
// expected to insert it at element 0 and then splat it with an all-zero mask.
// The parameter is named `a` rather than `__a` because identifiers containing
// a double underscore are reserved for the implementation.
__m256 test_mm256_broadcast_ss(float const *a) {
  // CHECK-LABEL: @test_mm256_broadcast_ss
  // CHECK: insertelement <8 x float> {{.*}}, i32 0
  // CHECK: shufflevector <8 x float> {{.*}}, <8 x float> undef, <8 x i32> zeroinitializer
  return _mm256_broadcast_ss(a);
}
89 | |
90 | // Make sure we have the correct mask for each insertf128 case. |
91 | |
// Insert the 4 x float vector `b` into `a` at 128-bit index 0 and verify the
// mask of the shuffle that performs the insertion.
__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
  // CHECK-LABEL: @test_mm256_insertf128_ps_0
  // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  const __m256 merged = _mm256_insertf128_ps(a, b, 0);
  return merged;
}
97 | |
// Insert the 2 x double vector `b` into `a` at 128-bit index 0 and verify the
// mask of the shuffle that performs the insertion.
__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
  // CHECK-LABEL: @test_mm256_insertf128_pd_0
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
  const __m256d merged = _mm256_insertf128_pd(a, b, 0);
  return merged;
}
103 | |
// Insert the 128-bit integer vector `b` into `a` at 128-bit index 0 and verify
// the mask of the shuffle that performs the insertion.
__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
  // CHECK-LABEL: @test_mm256_insertf128_si256_0
  // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  const __m256i merged = _mm256_insertf128_si256(a, b, 0);
  return merged;
}
109 | |
// Insert the 4 x float vector `b` into `a` at 128-bit index 1 and verify the
// mask of the shuffle that performs the insertion.
__m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
  // CHECK-LABEL: @test_mm256_insertf128_ps_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  const __m256 merged = _mm256_insertf128_ps(a, b, 1);
  return merged;
}
115 | |
// Insert the 2 x double vector `b` into `a` at 128-bit index 1 and verify the
// mask of the shuffle that performs the insertion.
__m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
  // CHECK-LABEL: @test_mm256_insertf128_pd_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
  const __m256d merged = _mm256_insertf128_pd(a, b, 1);
  return merged;
}
121 | |
// Insert the 128-bit integer vector `b` into `a` at 128-bit index 1 and verify
// the mask of the shuffle that performs the insertion.
__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
  // CHECK-LABEL: @test_mm256_insertf128_si256_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  const __m256i merged = _mm256_insertf128_si256(a, b, 1);
  return merged;
}
127 | |
128 | // Make sure we have the correct mask for each extractf128 case. |
129 | |
// Extract 128-bit index 0 from an 8 x float vector and verify that the mask
// selects elements 0 through 3.
__m128 test_mm256_extractf128_ps_0(__m256 a) {
  // CHECK-LABEL: @test_mm256_extractf128_ps_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  const __m128 half = _mm256_extractf128_ps(a, 0);
  return half;
}
135 | |
// Extract 128-bit index 0 from a 4 x double vector and verify that the mask
// selects elements 0 and 1.
__m128d test_mm256_extractf128_pd_0(__m256d a) {
  // CHECK-LABEL: @test_mm256_extractf128_pd_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
  const __m128d half = _mm256_extractf128_pd(a, 0);
  return half;
}
141 | |
// Extract 128-bit index 0 from a 256-bit integer vector and verify that the
// mask selects elements 0 through 3.
__m128i test_mm256_extractf128_si256_0(__m256i a) {
  // CHECK-LABEL: @test_mm256_extractf128_si256_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  const __m128i half = _mm256_extractf128_si256(a, 0);
  return half;
}
147 | |
// Extract 128-bit index 1 from an 8 x float vector and verify that the mask
// selects elements 4 through 7.
__m128 test_mm256_extractf128_ps_1(__m256 a) {
  // CHECK-LABEL: @test_mm256_extractf128_ps_1
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
  const __m128 half = _mm256_extractf128_ps(a, 1);
  return half;
}
153 | |
// Extract 128-bit index 1 from a 4 x double vector and verify that the mask
// selects elements 2 and 3.
__m128d test_mm256_extractf128_pd_1(__m256d a) {
  // CHECK-LABEL: @test_mm256_extractf128_pd_1
  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
  const __m128d half = _mm256_extractf128_pd(a, 1);
  return half;
}
159 | |
// Extract 128-bit index 1 from a 256-bit integer vector and verify that the
// mask selects elements 4 through 7.
__m128i test_mm256_extractf128_si256_1(__m256i a) {
  // CHECK-LABEL: @test_mm256_extractf128_si256_1
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
  const __m128i half = _mm256_extractf128_si256(a, 1);
  return half;
}
165 | |
// Build an 8 x float vector from two 4 x float halves passed as (hi, lo) and
// verify that the joining shuffle uses the identity mask 0..7.
__m256 test_mm256_set_m128(__m128 hi, __m128 lo) {
  // CHECK-LABEL: @test_mm256_set_m128
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256 joined = _mm256_set_m128(hi, lo);
  return joined;
}
171 | |
// Build a 4 x double vector from two 2 x double halves passed as (hi, lo) and
// verify that the joining shuffle uses the identity mask 0..3.
__m256d test_mm256_set_m128d(__m128d hi, __m128d lo) {
  // CHECK-LABEL: @test_mm256_set_m128d
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  const __m256d joined = _mm256_set_m128d(hi, lo);
  return joined;
}
177 | |
// Build a 256-bit integer vector from two 128-bit halves passed as (hi, lo)
// and verify that the joining shuffle uses the mask 0..3.
__m256i test_mm256_set_m128i(__m128i hi, __m128i lo) {
  // CHECK-LABEL: @test_mm256_set_m128i
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  const __m256i joined = _mm256_set_m128i(hi, lo);
  return joined;
}
183 | |
// Build an 8 x float vector with the reversed-argument variant: the halves are
// handed over as (lo, hi), and the joining shuffle is expected to use the
// identity mask 0..7.
__m256 test_mm256_setr_m128(__m128 hi, __m128 lo) {
  // CHECK-LABEL: @test_mm256_setr_m128
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256 joined = _mm256_setr_m128(lo, hi);
  return joined;
}
189 | |
// Build a 4 x double vector with the reversed-argument variant: the halves are
// handed over as (lo, hi), and the joining shuffle is expected to use the
// identity mask 0..3.
__m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) {
  // CHECK-LABEL: @test_mm256_setr_m128d
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  const __m256d joined = _mm256_setr_m128d(lo, hi);
  return joined;
}
195 | |
// Build a 256-bit integer vector with the reversed-argument variant: the
// halves are handed over as (lo, hi), and the joining shuffle is expected to
// use the mask 0..3.
__m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) {
  // CHECK-LABEL: @test_mm256_setr_m128i
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  const __m256i joined = _mm256_setr_m128i(lo, hi);
  return joined;
}
201 | |