// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

#include <arm_neon.h>
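// The CHECK lines below verify that vget_lane/vgetq_lane lower to a single
// extractelement and vset_lane/vsetq_lane to a single insertelement (after
// mem2reg), with bitcasts through byte vectors for multi-byte element types
// and an alloca-based reinterpret for the float16 variants.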

// CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: ret float [[VGETQ_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
// CHECK: store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}


// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK: ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK: ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 7);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"