arm-v8.2a-neon-intrinsics.c source code [clang_source_code/test/CodeGen/arm-v8.2a-neon-intrinsics.c]

1	// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
2	// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
3	// RUN: | opt -S -mem2reg \
4	// RUN: | FileCheck %s
5
6	// REQUIRES: arm-registered-target
7
8	#include <arm_neon.h>
9
10	// CHECK-LABEL: test_vabs_f16
11	// CHECK: [[ABS:%.*]] = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
12	// CHECK: ret <4 x half> [[ABS]]
13	float16x4_t test_vabs_f16(float16x4_t a) {
14	return vabs_f16(a);
15	}
16
17	// CHECK-LABEL: test_vabsq_f16
18	// CHECK: [[ABS:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
19	// CHECK: ret <8 x half> [[ABS]]
20	float16x8_t test_vabsq_f16(float16x8_t a) {
21	return vabsq_f16(a);
22	}
23
24	// CHECK-LABEL: test_vceqz_f16
25	// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x half> %a, zeroinitializer
26	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
27	// CHECK: ret <4 x i16> [[TMP2]]
28	uint16x4_t test_vceqz_f16(float16x4_t a) {
29	return vceqz_f16(a);
30	}
31
32	// CHECK-LABEL: test_vceqzq_f16
33	// CHECK: [[TMP1:%.*]] = fcmp oeq <8 x half> %a, zeroinitializer
34	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
35	// CHECK: ret <8 x i16> [[TMP2]]
36	uint16x8_t test_vceqzq_f16(float16x8_t a) {
37	return vceqzq_f16(a);
38	}
39
40	// CHECK-LABEL: test_vcgez_f16
41	// CHECK: [[TMP1:%.*]] = fcmp oge <4 x half> %a, zeroinitializer
42	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
43	// CHECK: ret <4 x i16> [[TMP2]]
44	uint16x4_t test_vcgez_f16(float16x4_t a) {
45	return vcgez_f16(a);
46	}
47
48	// CHECK-LABEL: test_vcgezq_f16
49	// CHECK: [[TMP1:%.*]] = fcmp oge <8 x half> %a, zeroinitializer
50	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
51	// CHECK: ret <8 x i16> [[TMP2]]
52	uint16x8_t test_vcgezq_f16(float16x8_t a) {
53	return vcgezq_f16(a);
54	}
55
56	// CHECK-LABEL: test_vcgtz_f16
57	// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x half> %a, zeroinitializer
58	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
59	// CHECK: ret <4 x i16> [[TMP2]]
60	uint16x4_t test_vcgtz_f16(float16x4_t a) {
61	return vcgtz_f16(a);
62	}
63
64	// CHECK-LABEL: test_vcgtzq_f16
65	// CHECK: [[TMP1:%.*]] = fcmp ogt <8 x half> %a, zeroinitializer
66	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
67	// CHECK: ret <8 x i16> [[TMP2]]
68	uint16x8_t test_vcgtzq_f16(float16x8_t a) {
69	return vcgtzq_f16(a);
70	}
71
72	// CHECK-LABEL: test_vclez_f16
73	// CHECK: [[TMP1:%.*]] = fcmp ole <4 x half> %a, zeroinitializer
74	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
75	// CHECK: ret <4 x i16> [[TMP2]]
76	uint16x4_t test_vclez_f16(float16x4_t a) {
77	return vclez_f16(a);
78	}
79
80	// CHECK-LABEL: test_vclezq_f16
81	// CHECK: [[TMP1:%.*]] = fcmp ole <8 x half> %a, zeroinitializer
82	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
83	// CHECK: ret <8 x i16> [[TMP2]]
84	uint16x8_t test_vclezq_f16(float16x8_t a) {
85	return vclezq_f16(a);
86	}
87
88	// CHECK-LABEL: test_vcltz_f16
89	// CHECK: [[TMP1:%.*]] = fcmp olt <4 x half> %a, zeroinitializer
90	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
91	// CHECK: ret <4 x i16> [[TMP2]]
92	uint16x4_t test_vcltz_f16(float16x4_t a) {
93	return vcltz_f16(a);
94	}
95
96	// CHECK-LABEL: test_vcltzq_f16
97	// CHECK: [[TMP1:%.*]] = fcmp olt <8 x half> %a, zeroinitializer
98	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
99	// CHECK: ret <8 x i16> [[TMP2]]
100	uint16x8_t test_vcltzq_f16(float16x8_t a) {
101	return vcltzq_f16(a);
102	}
103
104	// CHECK-LABEL: test_vcvt_f16_s16
105	// CHECK: [[VCVT:%.*]] = sitofp <4 x i16> %a to <4 x half>
106	// CHECK: ret <4 x half> [[VCVT]]
107	float16x4_t test_vcvt_f16_s16 (int16x4_t a) {
108	return vcvt_f16_s16(a);
109	}
110
111	// CHECK-LABEL: test_vcvtq_f16_s16
112	// CHECK: [[VCVT:%.*]] = sitofp <8 x i16> %a to <8 x half>
113	// CHECK: ret <8 x half> [[VCVT]]
114	float16x8_t test_vcvtq_f16_s16 (int16x8_t a) {
115	return vcvtq_f16_s16(a);
116	}
117
118	// CHECK-LABEL: test_vcvt_f16_u16
119	// CHECK: [[VCVT:%.*]] = uitofp <4 x i16> %a to <4 x half>
120	// CHECK: ret <4 x half> [[VCVT]]
121	float16x4_t test_vcvt_f16_u16 (uint16x4_t a) {
122	return vcvt_f16_u16(a);
123	}
124
125	// CHECK-LABEL: test_vcvtq_f16_u16
126	// CHECK: [[VCVT:%.*]] = uitofp <8 x i16> %a to <8 x half>
127	// CHECK: ret <8 x half> [[VCVT]]
128	float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
129	return vcvtq_f16_u16(a);
130	}
131
132	// CHECK-LABEL: test_vcvt_s16_f16
133	// CHECK: [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16>
134	// CHECK: ret <4 x i16> [[VCVT]]
135	int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
136	return vcvt_s16_f16(a);
137	}
138
139	// CHECK-LABEL: test_vcvtq_s16_f16
140	// CHECK: [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16>
141	// CHECK: ret <8 x i16> [[VCVT]]
142	int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
143	return vcvtq_s16_f16(a);
144	}
145
146	// CHECK-LABEL: test_vcvt_u16_f16
147	// CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
148	// CHECK: ret <4 x i16> [[VCVT]]
149	uint16x4_t test_vcvt_u16_f16 (float16x4_t a) { // unsigned lanes: matches the result type of vcvt_u16_f16
150	return vcvt_u16_f16(a);
151	}
152
153	// CHECK-LABEL: test_vcvtq_u16_f16
154	// CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
155	// CHECK: ret <8 x i16> [[VCVT]]
156	uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { // unsigned lanes: matches the result type of vcvtq_u16_f16
157	return vcvtq_u16_f16(a);
158	}
159
160	// CHECK-LABEL: test_vcvta_s16_f16
161	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
162	// CHECK: ret <4 x i16> [[VCVT]]
163	int16x4_t test_vcvta_s16_f16 (float16x4_t a) {
164	return vcvta_s16_f16(a);
165	}
166
167	// CHECK-LABEL: test_vcvta_u16_f16
168	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
169	// CHECK: ret <4 x i16> [[VCVT]]
170	uint16x4_t test_vcvta_u16_f16 (float16x4_t a) { // unsigned lanes: matches the result type of vcvta_u16_f16
171	return vcvta_u16_f16(a);
172	}
173
174	// CHECK-LABEL: test_vcvtaq_s16_f16
175	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
176	// CHECK: ret <8 x i16> [[VCVT]]
177	int16x8_t test_vcvtaq_s16_f16 (float16x8_t a) {
178	return vcvtaq_s16_f16(a);
179	}
180
181	// CHECK-LABEL: test_vcvtm_s16_f16
182	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
183	// CHECK: ret <4 x i16> [[VCVT]]
184	int16x4_t test_vcvtm_s16_f16 (float16x4_t a) {
185	return vcvtm_s16_f16(a);
186	}
187
188	// CHECK-LABEL: test_vcvtmq_s16_f16
189	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
190	// CHECK: ret <8 x i16> [[VCVT]]
191	int16x8_t test_vcvtmq_s16_f16 (float16x8_t a) {
192	return vcvtmq_s16_f16(a);
193	}
194
195	// CHECK-LABEL: test_vcvtm_u16_f16
196	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
197	// CHECK: ret <4 x i16> [[VCVT]]
198	uint16x4_t test_vcvtm_u16_f16 (float16x4_t a) {
199	return vcvtm_u16_f16(a);
200	}
201
202	// CHECK-LABEL: test_vcvtmq_u16_f16
203	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
204	// CHECK: ret <8 x i16> [[VCVT]]
205	uint16x8_t test_vcvtmq_u16_f16 (float16x8_t a) {
206	return vcvtmq_u16_f16(a);
207	}
208
209	// CHECK-LABEL: test_vcvtn_s16_f16
210	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
211	// CHECK: ret <4 x i16> [[VCVT]]
212	int16x4_t test_vcvtn_s16_f16 (float16x4_t a) {
213	return vcvtn_s16_f16(a);
214	}
215
216	// CHECK-LABEL: test_vcvtnq_s16_f16
217	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
218	// CHECK: ret <8 x i16> [[VCVT]]
219	int16x8_t test_vcvtnq_s16_f16 (float16x8_t a) {
220	return vcvtnq_s16_f16(a);
221	}
222
223	// CHECK-LABEL: test_vcvtn_u16_f16
224	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
225	// CHECK: ret <4 x i16> [[VCVT]]
226	uint16x4_t test_vcvtn_u16_f16 (float16x4_t a) {
227	return vcvtn_u16_f16(a);
228	}
229
230	// CHECK-LABEL: test_vcvtnq_u16_f16
231	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
232	// CHECK: ret <8 x i16> [[VCVT]]
233	uint16x8_t test_vcvtnq_u16_f16 (float16x8_t a) {
234	return vcvtnq_u16_f16(a);
235	}
236
237	// CHECK-LABEL: test_vcvtp_s16_f16
238	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
239	// CHECK: ret <4 x i16> [[VCVT]]
240	int16x4_t test_vcvtp_s16_f16 (float16x4_t a) {
241	return vcvtp_s16_f16(a);
242	}
243
244	// CHECK-LABEL: test_vcvtpq_s16_f16
245	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
246	// CHECK: ret <8 x i16> [[VCVT]]
247	int16x8_t test_vcvtpq_s16_f16 (float16x8_t a) {
248	return vcvtpq_s16_f16(a);
249	}
250
251	// CHECK-LABEL: test_vcvtp_u16_f16
252	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
253	// CHECK: ret <4 x i16> [[VCVT]]
254	uint16x4_t test_vcvtp_u16_f16 (float16x4_t a) {
255	return vcvtp_u16_f16(a);
256	}
257
258	// CHECK-LABEL: test_vcvtpq_u16_f16
259	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
260	// CHECK: ret <8 x i16> [[VCVT]]
261	uint16x8_t test_vcvtpq_u16_f16 (float16x8_t a) {
262	return vcvtpq_u16_f16(a);
263	}
264
265	// FIXME: Fix the zero constant when fp16 non-storage-only type becomes available.
266	// CHECK-LABEL: test_vneg_f16
267	// CHECK: [[NEG:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
268	// CHECK: ret <4 x half> [[NEG]]
269	float16x4_t test_vneg_f16(float16x4_t a) {
270	return vneg_f16(a);
271	}
272
273	// CHECK-LABEL: test_vnegq_f16
274	// CHECK: [[NEG:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
275	// CHECK: ret <8 x half> [[NEG]]
276	float16x8_t test_vnegq_f16(float16x8_t a) {
277	return vnegq_f16(a);
278	}
279
280	// CHECK-LABEL: test_vrecpe_f16
281	// CHECK: [[RCP:%.*]] = call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
282	// CHECK: ret <4 x half> [[RCP]]
283	float16x4_t test_vrecpe_f16(float16x4_t a) {
284	return vrecpe_f16(a);
285	}
286
287	// CHECK-LABEL: test_vrecpeq_f16
288	// CHECK: [[RCP:%.*]] = call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
289	// CHECK: ret <8 x half> [[RCP]]
290	float16x8_t test_vrecpeq_f16(float16x8_t a) {
291	return vrecpeq_f16(a);
292	}
293
294	// CHECK-LABEL: test_vrnd_f16
295	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
296	// CHECK: ret <4 x half> [[RND]]
297	float16x4_t test_vrnd_f16(float16x4_t a) {
298	return vrnd_f16(a);
299	}
300
301	// CHECK-LABEL: test_vrndq_f16
302	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
303	// CHECK: ret <8 x half> [[RND]]
304	float16x8_t test_vrndq_f16(float16x8_t a) {
305	return vrndq_f16(a);
306	}
307
308	// CHECK-LABEL: test_vrnda_f16
309	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
310	// CHECK: ret <4 x half> [[RND]]
311	float16x4_t test_vrnda_f16(float16x4_t a) {
312	return vrnda_f16(a);
313	}
314
315	// CHECK-LABEL: test_vrndaq_f16
316	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
317	// CHECK: ret <8 x half> [[RND]]
318	float16x8_t test_vrndaq_f16(float16x8_t a) {
319	return vrndaq_f16(a);
320	}
321
322	// CHECK-LABEL: test_vrndm_f16
323	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
324	// CHECK: ret <4 x half> [[RND]]
325	float16x4_t test_vrndm_f16(float16x4_t a) {
326	return vrndm_f16(a);
327	}
328
329	// CHECK-LABEL: test_vrndmq_f16
330	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
331	// CHECK: ret <8 x half> [[RND]]
332	float16x8_t test_vrndmq_f16(float16x8_t a) {
333	return vrndmq_f16(a);
334	}
335
336	// CHECK-LABEL: test_vrndn_f16
337	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
338	// CHECK: ret <4 x half> [[RND]]
339	float16x4_t test_vrndn_f16(float16x4_t a) {
340	return vrndn_f16(a);
341	}
342
343	// CHECK-LABEL: test_vrndnq_f16
344	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
345	// CHECK: ret <8 x half> [[RND]]
346	float16x8_t test_vrndnq_f16(float16x8_t a) {
347	return vrndnq_f16(a);
348	}
349
350	// CHECK-LABEL: test_vrndp_f16
351	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
352	// CHECK: ret <4 x half> [[RND]]
353	float16x4_t test_vrndp_f16(float16x4_t a) {
354	return vrndp_f16(a);
355	}
356
357	// CHECK-LABEL: test_vrndpq_f16
358	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
359	// CHECK: ret <8 x half> [[RND]]
360	float16x8_t test_vrndpq_f16(float16x8_t a) {
361	return vrndpq_f16(a);
362	}
363
364	// CHECK-LABEL: test_vrndx_f16
365	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
366	// CHECK: ret <4 x half> [[RND]]
367	float16x4_t test_vrndx_f16(float16x4_t a) {
368	return vrndx_f16(a);
369	}
370
371	// CHECK-LABEL: test_vrndxq_f16
372	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
373	// CHECK: ret <8 x half> [[RND]]
374	float16x8_t test_vrndxq_f16(float16x8_t a) {
375	return vrndxq_f16(a);
376	}
377
378	// CHECK-LABEL: test_vrsqrte_f16
379	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
380	// CHECK: ret <4 x half> [[RND]]
381	float16x4_t test_vrsqrte_f16(float16x4_t a) {
382	return vrsqrte_f16(a);
383	}
384
385	// CHECK-LABEL: test_vrsqrteq_f16
386	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
387	// CHECK: ret <8 x half> [[RND]]
388	float16x8_t test_vrsqrteq_f16(float16x8_t a) {
389	return vrsqrteq_f16(a);
390	}
391
392	// CHECK-LABEL: test_vadd_f16
393	// CHECK: [[ADD:%.*]] = fadd <4 x half> %a, %b
394	// CHECK: ret <4 x half> [[ADD]]
395	float16x4_t test_vadd_f16(float16x4_t a, float16x4_t b) {
396	return vadd_f16(a, b);
397	}
398
399	// CHECK-LABEL: test_vaddq_f16
400	// CHECK: [[ADD:%.*]] = fadd <8 x half> %a, %b
401	// CHECK: ret <8 x half> [[ADD]]
402	float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) {
403	return vaddq_f16(a, b);
404	}
405
406	// CHECK-LABEL: test_vabd_f16
407	// CHECK: [[ABD:%.*]] = call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
408	// CHECK: ret <4 x half> [[ABD]]
409	float16x4_t test_vabd_f16(float16x4_t a, float16x4_t b) {
410	return vabd_f16(a, b);
411	}
412
413	// CHECK-LABEL: test_vabdq_f16
414	// CHECK: [[ABD:%.*]] = call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
415	// CHECK: ret <8 x half> [[ABD]]
416	float16x8_t test_vabdq_f16(float16x8_t a, float16x8_t b) {
417	return vabdq_f16(a, b);
418	}
419
420	// CHECK-LABEL: test_vcage_f16
421	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
422	// CHECK: ret <4 x i16> [[ABS]]
423	uint16x4_t test_vcage_f16(float16x4_t a, float16x4_t b) {
424	return vcage_f16(a, b);
425	}
426
427	// CHECK-LABEL: test_vcageq_f16
428	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
429	// CHECK: ret <8 x i16> [[ABS]]
430	uint16x8_t test_vcageq_f16(float16x8_t a, float16x8_t b) {
431	return vcageq_f16(a, b);
432	}
433
434	// CHECK-LABEL: test_vcagt_f16
435	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
436	// CHECK: ret <4 x i16> [[ABS]]
437	uint16x4_t test_vcagt_f16(float16x4_t a, float16x4_t b) {
438	return vcagt_f16(a, b);
439	}
440
441	// CHECK-LABEL: test_vcagtq_f16
442	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
443	// CHECK: ret <8 x i16> [[ABS]]
444	uint16x8_t test_vcagtq_f16(float16x8_t a, float16x8_t b) {
445	return vcagtq_f16(a, b);
446	}
447
448	// CHECK-LABEL: test_vcale_f16
449	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
450	// CHECK: ret <4 x i16> [[ABS]]
451	uint16x4_t test_vcale_f16(float16x4_t a, float16x4_t b) {
452	return vcale_f16(a, b);
453	}
454
455	// CHECK-LABEL: test_vcaleq_f16
456	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
457	// CHECK: ret <8 x i16> [[ABS]]
458	uint16x8_t test_vcaleq_f16(float16x8_t a, float16x8_t b) {
459	return vcaleq_f16(a, b);
460	}
461
462	// CHECK-LABEL: test_vcalt_f16
463	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
464	// CHECK: ret <4 x i16> [[ABS]]
465	uint16x4_t test_vcalt_f16(float16x4_t a, float16x4_t b) {
466	return vcalt_f16(a, b);
467	}
468
469	// CHECK-LABEL: test_vcaltq_f16
470	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
471	// CHECK: ret <8 x i16> [[ABS]]
472	uint16x8_t test_vcaltq_f16(float16x8_t a, float16x8_t b) {
473	return vcaltq_f16(a, b);
474	}
475
476	// CHECK-LABEL: test_vceq_f16
477	// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x half> %a, %b
478	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
479	// CHECK: ret <4 x i16> [[TMP2]]
480	uint16x4_t test_vceq_f16(float16x4_t a, float16x4_t b) {
481	return vceq_f16(a, b);
482	}
483
484	// CHECK-LABEL: test_vceqq_f16
485	// CHECK: [[TMP1:%.*]] = fcmp oeq <8 x half> %a, %b
486	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
487	// CHECK: ret <8 x i16> [[TMP2]]
488	uint16x8_t test_vceqq_f16(float16x8_t a, float16x8_t b) {
489	return vceqq_f16(a, b);
490	}
491
492	// CHECK-LABEL: test_vcge_f16
493	// CHECK: [[TMP1:%.*]] = fcmp oge <4 x half> %a, %b
494	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
495	// CHECK: ret <4 x i16> [[TMP2]]
496	uint16x4_t test_vcge_f16(float16x4_t a, float16x4_t b) {
497	return vcge_f16(a, b);
498	}
499
500	// CHECK-LABEL: test_vcgeq_f16
501	// CHECK: [[TMP1:%.*]] = fcmp oge <8 x half> %a, %b
502	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
503	// CHECK: ret <8 x i16> [[TMP2]]
504	uint16x8_t test_vcgeq_f16(float16x8_t a, float16x8_t b) {
505	return vcgeq_f16(a, b);
506	}
507
508	// CHECK-LABEL: test_vcgt_f16
509	// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x half> %a, %b
510	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
511	// CHECK: ret <4 x i16> [[TMP2]]
512	uint16x4_t test_vcgt_f16(float16x4_t a, float16x4_t b) {
513	return vcgt_f16(a, b);
514	}
515
516	// CHECK-LABEL: test_vcgtq_f16
517	// CHECK: [[TMP1:%.*]] = fcmp ogt <8 x half> %a, %b
518	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
519	// CHECK: ret <8 x i16> [[TMP2]]
520	uint16x8_t test_vcgtq_f16(float16x8_t a, float16x8_t b) {
521	return vcgtq_f16(a, b);
522	}
523
524	// CHECK-LABEL: test_vcle_f16
525	// CHECK: [[TMP1:%.*]] = fcmp ole <4 x half> %a, %b
526	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
527	// CHECK: ret <4 x i16> [[TMP2]]
528	uint16x4_t test_vcle_f16(float16x4_t a, float16x4_t b) {
529	return vcle_f16(a, b);
530	}
531
532	// CHECK-LABEL: test_vcleq_f16
533	// CHECK: [[TMP1:%.*]] = fcmp ole <8 x half> %a, %b
534	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
535	// CHECK: ret <8 x i16> [[TMP2]]
536	uint16x8_t test_vcleq_f16(float16x8_t a, float16x8_t b) {
537	return vcleq_f16(a, b);
538	}
539
540	// CHECK-LABEL: test_vclt_f16
541	// CHECK: [[TMP1:%.*]] = fcmp olt <4 x half> %a, %b
542	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
543	// CHECK: ret <4 x i16> [[TMP2]]
544	uint16x4_t test_vclt_f16(float16x4_t a, float16x4_t b) {
545	return vclt_f16(a, b);
546	}
547
548	// CHECK-LABEL: test_vcltq_f16
549	// CHECK: [[TMP1:%.*]] = fcmp olt <8 x half> %a, %b
550	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
551	// CHECK: ret <8 x i16> [[TMP2]]
552	uint16x8_t test_vcltq_f16(float16x8_t a, float16x8_t b) {
553	return vcltq_f16(a, b);
554	}
555
556	// CHECK-LABEL: test_vcvt_n_f16_s16
557	// CHECK: [[CVT:%.*]] = call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
558	// CHECK: ret <4 x half> [[CVT]]
559	float16x4_t test_vcvt_n_f16_s16(int16x4_t a) {
560	return vcvt_n_f16_s16(a, 2);
561	}
562
563	// CHECK-LABEL: test_vcvtq_n_f16_s16
564	// CHECK: [[CVT:%.*]] = call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
565	// CHECK: ret <8 x half> [[CVT]]
566	float16x8_t test_vcvtq_n_f16_s16(int16x8_t a) {
567	return vcvtq_n_f16_s16(a, 2);
568	}
569
570	// CHECK-LABEL: test_vcvt_n_f16_u16
571	// CHECK: [[CVT:%.*]] = call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
572	// CHECK: ret <4 x half> [[CVT]]
573	float16x4_t test_vcvt_n_f16_u16(uint16x4_t a) {
574	return vcvt_n_f16_u16(a, 2);
575	}
576
577	// CHECK-LABEL: test_vcvtq_n_f16_u16
578	// CHECK: [[CVT:%.*]] = call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
579	// CHECK: ret <8 x half> [[CVT]]
580	float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) {
581	return vcvtq_n_f16_u16(a, 2);
582	}
583
584	// CHECK-LABEL: test_vcvt_n_s16_f16
585	// CHECK: [[CVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
586	// CHECK: ret <4 x i16> [[CVT]]
587	int16x4_t test_vcvt_n_s16_f16(float16x4_t a) {
588	return vcvt_n_s16_f16(a, 2);
589	}
590
591	// CHECK-LABEL: test_vcvtq_n_s16_f16
592	// CHECK: [[CVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
593	// CHECK: ret <8 x i16> [[CVT]]
594	int16x8_t test_vcvtq_n_s16_f16(float16x8_t a) {
595	return vcvtq_n_s16_f16(a, 2);
596	}
597
598	// CHECK-LABEL: test_vcvt_n_u16_f16
599	// CHECK: [[CVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
600	// CHECK: ret <4 x i16> [[CVT]]
601	uint16x4_t test_vcvt_n_u16_f16(float16x4_t a) {
602	return vcvt_n_u16_f16(a, 2);
603	}
604
605	// CHECK-LABEL: test_vcvtq_n_u16_f16
606	// CHECK: [[CVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
607	// CHECK: ret <8 x i16> [[CVT]]
608	uint16x8_t test_vcvtq_n_u16_f16(float16x8_t a) {
609	return vcvtq_n_u16_f16(a, 2);
610	}
611
612	// CHECK-LABEL: test_vmax_f16
613	// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
614	// CHECK: ret <4 x half> [[MAX]]
615	float16x4_t test_vmax_f16(float16x4_t a, float16x4_t b) {
616	return vmax_f16(a, b);
617	}
618
619	// CHECK-LABEL: test_vmaxq_f16
620	// CHECK: [[MAX:%.*]] = call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
621	// CHECK: ret <8 x half> [[MAX]]
622	float16x8_t test_vmaxq_f16(float16x8_t a, float16x8_t b) {
623	return vmaxq_f16(a, b);
624	}
625
626	// CHECK-LABEL: test_vmaxnm_f16
627	// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
628	// CHECK: ret <4 x half> [[MAX]]
629	float16x4_t test_vmaxnm_f16(float16x4_t a, float16x4_t b) {
630	return vmaxnm_f16(a, b);
631	}
632
633	// CHECK-LABEL: test_vmaxnmq_f16
634	// CHECK: [[MAX:%.*]] = call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
635	// CHECK: ret <8 x half> [[MAX]]
636	float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) {
637	return vmaxnmq_f16(a, b);
638	}
639
640	// CHECK-LABEL: test_vmin_f16
641	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
642	// CHECK: ret <4 x half> [[MIN]]
643	float16x4_t test_vmin_f16(float16x4_t a, float16x4_t b) {
644	return vmin_f16(a, b);
645	}
646
647	// CHECK-LABEL: test_vminq_f16
648	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
649	// CHECK: ret <8 x half> [[MIN]]
650	float16x8_t test_vminq_f16(float16x8_t a, float16x8_t b) {
651	return vminq_f16(a, b);
652	}
653
654	// CHECK-LABEL: test_vminnm_f16
655	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
656	// CHECK: ret <4 x half> [[MIN]]
657	float16x4_t test_vminnm_f16(float16x4_t a, float16x4_t b) {
658	return vminnm_f16(a, b);
659	}
660
661	// CHECK-LABEL: test_vminnmq_f16
662	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
663	// CHECK: ret <8 x half> [[MIN]]
664	float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) {
665	return vminnmq_f16(a, b);
666	}
667
668	// CHECK-LABEL: test_vmul_f16
669	// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, %b
670	// CHECK: ret <4 x half> [[MUL]]
671	float16x4_t test_vmul_f16(float16x4_t a, float16x4_t b) {
672	return vmul_f16(a, b);
673	}
674
675	// CHECK-LABEL: test_vmulq_f16
676	// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, %b
677	// CHECK: ret <8 x half> [[MUL]]
678	float16x8_t test_vmulq_f16(float16x8_t a, float16x8_t b) {
679	return vmulq_f16(a, b);
680	}
681
682	// CHECK-LABEL: test_vpadd_f16
683	// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
684	// CHECK: ret <4 x half> [[ADD]]
685	float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
686	return vpadd_f16(a, b);
687	}
688
689	// CHECK-LABEL: test_vpmax_f16
690	// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
691	// CHECK: ret <4 x half> [[MAX]]
692	float16x4_t test_vpmax_f16(float16x4_t a, float16x4_t b) {
693	return vpmax_f16(a, b);
694	}
695
696	// CHECK-LABEL: test_vpmin_f16
697	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
698	// CHECK: ret <4 x half> [[MIN]]
699	float16x4_t test_vpmin_f16(float16x4_t a, float16x4_t b) {
700	return vpmin_f16(a, b);
701	}
702
703	// CHECK-LABEL: test_vrecps_f16
704	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
705	// CHECK: ret <4 x half> [[MIN]]
706	float16x4_t test_vrecps_f16(float16x4_t a, float16x4_t b) {
707	return vrecps_f16(a, b);
708	}
709
710	// CHECK-LABEL: test_vrecpsq_f16
711	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
712	// CHECK: ret <8 x half> [[MIN]]
713	float16x8_t test_vrecpsq_f16(float16x8_t a, float16x8_t b) {
714	return vrecpsq_f16(a, b);
715	}
716
717	// CHECK-LABEL: test_vrsqrts_f16
718	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
719	// CHECK: ret <4 x half> [[MIN]]
720	float16x4_t test_vrsqrts_f16(float16x4_t a, float16x4_t b) {
721	return vrsqrts_f16(a, b);
722	}
723
724	// CHECK-LABEL: test_vrsqrtsq_f16
725	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
726	// CHECK: ret <8 x half> [[MIN]]
727	float16x8_t test_vrsqrtsq_f16(float16x8_t a, float16x8_t b) {
728	return vrsqrtsq_f16(a, b);
729	}
730
731	// CHECK-LABEL: test_vsub_f16
732	// CHECK: [[ADD:%.*]] = fsub <4 x half> %a, %b
733	// CHECK: ret <4 x half> [[ADD]]
734	float16x4_t test_vsub_f16(float16x4_t a, float16x4_t b) {
735	return vsub_f16(a, b);
736	}
737
738	// CHECK-LABEL: test_vsubq_f16
739	// CHECK: [[ADD:%.*]] = fsub <8 x half> %a, %b
740	// CHECK: ret <8 x half> [[ADD]]
741	float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) {
742	return vsubq_f16(a, b);
743	}
744
745	// CHECK-LABEL: test_vfma_f16
746	// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
747	// CHECK: ret <4 x half> [[ADD]]
748	float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
749	return vfma_f16(a, b, c);
750	}
751
752	// CHECK-LABEL: test_vfmaq_f16
753	// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
754	// CHECK: ret <8 x half> [[ADD]]
755	float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
756	return vfmaq_f16(a, b, c);
757	}
758
759	// CHECK-LABEL: test_vfms_f16
760	// CHECK: [[SUB:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
761	// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
762	// CHECK: ret <4 x half> [[ADD]]
763	float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
764	return vfms_f16(a, b, c);
765	}
766
767	// CHECK-LABEL: test_vfmsq_f16
768	// CHECK: [[SUB:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
769	// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
770	// CHECK: ret <8 x half> [[ADD]]
771	float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
772	return vfmsq_f16(a, b, c);
773	}
774
775	// CHECK-LABEL: test_vmul_lane_f16
776	// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
777	// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP0]]
778	// CHECK: ret <4 x half> [[MUL]]
779	float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
780	return vmul_lane_f16(a, b, 3);
781	}
782
783	// CHECK-LABEL: test_vmulq_lane_f16
784	// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
785	// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP0]]
786	// CHECK: ret <8 x half> [[MUL]]
787	float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
788	return vmulq_lane_f16(a, b, 7);
789	}
790
791	// CHECK-LABEL: test_vmul_n_f16
792	// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[b:%.*]], i32 0
793	// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[b]], i32 1
794	// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[b]], i32 2
795	// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[b]], i32 3
796	// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP3]]
797	// CHECK: ret <4 x half> [[MUL]]
798	float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) {
799	return vmul_n_f16(a, b);
800	}
801
802	// CHECK-LABEL: test_vmulq_n_f16
803	// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[b:%.*]], i32 0
804	// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[b]], i32 1
805	// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[b]], i32 2
806	// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[b]], i32 3
807	// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[b]], i32 4
808	// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[b]], i32 5
809	// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[b]], i32 6
810	// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[b]], i32 7
811	// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP7]]
812	// CHECK: ret <8 x half> [[MUL]]
813	float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
814	return vmulq_n_f16(a, b);
815	}
816
817	// CHECK-LABEL: test_vbsl_f16
818	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
819	// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
820	// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
821	// CHECK: [[VBSL:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
822	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL]] to <4 x half>
823	// CHECK: ret <4 x half> [[TMP3]]
824	float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
825	return vbsl_f16(a, b, c);
826	}
827
828	// CHECK-LABEL: test_vbslq_f16
829	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
830	// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
831	// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
832	// CHECK: [[VBSL:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
833	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL]] to <8 x half>
834	// CHECK: ret <8 x half> [[TMP3]]
// Returns vbslq_f16(a, b, c). The FileCheck directives above expect all three
// operands to be bitcast to <16 x i8>, passed to @llvm.arm.neon.vbsl.v16i8, and
// the result bitcast back to <8 x half>.
835	float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
836	return vbslq_f16(a, b, c);
837	}
838
839	// CHECK-LABEL: test_vzip_f16
840	// CHECK: [[VZIP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
841	// CHECK: store <4 x half> [[VZIP0]], <4 x half>* [[addr1:%.*]]
842	// CHECK: [[VZIP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
843	// CHECK: store <4 x half> [[VZIP1]], <4 x half>* [[addr2:%.*]]
// Returns vzip_f16(a, b). The FileCheck directives above expect two
// shufflevectors of %a and %b with masks <0,4,1,5> and <2,6,3,7>, each
// followed by a store of the shuffled <4 x half>.
844	float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
845	return vzip_f16(a, b);
846	}
847
848	// CHECK-LABEL: test_vzipq_f16
849	// CHECK: [[VZIP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
850	// CHECK: store <8 x half> [[VZIP0]], <8 x half>* [[addr1:%.*]]
851	// CHECK: [[VZIP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
852	// CHECK: store <8 x half> [[VZIP1]], <8 x half>* [[addr2:%.*]]
// Returns vzipq_f16(a, b). The FileCheck directives above expect two
// shufflevectors of %a and %b with masks <0,8,1,9,2,10,3,11> and
// <4,12,5,13,6,14,7,15>, each followed by a store of the shuffled <8 x half>.
853	float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
854	return vzipq_f16(a, b);
855	}
856
857	// CHECK-LABEL: test_vuzp_f16
858	// CHECK: [[VUZP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
859	// CHECK: store <4 x half> [[VUZP0]], <4 x half>* [[addr1:%.*]]
860	// CHECK: [[VUZP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
861	// CHECK: store <4 x half> [[VUZP1]], <4 x half>* [[addr2:%.*]]
// Returns vuzp_f16(a, b). The FileCheck directives above expect two
// shufflevectors of %a and %b with the even-index mask <0,2,4,6> and the
// odd-index mask <1,3,5,7>, each followed by a store of the <4 x half> result.
862	float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
863	return vuzp_f16(a, b);
864	}
865
866	// CHECK-LABEL: test_vuzpq_f16
867	// CHECK: [[VUZP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
868	// CHECK: store <8 x half> [[VUZP0]], <8 x half>* [[addr1:%.*]]
869	// CHECK: [[VUZP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
870	// CHECK: store <8 x half> [[VUZP1]], <8 x half>* [[addr2:%.*]]
// Returns vuzpq_f16(a, b). The FileCheck directives above expect two
// shufflevectors of %a and %b with the even-index mask <0,2,...,14> and the
// odd-index mask <1,3,...,15>, each followed by a store of the <8 x half> result.
871	float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
872	return vuzpq_f16(a, b);
873	}
874
875	// CHECK-LABEL: test_vtrn_f16
876	// CHECK: [[VTRN0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
877	// CHECK: store <4 x half> [[VTRN0]], <4 x half>* [[addr1:%.*]]
878	// CHECK: [[VTRN1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
879	// CHECK: store <4 x half> [[VTRN1]], <4 x half>* [[addr2:%.*]]
// Returns vtrn_f16(a, b). The FileCheck directives above expect two
// shufflevectors of %a and %b with masks <0,4,2,6> and <1,5,3,7>, each
// followed by a store of the shuffled <4 x half>.
880	float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
881	return vtrn_f16(a, b);
882	}
883
884	// CHECK-LABEL: test_vtrnq_f16
885	// CHECK: [[VTRN0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
886	// CHECK: store <8 x half> [[VTRN0]], <8 x half>* [[addr1:%.*]]
887	// CHECK: [[VTRN1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
888	// CHECK: store <8 x half> [[VTRN1]], <8 x half>* [[addr2:%.*]]
// Returns vtrnq_f16(a, b). The FileCheck directives above expect two
// shufflevectors of %a and %b with masks <0,8,2,10,4,12,6,14> and
// <1,9,3,11,5,13,7,15>, each followed by a store of the shuffled <8 x half>.
889	float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
890	return vtrnq_f16(a, b);
891	}
892
893	// CHECK-LABEL: test_vmov_n_f16
894	// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[ARG:%.*]], i32 0
895	// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1
896	// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2
897	// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3
898	// CHECK: ret <4 x half> [[TMP3]]
// Returns vmov_n_f16(a). The FileCheck directives above expect the scalar to
// be written into lanes 0..3 by a chain of four insertelement instructions,
// and the final <4 x half> to be returned.
899	float16x4_t test_vmov_n_f16(float16_t a) {
900	return vmov_n_f16(a);
901	}
902
903	// CHECK-LABEL: test_vmovq_n_f16
904	// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[ARG:%.*]], i32 0
905	// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1
906	// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2
907	// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3
908	// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4
909	// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5
910	// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6
911	// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7
912	// CHECK: ret <8 x half> [[TMP7]]
// Returns vmovq_n_f16(a). The FileCheck directives above expect the scalar to
// be written into lanes 0..7 by a chain of eight insertelement instructions,
// and the final <8 x half> to be returned.
913	float16x8_t test_vmovq_n_f16(float16_t a) {
914	return vmovq_n_f16(a);
915	}
916
917	// CHECK-LABEL: test_vdup_n_f16
918	// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[ARG:%.*]], i32 0
919	// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1
920	// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2
921	// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3
922	// CHECK: ret <4 x half> [[TMP3]]
// Returns vdup_n_f16(a). The FileCheck directives above expect the same IR
// shape as for vmov_n_f16: four insertelement instructions filling lanes 0..3
// with the scalar, then a return of the <4 x half>.
923	float16x4_t test_vdup_n_f16(float16_t a) {
924	return vdup_n_f16(a);
925	}
926
927	// CHECK-LABEL: test_vdupq_n_f16
928	// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[ARG:%.*]], i32 0
929	// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1
930	// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2
931	// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3
932	// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4
933	// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5
934	// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6
935	// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7
936	// CHECK: ret <8 x half> [[TMP7]]
// Returns vdupq_n_f16(a). The FileCheck directives above expect eight
// insertelement instructions filling lanes 0..7 with the scalar, then a
// return of the <8 x half>.
937	float16x8_t test_vdupq_n_f16(float16_t a) {
938	return vdupq_n_f16(a);
939	}
940
941	// CHECK-LABEL: test_vdup_lane_f16
942	// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
943	// CHECK: ret <4 x half> [[SHFL]]
// Returns vdup_lane_f16(a, 3). The FileCheck directives above expect a single
// shufflevector of %a with itself whose 4-entry mask is all 3, i.e. lane 3
// repeated in every result lane.
944	float16x4_t test_vdup_lane_f16(float16x4_t a) {
945	return vdup_lane_f16(a, 3);
946	}
947
948	// CHECK-LABEL: test_vdupq_lane_f16
949	// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
950	// CHECK: ret <8 x half> [[SHFL]]
// Returns vdupq_lane_f16(a, 3): one lane of the 4-lane source repeated in all
// 8 result lanes. The lane index addresses the float16x4_t source, so it must
// lie in 0..3; lane 3 (the last valid lane) is used. An index of 7 only worked
// because a shufflevector index >= 4 selects from the second operand, which
// is also %a here, so it silently aliased lane 3.
951	float16x8_t test_vdupq_lane_f16(float16x4_t a) {
952	return vdupq_lane_f16(a, 3);
953	}
954
955	// CHECK-LABEL: @test_vext_f16(
956	// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
957	// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
958	// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
959	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
960	// CHECK: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
961	// CHECK: ret <4 x half> [[VEXT]]
// Returns vext_f16(a, b, 2). The FileCheck directives above expect both
// operands to round-trip through <8 x i8> bitcasts and then be combined by a
// shufflevector with mask <2,3,4,5>.
962	float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
963	return vext_f16(a, b, 2);
964	}
965
966	// CHECK-LABEL: @test_vextq_f16(
967	// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
968	// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
969	// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
970	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
971	// CHECK: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
972	// CHECK: ret <8 x half> [[VEXT]]
// Returns vextq_f16(a, b, 5). The FileCheck directives above expect both
// operands to round-trip through <16 x i8> bitcasts and then be combined by a
// shufflevector with mask <5,6,...,12>.
973	float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
974	return vextq_f16(a, b, 5);
975	}
976
977	// CHECK-LABEL: @test_vrev64_f16(
978	// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
979	// CHECK: ret <4 x half> [[SHFL]]
// Returns vrev64_f16(a). The FileCheck directives above expect a single
// shufflevector of %a with itself using mask <3,2,1,0>, i.e. the four lanes
// in reverse order.
980	float16x4_t test_vrev64_f16(float16x4_t a) {
981	return vrev64_f16(a);
982	}
983
984	// CHECK-LABEL: @test_vrev64q_f16(
985	// CHECK: [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
986	// CHECK: ret <8 x half> [[SHFL]]
// Returns vrev64q_f16(a). The FileCheck directives above expect a single
// shufflevector of %a with itself using mask <3,2,1,0,7,6,5,4>, i.e. each
// group of four lanes reversed independently.
987	float16x8_t test_vrev64q_f16(float16x8_t a) {
988	return vrev64q_f16(a);
989	}
990

Clang Project