aarch64-poly64.c source code [clang_source_code/test/CodeGen/aarch64-poly64.c]

1	// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2	// RUN: -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \
3	// RUN: | FileCheck %s
4
5	// Test new aarch64 intrinsics with poly64
6
7	#include <arm_neon.h>
8
9	// CHECK-LABEL: define <1 x i64> @test_vceq_p64(<1 x i64> %a, <1 x i64> %b) #0 {
10	// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
11	// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
12	// CHECK: ret <1 x i64> [[SEXT_I]]
// Wraps vceq_p64 on two poly64x1_t operands. The expected IR above is an
// icmp eq whose <1 x i1> result is sign-extended to <1 x i64>.
13	uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
14	return vceq_p64(a, b);
15	}
16
17	// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
18	// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
19	// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
20	// CHECK: ret <2 x i64> [[SEXT_I]]
// Wraps vceqq_p64 on two poly64x2_t operands. The expected IR above is an
// icmp eq whose <2 x i1> result is sign-extended to <2 x i64>.
21	uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
22	return vceqq_p64(a, b);
23	}
24
25	// CHECK-LABEL: define <1 x i64> @test_vtst_p64(<1 x i64> %a, <1 x i64> %b) #0 {
26	// CHECK: [[TMP4:%.*]] = and <1 x i64> %a, %b
27	// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
28	// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
29	// CHECK: ret <1 x i64> [[VTST_I]]
// Wraps vtst_p64 on two poly64x1_t operands. The expected IR above ANDs the
// operands, compares the result against zero with icmp ne, and sign-extends
// the <1 x i1> mask to <1 x i64>.
30	uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
31	return vtst_p64(a, b);
32	}
33
34	// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
35	// CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b
36	// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
37	// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
38	// CHECK: ret <2 x i64> [[VTST_I]]
// Wraps vtstq_p64 on two poly64x2_t operands. The expected IR above ANDs the
// operands, compares the result against zero with icmp ne, and sign-extends
// the <2 x i1> mask to <2 x i64>.
39	uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
40	return vtstq_p64(a, b);
41	}
42
43	// CHECK-LABEL: define <1 x i64> @test_vbsl_p64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
44	// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b
45	// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, <i64 -1>
46	// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c
47	// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
48	// CHECK: ret <1 x i64> [[VBSL5_I]]
// Wraps vbsl_p64(a, b, c) on poly64x1_t operands. The expected IR above is
// (a & b) | (~a & c), with ~a spelled as an xor against -1.
49	poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
50	return vbsl_p64(a, b, c);
51	}
52
53	// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #1 {
54	// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
55	// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
56	// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
57	// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
58	// CHECK: ret <2 x i64> [[VBSL5_I]]
// Wraps vbslq_p64(a, b, c) on poly64x2_t operands. The expected IR above is
// (a & b) | (~a & c), with ~a spelled as an xor against <-1, -1>.
59	poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
60	return vbslq_p64(a, b, c);
61	}
62
63	// CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
64	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
65	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
66	// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
67	// CHECK: ret i64 [[VGET_LANE]]
// Reads lane 0 of a poly64x1_t with vget_lane_p64. The expected IR above
// round-trips the vector through <8 x i8> before the extractelement.
68	poly64_t test_vget_lane_p64(poly64x1_t v) {
69	return vget_lane_p64(v, 0);
70	}
71
72	// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
73	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
74	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
75	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
76	// CHECK: ret i64 [[VGETQ_LANE]]
// Reads lane 1 of a poly64x2_t with vgetq_lane_p64. The expected IR above
// round-trips the vector through <16 x i8> before the extractelement.
77	poly64_t test_vgetq_lane_p64(poly64x2_t v) {
78	return vgetq_lane_p64(v, 1);
79	}
80
81	// CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 {
82	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
83	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
84	// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
85	// CHECK: ret <1 x i64> [[VSET_LANE]]
// Writes scalar a into lane 0 of v with vset_lane_p64. The expected IR above
// ends in an insertelement at index 0.
86	poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
87	return vset_lane_p64(a, v, 0);
88	}
89
90	// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
91	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
92	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
93	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
94	// CHECK: ret <2 x i64> [[VSET_LANE]]
// Writes scalar a into lane 1 of v with vsetq_lane_p64. The expected IR above
// ends in an insertelement at index 1.
95	poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
96	return vsetq_lane_p64(a, v, 1);
97	}
98
99	// CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 {
100	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
101	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
102	// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
103	// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8>
104	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
105	// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 0
106	// CHECK: ret <1 x i64> [[VSET_LANE]]
// Copies lane 0 of b into lane 0 of a with vcopy_lane_p64. The expected IR
// above is an extractelement from b feeding an insertelement into a.
107	poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
108	return vcopy_lane_p64(a, 0, b, 0);
109
110	}
111
112	// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
113	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
114	// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
115	// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
116	// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
117	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
118	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 1
119	// CHECK: ret <2 x i64> [[VSET_LANE]]
// Copies lane 0 of the 64-bit vector b into lane 1 of the 128-bit vector a
// with vcopyq_lane_p64. The expected IR above is an extractelement from b
// feeding an insertelement into a at index 1.
120	poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
121	return vcopyq_lane_p64(a, 1, b, 0);
122	}
123
124	// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
125	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
126	// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
127	// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
128	// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
129	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
130	// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 1
131	// CHECK: ret <2 x i64> [[VSET_LANE]]
// Copies lane 1 of b into lane 1 of a with vcopyq_laneq_p64. The expected IR
// above is an extractelement from b feeding an insertelement into a.
132	poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
133	return vcopyq_laneq_p64(a, 1, b, 1);
134	}
135
136	// CHECK-LABEL: define <1 x i64> @test_vcreate_p64(i64 %a) #0 {
137	// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
138	// CHECK: ret <1 x i64> [[TMP0]]
// Builds a poly64x1_t from a uint64_t with vcreate_p64. The expected IR above
// is a single bitcast from i64 to <1 x i64>.
139	poly64x1_t test_vcreate_p64(uint64_t a) {
140	return vcreate_p64(a);
141	}
142
143	// CHECK-LABEL: define <1 x i64> @test_vdup_n_p64(i64 %a) #0 {
144	// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
145	// CHECK: ret <1 x i64> [[VECINIT_I]]
// Broadcasts scalar a into a poly64x1_t with vdup_n_p64. The expected IR
// above is one insertelement into an undef <1 x i64>.
146	poly64x1_t test_vdup_n_p64(poly64_t a) {
147	return vdup_n_p64(a);
148	}
149	// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #1 {
150	// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
151	// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
152	// CHECK: ret <2 x i64> [[VECINIT1_I]]
// Broadcasts scalar a into a poly64x2_t with vdupq_n_p64. The expected IR
// above inserts a into lane 0 and then lane 1 of an undef <2 x i64>.
153	poly64x2_t test_vdupq_n_p64(poly64_t a) {
154	return vdupq_n_p64(a);
155	}
156
157	// CHECK-LABEL: define <1 x i64> @test_vmov_n_p64(i64 %a) #0 {
158	// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
159	// CHECK: ret <1 x i64> [[VECINIT_I]]
// Broadcasts scalar a into a poly64x1_t with vmov_n_p64. The expected IR
// above is one insertelement into an undef <1 x i64>.
160	poly64x1_t test_vmov_n_p64(poly64_t a) {
161	return vmov_n_p64(a);
162	}
163
164	// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #1 {
165	// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
166	// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
167	// CHECK: ret <2 x i64> [[VECINIT1_I]]
// Broadcasts scalar a into a poly64x2_t with vmovq_n_p64. The expected IR
// above inserts a into lane 0 and then lane 1 of an undef <2 x i64>.
168	poly64x2_t test_vmovq_n_p64(poly64_t a) {
169	return vmovq_n_p64(a);
170	}
171
172	// CHECK-LABEL: define <1 x i64> @test_vdup_lane_p64(<1 x i64> %vec) #0 {
173	// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <1 x i32> zeroinitializer
174	// CHECK: ret <1 x i64> [[SHUFFLE]]
// Duplicates lane 0 of vec with vdup_lane_p64. The expected IR above is a
// shufflevector of vec with itself using an all-zero <1 x i32> mask.
175	poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
176	return vdup_lane_p64(vec, 0);
177	}
178
179	// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
180	// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
181	// CHECK: ret <2 x i64> [[SHUFFLE]]
// Duplicates lane 0 of the 64-bit vec into a poly64x2_t with vdupq_lane_p64.
// The expected IR above is a shufflevector with an all-zero <2 x i32> mask.
182	poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
183	return vdupq_lane_p64(vec, 0);
184	}
185
186	// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
187	// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
188	// CHECK: ret <2 x i64> [[SHUFFLE]]
// Duplicates lane 1 of the 128-bit vec with vdupq_laneq_p64. The expected IR
// above is a shufflevector with mask <1, 1>.
189	poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
190	return vdupq_laneq_p64(vec, 1);
191	}
192
193	// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #1 {
194	// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
195	// CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Joins low and high into one poly64x2_t with vcombine_p64. The expected IR
// above is a shufflevector of low and high with mask <0, 1>.
196	poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
197	return vcombine_p64(low, high);
198	}
199
200	// CHECK-LABEL: define <1 x i64> @test_vld1_p64(i64* %ptr) #0 {
201	// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
202	// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
203	// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
204	// CHECK: ret <1 x i64> [[TMP2]]
// Loads a poly64x1_t from ptr with vld1_p64. The expected IR above casts the
// pointer and performs an ordinary <1 x i64> load.
205	poly64x1_t test_vld1_p64(poly64_t const * ptr) {
206	return vld1_p64(ptr);
207	}
208
209	// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #1 {
210	// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
211	// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
212	// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
213	// CHECK: ret <2 x i64> [[TMP2]]
// Loads a poly64x2_t from ptr with vld1q_p64. The expected IR above casts the
// pointer and performs an ordinary <2 x i64> load.
214	poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
215	return vld1q_p64(ptr);
216	}
217
218	// CHECK-LABEL: define void @test_vst1_p64(i64* %ptr, <1 x i64> %val) #0 {
219	// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
220	// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8>
221	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
222	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
223	// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
224	// CHECK: ret void
// Stores val to ptr with vst1_p64. The expected IR above is an ordinary
// <1 x i64> store followed by ret void.
// The call is a plain statement: ISO C does not allow `return <expr>;` in a
// function returning void.
225	void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
226	vst1_p64(ptr, val);
227	}
228
229	// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #1 {
230	// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
231	// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
232	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
233	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
234	// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
235	// CHECK: ret void
// Stores val to ptr with vst1q_p64. The expected IR above is an ordinary
// <2 x i64> store followed by ret void.
// The call is a plain statement: ISO C does not allow `return <expr>;` in a
// function returning void.
236	void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
237	vst1q_p64(ptr, val);
238	}
239
240	// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #2 {
241	// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
242	// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
243	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
244	// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
245	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
246	// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
247	// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
248	// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
249	// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
250	// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
251	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
252	// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
253	// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
// Loads a poly64x1x2_t from ptr with vld2_p64. The expected IR above calls
// @llvm.aarch64.neon.ld2.v1i64.p0v1i64, stores the result into a temporary,
// and memcpy's 16 bytes into the return slot.
254	poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
255	return vld2_p64(ptr);
256	}
257
258	// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #2 {
259	// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
260	// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
261	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
262	// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
263	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
264	// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
265	// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
266	// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
267	// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
268	// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
269	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
270	// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
271	// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
// Loads a poly64x2x2_t from ptr with vld2q_p64. The expected IR above calls
// @llvm.aarch64.neon.ld2.v2i64.p0v2i64, stores the result into a temporary,
// and memcpy's 32 bytes into the return slot.
272	poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
273	return vld2q_p64(ptr);
274	}
275
276	// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #2 {
277	// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
278	// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
279	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
280	// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
281	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
282	// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
283	// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
284	// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
285	// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
286	// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
287	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
288	// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
289	// CHECK: ret %struct.poly64x1x3_t [[TMP6]]
// Loads a poly64x1x3_t from ptr with vld3_p64. The expected IR above calls
// @llvm.aarch64.neon.ld3.v1i64.p0v1i64, stores the result into a temporary,
// and memcpy's 24 bytes into the return slot.
290	poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
291	return vld3_p64(ptr);
292	}
293
294	// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #2 {
295	// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
296	// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
297	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
298	// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
299	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
300	// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
301	// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
302	// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
303	// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
304	// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
305	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
306	// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
307	// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
// Loads a poly64x2x3_t from ptr with vld3q_p64. The expected IR above calls
// @llvm.aarch64.neon.ld3.v2i64.p0v2i64, stores the result into a temporary,
// and memcpy's 48 bytes into the return slot.
308	poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
309	return vld3q_p64(ptr);
310	}
311
312	// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #2 {
313	// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
314	// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
315	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
316	// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
317	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
318	// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
319	// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
320	// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
321	// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
322	// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
323	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
324	// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
325	// CHECK: ret %struct.poly64x1x4_t [[TMP6]]
// Loads a poly64x1x4_t from ptr with vld4_p64. The expected IR above calls
// @llvm.aarch64.neon.ld4.v1i64.p0v1i64, stores the result into a temporary,
// and memcpy's 32 bytes into the return slot.
326	poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
327	return vld4_p64(ptr);
328	}
329
330	// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #2 {
331	// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
332	// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
333	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
334	// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
335	// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
336	// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
337	// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
338	// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
339	// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
340	// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
341	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
342	// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
343	// CHECK: ret %struct.poly64x2x4_t [[TMP6]]
// Loads a poly64x2x4_t from ptr with vld4q_p64. The expected IR above calls
// @llvm.aarch64.neon.ld4.v2i64.p0v2i64, stores the result into a temporary,
// and memcpy's 64 bytes into the return slot.
344	poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
345	return vld4q_p64(ptr);
346	}
347
348	// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #2 {
349	// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
350	// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
351	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
352	// CHECK: store [2 x <1 x i64>] [[VAL]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
353	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
354	// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[VAL]] to i8*
355	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
356	// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
357	// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
358	// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0
359	// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
360	// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
361	// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
362	// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL2]], i64 0, i64 1
363	// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
364	// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
365	// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
366	// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
367	// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
368	// CHECK: ret void
// Stores the two vectors of val to ptr with vst2_p64. The expected IR above
// ends in a call to @llvm.aarch64.neon.st2.v1i64.p0i8 followed by ret void.
// The call is a plain statement: ISO C does not allow `return <expr>;` in a
// function returning void.
369	void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
370	vst2_p64(ptr, val);
371	}
372
373	// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #2 {
374	// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
375	// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
376	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
377	// CHECK: store [2 x <2 x i64>] [[VAL]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
378	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
379	// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[VAL]] to i8*
380	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
381	// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
382	// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
383	// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0
384	// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
385	// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
386	// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
387	// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL2]], i64 0, i64 1
388	// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
389	// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
390	// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
391	// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
392	// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
393	// CHECK: ret void
// Stores the two vectors of val to ptr with vst2q_p64. The expected IR above
// ends in a call to @llvm.aarch64.neon.st2.v2i64.p0i8 followed by ret void.
// The call is a plain statement: ISO C does not allow `return <expr>;` in a
// function returning void.
394	void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
395	vst2q_p64(ptr, val);
396	}
397
398	// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #2 {
399	// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
400	// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
401	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
402	// CHECK: store [3 x <1 x i64>] [[VAL]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
403	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
404	// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[VAL]] to i8*
405	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
406	// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
407	// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
408	// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0
409	// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
410	// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
411	// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
412	// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL2]], i64 0, i64 1
413	// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
414	// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
415	// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
416	// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL4]], i64 0, i64 2
417	// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
418	// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
419	// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
420	// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
421	// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
422	// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
423	// CHECK: ret void
// Stores the three vectors of val to ptr with vst3_p64. The expected IR above
// ends in a call to @llvm.aarch64.neon.st3.v1i64.p0i8 followed by ret void.
// The call is a plain statement: ISO C does not allow `return <expr>;` in a
// function returning void.
424	void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
425	vst3_p64(ptr, val);
426	}
427
428	// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #2 {
429	// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
430	// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
431	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
432	// CHECK: store [3 x <2 x i64>] [[VAL]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
433	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
434	// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[VAL]] to i8*
435	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
436	// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
437	// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
438	// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0
439	// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
440	// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
441	// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
442	// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL2]], i64 0, i64 1
443	// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
444	// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
445	// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
446	// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL4]], i64 0, i64 2
447	// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
448	// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
449	// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
450	// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
451	// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
452	// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
453	// CHECK: ret void
// Stores the three vectors of val to ptr with vst3q_p64. The expected IR
// above ends in a call to @llvm.aarch64.neon.st3.v2i64.p0i8 followed by
// ret void.
// The call is a plain statement: ISO C does not allow `return <expr>;` in a
// function returning void.
454	void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
455	vst3q_p64(ptr, val);
456	}
457
458	// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #2 {
459	// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
460	// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
461	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0
462	// CHECK: store [4 x <1 x i64>] [[VAL]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
463	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
464	// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[VAL]] to i8*
465	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
466	// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
467	// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
468	// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0
469	// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
470	// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
471	// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
472	// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL2]], i64 0, i64 1
473	// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
474	// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
475	// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
476	// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL4]], i64 0, i64 2
477	// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
478	// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
479	// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
480	// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL6]], i64 0, i64 3
481	// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX7]], align 8
482	// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
483	// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
484	// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
485	// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
486	// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
487	// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
488	// CHECK: ret void
// Stores the four vectors of val to ptr with vst4_p64. The expected IR above
// ends in a call to @llvm.aarch64.neon.st4.v1i64.p0i8 followed by ret void.
// The call is a plain statement: ISO C does not allow `return <expr>;` in a
// function returning void.
489	void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
490	vst4_p64(ptr, val);
491	}
492
493	// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #2 {
494	// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
495	// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
496	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0
497	// CHECK: store [4 x <2 x i64>] [[VAL]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
498	// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
499	// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[VAL]] to i8*
500	// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
501	// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
502	// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
503	// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0
504	// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
505	// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
506	// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
507	// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL2]], i64 0, i64 1
508	// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
509	// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
510	// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
511	// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL4]], i64 0, i64 2
512	// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
513	// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
514	// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
515	// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL6]], i64 0, i64 3
516	// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX7]], align 16
517	// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
518	// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
519	// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
520	// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
521	// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
522	// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
523	// CHECK: ret void
524	void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) { // Covers vst4q_p64: stores the four <2 x i64> members of val through ptr.
525	vst4q_p64(ptr, val); // plain call; a void function may not return an expression (C11 6.8.6.4)
526	}
527
528	// CHECK-LABEL: define <1 x i64> @test_vext_p64(<1 x i64> %a, <1 x i64> %b) #0 {
529	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
530	// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
531	// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
532	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
533	// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
534	// CHECK: ret <1 x i64> [[VEXT]]
535	poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) { // Covers vext_p64 on single-lane poly64 vectors.
536	return vext_p64(a, b, 0); // poly64 intrinsic, matching the test name; avoids implicit poly64 -> uint64 vector conversion
537
538	}
539
540	// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
541	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
542	// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
543	// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
544	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
545	// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
546	// CHECK: ret <2 x i64> [[VEXT]]
547	poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) { // Covers vextq_p64 on two-lane poly64 vectors.
548	return vextq_p64(a, b, 1); // immediate 1; the directives above expect a shufflevector with mask <1, 2>
549	}
550
551	// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
552	// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
553	// CHECK: ret <2 x i64> [[SHUFFLE_I]]
554	poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) { // Covers vzip1q_p64 on two-lane poly64 vectors.
555	return vzip1q_p64(a, b); // the directives above expect a shufflevector of %a and %b with mask <0, 2>
556	}
557
558	// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
559	// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
560	// CHECK: ret <2 x i64> [[SHUFFLE_I]]
561	poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) { // Covers vzip2q_p64 on two-lane poly64 vectors.
562	return vzip2q_p64(a, b); // poly64 intrinsic, matching the test name; expected shuffle mask is <1, 3>
563	}
564
565	// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
566	// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
567	// CHECK: ret <2 x i64> [[SHUFFLE_I]]
568	poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) { // Covers vuzp1q_p64 on two-lane poly64 vectors.
569	return vuzp1q_p64(a, b); // the directives above expect a shufflevector of %a and %b with mask <0, 2>
570	}
571
572	// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
573	// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
574	// CHECK: ret <2 x i64> [[SHUFFLE_I]]
575	poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) { // Covers vuzp2q_p64 on two-lane poly64 vectors.
576	return vuzp2q_p64(a, b); // poly64 intrinsic, matching the test name; expected shuffle mask is <1, 3>
577	}
578
579	// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
580	// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
581	// CHECK: ret <2 x i64> [[SHUFFLE_I]]
582	poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) { // Covers vtrn1q_p64 on two-lane poly64 vectors.
583	return vtrn1q_p64(a, b); // the directives above expect a shufflevector of %a and %b with mask <0, 2>
584	}
585
586	// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
587	// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
588	// CHECK: ret <2 x i64> [[SHUFFLE_I]]
589	poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) { // Covers vtrn2q_p64 on two-lane poly64 vectors.
590	return vtrn2q_p64(a, b); // poly64 intrinsic, matching the test name; expected shuffle mask is <1, 3>
591	}
592
593	// CHECK-LABEL: define <1 x i64> @test_vsri_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
594	// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
595	// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
596	// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
597	// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
598	// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
599	// CHECK: ret <1 x i64> [[VSRI_N2]]
600	poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) { // Covers vsri_n_p64 on single-lane poly64 vectors.
601	return vsri_n_p64(a, b, 33); // 33 is expected as the i32 operand of @llvm.aarch64.neon.vsri.v1i64
602	}
603
604	// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #1 {
605	// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
606	// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
607	// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
608	// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
609	// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
610	// CHECK: ret <2 x i64> [[VSRI_N2]]
611	poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) { // Covers vsriq_n_p64 on two-lane poly64 vectors.
612	return vsriq_n_p64(a, b, 64); // 64 is expected as the i32 operand of @llvm.aarch64.neon.vsri.v2i64
613	}
614
615	// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
616	// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
617	// CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0"
618

Clang Project