// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics with poly64

#include <arm_neon.h>

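// Each test below invokes one poly64 intrinsic and uses the CHECK lines to
// verify the LLVM IR that Clang emits for it (after the opt -S -mem2reg
// cleanup from the RUN line above).
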
// CHECK-LABEL: define <1 x i64> @test_vceq_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
  return vceq_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
  return vceqq_p64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vtst_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP4:%.*]] = and <1 x i64> %a, %b
// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
  return vtst_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
  return vtstq_p64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_p64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
  return vbsl_p64(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #1 {
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
  return vbslq_p64(a, b, c);
}

// CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
poly64_t test_vget_lane_p64(poly64x1_t v) {
  return vget_lane_p64(v, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
poly64_t test_vgetq_lane_p64(poly64x2_t v) {
  return vgetq_lane_p64(v, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
  return vset_lane_p64(a, v, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
  return vsetq_lane_p64(a, v, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
  return vcopy_lane_p64(a, 0, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
  return vcopyq_lane_p64(a, 1, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
  return vcopyq_laneq_p64(a, 1, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vcreate_p64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vcreate_p64(uint64_t a) {
  return vcreate_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_n_p64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VECINIT_I]]
poly64x1_t test_vdup_n_p64(poly64_t a) {
  return vdup_n_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
poly64x2_t test_vdupq_n_p64(poly64_t a) {
  return vdupq_n_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vmov_n_p64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VECINIT_I]]
poly64x1_t test_vmov_n_p64(poly64_t a) {
  return vmov_n_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
poly64x2_t test_vmovq_n_p64(poly64_t a) {
  return vmovq_n_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_lane_p64(<1 x i64> %vec) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE]]
poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
  return vdup_lane_p64(vec, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
  return vdupq_lane_p64(vec, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
  return vdupq_laneq_p64(vec, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
  return vcombine_p64(low, high);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_p64(i64* %ptr) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK: ret <1 x i64> [[TMP2]]
poly64x1_t test_vld1_p64(poly64_t const * ptr) {
  return vld1_p64(ptr);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
  return vld1q_p64(ptr);
}

// CHECK-LABEL: define void @test_vst1_p64(i64* %ptr, <1 x i64> %val) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
  return vst1_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
  return vst1q_p64(ptr, val);
}

// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
  return vld2_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
  return vld2q_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x3_t [[TMP6]]
poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
  return vld3_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
  return vld3q_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x4_t [[TMP6]]
poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
  return vld4_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x4_t [[TMP6]]
poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
  return vld4q_p64(ptr);
}

// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[VAL]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
  return vst2_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[VAL]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
  return vst2q_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[VAL]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
  return vst3_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[VAL]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
  return vst3q_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[VAL]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL6]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX7]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
  return vst4_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[VAL]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL6]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX7]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
  return vst4q_p64(ptr, val);
}

// CHECK-LABEL: define <1 x i64> @test_vext_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
  return vext_p64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
  return vextq_p64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
  return vzip1q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
  return vzip2q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
  return vuzp1q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
  return vuzp2q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
  return vtrn1q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
  return vtrn2q_p64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
// CHECK: ret <1 x i64> [[VSRI_N2]]
poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsri_n_p64(a, b, 33);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
// CHECK: ret <2 x i64> [[VSRI_N2]]
poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsriq_n_p64(a, b, 64);
}

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
// CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0"