1 | // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ |
2 | // RUN: -S -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | \ |
3 | // RUN: FileCheck -check-prefixes=CHECK,CHECK-A64 %s |
4 | // RUN: %clang_cc1 -triple armv8-none-linux-gnueabi -target-feature +neon \ |
5 | // RUN: -target-feature +fp16 -S -disable-O0-optnone -emit-llvm -o - %s | \ |
6 | // RUN: opt -S -mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A32 %s |
7 | |
8 | #include <arm_neon.h> |
9 | |
10 | // CHECK-LABEL: @test_vld1_f16_x2( |
11 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 |
12 | // CHECK-A32: %struct.float16x4x2_t* noalias sret [[RETVAL:%.*]], |
13 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 |
14 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* |
15 | // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* |
16 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF:(half|i16)]]* |
17 | // CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x2.v4f16.p0f16|arm.neon.vld1x2.v4i16.p0i16}}([[HALF]]* [[TMP2]]) |
18 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]> }* |
19 | // CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], { <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]] |
20 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* |
21 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* |
22 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
23 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 |
24 | // CHECK-A64: ret %struct.float16x4x2_t [[TMP6]] |
25 | // CHECK-A32: ret void |
26 | float16x4x2_t test_vld1_f16_x2(float16_t const *a) { |
27 | return vld1_f16_x2(a); |
28 | } |
29 | |
30 | // CHECK-LABEL: @test_vld1_f16_x3( |
31 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 |
32 | // CHECK-A32: %struct.float16x4x3_t* noalias sret [[RETVAL:%.*]], |
33 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 |
34 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* |
35 | // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* |
36 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]* |
37 | // CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x3.v4f16.p0f16|arm.neon.vld1x3.v4i16.p0i16}}([[HALF]]* [[TMP2]]) |
38 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* |
39 | // CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]] |
40 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* |
41 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* |
42 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
43 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 |
44 | // CHECK-A64: ret %struct.float16x4x3_t [[TMP6]] |
45 | // CHECK-A32: ret void |
46 | float16x4x3_t test_vld1_f16_x3(float16_t const *a) { |
47 | return vld1_f16_x3(a); |
48 | } |
49 | |
50 | // CHECK-LABEL: @test_vld1_f16_x4( |
51 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 |
52 | // CHECK-A32: %struct.float16x4x4_t* noalias sret [[RETVAL:%.*]], |
53 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 |
54 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* |
55 | // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* |
56 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]* |
57 | // CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x4.v4f16.p0f16|arm.neon.vld1x4.v4i16.p0i16}}([[HALF]]* [[TMP2]]) |
58 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* |
59 | // CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]] |
60 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* |
61 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* |
62 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
63 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 |
64 | // CHECK-A64: ret %struct.float16x4x4_t [[TMP6]] |
65 | // CHECK-A32: ret void |
66 | float16x4x4_t test_vld1_f16_x4(float16_t const *a) { |
67 | return vld1_f16_x4(a); |
68 | } |
69 | |
70 | // CHECK-LABEL: @test_vld1_f32_x2( |
71 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 |
72 | // CHECK-A32: %struct.float32x2x2_t* noalias sret [[RETVAL:%.*]], |
73 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 |
74 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* |
75 | // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* |
76 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
77 | // CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2f32.p0f32(float* [[TMP2]]) |
78 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* |
79 | // CHECK: store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]] |
80 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* |
81 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* |
82 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
83 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 |
84 | // CHECK-A64: ret %struct.float32x2x2_t [[TMP6]] |
85 | // CHECK-A32: ret void |
86 | float32x2x2_t test_vld1_f32_x2(float32_t const *a) { |
87 | return vld1_f32_x2(a); |
88 | } |
89 | |
90 | // CHECK-LABEL: @test_vld1_f32_x3( |
91 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 |
92 | // CHECK-A32: %struct.float32x2x3_t* noalias sret [[RETVAL:%.*]], |
93 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 |
94 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* |
95 | // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* |
96 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
97 | // CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2f32.p0f32(float* [[TMP2]]) |
98 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* |
99 | // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] |
100 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* |
101 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* |
102 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
103 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 |
104 | // CHECK-A64: ret %struct.float32x2x3_t [[TMP6]] |
105 | float32x2x3_t test_vld1_f32_x3(float32_t const *a) { |
106 | return vld1_f32_x3(a); |
107 | } |
108 | |
109 | // CHECK-LABEL: @test_vld1_f32_x4( |
110 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 |
111 | // CHECK-A32: %struct.float32x2x4_t* noalias sret [[RETVAL:%.*]], |
112 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 |
113 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* |
114 | // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* |
115 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
116 | // CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2f32.p0f32(float* [[TMP2]]) |
117 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* |
118 | // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] |
119 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* |
120 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* |
121 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
122 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 |
123 | // CHECK-A64: ret %struct.float32x2x4_t [[TMP6]] |
124 | // CHECK-A32: ret void |
125 | float32x2x4_t test_vld1_f32_x4(float32_t const *a) { |
126 | return vld1_f32_x4(a); |
127 | } |
128 | |
129 | // CHECK-LABEL: @test_vld1_p16_x2( |
130 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 |
131 | // CHECK-A32: %struct.poly16x4x2_t* noalias sret [[RETVAL:%.*]], |
132 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 |
133 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* |
134 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
135 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
136 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0i16(i16* [[TMP2]]) |
137 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* |
138 | // CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]] |
139 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* |
140 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* |
141 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
142 | // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 |
143 | // CHECK-A64: ret %struct.poly16x4x2_t [[TMP6]] |
144 | // CHECK-A32: ret void |
145 | poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) { |
146 | return vld1_p16_x2(a); |
147 | } |
148 | |
149 | // CHECK-LABEL: @test_vld1_p16_x3( |
150 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 |
151 | // CHECK-A32: %struct.poly16x4x3_t* noalias sret [[RETVAL:%.*]], |
152 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 |
153 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* |
154 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
155 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
156 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0i16(i16* [[TMP2]]) |
157 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* |
158 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
159 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* |
160 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* |
161 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
162 | // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 |
163 | // CHECK-A64: ret %struct.poly16x4x3_t [[TMP6]] |
164 | // CHECK-A32: ret void |
165 | poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) { |
166 | return vld1_p16_x3(a); |
167 | } |
168 | |
169 | // CHECK-LABEL: @test_vld1_p16_x4( |
170 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 |
171 | // CHECK-A32: %struct.poly16x4x4_t* noalias sret [[RETVAL:%.*]], |
172 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 |
173 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* |
174 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
175 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
176 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0i16(i16* [[TMP2]]) |
177 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* |
178 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
179 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* |
180 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* |
181 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
182 | // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 |
183 | // CHECK-A64: ret %struct.poly16x4x4_t [[TMP6]] |
184 | // CHECK-A32: ret void |
185 | poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) { |
186 | return vld1_p16_x4(a); |
187 | } |
188 | |
189 | // CHECK-LABEL: @test_vld1_p8_x2( |
190 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 |
191 | // CHECK-A32: %struct.poly8x8x2_t* noalias sret [[RETVAL:%.*]], |
192 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 |
193 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* |
194 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a) |
195 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* |
196 | // CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]] |
197 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* |
198 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* |
199 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false) |
200 | // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 |
201 | // CHECK-A64: ret %struct.poly8x8x2_t [[TMP4]] |
202 | // CHECK-A32: ret void |
203 | poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) { |
204 | return vld1_p8_x2(a); |
205 | } |
206 | |
207 | // CHECK-LABEL: @test_vld1_p8_x3( |
208 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 |
209 | // CHECK-A32: %struct.poly8x8x3_t* noalias sret [[RETVAL:%.*]], |
210 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 |
211 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* |
212 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a) |
213 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* |
214 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
215 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* |
216 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* |
217 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false) |
218 | // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 |
219 | // CHECK-A64: ret %struct.poly8x8x3_t [[TMP4]] |
220 | // CHECK-A32: ret void |
221 | poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) { |
222 | return vld1_p8_x3(a); |
223 | } |
224 | |
225 | // CHECK-LABEL: @test_vld1_p8_x4( |
226 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 |
227 | // CHECK-A32: %struct.poly8x8x4_t* noalias sret [[RETVAL:%.*]], |
228 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 |
229 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* |
230 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a) |
231 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* |
232 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
233 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* |
234 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* |
235 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false) |
236 | // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 |
237 | // CHECK-A64: ret %struct.poly8x8x4_t [[TMP4]] |
238 | // CHECK-A32: ret void |
239 | poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) { |
240 | return vld1_p8_x4(a); |
241 | } |
242 | |
243 | // CHECK-LABEL: @test_vld1_s16_x2( |
244 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 |
245 | // CHECK-A32: %struct.int16x4x2_t* noalias sret [[RETVAL:%.*]], |
246 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 |
247 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* |
248 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
249 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
250 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0i16(i16* [[TMP2]]) |
251 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* |
252 | // CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]] |
253 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* |
254 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* |
255 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
256 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 |
257 | // CHECK-A64: ret %struct.int16x4x2_t [[TMP6]] |
258 | // CHECK-A32: ret void |
259 | int16x4x2_t test_vld1_s16_x2(int16_t const *a) { |
260 | return vld1_s16_x2(a); |
261 | } |
262 | |
263 | // CHECK-LABEL: @test_vld1_s16_x3( |
264 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 |
265 | // CHECK-A32: %struct.int16x4x3_t* noalias sret [[RETVAL:%.*]], |
266 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 |
267 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* |
268 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
269 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
270 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0i16(i16* [[TMP2]]) |
271 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* |
272 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
273 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* |
274 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* |
275 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
276 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 |
277 | // CHECK-A64: ret %struct.int16x4x3_t [[TMP6]] |
278 | // CHECK-A32: ret void |
279 | int16x4x3_t test_vld1_s16_x3(int16_t const *a) { |
280 | return vld1_s16_x3(a); |
281 | } |
282 | |
283 | // CHECK-LABEL: @test_vld1_s16_x4( |
284 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 |
285 | // CHECK-A32: %struct.int16x4x4_t* noalias sret [[RETVAL:%.*]], |
286 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 |
287 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* |
288 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
289 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
290 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0i16(i16* [[TMP2]]) |
291 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* |
292 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
293 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* |
294 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* |
295 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
296 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 |
297 | // CHECK-A64: ret %struct.int16x4x4_t [[TMP6]] |
298 | // CHECK-A32: ret void |
299 | int16x4x4_t test_vld1_s16_x4(int16_t const *a) { |
300 | return vld1_s16_x4(a); |
301 | } |
302 | |
303 | // CHECK-LABEL: @test_vld1_s32_x2( |
304 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 |
305 | // CHECK-A32: %struct.int32x2x2_t* noalias sret [[RETVAL:%.*]], |
306 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 |
307 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* |
308 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
309 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
310 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i32.p0i32(i32* [[TMP2]]) |
311 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* |
312 | // CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]] |
313 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* |
314 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* |
315 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
316 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 |
317 | // CHECK-A64: ret %struct.int32x2x2_t [[TMP6]] |
318 | // CHECK-A32: ret void |
319 | int32x2x2_t test_vld1_s32_x2(int32_t const *a) { |
320 | return vld1_s32_x2(a); |
321 | } |
322 | |
323 | // CHECK-LABEL: @test_vld1_s32_x3( |
324 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 |
325 | // CHECK-A32: %struct.int32x2x3_t* noalias sret [[RETVAL:%.*]], |
326 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 |
327 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* |
328 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
329 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
330 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i32.p0i32(i32* [[TMP2]]) |
331 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* |
332 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
333 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* |
334 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* |
335 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
336 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 |
337 | // CHECK-A64: ret %struct.int32x2x3_t [[TMP6]] |
338 | // CHECK-A32: ret void |
339 | int32x2x3_t test_vld1_s32_x3(int32_t const *a) { |
340 | return vld1_s32_x3(a); |
341 | } |
342 | |
343 | // CHECK-LABEL: @test_vld1_s32_x4( |
344 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 |
345 | // CHECK-A32: %struct.int32x2x4_t* noalias sret [[RETVAL:%.*]], |
346 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 |
347 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* |
348 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
349 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
350 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i32.p0i32(i32* [[TMP2]]) |
351 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* |
352 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
353 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* |
354 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* |
355 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
356 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 |
357 | // CHECK-A64: ret %struct.int32x2x4_t [[TMP6]] |
358 | // CHECK-A32: ret void |
359 | int32x2x4_t test_vld1_s32_x4(int32_t const *a) { |
360 | return vld1_s32_x4(a); |
361 | } |
362 | |
363 | // CHECK-LABEL: @test_vld1_s64_x2( |
364 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 |
365 | // CHECK-A32: %struct.int64x1x2_t* noalias sret [[RETVAL:%.*]], |
366 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 |
367 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* |
368 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
369 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
370 | // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v1i64.p0i64(i64* [[TMP2]]) |
371 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* |
372 | // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]] |
373 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* |
374 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* |
375 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
376 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 |
377 | // CHECK-A64: ret %struct.int64x1x2_t [[TMP6]] |
378 | // CHECK-A32: ret void |
379 | int64x1x2_t test_vld1_s64_x2(int64_t const *a) { |
380 | return vld1_s64_x2(a); |
381 | } |
382 | |
383 | // CHECK-LABEL: @test_vld1_s64_x3( |
384 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 |
385 | // CHECK-A32: %struct.int64x1x3_t* noalias sret [[RETVAL:%.*]], |
386 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 |
387 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* |
388 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
389 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
390 | // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v1i64.p0i64(i64* [[TMP2]]) |
391 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* |
392 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
393 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* |
394 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* |
395 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
396 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 |
397 | // CHECK-A64: ret %struct.int64x1x3_t [[TMP6]] |
398 | // CHECK-A32: ret void |
399 | int64x1x3_t test_vld1_s64_x3(int64_t const *a) { |
400 | return vld1_s64_x3(a); |
401 | } |
402 | |
403 | // CHECK-LABEL: @test_vld1_s64_x4( |
404 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 |
405 | // CHECK-A32: %struct.int64x1x4_t* noalias sret [[RETVAL:%.*]], |
406 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 |
407 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* |
408 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
409 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
410 | // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v1i64.p0i64(i64* [[TMP2]]) |
411 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* |
412 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
413 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* |
414 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* |
415 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
416 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 |
417 | // CHECK-A64: ret %struct.int64x1x4_t [[TMP6]] |
418 | // CHECK-A32: ret void |
419 | int64x1x4_t test_vld1_s64_x4(int64_t const *a) { |
420 | return vld1_s64_x4(a); |
421 | } |
422 | |
423 | // CHECK-LABEL: @test_vld1_s8_x2( |
424 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 |
425 | // CHECK-A32: %struct.int8x8x2_t* noalias sret [[RETVAL:%.*]], |
426 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 |
427 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* |
428 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a) |
429 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* |
430 | // CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]] |
431 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* |
432 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* |
433 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false) |
434 | // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 |
435 | // CHECK-A64: ret %struct.int8x8x2_t [[TMP4]] |
436 | // CHECK-A32: ret void |
437 | int8x8x2_t test_vld1_s8_x2(int8_t const *a) { |
438 | return vld1_s8_x2(a); |
439 | } |
440 | |
441 | // CHECK-LABEL: @test_vld1_s8_x3( |
442 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 |
443 | // CHECK-A32: %struct.int8x8x3_t* noalias sret [[RETVAL:%.*]], |
444 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 |
445 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* |
446 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a) |
447 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* |
448 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
449 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* |
450 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* |
451 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false) |
452 | // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 |
453 | // CHECK-A64: ret %struct.int8x8x3_t [[TMP4]] |
454 | // CHECK-A32: ret void |
455 | int8x8x3_t test_vld1_s8_x3(int8_t const *a) { |
456 | return vld1_s8_x3(a); |
457 | } |
458 | |
459 | // CHECK-LABEL: @test_vld1_s8_x4( |
460 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 |
461 | // CHECK-A32: %struct.int8x8x4_t* noalias sret [[RETVAL:%.*]], |
462 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 |
463 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* |
464 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a) |
465 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* |
466 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
467 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* |
468 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* |
469 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false) |
470 | // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 |
471 | // CHECK-A64: ret %struct.int8x8x4_t [[TMP4]] |
472 | // CHECK-A32: ret void |
473 | int8x8x4_t test_vld1_s8_x4(int8_t const *a) { |
474 | return vld1_s8_x4(a); |
475 | } |
476 | |
477 | // CHECK-LABEL: @test_vld1_u16_x2( |
478 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 |
479 | // CHECK-A32: %struct.uint16x4x2_t* noalias sret [[RETVAL:%.*]], |
480 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 |
481 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* |
482 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
483 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
484 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0i16(i16* [[TMP2]]) |
485 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* |
486 | // CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]] |
487 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* |
488 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* |
489 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
490 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 |
491 | // CHECK-A64: ret %struct.uint16x4x2_t [[TMP6]] |
492 | // CHECK-A32: ret void |
493 | uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) { |
494 | return vld1_u16_x2(a); |
495 | } |
496 | |
497 | // CHECK-LABEL: @test_vld1_u16_x3( |
498 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 |
499 | // CHECK-A32: %struct.uint16x4x3_t* noalias sret [[RETVAL:%.*]], |
500 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 |
501 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* |
502 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
503 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
504 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0i16(i16* [[TMP2]]) |
505 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* |
506 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
507 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* |
508 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* |
509 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
510 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 |
511 | // CHECK-A64: ret %struct.uint16x4x3_t [[TMP6]] |
512 | // CHECK-A32: ret void |
513 | uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) { |
514 | return vld1_u16_x3(a); |
515 | } |
516 | |
517 | // CHECK-LABEL: @test_vld1_u16_x4( |
518 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 |
519 | // CHECK-A32: %struct.uint16x4x4_t* noalias sret [[RETVAL:%.*]], |
520 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 |
521 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* |
522 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
523 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
524 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0i16(i16* [[TMP2]]) |
525 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* |
526 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
527 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* |
528 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* |
529 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
530 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 |
531 | // CHECK-A64: ret %struct.uint16x4x4_t [[TMP6]] |
532 | // CHECK-A32: ret void |
533 | uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) { |
534 | return vld1_u16_x4(a); |
535 | } |
536 | |
537 | // CHECK-LABEL: @test_vld1_u32_x2( |
538 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 |
539 | // CHECK-A32: %struct.uint32x2x2_t* noalias sret [[RETVAL:%.*]], |
540 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 |
541 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* |
542 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
543 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
544 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i32.p0i32(i32* [[TMP2]]) |
545 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* |
546 | // CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]] |
547 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* |
548 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* |
549 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
550 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 |
551 | // CHECK-A64: ret %struct.uint32x2x2_t [[TMP6]] |
552 | // CHECK-A32: ret void |
553 | uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) { |
554 | return vld1_u32_x2(a); |
555 | } |
556 | |
557 | // CHECK-LABEL: @test_vld1_u32_x3( |
558 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 |
559 | // CHECK-A32: %struct.uint32x2x3_t* noalias sret [[RETVAL:%.*]], |
560 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 |
561 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* |
562 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
563 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
564 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i32.p0i32(i32* [[TMP2]]) |
565 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* |
566 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
567 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* |
568 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* |
569 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
570 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 |
571 | // CHECK-A64: ret %struct.uint32x2x3_t [[TMP6]] |
572 | // CHECK-A32: ret void |
573 | uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) { |
574 | return vld1_u32_x3(a); |
575 | } |
576 | |
577 | // CHECK-LABEL: @test_vld1_u32_x4( |
578 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 |
579 | // CHECK-A32: %struct.uint32x2x4_t* noalias sret [[RETVAL:%.*]], |
580 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 |
581 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* |
582 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
583 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
584 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i32.p0i32(i32* [[TMP2]]) |
585 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* |
586 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
587 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* |
588 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* |
589 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
590 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 |
591 | // CHECK-A64: ret %struct.uint32x2x4_t [[TMP6]] |
592 | // CHECK-A32: ret void |
593 | uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) { |
594 | return vld1_u32_x4(a); |
595 | } |
596 | |
597 | // CHECK-LABEL: @test_vld1_u64_x2( |
598 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 |
599 | // CHECK-A32: %struct.uint64x1x2_t* noalias sret [[RETVAL:%.*]], |
600 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 |
601 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* |
602 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
603 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
604 | // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v1i64.p0i64(i64* [[TMP2]]) |
605 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* |
606 | // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]] |
607 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* |
608 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* |
609 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
610 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 |
611 | // CHECK-A64: ret %struct.uint64x1x2_t [[TMP6]] |
612 | // CHECK-A32: ret void |
613 | uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) { |
614 | return vld1_u64_x2(a); |
615 | } |
616 | |
617 | // CHECK-LABEL: @test_vld1_u64_x3( |
618 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 |
619 | // CHECK-A32: %struct.uint64x1x3_t* noalias sret [[RETVAL:%.*]], |
620 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 |
621 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* |
622 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
623 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
624 | // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v1i64.p0i64(i64* [[TMP2]]) |
625 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* |
626 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
627 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* |
628 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* |
629 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
630 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 |
631 | // CHECK-A64: ret %struct.uint64x1x3_t [[TMP6]] |
632 | // CHECK-A32: ret void |
633 | uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) { |
634 | return vld1_u64_x3(a); |
635 | } |
636 | |
637 | // CHECK-LABEL: @test_vld1_u64_x4( |
638 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 |
639 | // CHECK-A32: %struct.uint64x1x4_t* noalias sret [[RETVAL:%.*]], |
640 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 |
641 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* |
642 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
643 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
644 | // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v1i64.p0i64(i64* [[TMP2]]) |
645 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* |
646 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
647 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* |
648 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* |
649 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
650 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 |
651 | // CHECK-A64: ret %struct.uint64x1x4_t [[TMP6]] |
652 | // CHECK-A32: ret void |
653 | uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) { |
654 | return vld1_u64_x4(a); |
655 | } |
656 | |
657 | // CHECK-LABEL: @test_vld1_u8_x2( |
658 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 |
659 | // CHECK-A32: %struct.uint8x8x2_t* noalias sret [[RETVAL:%.*]], |
660 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 |
661 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* |
662 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a) |
663 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* |
664 | // CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]] |
665 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* |
666 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* |
667 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false) |
668 | // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 |
669 | // CHECK-A64: ret %struct.uint8x8x2_t [[TMP4]] |
670 | // CHECK-A32: ret void |
671 | uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) { |
672 | return vld1_u8_x2(a); |
673 | } |
674 | |
675 | // CHECK-LABEL: @test_vld1_u8_x3( |
676 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 |
677 | // CHECK-A32: %struct.uint8x8x3_t* noalias sret [[RETVAL:%.*]], |
678 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 |
679 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* |
680 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a) |
681 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* |
682 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
683 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* |
684 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* |
685 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false) |
686 | // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 |
687 | // CHECK-A64: ret %struct.uint8x8x3_t [[TMP4]] |
688 | // CHECK-A32: ret void |
689 | uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) { |
690 | return vld1_u8_x3(a); |
691 | } |
692 | |
693 | // CHECK-LABEL: @test_vld1_u8_x4( |
694 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 |
695 | // CHECK-A32: %struct.uint8x8x4_t* noalias sret [[RETVAL:%.*]], |
696 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 |
697 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* |
698 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a) |
699 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* |
700 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
701 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* |
702 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* |
703 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false) |
704 | // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 |
705 | // CHECK-A64: ret %struct.uint8x8x4_t [[TMP4]] |
706 | // CHECK-A32: ret void |
707 | uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) { |
708 | return vld1_u8_x4(a); |
709 | } |
710 | |
711 | // CHECK-LABEL: @test_vld1q_f16_x2( |
712 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 |
713 | // CHECK-A32: %struct.float16x8x2_t* noalias sret [[RETVAL:%.*]], |
714 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align {{16|8}} |
715 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* |
716 | // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* |
717 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]* |
718 | // CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x2.v8f16.p0f16|arm.neon.vld1x2.v8i16.p0i16}}([[HALF]]* [[TMP2]]) |
719 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]> }* |
720 | // CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], { <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]] |
721 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* |
722 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* |
723 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
724 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 |
725 | // CHECK-A64: ret %struct.float16x8x2_t [[TMP6]] |
726 | // CHECK-A32: ret void |
727 | float16x8x2_t test_vld1q_f16_x2(float16_t const *a) { |
728 | return vld1q_f16_x2(a); |
729 | } |
730 | |
731 | // CHECK-LABEL: @test_vld1q_f16_x3( |
732 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 |
733 | // CHECK-A32: %struct.float16x8x3_t* noalias sret [[RETVAL:%.*]], |
734 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align {{16|8}} |
735 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* |
736 | // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* |
737 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]* |
738 | // CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x3.v8f16.p0f16|arm.neon.vld1x3.v8i16.p0i16}}([[HALF]]* [[TMP2]]) |
739 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* |
740 | // CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]] |
741 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* |
742 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* |
743 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
744 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 |
745 | // CHECK-A64: ret %struct.float16x8x3_t [[TMP6]] |
746 | // CHECK-A32: ret void |
747 | float16x8x3_t test_vld1q_f16_x3(float16_t const *a) { |
748 | return vld1q_f16_x3(a); |
749 | } |
750 | |
751 | // CHECK-LABEL: @test_vld1q_f16_x4( |
752 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 |
753 | // CHECK-A32: %struct.float16x8x4_t* noalias sret [[RETVAL:%.*]], |
754 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align {{16|8}} |
755 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* |
756 | // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* |
757 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [[HALF]]* |
758 | // CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x4.v8f16.p0f16|arm.neon.vld1x4.v8i16.p0i16}}([[HALF]]* [[TMP2]]) |
759 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* |
760 | // CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]] |
761 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* |
762 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* |
763 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
764 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 |
765 | // CHECK-A64: ret %struct.float16x8x4_t [[TMP6]] |
766 | // CHECK-A32: ret void |
767 | float16x8x4_t test_vld1q_f16_x4(float16_t const *a) { |
768 | return vld1q_f16_x4(a); |
769 | } |
770 | |
771 | // CHECK-LABEL: @test_vld1q_f32_x2( |
772 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 |
773 | // CHECK-A32: %struct.float32x4x2_t* noalias sret [[RETVAL:%.*]], |
774 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align {{16|8}} |
775 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* |
776 | // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* |
777 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
778 | // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4f32.p0f32(float* [[TMP2]]) |
779 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }* |
780 | // CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]] |
781 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* |
782 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* |
783 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
784 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 |
785 | // CHECK-A64: ret %struct.float32x4x2_t [[TMP6]] |
786 | // CHECK-A32: ret void |
787 | float32x4x2_t test_vld1q_f32_x2(float32_t const *a) { |
788 | return vld1q_f32_x2(a); |
789 | } |
790 | |
791 | // CHECK-LABEL: @test_vld1q_f32_x3( |
792 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 |
793 | // CHECK-A32: %struct.float32x4x3_t* noalias sret [[RETVAL:%.*]], |
794 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align {{16|8}} |
795 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* |
796 | // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* |
797 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
798 | // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4f32.p0f32(float* [[TMP2]]) |
799 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }* |
800 | // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] |
801 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* |
802 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* |
803 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
804 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 |
805 | // CHECK-A64: ret %struct.float32x4x3_t [[TMP6]] |
806 | // CHECK-A32: ret void |
807 | float32x4x3_t test_vld1q_f32_x3(float32_t const *a) { |
808 | return vld1q_f32_x3(a); |
809 | } |
810 | |
811 | // CHECK-LABEL: @test_vld1q_f32_x4( |
812 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 |
813 | // CHECK-A32: %struct.float32x4x4_t* noalias sret [[RETVAL:%.*]], |
814 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align {{16|8}} |
815 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* |
816 | // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* |
817 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
818 | // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4f32.p0f32(float* [[TMP2]]) |
819 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* |
820 | // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] |
821 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* |
822 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* |
823 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
824 | // CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 |
825 | // CHECK-A64: ret %struct.float32x4x4_t [[TMP6]] |
826 | // CHECK-A32: ret void |
827 | float32x4x4_t test_vld1q_f32_x4(float32_t const *a) { |
828 | return vld1q_f32_x4(a); |
829 | } |
830 | |
831 | // CHECK-LABEL: @test_vld1q_p16_x2( |
832 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 |
833 | // CHECK-A32: %struct.poly16x8x2_t* noalias sret [[RETVAL:%.*]], |
834 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align {{16|8}} |
835 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* |
836 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
837 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
838 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0i16(i16* [[TMP2]]) |
839 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* |
840 | // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] |
841 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* |
842 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* |
843 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
844 | // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 |
845 | // CHECK-A64: ret %struct.poly16x8x2_t [[TMP6]] |
846 | // CHECK-A32: ret void |
847 | poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) { |
848 | return vld1q_p16_x2(a); |
849 | } |
850 | |
851 | // CHECK-LABEL: @test_vld1q_p16_x3( |
852 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 |
853 | // CHECK-A32: %struct.poly16x8x3_t* noalias sret [[RETVAL:%.*]], |
854 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align {{16|8}} |
855 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* |
856 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
857 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
858 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0i16(i16* [[TMP2]]) |
859 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* |
860 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
861 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* |
862 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* |
863 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
864 | // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 |
865 | // CHECK-A64: ret %struct.poly16x8x3_t [[TMP6]] |
866 | // CHECK-A32: ret void |
867 | poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) { |
868 | return vld1q_p16_x3(a); |
869 | } |
870 | |
871 | // CHECK-LABEL: @test_vld1q_p16_x4( |
872 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 |
873 | // CHECK-A32: %struct.poly16x8x4_t* noalias sret [[RETVAL:%.*]], |
874 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align {{16|8}} |
875 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* |
876 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
877 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
878 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0i16(i16* [[TMP2]]) |
879 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* |
880 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
881 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* |
882 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* |
883 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
884 | // CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 |
885 | // CHECK-A64: ret %struct.poly16x8x4_t [[TMP6]] |
886 | // CHECK-A32: ret void |
887 | poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) { |
888 | return vld1q_p16_x4(a); |
889 | } |
890 | |
891 | // CHECK-LABEL: @test_vld1q_p8_x2( |
892 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 |
893 | // CHECK-A32: %struct.poly8x16x2_t* noalias sret [[RETVAL:%.*]], |
894 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align {{16|8}} |
895 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* |
896 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a) |
897 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* |
898 | // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] |
899 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* |
900 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* |
901 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false) |
902 | // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 |
903 | // CHECK-A64: ret %struct.poly8x16x2_t [[TMP4]] |
904 | // CHECK-A32: ret void |
905 | poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) { |
906 | return vld1q_p8_x2(a); |
907 | } |
908 | |
909 | // CHECK-LABEL: @test_vld1q_p8_x3( |
910 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 |
911 | // CHECK-A32: %struct.poly8x16x3_t* noalias sret [[RETVAL:%.*]], |
912 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align {{16|8}} |
913 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* |
914 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a) |
915 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* |
916 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
917 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* |
918 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* |
919 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false) |
920 | // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 |
921 | // CHECK-A64: ret %struct.poly8x16x3_t [[TMP4]] |
922 | // CHECK-A32: ret void |
923 | poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) { |
924 | return vld1q_p8_x3(a); |
925 | } |
926 | |
927 | // CHECK-LABEL: @test_vld1q_p8_x4( |
928 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 |
929 | // CHECK-A32: %struct.poly8x16x4_t* noalias sret [[RETVAL:%.*]], |
930 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align {{16|8}} |
931 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* |
932 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a) |
933 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* |
934 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
935 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* |
936 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* |
937 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false) |
938 | // CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 |
939 | // CHECK-A64: ret %struct.poly8x16x4_t [[TMP4]] |
940 | // CHECK-A32: ret void |
941 | poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) { |
942 | return vld1q_p8_x4(a); |
943 | } |
944 | |
945 | // CHECK-LABEL: @test_vld1q_s16_x2( |
946 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 |
947 | // CHECK-A32: %struct.int16x8x2_t* noalias sret [[RETVAL:%.*]], |
948 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align {{16|8}} |
949 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* |
950 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
951 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
952 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0i16(i16* [[TMP2]]) |
953 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* |
954 | // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] |
955 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* |
956 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* |
957 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
958 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 |
959 | // CHECK-A64: ret %struct.int16x8x2_t [[TMP6]] |
960 | // CHECK-A32: ret void |
961 | int16x8x2_t test_vld1q_s16_x2(int16_t const *a) { |
962 | return vld1q_s16_x2(a); |
963 | } |
964 | |
965 | // CHECK-LABEL: @test_vld1q_s16_x3( |
966 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 |
967 | // CHECK-A32: %struct.int16x8x3_t* noalias sret [[RETVAL:%.*]], |
968 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align {{16|8}} |
969 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* |
970 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
971 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
972 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0i16(i16* [[TMP2]]) |
973 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* |
974 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
975 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* |
976 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* |
977 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
978 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 |
979 | // CHECK-A64: ret %struct.int16x8x3_t [[TMP6]] |
980 | // CHECK-A32: ret void |
981 | int16x8x3_t test_vld1q_s16_x3(int16_t const *a) { |
982 | return vld1q_s16_x3(a); |
983 | } |
984 | |
985 | // CHECK-LABEL: @test_vld1q_s16_x4( |
986 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 |
987 | // CHECK-A32: %struct.int16x8x4_t* noalias sret [[RETVAL:%.*]], |
988 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align {{16|8}} |
989 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* |
990 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
991 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
992 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0i16(i16* [[TMP2]]) |
993 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* |
994 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
995 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* |
996 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* |
997 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
998 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 |
999 | // CHECK-A64: ret %struct.int16x8x4_t [[TMP6]] |
1000 | // CHECK-A32: ret void |
1001 | int16x8x4_t test_vld1q_s16_x4(int16_t const *a) { |
1002 | return vld1q_s16_x4(a); |
1003 | } |
1004 | |
1005 | // CHECK-LABEL: @test_vld1q_s32_x2( |
1006 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 |
1007 | // CHECK-A32: %struct.int32x4x2_t* noalias sret [[RETVAL:%.*]], |
1008 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align {{16|8}} |
1009 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* |
1010 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
1011 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1012 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i32.p0i32(i32* [[TMP2]]) |
1013 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* |
1014 | // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]] |
1015 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* |
1016 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* |
1017 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
1018 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 |
1019 | // CHECK-A64: ret %struct.int32x4x2_t [[TMP6]] |
1020 | // CHECK-A32: ret void |
1021 | int32x4x2_t test_vld1q_s32_x2(int32_t const *a) { |
1022 | return vld1q_s32_x2(a); |
1023 | } |
1024 | |
1025 | // CHECK-LABEL: @test_vld1q_s32_x3( |
1026 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 |
1027 | // CHECK-A32: %struct.int32x4x3_t* noalias sret [[RETVAL:%.*]], |
1028 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align {{16|8}} |
1029 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* |
1030 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
1031 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1032 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i32.p0i32(i32* [[TMP2]]) |
1033 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* |
1034 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
1035 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* |
1036 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* |
1037 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
1038 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 |
1039 | // CHECK-A64: ret %struct.int32x4x3_t [[TMP6]] |
1040 | // CHECK-A32: ret void |
1041 | int32x4x3_t test_vld1q_s32_x3(int32_t const *a) { |
1042 | return vld1q_s32_x3(a); |
1043 | } |
1044 | |
1045 | // CHECK-LABEL: @test_vld1q_s32_x4( |
1046 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 |
1047 | // CHECK-A32: %struct.int32x4x4_t* noalias sret [[RETVAL:%.*]], |
1048 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align {{16|8}} |
1049 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* |
1050 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
1051 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1052 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i32.p0i32(i32* [[TMP2]]) |
1053 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* |
1054 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
1055 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* |
1056 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* |
1057 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
1058 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 |
1059 | // CHECK-A64: ret %struct.int32x4x4_t [[TMP6]] |
1060 | // CHECK-A32: ret void |
1061 | int32x4x4_t test_vld1q_s32_x4(int32_t const *a) { |
1062 | return vld1q_s32_x4(a); |
1063 | } |
1064 | |
1065 | // CHECK-LABEL: @test_vld1q_s64_x2( |
1066 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 |
1067 | // CHECK-A32: %struct.int64x2x2_t* noalias sret [[RETVAL:%.*]], |
1068 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align {{16|8}} |
1069 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* |
1070 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
1071 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1072 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i64.p0i64(i64* [[TMP2]]) |
1073 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }* |
1074 | // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]] |
1075 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* |
1076 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* |
1077 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
1078 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 |
1079 | // CHECK-A64: ret %struct.int64x2x2_t [[TMP6]] |
1080 | // CHECK-A32: ret void |
1081 | int64x2x2_t test_vld1q_s64_x2(int64_t const *a) { |
1082 | return vld1q_s64_x2(a); |
1083 | } |
1084 | |
1085 | // CHECK-LABEL: @test_vld1q_s64_x3( |
1086 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 |
1087 | // CHECK-A32: %struct.int64x2x3_t* noalias sret [[RETVAL:%.*]], |
1088 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align {{16|8}} |
1089 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* |
1090 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
1091 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1092 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i64.p0i64(i64* [[TMP2]]) |
1093 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* |
1094 | // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] |
1095 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* |
1096 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* |
1097 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
1098 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 |
1099 | // CHECK-A64: ret %struct.int64x2x3_t [[TMP6]] |
1100 | // CHECK-A32: ret void |
1101 | int64x2x3_t test_vld1q_s64_x3(int64_t const *a) { |
1102 | return vld1q_s64_x3(a); |
1103 | } |
1104 | |
1105 | // CHECK-LABEL: @test_vld1q_s64_x4( |
1106 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 |
1107 | // CHECK-A32: %struct.int64x2x4_t* noalias sret [[RETVAL:%.*]], |
1108 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align {{16|8}} |
1109 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* |
1110 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
1111 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1112 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i64.p0i64(i64* [[TMP2]]) |
1113 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* |
1114 | // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] |
1115 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* |
1116 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* |
1117 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
1118 | // CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 |
1119 | // CHECK-A64: ret %struct.int64x2x4_t [[TMP6]] |
1120 | // CHECK-A32: ret void |
1121 | int64x2x4_t test_vld1q_s64_x4(int64_t const *a) { |
1122 | return vld1q_s64_x4(a); |
1123 | } |
1124 | |
1125 | // CHECK-LABEL: @test_vld1q_s8_x2( |
1126 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 |
1127 | // CHECK-A32: %struct.int8x16x2_t* noalias sret [[RETVAL:%.*]], |
1128 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align {{16|8}} |
1129 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* |
1130 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a) |
1131 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* |
1132 | // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] |
1133 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* |
1134 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* |
1135 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false) |
1136 | // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 |
1137 | // CHECK-A64: ret %struct.int8x16x2_t [[TMP4]] |
1138 | // CHECK-A32: ret void |
1139 | int8x16x2_t test_vld1q_s8_x2(int8_t const *a) { |
1140 | return vld1q_s8_x2(a); |
1141 | } |
1142 | |
1143 | // CHECK-LABEL: @test_vld1q_s8_x3( |
1144 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 |
1145 | // CHECK-A32: %struct.int8x16x3_t* noalias sret [[RETVAL:%.*]], |
1146 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align {{16|8}} |
1147 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* |
1148 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a) |
1149 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* |
1150 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
1151 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* |
1152 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* |
1153 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false) |
1154 | // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 |
1155 | // CHECK-A64: ret %struct.int8x16x3_t [[TMP4]] |
1156 | // CHECK-A32: ret void |
1157 | int8x16x3_t test_vld1q_s8_x3(int8_t const *a) { |
1158 | return vld1q_s8_x3(a); |
1159 | } |
1160 | |
1161 | // CHECK-LABEL: @test_vld1q_s8_x4( |
1162 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 |
1163 | // CHECK-A32: %struct.int8x16x4_t* noalias sret [[RETVAL:%.*]], |
1164 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align {{16|8}} |
1165 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* |
1166 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a) |
1167 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* |
1168 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
1169 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* |
1170 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* |
1171 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false) |
1172 | // CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 |
1173 | // CHECK-A64: ret %struct.int8x16x4_t [[TMP4]] |
1174 | // CHECK-A32: ret void |
1175 | int8x16x4_t test_vld1q_s8_x4(int8_t const *a) { |
1176 | return vld1q_s8_x4(a); |
1177 | } |
1178 | |
1179 | // CHECK-LABEL: @test_vld1q_u16_x2( |
1180 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 |
1181 | // CHECK-A32: %struct.uint16x8x2_t* noalias sret [[RETVAL:%.*]], |
1182 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align {{16|8}} |
1183 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* |
1184 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
1185 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1186 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0i16(i16* [[TMP2]]) |
1187 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* |
1188 | // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] |
1189 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* |
1190 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* |
1191 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
1192 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 |
1193 | // CHECK-A64: ret %struct.uint16x8x2_t [[TMP6]] |
1194 | // CHECK-A32: ret void |
1195 | uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) { |
1196 | return vld1q_u16_x2(a); |
1197 | } |
1198 | |
1199 | // CHECK-LABEL: @test_vld1q_u16_x3( |
1200 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 |
1201 | // CHECK-A32: %struct.uint16x8x3_t* noalias sret [[RETVAL:%.*]], |
1202 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align {{16|8}} |
1203 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* |
1204 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
1205 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1206 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0i16(i16* [[TMP2]]) |
1207 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* |
1208 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
1209 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* |
1210 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* |
1211 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
1212 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 |
1213 | // CHECK-A64: ret %struct.uint16x8x3_t [[TMP6]] |
1214 | // CHECK-A32: ret void |
1215 | uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) { |
1216 | return vld1q_u16_x3(a); |
1217 | } |
1218 | |
1219 | // CHECK-LABEL: @test_vld1q_u16_x4( |
1220 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 |
1221 | // CHECK-A32: %struct.uint16x8x4_t* noalias sret [[RETVAL:%.*]], |
1222 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align {{16|8}} |
1223 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* |
1224 | // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* |
1225 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1226 | // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0i16(i16* [[TMP2]]) |
1227 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* |
1228 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
1229 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* |
1230 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* |
1231 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
1232 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 |
1233 | // CHECK-A64: ret %struct.uint16x8x4_t [[TMP6]] |
1234 | // CHECK-A32: ret void |
1235 | uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) { |
1236 | return vld1q_u16_x4(a); |
1237 | } |
1238 | |
1239 | // CHECK-LABEL: @test_vld1q_u32_x2( |
1240 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 |
1241 | // CHECK-A32: %struct.uint32x4x2_t* noalias sret [[RETVAL:%.*]], |
1242 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align {{16|8}} |
1243 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* |
1244 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
1245 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1246 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i32.p0i32(i32* [[TMP2]]) |
1247 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* |
1248 | // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]] |
1249 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* |
1250 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* |
1251 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
1252 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 |
1253 | // CHECK-A64: ret %struct.uint32x4x2_t [[TMP6]] |
1254 | // CHECK-A32: ret void |
1255 | uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) { |
1256 | return vld1q_u32_x2(a); |
1257 | } |
1258 | |
1259 | // CHECK-LABEL: @test_vld1q_u32_x3( |
1260 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 |
1261 | // CHECK-A32: %struct.uint32x4x3_t* noalias sret [[RETVAL:%.*]], |
1262 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align {{16|8}} |
1263 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* |
1264 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
1265 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1266 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i32.p0i32(i32* [[TMP2]]) |
1267 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* |
1268 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
1269 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* |
1270 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* |
1271 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
1272 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 |
1273 | // CHECK-A64: ret %struct.uint32x4x3_t [[TMP6]] |
1274 | // CHECK-A32: ret void |
1275 | uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) { |
1276 | return vld1q_u32_x3(a); |
1277 | } |
1278 | |
1279 | // CHECK-LABEL: @test_vld1q_u32_x4( |
1280 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 |
1281 | // CHECK-A32: %struct.uint32x4x4_t* noalias sret [[RETVAL:%.*]], |
1282 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align {{16|8}} |
1283 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* |
1284 | // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* |
1285 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1286 | // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i32.p0i32(i32* [[TMP2]]) |
1287 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* |
1288 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
1289 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* |
1290 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* |
1291 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
1292 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 |
1293 | // CHECK-A64: ret %struct.uint32x4x4_t [[TMP6]] |
1294 | // CHECK-A32: ret void |
1295 | uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) { |
1296 | return vld1q_u32_x4(a); |
1297 | } |
1298 | |
1299 | // CHECK-LABEL: @test_vld1q_u64_x2( |
1300 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 |
1301 | // CHECK-A32: %struct.uint64x2x2_t* noalias sret [[RETVAL:%.*]], |
1302 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align {{16|8}} |
1303 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* |
1304 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
1305 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1306 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i64.p0i64(i64* [[TMP2]]) |
1307 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }* |
1308 | // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]] |
1309 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* |
1310 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* |
1311 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
1312 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 |
1313 | // CHECK-A64: ret %struct.uint64x2x2_t [[TMP6]] |
1314 | // CHECK-A32: ret void |
1315 | uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) { |
1316 | return vld1q_u64_x2(a); |
1317 | } |
1318 | |
1319 | // CHECK-LABEL: @test_vld1q_u64_x3( |
1320 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 |
1321 | // CHECK-A32: %struct.uint64x2x3_t* noalias sret [[RETVAL:%.*]], |
1322 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align {{16|8}} |
1323 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* |
1324 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
1325 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1326 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i64.p0i64(i64* [[TMP2]]) |
1327 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* |
1328 | // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] |
1329 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* |
1330 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* |
1331 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
1332 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 |
1333 | // CHECK-A64: ret %struct.uint64x2x3_t [[TMP6]] |
1334 | // CHECK-A32: ret void |
1335 | uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) { |
1336 | return vld1q_u64_x3(a); |
1337 | } |
1338 | |
1339 | // CHECK-LABEL: @test_vld1q_u64_x4( |
1340 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 |
1341 | // CHECK-A32: %struct.uint64x2x4_t* noalias sret [[RETVAL:%.*]], |
1342 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align {{16|8}} |
1343 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* |
1344 | // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* |
1345 | // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1346 | // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i64.p0i64(i64* [[TMP2]]) |
1347 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* |
1348 | // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] |
1349 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* |
1350 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* |
1351 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
1352 | // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 |
1353 | // CHECK-A64: ret %struct.uint64x2x4_t [[TMP6]] |
1354 | // CHECK-A32: ret void |
1355 | uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) { |
1356 | return vld1q_u64_x4(a); |
1357 | } |
1358 | |
1359 | // CHECK-LABEL: @test_vld1q_u8_x2( |
1360 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 |
1361 | // CHECK-A32: %struct.uint8x16x2_t* noalias sret [[RETVAL:%.*]], |
1362 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align {{16|8}} |
1363 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* |
1364 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a) |
1365 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* |
1366 | // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] |
1367 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* |
1368 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* |
1369 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false) |
1370 | // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 |
1371 | // CHECK-A64: ret %struct.uint8x16x2_t [[TMP4]] |
1372 | // CHECK-A32: ret void |
1373 | uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) { |
1374 | return vld1q_u8_x2(a); |
1375 | } |
1376 | |
1377 | // CHECK-LABEL: @test_vld1q_u8_x3( |
1378 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 |
1379 | // CHECK-A32: %struct.uint8x16x3_t* noalias sret [[RETVAL:%.*]], |
1380 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align {{16|8}} |
1381 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* |
1382 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a) |
1383 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* |
1384 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
1385 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* |
1386 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* |
1387 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false) |
1388 | // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 |
1389 | // CHECK-A64: ret %struct.uint8x16x3_t [[TMP4]] |
1390 | // CHECK-A32: ret void |
1391 | uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) { |
1392 | return vld1q_u8_x3(a); |
1393 | } |
1394 | |
1395 | // CHECK-LABEL: @test_vld1q_u8_x4( |
1396 | // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 |
1397 | // CHECK-A32: %struct.uint8x16x4_t* noalias sret [[RETVAL:%.*]], |
1398 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align {{16|8}} |
1399 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* |
1400 | // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a) |
1401 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* |
1402 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
1403 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* |
1404 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* |
1405 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false) |
1406 | // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 |
1407 | // CHECK-A64: ret %struct.uint8x16x4_t [[TMP4]] |
1408 | // CHECK-A32: ret void |
1409 | uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) { |
1410 | return vld1q_u8_x4(a); |
1411 | } |
1412 | |
1413 | // CHECK-LABEL: @test_vld2_dup_f16( |
1414 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 |
1415 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* |
1416 | // CHECK: [[TMP1:%.*]] = bitcast half* %src to i8* |
1417 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* |
1418 | // CHECK-A64: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half* [[TMP2]]) |
1419 | // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1420 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]> }* |
1421 | // CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD2]], { <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]] |
1422 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* %dest to i8* |
1423 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* |
1424 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1425 | // CHECK: ret void |
1426 | void test_vld2_dup_f16(float16x4x2_t *dest, const float16_t *src) { |
1427 | *dest = vld2_dup_f16(src); |
1428 | } |
1429 | |
1430 | // CHECK-LABEL: @test_vld2_dup_f32( |
1431 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 |
1432 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* |
1433 | // CHECK: [[TMP1:%.*]] = bitcast float* %src to i8* |
1434 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
1435 | // CHECK-A64: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* [[TMP2]]) |
1436 | // CHECK-A32: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2dup.v2f32.p0i8(i8* [[TMP1]], i32 4) |
1437 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* |
1438 | // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]] |
1439 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* %dest to i8* |
1440 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* |
1441 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1442 | // CHECK: ret void |
1443 | void test_vld2_dup_f32(float32x2x2_t *dest, const float32_t *src) { |
1444 | *dest = vld2_dup_f32(src); |
1445 | } |
1446 | |
1447 | // CHECK-LABEL: @test_vld2_dup_p16( |
1448 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 |
1449 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* |
1450 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1451 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1452 | // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) |
1453 | // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1454 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* |
1455 | // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] |
1456 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* %dest to i8* |
1457 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* |
1458 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1459 | // CHECK: ret void |
1460 | void test_vld2_dup_p16(poly16x4x2_t *dest, const poly16_t *src) { |
1461 | *dest = vld2_dup_p16(src); |
1462 | } |
1463 | |
1464 | // CHECK-LABEL: @test_vld2_dup_p8( |
1465 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 |
1466 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* |
1467 | // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %src) |
1468 | // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0i8(i8* %src, i32 1) |
1469 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* |
1470 | // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] |
1471 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* %dest to i8* |
1472 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* |
1473 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false) |
1474 | // CHECK: ret void |
1475 | void test_vld2_dup_p8(poly8x8x2_t *dest, poly8_t *src) { |
1476 | *dest = vld2_dup_p8(src); |
1477 | } |
1478 | |
1479 | // CHECK-LABEL: @test_vld2_dup_s16( |
1480 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 |
1481 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* |
1482 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1483 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1484 | // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) |
1485 | // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1486 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* |
1487 | // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] |
1488 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* %dest to i8* |
1489 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* |
1490 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1491 | // CHECK: ret void |
1492 | void test_vld2_dup_s16(int16x4x2_t *dest, const int16_t *src) { |
1493 | *dest = vld2_dup_s16(src); |
1494 | } |
1495 | |
1496 | // CHECK-LABEL: @test_vld2_dup_s32( |
1497 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 |
1498 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* |
1499 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
1500 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1501 | // CHECK-A64: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]]) |
1502 | // CHECK-A32: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2dup.v2i32.p0i8(i8* [[TMP1]], i32 4) |
1503 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* |
1504 | // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] |
1505 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* %dest to i8* |
1506 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* |
1507 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1508 | // CHECK: ret void |
1509 | void test_vld2_dup_s32(int32x2x2_t *dest, const int32_t *src) { |
1510 | *dest = vld2_dup_s32(src); |
1511 | } |
1512 | |
1513 | // CHECK-LABEL: @test_vld2_dup_s8( |
1514 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 |
1515 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* |
1516 | // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %src) |
1517 | // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0i8(i8* %src, i32 1) |
1518 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* |
1519 | // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] |
1520 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* %dest to i8* |
1521 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* |
1522 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false) |
1523 | // CHECK: ret void |
1524 | void test_vld2_dup_s8(int8x8x2_t *dest, int8_t *src) { |
1525 | *dest = vld2_dup_s8(src); |
1526 | } |
1527 | |
1528 | // CHECK-LABEL: @test_vld2_dup_u16( |
1529 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 |
1530 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* |
1531 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1532 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1533 | // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) |
1534 | // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1535 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* |
1536 | // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] |
1537 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* %dest to i8* |
1538 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* |
1539 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1540 | // CHECK: ret void |
1541 | void test_vld2_dup_u16(uint16x4x2_t *dest, const uint16_t *src) { |
1542 | *dest = vld2_dup_u16(src); |
1543 | } |
1544 | |
1545 | // CHECK-LABEL: @test_vld2_dup_u32( |
1546 | // CHECK: entry: |
1547 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 |
1548 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* |
1549 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
1550 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1551 | // CHECK-A64: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]]) |
1552 | // CHECK-A32: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2dup.v2i32.p0i8(i8* [[TMP1]], i32 4) |
1553 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* |
1554 | // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] |
1555 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* %dest to i8* |
1556 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* |
1557 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1558 | // CHECK: ret void |
1559 | void test_vld2_dup_u32(uint32x2x2_t *dest, const uint32_t *src) { |
1560 | *dest = vld2_dup_u32(src); |
1561 | } |
1562 | |
1563 | // CHECK-LABEL: @test_vld2_dup_s64( |
1564 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 |
1565 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* |
1566 | // CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8* |
1567 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1568 | // CHECK-A64: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]]) |
1569 | // CHECK-A32: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2dup.v1i64.p0i8(i8* [[TMP1]], i32 8) |
1570 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* |
1571 | // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] |
1572 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* %dest to i8* |
1573 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* |
1574 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1575 | // CHECK: ret void |
1576 | void test_vld2_dup_s64(int64x1x2_t *dest, const int64_t *src) { |
1577 | *dest = vld2_dup_s64(src); |
1578 | } |
1579 | |
1580 | // CHECK-LABEL: @test_vld2_dup_u64( |
1581 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 |
1582 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* |
1583 | // CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8* |
1584 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1585 | // CHECK-A64: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]]) |
1586 | // CHECK-A32: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2dup.v1i64.p0i8(i8* [[TMP1]], i32 8) |
1587 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* |
1588 | // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] |
1589 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* %dest to i8* |
1590 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* |
1591 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 16, i1 false) |
1592 | // CHECK: ret void |
1593 | void test_vld2_dup_u64(uint64x1x2_t *dest, const uint64_t *src) { |
1594 | *dest = vld2_dup_u64(src); |
1595 | } |
1596 | |
1597 | // CHECK-LABEL: @test_vld2_dup_u8( |
1598 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 |
1599 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* |
1600 | // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %src) |
1601 | // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0i8(i8* %src, i32 1) |
1602 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* |
1603 | // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] |
1604 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* %dest to i8* |
1605 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* |
1606 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 16, i1 false) |
1607 | // CHECK: ret void |
1608 | void test_vld2_dup_u8(uint8x8x2_t *dest, const uint8_t *src) { |
1609 | *dest = vld2_dup_u8(src); |
1610 | } |
1611 | |
1612 | // CHECK-LABEL: @test_vld3_dup_f16( |
1613 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 |
1614 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* |
1615 | // CHECK: [[TMP1:%.*]] = bitcast half* %src to i8* |
1616 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* |
1617 | // CHECK-A64: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half* [[TMP2]]) |
1618 | // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1619 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* |
1620 | // CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD3]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]] |
1621 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* %dest to i8* |
1622 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* |
1623 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1624 | // CHECK: ret void |
1625 | void test_vld3_dup_f16(float16x4x3_t *dest, float16_t *src) { |
1626 | *dest = vld3_dup_f16(src); |
1627 | } |
1628 | |
1629 | // CHECK-LABEL: @test_vld3_dup_f32( |
1630 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 |
1631 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* |
1632 | // CHECK: [[TMP1:%.*]] = bitcast float* %src to i8* |
1633 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
1634 | // CHECK-A64: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* [[TMP2]]) |
1635 | // CHECK-A32: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3dup.v2f32.p0i8(i8* [[TMP1]], i32 4) |
1636 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* |
1637 | // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] |
1638 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* %dest to i8* |
1639 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* |
1640 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1641 | // CHECK: ret void |
1642 | void test_vld3_dup_f32(float32x2x3_t *dest, const float32_t *src) { |
1643 | *dest = vld3_dup_f32(src); |
1644 | } |
1645 | |
1646 | // CHECK-LABEL: @test_vld3_dup_p16( |
1647 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 |
1648 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* |
1649 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1650 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1651 | // CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) |
1652 | // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1653 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* |
1654 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
1655 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* %dest to i8* |
1656 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* |
1657 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1658 | // CHECK: ret void |
1659 | void test_vld3_dup_p16(poly16x4x3_t *dest, const poly16_t *src) { |
1660 | *dest = vld3_dup_p16(src); |
1661 | } |
1662 | |
1663 | // CHECK-LABEL: @test_vld3_dup_p8( |
1664 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 |
1665 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* |
1666 | // CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %src) |
1667 | // CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0i8(i8* %src, i32 1) |
1668 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* |
1669 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
1670 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* %dest to i8* |
1671 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* |
1672 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false) |
1673 | // CHECK: ret void |
1674 | void test_vld3_dup_p8(poly8x8x3_t *dest, const poly8_t *src) { |
1675 | *dest = vld3_dup_p8(src); |
1676 | } |
1677 | |
1678 | // CHECK-LABEL: @test_vld3_dup_s16( |
1679 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 |
1680 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* |
1681 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1682 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1683 | // CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) |
1684 | // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1685 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* |
1686 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
1687 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* %dest to i8* |
1688 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* |
1689 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1690 | // CHECK: ret void |
1691 | void test_vld3_dup_s16(int16x4x3_t *dest, const int16_t *src) { |
1692 | *dest = vld3_dup_s16(src); |
1693 | } |
1694 | |
1695 | // CHECK-LABEL: @test_vld3_dup_s32( |
1696 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 |
1697 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* |
1698 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
1699 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1700 | // CHECK-A64: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]]) |
1701 | // CHECK-A32: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3dup.v2i32.p0i8(i8* [[TMP1]], i32 4) |
1702 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* |
1703 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
1704 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* %dest to i8* |
1705 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* |
1706 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1707 | // CHECK: ret void |
1708 | void test_vld3_dup_s32(int32x2x3_t *dest, const int32_t *src) { |
1709 | *dest = vld3_dup_s32(src); |
1710 | } |
1711 | |
1712 | // CHECK-LABEL: @test_vld3_dup_s8( |
1713 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 |
1714 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* |
1715 | // CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %src) |
1716 | // CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0i8(i8* %src, i32 1) |
1717 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* |
1718 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
1719 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* %dest to i8* |
1720 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* |
1721 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false) |
1722 | // CHECK: ret void |
1723 | void test_vld3_dup_s8(int8x8x3_t *dest, const int8_t *src) { |
1724 | *dest = vld3_dup_s8(src); |
1725 | } |
1726 | |
1727 | // CHECK-LABEL: @test_vld3_dup_u16( |
1728 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 |
1729 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* |
1730 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1731 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1732 | // CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) |
1733 | // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1734 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* |
1735 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
1736 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* %dest to i8* |
1737 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* |
1738 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1739 | // CHECK: ret void |
1740 | void test_vld3_dup_u16(uint16x4x3_t *dest, const uint16_t *src) { |
1741 | *dest = vld3_dup_u16(src); |
1742 | } |
1743 | |
1744 | // CHECK-LABEL: @test_vld3_dup_u32( |
1745 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 |
1746 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* |
1747 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
1748 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1749 | // CHECK-A64: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]]) |
1750 | // CHECK-A32: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3dup.v2i32.p0i8(i8* [[TMP1]], i32 4) |
1751 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* |
1752 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
1753 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* %dest to i8* |
1754 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* |
1755 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1756 | // CHECK: ret void |
1757 | void test_vld3_dup_u32(uint32x2x3_t *dest, const uint32_t *src) { |
1758 | *dest = vld3_dup_u32(src); |
1759 | } |
1760 | |
1761 | // CHECK-LABEL: @test_vld3_dup_u8( |
1762 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 |
1763 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* |
1764 | // CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %src) |
1765 | // CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0i8(i8* %src, i32 1) |
1766 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* |
1767 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
1768 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* %dest to i8* |
1769 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* |
1770 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 24, i1 false) |
1771 | // CHECK: ret void |
1772 | void test_vld3_dup_u8(uint8x8x3_t *dest, const uint8_t *src) { |
1773 | *dest = vld3_dup_u8(src); |
1774 | } |
1775 | |
1776 | // CHECK-LABEL: @test_vld3_dup_s64( |
1777 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 |
1778 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* |
1779 | // CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8* |
1780 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1781 | // CHECK-A64: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]]) |
1782 | // CHECK-A32: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3dup.v1i64.p0i8(i8* [[TMP1]], i32 8) |
1783 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* |
1784 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
1785 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* %dest to i8* |
1786 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* |
1787 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1788 | // CHECK: ret void |
1789 | void test_vld3_dup_s64(int64x1x3_t *dest, const int64_t *src) { |
1790 | *dest = vld3_dup_s64(src); |
1791 | } |
1792 | |
1793 | // CHECK-LABEL: @test_vld3_dup_u64( |
1794 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 |
1795 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* |
1796 | // CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8* |
1797 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1798 | // CHECK-A64: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]]) |
1799 | // CHECK-A32: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3dup.v1i64.p0i8(i8* [[TMP1]], i32 8) |
1800 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* |
1801 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
1802 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* %dest to i8* |
1803 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* |
1804 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 24, i1 false) |
1805 | // CHECK: ret void |
1806 | void test_vld3_dup_u64(uint64x1x3_t *dest, const uint64_t *src) { |
1807 | *dest = vld3_dup_u64(src); |
1808 | } |
1809 | |
1810 | // CHECK-LABEL: @test_vld4_dup_f16( |
1811 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 |
1812 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* |
1813 | // CHECK: [[TMP1:%.*]] = bitcast half* %src to i8* |
1814 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* |
1815 | // CHECK-A64: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half* [[TMP2]]) |
1816 | // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1817 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* |
1818 | // CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD4]], { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> }* [[TMP3]] |
1819 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* %dest to i8* |
1820 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* |
1821 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1822 | // CHECK: ret void |
1823 | void test_vld4_dup_f16(float16x4x4_t *dest, const float16_t *src) { |
1824 | *dest = vld4_dup_f16(src); |
1825 | } |
1826 | |
1827 | // CHECK-LABEL: @test_vld4_dup_f32( |
1828 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 |
1829 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* |
1830 | // CHECK: [[TMP1:%.*]] = bitcast float* %src to i8* |
1831 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
1832 | // CHECK-A64: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* [[TMP2]]) |
1833 | // CHECK-A32: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4dup.v2f32.p0i8(i8* [[TMP1]], i32 4) |
1834 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* |
1835 | // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] |
1836 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* %dest to i8* |
1837 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* |
1838 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1839 | // CHECK: ret void |
1840 | void test_vld4_dup_f32(float32x2x4_t *dest, const float32_t *src) { |
1841 | *dest = vld4_dup_f32(src); |
1842 | } |
1843 | |
1844 | // CHECK-LABEL: @test_vld4_dup_p16( |
1845 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 |
1846 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* |
1847 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1848 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1849 | // CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) |
1850 | // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1851 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* |
1852 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
1853 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* %dest to i8* |
1854 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* |
1855 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1856 | // CHECK: ret void |
1857 | void test_vld4_dup_p16(poly16x4x4_t *dest, const poly16_t *src) { |
1858 | *dest = vld4_dup_p16(src); |
1859 | } |
1860 | |
1861 | // CHECK-LABEL: @test_vld4_dup_p8( |
1862 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 |
1863 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* |
1864 | // CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %src) |
1865 | // CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0i8(i8* %src, i32 1) |
1866 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* |
1867 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
1868 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* %dest to i8* |
1869 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* |
1870 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false) |
1871 | // CHECK: ret void |
1872 | void test_vld4_dup_p8(poly8x8x4_t *dest, const poly8_t *src) { |
1873 | *dest = vld4_dup_p8(src); |
1874 | } |
1875 | |
1876 | // CHECK-LABEL: @test_vld4_dup_s16( |
1877 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 |
1878 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* |
1879 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1880 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1881 | // CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) |
1882 | // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1883 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* |
1884 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
1885 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* %dest to i8* |
1886 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* |
1887 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1888 | // CHECK: ret void |
1889 | void test_vld4_dup_s16(int16x4x4_t *dest, const int16_t *src) { |
1890 | *dest = vld4_dup_s16(src); |
1891 | } |
1892 | |
1893 | // CHECK-LABEL: @test_vld4_dup_s32( |
1894 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 |
1895 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* |
1896 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
1897 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1898 | // CHECK-A64: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]]) |
1899 | // CHECK-A32: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4dup.v2i32.p0i8(i8* [[TMP1]], i32 4) |
1900 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* |
1901 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
1902 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* %dest to i8* |
1903 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* |
1904 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1905 | // CHECK: ret void |
1906 | void test_vld4_dup_s32(int32x2x4_t *dest, const int32_t *src) { |
1907 | *dest = vld4_dup_s32(src); |
1908 | } |
1909 | |
1910 | // CHECK-LABEL: @test_vld4_dup_s8( |
1911 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 |
1912 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* |
1913 | // CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %src) |
1914 | // CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0i8(i8* %src, i32 1) |
1915 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* |
1916 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
1917 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* %dest to i8* |
1918 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* |
1919 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false) |
1920 | // CHECK: ret void |
1921 | void test_vld4_dup_s8(int8x8x4_t *dest, const int8_t *src) { |
1922 | *dest = vld4_dup_s8(src); |
1923 | } |
1924 | |
1925 | // CHECK-LABEL: @test_vld4_dup_u16( |
1926 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 |
1927 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* |
1928 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
1929 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
1930 | // CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) |
1931 | // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* [[TMP1]], i32 2) |
1932 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* |
1933 | // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] |
1934 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* %dest to i8* |
1935 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* |
1936 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1937 | // CHECK: ret void |
1938 | void test_vld4_dup_u16(uint16x4x4_t *dest, const uint16_t *src) { |
1939 | *dest = vld4_dup_u16(src); |
1940 | } |
1941 | |
1942 | // CHECK-LABEL: @test_vld4_dup_u32( |
1943 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 |
1944 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* |
1945 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
1946 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
1947 | // CHECK-A64: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]]) |
1948 | // CHECK-A32: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4dup.v2i32.p0i8(i8* [[TMP1]], i32 4) |
1949 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* |
1950 | // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] |
1951 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* %dest to i8* |
1952 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* |
1953 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1954 | // CHECK: ret void |
1955 | void test_vld4_dup_u32(uint32x2x4_t *dest, const uint32_t *src) { |
1956 | *dest = vld4_dup_u32(src); |
1957 | } |
1958 | |
1959 | // CHECK-LABEL: @test_vld4_dup_u8( |
1960 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 |
1961 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* |
1962 | // CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %src) |
1963 | // CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0i8(i8* %src, i32 1) |
1964 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* |
1965 | // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] |
1966 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* %dest to i8* |
1967 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* |
1968 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], {{i64|i32}} 32, i1 false) |
1969 | // CHECK: ret void |
1970 | void test_vld4_dup_u8(uint8x8x4_t *dest, const uint8_t *src) { |
1971 | *dest = vld4_dup_u8(src); |
1972 | } |
1973 | |
1974 | // CHECK-LABEL: @test_vld4_dup_s64( |
1975 | // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 |
1976 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* |
1977 | // CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8* |
1978 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1979 | // CHECK-A64: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]]) |
1980 | // CHECK-A32: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4dup.v1i64.p0i8(i8* [[TMP1]], i32 8) |
1981 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* |
1982 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
1983 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* %dest to i8* |
1984 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* |
1985 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
1986 | // CHECK: ret void |
1987 | void test_vld4_dup_s64(int64x1x4_t *dest, const int64_t *src) { |
1988 | *dest = vld4_dup_s64(src); |
1989 | } |
1990 | |
1991 | // CHECK-LABEL: @test_vld4_dup_u64( |
1992 | // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 |
1993 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* |
1994 | // CHECK: [[TMP1:%.*]] = bitcast i64* %src to i8* |
1995 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* |
1996 | // CHECK-A64: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]]) |
1997 | // CHECK-A32: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4dup.v1i64.p0i8(i8* [[TMP1]], i32 8) |
1998 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* |
1999 | // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] |
2000 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* %dest to i8* |
2001 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* |
2002 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], {{i64|i32}} 32, i1 false) |
2003 | // CHECK: ret void |
2004 | void test_vld4_dup_u64(uint64x1x4_t *dest, const uint64_t *src) { |
2005 | *dest = vld4_dup_u64(src); |
2006 | } |
2007 | |
2008 | // CHECK-LABEL: @test_vld2q_dup_f16( |
2009 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align {{16|8}} |
2010 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* |
2011 | // CHECK: [[TMP1:%.*]] = bitcast half* %src to i8* |
2012 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* |
2013 | // CHECK-A64: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half* [[TMP2]]) |
2014 | // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2015 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]> }* |
2016 | // CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD2]], { <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]] |
2017 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* %dest to i8* |
2018 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* |
2019 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
2020 | // CHECK: ret void |
2021 | void test_vld2q_dup_f16(float16x8x2_t *dest, const float16_t *src) { |
2022 | *dest = vld2q_dup_f16(src); |
2023 | } |
2024 | |
2025 | // CHECK-LABEL: @test_vld2q_dup_f32( |
2026 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align {{16|8}} |
2027 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* |
2028 | // CHECK: [[TMP1:%.*]] = bitcast float* %src to i8* |
2029 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
2030 | // CHECK-A64: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* [[TMP2]]) |
2031 | // CHECK-A32: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2dup.v4f32.p0i8(i8* [[TMP1]], i32 4) |
2032 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }* |
2033 | // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]] |
2034 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* %dest to i8* |
2035 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* |
2036 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
2037 | // CHECK: ret void |
2038 | void test_vld2q_dup_f32(float32x4x2_t *dest, const float32_t *src) { |
2039 | *dest = vld2q_dup_f32(src); |
2040 | } |
2041 | |
2042 | // CHECK-LABEL: @test_vld2q_dup_p16( |
2043 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align {{16|8}} |
2044 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* |
2045 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2046 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2047 | // CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) |
2048 | // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2049 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* |
2050 | // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] |
2051 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* %dest to i8* |
2052 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* |
2053 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
2054 | // CHECK: ret void |
2055 | void test_vld2q_dup_p16(poly16x8x2_t *dest, const poly16_t *src) { |
2056 | *dest = vld2q_dup_p16(src); |
2057 | } |
2058 | |
2059 | // CHECK-LABEL: @test_vld2q_dup_p8( |
2060 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align {{16|8}} |
2061 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* |
2062 | // CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %src) |
2063 | // CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0i8(i8* %src, i32 1) |
2064 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* |
2065 | // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] |
2066 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* %dest to i8* |
2067 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* |
2068 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false) |
2069 | // CHECK: ret void |
2070 | void test_vld2q_dup_p8(poly8x16x2_t *dest, const poly8_t *src) { |
2071 | *dest = vld2q_dup_p8(src); |
2072 | } |
2073 | |
2074 | // CHECK-LABEL: @test_vld2q_dup_s16( |
2075 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align {{16|8}} |
2076 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* |
2077 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2078 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2079 | // CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) |
2080 | // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2081 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* |
2082 | // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] |
2083 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* %dest to i8* |
2084 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* |
2085 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
2086 | // CHECK: ret void |
2087 | void test_vld2q_dup_s16(int16x8x2_t *dest, const int16_t *src) { |
2088 | *dest = vld2q_dup_s16(src); |
2089 | } |
2090 | |
2091 | // CHECK-LABEL: @test_vld2q_dup_s32( |
2092 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align {{16|8}} |
2093 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* |
2094 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
2095 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
2096 | // CHECK-A64: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]]) |
2097 | // CHECK-A32: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2dup.v4i32.p0i8(i8* [[TMP1]], i32 4) |
2098 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* |
2099 | // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] |
2100 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* %dest to i8* |
2101 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* |
2102 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
2103 | // CHECK: ret void |
2104 | void test_vld2q_dup_s32(int32x4x2_t *dest, const int32_t *src) { |
2105 | *dest = vld2q_dup_s32(src); |
2106 | } |
2107 | |
2108 | // CHECK-LABEL: @test_vld2q_dup_s8( |
2109 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align {{16|8}} |
2110 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* |
2111 | // CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %src) |
2112 | // CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0i8(i8* %src, i32 1) |
2113 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* |
2114 | // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] |
2115 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* %dest to i8* |
2116 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* |
2117 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false) |
2118 | // CHECK: ret void |
2119 | void test_vld2q_dup_s8(int8x16x2_t *dest, const int8_t *src) { |
2120 | *dest = vld2q_dup_s8(src); |
2121 | } |
2122 | |
2123 | // CHECK-LABEL: @test_vld2q_dup_u16( |
2124 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align {{16|8}} |
2125 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* |
2126 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2127 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2128 | // CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) |
2129 | // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2130 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* |
2131 | // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] |
2132 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* %dest to i8* |
2133 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* |
2134 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
2135 | // CHECK: ret void |
2136 | void test_vld2q_dup_u16(uint16x8x2_t *dest, const uint16_t *src) { |
2137 | *dest = vld2q_dup_u16(src); |
2138 | } |
2139 | |
2140 | // CHECK-LABEL: @test_vld2q_dup_u32( |
2141 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align {{16|8}} |
2142 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* |
2143 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
2144 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
2145 | // CHECK-A64: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]]) |
2146 | // CHECK-A32: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2dup.v4i32.p0i8(i8* [[TMP1]], i32 4) |
2147 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* |
2148 | // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] |
2149 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* %dest to i8* |
2150 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* |
2151 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 32, i1 false) |
2152 | // CHECK: ret void |
2153 | void test_vld2q_dup_u32(uint32x4x2_t *dest, const uint32_t *src) { |
2154 | *dest = vld2q_dup_u32(src); |
2155 | } |
2156 | |
2157 | // CHECK-LABEL: @test_vld2q_dup_u8( |
2158 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align {{16|8}} |
2159 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* |
2160 | // CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %src) |
2161 | // CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0i8(i8* %src, i32 1) |
2162 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* |
2163 | // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] |
2164 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* %dest to i8* |
2165 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* |
2166 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 32, i1 false) |
2167 | // CHECK: ret void |
2168 | void test_vld2q_dup_u8(uint8x16x2_t *dest, const uint8_t *src) { |
2169 | *dest = vld2q_dup_u8(src); |
2170 | } |
2171 | |
2172 | // CHECK-LABEL: @test_vld3q_dup_f16( |
2173 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align {{16|8}} |
2174 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* |
2175 | // CHECK: [[TMP1:%.*]] = bitcast half* %src to i8* |
2176 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* |
2177 | // CHECK-A64: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half* [[TMP2]]) |
2178 | // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2179 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* |
2180 | // CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD3]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]] |
2181 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* %dest to i8* |
2182 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* |
2183 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
2184 | // CHECK: ret void |
2185 | void test_vld3q_dup_f16(float16x8x3_t *dest, const float16_t *src) { |
2186 | *dest = vld3q_dup_f16(src); |
2187 | } |
2188 | |
2189 | // CHECK-LABEL: @test_vld3q_dup_f32( |
2190 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align {{16|8}} |
2191 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* |
2192 | // CHECK: [[TMP1:%.*]] = bitcast float* %src to i8* |
2193 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
2194 | // CHECK-A64: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* [[TMP2]]) |
2195 | // CHECK-A32: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3dup.v4f32.p0i8(i8* [[TMP1]], i32 4) |
2196 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }* |
2197 | // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] |
2198 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* %dest to i8* |
2199 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* |
2200 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
2201 | // CHECK: ret void |
2202 | void test_vld3q_dup_f32(float32x4x3_t *dest, const float32_t *src) { |
2203 | *dest = vld3q_dup_f32(src); |
2204 | } |
2205 | |
2206 | // CHECK-LABEL: @test_vld3q_dup_p16( |
2207 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align {{16|8}} |
2208 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* |
2209 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2210 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2211 | // CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) |
2212 | // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2213 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* |
2214 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
2215 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* %dest to i8* |
2216 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* |
2217 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
2218 | // CHECK: ret void |
2219 | void test_vld3q_dup_p16(poly16x8x3_t *dest, const poly16_t *src) { |
2220 | *dest = vld3q_dup_p16(src); |
2221 | } |
2222 | |
2223 | // CHECK-LABEL: @test_vld3q_dup_p8( |
2224 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align {{16|8}} |
2225 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* |
2226 | // CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %src) |
2227 | // CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0i8(i8* %src, i32 1) |
2228 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* |
2229 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
2230 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* %dest to i8* |
2231 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* |
2232 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false) |
2233 | // CHECK: ret void |
2234 | void test_vld3q_dup_p8(poly8x16x3_t *dest, const poly8_t *src) { |
2235 | *dest = vld3q_dup_p8(src); |
2236 | } |
2237 | |
2238 | // CHECK-LABEL: @test_vld3q_dup_s16( |
2239 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align {{16|8}} |
2240 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* |
2241 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2242 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2243 | // CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) |
2244 | // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2245 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* |
2246 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
2247 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* %dest to i8* |
2248 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* |
2249 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
2250 | // CHECK: ret void |
2251 | void test_vld3q_dup_s16(int16x8x3_t *dest, const int16_t *src) { |
2252 | *dest = vld3q_dup_s16(src); |
2253 | } |
2254 | |
2255 | // CHECK-LABEL: @test_vld3q_dup_s32( |
2256 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align {{16|8}} |
2257 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* |
2258 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
2259 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
2260 | // CHECK-A64: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]]) |
2261 | // CHECK-A32: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3dup.v4i32.p0i8(i8* [[TMP1]], i32 4) |
2262 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* |
2263 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
2264 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* %dest to i8* |
2265 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* |
2266 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
2267 | // CHECK: ret void |
2268 | void test_vld3q_dup_s32(int32x4x3_t *dest, const int32_t *src) { |
2269 | *dest = vld3q_dup_s32(src); |
2270 | } |
2271 | |
2272 | // CHECK-LABEL: @test_vld3q_dup_s8( |
2273 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align {{16|8}} |
2274 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* |
2275 | // CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %src) |
2276 | // CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0i8(i8* %src, i32 1) |
2277 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* |
2278 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
2279 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* %dest to i8* |
2280 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* |
2281 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false) |
2282 | // CHECK: ret void |
2283 | void test_vld3q_dup_s8(int8x16x3_t *dest, const int8_t *src) { |
2284 | *dest = vld3q_dup_s8(src); |
2285 | } |
2286 | |
2287 | // CHECK-LABEL: @test_vld3q_dup_u16( |
2288 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align {{16|8}} |
2289 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* |
2290 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2291 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2292 | // CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) |
2293 | // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2294 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* |
2295 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
2296 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* %dest to i8* |
2297 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* |
2298 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
2299 | // CHECK: ret void |
2300 | void test_vld3q_dup_u16(uint16x8x3_t *dest, const uint16_t *src) { |
2301 | *dest = vld3q_dup_u16(src); |
2302 | } |
2303 | |
2304 | // CHECK-LABEL: @test_vld3q_dup_u32( |
2305 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align {{16|8}} |
2306 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* |
2307 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
2308 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
2309 | // CHECK-A64: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]]) |
2310 | // CHECK-A32: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3dup.v4i32.p0i8(i8* [[TMP1]], i32 4) |
2311 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* |
2312 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
2313 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* %dest to i8* |
2314 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* |
2315 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 48, i1 false) |
2316 | // CHECK: ret void |
2317 | void test_vld3q_dup_u32(uint32x4x3_t *dest, const uint32_t *src) { |
2318 | *dest = vld3q_dup_u32(src); |
2319 | } |
2320 | |
2321 | // CHECK-LABEL: @test_vld3q_dup_u8( |
2322 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align {{16|8}} |
2323 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* |
2324 | // CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %src) |
2325 | // CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0i8(i8* %src, i32 1) |
2326 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* |
2327 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
2328 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* %dest to i8* |
2329 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* |
2330 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 48, i1 false) |
2331 | // CHECK: ret void |
2332 | void test_vld3q_dup_u8(uint8x16x3_t *dest, const uint8_t *src) { |
2333 | *dest = vld3q_dup_u8(src); |
2334 | } |
2335 | |
2336 | // CHECK-LABEL: @test_vld4q_dup_f16( |
2337 | // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align {{16|8}} |
2338 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* |
2339 | // CHECK: [[TMP1:%.*]] = bitcast half* %src to i8* |
2340 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* |
2341 | // CHECK-A64: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half* [[TMP2]]) |
2342 | // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2343 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* |
2344 | // CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD4]], { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> }* [[TMP3]] |
2345 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* %dest to i8* |
2346 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* |
2347 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
2348 | // CHECK: ret void |
2349 | void test_vld4q_dup_f16(float16x8x4_t *dest, const float16_t *src) { |
2350 | *dest = vld4q_dup_f16(src); |
2351 | } |
2352 | |
2353 | // CHECK-LABEL: @test_vld4q_dup_f32( |
2354 | // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align {{16|8}} |
2355 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* |
2356 | // CHECK: [[TMP1:%.*]] = bitcast float* %src to i8* |
2357 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* |
2358 | // CHECK-A64: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* [[TMP2]]) |
2359 | // CHECK-A32: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4dup.v4f32.p0i8(i8* [[TMP1]], i32 4) |
2360 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* |
2361 | // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] |
2362 | // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* %dest to i8* |
2363 | // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* |
2364 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
2365 | // CHECK: ret void |
2366 | void test_vld4q_dup_f32(float32x4x4_t *dest, const float32_t *src) { |
2367 | *dest = vld4q_dup_f32(src); |
2368 | } |
2369 | |
2370 | // CHECK-LABEL: @test_vld4q_dup_p16( |
2371 | // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align {{16|8}} |
2372 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* |
2373 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2374 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2375 | // CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) |
2376 | // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2377 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* |
2378 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
2379 | // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* %dest to i8* |
2380 | // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* |
2381 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
2382 | // CHECK: ret void |
2383 | void test_vld4q_dup_p16(poly16x8x4_t *dest, const poly16_t *src) { |
2384 | *dest = vld4q_dup_p16(src); |
2385 | } |
2386 | |
2387 | // CHECK-LABEL: @test_vld4q_dup_p8( |
2388 | // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align {{16|8}} |
2389 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* |
2390 | // CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %src) |
2391 | // CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0i8(i8* %src, i32 1) |
2392 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* |
2393 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
2394 | // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* %dest to i8* |
2395 | // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* |
2396 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false) |
2397 | // CHECK: ret void |
2398 | void test_vld4q_dup_p8(poly8x16x4_t *dest, const poly8_t *src) { |
2399 | *dest = vld4q_dup_p8(src); |
2400 | } |
2401 | |
2402 | // CHECK-LABEL: @test_vld4q_dup_s16( |
2403 | // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align {{16|8}} |
2404 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* |
2405 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2406 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2407 | // CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) |
2408 | // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2409 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* |
2410 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
2411 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* %dest to i8* |
2412 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* |
2413 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
2414 | // CHECK: ret void |
2415 | void test_vld4q_dup_s16(int16x8x4_t *dest, const int16_t *src) { |
2416 | *dest = vld4q_dup_s16(src); |
2417 | } |
2418 | |
2419 | // CHECK-LABEL: @test_vld4q_dup_s32( |
2420 | // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align {{16|8}} |
2421 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* |
2422 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
2423 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
2424 | // CHECK-A64: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]]) |
2425 | // CHECK-A32: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4dup.v4i32.p0i8(i8* [[TMP1]], i32 4) |
2426 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* |
2427 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
2428 | // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* %dest to i8* |
2429 | // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* |
2430 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
2431 | // CHECK: ret void |
2432 | void test_vld4q_dup_s32(int32x4x4_t *dest, const int32_t *src) { |
2433 | *dest = vld4q_dup_s32(src); |
2434 | } |
2435 | |
2436 | // CHECK-LABEL: @test_vld4q_dup_s8( |
2437 | // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align {{16|8}} |
2438 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* |
2439 | // CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %src) |
2440 | // CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0i8(i8* %src, i32 1) |
2441 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* |
2442 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
2443 | // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* %dest to i8* |
2444 | // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* |
2445 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false) |
2446 | // CHECK: ret void |
2447 | void test_vld4q_dup_s8(int8x16x4_t *dest, const int8_t *src) { |
2448 | *dest = vld4q_dup_s8(src); |
2449 | } |
2450 | |
2451 | // CHECK-LABEL: @test_vld4q_dup_u16( |
2452 | // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align {{16|8}} |
2453 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* |
2454 | // CHECK: [[TMP1:%.*]] = bitcast i16* %src to i8* |
2455 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* |
2456 | // CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) |
2457 | // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* [[TMP1]], i32 2) |
2458 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* |
2459 | // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] |
2460 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* %dest to i8* |
2461 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* |
2462 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
2463 | // CHECK: ret void |
2464 | void test_vld4q_dup_u16(uint16x8x4_t *dest, const uint16_t *src) { |
2465 | *dest = vld4q_dup_u16(src); |
2466 | } |
2467 | |
2468 | // CHECK-LABEL: @test_vld4q_dup_u32( |
2469 | // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align {{16|8}} |
2470 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* |
2471 | // CHECK: [[TMP1:%.*]] = bitcast i32* %src to i8* |
2472 | // CHECK-A64: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* |
2473 | // CHECK-A64: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]]) |
2474 | // CHECK-A32: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4dup.v4i32.p0i8(i8* [[TMP1]], i32 4) |
2475 | // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* |
2476 | // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] |
2477 | // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* %dest to i8* |
2478 | // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* |
2479 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP4]], i8* align {{16|8}} [[TMP5]], {{i64|i32}} 64, i1 false) |
2480 | // CHECK: ret void |
2481 | void test_vld4q_dup_u32(uint32x4x4_t *dest, const uint32_t *src) { |
2482 | *dest = vld4q_dup_u32(src); |
2483 | } |
2484 | |
2485 | // CHECK-LABEL: @test_vld4q_dup_u8( |
2486 | // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align {{16|8}} |
2487 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* |
2488 | // CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %src) |
2489 | // CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0i8(i8* %src, i32 1) |
2490 | // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* |
2491 | // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] |
2492 | // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* %dest to i8* |
2493 | // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* |
2494 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align {{16|8}} [[TMP2]], i8* align {{16|8}} [[TMP3]], {{i64|i32}} 64, i1 false) |
2495 | // CHECK: ret void |
2496 | void test_vld4q_dup_u8(uint8x16x4_t *dest, const uint8_t *src) { |
2497 | *dest = vld4q_dup_u8(src); |
2498 | } |
2499 | |