1 | // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s |
2 | |
3 | // CHECK: define signext i8 @f0() |
4 | char f0(void) { |
5 | return 0; |
6 | } |
7 | |
8 | // Struct as return type. Aggregates <= 16 bytes are returned directly, |
9 | // rounded up to a multiple of 8 bytes. |
10 | // CHECK: define i64 @f1() |
11 | struct s1 { char f0; }; |
12 | struct s1 f1(void) {} |
13 | |
14 | // CHECK: define i64 @f2() |
15 | struct s2 { short f0; }; |
16 | struct s2 f2(void) {} |
17 | |
18 | // CHECK: define i64 @f3() |
19 | struct s3 { int f0; }; |
20 | struct s3 f3(void) {} |
21 | |
22 | // CHECK: define i64 @f4() |
23 | struct s4 { struct s4_0 { int f0; } f0; }; |
24 | struct s4 f4(void) {} |
25 | |
26 | // CHECK: define i64 @f5() |
27 | struct s5 { struct { } f0; int f1; }; |
28 | struct s5 f5(void) {} |
29 | |
30 | // CHECK: define i64 @f6() |
31 | struct s6 { int f0[1]; }; |
32 | struct s6 f6(void) {} |
33 | |
34 | // CHECK: define void @f7() |
35 | struct s7 { struct { int : 0; } f0; }; |
36 | struct s7 f7(void) {} |
37 | |
38 | // CHECK: define void @f8() |
39 | struct s8 { struct { int : 0; } f0[1]; }; |
40 | struct s8 f8(void) {} |
41 | |
42 | // CHECK: define i64 @f9() |
43 | struct s9 { int f0; int : 0; }; |
44 | struct s9 f9(void) {} |
45 | |
46 | // CHECK: define i64 @f10() |
47 | struct s10 { int f0; int : 0; int : 0; }; |
48 | struct s10 f10(void) {} |
49 | |
50 | // CHECK: define i64 @f11() |
51 | struct s11 { int : 0; int f0; }; |
52 | struct s11 f11(void) {} |
53 | |
54 | // CHECK: define i64 @f12() |
55 | union u12 { char f0; short f1; int f2; }; |
56 | union u12 f12(void) {} |
57 | |
58 | // A homogeneous aggregate used as a return type is returned directly. |
59 | // CHECK: define %struct.s13 @f13() |
60 | struct s13 { float f0; }; |
61 | struct s13 f13(void) {} |
62 | // CHECK: define %union.u14 @f14() |
63 | union u14 { float f0; }; |
64 | union u14 f14(void) {} |
65 | |
66 | // CHECK: define void @f15() |
67 | void f15(struct s7 a0) {} |
68 | |
69 | // CHECK: define void @f16() |
70 | void f16(struct s8 a0) {} |
71 | |
72 | // CHECK: define i64 @f17() |
73 | struct s17 { short f0 : 13; char f1 : 4; }; |
74 | struct s17 f17(void) {} |
75 | |
76 | // CHECK: define i64 @f18() |
77 | struct s18 { short f0; char f1 : 4; }; |
78 | struct s18 f18(void) {} |
79 | |
80 | // CHECK: define i64 @f19() |
81 | struct s19 { int f0; struct s8 f1; }; |
82 | struct s19 f19(void) {} |
83 | |
84 | // CHECK: define i64 @f20() |
85 | struct s20 { struct s8 f1; int f0; }; |
86 | struct s20 f20(void) {} |
87 | |
88 | // CHECK: define i64 @f21() |
89 | struct s21 { struct {} f1; int f0 : 4; }; |
90 | struct s21 f21(void) {} |
91 | |
92 | // CHECK: define i64 @f22() |
93 | // CHECK: define i64 @f23() |
94 | // CHECK: define i64 @f24() |
95 | // CHECK: define [2 x i64] @f25() |
96 | // CHECK: define { float, float } @f26() |
97 | // CHECK: define { double, double } @f27() |
98 | _Complex char f22(void) {} |
99 | _Complex short f23(void) {} |
100 | _Complex int f24(void) {} |
101 | _Complex long long f25(void) {} |
102 | _Complex float f26(void) {} |
103 | _Complex double f27(void) {} |
104 | |
105 | // CHECK: define i64 @f28() |
106 | struct s28 { _Complex char f0; }; |
107 | struct s28 f28() {} |
108 | |
109 | // CHECK: define i64 @f29() |
110 | struct s29 { _Complex short f0; }; |
111 | struct s29 f29() {} |
112 | |
113 | // CHECK: define i64 @f30() |
114 | struct s30 { _Complex int f0; }; |
115 | struct s30 f30() {} |
116 | |
117 | struct s31 { char x; }; |
118 | void f31(struct s31 s) { } |
119 | // CHECK: define void @f31(i64 %s.coerce) |
120 | // CHECK: %s = alloca %struct.s31, align 1 |
121 | // CHECK: trunc i64 %s.coerce to i8 |
122 | // CHECK: store i8 %{{.*}}, |
123 | |
124 | struct s32 { double x; }; |
125 | void f32(struct s32 s) { } |
126 | // CHECK: @f32([1 x double] %{{.*}}) |
127 | |
128 | // A composite type larger than 16 bytes should be passed indirectly. |
129 | struct s33 { char buf[32*32]; }; |
130 | void f33(struct s33 s) { } |
131 | // CHECK: define void @f33(%struct.s33* %s) |
132 | |
133 | struct s34 { char c; }; |
134 | void f34(struct s34 s); |
135 | void g34(struct s34 *s) { f34(*s); } |
136 | // CHECK: @g34(%struct.s34* %s) |
137 | // CHECK: %[[a:.*]] = load i8, i8* %{{.*}} |
138 | // CHECK: zext i8 %[[a]] to i64 |
139 | // CHECK: call void @f34(i64 %{{.*}}) |
140 | |
141 | /* |
142 | * Check that va_arg accesses stack according to ABI alignment |
143 | */ |
144 | long long t1(int i, ...) { |
145 | // CHECK: t1 |
146 | __builtin_va_list ap; |
147 | __builtin_va_start(ap, i); |
148 | // CHECK-NOT: add i32 %{{.*}} 7 |
149 | // CHECK-NOT: and i32 %{{.*}} -8 |
150 | long long ll = __builtin_va_arg(ap, long long); |
151 | __builtin_va_end(ap); |
152 | return ll; |
153 | } |
154 | double t2(int i, ...) { |
155 | // CHECK: t2 |
156 | __builtin_va_list ap; |
157 | __builtin_va_start(ap, i); |
158 | // CHECK-NOT: add i32 %{{.*}} 7 |
159 | // CHECK-NOT: and i32 %{{.*}} -8 |
160 | double ll = __builtin_va_arg(ap, double); |
161 | __builtin_va_end(ap); |
162 | return ll; |
163 | } |
164 | |
165 | #include <arm_neon.h> |
166 | |
167 | // Homogeneous Vector Aggregate as return type and argument type. |
168 | // CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) |
169 | int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) { |
170 | return vzipq_s8(a0, a1); |
171 | } |
172 | |
173 | // Test direct vector passing. |
174 | typedef float T_float32x2 __attribute__ ((__vector_size__ (8))); |
175 | typedef float T_float32x4 __attribute__ ((__vector_size__ (16))); |
176 | typedef float T_float32x8 __attribute__ ((__vector_size__ (32))); |
177 | typedef float T_float32x16 __attribute__ ((__vector_size__ (64))); |
178 | |
179 | // CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}}) |
180 | T_float32x2 f1_0(T_float32x2 a0) { return a0; } |
181 | // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}}) |
182 | T_float32x4 f1_1(T_float32x4 a0) { return a0; } |
183 | // A vector larger than 16 bytes is illegal; it is passed and returned indirectly. |
184 | // CHECK: define void @f1_2(<8 x float>* noalias sret %{{.*}}, <8 x float>*) |
185 | T_float32x8 f1_2(T_float32x8 a0) { return a0; } |
186 | // CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*) |
187 | T_float32x16 f1_3(T_float32x16 a0) { return a0; } |
188 | |
189 | // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and |
190 | // aggregates with size > 16 bytes. |
191 | struct s35 |
192 | { |
193 | float v[4]; //Testing HFA. |
194 | } __attribute__((aligned(16))); |
195 | typedef struct s35 s35_with_align; |
196 | |
197 | typedef __attribute__((neon_vector_type(4))) float float32x4_t; |
198 | float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { |
199 | // CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce) |
200 | // CHECK: %s1 = alloca %struct.s35, align 16 |
201 | // CHECK: %s2 = alloca %struct.s35, align 16 |
202 | // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>* |
203 | // CHECK: load <4 x float>, <4 x float>* %[[a]], align 16 |
204 | // CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>* |
205 | // CHECK: load <4 x float>, <4 x float>* %[[b]], align 16 |
206 | float32x4_t v = vaddq_f32(*(float32x4_t *)&s1, |
207 | *(float32x4_t *)&s2); |
208 | return v; |
209 | } |
210 | |
211 | struct s36 |
212 | { |
213 | int v[4]; //Testing 16-byte aggregate. |
214 | } __attribute__((aligned(16))); |
215 | typedef struct s36 s36_with_align; |
216 | |
217 | typedef __attribute__((neon_vector_type(4))) int int32x4_t; |
218 | int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) { |
219 | // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce) |
220 | // CHECK: %s1 = alloca %struct.s36, align 16 |
221 | // CHECK: %s2 = alloca %struct.s36, align 16 |
222 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 |
223 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 |
224 | // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>* |
225 | // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16 |
226 | // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>* |
227 | // CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16 |
228 | int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, |
229 | *(int32x4_t *)&s2); |
230 | return v; |
231 | } |
232 | |
233 | struct s37 |
234 | { |
235 | int v[18]; //Testing large aggregate. |
236 | } __attribute__((aligned(16))); |
237 | typedef struct s37 s37_with_align; |
238 | |
239 | int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) { |
240 | // CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2) |
241 | // CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>* |
242 | // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16 |
243 | // CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>* |
244 | // CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16 |
245 | int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, |
246 | *(int32x4_t *)&s2); |
247 | return v; |
248 | } |
249 | s37_with_align g37; |
250 | int32x4_t caller37() { |
251 | // CHECK: caller37 |
252 | // CHECK: %[[a:.*]] = alloca %struct.s37, align 16 |
253 | // CHECK: %[[b:.*]] = alloca %struct.s37, align 16 |
254 | // CHECK: call void @llvm.memcpy |
255 | // CHECK: call void @llvm.memcpy |
256 | // CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]]) |
257 | return f37(3, g37, g37); |
258 | } |
259 | |
260 | // rdar://problem/12648441 |
261 | // Test passing structs with size <= 8, <= 16 and > 16 bytes, |
262 | // with alignment of 16 and without. |
263 | |
264 | // structs with size <= 8 bytes, without alignment attribute: |
265 | // passed as i64 (the 16-byte-aligned variant, s39, is passed as i128) |
266 | struct s38 |
267 | { |
268 | int i; |
269 | short s; |
270 | }; |
271 | typedef struct s38 s38_no_align; |
272 | // passing structs in registers |
273 | __attribute__ ((noinline)) |
274 | int f38(int i, s38_no_align s1, s38_no_align s2) { |
275 | // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) |
276 | // CHECK: %s1 = alloca %struct.s38, align 4 |
277 | // CHECK: %s2 = alloca %struct.s38, align 4 |
278 | // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4 |
279 | // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4 |
280 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0 |
281 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0 |
282 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1 |
283 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1 |
284 | return s1.i + s2.i + i + s1.s + s2.s; |
285 | } |
286 | s38_no_align g38; |
287 | s38_no_align g38_2; |
288 | int caller38() { |
289 | // CHECK: define i32 @caller38() |
290 | // CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4 |
291 | // CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 |
292 | // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]]) |
293 | return f38(3, g38, g38_2); |
294 | } |
295 | // passing structs on stack |
296 | __attribute__ ((noinline)) |
297 | int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
298 | int i9, s38_no_align s1, s38_no_align s2) { |
299 | // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) |
300 | // CHECK: %s1 = alloca %struct.s38, align 4 |
301 | // CHECK: %s2 = alloca %struct.s38, align 4 |
302 | // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4 |
303 | // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4 |
304 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0 |
305 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0 |
306 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1 |
307 | // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1 |
308 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
309 | } |
310 | int caller38_stack() { |
311 | // CHECK: define i32 @caller38_stack() |
312 | // CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4 |
313 | // CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 |
314 | // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]]) |
315 | return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2); |
316 | } |
317 | |
318 | // structs with size <= 8 bytes, with alignment attribute |
319 | struct s39 |
320 | { |
321 | int i; |
322 | short s; |
323 | } __attribute__((aligned(16))); |
324 | typedef struct s39 s39_with_align; |
325 | // passing aligned structs in registers |
326 | __attribute__ ((noinline)) |
327 | int f39(int i, s39_with_align s1, s39_with_align s2) { |
328 | // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) |
329 | // CHECK: %s1 = alloca %struct.s39, align 16 |
330 | // CHECK: %s2 = alloca %struct.s39, align 16 |
331 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 |
332 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 |
333 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0 |
334 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0 |
335 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1 |
336 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1 |
337 | return s1.i + s2.i + i + s1.s + s2.s; |
338 | } |
339 | s39_with_align g39; |
340 | s39_with_align g39_2; |
341 | int caller39() { |
342 | // CHECK: define i32 @caller39() |
343 | // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16 |
344 | // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 |
345 | // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]]) |
346 | return f39(3, g39, g39_2); |
347 | } |
348 | // passing aligned structs on stack |
349 | __attribute__ ((noinline)) |
350 | int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
351 | int i9, s39_with_align s1, s39_with_align s2) { |
352 | // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) |
353 | // CHECK: %s1 = alloca %struct.s39, align 16 |
354 | // CHECK: %s2 = alloca %struct.s39, align 16 |
355 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 |
356 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 |
357 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0 |
358 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0 |
359 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1 |
360 | // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1 |
361 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
362 | } |
363 | int caller39_stack() { |
364 | // CHECK: define i32 @caller39_stack() |
365 | // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16 |
366 | // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 |
367 | // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]]) |
368 | return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2); |
369 | } |
370 | |
371 | // structs with size <= 16 bytes, without alignment attribute |
372 | struct s40 |
373 | { |
374 | int i; |
375 | short s; |
376 | int i2; |
377 | short s2; |
378 | }; |
379 | typedef struct s40 s40_no_align; |
380 | // passing structs in registers |
381 | __attribute__ ((noinline)) |
382 | int f40(int i, s40_no_align s1, s40_no_align s2) { |
383 | // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) |
384 | // CHECK: %s1 = alloca %struct.s40, align 4 |
385 | // CHECK: %s2 = alloca %struct.s40, align 4 |
386 | // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4 |
387 | // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4 |
388 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0 |
389 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0 |
390 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1 |
391 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1 |
392 | return s1.i + s2.i + i + s1.s + s2.s; |
393 | } |
394 | s40_no_align g40; |
395 | s40_no_align g40_2; |
396 | int caller40() { |
397 | // CHECK: define i32 @caller40() |
398 | // CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 |
399 | // CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 |
400 | // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]]) |
401 | return f40(3, g40, g40_2); |
402 | } |
403 | // passing structs on stack |
404 | __attribute__ ((noinline)) |
405 | int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
406 | int i9, s40_no_align s1, s40_no_align s2) { |
407 | // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) |
408 | // CHECK: %s1 = alloca %struct.s40, align 4 |
409 | // CHECK: %s2 = alloca %struct.s40, align 4 |
410 | // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4 |
411 | // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4 |
412 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0 |
413 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0 |
414 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1 |
415 | // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1 |
416 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
417 | } |
418 | int caller40_stack() { |
419 | // CHECK: define i32 @caller40_stack() |
420 | // CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 |
421 | // CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 |
422 | // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]]) |
423 | return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2); |
424 | } |
425 | |
426 | // structs with size <= 16 bytes, with alignment attribute |
427 | struct s41 |
428 | { |
429 | int i; |
430 | short s; |
431 | int i2; |
432 | short s2; |
433 | } __attribute__((aligned(16))); |
434 | typedef struct s41 s41_with_align; |
435 | // passing aligned structs in registers |
436 | __attribute__ ((noinline)) |
437 | int f41(int i, s41_with_align s1, s41_with_align s2) { |
438 | // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) |
439 | // CHECK: %s1 = alloca %struct.s41, align 16 |
440 | // CHECK: %s2 = alloca %struct.s41, align 16 |
441 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 |
442 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 |
443 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0 |
444 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0 |
445 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1 |
446 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1 |
447 | return s1.i + s2.i + i + s1.s + s2.s; |
448 | } |
449 | s41_with_align g41; |
450 | s41_with_align g41_2; |
451 | int caller41() { |
452 | // CHECK: define i32 @caller41() |
453 | // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16 |
454 | // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 |
455 | // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]]) |
456 | return f41(3, g41, g41_2); |
457 | } |
458 | // passing aligned structs on stack |
459 | __attribute__ ((noinline)) |
460 | int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
461 | int i9, s41_with_align s1, s41_with_align s2) { |
462 | // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) |
463 | // CHECK: %s1 = alloca %struct.s41, align 16 |
464 | // CHECK: %s2 = alloca %struct.s41, align 16 |
465 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 |
466 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 |
467 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0 |
468 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0 |
469 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1 |
470 | // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1 |
471 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
472 | } |
473 | int caller41_stack() { |
474 | // CHECK: define i32 @caller41_stack() |
475 | // CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16 |
476 | // CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 |
477 | // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]]) |
478 | return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2); |
479 | } |
480 | |
481 | // structs with size > 16 bytes, without alignment attribute |
482 | struct s42 |
483 | { |
484 | int i; |
485 | short s; |
486 | int i2; |
487 | short s2; |
488 | int i3; |
489 | short s3; |
490 | }; |
491 | typedef struct s42 s42_no_align; |
492 | // passing structs in registers |
493 | __attribute__ ((noinline)) |
494 | int f42(int i, s42_no_align s1, s42_no_align s2) { |
495 | // CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2) |
496 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0 |
497 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0 |
498 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1 |
499 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1 |
500 | return s1.i + s2.i + i + s1.s + s2.s; |
501 | } |
502 | s42_no_align g42; |
503 | s42_no_align g42_2; |
504 | int caller42() { |
505 | // CHECK: define i32 @caller42() |
506 | // CHECK: %[[a:.*]] = alloca %struct.s42, align 4 |
507 | // CHECK: %[[b:.*]] = alloca %struct.s42, align 4 |
508 | // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8* |
509 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
510 | // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8* |
511 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
512 | // CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]]) |
513 | return f42(3, g42, g42_2); |
514 | } |
515 | // passing structs on stack |
516 | __attribute__ ((noinline)) |
517 | int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
518 | int i9, s42_no_align s1, s42_no_align s2) { |
519 | // CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2) |
520 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0 |
521 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0 |
522 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1 |
523 | // CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1 |
524 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
525 | } |
526 | int caller42_stack() { |
527 | // CHECK: define i32 @caller42_stack() |
528 | // CHECK: %[[a:.*]] = alloca %struct.s42, align 4 |
529 | // CHECK: %[[b:.*]] = alloca %struct.s42, align 4 |
530 | // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8* |
531 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
532 | // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8* |
533 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
534 | // CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]]) |
535 | return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2); |
536 | } |
537 | |
538 | // structs with size > 16 bytes, with alignment attribute |
539 | struct s43 |
540 | { |
541 | int i; |
542 | short s; |
543 | int i2; |
544 | short s2; |
545 | int i3; |
546 | short s3; |
547 | } __attribute__((aligned(16))); |
548 | typedef struct s43 s43_with_align; |
549 | // passing aligned structs in registers |
550 | __attribute__ ((noinline)) |
551 | int f43(int i, s43_with_align s1, s43_with_align s2) { |
552 | // CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2) |
553 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0 |
554 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0 |
555 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1 |
556 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1 |
557 | return s1.i + s2.i + i + s1.s + s2.s; |
558 | } |
559 | s43_with_align g43; |
560 | s43_with_align g43_2; |
561 | int caller43() { |
562 | // CHECK: define i32 @caller43() |
563 | // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 |
564 | // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 |
565 | // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* |
566 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
567 | // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* |
568 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
569 | // CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]]) |
570 | return f43(3, g43, g43_2); |
571 | } |
572 | // passing aligned structs on stack |
573 | __attribute__ ((noinline)) |
574 | int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
575 | int i9, s43_with_align s1, s43_with_align s2) { |
576 | // CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2) |
577 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0 |
578 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0 |
579 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1 |
580 | // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1 |
581 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
582 | } |
583 | int caller43_stack() { |
584 | // CHECK: define i32 @caller43_stack() |
585 | // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 |
586 | // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 |
587 | // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* |
588 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
589 | // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* |
590 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
591 | // CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]]) |
592 | return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2); |
593 | } |
594 | |
595 | // rdar://13668927 |
596 | // We should not split argument s1 between registers and stack. |
597 | __attribute__ ((noinline)) |
598 | int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, |
599 | s40_no_align s1, s40_no_align s2) { |
600 | // CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) |
601 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; |
602 | } |
603 | int caller40_split() { |
604 | // CHECK: define i32 @caller40_split() |
605 | // CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}}, [2 x i64] %{{.*}}) |
606 | return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2); // both structs are passed as whole [2 x i64] values |
607 | } |
608 | |
609 | __attribute__ ((noinline)) |
610 | int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, |
611 | s41_with_align s1, s41_with_align s2) { |
612 | // CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce) |
613 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; |
614 | } |
615 | int caller41_split() { |
616 | // CHECK: define i32 @caller41_split() |
617 | // CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}}) |
618 | return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2); |
619 | } |
620 | |
621 | // Handle homogeneous aggregates properly in variadic functions. |
622 | struct HFA { |
623 | float a, b, c, d; |
624 | }; |
625 | |
626 | float test_hfa(int n, ...) { |
627 | // CHECK-LABEL: define float @test_hfa(i32 %n, ...) |
628 | // CHECK: [[THELIST:%.*]] = alloca i8* |
629 | // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] |
630 | |
631 | // HFA is not indirect, so occupies its full 16 bytes on the stack. |
632 | // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 16 |
633 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
634 | |
635 | // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA* |
636 | __builtin_va_list thelist; |
637 | __builtin_va_start(thelist, n); |
638 | struct HFA h = __builtin_va_arg(thelist, struct HFA); |
639 | return h.d; |
640 | } |
641 | |
642 | float test_hfa_call(struct HFA *a) { |
643 | // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a) |
644 | // CHECK: call float (i32, ...) @test_hfa(i32 1, [4 x float] {{.*}}) |
645 | return test_hfa(1, *a); // forward the callee's result; this function is declared to return float |
646 | } |
647 | |
648 | struct TooBigHFA { |
649 | float a, b, c, d, e; |
650 | }; |
651 | |
652 | float test_toobig_hfa(int n, ...) { |
653 | // CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...) |
654 | // CHECK: [[THELIST:%.*]] = alloca i8* |
655 | // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] |
656 | |
657 | // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes |
658 | // of stack consumed. |
659 | // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8 |
660 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
661 | |
662 | // CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHFA** |
663 | // CHECK: [[HFAPTR:%.*]] = load %struct.TooBigHFA*, %struct.TooBigHFA** [[HFAPTRPTR]] |
664 | __builtin_va_list thelist; |
665 | __builtin_va_start(thelist, n); |
666 | struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA); |
667 | return h.d; |
668 | } |
669 | |
670 | struct HVA { |
671 | int32x4_t a, b; |
672 | }; |
673 | |
674 | int32x4_t test_hva(int n, ...) { |
675 | // CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...) |
676 | // CHECK: [[THELIST:%.*]] = alloca i8* |
677 | // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] |
678 | |
679 | // HVA is not indirect, so occupies its full 32 bytes on the stack, but it |
680 | // must be aligned to 16 bytes first. |
681 | // CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64 |
682 | // CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15 |
683 | // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16 |
684 | // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8* |
685 | |
686 | // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 32 |
687 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
688 | |
689 | // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA* |
690 | __builtin_va_list thelist; |
691 | __builtin_va_start(thelist, n); |
692 | struct HVA h = __builtin_va_arg(thelist, struct HVA); |
693 | return h.b; |
694 | } |
695 | |
696 | struct TooBigHVA { |
697 | int32x4_t a, b, c, d, e; |
698 | }; |
699 | |
700 | int32x4_t test_toobig_hva(int n, ...) { |
701 | // CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...) |
702 | // CHECK: [[THELIST:%.*]] = alloca i8* |
703 | // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] |
704 | |
705 | // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes |
706 | // of stack consumed. |
707 | // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8 |
708 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
709 | |
710 | // CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA** |
711 | // CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]] |
712 | __builtin_va_list thelist; |
713 | __builtin_va_start(thelist, n); |
714 | struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA); |
715 | return h.d; |
716 | } |
717 | |
718 | typedef __attribute__((__ext_vector_type__(3))) float float32x3_t; |
719 | typedef struct { float32x3_t arr[4]; } HFAv3; |
720 | |
721 | float32x3_t test_hva_v3(int n, ...) { |
722 | // CHECK-LABEL: define <3 x float> @test_hva_v3(i32 %n, ...) |
723 | // CHECK: [[THELIST:%.*]] = alloca i8* |
724 | // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] |
725 | |
726 | // HFAv3 is not indirect, so occupies its full 64 bytes on the stack, but it |
727 | // must be aligned to 16 bytes first. |
728 | // CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64 |
729 | // CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15 |
730 | // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16 |
731 | // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8* |
732 | |
733 | // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64 |
734 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
735 | |
736 | // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3* |
737 | __builtin_va_list l; |
738 | __builtin_va_start(l, n); |
739 | HFAv3 r = __builtin_va_arg(l, HFAv3); |
740 | return r.arr[2]; |
741 | } |
742 | |
743 | float32x3_t test_hva_v3_call(HFAv3 *a) { |
744 | // CHECK-LABEL: define <3 x float> @test_hva_v3_call(%struct.HFAv3* %a) |
745 | // CHECK: call <3 x float> (i32, ...) @test_hva_v3(i32 1, [4 x <4 x float>] {{.*}}) |
746 | return test_hva_v3(1, *a); |
747 | } |
748 | |