1 | // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=Win32 |
2 | // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=Win64 |
3 | // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-linux-gnu | FileCheck %s --check-prefix=Lin32 |
4 | // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=Lin64 |
5 | |
6 | #include <xmmintrin.h> |
7 | |
// Baseline case: two int parameters, declared with the __regcall keyword.
// The check lines below expect the x86_regcallcc convention and the
// __regcall3__ name prefix; the 32-bit triples also mark both arguments inreg.
8 | void __regcall v1(int a, int b) {} |
9 | // Win32: define dso_local x86_regcallcc void @__regcall3__v1(i32 inreg %a, i32 inreg %b) |
10 | // Win64: define dso_local x86_regcallcc void @__regcall3__v1(i32 %a, i32 %b) |
11 | // Lin32: define x86_regcallcc void @__regcall3__v1(i32 inreg %a, i32 inreg %b) |
12 | // Lin64: define x86_regcallcc void @__regcall3__v1(i32 %a, i32 %b) |
13 | |
// Same signature as v1, but spelled with __attribute__((regcall)) instead of
// the __regcall keyword. The expected IR below differs from v1 only in the
// function name.
14 | void __attribute__((regcall)) v1b(int a, int b) {} |
15 | // Win32: define dso_local x86_regcallcc void @__regcall3__v1b(i32 inreg %a, i32 inreg %b) |
16 | // Win64: define dso_local x86_regcallcc void @__regcall3__v1b(i32 %a, i32 %b) |
17 | // Lin32: define x86_regcallcc void @__regcall3__v1b(i32 inreg %a, i32 inreg %b) |
18 | // Lin64: define x86_regcallcc void @__regcall3__v1b(i32 %a, i32 %b) |
19 | |
// char parameters. The checks below expect i8 arguments carrying signext on
// every triple except x86_64-pc-win32, where a plain i8 is expected.
20 | void __regcall v2(char a, char b) {} |
21 | // Win32: define dso_local x86_regcallcc void @__regcall3__v2(i8 inreg signext %a, i8 inreg signext %b) |
22 | // Win64: define dso_local x86_regcallcc void @__regcall3__v2(i8 %a, i8 %b) |
23 | // Lin32: define x86_regcallcc void @__regcall3__v2(i8 inreg signext %a, i8 inreg signext %b) |
24 | // Lin64: define x86_regcallcc void @__regcall3__v2(i8 signext %a, i8 signext %b) |
25 | |
// Single-int struct placed between two int parameters.
// The 64-bit triples expect it coerced to one i32 (%b.coerce). The 32-bit
// triples expect it expanded to %b.0 without inreg, and the Lin32 check also
// expects an unnamed "i32 inreg" argument immediately before it.
26 | struct Small { int x; }; |
27 | void __regcall v3(int a, struct Small b, int c) {} |
28 | // Win32: define dso_local x86_regcallcc void @__regcall3__v3(i32 inreg %a, i32 %b.0, i32 inreg %c) |
29 | // Win64: define dso_local x86_regcallcc void @__regcall3__v3(i32 %a, i32 %b.coerce, i32 %c) |
30 | // Lin32: define x86_regcallcc void @__regcall3__v3(i32 inreg %a, i32 inreg, i32 %b.0, i32 inreg %c) |
31 | // Lin64: define x86_regcallcc void @__regcall3__v3(i32 %a, i32 %b.coerce, i32 %c) |
32 | |
// Five-int struct placed between two int parameters.
// Expected below: a byval (align 4) pointer on the 32-bit triples, a plain
// pointer on Win64, and a coerced [5 x i32] value on Lin64. Note that the Lin32
// check expects the trailing %c without inreg, while Win32 keeps it inreg.
33 | struct Large { int a[5]; }; |
34 | void __regcall v4(int a, struct Large b, int c) {} |
35 | // Win32: define dso_local x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c) |
36 | // Win64: define dso_local x86_regcallcc void @__regcall3__v4(i32 %a, %struct.Large* %b, i32 %c) |
37 | // Lin32: define x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 %c) |
38 | // Lin64: define x86_regcallcc void @__regcall3__v4(i32 %a, [5 x i32] %b.coerce, i32 %c) |
39 | |
// Structs made of two, four and five doubles, used by the hfa* cases that
// follow.
40 | struct HFA2 { double x, y; }; |
41 | struct HFA4 { double w, x, y, z; }; |
42 | struct HFA5 { double v, w, x, y, z; }; |
43 | |
// Four-double struct placed between two ints. Every triple below expects the
// struct split into four separate double arguments (named %b.N, or %b.coerceN
// on Lin64).
44 | void __regcall hfa1(int a, struct HFA4 b, int c) {} |
45 | // Win32: define dso_local x86_regcallcc void @__regcall3__hfa1(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c) |
46 | // Win64: define dso_local x86_regcallcc void @__regcall3__hfa1(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c) |
47 | // Lin32: define x86_regcallcc void @__regcall3__hfa1(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c) |
48 | // Lin64: define x86_regcallcc void @__regcall3__hfa1(i32 %a, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, i32 %c) |
49 | |
50 | // Two HFA4 arguments are both expanded into doubles (eight in total). On the |
51 | // 32-bit targets the trailing double is then expected to be passed indirectly |
52 | // (double* inreg); the 64-bit targets still expect it as a direct double. |
53 | void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {} |
54 | // Win32: define dso_local x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg) |
55 | // Win64: define dso_local x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double %c) |
56 | // Lin32: define x86_regcallcc void @__regcall3__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, double* inreg) |
57 | // Lin64: define x86_regcallcc void @__regcall3__hfa2(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %b.coerce0, double %b.coerce1, double %b.coerce2, double %b.coerce3, double %c) |
58 | |
59 | // Ensure that we pass builtin types directly while counting them against the |
60 | // SSE register usage. All seven doubles (five scalars plus the HFA2) stay direct. |
61 | void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {} |
62 | // Win32: define dso_local x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) |
63 | // Win64: define dso_local x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) |
64 | // Lin32: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1) |
65 | // Lin64: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.coerce0, double %f.coerce1) |
66 | |
67 | // Aggregates with more than four elements are not HFAs. The 32-bit targets |
68 | // expect them byval with align 4 (no special alignment handling), Win64 |
69 | // expects a plain pointer, and Lin64 expects five coerced doubles. |
70 | void __regcall hfa4(struct HFA5 a) {} |
71 | // Win32: define dso_local x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4) |
72 | // Win64: define dso_local x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* %a) |
73 | // Lin32: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4 %a) |
74 | // Lin64: define x86_regcallcc void @__regcall3__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4) |
75 | |
76 | // Return HFAs of 4 or fewer elements in registers. Every target below expects a direct %struct.HFA2 return value. |
77 | static struct HFA2 g_hfa2; |
78 | struct HFA2 __regcall hfa5(void) { return g_hfa2; } |
79 | // Win32: define dso_local x86_regcallcc %struct.HFA2 @__regcall3__hfa5() |
80 | // Win64: define dso_local x86_regcallcc %struct.HFA2 @__regcall3__hfa5() |
81 | // Lin32: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5() |
82 | // Lin64: define x86_regcallcc %struct.HFA2 @__regcall3__hfa5() |
83 | |
// 16-byte float vector type, plus structs holding two and four such vectors,
// used by the hva* cases that follow.
84 | typedef float __attribute__((vector_size(16))) v4f32; |
85 | struct HVA2 { v4f32 x, y; }; |
86 | struct HVA4 { v4f32 w, x, y, z; }; |
87 | |
// Vector counterpart of hfa1: a four-vector struct between two ints. Every
// triple below expects the struct split into four <4 x float> arguments.
88 | void __regcall hva1(int a, struct HVA4 b, int c) {} |
89 | // Win32: define dso_local x86_regcallcc void @__regcall3__hva1(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c) |
90 | // Win64: define dso_local x86_regcallcc void @__regcall3__hva1(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c) |
91 | // Lin32: define x86_regcallcc void @__regcall3__hva1(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c) |
92 | // Lin64: define x86_regcallcc void @__regcall3__hva1(i32 %a, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, i32 %c) |
93 | |
// Vector counterpart of hfa2: both HVA4 arguments are expanded into eight
// vectors. The 32-bit targets then expect the trailing v4f32 as an inreg
// pointer, while the 64-bit targets expect it as a direct <4 x float>.
94 | void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {} |
95 | // Win32: define dso_local x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg) |
96 | // Win64: define dso_local x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c) |
97 | // Lin32: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float>* inreg) |
98 | // Lin64: define x86_regcallcc void @__regcall3__hva2(<4 x float> %a.coerce0, <4 x float> %a.coerce1, <4 x float> %a.coerce2, <4 x float> %a.coerce3, <4 x float> %b.coerce0, <4 x float> %b.coerce1, <4 x float> %b.coerce2, <4 x float> %b.coerce3, <4 x float> %c) |
99 | |
// Vector counterpart of hfa3: five vectors followed by an HVA2. All seven are
// expected as direct <4 x float> arguments on every target.
100 | void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {} |
101 | // Win32: define dso_local x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) |
102 | // Win64: define dso_local x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) |
103 | // Lin32: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.0, <4 x float> %f.1) |
104 | // Lin64: define x86_regcallcc void @__regcall3__hva3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, <4 x float> %f.coerce0, <4 x float> %f.coerce1) |
105 | |
// Three-element float ext_vector type and a struct holding two of them.
106 | typedef float __attribute__((ext_vector_type(3))) v3f32; |
107 | struct OddSizeHVA { v3f32 x, y; }; |
108 | |
// Every target below expects the struct split into two <3 x float> arguments,
// i.e. the three-element vector type is kept as-is in the signature.
109 | void __regcall odd_size_hva(struct OddSizeHVA a) {} |
110 | // Win32: define dso_local x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1) |
111 | // Win64: define dso_local x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1) |
112 | // Lin32: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1) |
113 | // Lin64: define x86_regcallcc void @__regcall3__odd_size_hva(<3 x float> %a.coerce0, <3 x float> %a.coerce1) |
114 | |
115 | struct HFA6 { __m128 f[4]; }; |
// Four HFA6 arguments (each an array of four __m128 vectors) with an HFA6
// return value. The 32-bit checks below expect a and b expanded into vectors
// and c and d passed as inreg pointers; Win64 expects all four expanded; Lin64
// expects each coerced to [4 x <4 x float>].
// The body returns a rather than an uninitialized local, so no indeterminate
// value is read. Only the define line is checked, and the signature is unchanged.
116 | struct HFA6 __regcall ret_reg_reused(struct HFA6 a, struct HFA6 b, struct HFA6 c, struct HFA6 d){ return a; } |
117 | // Win32: define dso_local x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, %struct.HFA6* inreg %c, %struct.HFA6* inreg %d) |
118 | // Win64: define dso_local x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3) |
119 | // Lin32: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, %struct.HFA6* inreg %c, %struct.HFA6* inreg %d) |
120 | // Lin64: define x86_regcallcc %struct.HFA6 @__regcall3__ret_reg_reused([4 x <4 x float>] %a.coerce, [4 x <4 x float>] %b.coerce, [4 x <4 x float>] %c.coerce, [4 x <4 x float>] %d.coerce) |
121 | |