// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=X32
// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
3 | |
// Two plain ints. 32-bit vectorcall marks integer register arguments 'inreg';
// 64-bit passes ints directly. The "@@N" suffix in the mangled name is the
// total parameter byte count (4-byte slots on x86, 8-byte slots on x64).
void __vectorcall v1(int a, int b) {}
// X32: define dso_local x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
// X64: define dso_local x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
7 | |
// Sub-int integer types: sign-extended 'inreg' on x86; passed directly on x64.
void __vectorcall v2(char a, char b) {}
// X32: define dso_local x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
// X64: define dso_local x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
11 | |
// A register-sized struct is coerced to i32 and passed directly; note it is
// not marked 'inreg' on x86 (%b.0), unlike the plain int arguments around it.
struct Small { int x; };
void __vectorcall v3(int a, struct Small b, int c) {}
// X32: define dso_local x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
// X64: define dso_local x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
16 | |
// A 20-byte struct does not fit in registers: passed byval on x86 and by
// pointer on x64.
struct Large { int a[5]; };
void __vectorcall v4(int a, struct Large b, int c) {}
// X32: define dso_local x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
// X64: define dso_local x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
21 | |
// Homogeneous float aggregates (HFAs): structs whose members are all the same
// floating-point type. Up to four elements they are candidates for SSE
// register passing; HFA5 (five elements) is used below as a negative case.
struct HFA2 { double x, y; };
struct HFA4 { double w, x, y, z; };
struct HFA5 { double v, w, x, y, z; };

// A single HFA4 fits in SSE registers and is passed expanded ('inreg' coerce).
void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
// X32: define dso_local x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c)
// X64: define dso_local x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c)
29 | |
// HFAs that would require more than six total SSE registers are passed
// indirectly. Additional vector arguments can consume the rest of the SSE
// registers. Here the second HFA4 would push the total past six, so %b goes
// by pointer while the trailing double still takes an SSE register.
void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
// X32: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c)
// X64: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c)
36 | |
// Ensure that we pass builtin types directly while counting them against the
// SSE register usage. Five doubles occupy five of the six SSE registers, so
// the two-element HFA %f no longer fits and is passed indirectly.
void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
// X32: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
// X64: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
42 | |
// Aggregates with more than four elements are not HFAs and are passed byval.
// Because they are not classified as homogeneous, they don't get special
// handling to ensure alignment.
void __vectorcall hfa4(struct HFA5 a) {}
// X32: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
// X64: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
49 | |
// Return HFAs of 4 or fewer elements in registers.
// (g_hfa2 is only a payload; returning it avoids constructing one in the test.)
static struct HFA2 g_hfa2;
struct HFA2 __vectorcall hfa5(void) { return g_hfa2; }
// X32: define dso_local x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
// X64: define dso_local x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
55 | |
// Homogeneous vector aggregates (HVAs): same classification rules as HFAs,
// but with 16-byte float vectors as the element type.
typedef float __attribute__((vector_size(16))) v4f32;
struct HVA2 { v4f32 x, y; };
struct HVA3 { v4f32 w, x, y; };
struct HVA4 { v4f32 w, x, y, z; };
struct HVA5 { v4f32 w, x, y, z, p; };

// A single HVA4 fits in SSE registers and is passed expanded ('inreg' coerce).
v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {return b.w;}
// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c)
// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c)
65 | |
// As with HFAs, a second HVA4 would exceed the six-SSE-register budget, so %b
// is passed indirectly while the trailing vector %c still gets a register.
v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {return c;}
// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c)
// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c)
69 | |
// Five direct vector arguments consume five SSE registers, so the two-element
// HVA %f cannot fit and is passed indirectly (mirrors hfa3 above).
v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {return f.x;}
// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
73 | |
// Vector types have higher priority than HVA structures, so vector types are
// allocated first and HVAs are allocated only if enough registers remain.
// Here %a (4 regs) and %c (1 reg) leave a single register, so the HVA2 %b
// is passed indirectly.
v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {return b.y;}
// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c)
// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c)
79 | |
// %a (3 regs), %c (1 reg) and %d (2 regs) together use all six SSE registers;
// the second HVA3 %b does not fit and is passed indirectly.
v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c, struct HVA2 d) {return d.y;}
// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
83 | |
// Two HVA4s would need eight SSE registers; only the first fits, so the
// second is passed indirectly. The HVA4 return itself stays in registers.
struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;}
// X32: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b)
// X64: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b)
87 | |
// An HVA5 has more than four elements, so (like HFA5 above) it is not
// homogeneous for return purposes and is returned via a hidden sret pointer.
struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;}
// X32: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result)
// X64: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
91 | |
// An integer argument interleaved among vector arguments does not block the
// trailing vector %f from taking an SSE register; %e uses an integer register
// ('inreg' on x86).
v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;}
// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f)
// X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f)
95 | |
// An HVA whose elements are 12-byte (<3 x float>) vectors is still passed in
// registers; note the mangled size is @@32 (two 16-byte slots), not 2 x 12.
typedef float __attribute__((ext_vector_type(3))) v3f32;
struct OddSizeHVA { v3f32 x, y; };

void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
// X32: define dso_local x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
// X64: define dso_local x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
102 | |
// The Vectorcall ABI only allows passing the first 6 items in registers in x64, so this shouldn't
// consider 'p7' as a register. Instead p5 gets put into the register on the second pass.
// x86 should pass p2, p6 and p7 in registers, then p1 in the second pass.
struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7, int p8){ return p1;}
// X32: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float %p6, float %p7, i32 %p8)
// X64: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7, i32 %p8)
109 | |
// Vectorcall in both architectures allows passing of an HVA as long as there is room,
// even if it is not one of the first 6 arguments. First pass puts p4 into a
// register on both. p9 ends up in a register in x86 only. Second pass puts p1
// in a register, does NOT put p7 in a register (since there's no room), then puts
// p8 in a register.
void __vectorcall HVAAnywhere(struct HFA2 p1, int p2, int p3, float p4, int p5, int p6, struct HFA4 p7, struct HFA2 p8, float p9){}
// X32: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
// X64: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
118 | |
119 | |