1 | // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s |
2 | // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s -check-prefix=NONAMDHSA |
3 | // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s |
4 | |
5 | __attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics |
6 | kernel void flat_work_group_size_0_0() {} |
7 | __attribute__((amdgpu_waves_per_eu(0))) // expected-no-diagnostics |
8 | kernel void waves_per_eu_0() {} |
9 | __attribute__((amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics |
10 | kernel void waves_per_eu_0_0() {} |
11 | __attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics |
12 | kernel void num_sgpr0() {} |
13 | __attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics |
14 | kernel void num_vgpr0() {} |
15 | |
16 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0))) // expected-no-diagnostics |
17 | kernel void flat_work_group_size_0_0_waves_per_eu_0() {} |
18 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics |
19 | kernel void flat_work_group_size_0_0_waves_per_eu_0_0() {} |
20 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics |
21 | kernel void flat_work_group_size_0_0_num_sgpr_0() {} |
22 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
23 | kernel void flat_work_group_size_0_0_num_vgpr_0() {} |
24 | __attribute__((amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics |
25 | kernel void waves_per_eu_0_num_sgpr_0() {} |
26 | __attribute__((amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
27 | kernel void waves_per_eu_0_num_vgpr_0() {} |
28 | __attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics |
29 | kernel void waves_per_eu_0_0_num_sgpr_0() {} |
30 | __attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
31 | kernel void waves_per_eu_0_0_num_vgpr_0() {} |
32 | __attribute__((amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
33 | kernel void num_sgpr_0_num_vgpr_0() {} |
34 | |
35 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics |
36 | kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0() {} |
37 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
38 | kernel void flat_work_group_size_0_0_waves_per_eu_0_num_vgpr_0() {} |
39 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics |
40 | kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0() {} |
41 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
42 | kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_vgpr_0() {} |
43 | |
44 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
45 | kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0_num_vgpr_0() {} |
46 | __attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics |
47 | kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0_num_vgpr_0() {} |
48 | |
49 | __attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics |
50 | kernel void flat_work_group_size_32_64() { |
51 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]] |
52 | } |
53 | __attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics |
54 | kernel void waves_per_eu_2() { |
55 | // CHECK: define amdgpu_kernel void @waves_per_eu_2() [[WAVES_PER_EU_2:#[0-9]+]] |
56 | } |
57 | __attribute__((amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics |
58 | kernel void waves_per_eu_2_4() { |
59 | // CHECK: define amdgpu_kernel void @waves_per_eu_2_4() [[WAVES_PER_EU_2_4:#[0-9]+]] |
60 | } |
61 | __attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics |
62 | kernel void num_sgpr_32() { |
63 | // CHECK: define amdgpu_kernel void @num_sgpr_32() [[NUM_SGPR_32:#[0-9]+]] |
64 | } |
65 | __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics |
66 | kernel void num_vgpr_64() { |
67 | // CHECK: define amdgpu_kernel void @num_vgpr_64() [[NUM_VGPR_64:#[0-9]+]] |
68 | } |
69 | |
70 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) // expected-no-diagnostics |
71 | kernel void flat_work_group_size_32_64_waves_per_eu_2() { |
72 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2:#[0-9]+]] |
73 | } |
74 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics |
75 | kernel void flat_work_group_size_32_64_waves_per_eu_2_4() { |
76 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4:#[0-9]+]] |
77 | } |
78 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) // expected-no-diagnostics |
79 | kernel void flat_work_group_size_32_64_num_sgpr_32() { |
80 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32:#[0-9]+]] |
81 | } |
82 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) // expected-no-diagnostics |
83 | kernel void flat_work_group_size_32_64_num_vgpr_64() { |
84 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64:#[0-9]+]] |
85 | } |
86 | __attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) // expected-no-diagnostics |
87 | kernel void waves_per_eu_2_num_sgpr_32() { |
88 | // CHECK: define amdgpu_kernel void @waves_per_eu_2_num_sgpr_32() [[WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]] |
89 | } |
90 | __attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) // expected-no-diagnostics |
91 | kernel void waves_per_eu_2_num_vgpr_64() { |
92 | // CHECK: define amdgpu_kernel void @waves_per_eu_2_num_vgpr_64() [[WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]] |
93 | } |
94 | __attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) // expected-no-diagnostics |
95 | kernel void waves_per_eu_2_4_num_sgpr_32() { |
96 | // CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_sgpr_32() [[WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]] |
97 | } |
98 | __attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) // expected-no-diagnostics |
99 | kernel void waves_per_eu_2_4_num_vgpr_64() { |
100 | // CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_vgpr_64() [[WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]] |
101 | } |
102 | __attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics |
103 | kernel void num_sgpr_32_num_vgpr_64() { |
104 | // CHECK: define amdgpu_kernel void @num_sgpr_32_num_vgpr_64() [[NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]] |
105 | } |
106 | |
107 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) |
108 | kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() { |
109 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]] |
110 | } |
111 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) |
112 | kernel void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() { |
113 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]] |
114 | } |
115 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) |
116 | kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() { |
117 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]] |
118 | } |
119 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) |
120 | kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() { |
121 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]] |
122 | } |
123 | |
124 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics |
125 | kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() { |
126 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]] |
127 | } |
128 | __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics |
129 | kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() { |
130 | // CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]] |
131 | } |
132 | |
133 | __attribute__((reqd_work_group_size(32, 2, 1))) // expected-no-diagnostics |
134 | kernel void reqd_work_group_size_32_2_1() { |
135 | // CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1() [[FLAT_WORK_GROUP_SIZE_64_64:#[0-9]+]] |
136 | } |
137 | __attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(16, 128))) // expected-no-diagnostics |
138 | kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() { |
139 | // CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]] |
140 | } |
141 | |
142 | void a_function() { |
143 | // CHECK: define void @a_function() [[A_FUNCTION:#[0-9]+]] |
144 | } |
145 | |
146 | |
147 | // Make sure this is silently accepted on other targets. |
148 | // X86-NOT: "amdgpu-flat-work-group-size" |
149 | // X86-NOT: "amdgpu-waves-per-eu" |
150 | // X86-NOT: "amdgpu-num-vgpr" |
151 | // X86-NOT: "amdgpu-num-sgpr" |
152 | // X86-NOT: "amdgpu-implicitarg-num-bytes" |
153 | // NONAMDHSA-NOT: "amdgpu-implicitarg-num-bytes" |
154 | |
155 | // CHECK-NOT: "amdgpu-flat-work-group-size"="0,0" |
156 | // CHECK-NOT: "amdgpu-waves-per-eu"="0" |
157 | // CHECK-NOT: "amdgpu-waves-per-eu"="0,0" |
158 | // CHECK-NOT: "amdgpu-num-sgpr"="0" |
159 | // CHECK-NOT: "amdgpu-num-vgpr"="0" |
160 | |
161 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" |
162 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="48" |
163 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="48" |
164 | // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2" |
165 | // CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4" |
166 | // CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" |
167 | // CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" |
168 | |
169 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2" |
170 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4" |
171 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" |
172 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" |
173 | // CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" |
174 | // CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" |
175 | // CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" |
176 | // CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" |
177 | // CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" |
178 | |
179 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" |
180 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" |
181 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" |
182 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" |
183 | |
184 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" |
185 | // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" |
186 | |
187 | // CHECK-DAG: attributes [[A_FUNCTION]] = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" |
188 | |