1 | // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s |
2 | |
3 | // Also test serialization of atomic operations here, to avoid duplicating the test. |
4 | // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl |
5 | // RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s |
6 | |
7 | #ifndef ALREADY_INCLUDED |
8 | #define ALREADY_INCLUDED |
9 | |
// Local type aliases for this test: intptr_t is taken from the
// compiler-provided __INTPTR_TYPE__ macro, and int8 is an eight-element
// vector of int (ext_vector_type(8)).
// NOTE(review): neither alias is referenced in the visible part of this file.
10 | typedef __INTPTR_TYPE__ intptr_t; |
11 | typedef int int8 __attribute__((ext_vector_type(8))); |
12 | |
// Memory-order constants used by the tests below. Each enumerator takes its
// value from the matching compiler-provided __ATOMIC_* macro; no consume
// enumerator is declared here.
13 | typedef enum memory_order { |
14 | memory_order_relaxed = __ATOMIC_RELAXED, |
15 | memory_order_acquire = __ATOMIC_ACQUIRE, |
16 | memory_order_release = __ATOMIC_RELEASE, |
17 | memory_order_acq_rel = __ATOMIC_ACQ_REL, |
18 | memory_order_seq_cst = __ATOMIC_SEQ_CST |
19 | } memory_order; |
20 | |
// Memory-scope constants used by the tests below. Each enumerator takes its
// value from the matching compiler-provided __OPENCL_MEMORY_SCOPE_* macro.
21 | typedef enum memory_scope { |
22 | memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, |
23 | memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, |
24 | memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, |
25 | memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, |
// The sub-group scope is declared only when one of the two subgroup extension
// macros is defined; otherwise the enumerator list simply ends with the
// trailing comma above.
26 | #if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) |
27 | memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP |
28 | #endif |
29 | } memory_scope; |
30 | |
// File-scope atomic object. Its only visible use is as the target of
// __opencl_atomic_init in atomic_init_foo.
31 | atomic_int j; |
32 | |
// Atomic load with seq_cst ordering at four compile-time-constant scopes.
// The FileCheck patterns expect one `load atomic i32` per call, in source
// order, with these sync scopes:
//   memory_scope_work_group      -> syncscope("workgroup")
//   memory_scope_device          -> syncscope("agent")
//   memory_scope_all_svm_devices -> no syncscope clause
//   memory_scope_sub_group       -> syncscope("wavefront")
// NOTE(review): memory_scope_sub_group is declared in this file only when
// cl_intel_subgroups or cl_khr_subgroups is defined, yet it is used here
// unconditionally -- presumably the target in the run lines defines one of
// them; confirm.
33 | void fi1(atomic_int *i) { |
34 | // CHECK-LABEL: @fi1 |
35 | // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
36 | int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); |
37 | |
38 | // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst |
39 | x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); |
40 | |
41 | // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst |
42 | x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); |
43 | |
44 | // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst |
45 | x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); |
46 | } |
47 | |
// Atomic store of the constant 1 with seq_cst ordering at work-group scope.
// The FileCheck pattern expects a single `store atomic i32` carrying
// syncscope("workgroup") and seq_cst.
48 | void fi2(atomic_int *i) { |
49 | // CHECK-LABEL: @fi2 |
50 | // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
51 | __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); |
52 | } |
53 | |
// Same seq_cst / work-group atomic store as fi2, issued through pointers that
// carry explicit address-space qualifiers. The FileCheck patterns expect the
// pointer operand of each `store atomic` to be in:
//   ig (global)  -> addrspace(1)
//   ip (private) -> addrspace(5)
//   il (local)   -> addrspace(3)
54 | void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) { |
55 | // CHECK-LABEL: @test_addr |
56 | // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
57 | __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); |
58 | |
59 | // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
60 | __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group); |
61 | |
62 | // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
63 | __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group); |
64 | } |
65 | |
// Read-modify-write builtins (fetch_and, fetch_min, fetch_max) with seq_cst
// ordering at work-group scope. For the atomic_int operand the FileCheck
// patterns expect `atomicrmw and`, `atomicrmw min` and `atomicrmw max`; for
// the atomic_uint operand they expect the `umin` / `umax` forms instead.
66 | void fi3(atomic_int *i, atomic_uint *ui) { |
67 | // CHECK-LABEL: @fi3 |
68 | // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
69 | int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); |
70 | |
71 | // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
72 | x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); |
73 | |
74 | // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
75 | x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); |
76 | |
77 | // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
78 | x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group); |
79 | |
80 | // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst |
81 | x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group); |
82 | } |
83 | |
// Strong compare-exchange against a local `cmp` initialised to 0, with desired
// value 1, acquire ordering for both success and failure, at work-group scope.
// The FileCheck patterns expect, in order:
//   - a `cmpxchg` with syncscope("workgroup-one-as") acquire acquire
//     (note the "-one-as" suffix, which the seq_cst tests in this file do not
//     expect),
//   - extraction of element 0 (old value) and element 1 (i1 flag) from the
//     { i32, i1 } result,
//   - a conditional branch on that flag,
//   - a plain `store i32` of the old value.
// The pattern variables PTR, EXPECTED, DESIRED, STORE_EXPECTED and CONTINUE
// are captured but never referenced again in the visible file.
84 | bool fi4(atomic_int *i) { |
85 | // CHECK-LABEL: @fi4( |
86 | // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire |
87 | // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 |
88 | // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 |
89 | // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] |
90 | // CHECK: store i32 [[OLD]] |
91 | int cmp = 0; |
92 | return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group); |
93 | } |
94 | |
// Atomic seq_cst load whose memory scope is a run-time value. The FileCheck
// patterns expect a `switch` on the scope operand with:
//   default -> the all-SVM-devices block (load with no syncscope clause)
//   i32 1   -> the work-group block      (syncscope("workgroup"))
//   i32 2   -> the device block          (syncscope("agent"))
//   i32 4   -> the sub-group block       (syncscope("wavefront"))
// Each block is expected to end with a branch to one shared continuation
// label, which is then expected to appear after the four blocks.
95 | void fi5(atomic_int *i, int scope) { |
96 | // CHECK-LABEL: @fi5 |
97 | // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [ |
98 | // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]] |
99 | // CHECK-NEXT: i32 2, label %[[opencl_device:.*]] |
100 | // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]] |
101 | // CHECK-NEXT: ] |
102 | // CHECK: [[opencl_workgroup]]: |
103 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst |
104 | // CHECK: br label %[[continue:.*]] |
105 | // CHECK: [[opencl_device]]: |
106 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst |
107 | // CHECK: br label %[[continue]] |
108 | // CHECK: [[opencl_allsvmdevices]]: |
109 | // CHECK: load atomic i32, i32* %{{.*}} seq_cst |
110 | // CHECK: br label %[[continue]] |
111 | // CHECK: [[opencl_subgroup]]: |
112 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst |
113 | // CHECK: br label %[[continue]] |
114 | // CHECK: [[continue]]: |
115 | int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); |
116 | } |
117 | |
// Atomic load whose memory order AND memory scope are both run-time values.
// The FileCheck patterns expect a two-level dispatch:
//   1. an outer `switch` on the order operand: default -> monotonic block,
//      order values 1 and 2 -> one shared acquire block, 5 -> seq_cst block;
//   2. inside each of those blocks, an inner `switch` on the scope operand:
//      default -> all-SVM-devices, 1 -> work-group, 2 -> device, 4 -> sub-group.
// Twelve `load atomic` instructions are then expected, one per (order, scope)
// pair. The monotonic and acquire loads are expected to use the "-one-as"
// scope names; the seq_cst loads are expected to use the plain scope names.
//
// The second acquire case refers back to the `acquire` pattern variable
// instead of re-defining it, so the test actually verifies that order values
// 1 and 2 branch to the same block (same form as used in generalFailureOrder).
118 | void fi6(atomic_int *i, int order, int scope) { |
119 | // CHECK-LABEL: @fi6 |
120 | // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [ |
121 | // CHECK-NEXT: i32 1, label %[[acquire:.*]] |
122 | // CHECK-NEXT: i32 2, label %[[acquire]] |
123 | // CHECK-NEXT: i32 5, label %[[seqcst:.*]] |
124 | // CHECK-NEXT: ] |
125 | // CHECK: [[monotonic]]: |
126 | // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [ |
127 | // CHECK-NEXT: i32 1, label %[[MON_WG:.*]] |
128 | // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]] |
129 | // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]] |
130 | // CHECK-NEXT: ] |
131 | // CHECK: [[acquire]]: |
132 | // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [ |
133 | // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]] |
134 | // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]] |
135 | // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]] |
136 | // CHECK-NEXT: ] |
137 | // CHECK: [[seqcst]]: |
138 | // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [ |
139 | // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]] |
140 | // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]] |
141 | // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] |
142 | // CHECK-NEXT: ] |
143 | // CHECK: [[MON_WG]]: |
144 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic |
145 | // CHECK: [[MON_DEV]]: |
146 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic |
147 | // CHECK: [[MON_ALL]]: |
148 | // CHECK: load atomic i32, i32* %{{.*}} monotonic |
149 | // CHECK: [[MON_SUB]]: |
150 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic |
151 | // CHECK: [[ACQ_WG]]: |
152 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire |
153 | // CHECK: [[ACQ_DEV]]: |
154 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire |
155 | // CHECK: [[ACQ_ALL]]: |
156 | // CHECK: load atomic i32, i32* %{{.*}} acquire |
157 | // CHECK: [[ACQ_SUB]]: |
158 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire |
159 | // CHECK: [[SEQ_WG]]: |
160 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst |
161 | // CHECK: [[SEQ_DEV]]: |
162 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst |
163 | // CHECK: [[SEQ_ALL]]: |
164 | // CHECK: load atomic i32, i32* %{{.*}} seq_cst |
165 | // CHECK: [[SEQ_SUB]]: |
166 | // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst |
167 | int x = __opencl_atomic_load(i, order, scope); |
168 | } |
169 | |
// Relaxed atomic load of a float in the global address space at work-group
// scope. The FileCheck pattern expects the access to appear as an integer
// `load atomic i32` from an addrspace(1) pointer, with
// syncscope("workgroup-one-as") and monotonic ordering.
170 | float ff1(global atomic_float *d) { |
171 | // CHECK-LABEL: @ff1 |
172 | // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic |
173 | return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group); |
174 | } |
175 | |
// Release atomic store of the value 1 to a float at work-group scope. The
// FileCheck pattern expects an integer `store atomic i32` with
// syncscope("workgroup-one-as") and release ordering.
176 | void ff2(atomic_float *d) { |
177 | // CHECK-LABEL: @ff2 |
178 | // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release |
179 | __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group); |
180 | } |
181 | |
// Sequentially-consistent atomic exchange of a float with the value 2 at
// work-group scope. The FileCheck pattern expects an integer
// `atomicrmw xchg i32*` with syncscope("workgroup") and seq_cst ordering.
182 | float ff3(atomic_float *d) { |
183 | // CHECK-LABEL: @ff3 |
184 | // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst |
185 | return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); |
186 | } |
187 | |
// Initialises the file-scope atomic `j` to 42 via __opencl_atomic_init.
// The FileCheck patterns require that the text "atomic" does not appear
// before the first `store` in the function body, nor between that store and
// the closing brace -- i.e. the initialisation is expected to be emitted as a
// plain, non-atomic store.
188 | // CHECK-LABEL: @atomic_init_foo |
189 | void atomic_init_foo() |
190 | { |
191 | // CHECK-NOT: atomic |
192 | // CHECK: store |
193 | __opencl_atomic_init(&j, 42); |
194 | |
195 | // CHECK-NOT: atomic |
196 | // CHECK: } |
197 | } |
198 | |
// Compare-exchange with compile-time-constant success / failure orderings at
// work-group scope. The FileCheck patterns expect:
//   strong, success=acquire, failure=relaxed ->
//       `cmpxchg`      syncscope("workgroup-one-as") acquire monotonic
//   weak,   success=seq_cst, failure=acquire ->
//       `cmpxchg weak` syncscope("workgroup")        seq_cst acquire
199 | // CHECK-LABEL: @failureOrder |
200 | void failureOrder(atomic_int *ptr, int *ptr2) { |
201 | // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic |
202 | __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); |
203 | |
204 | // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire |
205 | __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group); |
206 | } |
207 | |
// Strong compare-exchange whose success and failure orderings are both
// run-time values (the scope is the constant memory_scope_work_group).
// The FileCheck patterns expect a two-level dispatch:
//   1. an outer `switch` on the success order: default -> monotonic,
//      1 and 2 -> one shared acquire block, 3 -> release, 4 -> acq_rel,
//      5 -> seq_cst;
//   2. inside each of those blocks, an inner `switch` on the failure order.
//      The monotonic and release blocks are expected to have no cases at all;
//      the acquire and acq_rel blocks map 1 and 2 to an acquire-failure block;
//      the seq_cst block additionally maps 5 to a seq_cst-failure block.
// Finally one `cmpxchg` followed by a branch is expected per checked
// (success, failure) block.
//
// In every inner switch the second acquire case refers back to the pattern
// variable captured on the line before instead of re-defining it, matching
// the outer switch, so the test verifies that values 1 and 2 really share a
// successor block.
//
// NOTE(review): RELEASE_MONOTONIC is captured but its block is never matched
// below, so no `cmpxchg ... release monotonic` is verified -- confirm whether
// that omission is intentional.
208 | // CHECK-LABEL: @generalFailureOrder |
209 | void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) { |
210 | __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group); |
211 | // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [ |
212 | // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]] |
213 | // CHECK-NEXT: i32 2, label %[[ACQUIRE]] |
214 | // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]] |
215 | // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]] |
216 | // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]] |
217 | |
218 | // CHECK: [[MONOTONIC]] |
219 | // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [ |
220 | // CHECK-NEXT: ] |
221 | |
222 | // CHECK: [[ACQUIRE]] |
223 | // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [ |
224 | // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] |
225 | // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE]] |
226 | // CHECK-NEXT: ] |
227 | |
228 | // CHECK: [[RELEASE]] |
229 | // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [ |
230 | // CHECK-NEXT: ] |
231 | |
232 | // CHECK: [[ACQREL]] |
233 | // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [ |
234 | // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] |
235 | // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE]] |
236 | // CHECK-NEXT: ] |
237 | |
238 | // CHECK: [[SEQCST]] |
239 | // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [ |
240 | // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]] |
241 | // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]] |
242 | // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]] |
243 | // CHECK-NEXT: ] |
244 | |
245 | // CHECK: [[MONOTONIC_MONOTONIC]] |
246 | // CHECK: cmpxchg {{.*}} monotonic monotonic |
247 | // CHECK: br |
248 | |
249 | // CHECK: [[ACQUIRE_MONOTONIC]] |
250 | // CHECK: cmpxchg {{.*}} acquire monotonic |
251 | // CHECK: br |
252 | |
253 | // CHECK: [[ACQUIRE_ACQUIRE]] |
254 | // CHECK: cmpxchg {{.*}} acquire acquire |
255 | // CHECK: br |
256 | |
257 | // CHECK: [[ACQREL_MONOTONIC]] |
258 | // CHECK: cmpxchg {{.*}} acq_rel monotonic |
259 | // CHECK: br |
260 | |
261 | // CHECK: [[ACQREL_ACQUIRE]] |
262 | // CHECK: cmpxchg {{.*}} acq_rel acquire |
263 | // CHECK: br |
264 | |
265 | // CHECK: [[SEQCST_MONOTONIC]] |
266 | // CHECK: cmpxchg {{.*}} seq_cst monotonic |
267 | // CHECK: br |
268 | |
269 | // CHECK: [[SEQCST_ACQUIRE]] |
270 | // CHECK: cmpxchg {{.*}} seq_cst acquire |
271 | // CHECK: br |
272 | |
273 | // CHECK: [[SEQCST_SEQCST]] |
274 | // CHECK: cmpxchg {{.*}} seq_cst seq_cst |
275 | // CHECK: br |
276 | } |
277 | |
// Atomic seq_cst load at work-group scope through a pointer to a volatile
// atomic. Unlike the other tests, the FileCheck patterns here pin the exact
// consecutive instruction sequence expected for the whole body:
//   two `alloca i32` slots (parameter slot, then a temporary),
//   a store of the incoming %i pointer into its slot and a reload of it,
//   a `load atomic volatile i32` with syncscope("workgroup") seq_cst,
//   a store of the loaded value into the temporary and a reload of it,
//   and the `ret` of that reloaded value.
// The slots are expected to be addressed through addrspace(5) pointers.
// Because of the consecutive-line matching, any change to this body that
// alters the emitted instructions will break the test.
278 | int test_volatile(volatile atomic_int *i) { |
279 | // CHECK-LABEL: @test_volatile |
280 | // CHECK: %[[i_addr:.*]] = alloca i32 |
281 | // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32 |
282 | // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]] |
283 | // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]] |
284 | // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst |
285 | // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]] |
286 | // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]] |
287 | // CHECK-NEXT: ret i32 %[[retval]] |
288 | return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); |
289 | } |
290 | |
291 | #endif |
292 | |