1 | // REQUIRES: nvptx-registered-target |
2 | // RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \ |
3 | // RUN: -fcuda-is-device -S -emit-llvm -o - -x cuda %s \ |
4 | // RUN: | FileCheck -check-prefix=CHECK -check-prefix=LP32 %s |
5 | // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \ |
6 | // RUN: -fcuda-is-device -S -emit-llvm -o - -x cuda %s \ |
7 | // RUN: | FileCheck -check-prefix=CHECK -check-prefix=LP64 %s |
8 | // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_61 \ |
9 | // RUN: -fcuda-is-device -S -emit-llvm -o - -x cuda %s \ |
10 | // RUN: | FileCheck -check-prefix=CHECK -check-prefix=LP64 %s |
11 | // RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_53 \ |
12 | // RUN: -DERROR_CHECK -fcuda-is-device -S -o /dev/null -x cuda -verify %s |
13 | |
// Minimal stand-ins for the CUDA execution-space / storage-class macros so
// this test can be compiled with bare %clang_cc1 (no CUDA headers available).
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
18 | |
// Each __nvvm_read_ptx_sreg_tid_* builtin must lower to the matching
// llvm.nvvm.read.ptx.sreg.tid.{x,y,z,w} intrinsic (thread index within the
// CTA). The sum only exists to keep all four calls live.
__device__ int read_tid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w()

  int x = __nvvm_read_ptx_sreg_tid_x();
  int y = __nvvm_read_ptx_sreg_tid_y();
  int z = __nvvm_read_ptx_sreg_tid_z();
  int w = __nvvm_read_ptx_sreg_tid_w();

  return x + y + z + w;

}
34 | |
// Checks lowering of the ntid (CTA dimensions) special-register builtins to
// llvm.nvvm.read.ptx.sreg.ntid.{x,y,z,w}.
__device__ int read_ntid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()

  int x = __nvvm_read_ptx_sreg_ntid_x();
  int y = __nvvm_read_ptx_sreg_ntid_y();
  int z = __nvvm_read_ptx_sreg_ntid_z();
  int w = __nvvm_read_ptx_sreg_ntid_w();

  return x + y + z + w;

}
50 | |
// Checks lowering of the ctaid (CTA index within the grid) special-register
// builtins to llvm.nvvm.read.ptx.sreg.ctaid.{x,y,z,w}.
__device__ int read_ctaid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()

  int x = __nvvm_read_ptx_sreg_ctaid_x();
  int y = __nvvm_read_ptx_sreg_ctaid_y();
  int z = __nvvm_read_ptx_sreg_ctaid_z();
  int w = __nvvm_read_ptx_sreg_ctaid_w();

  return x + y + z + w;

}
66 | |
// Checks lowering of the nctaid (grid dimensions) special-register builtins
// to llvm.nvvm.read.ptx.sreg.nctaid.{x,y,z,w}.
__device__ int read_nctaid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()

  int x = __nvvm_read_ptx_sreg_nctaid_x();
  int y = __nvvm_read_ptx_sreg_nctaid_y();
  int z = __nvvm_read_ptx_sreg_nctaid_z();
  int w = __nvvm_read_ptx_sreg_nctaid_w();

  return x + y + z + w;

}
82 | |
// Checks lowering of the scalar id special registers: lane/warp/SM/grid ids
// and their counts, one llvm.nvvm.read.ptx.sreg.* intrinsic apiece.
__device__ int read_ids() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid()

  int a = __nvvm_read_ptx_sreg_laneid();
  int b = __nvvm_read_ptx_sreg_warpid();
  int c = __nvvm_read_ptx_sreg_nwarpid();
  int d = __nvvm_read_ptx_sreg_smid();
  int e = __nvvm_read_ptx_sreg_nsmid();
  int f = __nvvm_read_ptx_sreg_gridid();

  return a + b + c + d + e + f;

}
102 | |
// Checks lowering of the five lanemask comparison special registers
// (eq/le/lt/ge/gt) to llvm.nvvm.read.ptx.sreg.lanemask.*.
__device__ int read_lanemasks() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()

  int a = __nvvm_read_ptx_sreg_lanemask_eq();
  int b = __nvvm_read_ptx_sreg_lanemask_le();
  int c = __nvvm_read_ptx_sreg_lanemask_lt();
  int d = __nvvm_read_ptx_sreg_lanemask_ge();
  int e = __nvvm_read_ptx_sreg_lanemask_gt();

  return a + b + c + d + e;

}
120 | |
// Checks the two clock special registers: the 32-bit clock returns i32 and
// the 64-bit clock64 returns i64.
__device__ long long read_clocks() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()

  int a = __nvvm_read_ptx_sreg_clock();
  long long b = __nvvm_read_ptx_sreg_clock64();

  return a + b;
}
131 | |
// Checks lowering of the four performance-monitor counter registers
// (pm0..pm3) to llvm.nvvm.read.ptx.sreg.pm*.
__device__ int read_pms() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3()

  int a = __nvvm_read_ptx_sreg_pm0();
  int b = __nvvm_read_ptx_sreg_pm1();
  int c = __nvvm_read_ptx_sreg_pm2();
  int d = __nvvm_read_ptx_sreg_pm3();

  return a + b + c + d;

}
147 | |
// __nvvm_bar_sync must lower to llvm.nvvm.bar.sync with its immediate
// barrier-id operand preserved.
__device__ void sync() {

// CHECK: call void @llvm.nvvm.bar.sync(i32 0)

  __nvvm_bar_sync(0);

}
155 | |
156 | |
157 | // NVVM intrinsics |
158 | |
159 | // The idea is not to test all intrinsics, just that Clang is recognizing the |
160 | // builtins defined in BuiltinsNVPTX.def |
// Spot-checks (not exhaustive) that the float/double math builtins, the
// membar fence builtins, and __syncthreads all lower to their respective
// llvm.nvvm.* intrinsics.
__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
// CHECK: call float @llvm.nvvm.fmax.f
  float t1 = __nvvm_fmax_f(f1, f2);
// CHECK: call float @llvm.nvvm.fmin.f
  float t2 = __nvvm_fmin_f(f1, f2);
// CHECK: call float @llvm.nvvm.sqrt.rn.f
  float t3 = __nvvm_sqrt_rn_f(f1);
// CHECK: call float @llvm.nvvm.rcp.rn.f
  float t4 = __nvvm_rcp_rn_f(f2);
// CHECK: call float @llvm.nvvm.add.rn.f
  float t5 = __nvvm_add_rn_f(f1, f2);

// CHECK: call double @llvm.nvvm.fmax.d
  double td1 = __nvvm_fmax_d(d1, d2);
// CHECK: call double @llvm.nvvm.fmin.d
  double td2 = __nvvm_fmin_d(d1, d2);
// CHECK: call double @llvm.nvvm.sqrt.rn.d
  double td3 = __nvvm_sqrt_rn_d(d1);
// CHECK: call double @llvm.nvvm.rcp.rn.d
  double td4 = __nvvm_rcp_rn_d(d2);

// CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
// CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
// CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
// CHECK: call void @llvm.nvvm.barrier0()
  __syncthreads();
}
191 | |
// Global objects in the device and shared address spaces, used as operands
// for the atomic builtin tests in nvvm_atom below.
__device__ int di;
__shared__ int si;
__device__ long dl;
__shared__ long sl;
__device__ long long dll;
__shared__ long long sll;
198 | |
199 | // Check for atomic intrinsics |
200 | // CHECK-LABEL: nvvm_atom |
// Exercises the __nvvm_atom_* builtins. Plain generic-scope integer atomics
// lower to LLVM atomicrmw/cmpxchg instructions; float add and inc/dec use
// llvm.nvvm.atomic.* intrinsics. The sm_60-gated section at the bottom is
// double-purposed: under the sm_60/sm_61 RUN lines it checks IR lowering of
// the cta-/sys-scoped atomics, and under the sm_53 -verify RUN line (with
// -DERROR_CHECK) each call must produce the "needs target feature sm_60"
// diagnostic instead.
__device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip,
                          int i, unsigned int *uip, unsigned ui, long *lp,
                          long l, long long *llp, long long ll) {
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_i(ip, i);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_l(&dl, l);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_ll(&sll, ll);

  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_i(ip, i);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_l(&dl, l);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_ll(&sll, ll);

  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_i(ip, i);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_l(&dl, l);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_ll(&sll, ll);

  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_i(ip, i);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_l(&dl, l);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_ll(&sll, ll);

  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_i(ip, i);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_l(&dl, l);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_ll(&sll, ll);

  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_i(ip, i);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_l(&dl, l);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_ll(&sll, ll);

  // Signed variants must produce max/min, unsigned variants umax/umin.
  // CHECK: atomicrmw max i32*
  __nvvm_atom_max_gen_i(ip, i);
  // CHECK: atomicrmw umax i32*
  __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
  // CHECK: atomicrmw max
  __nvvm_atom_max_gen_l(&dl, l);
  // CHECK: atomicrmw umax
  __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
  // CHECK: atomicrmw max i64*
  __nvvm_atom_max_gen_ll(&sll, ll);
  // CHECK: atomicrmw umax i64*
  __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);

  // CHECK: atomicrmw min i32*
  __nvvm_atom_min_gen_i(ip, i);
  // CHECK: atomicrmw umin i32*
  __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
  // CHECK: atomicrmw min
  __nvvm_atom_min_gen_l(&dl, l);
  // CHECK: atomicrmw umin
  __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
  // CHECK: atomicrmw min i64*
  __nvvm_atom_min_gen_ll(&sll, ll);
  // CHECK: atomicrmw umin i64*
  __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);

  // CAS lowers to cmpxchg; the builtin returns the old value, hence the
  // extractvalue of field 0 from the {value, success} pair.
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_i(ip, 0, i);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { {{i32|i64}}, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_l(&dl, 0, l);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i64, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_ll(&sll, 0, ll);

  // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
  __nvvm_atom_add_gen_f(fp, f);

  // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32
  __nvvm_atom_inc_gen_ui(uip, ui);

  // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32
  __nvvm_atom_dec_gen_ui(uip, ui);


  //////////////////////////////////////////////////////////////////
  // Atomics with scope (only supported on sm_60+).

#if ERROR_CHECK || __CUDA_ARCH__ >= 600

  // CHECK: call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_ll(&sll, ll);
  // CHECK: call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_ll(&sll, ll);

  // CHECK: call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_f' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_f(fp, f);
  // CHECK: call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64
  // expected-error@+1 {{'__nvvm_atom_cta_add_gen_d' needs target feature sm_60}}
  __nvvm_atom_cta_add_gen_d(dfp, df);
  // CHECK: call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_f' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_f(fp, f);
  // CHECK: call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64
  // expected-error@+1 {{'__nvvm_atom_sys_add_gen_d' needs target feature sm_60}}
  __nvvm_atom_sys_add_gen_d(dfp, df);

  // CHECK: call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xchg_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_xchg_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xchg_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_xchg_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ul' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_max_gen_ull' needs target feature sm_60}}
  __nvvm_atom_cta_max_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ul' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_max_gen_ull' needs target feature sm_60}}
  __nvvm_atom_sys_max_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ul' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_min_gen_ull' needs target feature sm_60}}
  __nvvm_atom_cta_min_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_i(ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ui((unsigned int *)ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_l(&dl, l);
  // LP32: call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ul' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ul((unsigned long *)lp, l);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ll(&sll, ll);
  // CHECK: call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_min_gen_ull' needs target feature sm_60}}
  __nvvm_atom_sys_min_gen_ull((unsigned long long *)llp, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_inc_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_inc_gen_ui((unsigned int *)ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_inc_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_inc_gen_ui((unsigned int *)ip, i);

  // CHECK: call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_dec_gen_ui' needs target feature sm_60}}
  __nvvm_atom_cta_dec_gen_ui((unsigned int *)ip, i);
  // CHECK: call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_dec_gen_ui' needs target feature sm_60}}
  __nvvm_atom_sys_dec_gen_ui((unsigned int *)ip, i);

  // CHECK: call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_and_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_and_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_and_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_and_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_or_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_or_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_or_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_or_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_xor_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_xor_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_i(ip, i);
  // LP32: call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_l(&dl, l);
  // CHECK: call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_xor_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_xor_gen_ll(&sll, ll);

  // CHECK: call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_i' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_i(ip, i, 0);
  // LP32: call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_l' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_l(&dl, l, 0);
  // CHECK: call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_cta_cas_gen_ll' needs target feature sm_60}}
  __nvvm_atom_cta_cas_gen_ll(&sll, ll, 0);

  // CHECK: call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_i' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_i(ip, i, 0);
  // LP32: call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32
  // LP64: call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_l' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_l(&dl, l, 0);
  // CHECK: call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64
  // expected-error@+1 {{'__nvvm_atom_sys_cas_gen_ll' needs target feature sm_60}}
  __nvvm_atom_sys_cas_gen_ll(&sll, ll, 0);
#endif

  // CHECK: ret
}
542 | |
543 | // CHECK-LABEL: nvvm_ldg |
// Checks the __nvvm_ldg_* (read-only/non-coherent load) builtins for scalar
// and ext_vector types. Unsigned and signed variants of the same width must
// lower to the same llvm.nvvm.ldg.global.{i,f}.* intrinsic, and the i32
// alignment operand must match the natural alignment of the loaded type.
__device__ void nvvm_ldg(const void *p) {
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  __nvvm_ldg_c((const char *)p);
  __nvvm_ldg_uc((const unsigned char *)p);

  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_s((const short *)p);
  __nvvm_ldg_us((const unsigned short *)p);

  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_i((const int *)p);
  __nvvm_ldg_ui((const unsigned int *)p);

  // 'long' is 32-bit on nvptx and 64-bit on nvptx64, hence the split prefixes.
  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_l((const long *)p);
  __nvvm_ldg_ul((const unsigned long *)p);

  // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0f32(float* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_f((const float *)p);
  // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0f64(double* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_d((const double *)p);

  // In practice, the pointers we pass to __ldg will be aligned as appropriate
  // for the CUDA <type>N vector types (e.g. short4), which are not the same as
  // the LLVM vector types. However, each LLVM vector type has an alignment
  // less than or equal to its corresponding CUDA type, so we're OK.
  //
  // PTX Interoperability section 2.2: "For a vector with an even number of
  // elements, its alignment is set to number of elements times the alignment of
  // its member: n*alignof(t)."

  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  typedef char char2 __attribute__((ext_vector_type(2)));
  typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_c2((const char2 *)p);
  __nvvm_ldg_uc2((const uchar2 *)p);

  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  typedef char char4 __attribute__((ext_vector_type(4)));
  typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_c4((const char4 *)p);
  __nvvm_ldg_uc4((const uchar4 *)p);

  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  typedef short short2 __attribute__((ext_vector_type(2)));
  typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_s2((const short2 *)p);
  __nvvm_ldg_us2((const ushort2 *)p);

  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  typedef short short4 __attribute__((ext_vector_type(4)));
  typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_s4((const short4 *)p);
  __nvvm_ldg_us4((const ushort4 *)p);

  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  typedef int int2 __attribute__((ext_vector_type(2)));
  typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_i2((const int2 *)p);
  __nvvm_ldg_ui2((const uint2 *)p);

  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  typedef int int4 __attribute__((ext_vector_type(4)));
  typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_i4((const int4 *)p);
  __nvvm_ldg_ui4((const uint4 *)p);

  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  typedef long long longlong2 __attribute__((ext_vector_type(2)));
  typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_ll2((const longlong2 *)p);
  __nvvm_ldg_ull2((const ulonglong2 *)p);

  // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0v2f32(<2 x float>* {{%[0-9]+}}, i32 8)
  typedef float float2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_f2((const float2 *)p);

  // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0v4f32(<4 x float>* {{%[0-9]+}}, i32 16)
  typedef float float4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_f4((const float4 *)p);

  // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0v2f64(<2 x double>* {{%[0-9]+}}, i32 16)
  typedef double double2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_d2((const double2 *)p);
}
642 | |
643 | // CHECK-LABEL: nvvm_shfl |
644 | __device__ void nvvm_shfl(int i, float f, int a, int b) { |
645 | // CHECK: call i32 @llvm.nvvm.shfl.down.i32(i32 |
646 | __nvvm_shfl_down_i32(i, a, b); |
647 | // CHECK: call float @llvm.nvvm.shfl.down.f32(float |
648 | __nvvm_shfl_down_f32(f, a, b); |
649 | // CHECK: call i32 @llvm.nvvm.shfl.up.i32(i32 |
650 | __nvvm_shfl_up_i32(i, a, b); |
651 | // CHECK: call float @llvm.nvvm.shfl.up.f32(float |
652 | __nvvm_shfl_up_f32(f, a, b); |
653 | // CHECK: call i32 @llvm.nvvm.shfl.bfly.i32(i32 |
654 | __nvvm_shfl_bfly_i32(i, a, b); |
655 | // CHECK: call float @llvm.nvvm.shfl.bfly.f32(float |
656 | __nvvm_shfl_bfly_f32(f, a, b); |
657 | // CHECK: call i32 @llvm.nvvm.shfl.idx.i32(i32 |
658 | __nvvm_shfl_idx_i32(i, a, b); |
659 | // CHECK: call float @llvm.nvvm.shfl.idx.f32(float |
660 | __nvvm_shfl_idx_f32(f, a, b); |
661 | // CHECK: ret void |
662 | } |
663 | |
// Checks the pre-Volta (mask-less) warp vote builtins. The int predicate is
// truncated to i1 for all/any/uni; ballot returns the i32 lane mask.
__device__ void nvvm_vote(int pred) {
  // CHECK: call i1 @llvm.nvvm.vote.all(i1
  __nvvm_vote_all(pred);
  // CHECK: call i1 @llvm.nvvm.vote.any(i1
  __nvvm_vote_any(pred);
  // CHECK: call i1 @llvm.nvvm.vote.uni(i1
  __nvvm_vote_uni(pred);
  // CHECK: call i32 @llvm.nvvm.vote.ballot(i1
  __nvvm_vote_ballot(pred);
  // CHECK: ret void
}
675 | |