link-device-bitcode.cu source code [clang_source_code/test/CodeGenCUDA/link-device-bitcode.cu]

1	// Test for linking with CUDA's libdevice as outlined in
2	// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
3	//
4	// REQUIRES: nvptx-registered-target
5	//
6	// Prepare bitcode file to link with
7	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
8	// RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
9	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
10	// RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
11	//
12	// Make sure function in device-code gets linked in and internalized.
13	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
14	// RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \
15	// RUN: -disable-llvm-passes -o - %s \
16	// RUN: | FileCheck %s -check-prefix CHECK-IR
17
18	// Make sure legacy flag name works
19	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
20	// RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \
21	// RUN: -disable-llvm-passes -o - %s \
22	// RUN: | FileCheck %s -check-prefix CHECK-IR
23	//
24	// Make sure we can link two bitcode files.
25	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
26	// RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \
27	// RUN: -emit-llvm -disable-llvm-passes -o - %s \
28	// RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
29	//
30	// Make sure function in device-code gets linked but is not internalized
31	// when it is linked with -mlink-bitcode-file instead of -mlink-builtin-bitcode.
32	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
33	// RUN: -mlink-bitcode-file %t.bc -emit-llvm \
34	// RUN: -disable-llvm-passes -o - %s \
35	// RUN: | FileCheck %s -check-prefix CHECK-IR-NLD
36	//
37	// Make sure NVVMReflect pass is enabled in NVPTX back-end.
38	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
39	// RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \
40	// RUN: -mllvm -debug-pass=Structure 2>&1 \
41	// RUN: | FileCheck %s -check-prefix CHECK-REFLECT
42
43	#include "Inputs/cuda.h"
44
45	__device__ float device_mul_or_add(float a, float b);  // declaration only; CHECK-IR below expects its definition to appear in the emitted IR
46	extern "C" __device__ double __nv_sin(double x);  // declaration only; CHECK-IR-2 below expects an internal definition when %t-2.bc is linked
47	extern "C" __device__ double __nv_exp(double x);  // declaration only; same CHECK-IR-2 expectation as __nv_sin
48
49	// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
50	// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
51	__device__ void should_not_be_internalized(float *data) {}  // empty body; the CHECK-IR label above expects a plain (non-internal) "define void" for it
52
53	// Make sure kernel call has not been internalized.
54	// CHECK-IR-LABEL: define void @_Z6kernelPfS_
55	// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
56	__global__ __attribute__((used)) void kernel(float *out, float *in) {  // two float* params, as the _Z6kernelPfS_ labels require
57	  *out = device_mul_or_add(in[0], in[1]);  // only declared in this file; CHECK-IR expects its definition in the output IR
58	  *out += __nv_exp(__nv_sin(*out));  // only declared in this file; CHECK-IR-2 expects both definitions in the output IR
59	  should_not_be_internalized(out);  // calls the empty __device__ function defined earlier in this file
60	}
61
62	// Make sure device_mul_or_add() is present in IR, is internal and
63	// calls __nvvm_reflect().
64	// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
65	// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
66	// CHECK-IR: call i32 @__nvvm_reflect
67	// CHECK-IR: ret float
68
69	// Make sure we've linked in and internalized only needed functions
70	// from the second bitcode file.
71	// CHECK-IR-2-LABEL: define internal double @__nv_sin
72	// CHECK-IR-2-LABEL: define internal double @__nv_exp
73	// CHECK-IR-2-NOT: double @__unused
74
75	// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
76	// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1
77

Clang Project