1 | // REQUIRES: powerpc-registered-target |
2 | // REQUIRES: nvptx-registered-target |
3 | |
4 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST |
5 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc |
6 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=CLASS,FUN,CHECK |
7 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t |
8 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=CLASS,CHECK |
9 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=FUN,CHECK |
10 | |
11 | // expected-no-diagnostics |
12 | #ifndef HEADER |
13 | #define HEADER |
14 | |
15 | // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4, i64 4] |
16 | // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 544, i64 33, i64 673, i64 1407374883554064, i64 1407374883554064, i64 1407374883554064, i64 1407374883554064, i64 1407374883554064, i64 800] |
17 | // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4] |
18 | // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 547, i64 544, i64 547, i64 673, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720] |
19 | // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8] |
20 | // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096] |
21 | // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8] |
22 | // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096] |
23 | // HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 8, i64 8] |
24 | // HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 673, i64 281474976711440] |
25 | // CHECK-DAG: [[S:%.+]] = type { i32 } |
26 | // CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* } |
27 | // CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* } |
28 | |
29 | // CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72_worker() |
30 | // CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72([[S]]* {{%.+}}, [[CAP1]]* dereferenceable(8) {{%.+}}) |
31 | // CLASS-NOT: getelementptr |
32 | // CLASS: br i1 % |
33 | // CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72_worker() |
34 | // CLASS: br label % |
35 | // CLASS: br i1 % |
36 | // CLASS: call void @__kmpc_kernel_init( |
37 | // CLASS: call void @__kmpc_data_sharing_init_stack() |
38 | // CLASS: call void @llvm.memcpy. |
39 | // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]], |
40 | // CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0 |
41 | // CLASS: store [[S]]* [[S_:%.+]], [[S]]** [[THIS_REF]], |
42 | // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]], |
43 | // CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* [[L]]) |
44 | // CLASS: ret void |
45 | |
46 | // CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l74([[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}}) |
47 | // CLASS-NOT: getelementptr |
48 | // CLASS: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* %{{.+}}) |
49 | // CLASS: ret void |
50 | |
51 | // CLASS: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}}) |
52 | // CLASS-NOT: getelementptr |
53 | // CLASS: call void @llvm.memcpy. |
54 | // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]], |
55 | // CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0 |
56 | // CLASS: store [[S]]* %{{.+}}, [[S]]** [[THIS_REF]], |
57 | // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]], |
58 | // CLASS: call i32 [[LAMBDA1]]([[CAP1]]* [[L]]) |
59 | // CLASS: ret void |
60 | |
61 | template <typename T> /* Helper: invokes the callable t inside a combined target parallel region. */ |
62 | int foo(const T &t) { /* NOTE: do not add or remove lines; kernel-name patterns embed pragma line numbers (_l72, _l74, _l134, _l136). */ |
63 | #pragma omp target parallel |
64 | t(); /* result of the call is discarded */ |
65 | return 0; /* always returns 0 */ |
66 | } |
67 | |
68 | struct S { /* Test subject: a member function whose lambda reads a data member and is called from target regions. */ |
69 | int a = 15; /* member read by the lambda below */ |
70 | int foo() { /* Calls L in a target region, in a target parallel region, and via the global template foo. */ |
71 | auto &&L = [&]() { return a; }; /* uses member a, so the closure captures this (matches the CAP1 pattern holding an S pointer) */ |
72 | #pragma omp target |
73 | L(); |
74 | #pragma omp target parallel |
75 | L(); |
76 | return a + ::foo(L); /* ::foo selects the namespace-scope template, not this member; it returns 0 */ |
77 | } |
78 | } s; /* global instance; main calls s.foo() */ |
79 | |
80 | // FUN: define internal void @__omp_offloading_{{.+}}_main_l134_worker() |
81 | // FUN: define weak void @__omp_offloading_{{.+}}_main_l134(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}, i64 %{{.+}}) |
82 | // FUN-NOT: getelementptr |
83 | // FUN: br i1 % |
84 | // FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l134_worker() |
85 | // FUN: br label % |
86 | // FUN: br i1 % |
87 | // FUN: call void @__kmpc_kernel_init( |
88 | // FUN: call void @__kmpc_data_sharing_init_stack() |
89 | // FUN: call void @llvm.memcpy. |
90 | // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]], |
91 | // FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0 |
92 | // FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]], |
93 | // FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1 |
94 | // FUN: store i32* %{{.+}}, i32** [[B_CAP]], |
95 | // FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2 |
96 | // FUN: store i32* %{{.+}}, i32** [[C_CAP]], |
97 | // FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3 |
98 | // FUN: store i32** %{{.+}}, i32*** [[D_CAP]], |
99 | // FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4 |
100 | // FUN: store i32* %{{.+}}, i32** [[A_CAP]], |
101 | // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]], |
102 | // FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* [[L]]) |
103 | // FUN: ret void |
104 | |
105 | // FUN: define weak void @__omp_offloading_{{.+}}_main_l136(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}) |
106 | // FUN-NOT: getelementptr |
107 | // FUN: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, [[CAP2]]* %{{.+}}) |
108 | // FUN: ret void |
109 | |
110 | // FUN: define internal void [[PARALLEL:@.+]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}) |
111 | // FUN-NOT: getelementptr |
112 | // FUN: call void @llvm.memcpy. |
113 | // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]], |
114 | // FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0 |
115 | // FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]], |
116 | // FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1 |
117 | // FUN: store i32* %{{.+}}, i32** [[B_CAP]], |
118 | // FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2 |
119 | // FUN: store i32* %{{.+}}, i32** [[C_CAP]], |
120 | // FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3 |
121 | // FUN: store i32** %{{.+}}, i32*** [[D_CAP]], |
122 | // FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4 |
123 | // FUN: store i32* %{{.+}}, i32** [[A_CAP]], |
124 | // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]], |
125 | // FUN: call i64 [[LAMBDA2]]([[CAP2]]* [[L]]) |
126 | // FUN: ret void |
127 | |
128 | int main(int argc, char **argv) { /* Builds a by-reference lambda over five locals and calls it in two target regions. */ |
129 | int &b = argc; /* lvalue reference aliasing argc */ |
130 | int &&c = 1; /* rvalue reference bound to a temporary */ |
131 | int *d = &argc; /* pointer to argc */ |
132 | int a; /* never assigned in this file; it is only captured and mapped */ |
133 | auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; }; /* default by-reference capture of argc, b, c, d and a */ |
134 | #pragma omp target firstprivate(argc) map(to : a) |
135 | L(); |
136 | #pragma omp target parallel |
137 | L(); |
138 | return argc + s.foo(); /* also drives the S::foo target regions on the global s */ |
139 | } |
140 | |
141 | |
142 | // HOST-LABEL: @main |
143 | |
144 | // HOST-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 11, i8** [[BASES:%.+]], i8** [[PTRS:%.+]], |
145 | // HOST-DAG: [[BASES:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR:%.+]], i32 0, i32 0 |
146 | // HOST-DAG: [[PTRS:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR:%.+]], i32 0, i32 0 |
147 | // HOST-DAG: [[BASE_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR]], i32 0, i32 5 |
148 | // HOST-DAG: [[BASE_REF_CAST:%.+]] = bitcast i8** [[BASE_REF]] to i32*** |
149 | // HOST-DAG: store i32** [[BASE:%.+]], i32*** [[BASE_REF_CAST]], |
150 | // HOST-DAG: [[BASE]] = getelementptr inbounds [[LAMBDA:%.+]], [[LAMBDA]]* [[LAMBDA_ADDR:%.+]], i32 0, i32 0 |
151 | // HOST-DAG: [[PTR_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR]], i32 0, i32 5 |
152 | // HOST-DAG: [[PTR_REF_CAST:%.+]] = bitcast i8** [[PTR_REF]] to i32** |
153 | // HOST-DAG: store i32* [[PTR:%.+]], i32** [[PTR_REF_CAST]], |
154 | // HOST-DAG: [[PTR]] = load i32*, i32** [[PTR_REF:%.+]], |
155 | // HOST-DAG: [[PTR_REF]] = getelementptr inbounds [[LAMBDA]], [[LAMBDA]]* [[LAMBDA_ADDR]], i32 0, i32 0 |
156 | #endif // HEADER |
157 | |