1 | // expected-no-diagnostics |
2 | #ifndef HEADER |
3 | #define HEADER |
4 | // Test host codegen. |
5 | // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 |
6 | // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
7 | // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 |
8 | // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 |
9 | // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
10 | // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 |
11 | |
12 | // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
13 | // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
14 | // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
15 | // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
16 | // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
17 | // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
18 | // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} |
19 | #ifdef CK1 |
20 | |
21 | int a[100]; |
22 | |
23 | // CK1: define {{.*}}i32 @{{.+}}teams_argument_globali( |
24 | int teams_argument_global(int n){ /* Case 1: two target regions that zero the global array a for i < n; returns a[0]. */ |
25 | int te = n / 128; /* value handed to num_teams() in the first region */ |
26 | int th = 128; /* value handed to thread_limit() in the first region */ |
27 | // discard n_addr (the first alloca matched below); the next two allocas are bound to TE and TH |
28 | // CK1: alloca i32, |
29 | // CK1: [[TE:%.+]] = alloca i32, |
30 | // CK1: [[TH:%.+]] = alloca i32, |
31 | // CK1: alloca i32, |
32 | // CK1: alloca i32, |
33 | // CK1: alloca i32, |
34 | // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, |
35 | // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, |
36 | // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}}) |
37 | // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], |
38 | // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], |
39 | |
40 | // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) |
41 | |
42 | // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], |
43 | #pragma omp target |
44 | #pragma omp teams distribute parallel for simd num_teams(te), thread_limit(th) simdlen(64) |
45 | for(int i = 0; i < n; i++) { |
46 | a[i] = 0; |
47 | } |
48 | |
49 | int i; /* loop variable of the second region; named in its linear(i) clause */ |
50 | // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) |
51 | // CK1: call void @[[OFFL2:.+]]( |
52 | #pragma omp target |
53 | {{{ /* the teams construct sits inside three nested compound statements under target */ |
54 | #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) |
55 | for(i = 0; i < n; i++) { |
56 | a[i] = 0; |
57 | } |
58 | }}} |
59 | // outlined target regions: the checks below match, in order, the first offload entry, its teams-outlined function, the second offload entry and its teams-outlined function |
60 | // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], i{{32|64}} {{.+}}, {{.+}}) |
61 | // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}}, |
62 | // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}}, |
63 | // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], |
64 | // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], |
65 | // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to |
66 | // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to |
67 | // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], |
68 | // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], |
69 | // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], |
70 | // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], |
71 | // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) |
72 | // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) |
73 | // CK1: ret void |
74 | |
75 | // CK1: define internal void @[[OUTL1]]({{.+}}) |
76 | // CK1: call void @__kmpc_for_static_init_4( |
77 | // CK1: call void {{.+}} @__kmpc_fork_call( |
78 | // CK1: call void @__kmpc_for_static_fini( |
79 | // CK1: ret void |
80 | |
81 | // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}}) |
82 | // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}}) |
83 | // CK1: ret void |
84 | |
85 | // CK1: define internal void @[[OUTL2]]({{.+}}) |
86 | // CK1: call void @__kmpc_for_static_init_4( |
87 | // CK1: call void {{.+}} @__kmpc_fork_call( |
88 | // CK1: call void @__kmpc_for_static_fini( |
89 | // CK1: ret void |
90 | |
91 | return a[0]; /* reads the global array written by both regions */ |
92 | } |
93 | |
94 | // CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true} |
95 | // CK1-DAG: !{!"llvm.loop.vectorize.width", i32 4} |
96 | // CK1-DAG: !{!"llvm.loop.vectorize.width", i32 64} |
97 | |
98 | #endif // CK1 |
99 | |
100 | // Test host codegen. |
101 | // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 |
102 | // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
103 | // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 |
104 | // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 |
105 | // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
106 | // RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 |
107 | |
108 | // RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
109 | // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
110 | // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
111 | // RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
112 | // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
113 | // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
114 | // SIMD-ONLY1-NOT: {{__kmpc|__tgt}} |
115 | #ifdef CK2 |
116 | |
117 | // CK2: define {{.*}}i32 @{{.+}}teams_local_argv( |
118 | int teams_local_arg(void) { /* Case 2: one target region that zeroes a function-local array sized at run time; returns a[0]. */ |
119 | int n = 100; /* non-constant local, used as both the array length and the loop bound */ |
120 | int a[n], i; /* a is sized by the runtime value n (variable-length array); i is the loop variable named in linear(i) */ |
121 | |
122 | // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) |
123 | // CK2: call void @[[OFFL1:.+]]( |
124 | #pragma omp target |
125 | #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) |
126 | for(i = 0; i < n; i++) { |
127 | a[i] = 0; |
128 | } |
129 | |
130 | // outlined target region: the checks below match the offload entry function and then the teams-outlined function it forks |
131 | // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}}) |
132 | // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) |
133 | // CK2: ret void |
134 | |
135 | // CK2: define internal void @[[OUTL1]]({{.+}}) |
136 | // CK2: call void @__kmpc_for_static_init_4( |
137 | // CK2: call void {{.+}} @__kmpc_fork_call( |
138 | // CK2: call void @__kmpc_for_static_fini( |
139 | // CK2: ret void |
140 | |
141 | return a[0]; /* reads the local array written by the region */ |
142 | } |
143 | |
144 | // CK2-DAG: !{!"llvm.loop.vectorize.enable", i1 true} |
145 | // CK2-DAG: !{!"llvm.loop.vectorize.width", i32 4} |
146 | |
147 | #endif // CK2 |
148 | |
149 | // Test host codegen. |
150 | // RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 |
151 | // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
152 | // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 |
153 | // RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 |
154 | // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
155 | // RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 |
156 | |
157 | // RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s |
158 | // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
159 | // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s |
160 | // RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s |
161 | // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
162 | // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY2 %s |
163 | // SIMD-ONLY2-NOT: {{__kmpc|__tgt}} |
164 | #ifdef CK3 |
165 | |
166 | // CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float } |
167 | |
168 | template <typename T, int X, long long Y> /* Y is not referenced anywhere in the struct body */ |
169 | struct SS{ /* Case 3: target region inside a member function of a class template. */ |
170 | T a[X]; /* array member written by foo() and named in its aligned(a) clause */ |
171 | float b; /* not touched by foo() */ |
172 | // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}( |
173 | int foo(void) { /* zeroes a[i] for i < X in a target teams region, then returns a[0] */ |
174 | int i; /* loop variable named in linear(i) */ |
175 | // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{[^,]+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) |
176 | // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}}) |
177 | #pragma omp target |
178 | #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) |
179 | for(i = 0; i < X; i++) { |
180 | a[i] = (T)0; |
181 | } |
182 | |
183 | // outlined target region: the offload entry is expected to take a pointer to the enclosing struct; the checks then match the teams-outlined function it forks |
184 | // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}}) |
185 | // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) |
186 | // CK3: ret void |
187 | |
188 | // CK3: define internal void @[[OUTL1]]({{.+}}) |
189 | // CK3: call void @__kmpc_for_static_init_4( |
190 | // CK3: call void {{.+}} @__kmpc_fork_call( |
191 | // CK3: call void @__kmpc_for_static_fini( |
192 | // CK3: ret void |
193 | |
194 | return a[0]; /* T is converted to the int return type */ |
195 | } |
196 | }; |
197 | |
198 | int teams_template_struct(void) { /* Instantiates SS<int, 123, 456> and calls foo() on it; the call is the only use of foo() in this section. */ |
199 | SS<int, 123, 456> V; /* default-initialized; members are not set before foo() runs */ |
200 | return V.foo(); /* result is foo()'s a[0] */ |
201 | |
202 | } |
203 | |
204 | // CK3-DAG: !{!"llvm.loop.vectorize.enable", i1 true} |
205 | // CK3-DAG: !{!"llvm.loop.vectorize.width", i32 4} |
206 | #endif // CK3 |
207 | |
208 | // Test host codegen. |
209 | // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 |
210 | // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
211 | // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 |
212 | // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 |
213 | // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
214 | // RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 |
215 | |
216 | // RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s |
217 | // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
218 | // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s |
219 | // RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s |
220 | // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
221 | // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY3 %s |
222 | // SIMD-ONLY3-NOT: {{__kmpc|__tgt}} |
223 | |
224 | #ifdef CK4 |
225 | |
226 | template <typename T, int n> |
227 | int tmain(T argc) { /* Case 4, template part: one target teams region over a local array of n elements; always returns 0. argc is not read. */ |
228 | T a[n]; /* fixed-size local array; n is a template constant */ |
229 | int te = n/128; /* value handed to num_teams(); integer division, so 0 whenever n < 128 */ |
230 | int th = 128; /* value handed to thread_limit() */ |
231 | #pragma omp target |
232 | #pragma omp teams distribute parallel for simd num_teams(te) thread_limit(th) simdlen(64) |
233 | for(int i = 0; i < n; i++) { |
234 | a[i] = (T)0; |
235 | } |
236 | return 0; |
237 | } |
238 | |
239 | int main (int argc, char **argv) { /* Case 4, non-template part: one target teams region over a local runtime-sized array, then a call to tmain<int, 10>. argv is not read. */ |
240 | int n = 100; /* non-constant local, used as both the array length and the loop bound */ |
241 | int a[n], i; /* a is sized by the runtime value n (variable-length array); i is the loop variable named in linear(i) */ |
242 | #pragma omp target |
243 | #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) |
244 | for(i = 0; i < n; i++) { |
245 | a[i] = 0; |
246 | } |
247 | return tmain<int, 10>(argc); /* instantiates tmain with T = int, n = 10 and returns its result */ |
248 | } |
249 | |
250 | // CK4: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) |
251 | // CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) |
252 | // CK4: call void @[[OFFL1:.+]]({{.+}}) |
253 | // CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) |
254 | // CK4: ret |
255 | |
256 | // CK4: define {{.*}}void @[[OFFL1]]({{.+}}) |
257 | // CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) |
258 | // CK4: ret void |
259 | |
260 | // CK4: define internal void @[[OUTL1]]({{.+}}) |
261 | // CK4: call void @__kmpc_for_static_init_4( |
262 | // CK4: call void {{.+}} @__kmpc_fork_call( |
263 | // CK4: call void @__kmpc_for_static_fini( |
264 | // CK4: ret void |
265 | |
266 | // CK4: define {{.*}}i32 @[[TMAIN]]({{.+}}) |
267 | // CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) |
268 | // CK4: call void @[[OFFLT:.+]]({{.+}}) |
269 | // CK4: ret |
270 | // CK4-NEXT: } |
271 | |
272 | // CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}}) |
273 | // CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}}, |
274 | // CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}}, |
275 | // CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], |
276 | // CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], |
277 | // CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to |
278 | // CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to |
279 | // CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], |
280 | // CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], |
281 | // CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], |
282 | // CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], |
283 | // CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) |
284 | // CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}}) |
285 | // CK4: ret void |
286 | |
287 | // CK4: define internal void @[[OUTLT]]({{.+}}) |
288 | // CK4: call void @__kmpc_for_static_init_4( |
289 | // CK4: call void {{.+}} @__kmpc_fork_call( |
290 | // CK4: call void @__kmpc_for_static_fini( |
291 | // CK4: ret void |
292 | |
293 | // CK4-DAG: !{!"llvm.loop.vectorize.enable", i1 true} |
294 | // CK4-DAG: !{!"llvm.loop.vectorize.width", i32 4} |
295 | // CK4-DAG: !{!"llvm.loop.vectorize.width", i32 64} |
296 | |
297 | #endif // CK4 |
298 | #endif |
299 | |
300 | |