1 | // Test host code gen |
2 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 |
3 | // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
4 | // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 |
5 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 |
6 | // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
8 | |
9 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
10 | // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
11 | // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
12 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
13 | // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
15 | // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} |
16 | |
17 | // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 |
18 | // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
19 | // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 |
20 | // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 |
21 | // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
23 | |
24 | // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
25 | // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
26 | // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
27 | // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
28 | // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
30 | // SIMD-ONLY1-NOT: {{__kmpc|__tgt}} |
31 | // expected-no-diagnostics |
32 | #ifndef HEADER |
33 | #define HEADER |
34 | |
35 | |
// Host-side codegen coverage for '#pragma omp distribute parallel for':
// the same trivial loop is repeated once per schedule/dist_schedule
// combination so the generated runtime calls can be compared by FileCheck
// (the matching check lines live in the instantiating code, not here).
template <typename T>
T tmain() {
  // a, b, c are deliberately left uninitialized: this function is only
  // compiled to IR for inspection, never executed.
  T *a, *b, *c;
  int n = 10000; // loop trip count shared by every target region below
  int ch = 100;  // chunk size used by the chunked schedule variants

  // no schedule clauses
  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for
  for (int i = 0; i < n; ++i) {
    // 'cancel for' inside the combined construct: exercises cancellation
    // codegen within the distribute + parallel-for region.
    #pragma omp cancel for
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static no chunk
  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for dist_schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static chunk
  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for dist_schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static no chunk
  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static chunk
  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic no chunk
  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for schedule(dynamic)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic chunk
  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for schedule(dynamic, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // Value-initialized result; callers only need the instantiation itself.
  return T();
}
101 | |
102 | int main() { |
103 | double *a, *b, *c; |
104 | int n = 10000; |
105 | int ch = 100; |
106 | |
107 | #ifdef LAMBDA |
108 | // LAMBDA-LABEL: @main |
109 | // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( |
110 | [&]() { |
111 | // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( |
112 | |
113 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
114 | // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( |
115 | |
116 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
117 | // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( |
118 | |
119 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
120 | // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( |
121 | |
122 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
123 | // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( |
124 | |
125 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
126 | // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( |
127 | |
128 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
129 | // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( |
130 | |
131 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
132 | // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( |
133 | |
134 | // no schedule clauses |
135 | #pragma omp target |
136 | #pragma omp teams |
137 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( |
138 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) |
139 | |
140 | #pragma omp distribute parallel for |
141 | for (int i = 0; i < n; ++i) { |
142 | a[i] = b[i] + c[i]; |
143 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( |
144 | // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca |
145 | // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
146 | // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
147 | // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca |
148 | |
149 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
150 | |
151 | // check EUB for distribute |
152 | // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
153 | // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, |
154 | // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
155 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
156 | // LAMBDA-DAG: [[EUB_TRUE]]: |
157 | // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, |
158 | // LAMBDA: br label %[[EUB_END:.+]] |
159 | // LAMBDA-DAG: [[EUB_FALSE]]: |
160 | // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
161 | // LAMBDA: br label %[[EUB_END]] |
162 | // LAMBDA-DAG: [[EUB_END]]: |
163 | // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
164 | // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
165 | |
166 | // initialize omp.iv |
167 | // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
168 | // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
169 | // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] |
170 | |
171 | // check exit condition |
172 | // LAMBDA: [[OMP_JUMP_BACK]]: |
173 | // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
174 | // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
175 | // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
176 | // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
177 | |
178 | // check that PrevLB and PrevUB are passed to the 'for' |
179 | // LAMBDA: [[DIST_BODY]]: |
180 | // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
181 | // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to |
182 | // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
183 | // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to |
184 | // check that distlb and distub are properly passed to fork_call |
185 | // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
186 | // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
187 | // LAMBDA: br label %[[DIST_INC:.+]] |
188 | |
189 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
190 | // LAMBDA: [[DIST_INC]]: |
191 | // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
192 | // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
193 | // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
194 | // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
195 | // LAMBDA: br label %[[OMP_JUMP_BACK]] |
196 | |
197 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
198 | // LAMBDA: ret |
199 | |
200 | // implementation of 'parallel for' |
201 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
202 | |
203 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
204 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
205 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
206 | |
207 | // initialize lb and ub to PrevLB and PrevUB |
208 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
209 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
210 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
211 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
212 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
213 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
214 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
215 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
216 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
217 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
218 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
219 | |
220 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
221 | // In this case we use EUB |
222 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
223 | // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
224 | // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
225 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
226 | // LAMBDA: [[PF_EUB_TRUE]]: |
227 | // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
228 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
229 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
230 | // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
231 | // LAMBDA: br label %[[PF_EUB_END]] |
232 | // LAMBDA-DAG: [[PF_EUB_END]]: |
233 | // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
234 | // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
235 | |
236 | // initialize omp.iv |
237 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
238 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
239 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] |
240 | |
241 | // check exit condition |
242 | // LAMBDA: [[OMP_PF_JUMP_BACK]]: |
243 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
244 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
245 | // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
246 | // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
247 | |
248 | // check that PrevLB and PrevUB are passed to the 'for' |
249 | // LAMBDA: [[PF_BODY]]: |
250 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
251 | // LAMBDA: br label {{.+}} |
252 | |
253 | // check stride 1 for 'for' in 'distribute parallel for' |
254 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
255 | // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
256 | // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
257 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] |
258 | |
259 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
260 | // LAMBDA: ret |
261 | |
262 | [&]() { |
263 | a[i] = b[i] + c[i]; |
264 | }(); |
265 | } |
266 | |
  // dist_schedule: static no chunk (same as default - no dist_schedule)
268 | #pragma omp target |
269 | #pragma omp teams |
270 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( |
271 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) |
272 | |
273 | #pragma omp distribute parallel for dist_schedule(static) |
274 | for (int i = 0; i < n; ++i) { |
275 | a[i] = b[i] + c[i]; |
276 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( |
277 | // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca |
278 | // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
279 | // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
280 | // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca |
281 | |
282 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
283 | |
284 | // check EUB for distribute |
285 | // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
286 | // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, |
287 | // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
288 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
289 | // LAMBDA-DAG: [[EUB_TRUE]]: |
290 | // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, |
291 | // LAMBDA: br label %[[EUB_END:.+]] |
292 | // LAMBDA-DAG: [[EUB_FALSE]]: |
293 | // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
294 | // LAMBDA: br label %[[EUB_END]] |
295 | // LAMBDA-DAG: [[EUB_END]]: |
296 | // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
297 | // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
298 | |
299 | // initialize omp.iv |
300 | // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
301 | // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
302 | // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] |
303 | |
304 | // check exit condition |
305 | // LAMBDA: [[OMP_JUMP_BACK]]: |
306 | // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
307 | // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
308 | // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
309 | // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
310 | |
311 | // check that PrevLB and PrevUB are passed to the 'for' |
312 | // LAMBDA: [[DIST_BODY]]: |
313 | // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
314 | // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to |
315 | // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
316 | // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to |
317 | // check that distlb and distub are properly passed to fork_call |
318 | // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
319 | // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
320 | // LAMBDA: br label %[[DIST_INC:.+]] |
321 | |
322 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
323 | // LAMBDA: [[DIST_INC]]: |
324 | // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
325 | // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
326 | // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
327 | // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
328 | // LAMBDA: br label %[[OMP_JUMP_BACK]] |
329 | |
330 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
331 | // LAMBDA: ret |
332 | |
333 | // implementation of 'parallel for' |
334 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
335 | |
336 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
337 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
338 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
339 | |
340 | // initialize lb and ub to PrevLB and PrevUB |
341 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
342 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
343 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
344 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
345 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
346 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
347 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
348 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
349 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
350 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
351 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
352 | |
353 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
354 | // In this case we use EUB |
355 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
356 | // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
357 | // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
358 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
359 | // LAMBDA: [[PF_EUB_TRUE]]: |
360 | // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
361 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
362 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
363 | // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
364 | // LAMBDA: br label %[[PF_EUB_END]] |
365 | // LAMBDA-DAG: [[PF_EUB_END]]: |
366 | // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
367 | // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
368 | |
369 | // initialize omp.iv |
370 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
371 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
372 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] |
373 | |
374 | // check exit condition |
375 | // LAMBDA: [[OMP_PF_JUMP_BACK]]: |
376 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
377 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
378 | // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
379 | // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
380 | |
381 | // check that PrevLB and PrevUB are passed to the 'for' |
382 | // LAMBDA: [[PF_BODY]]: |
383 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
384 | // LAMBDA: br label {{.+}} |
385 | |
386 | // check stride 1 for 'for' in 'distribute parallel for' |
387 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
388 | // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
389 | // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
390 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] |
391 | |
392 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
393 | // LAMBDA: ret |
394 | [&]() { |
395 | a[i] = b[i] + c[i]; |
396 | }(); |
397 | } |
398 | |
399 | // dist_schedule: static chunk |
400 | #pragma omp target |
401 | #pragma omp teams |
402 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( |
403 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) |
404 | |
405 | #pragma omp distribute parallel for dist_schedule(static, ch) |
406 | for (int i = 0; i < n; ++i) { |
407 | a[i] = b[i] + c[i]; |
408 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( |
409 | // LAMBDA: alloca |
410 | // LAMBDA: alloca |
411 | // LAMBDA: alloca |
412 | // LAMBDA: alloca |
413 | // LAMBDA: alloca |
414 | // LAMBDA: alloca |
415 | // LAMBDA: alloca |
416 | // LAMBDA: [[OMP_IV:%.+]] = alloca |
417 | // LAMBDA: alloca |
418 | // LAMBDA: alloca |
419 | // LAMBDA: alloca |
420 | // LAMBDA: alloca |
421 | // LAMBDA: [[OMP_LB:%.+]] = alloca |
422 | // LAMBDA: [[OMP_UB:%.+]] = alloca |
423 | // LAMBDA: [[OMP_ST:%.+]] = alloca |
424 | |
425 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, |
426 | |
427 | // check EUB for distribute |
428 | // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
429 | // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} |
430 | // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
431 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
432 | // LAMBDA-DAG: [[EUB_TRUE]]: |
433 | // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, |
434 | // LAMBDA: br label %[[EUB_END:.+]] |
435 | // LAMBDA-DAG: [[EUB_FALSE]]: |
436 | // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
437 | // LAMBDA: br label %[[EUB_END]] |
438 | // LAMBDA-DAG: [[EUB_END]]: |
439 | // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
440 | // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
441 | |
442 | // initialize omp.iv |
443 | // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
444 | // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
445 | |
446 | // check exit condition |
447 | // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
448 | // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} |
449 | // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 |
450 | // LAMBDA: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] |
451 | // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] |
452 | |
453 | // check that PrevLB and PrevUB are passed to the 'for' |
454 | // LAMBDA: [[DIST_INNER_LOOP_BODY]]: |
455 | // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
456 | // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
457 | // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
458 | // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
459 | // check that distlb and distub are properly passed to fork_call |
460 | // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
461 | // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
462 | // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] |
463 | |
464 | // check DistInc |
465 | // LAMBDA: [[DIST_INNER_LOOP_INC]]: |
466 | // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
467 | // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
468 | // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] |
469 | // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
470 | // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
471 | // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
472 | // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] |
473 | // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], |
474 | // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
475 | // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
476 | // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] |
477 | // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], |
478 | |
479 | // Update UB |
480 | // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
481 | // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} |
482 | // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] |
483 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] |
484 | // LAMBDA-DAG: [[EUB_TRUE_1]]: |
485 | // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} |
486 | // LAMBDA: br label %[[EUB_END_1:.+]] |
487 | // LAMBDA-DAG: [[EUB_FALSE_1]]: |
488 | // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], |
489 | // LAMBDA: br label %[[EUB_END_1]] |
490 | // LAMBDA-DAG: [[EUB_END_1]]: |
491 | // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] |
492 | // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], |
493 | |
494 | // Store LB in IV |
495 | // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
496 | // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], |
497 | |
498 | // LAMBDA: [[DIST_INNER_LOOP_END]]: |
499 | // LAMBDA: br label %[[LOOP_EXIT:.+]] |
500 | |
501 | // loop exit |
502 | // LAMBDA: [[LOOP_EXIT]]: |
503 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
504 | // LAMBDA: ret |
505 | |
506 | // skip implementation of 'parallel for': using default scheduling and was tested above |
507 | [&]() { |
508 | a[i] = b[i] + c[i]; |
509 | }(); |
510 | } |
511 | |
512 | // schedule: static no chunk |
513 | #pragma omp target |
514 | #pragma omp teams |
515 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( |
516 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) |
517 | |
518 | #pragma omp distribute parallel for schedule(static) |
519 | for (int i = 0; i < n; ++i) { |
520 | a[i] = b[i] + c[i]; |
521 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( |
522 | // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca |
523 | // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
524 | // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
525 | // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca |
526 | |
527 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
528 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, |
529 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
530 | // LAMBDA: ret |
531 | |
    // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
533 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
534 | |
535 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
536 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
537 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
538 | |
539 | // initialize lb and ub to PrevLB and PrevUB |
540 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
541 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
542 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
543 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
544 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
545 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
546 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
547 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
548 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
549 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
550 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
551 | |
552 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
553 | // In this case we use EUB |
554 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
555 | // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
556 | // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
557 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
558 | // LAMBDA: [[PF_EUB_TRUE]]: |
559 | // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
560 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
561 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
562 | // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
563 | // LAMBDA: br label %[[PF_EUB_END]] |
564 | // LAMBDA-DAG: [[PF_EUB_END]]: |
565 | // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
566 | // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
567 | |
568 | // initialize omp.iv |
569 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
570 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
571 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] |
572 | |
573 | // check exit condition |
574 | // LAMBDA: [[OMP_PF_JUMP_BACK]]: |
575 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
576 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
577 | // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
578 | // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
579 | |
580 | // check that PrevLB and PrevUB are passed to the 'for' |
581 | // LAMBDA: [[PF_BODY]]: |
582 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
583 | // LAMBDA: br label {{.+}} |
584 | |
585 | // check stride 1 for 'for' in 'distribute parallel for' |
586 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
587 | // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
588 | // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
589 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] |
590 | |
591 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
592 | // LAMBDA: ret |
593 | |
594 | [&]() { |
595 | a[i] = b[i] + c[i]; |
596 | }(); |
597 | } |
598 | |
599 | // schedule: static chunk |
600 | #pragma omp target |
601 | #pragma omp teams |
602 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( |
603 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) |
604 | |
605 | #pragma omp distribute parallel for schedule(static, ch) |
606 | for (int i = 0; i < n; ++i) { |
607 | a[i] = b[i] + c[i]; |
608 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( |
609 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
610 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, |
611 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
612 | // LAMBDA: ret |
613 | |
614 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
615 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
616 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
617 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
618 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
619 | // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
620 | |
621 | // initialize lb and ub to PrevLB and PrevUB |
622 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
623 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
624 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
625 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
626 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
627 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
628 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
629 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
630 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
631 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
632 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
633 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
634 | |
635 | // check PrevEUB (using PrevUB instead of NumIt as upper bound) |
636 | // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: |
637 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
638 | // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to |
639 | // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
640 | // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] |
641 | // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] |
642 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
643 | // LAMBDA: [[PF_EUB_TRUE]]: |
644 | // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
645 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
646 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
647 | // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
648 | // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to |
649 | // LAMBDA: br label %[[PF_EUB_END]] |
650 | // LAMBDA-DAG: [[PF_EUB_END]]: |
651 | // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] |
652 | // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] |
653 | // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to |
654 | // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], |
655 | // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], |
656 | |
657 | // initialize omp.iv (IV = LB) |
658 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
659 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
660 | |
661 | // outer loop: while (IV < UB) { |
662 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
663 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
664 | // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
665 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
666 | |
667 | // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: |
668 | // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] |
669 | |
670 | // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: |
671 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
672 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
673 | // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
674 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
675 | |
676 | // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: |
677 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
678 | // skip body branch |
679 | // LAMBDA: br{{.+}} |
680 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
681 | |
682 | // IV = IV + 1 and inner loop latch |
683 | // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: |
// LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
687 | // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] |
688 | |
689 | // check NextLB and NextUB |
690 | // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: |
691 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
692 | |
693 | // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: |
694 | // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
695 | // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
696 | // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] |
697 | // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], |
698 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
699 | // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
700 | // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] |
701 | // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], |
702 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
703 | |
704 | // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: |
705 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
706 | // LAMBDA: ret |
707 | [&]() { |
708 | a[i] = b[i] + c[i]; |
709 | }(); |
710 | } |
711 | |
712 | // schedule: dynamic no chunk |
713 | #pragma omp target |
714 | #pragma omp teams |
715 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( |
716 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) |
717 | |
718 | #pragma omp distribute parallel for schedule(dynamic) |
719 | for (int i = 0; i < n; ++i) { |
720 | a[i] = b[i] + c[i]; |
721 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( |
722 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
723 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, |
724 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
725 | // LAMBDA: ret |
726 | |
727 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
728 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
729 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
730 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
731 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
732 | // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
733 | |
734 | // initialize lb and ub to PrevLB and PrevUB |
735 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
736 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
737 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
738 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
739 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
740 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
741 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
742 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
743 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
744 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
745 | // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
746 | // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
747 | // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
748 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
749 | |
750 | // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: |
751 | // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
752 | // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
753 | // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
754 | |
755 | // initialize omp.iv (IV = LB) |
756 | // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: |
757 | // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
758 | // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
759 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
760 | |
761 | // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: |
762 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
763 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
764 | // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
765 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
766 | |
767 | // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: |
768 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
769 | // skip body branch |
770 | // LAMBDA: br{{.+}} |
771 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
772 | |
773 | // IV = IV + 1 and inner loop latch |
774 | // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: |
// LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
778 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
779 | |
780 | // check NextLB and NextUB |
781 | // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: |
782 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
783 | |
784 | // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: |
785 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
786 | |
787 | // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: |
788 | // LAMBDA: ret |
789 | [&]() { |
790 | a[i] = b[i] + c[i]; |
791 | }(); |
792 | } |
793 | |
794 | // schedule: dynamic chunk |
795 | #pragma omp target |
796 | #pragma omp teams |
797 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( |
798 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) |
799 | |
800 | #pragma omp distribute parallel for schedule(dynamic, ch) |
801 | for (int i = 0; i < n; ++i) { |
802 | a[i] = b[i] + c[i]; |
803 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( |
804 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
805 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, |
806 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
807 | // LAMBDA: ret |
808 | |
809 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
810 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
811 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
812 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
813 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
814 | // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
815 | |
816 | // initialize lb and ub to PrevLB and PrevUB |
817 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
818 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
819 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
820 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
821 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
822 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
823 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
824 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
825 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
826 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
827 | // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
828 | // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
829 | // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
830 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
831 | |
832 | // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: |
833 | // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
834 | // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
835 | // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
836 | |
837 | // initialize omp.iv (IV = LB) |
838 | // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: |
839 | // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
840 | // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
841 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
842 | |
843 | // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: |
844 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
845 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
846 | // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
847 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
848 | |
849 | // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: |
850 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
851 | // skip body branch |
852 | // LAMBDA: br{{.+}} |
853 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
854 | |
855 | // IV = IV + 1 and inner loop latch |
856 | // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: |
// LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
860 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
861 | |
862 | // check NextLB and NextUB |
863 | // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: |
864 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
865 | |
866 | // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: |
867 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
868 | |
869 | // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: |
870 | // LAMBDA: ret |
871 | [&]() { |
872 | a[i] = b[i] + c[i]; |
873 | }(); |
874 | } |
875 | }(); |
876 | return 0; |
877 | #else |
878 | // CHECK-LABEL: @main |
879 | |
880 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
881 | // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( |
882 | |
883 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
884 | // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( |
885 | |
886 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
887 | // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( |
888 | |
889 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
890 | // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( |
891 | |
892 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
893 | // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( |
894 | |
895 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
896 | // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( |
897 | |
898 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
899 | // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( |
900 | |
901 | // CHECK: call{{.+}} [[TMAIN:@.+]]() |
902 | |
903 | // no schedule clauses |
904 | #pragma omp target |
905 | #pragma omp teams |
906 | // CHECK: define internal void [[OFFLOADING_FUN_1]]( |
907 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) |
908 | |
909 | #pragma omp distribute parallel for |
910 | for (int i = 0; i < n; ++i) { |
911 | a[i] = b[i] + c[i]; |
912 | // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( |
913 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
914 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
915 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
916 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
917 | |
918 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
919 | |
920 | // check EUB for distribute |
921 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
922 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
923 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
924 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
925 | // CHECK-DAG: [[EUB_TRUE]]: |
926 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
927 | // CHECK: br label %[[EUB_END:.+]] |
928 | // CHECK-DAG: [[EUB_FALSE]]: |
929 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
930 | // CHECK: br label %[[EUB_END]] |
931 | // CHECK-DAG: [[EUB_END]]: |
932 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
933 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
934 | |
935 | // initialize omp.iv |
936 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
937 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
938 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
939 | |
940 | // check exit condition |
941 | // CHECK: [[OMP_JUMP_BACK]]: |
942 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
943 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
944 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
945 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
946 | |
947 | // check that PrevLB and PrevUB are passed to the 'for' |
948 | // CHECK: [[DIST_BODY]]: |
949 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
950 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
951 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
952 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
953 | // check that distlb and distub are properly passed to fork_call |
954 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
955 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
956 | // CHECK: br label %[[DIST_INC:.+]] |
957 | |
958 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
959 | // CHECK: [[DIST_INC]]: |
960 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
961 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
962 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
963 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
964 | // CHECK: br label %[[OMP_JUMP_BACK]] |
965 | |
966 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
967 | // CHECK: ret |
968 | |
969 | // implementation of 'parallel for' |
970 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
971 | |
972 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
973 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
974 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
975 | |
976 | // initialize lb and ub to PrevLB and PrevUB |
977 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
978 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
979 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
980 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
981 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
982 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
983 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
984 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
985 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
986 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
987 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
988 | |
989 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
990 | // In this case we use EUB |
991 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
992 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
993 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
994 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
995 | // CHECK: [[PF_EUB_TRUE]]: |
996 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
997 | // CHECK: br label %[[PF_EUB_END:.+]] |
998 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
999 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1000 | // CHECK: br label %[[PF_EUB_END]] |
1001 | // CHECK-DAG: [[PF_EUB_END]]: |
1002 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1003 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1004 | |
1005 | // initialize omp.iv |
1006 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1007 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1008 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1009 | |
1010 | // check exit condition |
1011 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1012 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1013 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1014 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1015 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1016 | |
1017 | // check that PrevLB and PrevUB are passed to the 'for' |
1018 | // CHECK: [[PF_BODY]]: |
1019 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1020 | // CHECK: br label {{.+}} |
1021 | |
1022 | // check stride 1 for 'for' in 'distribute parallel for' |
1023 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1024 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1025 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1026 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1027 | |
1028 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1029 | // CHECK: ret |
1030 | } |
1031 | |
1032 | // dist_schedule: static no chunk |
1033 | #pragma omp target |
1034 | #pragma omp teams |
1035 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( |
1036 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) |
1037 | |
1038 | #pragma omp distribute parallel for dist_schedule(static) |
1039 | for (int i = 0; i < n; ++i) { |
1040 | a[i] = b[i] + c[i]; |
1041 | // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( |
1042 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1043 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1044 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1045 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1046 | |
1047 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1048 | |
1049 | // check EUB for distribute |
1050 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1051 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
1052 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1053 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1054 | // CHECK-DAG: [[EUB_TRUE]]: |
1055 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1056 | // CHECK: br label %[[EUB_END:.+]] |
1057 | // CHECK-DAG: [[EUB_FALSE]]: |
1058 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1059 | // CHECK: br label %[[EUB_END]] |
1060 | // CHECK-DAG: [[EUB_END]]: |
1061 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1062 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1063 | |
1064 | // initialize omp.iv |
1065 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1066 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1067 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
1068 | |
1069 | // check exit condition |
1070 | // CHECK: [[OMP_JUMP_BACK]]: |
1071 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1072 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
1073 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
1074 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
1075 | |
1076 | // check that PrevLB and PrevUB are passed to the 'for' |
1077 | // CHECK: [[DIST_BODY]]: |
1078 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1079 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1080 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1081 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1082 | // check that distlb and distub are properly passed to fork_call |
1083 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1084 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1085 | // CHECK: br label %[[DIST_INC:.+]] |
1086 | |
1087 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
1088 | // CHECK: [[DIST_INC]]: |
1089 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1090 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1091 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
1092 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1093 | // CHECK: br label %[[OMP_JUMP_BACK]] |
1094 | |
1095 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1096 | // CHECK: ret |
1097 | |
1098 | // implementation of 'parallel for' |
1099 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1100 | |
1101 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1102 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1103 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1104 | |
1105 | // initialize lb and ub to PrevLB and PrevUB |
1106 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1107 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1108 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1109 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1110 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1111 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1112 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1113 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1114 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1115 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1116 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1117 | |
1118 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1119 | // In this case we use EUB |
1120 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1121 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1122 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1123 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1124 | // CHECK: [[PF_EUB_TRUE]]: |
1125 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1126 | // CHECK: br label %[[PF_EUB_END:.+]] |
1127 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1128 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1129 | // CHECK: br label %[[PF_EUB_END]] |
1130 | // CHECK-DAG: [[PF_EUB_END]]: |
1131 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1132 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1133 | |
1134 | // initialize omp.iv |
1135 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1136 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1137 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1138 | |
1139 | // check exit condition |
1140 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1141 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1142 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1143 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1144 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1145 | |
1146 | // check that PrevLB and PrevUB are passed to the 'for' |
1147 | // CHECK: [[PF_BODY]]: |
1148 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1149 | // CHECK: br label {{.+}} |
1150 | |
1151 | // check stride 1 for 'for' in 'distribute parallel for' |
1152 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1153 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1154 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1155 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1156 | |
1157 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1158 | // CHECK: ret |
1159 | } |
1160 | |
1161 | // dist_schedule: static chunk |
1162 | #pragma omp target |
1163 | #pragma omp teams |
1164 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( |
1165 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) |
1166 | |
1167 | #pragma omp distribute parallel for dist_schedule(static, ch) |
1168 | for (int i = 0; i < n; ++i) { |
1169 | a[i] = b[i] + c[i]; |
1170 | // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( |
1171 | // CHECK: alloca |
1172 | // CHECK: alloca |
1173 | // CHECK: alloca |
1174 | // CHECK: alloca |
1175 | // CHECK: alloca |
1176 | // CHECK: alloca |
1177 | // CHECK: alloca |
1178 | // CHECK: [[OMP_IV:%.+]] = alloca |
1179 | // CHECK: alloca |
1180 | // CHECK: alloca |
1181 | // CHECK: alloca |
1182 | // CHECK: alloca |
1183 | // CHECK: [[OMP_LB:%.+]] = alloca |
1184 | // CHECK: [[OMP_UB:%.+]] = alloca |
1185 | // CHECK: [[OMP_ST:%.+]] = alloca |
1186 | |
// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
1188 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, |
1189 | |
1190 | // check EUB for distribute |
1191 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1192 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} |
1193 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1194 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1195 | // CHECK-DAG: [[EUB_TRUE]]: |
1196 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1197 | // CHECK: br label %[[EUB_END:.+]] |
1198 | // CHECK-DAG: [[EUB_FALSE]]: |
1199 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1200 | // CHECK: br label %[[EUB_END]] |
1201 | // CHECK-DAG: [[EUB_END]]: |
1202 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1203 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1204 | |
1205 | // initialize omp.iv |
1206 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1207 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1208 | |
1209 | // check exit condition |
1210 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1211 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} |
1212 | // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 |
1213 | // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] |
1214 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] |
1215 | |
1216 | // check that PrevLB and PrevUB are passed to the 'for' |
1217 | // CHECK: [[DIST_INNER_LOOP_BODY]]: |
1218 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1219 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1220 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1221 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1222 | // check that distlb and distub are properly passed to fork_call |
1223 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1224 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1225 | // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] |
1226 | |
1227 | // check DistInc |
1228 | // CHECK: [[DIST_INNER_LOOP_INC]]: |
1229 | // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1230 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1231 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] |
1232 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1233 | // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1234 | // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1235 | // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] |
1236 | // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], |
1237 | // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1238 | // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1239 | // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] |
1240 | // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], |
1241 | |
1242 | // Update UB |
1243 | // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1244 | // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} |
1245 | // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] |
1246 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] |
1247 | // CHECK-DAG: [[EUB_TRUE_1]]: |
1248 | // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} |
1249 | // CHECK: br label %[[EUB_END_1:.+]] |
1250 | // CHECK-DAG: [[EUB_FALSE_1]]: |
1251 | // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], |
1252 | // CHECK: br label %[[EUB_END_1]] |
1253 | // CHECK-DAG: [[EUB_END_1]]: |
1254 | // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] |
1255 | // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], |
1256 | |
1257 | // Store LB in IV |
1258 | // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1259 | // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], |
1260 | |
1261 | // CHECK: [[DIST_INNER_LOOP_END]]: |
1262 | // CHECK: br label %[[LOOP_EXIT:.+]] |
1263 | |
1264 | // loop exit |
1265 | // CHECK: [[LOOP_EXIT]]: |
1266 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1267 | // CHECK: ret |
1268 | |
1269 | // skip implementation of 'parallel for': using default scheduling and was tested above |
1270 | } |
1271 | |
1272 | // schedule: static no chunk |
1273 | #pragma omp target |
1274 | #pragma omp teams |
1275 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( |
1276 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) |
1277 | |
1278 | #pragma omp distribute parallel for schedule(static) |
1279 | for (int i = 0; i < n; ++i) { |
1280 | a[i] = b[i] + c[i]; |
1281 | // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( |
1282 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1283 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1284 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1285 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1286 | |
1287 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1288 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, |
1289 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1290 | // CHECK: ret |
1291 | |
// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
1293 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1294 | |
1295 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1296 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1297 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1298 | |
1299 | // initialize lb and ub to PrevLB and PrevUB |
1300 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1301 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1302 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1303 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1304 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1305 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1306 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1307 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1308 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1309 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1310 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1311 | |
1312 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1313 | // In this case we use EUB |
1314 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1315 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1316 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1317 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1318 | // CHECK: [[PF_EUB_TRUE]]: |
1319 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1320 | // CHECK: br label %[[PF_EUB_END:.+]] |
1321 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1322 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1323 | // CHECK: br label %[[PF_EUB_END]] |
1324 | // CHECK-DAG: [[PF_EUB_END]]: |
1325 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1326 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1327 | |
1328 | // initialize omp.iv |
1329 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1330 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1331 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1332 | |
1333 | // check exit condition |
1334 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1335 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1336 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1337 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1338 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1339 | |
1340 | // check that PrevLB and PrevUB are passed to the 'for' |
1341 | // CHECK: [[PF_BODY]]: |
1342 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1343 | // CHECK: br label {{.+}} |
1344 | |
1345 | // check stride 1 for 'for' in 'distribute parallel for' |
1346 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1347 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1348 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1349 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1350 | |
1351 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1352 | // CHECK: ret |
1353 | } |
1354 | |
1355 | // schedule: static chunk |
1356 | #pragma omp target |
1357 | #pragma omp teams |
1358 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( |
1359 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) |
1360 | |
1361 | #pragma omp distribute parallel for schedule(static, ch) |
1362 | for (int i = 0; i < n; ++i) { |
1363 | a[i] = b[i] + c[i]; |
1364 | // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( |
1365 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1366 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, |
1367 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1368 | // CHECK: ret |
1369 | |
1370 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
1371 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1372 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1373 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1374 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1375 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
1376 | |
1377 | // initialize lb and ub to PrevLB and PrevUB |
1378 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1379 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1380 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1381 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1382 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1383 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1384 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1385 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1386 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1387 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1388 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1389 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
1390 | |
1391 | // check PrevEUB (using PrevUB instead of NumIt as upper bound) |
1392 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
1393 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1394 | // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to |
1395 | // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1396 | // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] |
1397 | // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] |
1398 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1399 | // CHECK: [[PF_EUB_TRUE]]: |
1400 | // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1401 | // CHECK: br label %[[PF_EUB_END:.+]] |
1402 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1403 | // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1404 | // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to |
1405 | // CHECK: br label %[[PF_EUB_END]] |
1406 | // CHECK-DAG: [[PF_EUB_END]]: |
1407 | // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] |
1408 | // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] |
1409 | // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to |
1410 | // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], |
1411 | // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], |
1412 | |
1413 | // initialize omp.iv (IV = LB) |
1414 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1415 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1416 | |
1417 | // outer loop: while (IV < UB) { |
1418 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1419 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1420 | // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1421 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
1422 | |
1423 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
1424 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] |
1425 | |
1426 | // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: |
1427 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1428 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1429 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
1430 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
1431 | |
1432 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
1433 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1434 | // skip body branch |
1435 | // CHECK: br{{.+}} |
1436 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
1437 | |
1438 | // IV = IV + 1 and inner loop latch |
1439 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
1440 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
1441 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
1442 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
1443 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] |
1444 | |
1445 | // check NextLB and NextUB |
1446 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
1447 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
1448 | |
1449 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
1450 | // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1451 | // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
1452 | // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] |
1453 | // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], |
1454 | // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
1455 | // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
1456 | // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] |
1457 | // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], |
1458 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
1459 | |
1460 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
1461 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1462 | // CHECK: ret |
1463 | } |
1464 | |
1465 | // schedule: dynamic no chunk |
1466 | #pragma omp target |
1467 | #pragma omp teams |
1468 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( |
1469 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) |
1470 | |
1471 | #pragma omp distribute parallel for schedule(dynamic) |
1472 | for (int i = 0; i < n; ++i) { |
1473 | a[i] = b[i] + c[i]; |
1474 | // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( |
1475 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1476 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, |
1477 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1478 | // CHECK: ret |
1479 | |
1480 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
1481 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1482 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1483 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1484 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1485 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
1486 | |
1487 | // initialize lb and ub to PrevLB and PrevUB |
1488 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1489 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1490 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1491 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1492 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1493 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1494 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1495 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1496 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1497 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1498 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1499 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
1500 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
1501 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
1502 | |
1503 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
1504 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
1505 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
1506 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
1507 | |
1508 | // initialize omp.iv (IV = LB) |
1509 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
1510 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1511 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1512 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
1513 | |
1514 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
1515 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1516 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1517 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
1518 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
1519 | |
1520 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
1521 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1522 | // skip body branch |
1523 | // CHECK: br{{.+}} |
1524 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
1525 | |
1526 | // IV = IV + 1 and inner loop latch |
1527 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
1528 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
1529 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
1530 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
1531 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
1532 | |
1533 | // check NextLB and NextUB |
1534 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
1535 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
1536 | |
1537 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
1538 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
1539 | |
1540 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
1541 | // CHECK: ret |
1542 | } |
1543 | |
1544 | // schedule: dynamic chunk |
1545 | #pragma omp target |
1546 | #pragma omp teams |
1547 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( |
1548 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) |
1549 | |
1550 | #pragma omp distribute parallel for schedule(dynamic, ch) |
1551 | for (int i = 0; i < n; ++i) { |
1552 | a[i] = b[i] + c[i]; |
1553 | // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( |
1554 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1555 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, |
1556 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1557 | // CHECK: ret |
1558 | |
1559 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
1560 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1561 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1562 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1563 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1564 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
1565 | |
1566 | // initialize lb and ub to PrevLB and PrevUB |
1567 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1568 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1569 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1570 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1571 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1572 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1573 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1574 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1575 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1576 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1577 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1578 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
1579 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
1580 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
1581 | |
1582 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
1583 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
1584 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
1585 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
1586 | |
1587 | // initialize omp.iv (IV = LB) |
1588 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
1589 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1590 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1591 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
1592 | |
1593 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
1594 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1595 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1596 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
1597 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
1598 | |
1599 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
1600 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1601 | // skip body branch |
1602 | // CHECK: br{{.+}} |
1603 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
1604 | |
1605 | // IV = IV + 1 and inner loop latch |
1606 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
1607 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
1608 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
1609 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
1610 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
1611 | |
1612 | // check NextLB and NextUB |
1613 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
1614 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
1615 | |
1616 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
1617 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
1618 | |
1619 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
1620 | // CHECK: ret |
1621 | } |
1622 | |
1623 | return tmain<int>(); |
1624 | #endif |
1625 | } |
1626 | |
1627 | // check code |
1628 | // CHECK: define{{.+}} [[TMAIN]]() |
1629 | |
1630 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1631 | // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( |
1632 | |
1633 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1634 | // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( |
1635 | |
1636 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1637 | // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( |
1638 | |
1639 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1640 | // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( |
1641 | |
1642 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1643 | // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( |
1644 | |
1645 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1646 | // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( |
1647 | |
1648 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1649 | // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( |
1650 | |
1651 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( |
1652 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) |
1653 | |
1654 | // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( |
1655 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1656 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1657 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1658 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1659 | |
1660 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1661 | |
1662 | // check EUB for distribute |
1663 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1664 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
1665 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1666 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1667 | // CHECK-DAG: [[EUB_TRUE]]: |
1668 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1669 | // CHECK: br label %[[EUB_END:.+]] |
1670 | // CHECK-DAG: [[EUB_FALSE]]: |
1671 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1672 | // CHECK: br label %[[EUB_END]] |
1673 | // CHECK-DAG: [[EUB_END]]: |
1674 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1675 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1676 | |
1677 | // initialize omp.iv |
1678 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1679 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1680 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
1681 | |
1682 | // check exit condition |
1683 | // CHECK: [[OMP_JUMP_BACK]]: |
1684 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1685 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
1686 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
1687 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
1688 | |
1689 | // check that PrevLB and PrevUB are passed to the 'for' |
1690 | // CHECK: [[DIST_BODY]]: |
1691 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1692 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1693 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1694 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1695 | // check that distlb and distub are properly passed to fork_call |
1696 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1697 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1698 | // CHECK: br label %[[DIST_INC:.+]] |
1699 | |
1700 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
1701 | // CHECK: [[DIST_INC]]: |
1702 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1703 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1704 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
1705 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1706 | // CHECK: br label %[[OMP_JUMP_BACK]] |
1707 | |
1708 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1709 | // CHECK: ret |
1710 | |
1711 | // implementation of 'parallel for' |
1712 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1713 | |
1714 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1715 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1716 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1717 | |
1718 | // initialize lb and ub to PrevLB and PrevUB |
1719 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1720 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1721 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1722 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1723 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1724 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1725 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1726 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1727 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1728 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1729 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1730 | |
1731 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1732 | // In this case we use EUB |
1733 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1734 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1735 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1736 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1737 | // CHECK: [[PF_EUB_TRUE]]: |
1738 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1739 | // CHECK: br label %[[PF_EUB_END:.+]] |
1740 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1741 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1742 | // CHECK: br label %[[PF_EUB_END]] |
1743 | // CHECK-DAG: [[PF_EUB_END]]: |
1744 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1745 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1746 | |
1747 | // initialize omp.iv |
1748 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1749 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1750 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1751 | |
1752 | // check exit condition |
1753 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1754 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1755 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1756 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1757 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1758 | |
// body of the 'for' loop
1760 | // CHECK: [[PF_BODY]]: |
1761 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1762 | // CHECK: br label {{.+}} |
1763 | |
1764 | // check stride 1 for 'for' in 'distribute parallel for' |
1765 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1766 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1767 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1768 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1769 | |
1770 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1771 | // CHECK: ret |
1772 | |
1773 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( |
1774 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) |
1775 | |
1776 | // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( |
1777 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1778 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1779 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1780 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1781 | |
1782 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1783 | |
1784 | // check EUB for distribute |
1785 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1786 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
1787 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1788 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1789 | // CHECK-DAG: [[EUB_TRUE]]: |
1790 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1791 | // CHECK: br label %[[EUB_END:.+]] |
1792 | // CHECK-DAG: [[EUB_FALSE]]: |
1793 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1794 | // CHECK: br label %[[EUB_END]] |
1795 | // CHECK-DAG: [[EUB_END]]: |
1796 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1797 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1798 | |
1799 | // initialize omp.iv |
1800 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1801 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1802 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
1803 | |
1804 | // check exit condition |
1805 | // CHECK: [[OMP_JUMP_BACK]]: |
1806 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1807 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
1808 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
1809 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
1810 | |
1811 | // check that PrevLB and PrevUB are passed to the 'for' |
1812 | // CHECK: [[DIST_BODY]]: |
1813 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1814 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1815 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1816 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1817 | // check that distlb and distub are properly passed to fork_call |
1818 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1819 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1820 | // CHECK: br label %[[DIST_INC:.+]] |
1821 | |
1822 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
1823 | // CHECK: [[DIST_INC]]: |
1824 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1825 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1826 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
1827 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1828 | // CHECK: br label %[[OMP_JUMP_BACK]] |
1829 | |
1830 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1831 | // CHECK: ret |
1832 | |
1833 | // implementation of 'parallel for' |
1834 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1835 | |
1836 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1837 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1838 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1839 | |
1840 | // initialize lb and ub to PrevLB and PrevUB |
1841 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1842 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1843 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1844 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1845 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1846 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1847 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1848 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1849 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1850 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1851 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1852 | |
1853 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1854 | // In this case we use EUB |
1855 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1856 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1857 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1858 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1859 | // CHECK: [[PF_EUB_TRUE]]: |
1860 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1861 | // CHECK: br label %[[PF_EUB_END:.+]] |
1862 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1863 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1864 | // CHECK: br label %[[PF_EUB_END]] |
1865 | // CHECK-DAG: [[PF_EUB_END]]: |
1866 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1867 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1868 | |
1869 | // initialize omp.iv |
1870 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1871 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1872 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1873 | |
1874 | // check exit condition |
1875 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1876 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1877 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1878 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1879 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1880 | |
// body of the 'for' loop
1882 | // CHECK: [[PF_BODY]]: |
1883 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1884 | // CHECK: br label {{.+}} |
1885 | |
1886 | // check stride 1 for 'for' in 'distribute parallel for' |
1887 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1888 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1889 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1890 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1891 | |
1892 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1893 | // CHECK: ret |
1894 | |
1895 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( |
1896 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) |
1897 | |
1898 | // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( |
1899 | // CHECK: alloca |
1900 | // CHECK: alloca |
1901 | // CHECK: alloca |
1902 | // CHECK: alloca |
1903 | // CHECK: alloca |
1904 | // CHECK: alloca |
1905 | // CHECK: alloca |
1906 | // CHECK: [[OMP_IV:%.+]] = alloca |
1907 | // CHECK: alloca |
1908 | // CHECK: alloca |
1909 | // CHECK: alloca |
1910 | // CHECK: alloca |
1911 | // CHECK: [[OMP_LB:%.+]] = alloca |
1912 | // CHECK: [[OMP_UB:%.+]] = alloca |
1913 | // CHECK: [[OMP_ST:%.+]] = alloca |
1914 | |
// unlike the previous tests, in this one we have an outer and inner loop for 'distribute'
1916 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, |
1917 | |
1918 | // check EUB for distribute |
1919 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1920 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} |
1921 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1922 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1923 | // CHECK-DAG: [[EUB_TRUE]]: |
1924 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1925 | // CHECK: br label %[[EUB_END:.+]] |
1926 | // CHECK-DAG: [[EUB_FALSE]]: |
1927 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1928 | // CHECK: br label %[[EUB_END]] |
1929 | // CHECK-DAG: [[EUB_END]]: |
1930 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1931 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1932 | |
1933 | // initialize omp.iv |
1934 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1935 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1936 | |
1937 | // check exit condition |
1938 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1939 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} |
1940 | // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 |
1941 | // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] |
1942 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] |
1943 | |
1944 | // check that PrevLB and PrevUB are passed to the 'for' |
1945 | // CHECK: [[DIST_INNER_LOOP_BODY]]: |
1946 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1947 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1948 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1949 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1950 | // check that distlb and distub are properly passed to fork_call |
1951 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1952 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1953 | // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] |
1954 | |
1955 | // check DistInc |
1956 | // CHECK: [[DIST_INNER_LOOP_INC]]: |
1957 | // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1958 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1959 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] |
1960 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1961 | // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1962 | // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1963 | // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] |
1964 | // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], |
1965 | // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1966 | // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1967 | // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] |
1968 | // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], |
1969 | |
1970 | // Update UB |
1971 | // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1972 | // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} |
1973 | // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] |
1974 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] |
1975 | // CHECK-DAG: [[EUB_TRUE_1]]: |
1976 | // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} |
1977 | // CHECK: br label %[[EUB_END_1:.+]] |
1978 | // CHECK-DAG: [[EUB_FALSE_1]]: |
1979 | // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], |
1980 | // CHECK: br label %[[EUB_END_1]] |
1981 | // CHECK-DAG: [[EUB_END_1]]: |
1982 | // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] |
1983 | // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], |
1984 | |
1985 | // Store LB in IV |
1986 | // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1987 | // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], |
1988 | |
1989 | // CHECK: [[DIST_INNER_LOOP_END]]: |
1990 | // CHECK: br label %[[LOOP_EXIT:.+]] |
1991 | |
1992 | // loop exit |
1993 | // CHECK: [[LOOP_EXIT]]: |
1994 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1995 | // CHECK: ret |
1996 | |
1997 | // skip implementation of 'parallel for': using default scheduling and was tested above |
1998 | |
1999 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( |
2000 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) |
2001 | |
2002 | // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( |
2003 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
2004 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
2005 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
2006 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
2007 | |
2008 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2009 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, |
2010 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2011 | // CHECK: ret |
2012 | |
// 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
2014 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2015 | |
2016 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2017 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2018 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2019 | |
2020 | // initialize lb and ub to PrevLB and PrevUB |
2021 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2022 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2023 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2024 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2025 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2026 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2027 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2028 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2029 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2030 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2031 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
2032 | |
2033 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
2034 | // In this case we use EUB |
2035 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2036 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
2037 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
2038 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
2039 | // CHECK: [[PF_EUB_TRUE]]: |
2040 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
2041 | // CHECK: br label %[[PF_EUB_END:.+]] |
2042 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
2043 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2044 | // CHECK: br label %[[PF_EUB_END]] |
2045 | // CHECK-DAG: [[PF_EUB_END]]: |
2046 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
2047 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
2048 | |
2049 | // initialize omp.iv |
2050 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2051 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2052 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
2053 | |
2054 | // check exit condition |
2055 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
2056 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
2057 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
2058 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
2059 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
2060 | |
// body of the 'for' loop
2062 | // CHECK: [[PF_BODY]]: |
2063 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2064 | // CHECK: br label {{.+}} |
2065 | |
2066 | // check stride 1 for 'for' in 'distribute parallel for' |
2067 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
2068 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
2069 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
2070 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
2071 | |
2072 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
2073 | // CHECK: ret |
2074 | |
2075 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( |
2076 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) |
2077 | |
2078 | // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( |
2079 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2080 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, |
2081 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2082 | // CHECK: ret |
2083 | |
2084 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
2085 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2086 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2087 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2088 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2089 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
2090 | |
2091 | // initialize lb and ub to PrevLB and PrevUB |
2092 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2093 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2094 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2095 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2096 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2097 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2098 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2099 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2100 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2101 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2102 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
2103 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
2104 | |
2105 | // check PrevEUB (using PrevUB instead of NumIt as upper bound) |
2106 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
2107 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2108 | // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to |
2109 | // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2110 | // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] |
2111 | // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] |
2112 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
2113 | // CHECK: [[PF_EUB_TRUE]]: |
2114 | // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2115 | // CHECK: br label %[[PF_EUB_END:.+]] |
2116 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
2117 | // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2118 | // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to |
2119 | // CHECK: br label %[[PF_EUB_END]] |
2120 | // CHECK-DAG: [[PF_EUB_END]]: |
2121 | // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] |
2122 | // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] |
2123 | // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to |
2124 | // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], |
2125 | // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], |
2126 | |
2127 | // initialize omp.iv (IV = LB) |
2128 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2129 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2130 | |
2131 | // outer loop: while (IV < UB) { |
2132 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2133 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2134 | // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
2135 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
2136 | |
2137 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
2138 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] |
2139 | |
2140 | // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: |
2141 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2142 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2143 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
2144 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
2145 | |
2146 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
2147 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2148 | // skip body branch |
2149 | // CHECK: br{{.+}} |
2150 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
2151 | |
2152 | // IV = IV + 1 and inner loop latch |
2153 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
2154 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
2155 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
2156 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
2157 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] |
2158 | |
2159 | // check NextLB and NextUB |
2160 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
2161 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
2162 | |
2163 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
2164 | // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2165 | // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
2166 | // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] |
2167 | // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], |
2168 | // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
2169 | // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
2170 | // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] |
2171 | // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], |
2172 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
2173 | |
2174 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
2175 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
2176 | // CHECK: ret |
2177 | |
2178 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( |
2179 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) |
2180 | |
2181 | // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( |
2182 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2183 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, |
2184 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2185 | // CHECK: ret |
2186 | |
2187 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
2188 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2189 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2190 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2191 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2192 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
2193 | |
2194 | // initialize lb and ub to PrevLB and PrevUB |
2195 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2196 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2197 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2198 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2199 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2200 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2201 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2202 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2203 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2204 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2205 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2206 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
2207 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
2208 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
2209 | |
2210 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
2211 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
2212 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
2213 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
2214 | |
2215 | // initialize omp.iv (IV = LB) |
2216 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
2217 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2218 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2219 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
2220 | |
2221 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
2222 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2223 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2224 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
2225 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
2226 | |
2227 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
2228 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2229 | // skip body branch |
2230 | // CHECK: br{{.+}} |
2231 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
2232 | |
2233 | // IV = IV + 1 and inner loop latch |
2234 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2238 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
2239 | |
2240 | // check NextLB and NextUB |
2241 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
2242 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
2243 | |
2244 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
2245 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
2246 | |
2247 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
2248 | // CHECK: ret |
2249 | |
2250 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( |
2251 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) |
2252 | |
2253 | // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( |
2254 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2255 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, |
2256 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2257 | // CHECK: ret |
2258 | |
2259 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
2260 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2261 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2262 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2263 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2264 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
2265 | |
2266 | // initialize lb and ub to PrevLB and PrevUB |
2267 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2268 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2269 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2270 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2271 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2272 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2273 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2274 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2275 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2276 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2277 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2278 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
2279 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
2280 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
2281 | |
2282 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
2283 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
2284 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
2285 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
2286 | |
2287 | // initialize omp.iv (IV = LB) |
2288 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
2289 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2290 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2291 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
2292 | |
2293 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
2294 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2295 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2296 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
2297 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
2298 | |
2299 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
2300 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2301 | // skip body branch |
2302 | // CHECK: br{{.+}} |
2303 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
2304 | |
2305 | // IV = IV + 1 and inner loop latch |
2306 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2310 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
2311 | |
2312 | // check NextLB and NextUB |
2313 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
2314 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
2315 | |
2316 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
2317 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
2318 | |
2319 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
2320 | // CHECK: ret |
2321 | #endif |
2322 | |