1 | // Test host code gen |
2 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 |
3 | // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
4 | // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 |
5 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 |
6 | // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
7 | // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 |
8 | |
9 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
10 | // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
11 | // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
12 | // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
13 | // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
14 | // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
15 | // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} |
16 | |
17 | // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 |
18 | // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
19 | // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 |
20 | // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 |
21 | // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
22 | // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 |
23 | |
24 | // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
25 | // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s |
26 | // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
27 | // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
28 | // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s |
29 | // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s |
30 | // SIMD-ONLY1-NOT: {{__kmpc|__tgt}} |
31 | // expected-no-diagnostics |
32 | #ifndef HEADER |
33 | #define HEADER |
34 | |
35 | |
// Templated test kernel exercised by the CHECK/CHECK-64/CHECK-32 FileCheck
// prefixes (the non-LAMBDA RUN lines above). Each target region below is
// identical except for the (dist_)schedule clause on the combined
// 'distribute parallel for simd' directive, so the test can compare the
// host codegen produced for every scheduling variant.
//
// NOTE(review): a, b and c are deliberately left uninitialized — this file
// is only compiled (output piped to FileCheck), never executed, so the
// loop bodies exist purely to force codegen of the combined construct.
template <typename T>
T tmain() {
  T *a, *b, *c;
  int n = 10000;   // trip count for every loop below
  int ch = 100;    // chunk size used by the chunked schedule variants

  // no schedule clauses
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for simd
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for simd dist_schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for simd dist_schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for simd schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for simd schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for simd schedule(dynamic)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for simd schedule(dynamic, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  return T();
}
100 | |
101 | int main() { |
102 | double *a, *b, *c; |
103 | int n = 10000; |
104 | int ch = 100; |
105 | |
106 | #ifdef LAMBDA |
107 | // LAMBDA-LABEL: @main |
108 | // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( |
109 | [&]() { |
110 | // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( |
111 | |
112 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
113 | // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( |
114 | |
115 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
116 | // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( |
117 | |
118 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
119 | // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( |
120 | |
121 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
122 | // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( |
123 | |
124 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
125 | // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( |
126 | |
127 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
128 | // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( |
129 | |
130 | // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( |
131 | // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( |
132 | |
133 | // no schedule clauses |
134 | #pragma omp target |
135 | #pragma omp teams |
136 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( |
137 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) |
138 | |
139 | #pragma omp distribute parallel for simd |
140 | for (int i = 0; i < n; ++i) { |
141 | a[i] = b[i] + c[i]; |
142 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( |
143 | // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca |
144 | // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
145 | // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
146 | // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca |
147 | |
148 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
149 | |
150 | // check EUB for distribute |
151 | // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
152 | // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, |
153 | // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
154 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
155 | // LAMBDA-DAG: [[EUB_TRUE]]: |
156 | // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, |
157 | // LAMBDA: br label %[[EUB_END:.+]] |
158 | // LAMBDA-DAG: [[EUB_FALSE]]: |
159 | // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
160 | // LAMBDA: br label %[[EUB_END]] |
161 | // LAMBDA-DAG: [[EUB_END]]: |
162 | // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
163 | // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
164 | |
165 | // initialize omp.iv |
166 | // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
167 | // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
168 | // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] |
169 | |
170 | // check exit condition |
171 | // LAMBDA: [[OMP_JUMP_BACK]]: |
172 | // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
173 | // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
174 | // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
175 | // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
176 | |
177 | // check that PrevLB and PrevUB are passed to the 'for' |
178 | // LAMBDA: [[DIST_BODY]]: |
179 | // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
180 | // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to |
181 | // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
182 | // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to |
183 | // check that distlb and distub are properly passed to fork_call |
184 | // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
185 | // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
186 | // LAMBDA: br label %[[DIST_INC:.+]] |
187 | |
188 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
189 | // LAMBDA: [[DIST_INC]]: |
190 | // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
191 | // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
192 | // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
193 | // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
194 | // LAMBDA: br label %[[OMP_JUMP_BACK]] |
195 | |
196 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
197 | // LAMBDA: ret |
198 | |
199 | // implementation of 'parallel for' |
200 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
201 | |
202 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
203 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
204 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
205 | |
206 | // initialize lb and ub to PrevLB and PrevUB |
207 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
208 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
209 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
210 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
211 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
212 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
213 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
214 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
215 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
216 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
217 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
218 | |
219 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
220 | // In this case we use EUB |
221 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
222 | // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
223 | // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
224 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
225 | // LAMBDA: [[PF_EUB_TRUE]]: |
226 | // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
227 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
228 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
229 | // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
230 | // LAMBDA: br label %[[PF_EUB_END]] |
231 | // LAMBDA-DAG: [[PF_EUB_END]]: |
232 | // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
233 | // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
234 | |
235 | // initialize omp.iv |
236 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
237 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
238 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] |
239 | |
240 | // check exit condition |
241 | // LAMBDA: [[OMP_PF_JUMP_BACK]]: |
242 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
243 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
244 | // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
245 | // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
246 | |
247 | // check that PrevLB and PrevUB are passed to the 'for' |
248 | // LAMBDA: [[PF_BODY]]: |
249 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
250 | // LAMBDA: br label {{.+}} |
251 | |
252 | // check stride 1 for 'for' in 'distribute parallel for simd' |
253 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
254 | // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
255 | // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
256 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] |
257 | |
258 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
259 | // LAMBDA: ret |
260 | |
261 | [&]() { |
262 | a[i] = b[i] + c[i]; |
263 | }(); |
264 | } |
265 | |
  // dist_schedule: static no chunk (same as default - no dist_schedule)
267 | #pragma omp target |
268 | #pragma omp teams |
269 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( |
270 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) |
271 | |
272 | #pragma omp distribute parallel for simd dist_schedule(static) |
273 | for (int i = 0; i < n; ++i) { |
274 | a[i] = b[i] + c[i]; |
275 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( |
276 | // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca |
277 | // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
278 | // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
279 | // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca |
280 | |
281 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
282 | |
283 | // check EUB for distribute |
284 | // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
285 | // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, |
286 | // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
287 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
288 | // LAMBDA-DAG: [[EUB_TRUE]]: |
289 | // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, |
290 | // LAMBDA: br label %[[EUB_END:.+]] |
291 | // LAMBDA-DAG: [[EUB_FALSE]]: |
292 | // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
293 | // LAMBDA: br label %[[EUB_END]] |
294 | // LAMBDA-DAG: [[EUB_END]]: |
295 | // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
296 | // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
297 | |
298 | // initialize omp.iv |
299 | // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
300 | // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
301 | // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] |
302 | |
303 | // check exit condition |
304 | // LAMBDA: [[OMP_JUMP_BACK]]: |
305 | // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
306 | // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
307 | // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
308 | // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
309 | |
310 | // check that PrevLB and PrevUB are passed to the 'for' |
311 | // LAMBDA: [[DIST_BODY]]: |
312 | // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
313 | // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to |
314 | // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
315 | // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to |
316 | // check that distlb and distub are properly passed to fork_call |
317 | // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
318 | // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
319 | // LAMBDA: br label %[[DIST_INC:.+]] |
320 | |
321 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
322 | // LAMBDA: [[DIST_INC]]: |
323 | // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
324 | // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
325 | // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
326 | // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
327 | // LAMBDA: br label %[[OMP_JUMP_BACK]] |
328 | |
329 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
330 | // LAMBDA: ret |
331 | |
332 | // implementation of 'parallel for' |
333 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
334 | |
335 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
336 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
337 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
338 | |
339 | // initialize lb and ub to PrevLB and PrevUB |
340 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
341 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
342 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
343 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
344 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
345 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
346 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
347 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
348 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
349 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
350 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
351 | |
352 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
353 | // In this case we use EUB |
354 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
355 | // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
356 | // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
357 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
358 | // LAMBDA: [[PF_EUB_TRUE]]: |
359 | // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
360 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
361 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
362 | // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
363 | // LAMBDA: br label %[[PF_EUB_END]] |
364 | // LAMBDA-DAG: [[PF_EUB_END]]: |
365 | // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
366 | // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
367 | |
368 | // initialize omp.iv |
369 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
370 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
371 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] |
372 | |
373 | // check exit condition |
374 | // LAMBDA: [[OMP_PF_JUMP_BACK]]: |
375 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
376 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
377 | // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
378 | // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
379 | |
380 | // check that PrevLB and PrevUB are passed to the 'for' |
381 | // LAMBDA: [[PF_BODY]]: |
382 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
383 | // LAMBDA: br label {{.+}} |
384 | |
385 | // check stride 1 for 'for' in 'distribute parallel for simd' |
386 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
387 | // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
388 | // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
389 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] |
390 | |
391 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
392 | // LAMBDA: ret |
393 | [&]() { |
394 | a[i] = b[i] + c[i]; |
395 | }(); |
396 | } |
397 | |
398 | // dist_schedule: static chunk |
399 | #pragma omp target |
400 | #pragma omp teams |
401 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( |
402 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) |
403 | |
404 | #pragma omp distribute parallel for simd dist_schedule(static, ch) |
405 | for (int i = 0; i < n; ++i) { |
406 | a[i] = b[i] + c[i]; |
407 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( |
408 | // LAMBDA: alloca |
409 | // LAMBDA: alloca |
410 | // LAMBDA: alloca |
411 | // LAMBDA: alloca |
412 | // LAMBDA: alloca |
413 | // LAMBDA: alloca |
414 | // LAMBDA: alloca |
415 | // LAMBDA: [[OMP_IV:%.+]] = alloca |
416 | // LAMBDA: alloca |
417 | // LAMBDA: alloca |
418 | // LAMBDA: alloca |
419 | // LAMBDA: alloca |
420 | // LAMBDA: [[OMP_LB:%.+]] = alloca |
421 | // LAMBDA: [[OMP_UB:%.+]] = alloca |
422 | // LAMBDA: [[OMP_ST:%.+]] = alloca |
423 | |
424 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, |
425 | |
426 | // check EUB for distribute |
427 | // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
428 | // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} |
429 | // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
430 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
431 | // LAMBDA-DAG: [[EUB_TRUE]]: |
432 | // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, |
433 | // LAMBDA: br label %[[EUB_END:.+]] |
434 | // LAMBDA-DAG: [[EUB_FALSE]]: |
435 | // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
436 | // LAMBDA: br label %[[EUB_END]] |
437 | // LAMBDA-DAG: [[EUB_END]]: |
438 | // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
439 | // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
440 | |
441 | // initialize omp.iv |
442 | // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
443 | // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
444 | |
445 | // check exit condition |
446 | // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
447 | // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} |
448 | // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 |
449 | // LAMBDA: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] |
450 | // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] |
451 | |
452 | // check that PrevLB and PrevUB are passed to the 'for' |
453 | // LAMBDA: [[DIST_INNER_LOOP_BODY]]: |
454 | // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
455 | // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
456 | // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
457 | // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
458 | // check that distlb and distub are properly passed to fork_call |
459 | // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
460 | // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
461 | // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] |
462 | |
463 | // check DistInc |
464 | // LAMBDA: [[DIST_INNER_LOOP_INC]]: |
465 | // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
466 | // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
467 | // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] |
468 | // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
469 | // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
470 | // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
471 | // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] |
472 | // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], |
473 | // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
474 | // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
475 | // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] |
476 | // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], |
477 | |
478 | // Update UB |
479 | // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
480 | // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} |
481 | // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] |
482 | // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] |
483 | // LAMBDA-DAG: [[EUB_TRUE_1]]: |
484 | // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} |
485 | // LAMBDA: br label %[[EUB_END_1:.+]] |
486 | // LAMBDA-DAG: [[EUB_FALSE_1]]: |
487 | // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], |
488 | // LAMBDA: br label %[[EUB_END_1]] |
489 | // LAMBDA-DAG: [[EUB_END_1]]: |
490 | // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] |
491 | // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], |
492 | |
493 | // Store LB in IV |
494 | // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
495 | // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], |
496 | |
497 | // LAMBDA: [[DIST_INNER_LOOP_END]]: |
498 | // LAMBDA: br label %[[LOOP_EXIT:.+]] |
499 | |
500 | // loop exit |
501 | // LAMBDA: [[LOOP_EXIT]]: |
502 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
503 | // LAMBDA: ret |
504 | |
505 | // skip implementation of 'parallel for': using default scheduling and was tested above |
506 | [&]() { |
507 | a[i] = b[i] + c[i]; |
508 | }(); |
509 | } |
510 | |
511 | // schedule: static no chunk |
512 | #pragma omp target |
513 | #pragma omp teams |
514 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( |
515 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) |
516 | |
517 | #pragma omp distribute parallel for simd schedule(static) |
518 | for (int i = 0; i < n; ++i) { |
519 | a[i] = b[i] + c[i]; |
520 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( |
521 | // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca |
522 | // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
523 | // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
524 | // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca |
525 | |
526 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
527 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, |
528 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
529 | // LAMBDA: ret |
530 | |
    // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
532 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
533 | |
534 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
535 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
536 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
537 | |
538 | // initialize lb and ub to PrevLB and PrevUB |
539 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
540 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
541 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
542 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
543 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
544 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
545 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
546 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
547 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
548 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
549 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
550 | |
551 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
552 | // In this case we use EUB |
553 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
554 | // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
555 | // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
556 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
557 | // LAMBDA: [[PF_EUB_TRUE]]: |
558 | // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
559 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
560 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
561 | // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
562 | // LAMBDA: br label %[[PF_EUB_END]] |
563 | // LAMBDA-DAG: [[PF_EUB_END]]: |
564 | // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
565 | // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
566 | |
567 | // initialize omp.iv |
568 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
569 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
570 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] |
571 | |
572 | // check exit condition |
573 | // LAMBDA: [[OMP_PF_JUMP_BACK]]: |
574 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
575 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
576 | // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
577 | // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
578 | |
579 | // check that PrevLB and PrevUB are passed to the 'for' |
580 | // LAMBDA: [[PF_BODY]]: |
581 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
582 | // LAMBDA: br label {{.+}} |
583 | |
584 | // check stride 1 for 'for' in 'distribute parallel for simd' |
585 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
586 | // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
587 | // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
588 | // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] |
589 | |
590 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
591 | // LAMBDA: ret |
592 | |
593 | [&]() { |
594 | a[i] = b[i] + c[i]; |
595 | }(); |
596 | } |
597 | |
598 | // schedule: static chunk |
599 | #pragma omp target |
600 | #pragma omp teams |
601 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( |
602 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) |
603 | |
604 | #pragma omp distribute parallel for simd schedule(static, ch) |
605 | for (int i = 0; i < n; ++i) { |
606 | a[i] = b[i] + c[i]; |
607 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( |
608 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
609 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, |
610 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
611 | // LAMBDA: ret |
612 | |
613 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
614 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
615 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
616 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
617 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
618 | // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
619 | |
620 | // initialize lb and ub to PrevLB and PrevUB |
621 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
622 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
623 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
624 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
625 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
626 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
627 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
628 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
629 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
630 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
631 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
632 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
633 | |
634 | // check PrevEUB (using PrevUB instead of NumIt as upper bound) |
635 | // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: |
636 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
637 | // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to |
638 | // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
639 | // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] |
640 | // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] |
641 | // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
642 | // LAMBDA: [[PF_EUB_TRUE]]: |
643 | // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
644 | // LAMBDA: br label %[[PF_EUB_END:.+]] |
645 | // LAMBDA-DAG: [[PF_EUB_FALSE]]: |
646 | // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
647 | // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to |
648 | // LAMBDA: br label %[[PF_EUB_END]] |
649 | // LAMBDA-DAG: [[PF_EUB_END]]: |
650 | // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] |
651 | // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] |
652 | // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to |
653 | // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], |
654 | // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], |
655 | |
656 | // initialize omp.iv (IV = LB) |
657 | // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
658 | // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
659 | |
660 | // outer loop: while (IV < UB) { |
661 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
662 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
663 | // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
664 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
665 | |
666 | // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: |
667 | // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] |
668 | |
669 | // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: |
670 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
671 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
672 | // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
673 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
674 | |
675 | // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: |
676 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
677 | // skip body branch |
678 | // LAMBDA: br{{.+}} |
679 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
680 | |
681 | // IV = IV + 1 and inner loop latch |
682 | // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: |
683 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
684 | // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
685 | // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
686 | // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] |
687 | |
688 | // check NextLB and NextUB |
689 | // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: |
690 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
691 | |
692 | // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: |
693 | // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
694 | // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
695 | // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] |
696 | // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], |
697 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
698 | // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
699 | // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] |
700 | // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], |
701 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
702 | |
703 | // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: |
704 | // LAMBDA-DAG: call void @__kmpc_for_static_fini( |
705 | // LAMBDA: ret |
706 | [&]() { |
707 | a[i] = b[i] + c[i]; |
708 | }(); |
709 | } |
710 | |
711 | // schedule: dynamic no chunk |
712 | #pragma omp target |
713 | #pragma omp teams |
714 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( |
715 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) |
716 | |
717 | #pragma omp distribute parallel for simd schedule(dynamic) |
718 | for (int i = 0; i < n; ++i) { |
719 | a[i] = b[i] + c[i]; |
720 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( |
721 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
722 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, |
723 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
724 | // LAMBDA: ret |
725 | |
726 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
727 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
728 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
729 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
730 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
731 | // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
732 | |
733 | // initialize lb and ub to PrevLB and PrevUB |
734 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
735 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
736 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
737 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
738 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
739 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
740 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
741 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
742 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
743 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
744 | // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
745 | // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
746 | // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
747 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
748 | |
749 | // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: |
750 | // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
751 | // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
752 | // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
753 | |
754 | // initialize omp.iv (IV = LB) |
755 | // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: |
756 | // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
757 | // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
758 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
759 | |
760 | // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: |
761 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
762 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
763 | // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
764 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
765 | |
766 | // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: |
767 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
768 | // skip body branch |
769 | // LAMBDA: br{{.+}} |
770 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
771 | |
772 | // IV = IV + 1 and inner loop latch |
773 | // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: |
774 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
775 | // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
776 | // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
777 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
778 | |
779 | // check NextLB and NextUB |
780 | // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: |
781 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
782 | |
783 | // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: |
784 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
785 | |
786 | // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: |
787 | // LAMBDA: ret |
788 | [&]() { |
789 | a[i] = b[i] + c[i]; |
790 | }(); |
791 | } |
792 | |
793 | // schedule: dynamic chunk |
794 | #pragma omp target |
795 | #pragma omp teams |
796 | // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( |
797 | // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) |
798 | |
799 | #pragma omp distribute parallel for simd schedule(dynamic, ch) |
800 | for (int i = 0; i < n; ++i) { |
801 | a[i] = b[i] + c[i]; |
802 | // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( |
803 | // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
804 | // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, |
805 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
806 | // LAMBDA: ret |
807 | |
808 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
809 | // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
810 | // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
811 | // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
812 | // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
813 | // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
814 | |
815 | // initialize lb and ub to PrevLB and PrevUB |
816 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
817 | // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
818 | // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
819 | // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
820 | // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
821 | // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
822 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
823 | // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
824 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
825 | // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
826 | // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
827 | // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
828 | // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
829 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
830 | |
831 | // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: |
832 | // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
833 | // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
834 | // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
835 | |
836 | // initialize omp.iv (IV = LB) |
837 | // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: |
838 | // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
839 | // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
840 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
841 | |
842 | // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: |
843 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
844 | // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
845 | // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
846 | // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
847 | |
848 | // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: |
849 | // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
850 | // skip body branch |
851 | // LAMBDA: br{{.+}} |
852 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
853 | |
854 | // IV = IV + 1 and inner loop latch |
855 | // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: |
856 | // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
857 | // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
858 | // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
859 | // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
860 | |
861 | // check NextLB and NextUB |
862 | // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: |
863 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
864 | |
865 | // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: |
866 | // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
867 | |
868 | // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: |
869 | // LAMBDA: ret |
870 | [&]() { |
871 | a[i] = b[i] + c[i]; |
872 | }(); |
873 | } |
874 | }(); |
875 | return 0; |
876 | #else |
877 | // CHECK-LABEL: @main |
878 | |
879 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
880 | // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( |
881 | |
882 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
883 | // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( |
884 | |
885 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
886 | // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( |
887 | |
888 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
889 | // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( |
890 | |
891 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
892 | // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( |
893 | |
894 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
895 | // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( |
896 | |
897 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
898 | // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( |
899 | |
900 | // CHECK: call{{.+}} [[TMAIN:@.+]]() |
901 | |
902 | // no schedule clauses |
903 | #pragma omp target |
904 | #pragma omp teams |
905 | // CHECK: define internal void [[OFFLOADING_FUN_1]]( |
906 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) |
907 | |
908 | #pragma omp distribute parallel for simd |
909 | for (int i = 0; i < n; ++i) { |
910 | a[i] = b[i] + c[i]; |
911 | // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( |
912 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
913 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
914 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
915 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
916 | |
917 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
918 | |
919 | // check EUB for distribute |
920 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
921 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
922 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
923 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
924 | // CHECK-DAG: [[EUB_TRUE]]: |
925 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
926 | // CHECK: br label %[[EUB_END:.+]] |
927 | // CHECK-DAG: [[EUB_FALSE]]: |
928 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
929 | // CHECK: br label %[[EUB_END]] |
930 | // CHECK-DAG: [[EUB_END]]: |
931 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
932 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
933 | |
934 | // initialize omp.iv |
935 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
936 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
937 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
938 | |
939 | // check exit condition |
940 | // CHECK: [[OMP_JUMP_BACK]]: |
941 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
942 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
943 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
944 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
945 | |
946 | // check that PrevLB and PrevUB are passed to the 'for' |
947 | // CHECK: [[DIST_BODY]]: |
948 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
949 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
950 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
951 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
952 | // check that distlb and distub are properly passed to fork_call |
953 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
954 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
955 | // CHECK: br label %[[DIST_INC:.+]] |
956 | |
957 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
958 | // CHECK: [[DIST_INC]]: |
959 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
960 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
961 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
962 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
963 | // CHECK: br label %[[OMP_JUMP_BACK]] |
964 | |
965 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
966 | // CHECK: ret |
967 | |
968 | // implementation of 'parallel for' |
969 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
970 | |
971 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
972 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
973 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
974 | |
975 | // initialize lb and ub to PrevLB and PrevUB |
976 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
977 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
978 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
979 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
980 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
981 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
982 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
983 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
984 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
985 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
986 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
987 | |
988 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
989 | // In this case we use EUB |
990 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
991 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
992 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
993 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
994 | // CHECK: [[PF_EUB_TRUE]]: |
995 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
996 | // CHECK: br label %[[PF_EUB_END:.+]] |
997 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
998 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
999 | // CHECK: br label %[[PF_EUB_END]] |
1000 | // CHECK-DAG: [[PF_EUB_END]]: |
1001 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1002 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1003 | |
1004 | // initialize omp.iv |
1005 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1006 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1007 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1008 | |
1009 | // check exit condition |
1010 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1011 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1012 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1013 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1014 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1015 | |
1016 | // check that PrevLB and PrevUB are passed to the 'for' |
1017 | // CHECK: [[PF_BODY]]: |
1018 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1019 | // CHECK: br label {{.+}} |
1020 | |
1021 | // check stride 1 for 'for' in 'distribute parallel for simd' |
1022 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1023 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1024 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1025 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1026 | |
1027 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1028 | // CHECK: ret |
1029 | } |
1030 | |
1031 | // dist_schedule: static no chunk |
1032 | #pragma omp target |
1033 | #pragma omp teams |
1034 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( |
1035 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) |
1036 | |
1037 | #pragma omp distribute parallel for simd dist_schedule(static) |
1038 | for (int i = 0; i < n; ++i) { |
1039 | a[i] = b[i] + c[i]; |
1040 | // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( |
1041 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1042 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1043 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1044 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1045 | |
1046 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1047 | |
1048 | // check EUB for distribute |
1049 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1050 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
1051 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1052 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1053 | // CHECK-DAG: [[EUB_TRUE]]: |
1054 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1055 | // CHECK: br label %[[EUB_END:.+]] |
1056 | // CHECK-DAG: [[EUB_FALSE]]: |
1057 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1058 | // CHECK: br label %[[EUB_END]] |
1059 | // CHECK-DAG: [[EUB_END]]: |
1060 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1061 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1062 | |
1063 | // initialize omp.iv |
1064 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1065 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1066 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
1067 | |
1068 | // check exit condition |
1069 | // CHECK: [[OMP_JUMP_BACK]]: |
1070 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1071 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
1072 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
1073 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
1074 | |
1075 | // check that PrevLB and PrevUB are passed to the 'for' |
1076 | // CHECK: [[DIST_BODY]]: |
1077 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1078 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1079 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1080 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1081 | // check that distlb and distub are properly passed to fork_call |
1082 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1083 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1084 | // CHECK: br label %[[DIST_INC:.+]] |
1085 | |
1086 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
1087 | // CHECK: [[DIST_INC]]: |
1088 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1089 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1090 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
1091 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1092 | // CHECK: br label %[[OMP_JUMP_BACK]] |
1093 | |
1094 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1095 | // CHECK: ret |
1096 | |
1097 | // implementation of 'parallel for' |
1098 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1099 | |
1100 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1101 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1102 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1103 | |
1104 | // initialize lb and ub to PrevLB and PrevUB |
1105 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1106 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1107 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1108 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1109 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1110 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1111 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1112 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1113 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1114 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1115 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1116 | |
1117 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1118 | // In this case we use EUB |
1119 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1120 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1121 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1122 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1123 | // CHECK: [[PF_EUB_TRUE]]: |
1124 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1125 | // CHECK: br label %[[PF_EUB_END:.+]] |
1126 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1127 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1128 | // CHECK: br label %[[PF_EUB_END]] |
1129 | // CHECK-DAG: [[PF_EUB_END]]: |
1130 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1131 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1132 | |
1133 | // initialize omp.iv |
1134 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1135 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1136 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1137 | |
1138 | // check exit condition |
1139 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1140 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1141 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1142 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1143 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1144 | |
1145 | // check that PrevLB and PrevUB are passed to the 'for' |
1146 | // CHECK: [[PF_BODY]]: |
1147 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1148 | // CHECK: br label {{.+}} |
1149 | |
1150 | // check stride 1 for 'for' in 'distribute parallel for simd' |
1151 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1152 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1153 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1154 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1155 | |
1156 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1157 | // CHECK: ret |
1158 | } |
1159 | |
1160 | // dist_schedule: static chunk |
1161 | #pragma omp target |
1162 | #pragma omp teams |
1163 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( |
1164 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) |
1165 | |
1166 | #pragma omp distribute parallel for simd dist_schedule(static, ch) |
1167 | for (int i = 0; i < n; ++i) { |
1168 | a[i] = b[i] + c[i]; |
1169 | // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( |
1170 | // CHECK: alloca |
1171 | // CHECK: alloca |
1172 | // CHECK: alloca |
1173 | // CHECK: alloca |
1174 | // CHECK: alloca |
1175 | // CHECK: alloca |
1176 | // CHECK: alloca |
1177 | // CHECK: [[OMP_IV:%.+]] = alloca |
1178 | // CHECK: alloca |
1179 | // CHECK: alloca |
1180 | // CHECK: alloca |
1181 | // CHECK: alloca |
1182 | // CHECK: [[OMP_LB:%.+]] = alloca |
1183 | // CHECK: [[OMP_UB:%.+]] = alloca |
1184 | // CHECK: [[OMP_ST:%.+]] = alloca |
1185 | |
// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
1187 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, |
1188 | |
1189 | // check EUB for distribute |
1190 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1191 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} |
1192 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1193 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1194 | // CHECK-DAG: [[EUB_TRUE]]: |
1195 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1196 | // CHECK: br label %[[EUB_END:.+]] |
1197 | // CHECK-DAG: [[EUB_FALSE]]: |
1198 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1199 | // CHECK: br label %[[EUB_END]] |
1200 | // CHECK-DAG: [[EUB_END]]: |
1201 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1202 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1203 | |
1204 | // initialize omp.iv |
1205 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1206 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1207 | |
1208 | // check exit condition |
1209 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1210 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} |
1211 | // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 |
1212 | // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] |
1213 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] |
1214 | |
1215 | // check that PrevLB and PrevUB are passed to the 'for' |
1216 | // CHECK: [[DIST_INNER_LOOP_BODY]]: |
1217 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1218 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1219 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1220 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1221 | // check that distlb and distub are properly passed to fork_call |
1222 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1223 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1224 | // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] |
1225 | |
1226 | // check DistInc |
1227 | // CHECK: [[DIST_INNER_LOOP_INC]]: |
1228 | // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1229 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1230 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] |
1231 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1232 | // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1233 | // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1234 | // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] |
1235 | // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], |
1236 | // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1237 | // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1238 | // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] |
1239 | // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], |
1240 | |
1241 | // Update UB |
1242 | // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1243 | // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} |
1244 | // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] |
1245 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] |
1246 | // CHECK-DAG: [[EUB_TRUE_1]]: |
1247 | // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} |
1248 | // CHECK: br label %[[EUB_END_1:.+]] |
1249 | // CHECK-DAG: [[EUB_FALSE_1]]: |
1250 | // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], |
1251 | // CHECK: br label %[[EUB_END_1]] |
1252 | // CHECK-DAG: [[EUB_END_1]]: |
1253 | // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] |
1254 | // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], |
1255 | |
1256 | // Store LB in IV |
1257 | // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1258 | // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], |
1259 | |
1260 | // CHECK: [[DIST_INNER_LOOP_END]]: |
1261 | // CHECK: br label %[[LOOP_EXIT:.+]] |
1262 | |
1263 | // loop exit |
1264 | // CHECK: [[LOOP_EXIT]]: |
1265 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1266 | // CHECK: ret |
1267 | |
1268 | // skip implementation of 'parallel for': using default scheduling and was tested above |
1269 | } |
1270 | |
1271 | // schedule: static no chunk |
1272 | #pragma omp target |
1273 | #pragma omp teams |
1274 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( |
1275 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) |
1276 | |
1277 | #pragma omp distribute parallel for simd schedule(static) |
1278 | for (int i = 0; i < n; ++i) { |
1279 | a[i] = b[i] + c[i]; |
1280 | // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( |
1281 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1282 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1283 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1284 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1285 | |
1286 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1287 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, |
1288 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1289 | // CHECK: ret |
1290 | |
// 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
1292 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1293 | |
1294 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1295 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1296 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1297 | |
1298 | // initialize lb and ub to PrevLB and PrevUB |
1299 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1300 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1301 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1302 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1303 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1304 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1305 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1306 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1307 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1308 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1309 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1310 | |
1311 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1312 | // In this case we use EUB |
1313 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1314 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1315 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1316 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1317 | // CHECK: [[PF_EUB_TRUE]]: |
1318 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1319 | // CHECK: br label %[[PF_EUB_END:.+]] |
1320 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1321 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1322 | // CHECK: br label %[[PF_EUB_END]] |
1323 | // CHECK-DAG: [[PF_EUB_END]]: |
1324 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1325 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1326 | |
1327 | // initialize omp.iv |
1328 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1329 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1330 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1331 | |
1332 | // check exit condition |
1333 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1334 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1335 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1336 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1337 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1338 | |
1339 | // check that PrevLB and PrevUB are passed to the 'for' |
1340 | // CHECK: [[PF_BODY]]: |
1341 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1342 | // CHECK: br label {{.+}} |
1343 | |
1344 | // check stride 1 for 'for' in 'distribute parallel for simd' |
1345 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1346 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1347 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1348 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1349 | |
1350 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1351 | // CHECK: ret |
1352 | } |
1353 | |
1354 | // schedule: static chunk |
1355 | #pragma omp target |
1356 | #pragma omp teams |
1357 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( |
1358 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) |
1359 | |
1360 | #pragma omp distribute parallel for simd schedule(static, ch) |
1361 | for (int i = 0; i < n; ++i) { |
1362 | a[i] = b[i] + c[i]; |
1363 | // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( |
1364 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1365 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, |
1366 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1367 | // CHECK: ret |
1368 | |
1369 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
1370 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1371 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1372 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1373 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1374 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
1375 | |
1376 | // initialize lb and ub to PrevLB and PrevUB |
1377 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1378 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1379 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1380 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1381 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1382 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1383 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1384 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1385 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1386 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1387 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1388 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
1389 | |
1390 | // check PrevEUB (using PrevUB instead of NumIt as upper bound) |
1391 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
1392 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1393 | // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to |
1394 | // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1395 | // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] |
1396 | // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] |
1397 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1398 | // CHECK: [[PF_EUB_TRUE]]: |
1399 | // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1400 | // CHECK: br label %[[PF_EUB_END:.+]] |
1401 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1402 | // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1403 | // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to |
1404 | // CHECK: br label %[[PF_EUB_END]] |
1405 | // CHECK-DAG: [[PF_EUB_END]]: |
1406 | // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] |
1407 | // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] |
1408 | // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to |
1409 | // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], |
1410 | // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], |
1411 | |
1412 | // initialize omp.iv (IV = LB) |
1413 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1414 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1415 | |
1416 | // outer loop: while (IV < UB) { |
1417 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1418 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1419 | // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1420 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
1421 | |
1422 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
1423 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] |
1424 | |
1425 | // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: |
1426 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1427 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1428 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
1429 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
1430 | |
1431 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
1432 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1433 | // skip body branch |
1434 | // CHECK: br{{.+}} |
1435 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
1436 | |
1437 | // IV = IV + 1 and inner loop latch |
1438 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
1439 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
1440 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
1441 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
1442 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] |
1443 | |
1444 | // check NextLB and NextUB |
1445 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
1446 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
1447 | |
1448 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
1449 | // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1450 | // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
1451 | // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] |
1452 | // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], |
1453 | // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
1454 | // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
1455 | // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] |
1456 | // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], |
1457 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
1458 | |
1459 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
1460 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1461 | // CHECK: ret |
1462 | } |
1463 | |
1464 | // schedule: dynamic no chunk |
1465 | #pragma omp target |
1466 | #pragma omp teams |
1467 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( |
1468 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) |
1469 | |
1470 | #pragma omp distribute parallel for simd schedule(dynamic) |
1471 | for (int i = 0; i < n; ++i) { |
1472 | a[i] = b[i] + c[i]; |
1473 | // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( |
1474 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1475 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, |
1476 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1477 | // CHECK: ret |
1478 | |
1479 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
1480 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1481 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1482 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1483 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1484 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
1485 | |
1486 | // initialize lb and ub to PrevLB and PrevUB |
1487 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1488 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1489 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1490 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1491 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1492 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1493 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1494 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1495 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1496 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1497 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1498 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
1499 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
1500 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
1501 | |
1502 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
1503 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
1504 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
1505 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
1506 | |
1507 | // initialize omp.iv (IV = LB) |
1508 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
1509 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1510 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1511 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
1512 | |
1513 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
1514 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1515 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1516 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
1517 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
1518 | |
1519 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
1520 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1521 | // skip body branch |
1522 | // CHECK: br{{.+}} |
1523 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
1524 | |
1525 | // IV = IV + 1 and inner loop latch |
1526 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
1527 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
1528 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
1529 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
1530 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
1531 | |
1532 | // check NextLB and NextUB |
1533 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
1534 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
1535 | |
1536 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
1537 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
1538 | |
1539 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
1540 | // CHECK: ret |
1541 | } |
1542 | |
1543 | // schedule: dynamic chunk |
1544 | #pragma omp target |
1545 | #pragma omp teams |
1546 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( |
1547 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) |
1548 | |
1549 | #pragma omp distribute parallel for simd schedule(dynamic, ch) |
1550 | for (int i = 0; i < n; ++i) { |
1551 | a[i] = b[i] + c[i]; |
1552 | // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( |
1553 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1554 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, |
1555 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
1556 | // CHECK: ret |
1557 | |
1558 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
1559 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1560 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1561 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1562 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1563 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
1564 | |
1565 | // initialize lb and ub to PrevLB and PrevUB |
1566 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1567 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1568 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1569 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1570 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1571 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1572 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1573 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1574 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1575 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1576 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1577 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
1578 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
1579 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
1580 | |
1581 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
1582 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
1583 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
1584 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
1585 | |
1586 | // initialize omp.iv (IV = LB) |
1587 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
1588 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1589 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1590 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
1591 | |
1592 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
1593 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1594 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
1595 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
1596 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
1597 | |
1598 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
1599 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1600 | // skip body branch |
1601 | // CHECK: br{{.+}} |
1602 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
1603 | |
1604 | // IV = IV + 1 and inner loop latch |
1605 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
1606 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
1607 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
1608 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
1609 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
1610 | |
1611 | // check NextLB and NextUB |
1612 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
1613 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
1614 | |
1615 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
1616 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
1617 | |
1618 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
1619 | // CHECK: ret |
1620 | } |
1621 | |
1622 | return tmain<int>(); |
1623 | #endif |
1624 | } |
1625 | |
1626 | // check code |
1627 | // CHECK: define{{.+}} [[TMAIN]]() |
1628 | |
1629 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1630 | // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( |
1631 | |
1632 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1633 | // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( |
1634 | |
1635 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1636 | // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( |
1637 | |
1638 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1639 | // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( |
1640 | |
1641 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1642 | // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( |
1643 | |
1644 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1645 | // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( |
1646 | |
1647 | // CHECK: call i{{[0-9]+}} @__tgt_target_teams( |
1648 | // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( |
1649 | |
1650 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( |
1651 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) |
1652 | |
1653 | // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( |
1654 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1655 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1656 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1657 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1658 | |
1659 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1660 | |
1661 | // check EUB for distribute |
1662 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1663 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
1664 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1665 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1666 | // CHECK-DAG: [[EUB_TRUE]]: |
1667 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1668 | // CHECK: br label %[[EUB_END:.+]] |
1669 | // CHECK-DAG: [[EUB_FALSE]]: |
1670 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1671 | // CHECK: br label %[[EUB_END]] |
1672 | // CHECK-DAG: [[EUB_END]]: |
1673 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1674 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1675 | |
1676 | // initialize omp.iv |
1677 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1678 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1679 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
1680 | |
1681 | // check exit condition |
1682 | // CHECK: [[OMP_JUMP_BACK]]: |
1683 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1684 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
1685 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
1686 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
1687 | |
1688 | // check that PrevLB and PrevUB are passed to the 'for' |
1689 | // CHECK: [[DIST_BODY]]: |
1690 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1691 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1692 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1693 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1694 | // check that distlb and distub are properly passed to fork_call |
1695 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1696 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1697 | // CHECK: br label %[[DIST_INC:.+]] |
1698 | |
1699 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
1700 | // CHECK: [[DIST_INC]]: |
1701 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1702 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1703 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
1704 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1705 | // CHECK: br label %[[OMP_JUMP_BACK]] |
1706 | |
1707 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1708 | // CHECK: ret |
1709 | |
1710 | // implementation of 'parallel for' |
1711 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1712 | |
1713 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1714 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1715 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1716 | |
1717 | // initialize lb and ub to PrevLB and PrevUB |
1718 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1719 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1720 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1721 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1722 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1723 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1724 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1725 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1726 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1727 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1728 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1729 | |
1730 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1731 | // In this case we use EUB |
1732 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1733 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1734 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1735 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1736 | // CHECK: [[PF_EUB_TRUE]]: |
1737 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1738 | // CHECK: br label %[[PF_EUB_END:.+]] |
1739 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1740 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1741 | // CHECK: br label %[[PF_EUB_END]] |
1742 | // CHECK-DAG: [[PF_EUB_END]]: |
1743 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1744 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1745 | |
1746 | // initialize omp.iv |
1747 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1748 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1749 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1750 | |
1751 | // check exit condition |
1752 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1753 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1754 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1755 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1756 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1757 | |
// check the loop body (loads the induction variable)
1759 | // CHECK: [[PF_BODY]]: |
1760 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1761 | // CHECK: br label {{.+}} |
1762 | |
1763 | // check stride 1 for 'for' in 'distribute parallel for simd' |
1764 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1765 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1766 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1767 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1768 | |
1769 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1770 | // CHECK: ret |
1771 | |
1772 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( |
1773 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) |
1774 | |
1775 | // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( |
1776 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
1777 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
1778 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
1779 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
1780 | |
1781 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
1782 | |
1783 | // check EUB for distribute |
1784 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1785 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, |
1786 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1787 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1788 | // CHECK-DAG: [[EUB_TRUE]]: |
1789 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1790 | // CHECK: br label %[[EUB_END:.+]] |
1791 | // CHECK-DAG: [[EUB_FALSE]]: |
1792 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1793 | // CHECK: br label %[[EUB_END]] |
1794 | // CHECK-DAG: [[EUB_END]]: |
1795 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1796 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1797 | |
1798 | // initialize omp.iv |
1799 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1800 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1801 | // CHECK: br label %[[OMP_JUMP_BACK:.+]] |
1802 | |
1803 | // check exit condition |
1804 | // CHECK: [[OMP_JUMP_BACK]]: |
1805 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1806 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], |
1807 | // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] |
1808 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] |
1809 | |
1810 | // check that PrevLB and PrevUB are passed to the 'for' |
1811 | // CHECK: [[DIST_BODY]]: |
1812 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1813 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1814 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1815 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1816 | // check that distlb and distub are properly passed to fork_call |
1817 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1818 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1819 | // CHECK: br label %[[DIST_INC:.+]] |
1820 | |
1821 | // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch |
1822 | // CHECK: [[DIST_INC]]: |
1823 | // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1824 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1825 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] |
1826 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1827 | // CHECK: br label %[[OMP_JUMP_BACK]] |
1828 | |
1829 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1830 | // CHECK: ret |
1831 | |
1832 | // implementation of 'parallel for' |
1833 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
1834 | |
1835 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
1836 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
1837 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
1838 | |
1839 | // initialize lb and ub to PrevLB and PrevUB |
1840 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
1841 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
1842 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
1843 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
1844 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
1845 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
1846 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
1847 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
1848 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
1849 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
1850 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
1851 | |
1852 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
1853 | // In this case we use EUB |
1854 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1855 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
1856 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
1857 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
1858 | // CHECK: [[PF_EUB_TRUE]]: |
1859 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
1860 | // CHECK: br label %[[PF_EUB_END:.+]] |
1861 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
1862 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
1863 | // CHECK: br label %[[PF_EUB_END]] |
1864 | // CHECK-DAG: [[PF_EUB_END]]: |
1865 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
1866 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
1867 | |
1868 | // initialize omp.iv |
1869 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
1870 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
1871 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
1872 | |
1873 | // check exit condition |
1874 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
1875 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
1876 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
1877 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
1878 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
1879 | |
// check the loop body (loads the induction variable)
1881 | // CHECK: [[PF_BODY]]: |
1882 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
1883 | // CHECK: br label {{.+}} |
1884 | |
1885 | // check stride 1 for 'for' in 'distribute parallel for simd' |
1886 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
1887 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
1888 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
1889 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
1890 | |
1891 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1892 | // CHECK: ret |
1893 | |
1894 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( |
1895 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) |
1896 | |
1897 | // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( |
1898 | // CHECK: alloca |
1899 | // CHECK: alloca |
1900 | // CHECK: alloca |
1901 | // CHECK: alloca |
1902 | // CHECK: alloca |
1903 | // CHECK: alloca |
1904 | // CHECK: alloca |
1905 | // CHECK: [[OMP_IV:%.+]] = alloca |
1906 | // CHECK: alloca |
1907 | // CHECK: alloca |
1908 | // CHECK: alloca |
1909 | // CHECK: alloca |
1910 | // CHECK: [[OMP_LB:%.+]] = alloca |
1911 | // CHECK: [[OMP_UB:%.+]] = alloca |
1912 | // CHECK: [[OMP_ST:%.+]] = alloca |
1913 | |
// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
1915 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, |
1916 | |
1917 | // check EUB for distribute |
1918 | // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], |
1919 | // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} |
1920 | // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] |
1921 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] |
1922 | // CHECK-DAG: [[EUB_TRUE]]: |
1923 | // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, |
1924 | // CHECK: br label %[[EUB_END:.+]] |
1925 | // CHECK-DAG: [[EUB_FALSE]]: |
1926 | // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], |
1927 | // CHECK: br label %[[EUB_END]] |
1928 | // CHECK-DAG: [[EUB_END]]: |
1929 | // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] |
1930 | // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], |
1931 | |
1932 | // initialize omp.iv |
1933 | // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], |
1934 | // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], |
1935 | |
1936 | // check exit condition |
1937 | // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], |
1938 | // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} |
1939 | // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 |
1940 | // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] |
1941 | // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] |
1942 | |
1943 | // check that PrevLB and PrevUB are passed to the 'for' |
1944 | // CHECK: [[DIST_INNER_LOOP_BODY]]: |
1945 | // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], |
1946 | // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} |
1947 | // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], |
1948 | // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} |
1949 | // check that distlb and distub are properly passed to fork_call |
1950 | // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) |
1951 | // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) |
1952 | // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] |
1953 | |
1954 | // check DistInc |
1955 | // CHECK: [[DIST_INNER_LOOP_INC]]: |
1956 | // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], |
1957 | // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], |
1958 | // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] |
1959 | // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], |
1960 | // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1961 | // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1962 | // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] |
1963 | // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], |
1964 | // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1965 | // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], |
1966 | // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] |
1967 | // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], |
1968 | |
1969 | // Update UB |
1970 | // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], |
1971 | // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} |
1972 | // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] |
1973 | // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] |
1974 | // CHECK-DAG: [[EUB_TRUE_1]]: |
1975 | // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} |
1976 | // CHECK: br label %[[EUB_END_1:.+]] |
1977 | // CHECK-DAG: [[EUB_FALSE_1]]: |
1978 | // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], |
1979 | // CHECK: br label %[[EUB_END_1]] |
1980 | // CHECK-DAG: [[EUB_END_1]]: |
1981 | // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] |
1982 | // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], |
1983 | |
1984 | // Store LB in IV |
1985 | // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], |
1986 | // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], |
1987 | |
1988 | // CHECK: [[DIST_INNER_LOOP_END]]: |
1989 | // CHECK: br label %[[LOOP_EXIT:.+]] |
1990 | |
1991 | // loop exit |
1992 | // CHECK: [[LOOP_EXIT]]: |
1993 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
1994 | // CHECK: ret |
1995 | |
1996 | // skip implementation of 'parallel for': using default scheduling and was tested above |
1997 | |
1998 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( |
1999 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) |
2000 | |
2001 | // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( |
2002 | // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca |
2003 | // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca |
2004 | // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca |
2005 | // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca |
2006 | |
2007 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2008 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, |
2009 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2010 | // CHECK: ret |
2011 | |
// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
2013 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2014 | |
2015 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2016 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2017 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2018 | |
2019 | // initialize lb and ub to PrevLB and PrevUB |
2020 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2021 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2022 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2023 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2024 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2025 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2026 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2027 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2028 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2029 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2030 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
2031 | |
2032 | // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used |
2033 | // In this case we use EUB |
2034 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2035 | // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, |
2036 | // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] |
2037 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
2038 | // CHECK: [[PF_EUB_TRUE]]: |
2039 | // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, |
2040 | // CHECK: br label %[[PF_EUB_END:.+]] |
2041 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
2042 | // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2043 | // CHECK: br label %[[PF_EUB_END]] |
2044 | // CHECK-DAG: [[PF_EUB_END]]: |
2045 | // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] |
2046 | // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], |
2047 | |
2048 | // initialize omp.iv |
2049 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2050 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2051 | // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] |
2052 | |
2053 | // check exit condition |
2054 | // CHECK: [[OMP_PF_JUMP_BACK]]: |
2055 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], |
2056 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], |
2057 | // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
2058 | // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] |
2059 | |
// check the loop body (loads the induction variable)
2061 | // CHECK: [[PF_BODY]]: |
2062 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2063 | // CHECK: br label {{.+}} |
2064 | |
2065 | // check stride 1 for 'for' in 'distribute parallel for simd' |
2066 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], |
2067 | // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 |
2068 | // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], |
2069 | // CHECK: br label %[[OMP_PF_JUMP_BACK]] |
2070 | |
2071 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
2072 | // CHECK: ret |
2073 | |
2074 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( |
2075 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) |
2076 | |
2077 | // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( |
2078 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2079 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, |
2080 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2081 | // CHECK: ret |
2082 | |
2083 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
2084 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2085 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2086 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2087 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2088 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
2089 | |
2090 | // initialize lb and ub to PrevLB and PrevUB |
2091 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2092 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2093 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2094 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2095 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2096 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2097 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2098 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2099 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2100 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2101 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) |
2102 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
2103 | |
2104 | // check PrevEUB (using PrevUB instead of NumIt as upper bound) |
2105 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
2106 | // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2107 | // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to |
2108 | // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2109 | // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] |
2110 | // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] |
2111 | // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] |
2112 | // CHECK: [[PF_EUB_TRUE]]: |
2113 | // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2114 | // CHECK: br label %[[PF_EUB_END:.+]] |
2115 | // CHECK-DAG: [[PF_EUB_FALSE]]: |
2116 | // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], |
2117 | // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to |
2118 | // CHECK: br label %[[PF_EUB_END]] |
2119 | // CHECK-DAG: [[PF_EUB_END]]: |
2120 | // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] |
2121 | // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] |
2122 | // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to |
2123 | // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], |
2124 | // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], |
2125 | |
2126 | // initialize omp.iv (IV = LB) |
2127 | // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2128 | // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2129 | |
2130 | // outer loop: while (IV < UB) { |
2131 | // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2132 | // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2133 | // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] |
2134 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
2135 | |
2136 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
2137 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] |
2138 | |
2139 | // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: |
2140 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2141 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2142 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
2143 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
2144 | |
2145 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
2146 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2147 | // skip body branch |
2148 | // CHECK: br{{.+}} |
2149 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
2150 | |
2151 | // IV = IV + 1 and inner loop latch |
2152 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
2153 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
2154 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
2155 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
2156 | // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] |
2157 | |
2158 | // check NextLB and NextUB |
2159 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
2160 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
2161 | |
2162 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
2163 | // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2164 | // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
2165 | // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] |
2166 | // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], |
2167 | // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
2168 | // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], |
2169 | // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] |
2170 | // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], |
2171 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
2172 | |
2173 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
2174 | // CHECK-DAG: call void @__kmpc_for_static_fini( |
2175 | // CHECK: ret |
2176 | |
2177 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( |
2178 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) |
2179 | |
2180 | // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( |
2181 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2182 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, |
2183 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2184 | // CHECK: ret |
2185 | |
2186 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
2187 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2188 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2189 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2190 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2191 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
2192 | |
2193 | // initialize lb and ub to PrevLB and PrevUB |
2194 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2195 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2196 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2197 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2198 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2199 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2200 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2201 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2202 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2203 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2204 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2205 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
2206 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
2207 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
2208 | |
2209 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
2210 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
2211 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
2212 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
2213 | |
2214 | // initialize omp.iv (IV = LB) |
2215 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
2216 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2217 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2218 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
2219 | |
2220 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
2221 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2222 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2223 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
2224 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
2225 | |
2226 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
2227 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2228 | // skip body branch |
2229 | // CHECK: br{{.+}} |
2230 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
2231 | |
2232 | // IV = IV + 1 and inner loop latch |
2233 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
2234 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
2235 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
2236 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
2237 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
2238 | |
// no NextLB/NextUB computation here: with dynamic (dispatch) scheduling,
// __kmpc_dispatch_next_4 in the outer loop header supplies the next LB/UB,
// so the outer-loop inc block only branches back to the header
2240 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
2241 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
2242 | |
2243 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
2244 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
2245 | |
2246 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
2247 | // CHECK: ret |
2248 | |
2249 | // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( |
2250 | // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) |
2251 | |
2252 | // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( |
2253 | // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, |
2254 | // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, |
2255 | // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case |
2256 | // CHECK: ret |
2257 | |
2258 | // 'parallel for' implementation using outer and inner loops and PrevEUB |
2259 | // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) |
2260 | // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, |
2261 | // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, |
2262 | // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, |
2263 | // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, |
2264 | |
2265 | // initialize lb and ub to PrevLB and PrevUB |
2266 | // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], |
2267 | // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], |
2268 | // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], |
2269 | // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} |
2270 | // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], |
2271 | // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} |
2272 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], |
2273 | // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], |
2274 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], |
2275 | // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], |
2276 | // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2277 | // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], |
2278 | // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) |
2279 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] |
2280 | |
2281 | // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: |
2282 | // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) |
2283 | // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 |
2284 | // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] |
2285 | |
2286 | // initialize omp.iv (IV = LB) |
2287 | // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: |
2288 | // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], |
2289 | // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], |
2290 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] |
2291 | |
2292 | // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: |
2293 | // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2294 | // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], |
2295 | // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] |
2296 | // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] |
2297 | |
2298 | // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: |
2299 | // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], |
2300 | // skip body branch |
2301 | // CHECK: br{{.+}} |
2302 | // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] |
2303 | |
2304 | // IV = IV + 1 and inner loop latch |
2305 | // CHECK: [[OMP_PF_INNER_LOOP_INC]]: |
2306 | // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], |
2307 | // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 |
2308 | // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], |
2309 | // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] |
2310 | |
// no NextLB/NextUB computation here: with dynamic (dispatch) scheduling,
// __kmpc_dispatch_next_4 in the outer loop header supplies the next LB/UB,
// so the outer-loop inc block only branches back to the header
2312 | // CHECK: [[OMP_PF_INNER_LOOP_END]]: |
2313 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] |
2314 | |
2315 | // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: |
2316 | // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] |
2317 | |
2318 | // CHECK: [[OMP_PF_OUTER_LOOP_END]]: |
2319 | // CHECK: ret |
2320 | |
2321 | // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} |
2322 | #endif |
2323 | |