1 | // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s |
2 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s |
3 | // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s |
4 | // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s |
5 | // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s |
6 | |
7 | // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
8 | // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s |
9 | // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
10 | // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
11 | // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s |
12 | // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} |
13 | // expected-no-diagnostics |
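// This test verifies LLVM IR generation for '#pragma omp parallel' with
// 'reduction' clauses: scalar reductions (+, min), class-type reductions
// (&, &&) on S<T>, reductions over bit-field and reference members (SS, SST),
// array sections, and captures in lambdas (-DLAMBDA) and blocks (-DBLOCKS).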
14 | #ifndef HEADER |
15 | #define HEADER |
16 | |
17 | volatile int g __attribute__((aligned(128))) = 1212; |
18 | |
19 | template <class T> |
20 | struct S { |
21 | T f; |
22 | S(T a) : f(a + g) {} |
23 | S() : f(g) {} |
24 | operator T() { return T(); } |
25 | S &operator&(const S &) { return *this; } |
26 | ~S() {} |
27 | }; |
28 | |
29 | struct SS { |
30 | int a; |
31 | int b : 4; |
32 | int &c; |
33 | SS(int &d) : a(0), b(0), c(d) { |
34 | #pragma omp parallel reduction(+: a, b, c) |
35 | #ifdef LAMBDA |
36 | [&]() { |
37 | ++this->a, --b, (this)->c /= 1; |
38 | #pragma omp parallel reduction(&: a, b, c) |
39 | ++(this)->a, --b, this->c /= 1; |
40 | }(); |
41 | #elif defined(BLOCKS) |
42 | ^{ |
43 | ++a; |
44 | --this->b; |
45 | (this)->c /= 1; |
46 | #pragma omp parallel reduction(-: a, b, c) |
47 | ++(this)->a, --b, this->c /= 1; |
48 | }(); |
49 | #else |
50 | ++this->a, --b, c /= 1; |
51 | #endif |
52 | } |
53 | }; |
54 | |
55 | template<typename T> |
56 | struct SST { |
57 | T a; |
58 | SST() : a(T()) { |
59 | #pragma omp parallel reduction(*: a) |
60 | #ifdef LAMBDA |
61 | [&]() { |
62 | [&]() { |
63 | ++this->a; |
64 | #pragma omp parallel reduction(&& :a) |
65 | ++(this)->a; |
66 | }(); |
67 | }(); |
68 | #elif defined(BLOCKS) |
69 | ^{ |
70 | ^{ |
71 | ++a; |
72 | #pragma omp parallel reduction(|: a) |
73 | ++(this)->a; |
74 | }(); |
75 | }(); |
76 | #else |
77 | ++(this)->a; |
78 | #endif |
79 | } |
80 | }; |
81 | |
82 | // CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 |
83 | // LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 |
84 | // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 |
85 | // CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } |
86 | // CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } |
87 | // CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* |
88 | // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer |
89 | |
// CHECK: foo_array_sect
// CHECK: call void {{.+}}@__kmpc_fork_call(
// CHECK: ret void
93 | void foo_array_sect(short x[1]) { |
94 | #pragma omp parallel reduction(+ : x[:]) |
95 | {} |
96 | } |
97 | |
98 | template <typename T> |
99 | T tmain() { |
100 | T t; |
101 | S<T> test; |
102 | SST<T> sst; |
103 | T t_var __attribute__((aligned(128))) = T(), t_var1 __attribute__((aligned(128))); |
104 | T vec[] = {1, 2}; |
105 | S<T> s_arr[] = {1, 2}; |
106 | S<T> var __attribute__((aligned(128))) (3), var1 __attribute__((aligned(128))); |
107 | #pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1) |
108 | { |
109 | vec[0] = t_var; |
110 | s_arr[0] = var; |
111 | } |
112 | return T(); |
113 | } |
114 | |
115 | int sivar; |
116 | int main() { |
117 | SS ss(sivar); |
118 | #ifdef LAMBDA |
119 | // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, |
120 | // LAMBDA-LABEL: @main |
121 | // LAMBDA: alloca [[SS_TY]], |
122 | // LAMBDA: alloca [[CAP_TY:%.+]], |
123 | // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@[^(]+]]([[CAP_TY]]* |
124 | [&]() { |
125 | // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( |
126 | // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]]) |
127 | #pragma omp parallel reduction(+:g) |
128 | { |
129 | // LAMBDA: define {{.+}} @{{.+}}([[SS_TY]]* |
130 | // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 |
131 | // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* % |
132 | // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 |
133 | // LAMBDA: store i8 |
134 | // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 |
135 | // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 |
136 | // LAMBDA-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 |
137 | // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 |
138 | // LAMBDA: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i32*, i32*, i32*)* [[SS_MICROTASK:@.+]] to void |
139 | // LAMBDA: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 |
140 | // LAMBDA: store i8 %{{.+}}, i8* [[B_REF]], |
141 | // LAMBDA: ret |
142 | |
143 | // LAMBDA: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}}) |
144 | // LAMBDA-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* % |
145 | // LAMBDA: call{{.*}} void |
146 | // LAMBDA: ret void |
147 | |
148 | // LAMBDA: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* |
149 | // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, |
150 | // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, |
151 | // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, |
152 | // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[A_PRIV]], |
153 | // LAMBDA: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], |
154 | // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[B_PRIV]], |
155 | // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[C_PRIV]], |
156 | // LAMBDA: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], |
157 | // LAMBDA: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], |
158 | // LAMBDA-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], |
159 | // LAMBDA-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 |
160 | // LAMBDA-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], |
161 | // LAMBDA-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], |
162 | // LAMBDA-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 |
163 | // LAMBDA-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], |
164 | // LAMBDA-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], |
165 | // LAMBDA-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], |
166 | // LAMBDA-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 |
167 | // LAMBDA-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], |
168 | // LAMBDA: call i32 @__kmpc_reduce_nowait( |
169 | // LAMBDA: ret void |
170 | |
171 | // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) |
172 | // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, |
173 | |
174 | // Reduction list for runtime. |
175 | // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x i8*], |
176 | |
177 | // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]] |
178 | // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 |
179 | g = 1; |
180 | // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 |
181 | // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 |
182 | // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] |
183 | // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) |
184 | |
185 | // LAMBDA: [[G_PRIV_REF:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[RED_LIST]], i64 0, i64 0 |
186 | // LAMBDA: [[BITCAST:%.+]] = bitcast i32* [[G_PRIVATE_ADDR]] to i8* |
187 | // LAMBDA: store i8* [[BITCAST]], i8** [[G_PRIV_REF]], |
188 | // LAMBDA: call i32 @__kmpc_reduce_nowait( |
189 | // LAMBDA: switch i32 %{{.+}}, label %[[REDUCTION_DONE:.+]] [ |
190 | // LAMBDA: i32 1, label %[[CASE1:.+]] |
191 | // LAMBDA: i32 2, label %[[CASE2:.+]] |
192 | // LAMBDA: [[CASE1]] |
193 | // LAMBDA: [[G_VAL:%.+]] = load i32, i32* [[G_REF]] |
194 | // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]] |
195 | // LAMBDA: [[ADD:%.+]] = add nsw i32 [[G_VAL]], [[G_PRIV_VAL]] |
196 | // LAMBDA: store i32 [[ADD]], i32* [[G_REF]] |
197 | // LAMBDA: call void @__kmpc_end_reduce_nowait( |
198 | // LAMBDA: br label %[[REDUCTION_DONE]] |
199 | // LAMBDA: [[CASE2]] |
200 | // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]] |
201 | // LAMBDA: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic |
202 | // LAMBDA: br label %[[REDUCTION_DONE]] |
203 | // LAMBDA: [[REDUCTION_DONE]] |
204 | // LAMBDA: ret void |
205 | [&]() { |
206 | // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) |
207 | // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], |
208 | g = 2; |
209 | // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] |
210 | // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 |
211 | // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] |
212 | // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] |
213 | }(); |
214 | } |
215 | }(); |
216 | return 0; |
217 | #elif defined(BLOCKS) |
218 | // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212, |
219 | // BLOCKS-LABEL: @main |
220 | // BLOCKS: call |
221 | // BLOCKS: call void {{%.+}}(i8 |
222 | ^{ |
223 | // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* |
224 | // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]]) |
225 | #pragma omp parallel reduction(-:g) |
226 | { |
227 | // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) |
228 | // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, |
229 | |
230 | // Reduction list for runtime. |
231 | // BLOCKS: [[RED_LIST:%.+]] = alloca [1 x i8*], |
232 | |
233 | // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]] |
234 | // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 |
235 | g = 1; |
236 | // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 |
237 | // BLOCKS-NOT: [[G]]{{[[^:word:]]}} |
238 | // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]] |
239 | // BLOCKS-NOT: [[G]]{{[[^:word:]]}} |
240 | // BLOCKS: call void {{%.+}}(i8 |
241 | |
242 | // BLOCKS: [[G_PRIV_REF:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[RED_LIST]], i64 0, i64 0 |
243 | // BLOCKS: [[BITCAST:%.+]] = bitcast i32* [[G_PRIVATE_ADDR]] to i8* |
244 | // BLOCKS: store i8* [[BITCAST]], i8** [[G_PRIV_REF]], |
245 | // BLOCKS: call i32 @__kmpc_reduce_nowait( |
246 | // BLOCKS: switch i32 %{{.+}}, label %[[REDUCTION_DONE:.+]] [ |
247 | // BLOCKS: i32 1, label %[[CASE1:.+]] |
248 | // BLOCKS: i32 2, label %[[CASE2:.+]] |
249 | // BLOCKS: [[CASE1]] |
250 | // BLOCKS: [[G_VAL:%.+]] = load i32, i32* [[G_REF]] |
251 | // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]] |
252 | // BLOCKS: [[ADD:%.+]] = add nsw i32 [[G_VAL]], [[G_PRIV_VAL]] |
253 | // BLOCKS: store i32 [[ADD]], i32* [[G_REF]] |
254 | // BLOCKS: call void @__kmpc_end_reduce_nowait( |
255 | // BLOCKS: br label %[[REDUCTION_DONE]] |
256 | // BLOCKS: [[CASE2]] |
257 | // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]] |
258 | // BLOCKS: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic |
259 | // BLOCKS: br label %[[REDUCTION_DONE]] |
260 | // BLOCKS: [[REDUCTION_DONE]] |
261 | // BLOCKS: ret void |
262 | ^{ |
263 | // BLOCKS: define {{.+}} void {{@.+}}(i8* |
264 | g = 2; |
265 | // BLOCKS-NOT: [[G]]{{[[^:word:]]}} |
266 | // BLOCKS: store i{{[0-9]+}} 2, i{{[0-9]+}}* |
267 | // BLOCKS-NOT: [[G]]{{[[^:word:]]}} |
268 | // BLOCKS: ret |
269 | }(); |
270 | } |
271 | }(); |
272 | return 0; |
273 | // BLOCKS: define {{.+}} @{{.+}}([[SS_TY]]* |
274 | // BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 |
275 | // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* % |
276 | // BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 |
277 | // BLOCKS: store i8 |
278 | // BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 |
279 | // BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 |
280 | // BLOCKS-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 |
281 | // BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 |
282 | // BLOCKS: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i32*, i32*, i32*)* [[SS_MICROTASK:@.+]] to void |
283 | // BLOCKS: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 |
284 | // BLOCKS: store i8 %{{.+}}, i8* [[B_REF]], |
285 | // BLOCKS: ret |
286 | |
287 | // BLOCKS: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}}) |
288 | // BLOCKS-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* % |
289 | // BLOCKS: call{{.*}} void |
290 | // BLOCKS: ret void |
291 | |
292 | // BLOCKS: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}}) |
293 | // BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, |
294 | // BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, |
295 | // BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, |
296 | // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[A_PRIV]], |
297 | // BLOCKS: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], |
298 | // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[B_PRIV]], |
299 | // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[C_PRIV]], |
300 | // BLOCKS: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], |
301 | // BLOCKS: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], |
302 | // BLOCKS-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], |
303 | // BLOCKS-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 |
304 | // BLOCKS-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], |
305 | // BLOCKS-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], |
306 | // BLOCKS-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 |
307 | // BLOCKS-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], |
308 | // BLOCKS-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], |
309 | // BLOCKS-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], |
310 | // BLOCKS-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 |
311 | // BLOCKS-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], |
312 | // BLOCKS: call i32 @__kmpc_reduce_nowait( |
313 | // BLOCKS: ret void |
314 | #else |
315 | S<float> test; |
316 | float t_var = 0, t_var1; |
317 | int vec[] = {1, 2}; |
318 | S<float> s_arr[] = {1, 2}; |
319 | S<float> var(3), var1; |
320 | float _Complex cf; |
321 | #pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1) |
322 | { |
323 | vec[0] = t_var; |
324 | s_arr[0] = var; |
325 | } |
326 | if (var1) |
327 | #pragma omp parallel reduction(+ : t_var) reduction(& : var) reduction(&& : var1) reduction(min : t_var1) |
328 | while (1) { |
329 | vec[0] = t_var; |
330 | s_arr[0] = var; |
331 | } |
332 | #pragma omp parallel reduction(+ : cf) |
333 | ; |
334 | return tmain<int>(); |
335 | #endif |
336 | } |
337 | |
338 | // CHECK: define {{.*}}i{{[0-9]+}} @main() |
339 | // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], |
340 | // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) |
341 | // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK:@.+]] to void |
342 | // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK1:@.+]] to void |
343 | // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, { float, float }*)* [[MAIN_MICROTASK2:@.+]] to void |
344 | // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() |
345 | // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* |
346 | // CHECK: ret |
347 | // |
348 | // CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, |
349 | // CHECK: [[T_VAR_PRIV:%.+]] = alloca float, |
350 | // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], |
351 | // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]], |
352 | // CHECK: [[T_VAR1_PRIV:%.+]] = alloca float, |
353 | |
354 | // Reduction list for runtime. |
355 | // CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*], |
356 | |
357 | // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], |
358 | |
359 | // CHECK: [[T_VAR_REF:%.+]] = load float*, float** % |
360 | // CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % |
361 | // CHECK: [[VAR1_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % |
362 | // CHECK: [[T_VAR1_REF:%.+]] = load float*, float** % |
363 | |
// For the + reduction operation, the initial value of the private variable is 0.
365 | // CHECK: store float 0.0{{.+}}, float* [[T_VAR_PRIV]], |
366 | |
// For the & reduction operation, the initial value of the private variable is all-ones (every bit set).
368 | // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) |
369 | |
// For the && reduction operation, the initial value of the private variable is 1.0.
371 | // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR1_PRIV]]) |
372 | |
// For the min reduction operation, the initial value of the private variable is the largest representable value.
374 | // CHECK: store float 0x47EFFFFFE0000000, float* [[T_VAR1_PRIV]], |
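// (0x47EFFFFFE0000000 is FLT_MAX, 3.40282347e+38, spelled as an IEEE
// double-precision hex constant: the identity element for min over float.)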
375 | |
376 | // Skip checks for internal operations. |
377 | |
378 | // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; |
379 | |
380 | // CHECK: [[T_VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 0 |
381 | // CHECK: [[BITCAST:%.+]] = bitcast float* [[T_VAR_PRIV]] to i8* |
382 | // CHECK: store i8* [[BITCAST]], i8** [[T_VAR_PRIV_REF]], |
383 | // CHECK: [[VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 1 |
384 | // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to i8* |
385 | // CHECK: store i8* [[BITCAST]], i8** [[VAR_PRIV_REF]], |
386 | // CHECK: [[VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 2 |
387 | // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_PRIV]] to i8* |
388 | // CHECK: store i8* [[BITCAST]], i8** [[VAR1_PRIV_REF]], |
389 | // CHECK: [[T_VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 3 |
390 | // CHECK: [[BITCAST:%.+]] = bitcast float* [[T_VAR1_PRIV]] to i8* |
391 | // CHECK: store i8* [[BITCAST]], i8** [[T_VAR1_PRIV_REF]], |
392 | |
393 | // res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>); |
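// A sketch of the contract, inferred from the switch generated below: the
// runtime returns 1 when this thread should combine the private copies
// non-atomically (and then call __kmpc_end_reduce_nowait), 2 when it should
// combine them atomically, and anything else when there is nothing to do.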
394 | |
395 | // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] |
396 | // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] |
397 | // CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8* |
398 | // CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]]) |
399 | |
400 | // switch(res) |
401 | // CHECK: switch i32 [[RES]], label %[[RED_DONE:.+]] [ |
402 | // CHECK: i32 1, label %[[CASE1:.+]] |
403 | // CHECK: i32 2, label %[[CASE2:.+]] |
404 | // CHECK: ] |
405 | |
406 | // case 1: |
407 | // t_var += t_var_reduction; |
408 | // CHECK: [[T_VAR_VAL:%.+]] = load float, float* [[T_VAR_REF]], |
409 | // CHECK: [[T_VAR_PRIV_VAL:%.+]] = load float, float* [[T_VAR_PRIV]], |
410 | // CHECK: [[UP:%.+]] = fadd float [[T_VAR_VAL]], [[T_VAR_PRIV_VAL]] |
411 | // CHECK: store float [[UP]], float* [[T_VAR_REF]], |
412 | |
413 | // var = var.operator &(var_reduction); |
414 | // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]]) |
415 | // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8* |
416 | // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* |
417 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
418 | |
419 | // var1 = var1.operator &&(var1_reduction); |
420 | // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]]) |
421 | // CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0 |
422 | // CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]] |
423 | // CHECK: [[TRUE]] |
424 | // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_PRIV]]) |
425 | // CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0 |
426 | // CHECK: br label %[[END2]] |
427 | // CHECK: [[END2]] |
428 | // CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ] |
429 | // CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float |
430 | // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) |
431 | // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8* |
432 | // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* |
433 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
434 | |
435 | // t_var1 = min(t_var1, t_var1_reduction); |
436 | // CHECK: [[T_VAR1_VAL:%.+]] = load float, float* [[T_VAR1_REF]], |
437 | // CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load float, float* [[T_VAR1_PRIV]], |
438 | // CHECK: [[CMP:%.+]] = fcmp olt float [[T_VAR1_VAL]], [[T_VAR1_PRIV_VAL]] |
439 | // CHECK: br i1 [[CMP]] |
440 | // CHECK: [[UP:%.+]] = phi float |
441 | // CHECK: store float [[UP]], float* [[T_VAR1_REF]], |
442 | |
443 | // __kmpc_end_reduce_nowait(<loc>, <gtid>, &<lock>); |
444 | // CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]]) |
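// The '_nowait' entry points end the reduction without an extra barrier;
// the implicit barrier at the end of the parallel region already
// synchronizes the threads.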
445 | |
446 | // break; |
447 | // CHECK: br label %[[RED_DONE]] |
448 | |
449 | // case 2: |
450 | // t_var += t_var_reduction; |
451 | // CHECK: load float, float* [[T_VAR_PRIV]] |
452 | // CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32* |
453 | // CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR_REF_INT]] monotonic, |
454 | // CHECK: br label %[[CONT:.+]] |
455 | // CHECK: [[CONT]] |
456 | // CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %[[CONT]] ] |
457 | // CHECK: fadd float |
458 | // CHECK: [[UP_INT:%.+]] = load i32 |
459 | // CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32* |
460 | // CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic |
461 | // CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0 |
462 | // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1 |
463 | // CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]] |
464 | // CHECK: [[ATOMIC_DONE]] |
465 | |
466 | // var = var.operator &(var_reduction); |
467 | // CHECK: call void @__kmpc_critical( |
468 | // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]]) |
469 | // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8* |
470 | // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* |
471 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
472 | // CHECK: call void @__kmpc_end_critical( |
473 | |
474 | // var1 = var1.operator &&(var1_reduction); |
475 | // CHECK: call void @__kmpc_critical( |
476 | // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]]) |
477 | // CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0 |
478 | // CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]] |
479 | // CHECK: [[TRUE]] |
480 | // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_PRIV]]) |
481 | // CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0 |
482 | // CHECK: br label %[[END2]] |
483 | // CHECK: [[END2]] |
484 | // CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ] |
485 | // CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float |
486 | // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) |
487 | // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8* |
488 | // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* |
489 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
490 | // CHECK: call void @__kmpc_end_critical( |
491 | |
492 | // t_var1 = min(t_var1, t_var1_reduction); |
493 | // CHECK: load float, float* [[T_VAR1_PRIV]] |
494 | // CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32* |
495 | // CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR1_REF_INT]] monotonic, |
496 | // CHECK: br label %[[CONT:.+]] |
497 | // CHECK: [[CONT]] |
498 | // CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %{{.+}} ] |
499 | // CHECK: [[CMP:%.+]] = fcmp olt float |
500 | // CHECK: br i1 [[CMP]] |
501 | // CHECK: [[UP:%.+]] = phi float |
502 | // CHECK: [[UP_INT:%.+]] = load i32 |
503 | // CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32* |
504 | // CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR1_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic |
505 | // CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0 |
506 | // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1 |
507 | // CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]] |
508 | // CHECK: [[ATOMIC_DONE]] |
509 | |
510 | // break; |
511 | // CHECK: br label %[[RED_DONE]] |
512 | // CHECK: [[RED_DONE]] |
513 | |
514 | // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) |
515 | // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* |
516 | // CHECK: ret void |
517 | |
518 | // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { |
519 | // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); |
520 | // ... |
521 | // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], |
522 | // *(Type<n>-1*)rhs[<n>-1]); |
523 | // } |
524 | // CHECK: define internal void [[REDUCTION_FUNC]](i8*, i8*) |
525 | // t_var_lhs = (float*)lhs[0]; |
526 | // CHECK: [[T_VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS:%.+]], i64 0, i64 0 |
527 | // CHECK: [[T_VAR_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR_RHS_REF]], |
528 | // CHECK: [[T_VAR_RHS:%.+]] = bitcast i8* [[T_VAR_RHS_VOID]] to float* |
529 | // t_var_rhs = (float*)rhs[0]; |
530 | // CHECK: [[T_VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS:%.+]], i64 0, i64 0 |
531 | // CHECK: [[T_VAR_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR_LHS_REF]], |
532 | // CHECK: [[T_VAR_LHS:%.+]] = bitcast i8* [[T_VAR_LHS_VOID]] to float* |
533 | |
534 | // var_lhs = (S<float>*)lhs[1]; |
535 | // CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 1 |
536 | // CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]], |
537 | // CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to [[S_FLOAT_TY]]* |
538 | // var_rhs = (S<float>*)rhs[1]; |
539 | // CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 1 |
540 | // CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]], |
541 | // CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to [[S_FLOAT_TY]]* |
542 | |
543 | // var1_lhs = (S<float>*)lhs[2]; |
544 | // CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 2 |
545 | // CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], |
546 | // CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to [[S_FLOAT_TY]]* |
547 | // var1_rhs = (S<float>*)rhs[2]; |
548 | // CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 2 |
549 | // CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], |
550 | // CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to [[S_FLOAT_TY]]* |
551 | |
552 | // t_var1_lhs = (float*)lhs[3]; |
553 | // CHECK: [[T_VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 3 |
554 | // CHECK: [[T_VAR1_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_RHS_REF]], |
555 | // CHECK: [[T_VAR1_RHS:%.+]] = bitcast i8* [[T_VAR1_RHS_VOID]] to float* |
556 | // t_var1_rhs = (float*)rhs[3]; |
557 | // CHECK: [[T_VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 3 |
558 | // CHECK: [[T_VAR1_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_LHS_REF]], |
559 | // CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to float* |
560 | |
561 | // t_var_lhs += t_var_rhs; |
562 | // CHECK: [[T_VAR_LHS_VAL:%.+]] = load float, float* [[T_VAR_LHS]], |
563 | // CHECK: [[T_VAR_RHS_VAL:%.+]] = load float, float* [[T_VAR_RHS]], |
564 | // CHECK: [[UP:%.+]] = fadd float [[T_VAR_LHS_VAL]], [[T_VAR_RHS_VAL]] |
565 | // CHECK: store float [[UP]], float* [[T_VAR_LHS]], |
566 | |
567 | // var_lhs = var_lhs.operator &(var_rhs); |
568 | // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_LHS]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_RHS]]) |
569 | // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_LHS]] to i8* |
570 | // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* |
571 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
572 | |
573 | // var1_lhs = var1_lhs.operator &&(var1_rhs); |
574 | // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_LHS]]) |
575 | // CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0 |
576 | // CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]] |
577 | // CHECK: [[TRUE]] |
578 | // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_RHS]]) |
579 | // CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0 |
580 | // CHECK: br label %[[END2]] |
581 | // CHECK: [[END2]] |
582 | // CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ] |
583 | // CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float |
584 | // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) |
585 | // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_LHS]] to i8* |
586 | // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* |
587 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
588 | |
589 | // t_var1_lhs = min(t_var1_lhs, t_var1_rhs); |
590 | // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load float, float* [[T_VAR1_LHS]], |
591 | // CHECK: [[T_VAR1_RHS_VAL:%.+]] = load float, float* [[T_VAR1_RHS]], |
592 | // CHECK: [[CMP:%.+]] = fcmp olt float [[T_VAR1_LHS_VAL]], [[T_VAR1_RHS_VAL]] |
593 | // CHECK: br i1 [[CMP]] |
594 | // CHECK: [[UP:%.+]] = phi float |
595 | // CHECK: store float [[UP]], float* [[T_VAR1_LHS]], |
596 | // CHECK: ret void |
597 | |
598 | // CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, |
599 | // CHECK: [[T_VAR_PRIV:%.+]] = alloca float, |
600 | // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], |
601 | // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]], |
602 | // CHECK: [[T_VAR1_PRIV:%.+]] = alloca float, |
603 | |
604 | // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], |
605 | |
606 | // CHECK: [[T_VAR_REF:%.+]] = load float*, float** % |
607 | // CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % |
608 | // CHECK: [[VAR1_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % |
609 | // CHECK: [[T_VAR1_REF:%.+]] = load float*, float** % |
610 | |
// For the + reduction operation, the initial value of the private variable is 0.
612 | // CHECK: store float 0.0{{.+}}, float* [[T_VAR_PRIV]], |
613 | |
// For the & reduction operation, the initial value of the private variable is all-ones (every bit set).
615 | // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) |
616 | |
// For the && reduction operation, the initial value of the private variable is 1.0.
618 | // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR1_PRIV]]) |
619 | |
// For the min reduction operation, the initial value of the private variable is the largest representable value.
621 | // CHECK: store float 0x47EFFFFFE0000000, float* [[T_VAR1_PRIV]], |
622 | |
623 | // CHECK-NOT: call i32 @__kmpc_reduce |
624 | |
625 | // CHECK: ret void |
626 | |
627 | // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() |
628 | // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], |
629 | // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) |
630 | // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*, [[S_INT_TY]]*, i32*)* [[TMAIN_MICROTASK:@.+]] to void |
631 | // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* |
632 | // CHECK: ret |
633 | // |
634 | // CHECK: define {{.+}} @{{.+}}([[SS_TY]]* |
635 | // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 |
636 | // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* % |
637 | // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 |
638 | // CHECK: store i8 |
639 | // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 |
640 | // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 |
641 | // CHECK-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 |
642 | // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 |
643 | // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*)* [[SS_MICROTASK:@.+]] to void |
644 | // CHECK: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1 |
645 | // CHECK: store i8 %{{.+}}, i8* [[B_REF]], |
646 | // CHECK: ret |
647 | |
648 | // CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* |
649 | // CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, |
650 | // CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, |
651 | // CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, |
652 | // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[A_PRIV]], |
653 | // CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], |
654 | // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[B_PRIV]], |
655 | // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[C_PRIV]], |
656 | // CHECK: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], |
657 | // CHECK: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], |
658 | // CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], |
659 | // CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 |
660 | // CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], |
661 | // CHECK-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], |
662 | // CHECK-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 |
663 | // CHECK-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], |
664 | // CHECK-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], |
665 | // CHECK-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], |
666 | // CHECK-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 |
667 | // CHECK-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], |
668 | // CHECK: call i32 @__kmpc_reduce_nowait( |
669 | // CHECK: ret void |
670 | |
671 | // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, |
672 | // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 |
673 | // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 |
674 | // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 |
675 | // CHECK: [[T_VAR1_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 |
676 | |
677 | // Reduction list for runtime. |
678 | // CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*], |
679 | |
680 | // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], |
681 | |
682 | // CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % |
683 | // CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** % |
684 | // CHECK: [[VAR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** % |
685 | // CHECK: [[T_VAR1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % |
686 | |
// For the + reduction operation, the initial value of the private variable is 0.
688 | // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[T_VAR_PRIV]], |
689 | |
// For the & reduction operation, the initial value of the private variable is all-ones (every bit set).
691 | // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[VAR_PRIV]]) |
692 | |
// For the && reduction operation, the initial value of the private variable is 1.0.
694 | // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[VAR1_PRIV]]) |
695 | |
// For the min reduction operation, the initial value of the private variable is the largest representable value.
697 | // CHECK: store i{{[0-9]+}} 2147483647, i{{[0-9]+}}* [[T_VAR1_PRIV]], |
698 | |
699 | // Skip checks for internal operations. |
700 | |
701 | // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; |
702 | |
703 | // CHECK: [[T_VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 0 |
704 | // CHECK: [[BITCAST:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR_PRIV]] to i8* |
705 | // CHECK: store i8* [[BITCAST]], i8** [[T_VAR_PRIV_REF]], |
706 | // CHECK: [[VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 1 |
707 | // CHECK: [[BITCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to i8* |
708 | // CHECK: store i8* [[BITCAST]], i8** [[VAR_PRIV_REF]], |
709 | // CHECK: [[VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 2 |
710 | // CHECK: [[BITCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_PRIV]] to i8* |
711 | // CHECK: store i8* [[BITCAST]], i8** [[VAR1_PRIV_REF]], |
712 | // CHECK: [[T_VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 3 |
713 | // CHECK: [[BITCAST:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR1_PRIV]] to i8* |
714 | // CHECK: store i8* [[BITCAST]], i8** [[T_VAR1_PRIV_REF]], |
715 | |
716 | // res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>); |
717 | |
718 | // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] |
719 | // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] |
720 | // CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8* |
721 | // CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]]) |
722 | |
723 | // switch(res) |
724 | // CHECK: switch i32 [[RES]], label %[[RED_DONE:.+]] [ |
725 | // CHECK: i32 1, label %[[CASE1:.+]] |
726 | // CHECK: i32 2, label %[[CASE2:.+]] |
727 | // CHECK: ] |
728 | |
729 | // case 1: |
730 | // t_var += t_var_reduction; |
731 | // CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]], |
732 | // CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], |
733 | // CHECK: [[UP:%.+]] = add nsw i{{[0-9]+}} [[T_VAR_VAL]], [[T_VAR_PRIV_VAL]] |
734 | // CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR_REF]], |
735 | |
736 | // var = var.operator &(var_reduction); |
737 | // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) |
738 | // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* |
739 | // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* |
740 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
741 | |
742 | // var1 = var1.operator &&(var1_reduction); |
743 | // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]]) |
744 | // CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0 |
745 | // CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]] |
746 | // CHECK: [[TRUE]] |
747 | // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_PRIV]]) |
748 | // CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0 |
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
750 | // CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ] |
751 | // CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32 |
752 | // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) |
753 | // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* |
754 | // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* |
755 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
756 | |
757 | // t_var1 = min(t_var1, t_var1_reduction); |
758 | // CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]], |
759 | // CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]], |
760 | // CHECK: [[CMP:%.+]] = icmp slt i{{[0-9]+}} [[T_VAR1_VAL]], [[T_VAR1_PRIV_VAL]] |
761 | // CHECK: br i1 [[CMP]] |
762 | // CHECK: [[UP:%.+]] = phi i32 |
763 | // CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_REF]], |
764 | |
765 | // __kmpc_end_reduce_nowait(<loc>, <gtid>, &<lock>); |
766 | // CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]]) |
767 | |
768 | // break; |
769 | // CHECK: br label %[[RED_DONE]] |
770 | |
771 | // case 2: |
772 | // t_var += t_var_reduction; |
773 | // CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]] |
774 | // CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic |
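// Integer + maps directly onto 'atomicrmw add'; contrast the float case in
// MAIN_MICROTASK above, which needed a manual cmpxchg loop.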
775 | |
776 | // var = var.operator &(var_reduction); |
777 | // CHECK: call void @__kmpc_critical( |
778 | // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) |
779 | // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* |
780 | // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* |
781 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
782 | // CHECK: call void @__kmpc_end_critical( |
783 | |
784 | // var1 = var1.operator &&(var1_reduction); |
785 | // CHECK: call void @__kmpc_critical( |
786 | // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]]) |
787 | // CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0 |
788 | // CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]] |
789 | // CHECK: [[TRUE]] |
790 | // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_PRIV]]) |
791 | // CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0 |
792 | // CHECK: br label %[[END2]] |
793 | // CHECK: [[END2]] |
794 | // CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ] |
795 | // CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32 |
796 | // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) |
797 | // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* |
798 | // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* |
799 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
800 | // CHECK: call void @__kmpc_end_critical( |
801 | |
802 | // t_var1 = min(t_var1, t_var1_reduction); |
803 | // CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]] |
804 | // CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic |
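// Likewise, signed-integer min maps onto 'atomicrmw min', while float min
// above required the cmpxchg loop.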
805 | |
806 | // break; |
807 | // CHECK: br label %[[RED_DONE]] |
808 | // CHECK: [[RED_DONE]] |
809 | |
810 | // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) |
811 | // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* |
812 | // CHECK: ret void |
813 | |
814 | // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { |
815 | // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); |
816 | // ... |
817 | // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], |
818 | // *(Type<n>-1*)rhs[<n>-1]); |
819 | // } |
820 | // CHECK: define internal void [[REDUCTION_FUNC]](i8*, i8*) |
821 | // t_var_lhs = (i{{[0-9]+}}*)lhs[0]; |
822 | // CHECK: [[T_VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS:%.+]], i64 0, i64 0 |
823 | // CHECK: [[T_VAR_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR_RHS_REF]], |
824 | // CHECK: [[T_VAR_RHS:%.+]] = bitcast i8* [[T_VAR_RHS_VOID]] to i{{[0-9]+}}* |
825 | // t_var_rhs = (i{{[0-9]+}}*)rhs[0]; |
826 | // CHECK: [[T_VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS:%.+]], i64 0, i64 0 |
827 | // CHECK: [[T_VAR_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR_LHS_REF]], |
828 | // CHECK: [[T_VAR_LHS:%.+]] = bitcast i8* [[T_VAR_LHS_VOID]] to i{{[0-9]+}}* |
829 | |
830 | // var_lhs = (S<i{{[0-9]+}}>*)lhs[1]; |
831 | // CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 1 |
832 | // CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]], |
833 | // CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to [[S_INT_TY]]* |
834 | // var_rhs = (S<i{{[0-9]+}}>*)rhs[1]; |
835 | // CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 1 |
836 | // CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]], |
837 | // CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to [[S_INT_TY]]* |
838 | |
839 | // var1_lhs = (S<i{{[0-9]+}}>*)lhs[2]; |
840 | // CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 2 |
841 | // CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], |
842 | // CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to [[S_INT_TY]]* |
843 | // var1_rhs = (S<i{{[0-9]+}}>*)rhs[2]; |
844 | // CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 2 |
845 | // CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], |
846 | // CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to [[S_INT_TY]]* |
847 | |
848 | // t_var1_lhs = (i{{[0-9]+}}*)lhs[3]; |
849 | // CHECK: [[T_VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 3 |
850 | // CHECK: [[T_VAR1_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_RHS_REF]], |
851 | // CHECK: [[T_VAR1_RHS:%.+]] = bitcast i8* [[T_VAR1_RHS_VOID]] to i{{[0-9]+}}* |
852 | // t_var1_rhs = (i{{[0-9]+}}*)rhs[3]; |
853 | // CHECK: [[T_VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 3 |
854 | // CHECK: [[T_VAR1_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_LHS_REF]], |
855 | // CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to i{{[0-9]+}}* |
856 | |
857 | // t_var_lhs += t_var_rhs; |
858 | // CHECK: [[T_VAR_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_LHS]], |
859 | // CHECK: [[T_VAR_RHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_RHS]], |
860 | // CHECK: [[UP:%.+]] = add nsw i{{[0-9]+}} [[T_VAR_LHS_VAL]], [[T_VAR_RHS_VAL]] |
861 | // CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR_LHS]], |
862 | |
863 | // var_lhs = var_lhs.operator &(var_rhs); |
864 | // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]]) |
865 | // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8* |
866 | // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* |
867 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
868 | |
869 | // var1_lhs = var1_lhs.operator &&(var1_rhs); |
870 | // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]]) |
871 | // CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0 |
872 | // CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]] |
873 | // CHECK: [[TRUE]] |
874 | // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_RHS]]) |
875 | // CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0 |
876 | // CHECK: br label %[[END2]] |
877 | // CHECK: [[END2]] |
878 | // CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ] |
879 | // CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32 |
880 | // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) |
881 | // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8* |
882 | // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* |
883 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) |
884 | |
885 | // t_var1_lhs = min(t_var1_lhs, t_var1_rhs); |
886 | // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]], |
887 | // CHECK: [[T_VAR1_RHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_RHS]], |
888 | // CHECK: [[CMP:%.+]] = icmp slt i{{[0-9]+}} [[T_VAR1_LHS_VAL]], [[T_VAR1_RHS_VAL]] |
889 | // CHECK: br i1 [[CMP]] |
890 | // CHECK: [[UP:%.+]] = phi i32 |
891 | // CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_LHS]], |
892 | // CHECK: ret void |
893 | |
894 | #endif |
895 | |
896 | |