simd_codegen.cpp source code [clang_source_code/test/OpenMP/simd

1	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
2	// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
3	// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
4	// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
5
6	// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
7	// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
8	// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
9	// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix=TERM_DEBUG %s
10	// expected-no-diagnostics
11	#ifndef HEADER
12	#define HEADER
13
14	// CHECK: [[SS_TY:%.+]] = type { i32 }
15
// Returns 0. Called in simple() to initialise `k` and as the non-constant
// step expression in `linear(lin : get_val())`.
16	long long get_val() { return 0; }
// Global pointer named in the `linear(g_ptr)` clause inside simple() and
// written through (`*g_ptr++ = 0.0`) in that loop body.
17	double *g_ptr;
18
19	// CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
// Codegen test body for `#pragma omp simd` over loops with constant trip
// counts. In order it covers: a strided int loop, a decrementing loop with
// `linear(k : 3)`, an unsigned 64-bit loop with a non-constant linear step
// (get_val()) plus a linear pointer (g_ptr), a short counter, an unsigned
// char counter, a zero-trip unsigned loop, `lastprivate(A)` and
// `reduction(*:R)`.
// a, b, c and d are subscripted in the loop bodies, so they are pointers.
// The interleaved FileCheck directives are matched against the emitted IR in
// source order; do not reorder statements or directives.
// NOTE(review): the pointer declarators in the signature and the `*` in the
// get_val / g_ptr related patterns were reconstructed from type consistency
// with neighbouring directives; confirm against the upstream test.
20	void simple(float *a, float *b, float *c, float *d) {
21	#pragma omp simd
22	// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
23
24	// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
25	// CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], 6
26	// CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]]
27	for (int i = 3; i < 32; i += 5) {
28	// CHECK: [[SIMPLE_LOOP1_BODY]]:
29	// Start of body: calculate i from IV:
30	// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
31	// CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5
32	// CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]]
33	// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.access.group
34	// ... loop body ...
35	// End of body: store into a[i]:
36	// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.access.group
37	a[i] = b[i] * c[i] * d[i];
38	// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
39	// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
40	// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
41	// br label %{{.+}}, !llvm.loop !{{.+}}
42	}
43	// CHECK: [[SIMPLE_LOOP1_END]]:
44
45	long long k = get_val();
46
47	#pragma omp simd linear(k : 3)
48	// CHECK: [[K0:%.+]] = call {{.*}}i64 @{{.*}}get_val
49	// CHECK-NEXT: store i64 [[K0]], i64* [[K_VAR:%[^,]+]]
50	// CHECK: store i32 0, i32* [[OMP_IV2:%[^,]+]]
51	// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR]]
52	// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
53
54	// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
55	// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV2]], 9
56	// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP2_BODY:.+]], label %[[SIMPLE_LOOP2_END:[^,]+]]
57	for (int i = 10; i > 1; i--) {
58	// CHECK: [[SIMPLE_LOOP2_BODY]]:
59	// Start of body: calculate i from IV:
60	// CHECK: [[IV2_0:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
61	// FIXME: It is interesting, why the following "mul 1" was not constant folded?
62	// CHECK-NEXT: [[IV2_1:%.+]] = mul nsw i32 [[IV2_0]], 1
63	// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV2_1]]
64	// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
65	//
66	// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
67	// CHECK-NEXT: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
68	// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV2_2]], 3
69	// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
70	// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
71	// Update of the privatized version of linear variable!
72	// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
73	a[k]++;
74	k = k + 3;
75	// CHECK: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
76	// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV2_2]], 1
77	// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV2]]{{.*}}!llvm.access.group
78	// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP2_ID]]
79	}
80	// CHECK: [[SIMPLE_LOOP2_END]]:
81	//
82	// Update linear vars after loop, as the loop was operating on a private version.
83	// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
84	// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
85	// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_VAR]]
86	//
87
88	int lin = 12;
89	#pragma omp simd linear(lin : get_val()), linear(g_ptr)
90
91	// Init linear private var.
92	// CHECK: store i32 12, i32* [[LIN_VAR:%[^,]+]]
93	// CHECK: store i64 0, i64* [[OMP_IV3:%[^,]+]]
94
95	// CHECK: [[LIN_LOAD:%.+]] = load i32, i32* [[LIN_VAR]]
96	// CHECK-NEXT: store i32 [[LIN_LOAD]], i32* [[LIN_START:%[^,]+]]
97	// Remember linear step.
98	// CHECK: [[CALL_VAL:%.+]] = invoke
99	// CHECK: store i64 [[CALL_VAL]], i64* [[LIN_STEP:%[^,]+]]
100
101	// CHECK: [[GLIN_LOAD:%.+]] = load double*, double** [[GLIN_VAR:@[^,]+]]
102	// CHECK-NEXT: store double* [[GLIN_LOAD]], double** [[GLIN_START:%[^,]+]]
103
104	// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group
105	// CHECK-NEXT: [[CMP3:%.+]] = icmp ult i64 [[IV3]], 4
106	// CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]]
107	for (unsigned long long it = 2000; it >= 600; it-=400) {
108	// CHECK: [[SIMPLE_LOOP3_BODY]]:
109	// Start of body: calculate it from IV:
110	// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group
111	// CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400
112	// CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]]
113	// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.access.group
114	//
115	// Linear start and step are used to calculate current value of the linear variable.
116	// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.access.group
117	// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.access.group
118	// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.access.group
119	// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.access.group
120	// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group
121	// CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1
122	// CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]]
123	// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.access.group
124	*g_ptr++ = 0.0;
125	// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.access.group
126	// CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.access.group
127	a[it + lin]++;
128	// CHECK: [[FLT_INC:%.+]] = fadd float
129	// CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.access.group
130	// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group
131	// CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1
132	// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.access.group
133	}
134	// CHECK: [[SIMPLE_LOOP3_END]]:
135	//
136	// Linear start and step are used to calculate final value of the linear variables.
137	// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]
138	// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]
139	// CHECK: store i32 {{.+}}, i32* [[LIN_VAR]],
140	// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
141	// CHECK: store double* {{.*}}[[GLIN_VAR]]
142
143	#pragma omp simd
144	// CHECK: store i32 0, i32* [[OMP_IV4:%[^,]+]]
145
146	// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.access.group
147	// CHECK-NEXT: [[CMP4:%.+]] = icmp slt i32 [[IV4]], 4
148	// CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]]
149	for (short it = 6; it <= 20; it-=-4) {
150	// CHECK: [[SIMPLE_LOOP4_BODY]]:
151	// Start of body: calculate it from IV:
152	// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.access.group
153	// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4
154	// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]]
155	// CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16
156	// CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.access.group
157
158	// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.access.group
159	// CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1
160	// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.access.group
161	}
162	// CHECK: [[SIMPLE_LOOP4_END]]:
163
164	#pragma omp simd
165	// CHECK: store i32 0, i32* [[OMP_IV5:%[^,]+]]
166
167	// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.access.group
168	// CHECK-NEXT: [[CMP5:%.+]] = icmp slt i32 [[IV5]], 26
169	// CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]]
170	for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
171	// CHECK: [[SIMPLE_LOOP5_BODY]]:
172	// Start of body: calculate it from IV:
173	// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.access.group
174	// CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1
175	// CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]]
176	// CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8
177	// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.access.group
178
179	// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.access.group
180	// CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1
181	// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.access.group
182	}
183	// CHECK: [[SIMPLE_LOOP5_END]]:
184
185	// CHECK-NOT: mul i32 %{{.+}}, 10
186	#pragma omp simd
187	for (unsigned i=100; i<10; i+=10) {
188	}
189
190	int A;
191	// CHECK: store i32 -1, i32* [[A:%.+]],
192	A = -1;
193	#pragma omp simd lastprivate(A)
194	// CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]]
195	// CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
196	// CHECK: [[SIMD_LOOP7_COND]]:
197	// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group
198	// CHECK-NEXT: [[CMP7:%.+]] = icmp slt i64 [[IV7]], 7
199	// CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]]
200	for (long long i = -10; i < 10; i += 3) {
201	// CHECK: [[SIMPLE_LOOP7_BODY]]:
202	// Start of body: calculate i from IV:
203	// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group
204	// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3
205	// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
206	// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.access.group
207	// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.access.group
208	// CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32
209	// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.access.group
210	A = i;
211	// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group
212	// CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1
213	// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.access.group
214	}
215	// CHECK: [[SIMPLE_LOOP7_END]]:
216	// CHECK-NEXT: store i64 11, i64*
217	// CHECK-NEXT: [[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]],
218	// CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* [[A]],
219	int R;
220	// CHECK: store i32 -1, i32* [[R:%[^,]+]],
221	R = -1;
222	// CHECK: store i64 0, i64* [[OMP_IV8:%[^,]+]],
223	// CHECK: store i32 1, i32* [[R_PRIV:%[^,]+]],
224	#pragma omp simd reduction(*:R)
225	// CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]]
226	// CHECK: [[SIMD_LOOP8_COND]]:
227	// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.access.group
228	// CHECK-NEXT: [[CMP8:%.+]] = icmp slt i64 [[IV8]], 7
229	// CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]]
230	for (long long i = -10; i < 10; i += 3) {
231	// CHECK: [[SIMPLE_LOOP8_BODY]]:
232	// Start of body: calculate i from IV:
233	// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.access.group
234	// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3
235	// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
236	// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.access.group
237	// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.access.group
238	// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.access.group
239	R *= i;
240	// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.access.group
241	// CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1
242	// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.access.group
243	}
244	// CHECK: [[SIMPLE_LOOP8_END]]:
245	// CHECK-DAG: [[R_VAL:%.+]] = load i32, i32* [[R]],
246	// CHECK-DAG: [[R_PRIV_VAL:%.+]] = load i32, i32* [[R_PRIV]],
247	// CHECK: [[RED:%.+]] = mul nsw i32 [[R_VAL]], [[R_PRIV_VAL]]
248	// CHECK-NEXT: store i32 [[RED]], i32* [[R]],
249	// CHECK-NEXT: ret void
250	}
251
// Returns `a + K`, where K is a compile-time unsigned template argument.
// Called from the collapsed loop body of templ1().
252	template <class T, unsigned K> T tfoo(T a) { return a + K; }
253
// Two-level loop nest under `#pragma omp simd collapse(N)`, with the collapse
// depth taken from the template argument N. The outer loop runs i over
// [0, 2N); the inner loop runs j over [0, 4N) in steps of 2. Each iteration
// stores a + tfoo<T, N>(i + j) into z[i + j]. Always returns 0.
254	template <typename T, unsigned N>
255	int templ1(T a, T *z) {
256	#pragma omp simd collapse(N)
257	for (int i = 0; i < N * 2; i++) {
258	for (long long j = 0; j < (N + N + N + N); j += 2) {
259	z[i + j] = a + tfoo<T, N>(i + j);
260	}
261	}
262	return 0;
263	}
264
265	// Instantiation templ1<float,2>
266	// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
267	// CHECK: store i64 0, i64* [[T1_OMP_IV:[^,]+]]
268	// ...
269	// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
270	// CHECK-NEXT: [[CMP1:%.+]] = icmp slt i64 [[IV]], 16
271	// CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]]
272	// CHECK: [[T1_BODY]]:
273	// Loop counters i and j updates:
274	// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
275	// CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4
276	// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1
277	// CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]]
278	// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
279	// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.access.group
280	// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
281	// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
282	// CHECK-NEXT: [[J_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4
283	// CHECK-NEXT: [[J_1_MUL1:%.+]] = mul nsw i64 [[J_1_DIV1]], 4
284	// CHECK-NEXT: [[J_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[J_1_MUL1]]
285	// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1_SUB0]], 2
286	// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
287	// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.access.group
288	// simd.for.inc:
289	// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
290	// CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1
291	// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group
292	// CHECK-NEXT: br label {{%.+}}
293	// CHECK: [[T1_END]]:
294	// CHECK: ret i32 0
295	//
// Instantiates templ1<float, 2>, the specialisation the FileCheck directives
// just above this function describe.
// `a` is passed without being initialised; the RUN lines only emit IR and
// pipe it to FileCheck, they do not execute the code.
296	void inst_templ1() {
297	float a;
298	float z[100];
299	templ1<float,2> (a, z);
300	}
301
302
// Integer type returned by IterDouble::operator- (distance between iterators).
303	typedef int MyIdx;
304
// Minimal iterator over double, wrapping a raw pointer; used as the loop
// counter type in iter_simple(). No constructor is declared, so Ptr holds an
// indeterminate value until a member function assigns it.
305	class IterDouble {
306	double *Ptr;
307	public:
// Returns a new iterator one element past this one; *this is not modified
// (the operator is const).
308	IterDouble operator++ () const {
309	IterDouble n;
310	n.Ptr = Ptr + 1;
311	return n;
312	}
// Orders iterators by the address they wrap.
313	bool operator < (const IterDouble &that) const {
314	return Ptr < that.Ptr;
315	}
// Dereference: returns a reference to the pointed-to double.
316	double & operator *() const {
317	return *Ptr;
318	}
// Element distance from `that` to this iterator, narrowed to MyIdx (int).
319	MyIdx operator - (const IterDouble &that) const {
320	return (MyIdx) (Ptr - that.Ptr);
321	}
// Returns an iterator advanced by Delta elements. Unlike the other members
// this operator is not const-qualified.
322	IterDouble operator + (int Delta) {
323	IterDouble re;
324	re.Ptr = Ptr + Delta;
325	return re;
326	}
327
// Destructor intentionally left commented out.
328	///~IterDouble() {}
329	};
330
331	// CHECK-LABEL: define {{.*void}} @{{.*}}iter_simple{{.*}}
// Checks `#pragma omp simd` over a loop whose counter is the user-defined
// iterator IterDouble. The directives expect the trip count to be computed
// before the loop from IterDouble::operator- and the counter to be rebuilt
// from the logical index with operator+ at the start of each iteration.
// The body stores half of *ic through *i and then evaluates ++ic.
// The interleaved FileCheck directives are matched in source order; do not
// reorder statements or directives.
// NOTE(review): the dereferences in the body and the `*` in the IterDouble
// call patterns were reconstructed (IterDouble has no operator*(double) or
// assignment from double, and the directives expect an fmul/store of
// double); confirm against the upstream test.
332	void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
333	//
334	// Calculate number of iterations before the loop body.
335	// CHECK: [[DIFF1:%.+]] = invoke {{.*}}i32 @{{.*}}IterDouble{{.*}}
336	// CHECK: [[DIFF2:%.+]] = sub nsw i32 [[DIFF1]], 1
337	// CHECK-NEXT: [[DIFF3:%.+]] = add nsw i32 [[DIFF2]], 1
338	// CHECK-NEXT: [[DIFF4:%.+]] = sdiv i32 [[DIFF3]], 1
339	// CHECK-NEXT: [[DIFF5:%.+]] = sub nsw i32 [[DIFF4]], 1
340	// CHECK-NEXT: store i32 [[DIFF5]], i32* [[OMP_LAST_IT:%[^,]+]]{{.+}}
341	// CHECK: store i32 0, i32* [[IT_OMP_IV:%[^,]+]]
342	#pragma omp simd
343
344	// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.access.group
345	// CHECK-NEXT: [[LAST_IT:%.+]] = load i32, i32* [[OMP_LAST_IT]]{{.+}}!llvm.access.group
346	// CHECK-NEXT: [[NUM_IT:%.+]] = add nsw i32 [[LAST_IT]], 1
347	// CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], [[NUM_IT]]
348	// CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]]
349	for (IterDouble i = ia; i < ib; ++i) {
350	// CHECK: [[IT_BODY]]:
351	// Start of body: calculate i from index:
352	// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.access.group
353	// Call of operator+ (i, IV).
354	// CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
355	// ... loop body ...
356	*i = *ic * 0.5;
357	// Float multiply and save result.
358	// CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
359	// CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
360	// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.access.group
361	++ic;
362	//
363	// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.access.group
364	// CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1
365	// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.access.group
366	// br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]]
367	}
368	// CHECK: [[IT_END]]:
369	// CHECK: ret void
370	}
371
372
373	// CHECK-LABEL: define {{.*void}} @{{.*}}collapsed{{.*}}
// Checks `#pragma omp simd collapse(4)` over a 2 x 3 x 4 x 5 loop nest (120
// logical iterations) whose counters have mixed types: int i, unsigned j,
// a loop-local int k and short l. Each iteration stores b[j] * c[k] * d[l]
// into a[i]. The directives expect each counter to be recomputed from the
// single logical index by udiv/mul/sub chains, and expect stores of the
// final counter values after the loop.
// a, b, c and d are subscripted in the body, so they are pointers.
// The interleaved FileCheck directives are matched in source order; do not
// reorder statements or directives.
// NOTE(review): the pointer declarators in the signature were reconstructed;
// confirm against the upstream test.
374	void collapsed(float *a, float *b, float *c, float *d) {
375	int i; // outer loop counter
376	unsigned j; // middle loop counter, leads to unsigned icmp in loop header.
377	// k declared in the loop init below
378	short l; // inner loop counter
379	// CHECK: store i32 0, i32* [[OMP_IV:[^,]+]]
380	//
381	#pragma omp simd collapse(4)
382
383	// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
384	// CHECK-NEXT: [[CMP:%.+]] = icmp ult i32 [[IV]], 120
385	// CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]]
386	for (i = 1; i < 3; i++) // 2 iterations
387	for (j = 2u; j < 5u; j++) //3 iterations
388	for (int k = 3; k <= 6; k++) // 4 iterations
389	for (l = 4; l < 9; ++l) // 5 iterations
390	{
391	// CHECK: [[COLL1_BODY]]:
392	// Start of body: calculate i from index:
393	// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
394	// Calculation of the loop counters values.
395	// CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60
396	// CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
397	// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
398	// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
399
400	// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
401	// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
402	// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60
403	// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60
404	// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]]
405	// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20
406	// CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
407	// CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
408	// CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
409
410	// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
411	// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
412	// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60
413	// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60
414	// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]]
415
416	// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]
417	// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]]
418	// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60
419	// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60
420	// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]]
421	// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20
422	// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20
423	// CHECK-NEXT: [[ADD_5:%.+]] = sub i32 [[ADD_3]], [[MUL_4]]
424	// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5
425	// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1
426	// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]]
427	// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]]
428
429	// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
430	// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group
431	// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60
432	// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60
433	// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]]
434
435	// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]]
436	// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]]
437	// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60
438	// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60
439	// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]]
440	// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20
441	// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20
442	// CHECK-NEXT: [[SUB_11:%.+]] = sub i32 [[SUB_7]], [[MUL_8]]
443
444	// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]]
445	// CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]]
446	// CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60
447	// CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60
448	// CHECK-NEXT: [[SUB_12:%.+]] = sub i32 [[IV1_7]], [[MUL_9]]
449
450	// CHECK: [[IV1_8:%.+]] = load i32, i32* [[OMP_IV]]
451	// CHECK: [[IV1_8_1:%.+]] = load i32, i32* [[OMP_IV]]
452	// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[IV1_8_1]], 60
453	// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 60
454	// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_8]], [[MUL_4]]
455	// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 20
456	// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 20
457	// CHECK-NEXT: [[SUB_8:%.+]] = sub i32 [[SUB_12]], [[MUL_5]]
458	// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[SUB_8]], 5
459	// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 5
460	// CHECK-NEXT: [[SUB_9:%.+]] = sub i32 [[SUB_11]], [[MUL_6]]
461	// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[SUB_9]], 1
462	// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[MUL_6]]
463	// CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
464	// CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
465	// ... loop body ...
466	// End of body: store into a[i]:
467	// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.access.group
468	float res = b[j] * c[k];
469	a[i] = res * d[l];
470	// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
471	// CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1
472	// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
473	// br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]]
474	// CHECK: [[COLL1_END]]:
475	}
476	// i,j,l are updated; k is not updated.
477	// CHECK: store i32 3, i32*
478	// CHECK-NEXT: store i32 5, i32*
479	// CHECK-NEXT: store i32 7, i32*
480	// CHECK-NEXT: store i16 9, i16*
481	// CHECK: ret void
482	}
483
// Declared only; its result is the inner-loop bound in widened().
484	extern char foo();
// Declared only; a double despite the name. Assigned in widened() and listed
// in that function's private(...) clause.
485	extern double globalfloat;
486
487	// CHECK-LABEL: define {{.*void}} @{{.*}}widened{{.*}}
// Checks `#pragma omp simd collapse(2)` when the inner loop bound is the
// run-time call foo(): the directives expect a 64-bit logical counter and a
// trip count loaded from memory rather than a constant.
// `globalfloat` and `localint` are listed in private(...), so stores inside
// the loop are expected to target private copies, while the assignment to
// `localint` after the loop targets the original variable.
// a, b, c and d are subscripted in the body, so they are pointers.
// The interleaved FileCheck directives are matched in source order; do not
// reorder statements or directives.
// NOTE(review): the pointer declarators in the signature were reconstructed;
// confirm against the upstream test.
488	void widened(float *a, float *b, float *c, float *d) {
489	int i; // outer loop counter
490	short j; // inner loop counter
491	globalfloat = 1.0;
492	int localint = 1;
493	// CHECK: store double {{.+}}, double* [[GLOBALFLOAT:@.+]]
494	// Counter is widened to 64 bits.
495	// CHECK: store i64 0, i64* [[OMP_IV:[^,]+]]
496	//
497	#pragma omp simd collapse(2) private(globalfloat, localint)
498
499	// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.access.group
500	// CHECK-NEXT: [[LI:%.+]] = load i64, i64* [[OMP_LI:%[^,]+]]{{.+}}!llvm.access.group
501	// CHECK-NEXT: [[NUMIT:%.+]] = add nsw i64 [[LI]], 1
502	// CHECK-NEXT: [[CMP:%.+]] = icmp slt i64 [[IV]], [[NUMIT]]
503	// CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]]
504	for (i = 1; i < 3; i++) // 2 iterations
505	for (j = 0; j < foo(); j++) // foo() iterations
506	{
507	// CHECK: [[WIDE1_BODY]]:
508	// Start of body: calculate i from index:
509	// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.access.group
510	// Calculation of the loop counters values...
511	// CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]]
512	// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.access.group
513	// CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]]
514	// ... loop body ...
515	//
516	// Here we expect store into private double var, not global
517	// CHECK-NOT: store double {{.+}}, double* [[GLOBALFLOAT]]
518	globalfloat = (float)j/i;
519	float res = b[j] * c[j];
520	// Store into a[i]:
521	// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.access.group
522	a[i] = res * d[i];
523	// Then there's a store into private var localint:
524	// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.access.group
525	localint = (int)j;
526	// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.access.group
527	// CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1
528	// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.access.group
529	//
530	// br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]]
531	// CHECK: [[WIDE1_END]]:
532	}
533	// i,j are updated.
534	// CHECK: store i32 3, i32* [[I:%[^,]+]]
535	// CHECK: store i16
536	//
537	// Here we expect store into original localint, not its privatized version.
538	// CHECK-NOT: store i32 {{.+}}, i32* [[LOCALINT]]
539	localint = (int)j;
540	// CHECK: ret void
541	}
542
543	// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}})
544	void linear(float *a) {
545	// CHECK: [[VAL_ADDR:%.+]] = alloca i64,
546	// CHECK: [[K_ADDR:%.+]] = alloca i64*,
547	long long val = 0;
548	long long &k = val;
549
550	#pragma omp simd linear(k : 3)
551	// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]],
552	// CHECK: [[VAL_REF:%.+]] = load i64, i64* [[K_ADDR]],
553	// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
554	// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
555	// CHECK: [[K_REF:%.+]] = load i64, i64* [[K_ADDR_REF]],
556	// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
557	// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
558
559	// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
560	// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
561	// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
562	for (int i = 10; i > 1; i--) {
563	// CHECK: [[SIMPLE_LOOP_BODY]]:
564	// Start of body: calculate i from IV:
565	// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
566	// FIXME: It is interesting, why the following "mul 1" was not constant folded?
567	// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
568	// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
569	// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
570	//
571	// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
572	// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
573	// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
574	// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
575	// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
576	// Update of the privatized version of linear variable!
577	// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
578	a[k]++;
579	k = k + 3;
580	// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
581	// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
582	// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
583	// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
584	}
585	// CHECK: [[SIMPLE_LOOP_END]]:
586	//
587	// Update linear vars after loop, as the loop was operating on a private version.
588	// CHECK: [[K_REF:%.+]] = load i64, i64* [[K_ADDR_REF]],
589	// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
590	// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
591	// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
592	// CHECK-NEXT: [[K_REF:%.+]] = load i64, i64* [[K_PRIV_REF]],
593	// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
594	//
595
596	#pragma omp simd linear(val(k) : 3)
597	// CHECK: [[VAL_REF:%.+]] = load i64, i64* [[K_ADDR]],
598	// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
599	// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
600	// CHECK: [[K_REF:%.+]] = load i64, i64* [[K_ADDR_REF]],
601	// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
602	// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
603
604	// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
605	// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
606	// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
607	for (int i = 10; i > 1; i--) {
608	// CHECK: [[SIMPLE_LOOP_BODY]]:
609	// Start of body: calculate i from IV:
610	// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
611	// FIXME: It is interesting, why the following "mul 1" was not constant folded?
612	// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
613	// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
614	// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
615	//
616	// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
617	// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
618	// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
619	// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
620	// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
621	// Update of the privatized version of linear variable!
622	// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
623	a[k]++;
624	k = k + 3;
625	// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
626	// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
627	// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
628	// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
629	}
630	// CHECK: [[SIMPLE_LOOP_END]]:
631	//
632	// Update linear vars after loop, as the loop was operating on a private version.
633	// CHECK: [[K_REF:%.+]] = load i64, i64* [[K_ADDR_REF]],
634	// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
635	// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
636	// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
637	// CHECK-NEXT: [[K_REF:%.+]] = load i64, i64* [[K_PRIV_REF]],
638	// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
639	//
640	#pragma omp simd linear(uval(k) : 3)
641	// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
642	// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]]
643	// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
644
645	// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
646	// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
647	// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
648	for (int i = 10; i > 1; i--) {
649	// CHECK: [[SIMPLE_LOOP_BODY]]:
650	// Start of body: calculate i from IV:
651	// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
652	// FIXME: Why was the following "mul 1" not constant-folded?
653	// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
654	// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
655	// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
656	//
657	// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
658	// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
659	// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
660	// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
661	// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
662	// Update of the privatized version of linear variable!
663	// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
664	a[k]++;
665	k = k + 3;
666	// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
667	// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
668	// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
669	// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
670	}
671	// CHECK: [[SIMPLE_LOOP_END]]:
672	//
673	// Update linear vars after loop, as the loop was operating on a private version.
674	// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
675	// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
676	// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]]
677	//
678	}
679
// Trivial helper that always returns 0. It is called from the loop body of
// parallel_simd() below, whose TERM_DEBUG expectations look for an `invoke`
// of this function with an unwind edge.
680	// TERM_DEBUG-LABEL: bar
681	int bar() {return 0;};
682
// Codegen test for a `simd` loop nested directly inside a `parallel` region,
// where the loop body calls bar(). The TERM_DEBUG run lines enable exceptions,
// and the expectations embedded below require that:
//   - bar() is invoked with an unwind edge to a landing pad,
//   - that landing pad calls __clang_call_terminate and ends in unreachable,
//   - __kmpc_global_thread_num appears neither between the function label and
//     the invoke, nor between the invoke and the landing pad.
// NOTE: the DILocation expectation right after this function uses a relative
// @LINE offset that resolves to the `#pragma omp simd` line, so do not add or
// remove lines between that pragma and the expectation.
683	// TERM_DEBUG-LABEL: parallel_simd
684	void parallel_simd(float *a) {
685	#pragma omp parallel
686	#pragma omp simd
687	// TERM_DEBUG-NOT: __kmpc_global_thread_num
688	// TERM_DEBUG: invoke i32 {{.}}bar{{.}}()
689	// TERM_DEBUG: unwind label %[[TERM_LPAD:[^,]+]],
690	// TERM_DEBUG-NOT: __kmpc_global_thread_num
691	// TERM_DEBUG: [[TERM_LPAD]]
692	// TERM_DEBUG: call void @__clang_call_terminate
693	// TERM_DEBUG: unreachable
694	for (unsigned i = 131071; i <= 2147483647; i += 127)
695	a[i] += bar();
696	}
697	// TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]],
698
699	// CHECK-LABEL: S8
700	// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
701	// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
702	// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
703	// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
704
705	// CHECK-DAG: and i64 %{{.+}}, 15
706	// CHECK-DAG: icmp eq i64 %{{.+}}, 0
707	// CHECK-DAG: call void @llvm.assume(i1
708
709	// CHECK-DAG: and i64 %{{.+}}, 7
710	// CHECK-DAG: icmp eq i64 %{{.+}}, 0
711	// CHECK-DAG: call void @llvm.assume(i1
712
713	// CHECK-DAG: and i64 %{{.+}}, 15
714	// CHECK-DAG: icmp eq i64 %{{.+}}, 0
715	// CHECK-DAG: call void @llvm.assume(i1
716
717	// CHECK-DAG: and i64 %{{.+}}, 3
718	// CHECK-DAG: icmp eq i64 %{{.+}}, 0
719	// CHECK-DAG: call void @llvm.assume(i1
// Small struct used as the template argument of S7 below. It holds one int
// `a` (0 from the default constructor, or the value given to the int
// constructor) and exposes the nested type `type` (int), which S7's public
// constructor uses as its parameter type.
720	struct SS {
721	SS(): a(0) {}
722	SS(int v) : a(v) {}
723	int a;
724	typedef int type;
725	};
726
// Class template deriving publicly from T, with a T* member `a` and a
// two-element T array `b`. Its public constructor exercises `aligned` clauses
// naming class members (a pointer and an array) on `simd` loops.
727	template <typename T>
728	class S7 : public T {
729	protected:
730	T *a;
731	T b[2];
// Sets `a` to null; `b` is default-constructed.
732	S7() : a(0) {}
733
734	public:
// Points `a` at `v` (address converted to T* with a C-style cast), then runs
// two simd loops over k while k < a->a, each incrementing a->a:
//   1. with `aligned(a)` and no explicit alignment value,
//   2. with `aligned(this->b : 8)`.
735	S7(typename T::type &v) : a((T*)&v) {
736	#pragma omp simd aligned(a)
737	for (int k = 0; k < a->a; ++k)
738	++this->a->a;
739	#pragma omp simd aligned(this->b : 8)
740	for (int k = 0; k < a->a; ++k)
741	++a->a;
742	}
743	};
744
// Derives privately from IterDouble (presumably declared earlier in this
// file) and publicly from S7<SS>. Its public constructor exercises `aligned`
// clauses on inherited members inside combined parallel + simd regions.
// NOTE(review): the alignment-mask expectations placed before struct SS
// (masks 15, 7, 15, 3) appear to correspond to the four aligned clauses in
// the S7 and S8 constructors (default, 8, default, 4) - confirm.
745	class S8 : private IterDouble, public S7<SS> {
// Private default constructor.
746	S8() {}
747
748	public:
// Forwards `v` to S7<SS>'s constructor, then runs two parallel + simd loops:
//   1. `parallel private(a)` with `simd aligned(S7<SS>::a)`,
//   2. `parallel shared(b)` with `simd aligned(this->b: 4)`.
749	S8(int v) : S7<SS>(v){
750	#pragma omp parallel private(a)
751	#pragma omp simd aligned(S7<SS>::a)
752	for (int k = 0; k < a->a; ++k)
753	++this->a->a;
754	#pragma omp parallel shared(b)
755	#pragma omp simd aligned(this->b: 4)
756	for (int k = 0; k < a->a; ++k)
757	++a->a;
758	}
759	};
// Global instance constructed with 0. Its initialization calls S8(int), and
// through it S7<SS>'s constructor, so both are emitted for the expectations
// above and the global-initializer expectation below.
760	S8 s8(0);
761
762	// TERM_DEBUG-NOT: line: 0,
763	// TERM_DEBUG: distinct !DISubprogram(linkageName: "_GLOBAL__sub_I_simd_codegen.cpp",
764
765	#endif // HEADER
766
767

Clang Project