1 | // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
2 | // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
3 | // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
4 | // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
5 | // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
6 | // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
7 | // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC |
8 | // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX |
9 | |
10 | // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
11 | // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
12 | // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
13 | // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 |
14 | // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX |
15 | // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 |
16 | // RUN: %clang_cc1 -fopenmp-simd -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC |
17 | // RUN: %clang_cc1 -fopenmp-simd -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX |
18 | |
19 | void h1(float *c, float *a, double b[], int size) |
20 | { |
21 | // CHECK-LABEL: define void @h1 |
22 | int t = 0; |
23 | #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) |
24 | // CHECK: [[C_PTRINT:%.+]] = ptrtoint |
25 | // CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 |
26 | // CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 |
27 | // CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) |
28 | // CHECK: [[A_PTRINT:%.+]] = ptrtoint |
29 | |
30 | // X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
31 | // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 |
32 | // X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 |
33 | // PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
34 | // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
35 | |
36 | // CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 |
37 | // CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) |
38 | // CHECK: [[B_PTRINT:%.+]] = ptrtoint |
39 | |
40 | // X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
41 | // X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
42 | // X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 |
43 | // PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
44 | // PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
45 | |
46 | // CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 |
47 | // CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) |
48 | for (int i = 0; i < size; ++i) { |
49 | c[i] = a[i] * a[i] + b[i] * b[t]; |
50 | ++t; |
51 | } |
52 | // do not emit llvm.access.group metadata due to usage of safelen clause. |
53 | // CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} |
54 | #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8) |
55 | // CHECK: [[C_PTRINT:%.+]] = ptrtoint |
56 | // CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 |
57 | // CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 |
58 | // CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) |
59 | // CHECK: [[A_PTRINT:%.+]] = ptrtoint |
60 | |
61 | // X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
62 | // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 |
63 | // X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 |
64 | // PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
65 | // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
66 | |
67 | // CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 |
68 | // CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) |
69 | // CHECK: [[B_PTRINT:%.+]] = ptrtoint |
70 | |
71 | // X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
72 | // X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
73 | // X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 |
74 | // PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
75 | // PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
76 | |
77 | // CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 |
78 | // CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) |
79 | for (int i = 0; i < size; ++i) { |
80 | c[i] = a[i] * a[i] + b[i] * b[t]; |
81 | ++t; |
82 | } |
83 | // do not emit llvm.access.group metadata due to usage of safelen clause. |
84 | // CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} |
85 | #pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8) |
86 | // CHECK: [[C_PTRINT:%.+]] = ptrtoint |
87 | // CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 |
88 | // CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 |
89 | // CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) |
90 | // CHECK: [[A_PTRINT:%.+]] = ptrtoint |
91 | |
92 | // X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
93 | // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 |
94 | // X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 |
95 | // PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
96 | // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 |
97 | |
98 | // CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 |
99 | // CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) |
100 | // CHECK: [[B_PTRINT:%.+]] = ptrtoint |
101 | |
102 | // X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
103 | // X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
104 | // X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 |
105 | // PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 |
106 | // PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 |
107 | |
108 | // CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 |
109 | // CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) |
110 | for (int i = 0; i < size; ++i) { |
111 | c[i] = a[i] * a[i] + b[i] * b[t]; |
112 | ++t; |
113 | // CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_7:[0-9]+]] |
114 | } |
115 | } |
116 | |
117 | void h2(float *c, float *a, float *b, int size) |
118 | { |
119 | // CHECK-LABEL: define void @h2 |
120 | int t = 0; |
121 | #pragma omp simd linear(t) |
122 | for (int i = 0; i < size; ++i) { |
123 | c[i] = a[i] * a[i] + b[i] * b[t]; |
124 | ++t; |
125 | // CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_10:[0-9]+]] |
126 | } |
127 | // CHECK: br label %{{.+}}, !llvm.loop [[LOOP_H2_HEADER:![0-9]+]] |
128 | } |
129 | |
130 | void h3(float *c, float *a, float *b, int size) |
131 | { |
132 | // CHECK-LABEL: define void @h3 |
133 | #pragma omp simd |
134 | for (int i = 0; i < size; ++i) { |
135 | for (int j = 0; j < size; ++j) { |
136 | c[j*i] = a[i] * b[j]; |
137 | } |
138 | // CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_13:[0-9]+]] |
139 | } |
140 | // CHECK: br label %{{.+}}, !llvm.loop [[LOOP_H3_HEADER:![0-9]+]] |
141 | } |
142 | |
143 | // Metadata for h1: |
144 | // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_16:![0-9]+]], [[LOOP_VEC_ENABLE:![0-9]+]]} |
145 | // CHECK: [[LOOP_WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16} |
146 | // CHECK: [[LOOP_VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} |
147 | // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8:![0-9]+]], [[LOOP_VEC_ENABLE]]} |
148 | // CHECK: [[LOOP_WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8} |
149 | // CHECK: ![[ACCESS_GROUP_7]] = distinct !{} |
150 | // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8]], [[LOOP_VEC_ENABLE]], ![[PARALLEL_ACCESSES_9:[0-9]+]]} |
151 | // CHECK: ![[PARALLEL_ACCESSES_9]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_7]]} |
152 | // |
153 | // Metadata for h2: |
154 | // CHECK: ![[ACCESS_GROUP_10]] = distinct !{} |
155 | // CHECK: [[LOOP_H2_HEADER]] = distinct !{[[LOOP_H2_HEADER]], [[LOOP_VEC_ENABLE]], ![[PARALLEL_ACCESSES_12:[0-9]+]]} |
156 | // CHECK: ![[PARALLEL_ACCESSES_12]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_10]]} |
157 | // |
158 | // Metadata for h3: |
159 | // CHECK: ![[ACCESS_GROUP_13]] = distinct !{} |
160 | // CHECK: [[LOOP_H3_HEADER]] = distinct !{[[LOOP_H3_HEADER]], [[LOOP_VEC_ENABLE]], ![[PARALLEL_ACCESSES_15:[0-9]+]]} |
161 | // CHECK: ![[PARALLEL_ACCESSES_15]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_13]]} |
162 | // |
163 | |