1 | // REQUIRES: amdgpu-registered-target |
2 | // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s |
3 | // RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s |
4 | |
5 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable |
6 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable |
7 | |
// Vector aliases named after the OpenCL built-in vector types, declared with
// the ext_vector_type attribute: widths 2, 3, 4, 8 and 16 for each of char,
// short, int, long, half, float and double. The kernels in this file declare
// arrays of, and pointers to, these types.
// NOTE(review): the half and double aliases presumably rely on the
// cl_khr_fp16 / cl_khr_fp64 pragmas earlier in the file -- confirm.
8 | typedef char __attribute__((ext_vector_type(2))) char2; |
9 | typedef char __attribute__((ext_vector_type(3))) char3; |
10 | typedef char __attribute__((ext_vector_type(4))) char4; |
11 | typedef char __attribute__((ext_vector_type(8))) char8; |
12 | typedef char __attribute__((ext_vector_type(16))) char16; |
13 | |
14 | typedef short __attribute__((ext_vector_type(2))) short2; |
15 | typedef short __attribute__((ext_vector_type(3))) short3; |
16 | typedef short __attribute__((ext_vector_type(4))) short4; |
17 | typedef short __attribute__((ext_vector_type(8))) short8; |
18 | typedef short __attribute__((ext_vector_type(16))) short16; |
19 | |
20 | typedef int __attribute__((ext_vector_type(2))) int2; |
21 | typedef int __attribute__((ext_vector_type(3))) int3; |
22 | typedef int __attribute__((ext_vector_type(4))) int4; |
23 | typedef int __attribute__((ext_vector_type(8))) int8; |
24 | typedef int __attribute__((ext_vector_type(16))) int16; |
25 | |
26 | typedef long __attribute__((ext_vector_type(2))) long2; |
27 | typedef long __attribute__((ext_vector_type(3))) long3; |
28 | typedef long __attribute__((ext_vector_type(4))) long4; |
29 | typedef long __attribute__((ext_vector_type(8))) long8; |
30 | typedef long __attribute__((ext_vector_type(16))) long16; |
31 | |
32 | typedef half __attribute__((ext_vector_type(2))) half2; |
33 | typedef half __attribute__((ext_vector_type(3))) half3; |
34 | typedef half __attribute__((ext_vector_type(4))) half4; |
35 | typedef half __attribute__((ext_vector_type(8))) half8; |
36 | typedef half __attribute__((ext_vector_type(16))) half16; |
37 | |
38 | typedef float __attribute__((ext_vector_type(2))) float2; |
39 | typedef float __attribute__((ext_vector_type(3))) float3; |
40 | typedef float __attribute__((ext_vector_type(4))) float4; |
41 | typedef float __attribute__((ext_vector_type(8))) float8; |
42 | typedef float __attribute__((ext_vector_type(16))) float16; |
43 | |
44 | typedef double __attribute__((ext_vector_type(2))) double2; |
45 | typedef double __attribute__((ext_vector_type(3))) double3; |
46 | typedef double __attribute__((ext_vector_type(4))) double4; |
47 | typedef double __attribute__((ext_vector_type(8))) double8; |
48 | typedef double __attribute__((ext_vector_type(16))) double16; |
49 | |
50 | // CHECK: @local_memory_alignment_global.lds_i8 = internal addrspace(3) global [4 x i8] undef, align 1 |
51 | // CHECK: @local_memory_alignment_global.lds_v2i8 = internal addrspace(3) global [4 x <2 x i8>] undef, align 2 |
52 | // CHECK: @local_memory_alignment_global.lds_v3i8 = internal addrspace(3) global [4 x <3 x i8>] undef, align 4 |
53 | // CHECK: @local_memory_alignment_global.lds_v4i8 = internal addrspace(3) global [4 x <4 x i8>] undef, align 4 |
54 | // CHECK: @local_memory_alignment_global.lds_v8i8 = internal addrspace(3) global [4 x <8 x i8>] undef, align 8 |
55 | // CHECK: @local_memory_alignment_global.lds_v16i8 = internal addrspace(3) global [4 x <16 x i8>] undef, align 16 |
56 | // CHECK: @local_memory_alignment_global.lds_i16 = internal addrspace(3) global [4 x i16] undef, align 2 |
57 | // CHECK: @local_memory_alignment_global.lds_v2i16 = internal addrspace(3) global [4 x <2 x i16>] undef, align 4 |
58 | // CHECK: @local_memory_alignment_global.lds_v3i16 = internal addrspace(3) global [4 x <3 x i16>] undef, align 8 |
59 | // CHECK: @local_memory_alignment_global.lds_v4i16 = internal addrspace(3) global [4 x <4 x i16>] undef, align 8 |
60 | // CHECK: @local_memory_alignment_global.lds_v8i16 = internal addrspace(3) global [4 x <8 x i16>] undef, align 16 |
61 | // CHECK: @local_memory_alignment_global.lds_v16i16 = internal addrspace(3) global [4 x <16 x i16>] undef, align 32 |
62 | // CHECK: @local_memory_alignment_global.lds_i32 = internal addrspace(3) global [4 x i32] undef, align 4 |
63 | // CHECK: @local_memory_alignment_global.lds_v2i32 = internal addrspace(3) global [4 x <2 x i32>] undef, align 8 |
64 | // CHECK: @local_memory_alignment_global.lds_v3i32 = internal addrspace(3) global [4 x <3 x i32>] undef, align 16 |
65 | // CHECK: @local_memory_alignment_global.lds_v4i32 = internal addrspace(3) global [4 x <4 x i32>] undef, align 16 |
66 | // CHECK: @local_memory_alignment_global.lds_v8i32 = internal addrspace(3) global [4 x <8 x i32>] undef, align 32 |
67 | // CHECK: @local_memory_alignment_global.lds_v16i32 = internal addrspace(3) global [4 x <16 x i32>] undef, align 64 |
68 | // CHECK: @local_memory_alignment_global.lds_i64 = internal addrspace(3) global [4 x i64] undef, align 8 |
69 | // CHECK: @local_memory_alignment_global.lds_v2i64 = internal addrspace(3) global [4 x <2 x i64>] undef, align 16 |
70 | // CHECK: @local_memory_alignment_global.lds_v3i64 = internal addrspace(3) global [4 x <3 x i64>] undef, align 32 |
71 | // CHECK: @local_memory_alignment_global.lds_v4i64 = internal addrspace(3) global [4 x <4 x i64>] undef, align 32 |
72 | // CHECK: @local_memory_alignment_global.lds_v8i64 = internal addrspace(3) global [4 x <8 x i64>] undef, align 64 |
73 | // CHECK: @local_memory_alignment_global.lds_v16i64 = internal addrspace(3) global [4 x <16 x i64>] undef, align 128 |
74 | // CHECK: @local_memory_alignment_global.lds_f16 = internal addrspace(3) global [4 x half] undef, align 2 |
75 | // CHECK: @local_memory_alignment_global.lds_v2f16 = internal addrspace(3) global [4 x <2 x half>] undef, align 4 |
76 | // CHECK: @local_memory_alignment_global.lds_v3f16 = internal addrspace(3) global [4 x <3 x half>] undef, align 8 |
77 | // CHECK: @local_memory_alignment_global.lds_v4f16 = internal addrspace(3) global [4 x <4 x half>] undef, align 8 |
78 | // CHECK: @local_memory_alignment_global.lds_v8f16 = internal addrspace(3) global [4 x <8 x half>] undef, align 16 |
79 | // CHECK: @local_memory_alignment_global.lds_v16f16 = internal addrspace(3) global [4 x <16 x half>] undef, align 32 |
80 | // CHECK: @local_memory_alignment_global.lds_f32 = internal addrspace(3) global [4 x float] undef, align 4 |
81 | // CHECK: @local_memory_alignment_global.lds_v2f32 = internal addrspace(3) global [4 x <2 x float>] undef, align 8 |
82 | // CHECK: @local_memory_alignment_global.lds_v3f32 = internal addrspace(3) global [4 x <3 x float>] undef, align 16 |
83 | // CHECK: @local_memory_alignment_global.lds_v4f32 = internal addrspace(3) global [4 x <4 x float>] undef, align 16 |
84 | // CHECK: @local_memory_alignment_global.lds_v8f32 = internal addrspace(3) global [4 x <8 x float>] undef, align 32 |
85 | // CHECK: @local_memory_alignment_global.lds_v16f32 = internal addrspace(3) global [4 x <16 x float>] undef, align 64 |
86 | // CHECK: @local_memory_alignment_global.lds_f64 = internal addrspace(3) global [4 x double] undef, align 8 |
87 | // CHECK: @local_memory_alignment_global.lds_v2f64 = internal addrspace(3) global [4 x <2 x double>] undef, align 16 |
88 | // CHECK: @local_memory_alignment_global.lds_v3f64 = internal addrspace(3) global [4 x <3 x double>] undef, align 32 |
89 | // CHECK: @local_memory_alignment_global.lds_v4f64 = internal addrspace(3) global [4 x <4 x double>] undef, align 32 |
90 | // CHECK: @local_memory_alignment_global.lds_v8f64 = internal addrspace(3) global [4 x <8 x double>] undef, align 64 |
91 | // CHECK: @local_memory_alignment_global.lds_v16f64 = internal addrspace(3) global [4 x <16 x double>] undef, align 128 |
92 | |
93 | |
94 | // CHECK-LABEL: @local_memory_alignment_global( |
95 | // CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i64 0, i64 0), align 1 |
96 | // CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i64 0, i64 0), align 2 |
97 | // CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4 |
98 | // CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i64 0, i64 0), align 4 |
99 | // CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i64 0, i64 0), align 8 |
100 | // CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i64 0, i64 0), align 16 |
101 | // CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i64 0, i64 0), align 2 |
102 | // CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i64 0, i64 0), align 4 |
103 | // CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8 |
104 | // CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i64 0, i64 0), align 8 |
105 | // CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i64 0, i64 0), align 16 |
106 | // CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i64 0, i64 0), align 32 |
107 | // CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i64 0, i64 0), align 4 |
108 | // CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i64 0, i64 0), align 8 |
109 | // CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16 |
110 | // CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i64 0, i64 0), align 16 |
111 | // CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i64 0, i64 0), align 32 |
112 | // CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i64 0, i64 0), align 64 |
113 | // CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i64 0, i64 0), align 8 |
114 | // CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i64 0, i64 0), align 16 |
115 | // CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32 |
116 | // CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i64 0, i64 0), align 32 |
117 | // CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i64 0, i64 0), align 64 |
118 | // CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i64 0, i64 0), align 128 |
119 | // CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i64 0, i64 0), align 2 |
120 | // CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i64 0, i64 0), align 4 |
121 | // CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8 |
122 | // CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i64 0, i64 0), align 8 |
123 | // CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i64 0, i64 0), align 16 |
124 | // CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i64 0, i64 0), align 32 |
125 | // CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i64 0, i64 0), align 4 |
126 | // CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i64 0, i64 0), align 8 |
127 | // CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16 |
128 | // CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i64 0, i64 0), align 16 |
129 | // CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i64 0, i64 0), align 32 |
130 | // CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i64 0, i64 0), align 64 |
131 | // CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i64 0, i64 0), align 8 |
132 | // CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i64 0, i64 0), align 16 |
133 | // CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32 |
134 | // CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i64 0, i64 0), align 32 |
135 | // CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i64 0, i64 0), align 64 |
136 | // CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i64 0, i64 0), align 128 |
// Kernel under test for local (addrspace 3) variable alignment.
// It declares one volatile 4-element local array per scalar type and per
// vector width, then stores zero through each decayed array (element 0).
// The FileCheck expectations preceding this kernel pin the alignment of every
// emitted global and of every store: the alignment equals the element type's
// size, and 3-element vectors get the alignment of the 4-element vector and
// are stored as a 4-element vector with an undef last lane.
// The expectations are matched in sequence, so the declaration and store
// order here has to stay in step with them.
137 | kernel void local_memory_alignment_global() |
138 | { |
// 8-bit integer element types.
139 | volatile local char lds_i8[4]; |
140 | volatile local char2 lds_v2i8[4]; |
141 | volatile local char3 lds_v3i8[4]; |
142 | volatile local char4 lds_v4i8[4]; |
143 | volatile local char8 lds_v8i8[4]; |
144 | volatile local char16 lds_v16i8[4]; |
145 | |
// 16-bit integer element types.
146 | volatile local short lds_i16[4]; |
147 | volatile local short2 lds_v2i16[4]; |
148 | volatile local short3 lds_v3i16[4]; |
149 | volatile local short4 lds_v4i16[4]; |
150 | volatile local short8 lds_v8i16[4]; |
151 | volatile local short16 lds_v16i16[4]; |
152 | |
// 32-bit integer element types.
153 | volatile local int lds_i32[4]; |
154 | volatile local int2 lds_v2i32[4]; |
155 | volatile local int3 lds_v3i32[4]; |
156 | volatile local int4 lds_v4i32[4]; |
157 | volatile local int8 lds_v8i32[4]; |
158 | volatile local int16 lds_v16i32[4]; |
159 | |
// 64-bit integer element types.
160 | volatile local long lds_i64[4]; |
161 | volatile local long2 lds_v2i64[4]; |
162 | volatile local long3 lds_v3i64[4]; |
163 | volatile local long4 lds_v4i64[4]; |
164 | volatile local long8 lds_v8i64[4]; |
165 | volatile local long16 lds_v16i64[4]; |
166 | |
// Half-precision floating-point element types.
167 | volatile local half lds_f16[4]; |
168 | volatile local half2 lds_v2f16[4]; |
169 | volatile local half3 lds_v3f16[4]; |
170 | volatile local half4 lds_v4f16[4]; |
171 | volatile local half8 lds_v8f16[4]; |
172 | volatile local half16 lds_v16f16[4]; |
173 | |
// Single-precision floating-point element types.
174 | volatile local float lds_f32[4]; |
175 | volatile local float2 lds_v2f32[4]; |
176 | volatile local float3 lds_v3f32[4]; |
177 | volatile local float4 lds_v4f32[4]; |
178 | volatile local float8 lds_v8f32[4]; |
179 | volatile local float16 lds_v16f32[4]; |
180 | |
// Double-precision floating-point element types.
181 | volatile local double lds_f64[4]; |
182 | volatile local double2 lds_v2f64[4]; |
183 | volatile local double3 lds_v3f64[4]; |
184 | volatile local double4 lds_v4f64[4]; |
185 | volatile local double8 lds_v8f64[4]; |
186 | volatile local double16 lds_v16f64[4]; |
187 | |
// One volatile zero store per array, in declaration order; each store is what
// the expectations above inspect for its alignment.
188 | *lds_i8 = 0; |
189 | *lds_v2i8 = 0; |
190 | *lds_v3i8 = 0; |
191 | *lds_v4i8 = 0; |
192 | *lds_v8i8 = 0; |
193 | *lds_v16i8 = 0; |
194 | |
195 | *lds_i16 = 0; |
196 | *lds_v2i16 = 0; |
197 | *lds_v3i16 = 0; |
198 | *lds_v4i16 = 0; |
199 | *lds_v8i16 = 0; |
200 | *lds_v16i16 = 0; |
201 | |
202 | *lds_i32 = 0; |
203 | *lds_v2i32 = 0; |
204 | *lds_v3i32 = 0; |
205 | *lds_v4i32 = 0; |
206 | *lds_v8i32 = 0; |
207 | *lds_v16i32 = 0; |
208 | |
209 | *lds_i64 = 0; |
210 | *lds_v2i64 = 0; |
211 | *lds_v3i64 = 0; |
212 | *lds_v4i64 = 0; |
213 | *lds_v8i64 = 0; |
214 | *lds_v16i64 = 0; |
215 | |
216 | *lds_f16 = 0; |
217 | *lds_v2f16 = 0; |
218 | *lds_v3f16 = 0; |
219 | *lds_v4f16 = 0; |
220 | *lds_v8f16 = 0; |
221 | *lds_v16f16 = 0; |
222 | |
223 | *lds_f32 = 0; |
224 | *lds_v2f32 = 0; |
225 | *lds_v3f32 = 0; |
226 | *lds_v4f32 = 0; |
227 | *lds_v8f32 = 0; |
228 | *lds_v16f32 = 0; |
229 | |
230 | *lds_f64 = 0; |
231 | *lds_v2f64 = 0; |
232 | *lds_v3f64 = 0; |
233 | *lds_v4f64 = 0; |
234 | *lds_v8f64 = 0; |
235 | *lds_v16f64 = 0; |
236 | } |
237 | |
// Kernel that performs the same set of volatile zero stores as
// local_memory_alignment_global, but through local-address-space pointer
// parameters instead of kernel-scope arrays. There is one parameter per scalar
// type and per vector width (2, 3, 4, 8, 16).
// NOTE(review): no FileCheck expectations are attached to this kernel in the
// visible file, so the alignment of these stores is not currently verified --
// confirm whether that is intentional or whether expectations were dropped.
238 | kernel void local_memory_alignment_arg( |
// 8-bit integer pointees.
239 | volatile local char* lds_i8, |
240 | volatile local char2* lds_v2i8, |
241 | volatile local char3* lds_v3i8, |
242 | volatile local char4* lds_v4i8, |
243 | volatile local char8* lds_v8i8, |
244 | volatile local char16* lds_v16i8, |
245 | |
// 16-bit integer pointees.
246 | volatile local short* lds_i16, |
247 | volatile local short2* lds_v2i16, |
248 | volatile local short3* lds_v3i16, |
249 | volatile local short4* lds_v4i16, |
250 | volatile local short8* lds_v8i16, |
251 | volatile local short16* lds_v16i16, |
252 | |
// 32-bit integer pointees.
253 | volatile local int* lds_i32, |
254 | volatile local int2* lds_v2i32, |
255 | volatile local int3* lds_v3i32, |
256 | volatile local int4* lds_v4i32, |
257 | volatile local int8* lds_v8i32, |
258 | volatile local int16* lds_v16i32, |
259 | |
// 64-bit integer pointees.
260 | volatile local long* lds_i64, |
261 | volatile local long2* lds_v2i64, |
262 | volatile local long3* lds_v3i64, |
263 | volatile local long4* lds_v4i64, |
264 | volatile local long8* lds_v8i64, |
265 | volatile local long16* lds_v16i64, |
266 | |
// Half-precision floating-point pointees.
267 | volatile local half* lds_f16, |
268 | volatile local half2* lds_v2f16, |
269 | volatile local half3* lds_v3f16, |
270 | volatile local half4* lds_v4f16, |
271 | volatile local half8* lds_v8f16, |
272 | volatile local half16* lds_v16f16, |
273 | |
// Single-precision floating-point pointees.
274 | volatile local float* lds_f32, |
275 | volatile local float2* lds_v2f32, |
276 | volatile local float3* lds_v3f32, |
277 | volatile local float4* lds_v4f32, |
278 | volatile local float8* lds_v8f32, |
279 | volatile local float16* lds_v16f32, |
280 | |
// Double-precision floating-point pointees.
281 | volatile local double* lds_f64, |
282 | volatile local double2* lds_v2f64, |
283 | volatile local double3* lds_v3f64, |
284 | volatile local double4* lds_v4f64, |
285 | volatile local double8* lds_v8f64, |
286 | volatile local double16* lds_v16f64) |
287 | { |
// One volatile zero store through each pointer, in parameter order.
288 | *lds_i8 = 0; |
289 | *lds_v2i8 = 0; |
290 | *lds_v3i8 = 0; |
291 | *lds_v4i8 = 0; |
292 | *lds_v8i8 = 0; |
293 | *lds_v16i8 = 0; |
294 | |
295 | *lds_i16 = 0; |
296 | *lds_v2i16 = 0; |
297 | *lds_v3i16 = 0; |
298 | *lds_v4i16 = 0; |
299 | *lds_v8i16 = 0; |
300 | *lds_v16i16 = 0; |
301 | |
302 | *lds_i32 = 0; |
303 | *lds_v2i32 = 0; |
304 | *lds_v3i32 = 0; |
305 | *lds_v4i32 = 0; |
306 | *lds_v8i32 = 0; |
307 | *lds_v16i32 = 0; |
308 | |
309 | *lds_i64 = 0; |
310 | *lds_v2i64 = 0; |
311 | *lds_v3i64 = 0; |
312 | *lds_v4i64 = 0; |
313 | *lds_v8i64 = 0; |
314 | *lds_v16i64 = 0; |
315 | |
316 | *lds_f16 = 0; |
317 | *lds_v2f16 = 0; |
318 | *lds_v3f16 = 0; |
319 | *lds_v4f16 = 0; |
320 | *lds_v8f16 = 0; |
321 | *lds_v16f16 = 0; |
322 | |
323 | *lds_f32 = 0; |
324 | *lds_v2f32 = 0; |
325 | *lds_v3f32 = 0; |
326 | *lds_v4f32 = 0; |
327 | *lds_v8f32 = 0; |
328 | *lds_v16f32 = 0; |
329 | |
330 | *lds_f64 = 0; |
331 | *lds_v2f64 = 0; |
332 | *lds_v3f64 = 0; |
333 | *lds_v4f64 = 0; |
334 | *lds_v8f64 = 0; |
335 | *lds_v16f64 = 0; |
336 | } |
337 | |
338 | // CHECK-LABEL: @private_memory_alignment_alloca( |
339 | // CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5) |
340 | // CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5) |
341 | // CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5) |
342 | // CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5) |
343 | // CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5) |
344 | // CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5) |
345 | // CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5) |
346 | // CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5) |
347 | // CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5) |
348 | // CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5) |
349 | // CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5) |
350 | // CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5) |
351 | // CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5) |
352 | // CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5) |
353 | // CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5) |
354 | // CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5) |
355 | // CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5) |
356 | // CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5) |
357 | // CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5) |
358 | // CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5) |
359 | // CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5) |
360 | // CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5) |
361 | // CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5) |
362 | // CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5) |
363 | // CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5) |
364 | // CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5) |
365 | // CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5) |
366 | // CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5) |
367 | // CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5) |
368 | // CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5) |
369 | // CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5) |
370 | // CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5) |
371 | // CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5) |
372 | // CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5) |
373 | // CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5) |
374 | // CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5) |
375 | // CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5) |
376 | // CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5) |
377 | // CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5) |
378 | // CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5) |
379 | // CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5) |
380 | // CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5) |
381 | |
382 | // CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1 |
383 | // CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2 |
384 | // CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4 |
385 | // CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
386 | // CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
387 | // CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
388 | // CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2 |
389 | // CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
390 | // CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8 |
391 | // CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
392 | // CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
393 | // CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
394 | // CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
395 | // CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
396 | // CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp{{[0-9]+}}, align 16 |
397 | // CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
398 | // CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
399 | // CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
400 | // CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
401 | // CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
402 | // CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp{{[0-9]+}}, align 32 |
403 | // CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
404 | // CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
405 | // CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128 |
406 | // CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2 |
407 | // CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
408 | // CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8 |
409 | // CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
410 | // CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
411 | // CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
412 | // CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
413 | // CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
414 | // CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16 |
415 | // CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
416 | // CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
417 | // CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
418 | // CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
419 | // CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
420 | // CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32 |
421 | // CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
422 | // CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
423 | // CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128 |
// Kernel under test for private (addrspace 5) variable alignment.
// It declares one volatile 4-element private array per scalar type and per
// vector width, then stores zero through each decayed array (element 0).
// The FileCheck expectations preceding this kernel pin the alignment of every
// alloca and of every store; as with the local-memory kernel, 3-element
// vectors are expected to use the 4-element vector's alignment and to be
// stored as a 4-element vector with an undef last lane.
// The expectations are matched in sequence and refer to generated value names,
// so the declaration and store order here has to stay in step with them.
424 | kernel void private_memory_alignment_alloca() |
425 | { |
// 8-bit integer element types.
426 | volatile private char private_i8[4]; |
427 | volatile private char2 private_v2i8[4]; |
428 | volatile private char3 private_v3i8[4]; |
429 | volatile private char4 private_v4i8[4]; |
430 | volatile private char8 private_v8i8[4]; |
431 | volatile private char16 private_v16i8[4]; |
432 | |
// 16-bit integer element types.
433 | volatile private short private_i16[4]; |
434 | volatile private short2 private_v2i16[4]; |
435 | volatile private short3 private_v3i16[4]; |
436 | volatile private short4 private_v4i16[4]; |
437 | volatile private short8 private_v8i16[4]; |
438 | volatile private short16 private_v16i16[4]; |
439 | |
// 32-bit integer element types.
440 | volatile private int private_i32[4]; |
441 | volatile private int2 private_v2i32[4]; |
442 | volatile private int3 private_v3i32[4]; |
443 | volatile private int4 private_v4i32[4]; |
444 | volatile private int8 private_v8i32[4]; |
445 | volatile private int16 private_v16i32[4]; |
446 | |
// 64-bit integer element types.
447 | volatile private long private_i64[4]; |
448 | volatile private long2 private_v2i64[4]; |
449 | volatile private long3 private_v3i64[4]; |
450 | volatile private long4 private_v4i64[4]; |
451 | volatile private long8 private_v8i64[4]; |
452 | volatile private long16 private_v16i64[4]; |
453 | |
// Half-precision floating-point element types.
454 | volatile private half private_f16[4]; |
455 | volatile private half2 private_v2f16[4]; |
456 | volatile private half3 private_v3f16[4]; |
457 | volatile private half4 private_v4f16[4]; |
458 | volatile private half8 private_v8f16[4]; |
459 | volatile private half16 private_v16f16[4]; |
460 | |
// Single-precision floating-point element types.
461 | volatile private float private_f32[4]; |
462 | volatile private float2 private_v2f32[4]; |
463 | volatile private float3 private_v3f32[4]; |
464 | volatile private float4 private_v4f32[4]; |
465 | volatile private float8 private_v8f32[4]; |
466 | volatile private float16 private_v16f32[4]; |
467 | |
// Double-precision floating-point element types.
468 | volatile private double private_f64[4]; |
469 | volatile private double2 private_v2f64[4]; |
470 | volatile private double3 private_v3f64[4]; |
471 | volatile private double4 private_v4f64[4]; |
472 | volatile private double8 private_v8f64[4]; |
473 | volatile private double16 private_v16f64[4]; |
474 | |
// One volatile zero store per array, in declaration order; each store is what
// the expectations above inspect for its alignment.
475 | *private_i8 = 0; |
476 | *private_v2i8 = 0; |
477 | *private_v3i8 = 0; |
478 | *private_v4i8 = 0; |
479 | *private_v8i8 = 0; |
480 | *private_v16i8 = 0; |
481 | |
482 | *private_i16 = 0; |
483 | *private_v2i16 = 0; |
484 | *private_v3i16 = 0; |
485 | *private_v4i16 = 0; |
486 | *private_v8i16 = 0; |
487 | *private_v16i16 = 0; |
488 | |
489 | *private_i32 = 0; |
490 | *private_v2i32 = 0; |
491 | *private_v3i32 = 0; |
492 | *private_v4i32 = 0; |
493 | *private_v8i32 = 0; |
494 | *private_v16i32 = 0; |
495 | |
496 | *private_i64 = 0; |
497 | *private_v2i64 = 0; |
498 | *private_v3i64 = 0; |
499 | *private_v4i64 = 0; |
500 | *private_v8i64 = 0; |
501 | *private_v16i64 = 0; |
502 | |
503 | *private_f16 = 0; |
504 | *private_v2f16 = 0; |
505 | *private_v3f16 = 0; |
506 | *private_v4f16 = 0; |
507 | *private_v8f16 = 0; |
508 | *private_v16f16 = 0; |
509 | |
510 | *private_f32 = 0; |
511 | *private_v2f32 = 0; |
512 | *private_v3f32 = 0; |
513 | *private_v4f32 = 0; |
514 | *private_v8f32 = 0; |
515 | *private_v16f32 = 0; |
516 | |
517 | *private_f64 = 0; |
518 | *private_v2f64 = 0; |
519 | *private_v3f64 = 0; |
520 | *private_v4f64 = 0; |
521 | *private_v8f64 = 0; |
522 | *private_v16f64 = 0; |
523 | } |
524 | |