| 1 | // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s |
| 2 | // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s |
| 3 | // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s |
| 4 | // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s |
| 5 | |
| 6 | typedef int int2 __attribute__((ext_vector_type(2))); /* Two-lane int vector; it appears as <2 x i32> in the IR patterns of this file. */ |
| 7 | |
| 8 | typedef struct { /* Aggregate of 9 ints, used as the by-value argument of foo. */ |
| 9 | int cells[9]; |
| 10 | } Mat3X3; |
| 11 | |
| 12 | typedef struct { /* Aggregate of 16 ints, used as the by-value return type of foo. */ |
| 13 | int cells[16]; |
| 14 | } Mat4X4; |
| 15 | |
| 16 | typedef struct { /* Aggregate of 1024 ints, used as the by-value argument of foo_large. */ |
| 17 | int cells[1024]; |
| 18 | } Mat32X32; |
| 19 | |
| 20 | typedef struct { /* Aggregate of 4096 ints, used as the by-value return type of foo_large. */ |
| 21 | int cells[4096]; |
| 22 | } Mat64X64; |
| 23 | |
| 24 | struct StructOneMember { /* Struct holding a single int2; FuncOneMember and KernelOneMember take it by value. */ |
| 25 | int2 x; |
| 26 | }; |
| 27 | |
| 28 | struct StructTwoMember { /* Struct holding two int2 members; FuncTwoMember and KernelTwoMember take it by value. */ |
| 29 | int2 x; |
| 30 | int2 y; |
| 31 | }; |
| 32 | |
| 33 | struct LargeStructOneMember { /* Struct with one member, an array of 100 int2; the memcpy patterns in this file copy it as 800 bytes. */ |
| 34 | int2 x[100]; |
| 35 | }; |
| 36 | |
| 37 | struct LargeStructTwoMember { /* Struct with two array members (40 and 20 int2); FuncLargeTwoMember and KernelLargeTwoMember take it by value. */ |
| 38 | int2 x[40]; |
| 39 | int2 y[20]; |
| 40 | }; |
| 41 | |
| 42 | #if __OPENCL_C_VERSION__ >= 200 /* g_s is declared only when the OpenCL C version is 200 or newer. */ |
| 43 | struct LargeStructOneMember g_s; /* File-scope instance passed by value in test_indirect_arg_globl; the patterns for that test refer to it as an addrspace(1) global. */ |
| 44 | #endif |
| 45 | |
| 46 | // X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in) |
| 47 | // AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce) |
| 48 | Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { /* Takes a Mat3X3 by value and returns a Mat4X4 by value; the argument is never read. */ |
| 49 | Mat4X4 out; /* Never initialized: the patterns for foo only match its signature, i.e. how the argument and result are passed. */ |
| 50 | return out; |
| 51 | } |
| 52 | |
| 53 | // COM-LABEL: define {{.*}} void @ker |
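| 54 | // On i686, expect two mem copies: one for the argument "in", and one for |
| 55 | // the return value. On amdgcn, "in" is loaded and passed as a coerced [9 x i32], so only the return value is copied. |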
| 56 | // X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8* |
| 57 | // X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* |
| 58 | |
| 59 | // AMDGCN: load [9 x i32], [9 x i32] addrspace(1)* |
| 60 | // AMDGCN: call %struct.Mat4X4 @foo([9 x i32] |
| 61 | // AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* |
| 62 | kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { /* Kernel that calls foo with structs reached through global pointers. */ |
| 63 | out[0] = foo(in[1]); /* Passes the second element of in by value and stores the returned struct into the first element of out. */ |
| 64 | } |
| 65 | |
| 66 | // X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in) |
| 67 | // AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in) |
| 68 | Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { /* Large-struct counterpart of foo: 1024-int argument by value, 4096-int result by value; the argument is never read. */ |
| 69 | Mat64X64 out; /* Never initialized: the patterns for foo_large only match its signature (sret result, byval argument). */ |
| 70 | return out; |
| 71 | } |
| 72 | |
| 73 | // COM-LABEL: define {{.*}} void @ker_large |
| 74 | // Expect two mem copies: one for the argument "in", and one for |
| 75 | // the return value. |
| 76 | // X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8* |
| 77 | // X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* |
| 78 | // AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* |
| 79 | // AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* |
| 80 | kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { /* Kernel that calls foo_large with structs reached through global pointers. */ |
| 81 | out[0] = foo_large(in[1]); /* Passes the second element of in by value and stores the returned struct into the first element of out. */ |
| 82 | } |
| 83 | |
| 84 | // AMDGCN-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce) |
| 85 | void FuncOneMember(struct StructOneMember u) { /* Takes the one-member struct by value. */ |
| 86 | u.x = (int2)(0, 0); /* Zeroes the member of the local copy u; the caller's struct is not touched. */ |
| 87 | } |
| 88 | |
| 89 | // AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u) |
| 90 | // AMDGCN-NOT: addrspacecast |
| 91 | // AMDGCN: store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)* |
| 92 | void FuncOneLargeMember(struct LargeStructOneMember u) { /* Takes the 100-element struct by value. */ |
| 93 | u.x[0] = (int2)(0, 0); /* Writes to the parameter copy; the patterns above require this store to address addrspace(5) directly, with no addrspacecast. */ |
| 94 | } |
| 95 | |
| 96 | // AMDGCN20-LABEL: define void @test_indirect_arg_globl() |
| 97 | // AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) |
| 98 | // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)* |
| 99 | // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false) |
| 100 | // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]]) |
| 101 | #if __OPENCL_C_VERSION__ >= 200 /* Same guard as the declaration of g_s, which this test needs. */ |
| 102 | void test_indirect_arg_globl(void) { /* Passes the file-scope struct g_s by value. */ |
| 103 | FuncOneLargeMember(g_s); /* The patterns above expect an 800-byte memcpy from addrspace(1) into an addrspace(5) temporary, which is then handed to the callee byval. */ |
| 104 | } |
| 105 | #endif |
| 106 | |
| 107 | // AMDGCN-LABEL: define amdgpu_kernel void @test_indirect_arg_local() |
| 108 | // AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) |
| 109 | // AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)* |
| 110 | // AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false) |
| 111 | // AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]]) |
| 112 | kernel void test_indirect_arg_local(void) { /* Passes a struct declared local by value. */ |
| 113 | local struct LargeStructOneMember l_s; /* The expected IR refers to this variable as @test_indirect_arg_local.l_s in addrspace(3), so its name is part of the test. */ |
| 114 | FuncOneLargeMember(l_s); /* Expected to be copied (800 bytes) into an addrspace(5) temporary before the call. */ |
| 115 | } |
| 116 | |
| 117 | // AMDGCN-LABEL: define void @test_indirect_arg_private() |
| 118 | // AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) |
| 119 | // AMDGCN-NOT: @llvm.memcpy |
| 120 | // AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]]) |
| 121 | void test_indirect_arg_private(void) { /* Passes a function-local struct with no address space qualifier by value. */ |
| 122 | struct LargeStructOneMember p_s; /* Left uninitialized; the patterns above only look at how it is passed. */ |
| 123 | FuncOneLargeMember(p_s); /* The patterns above expect the alloca of p_s itself as the byval argument, with no memcpy in between. */ |
| 124 | } |
| 125 | |
| 126 | // AMDGCN-LABEL: define amdgpu_kernel void @KernelOneMember |
| 127 | // AMDGCN-SAME: (<2 x i32> %[[u_coerce:.*]]) |
| 128 | // AMDGCN: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5) |
| 129 | // AMDGCN: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0 |
| 130 | // AMDGCN: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]] |
| 131 | // AMDGCN: call void @FuncOneMember(<2 x i32> |
| 132 | kernel void KernelOneMember(struct StructOneMember u) { /* Kernel entry point that receives the one-member struct by value. */ |
| 133 | FuncOneMember(u); /* Forwards u by value; the patterns above expect the callee to receive a <2 x i32>. */ |
| 134 | } |
| 135 | |
| 136 | // SPIR: call void @llvm.memcpy.p0i8.p1i8.i32 |
| 137 | // SPIR-NOT: addrspacecast |
| 138 | kernel void KernelOneMemberSpir(global struct StructOneMember* u) { /* Kernel that receives the struct through a global pointer; the patterns above are matched in the spir run only. */ |
| 139 | FuncOneMember(*u); /* Dereferences the pointer and passes the struct by value; expected to produce a memcpy out of addrspace(1) and no addrspacecast. */ |
| 140 | } |
| 141 | |
| 142 | // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember( |
| 143 | // AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) |
| 144 | // AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8 |
| 145 | // AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]]) |
| 146 | kernel void KernelLargeOneMember(struct LargeStructOneMember u) { /* Kernel entry point that receives the 100-element struct by value. */ |
| 147 | FuncOneLargeMember(u); /* Forwards u by value; the patterns above expect the coerced argument to be stored to an addrspace(5) alloca that is then passed byval. */ |
| 148 | } |
| 149 | |
| 150 | // AMDGCN-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1) |
| 151 | void FuncTwoMember(struct StructTwoMember u) { /* Takes the two-member struct by value. */ |
| 152 | u.y = (int2)(0, 0); /* Zeroes the second member of the local copy u; the caller's struct is not touched. */ |
| 153 | } |
| 154 | |
| 155 | // AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u) |
| 156 | void FuncLargeTwoMember(struct LargeStructTwoMember u) { /* Takes the struct with two array members by value. */ |
| 157 | u.y[0] = (int2)(0, 0); /* Zeroes the first element of y in the local copy u; the caller's struct is not touched. */ |
| 158 | } |
| 159 | |
| 160 | // AMDGCN-LABEL: define amdgpu_kernel void @KernelTwoMember |
| 161 | // AMDGCN-SAME: (%struct.StructTwoMember %[[u_coerce:.*]]) |
| 162 | // AMDGCN: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5) |
| 163 | // AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)* |
| 164 | // AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)* |
| 165 | // AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]]) |
| 166 | kernel void KernelTwoMember(struct StructTwoMember u) { /* Kernel entry point that receives the two-member struct by value. */ |
| 167 | FuncTwoMember(u); /* Forwards u by value; the patterns above expect two <2 x i32> loads that become the two call arguments. */ |
| 168 | } |
| 169 | |
| 170 | // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeTwoMember |
| 171 | // AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]]) |
| 172 | // AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5) |
| 173 | // AMDGCN: store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]] |
| 174 | // AMDGCN: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]]) |
| 175 | kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { /* Kernel entry point that receives the struct with two array members by value. */ |
| 176 | FuncLargeTwoMember(u); /* Forwards u by value; the patterns above expect the coerced argument to be stored to an addrspace(5) alloca that is then passed byval. */ |
| 177 | } |
| 178 | |