// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=ALL,X86 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s

// Codegen test: verifies the unoptimized LLVM IR emitted when OpenCL code
// passes structs by value and returns them. The run lines above cover an
// i686 target using the fake address-space map, amdgcn (default OpenCL
// version and CL2.0), and a spir target (CL1.2).
//
// NOTE: comment lines that start with a check prefix are FileCheck
// directives, i.e. part of the test's behavior, not documentation.

// Two-element int vector used as the member type of the Struct* aggregates.
typedef int int2 __attribute__((ext_vector_type(2)));

// Argument type of foo(); the amdgcn check on foo() expects it to be
// passed directly as [9 x i32].
typedef struct {
  int cells[9];
} Mat3X3;

// Return type of foo(); the amdgcn check on foo() expects it to be
// returned directly as a struct value.
typedef struct {
  int cells[16];
} Mat4X4;

// Argument type of foo_large(); the checks expect it to be passed byval.
typedef struct {
  int cells[1024];
} Mat32X32;

// Return type of foo_large(); the checks expect it to be returned via sret.
typedef struct {
  int cells[4096];
} Mat64X64;

// Single int2 member.
struct StructOneMember {
  int2 x;
};

// Two int2 members.
struct StructTwoMember {
  int2 x;
  int2 y;
};

// Single array member of 100 int2 elements (the memcpy checks below
// expect a size of 800 bytes for it).
struct LargeStructOneMember {
  int2 x[100];
};

// Two array members, 40 and 20 int2 elements.
struct LargeStructTwoMember {
  int2 x[40];
  int2 y[20];
};

// Program-scope variable, only declared when compiling as OpenCL C 2.0 or
// later; used by test_indirect_arg_globl below.
#if __OPENCL_C_VERSION__ >= 200
struct LargeStructOneMember g_s;
#endif

// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret(%struct.Mat4X4) align 4 %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
// AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
  // Only the calling convention matters here; the returned contents are
  // intentionally left unspecified.
  Mat4X4 out;
  return out;
}

// ALL-LABEL: define {{.*}} void @ker
// Expect two mem copies: one for the argument "in", and one for
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*

// AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
// AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
  out[0] = foo(in[1]);
}

// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
  // As in foo(): only the argument/return lowering is under test.
  Mat64X64 out;
  return out;
}

// ALL-LABEL: define {{.*}} void @ker_large
// Expect two mem copies: one for the argument "in", and one for
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
// AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
  out[0] = foo_large(in[1]);
}

// AMDGCN-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
void FuncOneMember(struct StructOneMember u) {
  u.x = (int2)(0, 0);
}

// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %u)
// AMDGCN-NOT: addrspacecast
// AMDGCN: store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
void FuncOneLargeMember(struct LargeStructOneMember u) {
  u.x[0] = (int2)(0, 0);
}

// Passing the program-scope g_s by value: the checks expect a temporary
// alloca that is filled by a memcpy from g_s and then handed to the callee.
// AMDGCN20-LABEL: define void @test_indirect_arg_globl()
// AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
// AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
#if __OPENCL_C_VERSION__ >= 200
void test_indirect_arg_globl(void) {
  FuncOneLargeMember(g_s);
}
#endif

// Same pattern for a local-qualified variable: a temporary alloca filled by
// a memcpy from l_s is expected.
// AMDGCN-LABEL: define amdgpu_kernel void @test_indirect_arg_local()
// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
kernel void test_indirect_arg_local(void) {
  local struct LargeStructOneMember l_s;
  FuncOneLargeMember(l_s);
}

// For a function-local variable the checks require that the alloca of p_s
// itself is handed to the callee, with no memcpy in between.
// AMDGCN-LABEL: define void @test_indirect_arg_private()
// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN-NOT: @llvm.memcpy
// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[p_s]])
void test_indirect_arg_private(void) {
  struct LargeStructOneMember p_s;
  FuncOneLargeMember(p_s);
}

// AMDGCN-LABEL: define amdgpu_kernel void @KernelOneMember
// AMDGCN-SAME:  (<2 x i32> %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
// AMDGCN:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
// AMDGCN:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
// AMDGCN:  call void @FuncOneMember(<2 x i32>
kernel void KernelOneMember(struct StructOneMember u) {
  FuncOneMember(u);
}

// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32
// SPIR-NOT: addrspacecast
kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
  FuncOneMember(*u);
}

// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[U]])
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
  FuncOneLargeMember(u);
}

// AMDGCN-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
void FuncTwoMember(struct StructTwoMember u) {
  u.y = (int2)(0, 0);
}

// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %u)
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
  u.y[0] = (int2)(0, 0);
}

// AMDGCN-LABEL: define amdgpu_kernel void @KernelTwoMember
// AMDGCN-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
// AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
// AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
kernel void KernelTwoMember(struct StructTwoMember u) {
  FuncTwoMember(u);
}

// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
// AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
// AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %[[u]])
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
  FuncLargeTwoMember(u);
}