; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Interleave loads and stores to fit into the 9 VGPR limit.
; This requires avoiding load/store clustering.

; Reschedule the second scheduling region without clustering while
; the first region is skipped.

; GCN: global_load_dwordx4
; GCN: global_store_dwordx4
; GCN: global_load_dwordx4
; GCN: global_store_dwordx4
; GCN: global_load_dwordx4
; GCN: global_store_dwordx4
; GCN: NumVgprs: {{[0-9]$}}
; GCN: ScratchSize: 0{{$}}

define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1, i1 %cnd) #1 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %base = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %id
  br i1 %cnd, label %bb1, label %bb2

bb1:
  %tmp = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 1
  %tmp2 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp, align 4
  %tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 3
  %tmp4 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp3, align 4
  %tmp5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 5
  %tmp6 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp5, align 4
  store <4 x i32> %tmp2, <4 x i32> addrspace(1)* %arg1, align 4
  %tmp7 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 3
  store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp7, align 4
  %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 5
  store <4 x i32> %tmp6, <4 x i32> addrspace(1)* %tmp8, align 4
  br label %bb2

bb2:
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { "amdgpu-num-vgpr"="9" }
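
; A rough register accounting for the limit above: each <4 x i32> value
; occupies 4 VGPRs, so clustering the three loads in %bb1 would keep 12
; data VGPRs live at once, which cannot fit under "amdgpu-num-vgpr"="9".
; Interleaving each load with its store keeps at most one 4-VGPR payload
; live at a time, which is the ordering the GCN check lines above expect.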