; RUN: opt -mtriple=amdgcn-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s
; RUN: opt -mtriple=r600-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; This test contains a simple loop that initializes an array declared in
; private memory. We want to make sure these kinds of loops are always
; unrolled, because private memory is slow.

; CHECK-LABEL: @private_memory
; CHECK-NOT: alloca
; CHECK: store i32 5, i32 addrspace(1)* %out
define amdgpu_kernel void @private_memory(i32 addrspace(1)* %out) {
entry:
  %0 = alloca [32 x i32], addrspace(5)
  br label %loop.header

loop.header:
  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
  br label %loop.body

loop.body:
  %ptr = getelementptr [32 x i32], [32 x i32] addrspace(5)* %0, i32 0, i32 %counter
  store i32 %counter, i32 addrspace(5)* %ptr
  br label %loop.inc

loop.inc:
  %inc = add i32 %counter, 1
  %1 = icmp sge i32 %counter, 32
  br i1 %1, label %exit, label %loop.header

exit:
  %2 = getelementptr [32 x i32], [32 x i32] addrspace(5)* %0, i32 0, i32 5
  %3 = load i32, i32 addrspace(5)* %2
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; Check that the loop is unrolled for local memory references.

; CHECK-LABEL: @local_memory
; CHECK: getelementptr i32, i32 addrspace(1)* %out, i32 128
; CHECK-NEXT: store
; CHECK-NEXT: ret
define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) {
entry:
  br label %loop.header

loop.header:
  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
  br label %loop.body

loop.body:
  %ptr_lds = getelementptr i32, i32 addrspace(3)* %lds, i32 %counter
  %val = load i32, i32 addrspace(3)* %ptr_lds
  %ptr_out = getelementptr i32, i32 addrspace(1)* %out, i32 %counter
  store i32 %val, i32 addrspace(1)* %ptr_out
  br label %loop.inc

loop.inc:
  %inc = add i32 %counter, 1
  %cond = icmp sge i32 %counter, 128
  br i1 %cond, label %exit, label %loop.header

exit:
  ret void
}

; Check that a loop with an if inside is completely unrolled, eliminating the
; phi and the branch.

; CHECK-LABEL: @unroll_for_if
; CHECK: entry:
; CHECK-NEXT: getelementptr
; CHECK-NEXT: store
; CHECK-NEXT: getelementptr
; CHECK-NEXT: store
; CHECK-NOT: br
define amdgpu_kernel void @unroll_for_if(i32 addrspace(5)* %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc
  %i1 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %and = and i32 %i1, 1
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %for.inc, label %if.then

if.then:                                          ; preds = %for.body
  %0 = sext i32 %i1 to i64
  %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %a, i64 %0
  store i32 0, i32 addrspace(5)* %arrayidx, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %inc = add nuw nsw i32 %i1, 1
  %cmp = icmp ult i32 %inc, 48
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.inc
  ret void
}

; Check that runtime unrolling is enabled for local memory references.

; CHECK-LABEL: @local_memory_runtime
; CHECK: loop.header:
; CHECK: load i32, i32 addrspace(3)*
; CHECK: load i32, i32 addrspace(3)*
; CHECK: br i1
; CHECK: loop.header.epil
; CHECK: load i32, i32 addrspace(3)*
; CHECK: ret
define amdgpu_kernel void @local_memory_runtime(i32 addrspace(1)* %out, i32 addrspace(3)* %lds, i32 %n) {
entry:
  br label %loop.header

loop.header:
  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
  br label %loop.body

loop.body:
  %ptr_lds = getelementptr i32, i32 addrspace(3)* %lds, i32 %counter
  %val = load i32, i32 addrspace(3)* %ptr_lds
  %ptr_out = getelementptr i32, i32 addrspace(1)* %out, i32 %counter
  store i32 %val, i32 addrspace(1)* %ptr_out
  br label %loop.inc

loop.inc:
  %inc = add i32 %counter, 1
  %cond = icmp sge i32 %counter, %n
  br i1 %cond, label %exit, label %loop.header

exit:
  ret void
}