1; RUN: opt -mtriple=amdgcn-- -O1 -S < %s | FileCheck %s --check-prefixes=FUNC,LOOP
2; RUN: opt -mtriple=amdgcn-- -O1 -S -disable-promote-alloca-to-vector < %s | FileCheck %s --check-prefixes=FUNC,FULL-UNROLL
3
; The "A5" data layout component selects address space 5 as the alloca address
; space, which is why the alloca in the kernel below is written with
; addrspace(5).
4target datalayout = "A5"
5
6; This test contains a simple loop that initializes an array declared in
7; private memory. The loop would be fully unrolled if SROA could not
8; eliminate the alloca. Check that the alloca is removed before unrolling,
9; so that the loop does not need to be fully unrolled.
10
11; FUNC-LABEL: @private_memory
12; LOOP-NOT: alloca
13; LOOP: loop.header:
14; LOOP: br i1 %{{[^,]+}}, label %exit, label %loop.header
15
16; FULL-UNROLL: alloca
17; FULL-UNROLL-COUNT-256: store i32 {{[0-9]+}}, i32 addrspace(5)*
18; FULL-UNROLL-NOT: br
19
20; FUNC: store i32 %{{[^,]+}}, i32 addrspace(1)* %out
; Kernel under test: writes the loop counter into a 16-element i32 array that
; lives in an addrspace(5) alloca, over 256 loop iterations, then reads one
; element back and stores it to %out.
;   %out - addrspace(1) pointer that receives the single loaded value.
;   %n   - runtime value used both to scramble the store index inside the loop
;          and as the (unmasked) load index after the loop.
21define amdgpu_kernel void @private_memory(i32 addrspace(1)* %out, i32 %n) {
22entry:
  ; The array whose alloca the checks above expect to disappear in the default
  ; configuration and to survive when promotion to vector is disabled.
23  %alloca = alloca [16 x i32], addrspace(5)
24  br label %loop.header
25
26loop.header:
  ; Induction variable: 0 on entry, %inc on every back edge from loop.inc.
27  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
28  br label %loop.body
29
30loop.body:
  ; The store index depends on the kernel argument %n, so it is not a
  ; compile-time constant; the mask with 15 keeps it within the 16 elements.
31  %salt = xor i32 %counter, %n
32  %idx = and i32 %salt, 15
33  %ptr = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %idx
34  store i32 %counter, i32 addrspace(5)* %ptr
35  br label %loop.inc
36
37loop.inc:
38  %inc = add i32 %counter, 1
  ; The comparison uses the pre-increment %counter, so the body runs for
  ; counter = 0 through 255 inclusive, i.e. 256 iterations. That matches the
  ; 256 stores expected by the fully-unrolled configuration.
39  %cmp = icmp sge i32 %counter, 255
40  br i1 %cmp, label  %exit, label %loop.header
41
42exit:
  ; Read back one element at index %n (note: not masked here, unlike the store
  ; index) and write it out, which keeps the array contents observable.
43  %gep = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %n
44  %load = load i32, i32 addrspace(5)* %gep
45  store i32 %load, i32 addrspace(1)* %out
46  ret void
47}
48