1; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
3; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
4; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
5
6declare float @extern_func(float) #0
7declare float @extern_func_many_args(<64 x float>) #0
8
9@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4
10
; Leaf function without any alloca: returns its argument plus 1.0.
define amdgpu_gfx float @no_stack(float %arg0) #0 {
  %sum = fadd float %arg0, 1.0
  ret float %sum
}
15
; Leaf function with one alloca (4 x float, addrspace(5)).
; The slot is written and read back with volatile accesses so the memory
; traffic is kept, and the loaded value is added to the argument.
define amdgpu_gfx float @simple_stack(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %sum = fadd float %arg0, %loaded
  ret float %sum
}
23
; Leaf function with two separate allocas (each 4 x float, addrspace(5)).
; Each slot is stored to and loaded from with volatile accesses, and both
; loaded values are accumulated onto the argument.
define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
  %slot.a = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot.a
  %loaded.a = load volatile float, float addrspace(5)* %slot.a
  %partial = fadd float %arg0, %loaded.a
  %slot.b = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot.b
  %loaded.b = load volatile float, float addrspace(5)* %slot.b
  %total = fadd float %partial, %loaded.b
  ret float %total
}
35
; The alloca lives in a conditionally executed block rather than in the entry
; block. It is only reached when the argument compares ordered-greater-than
; 0.0; otherwise the function returns 0.0.
define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
entry:
  %is.positive = fcmp ogt float %arg0, 0.0
  br i1 %is.positive, label %with.alloca, label %exit

with.alloca:
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %sum = fadd float %arg0, %loaded
  br label %exit

exit:
  %result = phi float [ 0.0, %entry ], [ %sum, %with.alloca ]
  ret float %result
}
52
; The alloca sits inside a loop body, so it is executed once per iteration.
; The counter starts at 0 and is decremented each time around; the back edge
; is taken only while the counter (before the decrement) is greater than 0.
define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
entry:
  br label %loop

loop:
  %iter = phi i32 [ 0, %entry ], [ %iter.next, %loop ]
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %sum = fadd float %arg0, %loaded
  %again = icmp sgt i32 %iter, 0
  %iter.next = sub i32 %iter, 1
  br i1 %again, label %loop, label %exit

exit:
  ret float %sum
}
70
; No alloca of its own; forwards the argument to @simple_stack (defined in
; this file) and returns that call's result.
define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
  %ret = call amdgpu_gfx float @simple_stack(float %arg0)
  ret float %ret
}
75
; One alloca plus a call to @simple_stack (defined in this file).
; The value loaded from the slot before the call is added to the call result
; after it returns.
define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %ret = call amdgpu_gfx float @simple_stack(float %arg0)
  %sum = fadd float %ret, %loaded
  ret float %sum
}
84
; No alloca of its own; forwards the argument to the externally declared
; @extern_func and returns that call's result.
define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
  %ret = call amdgpu_gfx float @extern_func(float %arg0)
  ret float %ret
}
89
; One alloca plus a call to the externally declared @extern_func.
; The value loaded from the slot before the call is added to the call result
; after it returns.
define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %ret = call amdgpu_gfx float @extern_func(float %arg0)
  %sum = fadd float %ret, %loaded
  ret float %sum
}
98
; No alloca of its own; passes a <64 x float> argument straight through to the
; externally declared @extern_func_many_args and returns that call's result.
define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
  %ret = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
  ret float %ret
}
103
; No alloca of its own; loads a function pointer from the @funcptr constant
; (addrspace(4)), calls through it, and returns the argument unchanged.
define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
  %callee = load void()*, void()* addrspace(4)* @funcptr
  call amdgpu_gfx void %callee()
  ret float %arg0
}
109
; One alloca plus an indirect call through the pointer loaded from @funcptr.
; The value loaded from the slot before the call is added to the argument
; after the call returns.
define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %callee = load void()*, void()* addrspace(4)* @funcptr
  call amdgpu_gfx void %callee()
  %sum = fadd float %arg0, %loaded
  ret float %sum
}
119
; One alloca plus a direct call to itself (the recursion is unconditional).
; The value loaded from the slot before the call is added to the call result.
define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %ret = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
  %sum = fadd float %ret, %loaded
  ret float %sum
}
128
129attributes #0 = { nounwind }
130
131; GCN: amdpal.pipelines:
132; GCN-NEXT:   - .registers:      {}
133; GCN-NEXT:    .shader_functions:
134; GCN-NEXT:      dynamic_stack:
135; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
136; GCN-NEXT:      dynamic_stack_loop:
137; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
138; GCN-NEXT:      multiple_stack:
139; GCN-NEXT:        .stack_frame_size_in_bytes: 0x24{{$}}
140; GCN-NEXT:      no_stack:
141; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
142; GCN-NEXT:      no_stack_call:
143; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
144; GCN-NEXT:      no_stack_extern_call:
145; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
146; GCN-NEXT:      no_stack_extern_call_many_args:
147; SDAG-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
148; GISEL-NEXT:        .stack_frame_size_in_bytes: 0xd0{{$}}
149; GCN-NEXT:      no_stack_indirect_call:
150; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
151; GCN-NEXT:      simple_stack:
152; GCN-NEXT:        .stack_frame_size_in_bytes: 0x14{{$}}
153; GCN-NEXT:      simple_stack_call:
154; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
155; GCN-NEXT:      simple_stack_extern_call:
156; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
157; GCN-NEXT:      simple_stack_indirect_call:
158; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
159; GCN-NEXT:      simple_stack_recurse:
160; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
161; GCN-NEXT: ...
162