1; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
; Stores the address of an addrspace(5) alloca to memory while the alloca is
; bracketed by lifetime.start / lifetime.end markers. The checks below expect
; the address to be materialized as the immediate 4 in a VGPR and written with
; buffer_store_dword. The %out and %in arguments are not used by the body.
3; GCN-LABEL: {{^}}store_fi_lifetime:
4; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
5; GCN: buffer_store_dword [[FI]]
6define amdgpu_kernel void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
7entry:
8  %b = alloca i8, addrspace(5)
9  call void @llvm.lifetime.start.p5i8(i64 1, i8 addrspace(5)* %b)
  ; The value being stored is the alloca's own address; the destination is an
  ; undef addrspace(1) pointer.
10  store volatile i8 addrspace(5)* %b, i8 addrspace(5)* addrspace(1)* undef
11  call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* %b)
12  ret void
13}
14
; Stores the address of an addrspace(5) alloca through the addrspace(3) pointer
; argument %ptr. The checks expect the pointer argument to be fetched with
; s_load_dword and copied into a VGPR, and the alloca's address (immediate 4)
; to be written with ds_write_b32.
; NOTE(review): the capture is named ZERO0 although it matches the immediate
; 4 - presumably a leftover name; confirm before renaming.
15; GCN-LABEL: {{^}}stored_fi_to_lds:
16; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
17; GCN: buffer_store_dword v{{[0-9]+}}, off,
18; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}}
19; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
20; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO0]]
21define amdgpu_kernel void @stored_fi_to_lds(float addrspace(5)* addrspace(3)* %ptr) #0 {
22  %tmp = alloca float, addrspace(5)
  ; First store writes a float into the alloca; second store writes the
  ; alloca's address through %ptr.
23  store float 4.0, float  addrspace(5)*%tmp
24  store float addrspace(5)* %tmp, float addrspace(5)* addrspace(3)* %ptr
25  ret void
26}
27
; Two float allocas in addrspace(5); the address of each is stored (volatile)
; through the same addrspace(3) pointer %ptr. The checks expect the float
; stores at offset:4 and offset:8, and the two address values to be written by
; ds_write_b32 as the immediates 4 and 8.
; NOTE(review): the capture is named ZERO although it matches the immediate
; 4 - presumably a leftover name; confirm before renaming.
28; Offset is applied
29; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
30; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}}
31; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
32; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}}
33
34; GCN-DAG: s_load_dword [[LDSPTR:s[0-9]+]]
35
36; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
37; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO]]
38
39; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
40; GCN: ds_write_b32  [[VLDSPTR]], [[FI1]]
41define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float addrspace(5)* addrspace(3)* %ptr) #0 {
42  %tmp0 = alloca float, addrspace(5)
43  %tmp1 = alloca float, addrspace(5)
44  store float 4.0, float addrspace(5)* %tmp0
45  store float 4.0, float addrspace(5)* %tmp1
  ; Both address stores target the same location %ptr; they are volatile, and
  ; the checks expect two separate ds_write_b32 instructions.
46  store volatile float addrspace(5)* %tmp0, float addrspace(5)* addrspace(3)* %ptr
47  store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(3)* %ptr
48  ret void
49}
50
; An addrspace(5) alloca that holds a pointer has its own address stored into
; itself. The checks expect the constant 0x4d2 (1234) to be stored at offset:4
; first, followed by a store of the immediate 4 to the same offset:4.
51; Same frame index is used multiple times in the store
52; GCN-LABEL: {{^}}stored_fi_to_self:
53; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4d2{{$}}
54; GCN: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
55; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}}
56; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
57define amdgpu_kernel void @stored_fi_to_self() #0 {
58  %tmp = alloca i32 addrspace(5)*, addrspace(5)
59
60  ; Avoid optimizing everything out
61  store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp
  ; Reinterpret the alloca's address as the pointee type so that it can be
  ; stored into the alloca itself.
62  %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp to i32 addrspace(5)*
63  store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp
64  ret void
65}
66
; Like @stored_fi_to_self, but a [512 x i32] alloca is declared before the
; self-referencing pointer alloca. The checks expect the first object to be
; written at offset:4 and the second at offset:2052, with the second object's
; address materialized as 0x804 (2052) and stored to that same offset.
67; GCN-LABEL: {{^}}stored_fi_to_self_offset:
68; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 32{{$}}
69; GCN: buffer_store_dword [[K0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
70
71; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x4d2{{$}}
72; GCN: buffer_store_dword [[K1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2052{{$}}
73
74; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x804{{$}}
75; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2052{{$}}
76define amdgpu_kernel void @stored_fi_to_self_offset() #0 {
77  %tmp0 = alloca [512 x i32], addrspace(5)
78  %tmp1 = alloca i32 addrspace(5)*, addrspace(5)
79
80  ; Avoid optimizing everything out
81  %tmp0.cast = bitcast [512 x i32] addrspace(5)* %tmp0 to i32 addrspace(5)*
82  store volatile i32 32, i32 addrspace(5)* %tmp0.cast
83
84  store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1
85
  ; Store the address of %tmp1 into %tmp1 itself.
86  %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)*
87  store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp1
88  ret void
89}
90
; Three pointer-sized addrspace(5) allocas. After each is initialized with a
; constant, the address of %tmp1 is stored into %tmp2 and the address of %tmp2
; is stored into %tmp1. The checks expect the three initializing stores at
; offset:4, offset:8 and offset:12, then the value 8 stored at offset:12 and
; the value 12 stored at offset:8.
91; GCN-LABEL: {{^}}stored_fi_to_fi:
92; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
93; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}}
94; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12{{$}}
95
96; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
97; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12{{$}}
98
99; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
100; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}}
101define amdgpu_kernel void @stored_fi_to_fi() #0 {
102  %tmp0 = alloca i32 addrspace(5)*, addrspace(5)
103  %tmp1 = alloca i32 addrspace(5)*, addrspace(5)
104  %tmp2 = alloca i32 addrspace(5)*, addrspace(5)
105  store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp0
106  store volatile i32 addrspace(5)* inttoptr (i32 5678 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1
107  store volatile i32 addrspace(5)* inttoptr (i32 9999 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp2
108
  ; Offsets in the comments below follow the expected output above: %tmp1 is
  ; at offset 8 and %tmp2 is at offset 12.
109  %bitcast1 = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)*
110  %bitcast2 = bitcast i32 addrspace(5)* addrspace(5)* %tmp2 to i32 addrspace(5)* ; at offset 12
111
112  store volatile i32 addrspace(5)* %bitcast1, i32 addrspace(5)* addrspace(5)* %tmp2 ; store offset 8 at offset 12
113  store volatile i32 addrspace(5)* %bitcast2, i32 addrspace(5)* addrspace(5)* %tmp1 ; store offset 12 at offset 8
114  ret void
115}
116
; Stores the address of an addrspace(5) alloca through the addrspace(1)
; pointer argument %ptr. The checks expect the float store at offset:4 and the
; alloca's address to be materialized as the immediate 4 and written with
; buffer_store_dword.
117; GCN-LABEL: {{^}}stored_fi_to_global:
118; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
119; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
120; GCN: buffer_store_dword [[FI]]
121define amdgpu_kernel void @stored_fi_to_global(float addrspace(5)* addrspace(1)* %ptr) #0 {
122  %tmp = alloca float, addrspace(5)
  ; First store writes a float into the alloca; second store writes the
  ; alloca's address through %ptr.
123  store float 0.0, float  addrspace(5)*%tmp
124  store float addrspace(5)* %tmp, float addrspace(5)* addrspace(1)* %ptr
125  ret void
126}
127
; Three float allocas in addrspace(5); only the addresses of %tmp1 and %tmp2
; are stored through the addrspace(1) pointer %ptr. The checks expect the three
; float stores at offset:4, offset:8 and offset:12, then the values 8 and 12
; to be written with buffer_store_dword.
128; Offset is applied
129; GCN-LABEL: {{^}}stored_fi_to_global_2_small_objects:
130; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
131; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}}
132; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12{{$}}
133
134; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
135; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
136
137; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
138; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
139define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float addrspace(5)* addrspace(1)* %ptr) #0 {
140  %tmp0 = alloca float, addrspace(5)
141  %tmp1 = alloca float, addrspace(5)
142  %tmp2 = alloca float, addrspace(5)
143  store volatile float 0.0, float  addrspace(5)*%tmp0
144  store volatile float 0.0, float  addrspace(5)*%tmp1
145  store volatile float 0.0, float  addrspace(5)*%tmp2
  ; The address of %tmp0 is never stored; only %tmp1 and %tmp2 are written
  ; through %ptr.
146  store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(1)* %ptr
147  store volatile float addrspace(5)* %tmp2, float addrspace(5)* addrspace(1)* %ptr
148  ret void
149}
150
; Two [4096 x i32] allocas in addrspace(5). Element 0 and element 4095 of
; %tmp0 are written (volatile), then the address of element 14 of %tmp0 is
; stored through the addrspace(1) pointer %ptr. The checks expect the element
; 4095 store to use an address computed by adding 0x3ffc (4095 * 4) to a base
; register and the offen addressing form, and the stored pointer value to be
; that base plus 56 (14 * 4).
; NOTE(review): %tmp1 is allocated but never referenced, and %gep0.tmp1
; indexes %tmp0 despite its name. The expected output matches the code as
; written, so switching it to %tmp1 would also require updating the checks.
151; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
152; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
153; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
154
155; FIXME: Re-initialize
156; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 4{{$}}
157
158; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
159; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
160
161
162; GCN: v_add_i32_e32 [[BASE_1_OFF_2:v[0-9]+]], vcc, 56, [[BASE_0_1]]
163; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
164
165; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
166define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32 addrspace(5)* addrspace(1)* %ptr) #0 {
167  %tmp0 = alloca [4096 x i32], addrspace(5)
168  %tmp1 = alloca [4096 x i32], addrspace(5)
169  %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 0
170  store volatile i32 0, i32 addrspace(5)* %gep0.tmp0
171  %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 4095
  ; 999 is the 0x3e7 constant matched above.
172  store volatile i32 999, i32 addrspace(5)* %gep1.tmp0
173  %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 14
174  store i32 addrspace(5)* %gep0.tmp1, i32 addrspace(5)* addrspace(1)* %ptr
175  ret void
176}
177
; External addrspace(1) global holding an addrspace(5) pointer; it is loaded by
; @cannot_select_assertzext_valuetype.
178@g1 = external addrspace(1) global i32 addrspace(5)*
179
; Regression test for the selection failure described below. The body loads an
; addrspace(5) pointer from @g1, indexes it by %idx and loads through it
; (the loaded value %tmp2 is never used), then stores the address of the
; alloca %b to an undef addrspace(1) pointer. The checks expect the address of
; @g1 to be formed from s_getpc_b64 plus gotpcrel32 relocations, and the
; alloca's address to be materialized as the immediate 4 and written with
; buffer_store_dword. The %out argument is not used by the body.
180; This was leaving a dead node around resulting in failing to select
181; on the leftover AssertZext's ValueType operand.
182
183; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype:
184; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
185; GCN: s_add_u32 s{{[0-9]+}}, s[[PC_LO]], g1@gotpcrel32@lo+4
186; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1@gotpcrel32@hi+12
187; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
188; GCN: buffer_store_dword [[FI]]
189define amdgpu_kernel void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
190entry:
191  %b = alloca i32, align 4, addrspace(5)
192  %tmp1 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* @g1, align 4
193  %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %tmp1, i32 %idx
  ; Non-volatile load whose result has no users.
194  %tmp2 = load i32, i32 addrspace(5)* %arrayidx, align 4
195  store volatile i32 addrspace(5)* %b, i32 addrspace(5)* addrspace(1)* undef
196  ret void
197}
198
; Lifetime marker intrinsics for i8 addrspace(5) pointers; both are called by
; @store_fi_lifetime.
199declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1
200declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1
201
; Attribute group #0 is attached to the kernel definitions above; #1 is
; attached to the lifetime intrinsic declarations.
202attributes #0 = { nounwind }
203attributes #1 = { argmemonly nounwind }
204