1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3
4; Test that the localizer did something and that we don't materialize all
5; constants in SGPRs in the entry block.
6
; Kernel with a two-way branch on %cond. Both successors (bb0 and bb1) perform
; volatile stores of the same six i32 constants (123, 456, 999, 1000, 455,
; 23526) to an undef addrspace(1) pointer, in different orders. The
; autogenerated assertions expect each constant to be materialized with a
; v_mov_b32 inside the block that stores it, rather than up front in the entry
; block.
7define amdgpu_kernel void @localize_constants(i1 %cond) {
8; GFX9-LABEL: localize_constants:
9; GFX9:       ; %bb.0: ; %entry
10; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
11; GFX9-NEXT:    s_mov_b32 s0, -1
12; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
13; GFX9-NEXT:    s_xor_b32 s1, s1, -1
14; GFX9-NEXT:    s_and_b32 s1, s1, 1
15; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
16; GFX9-NEXT:    s_cbranch_scc0 BB0_2
17; GFX9-NEXT:  ; %bb.1: ; %bb1
18; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
19; GFX9-NEXT:    global_store_dword v[0:1], v0, off
20; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
21; GFX9-NEXT:    global_store_dword v[0:1], v0, off
22; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
23; GFX9-NEXT:    global_store_dword v[0:1], v0, off
24; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
25; GFX9-NEXT:    global_store_dword v[0:1], v0, off
26; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
27; GFX9-NEXT:    global_store_dword v[0:1], v0, off
28; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
29; GFX9-NEXT:    s_mov_b32 s0, 0
30; GFX9-NEXT:    global_store_dword v[0:1], v0, off
31; GFX9-NEXT:  BB0_2: ; %Flow
32; GFX9-NEXT:    s_xor_b32 s0, s0, -1
33; GFX9-NEXT:    s_and_b32 s0, s0, 1
34; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
35; GFX9-NEXT:    s_cbranch_scc1 BB0_4
36; GFX9-NEXT:  ; %bb.3: ; %bb0
37; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
38; GFX9-NEXT:    global_store_dword v[0:1], v0, off
39; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
40; GFX9-NEXT:    global_store_dword v[0:1], v0, off
41; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
42; GFX9-NEXT:    global_store_dword v[0:1], v0, off
43; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
44; GFX9-NEXT:    global_store_dword v[0:1], v0, off
45; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
46; GFX9-NEXT:    global_store_dword v[0:1], v0, off
47; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
48; GFX9-NEXT:    global_store_dword v[0:1], v0, off
49; GFX9-NEXT:  BB0_4: ; %bb2
50; GFX9-NEXT:    s_endpgm
51entry:
; Pick which of the two store blocks runs.
52  br i1 %cond, label %bb0, label %bb1
53
54bb0:
; Volatile stores of the six constants in one order.
55  store volatile i32 123, i32 addrspace(1)* undef
56  store volatile i32 456, i32 addrspace(1)* undef
57  store volatile i32 999, i32 addrspace(1)* undef
58  store volatile i32 1000, i32 addrspace(1)* undef
59  store volatile i32 455, i32 addrspace(1)* undef
60  store volatile i32 23526, i32 addrspace(1)* undef
61  br label %bb2
62
63bb1:
; The same six constants as bb0, stored in a different order.
64  store volatile i32 23526, i32 addrspace(1)* undef
65  store volatile i32 455, i32 addrspace(1)* undef
66  store volatile i32 1000, i32 addrspace(1)* undef
67  store volatile i32 456, i32 addrspace(1)* undef
68  store volatile i32 999, i32 addrspace(1)* undef
69  store volatile i32 123, i32 addrspace(1)* undef
70  br label %bb2
71
72bb2:
; Common exit for both paths.
73  ret void
74}
75
76; FIXME: These aren't localized because they were legalized before
77; the localizer, and are no longer G_GLOBAL_VALUE.
; Default-linkage addrspace(1) globals; @localize_globals stores to each of them.
78@gv0 = addrspace(1) global i32 undef, align 4
79@gv1 = addrspace(1) global i32 undef, align 4
80@gv2 = addrspace(1) global i32 undef, align 4
81@gv3 = addrspace(1) global i32 undef, align 4
82
; Kernel with a two-way branch on %cond. bb0 stores 0 and 1 to @gv0 and @gv1;
; bb1 stores 0 and 1 to @gv2 and @gv3. The autogenerated assertions expect the
; s_getpc_b64 / gotpcrel32 address computation and the following
; s_load_dwordx2 for each global to appear in the block that stores to it.
; NOTE(review): the FIXME above the @gv declarations says these are not
; localized, which seems at odds with the assertions below; confirm whether
; that FIXME is stale.
83define amdgpu_kernel void @localize_globals(i1 %cond) {
84; GFX9-LABEL: localize_globals:
85; GFX9:       ; %bb.0: ; %entry
86; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
87; GFX9-NEXT:    s_mov_b32 s0, -1
88; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
89; GFX9-NEXT:    s_xor_b32 s1, s1, -1
90; GFX9-NEXT:    s_and_b32 s1, s1, 1
91; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
92; GFX9-NEXT:    s_cbranch_scc0 BB1_2
93; GFX9-NEXT:  ; %bb.1: ; %bb1
94; GFX9-NEXT:    s_getpc_b64 s[0:1]
95; GFX9-NEXT:    s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
96; GFX9-NEXT:    s_addc_u32 s1, s1, gv2@gotpcrel32@hi+12
97; GFX9-NEXT:    s_getpc_b64 s[2:3]
98; GFX9-NEXT:    s_add_u32 s2, s2, gv3@gotpcrel32@lo+4
99; GFX9-NEXT:    s_addc_u32 s3, s3, gv3@gotpcrel32@hi+12
100; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
101; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
102; GFX9-NEXT:    v_mov_b32_e32 v0, 0
103; GFX9-NEXT:    v_mov_b32_e32 v1, 1
104; GFX9-NEXT:    s_mov_b32 s0, 0
105; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
106; GFX9-NEXT:    global_store_dword v0, v0, s[4:5]
107; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
108; GFX9-NEXT:  BB1_2: ; %Flow
109; GFX9-NEXT:    s_xor_b32 s0, s0, -1
110; GFX9-NEXT:    s_and_b32 s0, s0, 1
111; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
112; GFX9-NEXT:    s_cbranch_scc1 BB1_4
113; GFX9-NEXT:  ; %bb.3: ; %bb0
114; GFX9-NEXT:    s_getpc_b64 s[0:1]
115; GFX9-NEXT:    s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
116; GFX9-NEXT:    s_addc_u32 s1, s1, gv0@gotpcrel32@hi+12
117; GFX9-NEXT:    s_getpc_b64 s[2:3]
118; GFX9-NEXT:    s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
119; GFX9-NEXT:    s_addc_u32 s3, s3, gv1@gotpcrel32@hi+12
120; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
121; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
122; GFX9-NEXT:    v_mov_b32_e32 v0, 0
123; GFX9-NEXT:    v_mov_b32_e32 v1, 1
124; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
125; GFX9-NEXT:    global_store_dword v0, v0, s[0:1]
126; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
127; GFX9-NEXT:  BB1_4: ; %bb2
128; GFX9-NEXT:    s_endpgm
129entry:
; Pick which pair of globals gets written.
130  br i1 %cond, label %bb0, label %bb1
131
132bb0:
; Uses only @gv0 and @gv1.
133  store volatile i32 0, i32 addrspace(1)* @gv0
134  store volatile i32 1, i32 addrspace(1)* @gv1
135  br label %bb2
136
137bb1:
; Uses only @gv2 and @gv3.
138  store volatile i32 0, i32 addrspace(1)* @gv2
139  store volatile i32 1, i32 addrspace(1)* @gv3
140  br label %bb2
141
142bb2:
; Common exit for both paths.
143  ret void
144}
145
; Internal-linkage addrspace(1) globals; @localize_internal_globals stores to
; each of them.
146@static.gv0 = internal addrspace(1) global i32 undef, align 4
147@static.gv1 = internal addrspace(1) global i32 undef, align 4
148@static.gv2 = internal addrspace(1) global i32 undef, align 4
149@static.gv3 = internal addrspace(1) global i32 undef, align 4
150
; Non-kernel function with a two-way branch on %cond. bb0 stores 0 and 1 to
; @static.gv0 and @static.gv1; bb1 stores 0 and 1 to @static.gv2 and
; @static.gv3. The autogenerated assertions expect each address to be formed
; with s_getpc_b64 plus rel32 offsets, with no s_load, inside the block that
; stores to it.
151define void @localize_internal_globals(i1 %cond) {
152; GFX9-LABEL: localize_internal_globals:
153; GFX9:       ; %bb.0: ; %entry
154; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
156; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
157; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
158; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
159; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
160; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
161; GFX9-NEXT:    s_cbranch_execz BB2_2
162; GFX9-NEXT:  ; %bb.1: ; %bb1
163; GFX9-NEXT:    s_getpc_b64 s[6:7]
164; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4
165; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv2@rel32@hi+12
166; GFX9-NEXT:    v_mov_b32_e32 v0, 0
167; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
168; GFX9-NEXT:    s_getpc_b64 s[6:7]
169; GFX9-NEXT:    s_add_u32 s6, s6, static.gv3@rel32@lo+4
170; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv3@rel32@hi+12
171; GFX9-NEXT:    v_mov_b32_e32 v1, 1
172; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
173; GFX9-NEXT:  BB2_2: ; %Flow
174; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[4:5]
175; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
176; GFX9-NEXT:    s_cbranch_execz BB2_4
177; GFX9-NEXT:  ; %bb.3: ; %bb0
178; GFX9-NEXT:    s_getpc_b64 s[6:7]
179; GFX9-NEXT:    s_add_u32 s6, s6, static.gv0@rel32@lo+4
180; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv0@rel32@hi+12
181; GFX9-NEXT:    v_mov_b32_e32 v0, 0
182; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
183; GFX9-NEXT:    s_getpc_b64 s[6:7]
184; GFX9-NEXT:    s_add_u32 s6, s6, static.gv1@rel32@lo+4
185; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv1@rel32@hi+12
186; GFX9-NEXT:    v_mov_b32_e32 v1, 1
187; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
188; GFX9-NEXT:  BB2_4: ; %bb2
189; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
190; GFX9-NEXT:    s_waitcnt vmcnt(0)
191; GFX9-NEXT:    s_setpc_b64 s[30:31]
192entry:
; Pick which pair of internal globals gets written.
193  br i1 %cond, label %bb0, label %bb1
194
195bb0:
; Uses only @static.gv0 and @static.gv1.
196  store volatile i32 0, i32 addrspace(1)* @static.gv0
197  store volatile i32 1, i32 addrspace(1)* @static.gv1
198  br label %bb2
199
200bb1:
; Uses only @static.gv2 and @static.gv3.
201  store volatile i32 0, i32 addrspace(1)* @static.gv2
202  store volatile i32 1, i32 addrspace(1)* @static.gv3
203  br label %bb2
204
205bb2:
; Common exit for both paths.
206  ret void
207}
208