; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Test the localizer did something and we don't materialize all
; constants in SGPRs in the entry block.
;
; All three functions share one shape: the entry block branches on %cond to
; bb0 or bb1, each of which performs volatile stores and then joins at bb2.
; The check lines are autogenerated; regenerate them with the script named
; on the first line instead of editing them by hand.

; Both successors store the same six i32 constants, in different orders, to
; an undef addrspace(1) pointer. In the expected output each constant is
; materialized by a v_mov_b32 directly before the store that uses it
; (0x7b = 123, 0x1c8 = 456, 0x3e7 = 999, 0x3e8 = 1000, 0x1c7 = 455,
; 0x5be6 = 23526); none of them is materialized in the entry block.
define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-LABEL: localize_constants:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s0, -1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s1, s1, -1
; GFX9-NEXT:    s_and_b32 s1, s1, 1
; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
; GFX9-NEXT:    s_cbranch_scc0 BB0_2
; GFX9-NEXT:  ; %bb.1: ; %bb1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT:    s_mov_b32 s0, 0
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:  BB0_2: ; %Flow
; GFX9-NEXT:    s_xor_b32 s0, s0, -1
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
; GFX9-NEXT:    s_cbranch_scc1 BB0_4
; GFX9-NEXT:  ; %bb.3: ; %bb0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:  BB0_4: ; %bb2
; GFX9-NEXT:    s_endpgm
entry:
  br i1 %cond, label %bb0, label %bb1

bb0:
  store volatile i32 123, i32 addrspace(1)* undef
  store volatile i32 456, i32 addrspace(1)* undef
  store volatile i32 999, i32 addrspace(1)* undef
  store volatile i32 1000, i32 addrspace(1)* undef
  store volatile i32 455, i32 addrspace(1)* undef
  store volatile i32 23526, i32 addrspace(1)* undef
  br label %bb2

bb1:
  store volatile i32 23526, i32 addrspace(1)* undef
  store volatile i32 455, i32 addrspace(1)* undef
  store volatile i32 1000, i32 addrspace(1)* undef
  store volatile i32 456, i32 addrspace(1)* undef
  store volatile i32 999, i32 addrspace(1)* undef
  store volatile i32 123, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}

; FIXME: These aren't localized because these were legalized before
; the localizer, and are no longer G_GLOBAL_VALUE.
@gv0 = addrspace(1) global i32 undef, align 4
@gv1 = addrspace(1) global i32 undef, align 4
@gv2 = addrspace(1) global i32 undef, align 4
@gv3 = addrspace(1) global i32 undef, align 4

; bb0 stores to @gv0/@gv1 and bb1 stores to @gv2/@gv3. In the expected output
; each block forms both of its addresses up front (s_getpc_b64 plus
; gotpcrel32 relocations, then s_load_dwordx2 of the pointer) and only then
; issues its two stores.
define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s0, -1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s1, s1, -1
; GFX9-NEXT:    s_and_b32 s1, s1, 1
; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
; GFX9-NEXT:    s_cbranch_scc0 BB1_2
; GFX9-NEXT:  ; %bb.1: ; %bb1
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv2@gotpcrel32@hi+12
; GFX9-NEXT:    s_getpc_b64 s[2:3]
; GFX9-NEXT:    s_add_u32 s2, s2, gv3@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s3, s3, gv3@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    s_mov_b32 s0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v0, s[4:5]
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX9-NEXT:  BB1_2: ; %Flow
; GFX9-NEXT:    s_xor_b32 s0, s0, -1
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
; GFX9-NEXT:    s_cbranch_scc1 BB1_4
; GFX9-NEXT:  ; %bb.3: ; %bb0
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv0@gotpcrel32@hi+12
; GFX9-NEXT:    s_getpc_b64 s[2:3]
; GFX9-NEXT:    s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s3, s3, gv1@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v0, s[0:1]
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX9-NEXT:  BB1_4: ; %bb2
; GFX9-NEXT:    s_endpgm
entry:
  br i1 %cond, label %bb0, label %bb1

bb0:
  store volatile i32 0, i32 addrspace(1)* @gv0
  store volatile i32 1, i32 addrspace(1)* @gv1
  br label %bb2

bb1:
  store volatile i32 0, i32 addrspace(1)* @gv2
  store volatile i32 1, i32 addrspace(1)* @gv3
  br label %bb2

bb2:
  ret void
}

@static.gv0 = internal addrspace(1) global i32 undef, align 4
@static.gv1 = internal addrspace(1) global i32 undef, align 4
@static.gv2 = internal addrspace(1) global i32 undef, align 4
@static.gv3 = internal addrspace(1) global i32 undef, align 4

; Same store pattern as @localize_globals, but against internal-linkage
; globals and in a function defined without the amdgpu_kernel calling
; convention. In the expected output each address is formed with s_getpc_b64
; plus rel32 relocations directly before the store that uses it, and no
; s_load_dwordx2 of a pointer appears.
define void @localize_internal_globals(i1 %cond) {
; GFX9-LABEL: localize_internal_globals:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GFX9-NEXT:    s_cbranch_execz BB2_2
; GFX9-NEXT:  ; %bb.1: ; %bb1
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv2@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv3@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv3@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
; GFX9-NEXT:  BB2_2: ; %Flow
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[4:5]
; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz BB2_4
; GFX9-NEXT:  ; %bb.3: ; %bb0
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv0@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv0@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv1@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv1@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
; GFX9-NEXT:  BB2_4: ; %bb2
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
entry:
  br i1 %cond, label %bb0, label %bb1

bb0:
  store volatile i32 0, i32 addrspace(1)* @static.gv0
  store volatile i32 1, i32 addrspace(1)* @static.gv1
  br label %bb2

bb1:
  store volatile i32 0, i32 addrspace(1)* @static.gv2
  store volatile i32 1, i32 addrspace(1)* @static.gv3
  br label %bb2

bb2:
  ret void
}