; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s

; Test that the VGPR spiller correctly switches to SGPR offsets when the
; instruction offset field would overflow, and that it accounts for memory
; swizzling.
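;
; The MUBUF immediate offset field is 12 bits (at most 4095), so spill offsets
; of 4096 or more have to be materialized in the SGPR soffset operand instead.
; Because scratch is swizzled, that SGPR holds a wave-level byte offset: the
; per-lane offset scaled by the wave size (0x40000 = 4096 * 64 and
; 0x3ff00 = 4092 * 64 for wave64).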

; CHECK-LABEL: test_inst_offset_kernel
define amdgpu_kernel void @test_inst_offset_kernel() {
entry:
  ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
  ; the instruction offset field.
  %alloca = alloca i8, i32 4088, align 4, addrspace(5)
  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*

  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
  %a = load volatile i32, i32 addrspace(5)* %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  store volatile i32 %a, i32 addrspace(5)* %outptr

  ret void
}

; CHECK-LABEL: test_sgpr_offset_kernel
define amdgpu_kernel void @test_sgpr_offset_kernel() {
entry:
  ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
  ; fit in the instruction, and has to live in the SGPR offset.
  %alloca = alloca i8, i32 4092, align 4, addrspace(5)
  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*

  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  ; 0x40000 / 64 = 4096 (for wave64)
  ; CHECK: s_add_u32 s7, s7, 0x40000
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
  ; CHECK: s_sub_u32 s7, s7, 0x40000
  %a = load volatile i32, i32 addrspace(5)* %aptr

  ; Force %a to spill
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  store volatile i32 %a, i32 addrspace(5)* %outptr

  ret void
}

; CHECK-LABEL: test_sgpr_offset_subregs_kernel
define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
entry:
  ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
  ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in
  ; the instruction offset field.
  %alloca = alloca i8, i32 4084, align 4, addrspace(5)
  %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
  %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*

  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
  %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
  %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Ensure the alloca sticks around.
  %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
  %b = load volatile i32, i32 addrspace(5)* %bptr

  ; Ensure the spill is of the full super-reg.
  call void asm sideeffect "; $0", "r"(<2 x i32> %a)

  ret void
}

; CHECK-LABEL: test_inst_offset_subregs_kernel
define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
entry:
  ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a
  ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live
  ; in the SGPR offset.
  %alloca = alloca i8, i32 4088, align 4, addrspace(5)
  %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
  %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*

  ; 0x3ff00 / 64 = 4092 (for wave64)
  ; CHECK: s_add_u32 s7, s7, 0x3ff00
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 offset:4 ; 4-byte Folded Spill
  ; CHECK: s_sub_u32 s7, s7, 0x3ff00
  %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
  %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Ensure the alloca sticks around.
  %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
  %b = load volatile i32, i32 addrspace(5)* %bptr

  ; Ensure the spill is of the full super-reg.
  call void asm sideeffect "; $0", "r"(<2 x i32> %a)

  ret void
}

; CHECK-LABEL: test_inst_offset_function
define void @test_inst_offset_function() {
entry:
  ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
  ; the instruction offset field.
  %alloca = alloca i8, i32 4088, align 4, addrspace(5)
  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*

  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
  %a = load volatile i32, i32 addrspace(5)* %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  store volatile i32 %a, i32 addrspace(5)* %outptr

  ret void
}

; CHECK-LABEL: test_sgpr_offset_function
define void @test_sgpr_offset_function() {
entry:
  ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
  ; fit in the instruction, and has to live in the SGPR offset.
  %alloca = alloca i8, i32 4092, align 4, addrspace(5)
  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*

  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  ; 0x40000 / 64 = 4096 (for wave64)
  ; CHECK: s_add_u32 s5, s5, 0x40000
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
  ; CHECK: s_sub_u32 s5, s5, 0x40000
  %a = load volatile i32, i32 addrspace(5)* %aptr

  ; Force %a to spill
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  store volatile i32 %a, i32 addrspace(5)* %outptr

  ret void
}

; CHECK-LABEL: test_sgpr_offset_subregs_function
define void @test_sgpr_offset_subregs_function() {
entry:
  ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
  ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in
  ; the instruction offset field.
  %alloca = alloca i8, i32 4084, align 4, addrspace(5)
  %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
  %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*

  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
  %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
  %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Ensure the alloca sticks around.
  %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
  %b = load volatile i32, i32 addrspace(5)* %bptr

  ; Ensure the spill is of the full super-reg.
  call void asm sideeffect "; $0", "r"(<2 x i32> %a)

  ret void
}

; CHECK-LABEL: test_inst_offset_subregs_function
define void @test_inst_offset_subregs_function() {
entry:
  ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a
  ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live
  ; in the SGPR offset.
  %alloca = alloca i8, i32 4088, align 4, addrspace(5)
  %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
  %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*

  ; 0x3ff00 / 64 = 4092 (for wave64)
  ; CHECK: s_add_u32 s5, s5, 0x3ff00
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 offset:4 ; 4-byte Folded Spill
  ; CHECK: s_sub_u32 s5, s5, 0x3ff00
  %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
  %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Ensure the alloca sticks around.
  %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
  %b = load volatile i32, i32 addrspace(5)* %bptr

  ; Ensure the spill is of the full super-reg.
  call void asm sideeffect "; $0", "r"(<2 x i32> %a)

  ret void
}