# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s

# Exercises only the si-insert-waitcnts pass, once for gfx803 and once for
# gfx900. Both invocations use the same pair of FileCheck prefixes, so every
# expectation in this file applies to both targets.
#
# The IR module below contains one empty kernel stub per MIR function in this
# file. The arguments of @flat_zero_waitcnt provide the %ir.global4,
# %ir.global16, %ir.flat4 and %ir.flat16 values that the memory operands in
# that function's MIR body refer to.

--- |
  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
                                               <4 x i32> addrspace(1)* %global16,
                                               i32* %flat4,
                                               <4 x i32>* %flat16) {
    ret void
  }

  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
    ret void
  }

  define amdgpu_kernel void @single_branch_successor_not_next_block() {
    ret void
  }

  define amdgpu_kernel void @preexisting_waitcnt() {
    ret void
  }

  define amdgpu_kernel void @bundle_no_waitcnt() {
    ret void
  }

  define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
    ret void
  }

  define amdgpu_kernel void @insert_in_bundle() {
    ret void
  }

  define amdgpu_kernel void @exit_bundle() {
    ret void
  }

  define amdgpu_kernel void @cross_bundle() {
    ret void
  }

  define amdgpu_kernel void @subregs16bit() {
    ret void
  }
...
---

# flat_zero_waitcnt: five blocks, each issuing FLAT loads followed by a V_MOV
# that writes $vgpr0. The loads differ in the memory operand they carry:
#   bb.0: %ir.global4, then %ir.global16
#   bb.1: no memory operand, then %ir.global16
#   bb.2: %ir.flat4, then %ir.flat16
#   bb.3: %ir.flat4, then %ir.global4 (the V_MOV reads $vgpr3, the first load's result)
#   bb.4: %ir.flat4 (the V_MOV reads $vgpr5, the load's result)
# The S_WAITCNT immediates used below are annotated with the counters they
# stand for: 3953 is vmcnt(1), 3952 is vmcnt(0), 112 is vmcnt(0) lgkmcnt(0).
# NOTE(review): the differing expectations presumably follow from whether a
# load is known to touch only global memory -- confirm against the pass.

# CHECK-LABEL: name: flat_zero_waitcnt

# CHECK-LABEL: bb.0:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# Global loads will return in order so we should:
# s_waitcnt vmcnt(1)
# CHECK-NEXT: S_WAITCNT 3953

# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
# s_waitcnt vmcnt(0)
# GFX89: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4

# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
# s_waitcnt vmcnt(0)
# GFX89: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4

# bb.3's first load redefines $vgpr3, which the FLAT_LOAD_DWORDX4 in bb.2 also
# defines; a wait is expected ahead of that first load.
# CHECK-LABEL: bb.3:
# s_waitcnt vmcnt(0)
# GFX89: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORD
# s_waitcnt vmcnt(0) lgkmcnt(0)
# GFX89: S_WAITCNT 112

# No wait is expected between the start of bb.4 and its load.
# CHECK-LABEL: bb.4:
# GFX89-NOT: S_WAITCNT
# CHECK: FLAT_LOAD_DWORD
# s_waitcnt vmcnt(0) lgkmcnt(0)
# GFX89: S_WAITCNT 112

name: flat_zero_waitcnt

body: |
  bb.0:
    successors: %bb.1
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    successors: %bb.3
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_BRANCH %bb.3

  bb.3:
    successors: %bb.4
    $vgpr3 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
    $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
    $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec
    S_BRANCH %bb.4

  bb.4:
    $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
    $vgpr0 = V_MOV_B32_e32 $vgpr5, implicit $exec
    S_ENDPGM 0
...
---
# There is only a single fallthrough successor block, so there's no
# need to wait immediately.
#
# bb.0 ends right after the load, without a terminator, and lists bb.1 as its
# only successor. The store in bb.1 uses the loaded $vgpr0 as its data
# operand, so the wait (vmcnt(0) lgkmcnt(0), encoded as 112) is expected
# directly in front of that store rather than at the end of bb.0.

# CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
# CHECK: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2
# CHECK-NOT: S_WAITCNT

# CHECK: bb.1:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_fallthrough_successor_no_end_block_wait

body: |
  bb.0:
    successors: %bb.1
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr

  bb.1:
    $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.


# bb.0 loads $vgpr0 and branches to bb.2, its only listed successor. bb.1 sits
# between them in layout order but is not a successor of bb.0, and its store
# uses $vgpr8_vgpr9 and $vgpr10 rather than the loaded register; no wait is
# expected in bb.1. The wait (vmcnt(0) lgkmcnt(0), encoded as 112) is expected
# in bb.2, directly in front of the store that uses $vgpr0.

# CHECK-LABEL: name: single_branch_successor_not_next_block

# CHECK: bb.1
# CHECK-NEXT: FLAT_STORE_DWORD
# CHECK-NEXT: S_ENDPGM 0

# CHECK: bb.2:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_branch_successor_not_next_block

body: |
  bb.0:
    successors: %bb.2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.2

  bb.1:
    FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0

  bb.2:
    $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

---

# A hand-written S_WAITCNT 0 already sits between the load and the store that
# uses the loaded $vgpr0. It must be kept directly after the load, and no
# second S_WAITCNT may appear after it.

# CHECK-LABEL: name: preexisting_waitcnt{{$}}
# CHECK: FLAT_LOAD_DWORD
# CHECK-NEXT: S_WAITCNT 0
# CHECK-NOT: S_WAITCNT
name: preexisting_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_WAITCNT 0
    FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# The BUNDLE between the load and the dependent store holds only two S_NOPs.
# The bundle is expected to stay intact, with the wait (vmcnt(0) lgkmcnt(0),
# encoded as 112) placed right after its closing brace.

# CHECK-LABEL: name: bundle_no_waitcnt{{$}}
# CHECK: FLAT_LOAD_DWORD
# CHECK-NEXT: BUNDLE
# CHECK-NEXT: S_NOP
# CHECK-NEXT: S_NOP
# CHECK-NEXT: }
# CHECK-NEXT: S_WAITCNT 112
name: bundle_no_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE {
      S_NOP 0
      S_NOP 0
    }
    FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# See the waitcnt inside the bundle and don't insert an extra
# Here the existing S_WAITCNT 0 is the second instruction of a BUNDLE that
# sits between the load and the store using the loaded $vgpr0.
# CHECK-LABEL: name: preexisting_waitcnt_in_bundle{{$}}
# CHECK: FLAT_LOAD_DWORD
# CHECK: S_WAITCNT 0
# CHECK-NOT: S_WAITCNT
name: preexisting_waitcnt_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE {
      S_NOP 0
      S_WAITCNT 0
    }
    FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# Def and use inside bundle
# The load and the store that uses its result (marked "internal") are bundled
# together. The wait (vmcnt(0) lgkmcnt(0), encoded as 112) is expected between
# the two bundled instructions, inside the braces.
# CHECK-LABEL: name: insert_in_bundle{{$}}
# CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }

name: insert_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
      $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
      FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    }
...

---

# Def is last instruction in bundle, use is outside bundle
# The wait (vmcnt(0) lgkmcnt(0), encoded as 112) is expected after the
# bundle's closing brace, directly in front of the store.

# CHECK-LABEL: name: exit_bundle{{$}}
# CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr

name: exit_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
      $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    }

    FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# Def is in bundle, use is in another bundle
# The wait (vmcnt(0) lgkmcnt(0), encoded as 112) is expected between the two
# bundles, not inside either of them.

# CHECK-LABEL: name: cross_bundle{{$}}
# CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 {
# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }

name: cross_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
      $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    }
    BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 {
      FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    }
...

---
# Two FLAT_LOAD_USHORTs define $vgpr0 and $vgpr1. The following V_NOP_e32
# refers to those results only through the implicit uses $vgpr0_lo16 and
# $vgpr1_lo16. A wait (vmcnt(0) lgkmcnt(0), encoded as 112) is still expected
# directly in front of the V_NOP_e32.
# CHECK-LABEL: name: subregs16bit
# CHECK: S_WAITCNT 112
# CHECK-NEXT: V_NOP_e32

name: subregs16bit
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4
    $vgpr0 = FLAT_LOAD_USHORT killed $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_USHORT killed $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    V_NOP_e32 implicit $exec, implicit $vgpr0_lo16, implicit $vgpr1_lo16
...
