# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s

# Exercises the si-insert-waitcnts pass on FLAT memory instructions for two
# subtargets (gfx803 and gfx900). Both FileCheck prefixes used in this file
# must be enabled on the command lines above; a directive whose prefix is not
# requested is silently ignored by FileCheck.

--- |
  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
                                               <4 x i32> addrspace(1)* %global16,
                                               i32* %flat4,
                                               <4 x i32>* %flat16) {
    ret void
  }

  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
    ret void
  }

  define amdgpu_kernel void @single_branch_successor_not_next_block() {
    ret void
  }

...
---

# Three blocks that each issue a FLAT_LOAD_DWORD followed by a
# FLAT_LOAD_DWORDX4 and then overwrite $vgpr0 with a V_MOV. The blocks differ
# only in the memory operands attached to the loads:
#   bb.0 - both loads are annotated as global (addrspace(1)) accesses.
#   bb.1 - the first load has no memory operand at all.
#   bb.2 - both loads are annotated with generic (flat) pointers.

# CHECK-LABEL: name: flat_zero_waitcnt

# CHECK-LABEL: bb.0:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# Global loads will return in order so we should:
# s_waitcnt vmcnt(1) lgkmcnt(1)
# CHECK-NEXT: S_WAITCNT 369

# In the remaining two blocks a wait is expected between the two loads.
# NOTE(review): 112 presumably encodes vmcnt(0) lgkmcnt(0) with expcnt left
# unconstrained - confirm against the s_waitcnt immediate layout.

# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
# GFX89: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4

# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
# GFX89: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4

name: flat_zero_waitcnt

body: |
  bb.0:
    successors: %bb.1
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_ENDPGM
...
---
# There is only a single fallthrough successor block, so there's no
# need to wait immediately.
# The load result ($vgpr0) is first consumed by the store in bb.1, so the
# wait is expected there, directly in front of the store and after the
# unrelated shift.

# CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
# CHECK: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2
# CHECK-NOT: S_WAITCNT

# CHECK: bb.1:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_fallthrough_successor_no_end_block_wait

body: |
  bb.0:
    successors: %bb.1
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr

  bb.1:
    $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.
# bb.1 is never branched to and uses registers unrelated to the load, so no
# wait is expected in it; the wait belongs in bb.2 in front of the store that
# reads $vgpr0.


# CHECK-LABEL: name: single_branch_successor_not_next_block

# CHECK: bb.1
# CHECK-NEXT: FLAT_STORE_DWORD
# CHECK-NEXT: S_ENDPGM

# CHECK: bb.2:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_branch_successor_not_next_block

body: |
  bb.0:
    successors: %bb.2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.2

  bb.1:
    FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM

  bb.2:
    $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...