; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s

; Kernels whose control flow contains loops with no exit edge.
;
; Two invocations are checked:
;   * the llc invocation checks the generated amdgcn assembly (SI prefix);
;   * the opt invocation runs only -amdgpu-unify-divergent-exit-nodes and
;     checks the resulting module (IR prefix).
;
; Every loop body is a volatile store to %out, so each iteration has an
; observable side effect. The stored constants appear in the expected
; assembly as hex immediates: 999 = 0x3e7 and 888 = 0x378.


; Case 1: a single unconditional self-loop and no return block at all.
; Only the assembly is checked for this function: the store is expected to
; be followed by an unconditional s_branch back to the loop label.

; SI-LABEL: {{^}}infinite_loop:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop
; SI: s_waitcnt lgkmcnt(0)
; SI: buffer_store_dword [[REG]]
; SI: s_branch [[LOOP]]
define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
entry:
  br label %loop

loop:
  store volatile i32 999, i32 addrspace(1)* %out, align 4
  br label %loop
}


; Case 2: the entry block branches on (workitem.id.x == 1) to either an
; infinite loop or a return. After the pass, the loop's back-edge is expected
; to be a conditional branch on the constant true whose false successor is
; UnifiedReturnBlock.

; IR-LABEL: @infinite_loop_ret(
; IR: br i1 %cond, label %loop, label %UnifiedReturnBlock

; IR: loop:
; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
; IR: br i1 true, label %loop, label %UnifiedReturnBlock

; IR: UnifiedReturnBlock:
; IR: ret void


; SI-LABEL: {{^}}infinite_loop_ret:
; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]

; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop
; SI: s_and_b64 vcc, exec, -1
; SI: s_waitcnt lgkmcnt(0)
; SI: buffer_store_dword [[REG]]
; SI: s_cbranch_vccnz [[LOOP]]

; SI: [[RET]]: ; %UnifiedReturnBlock
; SI: s_endpgm
define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %cond = icmp eq i32 %tmp, 1
  br i1 %cond, label %loop, label %return

loop:
  store volatile i32 999, i32 addrspace(1)* %out, align 4
  br label %loop

return:
  ret void
}


; Case 3: two independent infinite loops selected by an undef condition, with
; no return block in the input. After the pass, both back-edges are expected
; to become conditional branches on the constant true whose false successor is
; a newly created DummyReturnBlock containing the only ret.

; IR-LABEL: @infinite_loops(
; IR: br i1 undef, label %loop1, label %loop2

; IR: loop1:
; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
; IR: br i1 true, label %loop1, label %DummyReturnBlock

; IR: loop2:
; IR: store volatile i32 888, i32 addrspace(1)* %out, align 4
; IR: br i1 true, label %loop2, label %DummyReturnBlock

; IR: DummyReturnBlock:
; IR: ret void


; SI-LABEL: {{^}}infinite_loops:

; SI: v_mov_b32_e32 [[REG1:v[0-9]+]], 0x3e7
; SI: s_and_b64 vcc, exec, -1

; SI: [[LOOP1:BB[0-9]+_[0-9]+]]: ; %loop1
; SI: s_waitcnt lgkmcnt(0)
; SI: buffer_store_dword [[REG1]]
; SI: s_cbranch_vccnz [[LOOP1]]
; SI: s_branch [[RET:BB[0-9]+_[0-9]+]]

; SI: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378
; SI: s_and_b64 vcc, exec, -1

; SI: [[LOOP2:BB[0-9]+_[0-9]+]]: ; %loop2
; SI: s_waitcnt lgkmcnt(0)
; SI: buffer_store_dword [[REG2]]
; SI: s_cbranch_vccnz [[LOOP2]]

; SI: [[RET]]: ; %DummyReturnBlock
; SI: s_endpgm
define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
entry:
  br i1 undef, label %loop1, label %loop2

loop1:
  store volatile i32 999, i32 addrspace(1)* %out, align 4
  br label %loop1

loop2:
  store volatile i32 888, i32 addrspace(1)* %out, align 4
  br label %loop2
}



; Case 4: a nested loop guarded by (workitem.id.x == 1). Neither successor of
; inner_loop's terminator leaves the loop nest (it targets inner_loop or
; outer_loop). After the pass, inner_loop is expected to end in a branch on
; the constant true to a new TransitionBlock (false successor:
; UnifiedReturnBlock), and TransitionBlock carries the original %cond3 branch.

; IR-LABEL: @infinite_loop_nest_ret(
; IR: br i1 %cond1, label %outer_loop, label %UnifiedReturnBlock

; IR: outer_loop:
; IR: br label %inner_loop

; IR: inner_loop:
; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
; IR: %cond3 = icmp eq i32 %tmp, 3
; IR: br i1 true, label %TransitionBlock, label %UnifiedReturnBlock

; IR: TransitionBlock:
; IR: br i1 %cond3, label %inner_loop, label %outer_loop

; IR: UnifiedReturnBlock:
; IR: ret void

; SI-LABEL: {{^}}infinite_loop_nest_ret:
; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]

; SI: s_mov_b32
; SI: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %outer_loop

; SI: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %inner_loop
; SI: s_waitcnt expcnt(0)
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: s_waitcnt lgkmcnt(0)
; SI: buffer_store_dword [[REG]]

; SI: s_andn2_b64 exec
; SI: s_cbranch_execnz [[INNER_LOOP]]

; SI: s_andn2_b64 exec
; SI: s_cbranch_execnz [[OUTER_LOOP]]

; SI: [[RET]]: ; %UnifiedReturnBlock
; SI: s_endpgm
define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %cond1 = icmp eq i32 %tmp, 1
  br i1 %cond1, label %outer_loop, label %return

outer_loop:
  ; The outer loop header currently falls straight through to the inner loop.
  ; A variant with a conditional outer self-edge is left disabled below.
  ; %cond2 = icmp eq i32 %tmp, 2
  ; br i1 %cond2, label %outer_loop, label %inner_loop
  br label %inner_loop

inner_loop:                                     ; preds = %outer_loop, %inner_loop
  store volatile i32 999, i32 addrspace(1)* %out, align 4
  %cond3 = icmp eq i32 %tmp, 3
  br i1 %cond3, label %inner_loop, label %outer_loop

return:
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()