; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT:    ; implicit-def: $sgpr4
; GCN-NEXT:  BB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s4, -1
; GCN-NEXT:    s_cbranch_scc1 BB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:  BB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s4, s4, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, lds@abs32@lo
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr3
; GCN-NEXT:  BB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_ne_u32_e64 s[8:9], s2, 4
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[8:9], exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s3, -1
; GCN-NEXT:    s_cbranch_scc1 BB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[4:7], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s3, s3, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }