; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

; Make sure the branch targets are correct after lowering llvm.amdgcn.if

; Divergent branch on (%value != 0); %if.true precedes %endif in the IR.
define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
; CHECK-LABEL: divergent_if_swap_brtarget_order0:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT:    ; implicit-def: $vgpr0
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    s_cbranch_execz BB0_2
; CHECK-NEXT:  ; %bb.1: ; %if.true
; CHECK-NEXT:    global_load_dword v0, v[0:1], off
; CHECK-NEXT:  BB0_2: ; %endif
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %c = icmp ne i32 %value, 0
  br i1 %c, label %if.true, label %endif

if.true:
  %val = load volatile i32, i32 addrspace(1)* undef
  br label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v
}

; Same instructions as divergent_if_swap_brtarget_order0, except that the
; %endif block is listed before %if.true in the IR.
define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
; CHECK-LABEL: divergent_if_swap_brtarget_order1:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT:    ; implicit-def: $vgpr0
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    s_cbranch_execz BB1_2
; CHECK-NEXT:  ; %bb.1: ; %if.true
; CHECK-NEXT:    global_load_dword v0, v[0:1], off
; CHECK-NEXT:  BB1_2: ; %endif
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %c = icmp ne i32 %value, 0
  br i1 %c, label %if.true, label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v

if.true:
  %val = load volatile i32, i32 addrspace(1)* undef
  br label %endif
}
; Make sure an 'and 1' is inserted on llvm.amdgcn.if when the branch
; condition is an i32 argument truncated to i1.
define i32 @divergent_if_nonboolean_condition0(i32 %value) {
; CHECK-LABEL: divergent_if_nonboolean_condition0:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT:    ; implicit-def: $vgpr0
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    s_cbranch_execz BB2_2
; CHECK-NEXT:  ; %bb.1: ; %if.true
; CHECK-NEXT:    global_load_dword v0, v[0:1], off
; CHECK-NEXT:  BB2_2: ; %endif
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %c = trunc i32 %value to i1
  br i1 %c, label %if.true, label %endif

if.true:
  %val = load volatile i32, i32 addrspace(1)* undef
  br label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v
}

; Make sure an 'and 1' is inserted on llvm.amdgcn.if when the branch
; condition is an i32 loaded from memory and truncated to i1.
define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) {
; CHECK-LABEL: divergent_if_nonboolean_condition1:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v0, v[0:1], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT:    ; implicit-def: $vgpr0
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    s_cbranch_execz BB3_2
; CHECK-NEXT:  ; %bb.1: ; %if.true
; CHECK-NEXT:    global_load_dword v0, v[0:1], off
; CHECK-NEXT:  BB3_2: ; %endif
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %value = load i32, i32 addrspace(1)* %ptr
  %c = trunc i32 %value to i1
  br i1 %c, label %if.true, label %endif

if.true:
  %val = load volatile i32, i32 addrspace(1)* undef
  br label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v
}

@external_constant = external addrspace(4) constant i32, align 4
@const.ptr = external addrspace(4) constant float*, align 4

; Make sure this case compiles. G_ICMP was mis-mapped due to having
; the result register class constrained by llvm.amdgcn.if lowering.
define void @constrained_if_register_class() {
; CHECK-LABEL: constrained_if_register_class:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
; CHECK-NEXT:    s_and_b32 s4, s4, 1
; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
; CHECK-NEXT:    s_cbranch_scc1 BB4_4
; CHECK-NEXT:  ; %bb.1: ; %bb2
; CHECK-NEXT:    s_getpc_b64 s[6:7]
; CHECK-NEXT:    s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    s_mov_b32 s4, -1
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v0, v0, s[6:7]
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, 1.0, v0
; CHECK-NEXT:    s_cbranch_vccnz BB4_3
; CHECK-NEXT:  ; %bb.2: ; %bb7
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:  BB4_3: ; %bb8
; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
; CHECK-NEXT:    s_and_b32 s4, s4, 1
; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
; CHECK-NEXT:    s_cbranch_scc0 BB4_5
; CHECK-NEXT:  BB4_4: ; %bb12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; CHECK-NEXT:  BB4_5: ; %bb11
; CHECK-NEXT:    v_mov_b32_e32 v0, 4.0
; CHECK-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
bb:
  %tmp = load i32, i32 addrspace(4)* @external_constant
  %ptr = load float*, float* addrspace(4)* @const.ptr
  %tmp1 = icmp ne i32 %tmp, 0
  br i1 %tmp1, label %bb12, label %bb2

bb2:
  %tmp4 = load float, float* %ptr, align 4
  %tmp5 = fcmp olt float %tmp4, 1.0
  %tmp6 = or i1 %tmp5, false
  br i1 %tmp6, label %bb8, label %bb7

bb7:
  br label %bb8

bb8:
  %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ]
  %tmp10 = icmp eq i32 %tmp9, 0
  br i1 %tmp10, label %bb11, label %bb12

bb11:
  store float 4.0, float addrspace(5)* undef, align 4
  br label %bb12

bb12:
  ret void
}

; A loop (%bb1 -> %bb4 -> %bb1) with two exits to %bb9: %bb1 leaves the loop
; when %lsr.iv.next is not negative, and %bb4 leaves it when %tmp (workitem
; id minus %arg) is not less than the volatile-loaded value.
define amdgpu_kernel void @break_loop(i32 %arg) {
; CHECK-LABEL: break_loop:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_load_dword s2, s[4:5], 0x0
; CHECK-NEXT:    s_mov_b64 s[0:1], 0
; CHECK-NEXT:    ; implicit-def: $vgpr1
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_subrev_u32_e32 v0, s2, v0
; CHECK-NEXT:    s_branch BB5_2
; CHECK-NEXT:  BB5_1: ; %Flow
; CHECK-NEXT:    ; in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; CHECK-NEXT:    s_cbranch_execz BB5_4
; CHECK-NEXT:  BB5_2: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
; CHECK-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
; CHECK-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, 1
; CHECK-NEXT:    s_cbranch_vccnz BB5_1
; CHECK-NEXT:  ; %bb.3: ; %bb4
; CHECK-NEXT:    ; in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    global_load_dword v2, v[0:1], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ge_i32_e64 s[2:3], v0, v2
; CHECK-NEXT:    s_branch BB5_1
; CHECK-NEXT:  BB5_4: ; %bb9
; CHECK-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()