; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s

; -amdgpu-s-branch-bits=7 artificially restricts the assumed scalar-branch
; offset range, so the padded blocks below are just big enough to need branch
; relaxation on gfx1010 (with its extra workaround bytes) but not on gfx1030.

; For gfx1010, overestimate the branch size in case we need to insert
; a nop for the buggy offset.

; SCC-branch variant: on gfx1030 the forward branch over bb2 still fits in
; range, so a single conditional branch to the end block is expected.  On
; gfx1010 the overestimated size pushes it out of range, so it must be
; relaxed into an inverted short branch plus an
; s_getpc_b64 / s_add_u32 / s_addc_u32 long-jump sequence.
; GCN-LABEL: long_forward_scc_branch_3f_offset_bug:
; GFX1030: s_cmp_lg_u32
; GFX1030-NEXT: s_cbranch_scc1 [[ENDBB:BB[0-9]+_[0-9]+]]

; GFX1010: s_cmp_lg_u32
; GFX1010-NEXT: s_cbranch_scc0 [[RELAX_BB:BB[0-9]+_[0-9]+]]
; GFX1010: s_getpc_b64
; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
; GFX1010: [[RELAX_BB]]:

; GCN: v_nop
; GCN: s_sleep
; GCN: s_cbranch_scc1

; GCN: [[ENDBB]]:
; GCN: global_store_dword
define amdgpu_kernel void @long_forward_scc_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
bb0:
  %cmp0 = icmp eq i32 %cnd0, 0
  br i1 %cmp0, label %bb2, label %bb3

bb2:
  ; The inline asm is pure padding: it inflates bb2 so the branch distance
  ; lands right at the edge of the restricted 7-bit offset range.  The
  ; byte-count annotations below track the accumulated block size.
  ; NOTE(review): the "20 * 12 = 240" note does not obviously match the 12
  ; instructions in the asm string — inherited annotation, confirm the sizes.
  %val = call i32 asm sideeffect
   "s_mov_b32 $0, 0
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "=s"() ; 20 * 12 = 240
  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
  %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
  br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)

bb3:
  store volatile i32 %cnd0, i32 addrspace(1)* %arg
  ret void
}

; EXEC-mask variant: here the branch is over a divergent region
; (s_and_saveexec + s_cbranch_execnz), and both gfx1030 and gfx1010 are
; expected to relax it — the checks for the long-jump sequence itself use
; the shared GCN prefix and continue below.
; GCN-LABEL: {{^}}long_forward_exec_branch_3f_offset_bug:
; GFX1030: v_cmp_eq_u32
; GFX1030: s_and_saveexec_b32
; GFX1030-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]

; GFX1010: v_cmp_eq_u32
; GFX1010: s_and_saveexec_b32
; GFX1010-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]

; Both targets relax the EXEC branch into the s_getpc_b64-based long jump.
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
; GCN: [[RELAX_BB]]:

; GCN: v_nop
; GCN: s_sleep
; GCN: s_cbranch_execz

; GCN: [[ENDBB]]:
; GCN: global_store_dword
define void @long_forward_exec_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
bb0:
  %cmp0 = icmp eq i32 %cnd0, 0
  br i1 %cmp0, label %bb2, label %bb3

bb2:
  ; Same padding trick as the SCC variant, but using VALU nops with a VGPR
  ; result ("=v") so the compare/branch stays divergent (EXEC-based).
  ; NOTE(review): the "20 * 12 = 240" note does not obviously match the 12
  ; instructions in the asm string — inherited annotation, confirm the sizes.
  %val = call i32 asm sideeffect
   "v_mov_b32 $0, 0
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "=v"() ; 20 * 12 = 240
  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
  %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
  br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)

bb3:
  store volatile i32 %cnd0, i32 addrspace(1)* %arg
  ret void
}

declare void @llvm.amdgcn.s.sleep(i32 immarg)