; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s

; For gfx1010, overestimate the branch size in case we need to insert
; a nop for the buggy offset.
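;
; With -amdgpu-s-branch-bits=7 the conditional branch immediate is limited to
; a signed 7-bit dword offset, so the largest forward jump is 63 (0x3f) dwords,
; i.e. 252 bytes. Per the inline byte counts below, bb2 is estimated at roughly
; 240 + 4 + 4 + 4 = 252 bytes when its terminating branch is counted as 4
; bytes, which should just fit. gfx1010 counts that branch as 8 bytes to leave
; room for the workaround nop, pushing the estimate to 256 bytes (64 dwords)
; and forcing the skip branch to be relaxed into the
; s_getpc_b64/s_add_u32/s_addc_u32 sequence checked below.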

; GCN-LABEL: long_forward_scc_branch_3f_offset_bug:
; GFX1030: s_cmp_lg_u32
; GFX1030-NEXT: s_cbranch_scc1  [[ENDBB:BB[0-9]+_[0-9]+]]

; GFX1010: s_cmp_lg_u32
; GFX1010-NEXT: s_cbranch_scc0  [[RELAX_BB:BB[0-9]+_[0-9]+]]
; GFX1010: s_getpc_b64
; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
; GFX1010: [[RELAX_BB]]:

; GCN: v_nop
; GCN: s_sleep
; GCN: s_cbranch_scc1

; GCN: [[ENDBB]]:
; GCN: global_store_dword
define amdgpu_kernel void @long_forward_scc_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
bb0:
  %cmp0 = icmp eq i32 %cnd0, 0
  br i1 %cmp0, label %bb2, label %bb3

bb2:
  %val = call i32 asm sideeffect
   "s_mov_b32 $0, 0
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "=s"()   ; 12 instructions * 20 bytes (worst-case estimate) = 240
  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
  %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
  br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)

bb3:
  store volatile i32 %cnd0, i32 addrspace(1)* %arg
  ret void
}

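; Same block size as above, but the skipped condition is a divergent VALU
; value, so the skip uses s_and_saveexec_b32 + s_cbranch_execnz; here the
; checks expect the long-branch expansion on both targets.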
; GCN-LABEL: {{^}}long_forward_exec_branch_3f_offset_bug:
; GFX1030: v_cmp_eq_u32
; GFX1030: s_and_saveexec_b32
; GFX1030-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]

; GFX1010: v_cmp_eq_u32
; GFX1010: s_and_saveexec_b32
; GFX1010-NEXT: s_cbranch_execnz  [[RELAX_BB:BB[0-9]+_[0-9]+]]

; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
; GCN: [[RELAX_BB]]:

; GCN: v_nop
; GCN: s_sleep
; GCN: s_cbranch_execz

; GCN: [[ENDBB]]:
; GCN: global_store_dword
define void @long_forward_exec_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
bb0:
  %cmp0 = icmp eq i32 %cnd0, 0
  br i1 %cmp0, label %bb2, label %bb3

bb2:
  %val = call i32 asm sideeffect
   "v_mov_b32 $0, 0
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "=v"()   ; 12 instructions * 20 bytes (worst-case estimate) = 240
  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
  %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
  br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)

bb3:
  store volatile i32 %cnd0, i32 addrspace(1)* %arg
  ret void
}

declare void @llvm.amdgcn.s.sleep(i32 immarg)

attributes #0 = { nounwind }