; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s


; Test: annotate_unreachable_noloop
; Loop-free control flow in which both exit blocks end in `unreachable`.
; The load is indexed by the workitem id; the branch out of bb3 depends on
; element 2 of the loaded vector.
; Expected, per the checks below:
;   - the opt pipeline from the first run line introduces no call to
;     llvm.amdgcn.loop in this function;
;   - llc emits a scalar conditional branch (s_cbranch_scc1) and no
;     s_endpgm before the function-end label.

; OPT-LABEL: @annotate_unreachable_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_unreachable_noloop:
; GCN: s_cbranch_scc1
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
define amdgpu_kernel void @annotate_unreachable_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Load the <4 x float> element of %arg selected by the workitem id.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  ; NOTE(review): the condition is undef; the current LangRef makes a branch
  ; on undef undefined behavior. Left as-is because the checks were written
  ; against this exact input.
  br i1 undef, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  unreachable

bb5:                                              ; preds = %bb3, %bb1
  unreachable
}


; Test: annotate_ret_noloop
; Same loop-free shape as the kernel above, but both exit blocks return and
; the first branch uses a real comparison on element 1 of the loaded vector.
; The load is indexed by the workitem id.
; Expected, per the checks below:
;   - no call to llvm.amdgcn.loop after the opt pipeline;
;   - llc emits the dwordx4 load, a v_cmp_nlt_f32 compare and an
;     s_and_saveexec_b64, with the mask branch targeting a single return
;     block that is immediately followed by s_endpgm.

; OPT-LABEL: @annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_ret_noloop:
; GCN: load_dwordx4
; GCN: v_cmp_nlt_f32
; GCN: s_and_saveexec_b64
; GCN: ; mask branch [[UNIFIED_RET:BB[0-9]+_[0-9]+]]
; GCN-NEXT: [[UNIFIED_RET]]:
; GCN-NEXT: s_endpgm
; GCN: .Lfunc_end
define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Load the <4 x float> element of %arg selected by the workitem id.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  %tmp5 = extractelement <4 x float> %tmp4, i32 1
  ; Volatile store of the whole loaded vector to an undef address.
  ; Presumably here to keep the full load live; confirm before changing.
  store volatile <4 x float> %tmp4, <4 x float> addrspace(1)* undef
  %cmp = fcmp ogt float %tmp5, 1.0
  br i1 %cmp, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}

; Test: uniform_annotate_ret_noloop
; Variant of the returning kernel in which the load index %tmp is a kernel
; argument instead of the workitem id (presumably making the control flow
; uniform, as the name suggests; confirm against the divergence analysis).
; Expected, per the checks below:
;   - no call to llvm.amdgcn.loop after the opt pipeline;
;   - llc emits a scalar conditional branch (s_cbranch_scc1) followed by
;     s_endpgm before the function-end label.

; OPT-LABEL: @uniform_annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}uniform_annotate_ret_noloop:
; GCN: s_cbranch_scc1
; GCN: s_endpgm
; GCN: .Lfunc_end
define amdgpu_kernel void @uniform_annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg, i32 %tmp) #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %bb
  ; Load the <4 x float> element of %arg selected by the %tmp argument.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  ; NOTE(review): the condition is undef; the current LangRef makes a branch
  ; on undef undefined behavior. Left as-is because the checks were written
  ; against this exact input.
  br i1 undef, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}


; Intrinsic called by the two kernels that index the load by workitem id.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to the three kernels, #1 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
