; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s


; Loop-free kernel in which both exit blocks (%bb4 and %bb5) end in
; `unreachable`. %bb1 branches on an undef condition; %bb3 branches on an
; ordered less-than compare of element 2 of the loaded vector against 0.0.
;
; The opt pipeline must not introduce any llvm.amdgcn.loop call for this
; function. The llc output is expected to contain a scalar conditional branch
; and no s_endpgm between that branch and the function-end label.

; OPT-LABEL: @annotate_unreachable_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_unreachable_noloop:
; GCN: s_cbranch_scc1
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
define amdgpu_kernel void @annotate_unreachable_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Index the input buffer by the work-item id and load one <4 x float>.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  br i1 undef, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  unreachable

bb5:                                              ; preds = %bb3, %bb1
  unreachable
}


; Loop-free kernel in which both exit blocks (%bb4 and %bb5) return. The
; branch in %bb1 depends on element 1 of a vector loaded at an address indexed
; by the work-item id; the loaded vector is also stored with a volatile store
; (to an undef pointer) before the branch.
;
; The opt pipeline must not introduce any llvm.amdgcn.loop call for this
; function. The llc output is expected to contain the dwordx4 load, a
; v_cmp_nlt_f32, and an s_and_saveexec_b64 immediately followed by s_endpgm.

; OPT-LABEL: @annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_ret_noloop:
; GCN: load_dwordx4
; GCN: v_cmp_nlt_f32
; GCN: s_and_saveexec_b64
; GCN-NEXT: s_endpgm
; GCN: .Lfunc_end
define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Index the input buffer by the work-item id and load one <4 x float>.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  %tmp5 = extractelement <4 x float> %tmp4, i32 1
  store volatile <4 x float> %tmp4, <4 x float> addrspace(1)* undef
  %cmp = fcmp ogt float %tmp5, 1.0
  br i1 %cmp, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}

; Same control-flow shape as the returning no-loop kernel, but the buffer index
; %tmp is a kernel argument instead of the work-item id, and %bb1 branches on
; an undef condition.
;
; The opt pipeline must not introduce any llvm.amdgcn.loop call for this
; function. The llc output is expected to contain a scalar conditional branch
; followed by s_endpgm.

; OPT-LABEL: @uniform_annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}uniform_annotate_ret_noloop:
; GCN: s_cbranch_scc1
; GCN: s_endpgm
; GCN: .Lfunc_end
define amdgpu_kernel void @uniform_annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg, i32 %tmp) #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %bb
  ; Index the input buffer by the i32 kernel argument and load one <4 x float>.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  br i1 undef, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}


; Intrinsic called by the kernels that index the buffer by work-item id.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to every kernel definition; #1 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
