; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -O2 -tail-dup-size=1000 -tail-dup-placement-threshold=1000 -enable-tail-merge=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Tail duplication needs to be triggered during MachineBlockPlacement,
; since calls aren't tail duplicated pre-RA.
;
; Every test function in this file has the same shape: the entry block
; branches on %cond to bb1 or bb2, each of which stores a different constant
; through %a and then jumps to a shared %call block that holds the operation
; under test. The check lines for each function match the expected
; instruction once and then forbid a second match before the next label.
; NOTE(review): the llc flags raise both tail-duplication limits to 1000 and
; turn tail merging off, presumably so duplication would otherwise happen and
; would not be undone afterwards -- confirm against the pass options.

; External callees and intrinsics used by the tests. Attribute group #0 is
; not convergent; groups #1 and #2 are (see the attribute definitions at the
; end of the file).
declare void @nonconvergent_func() #0
declare void @convergent_func() #1
declare void @llvm.amdgcn.s.barrier() #1
declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #2
declare void @llvm.amdgcn.ds.gws.sema.release.all(i32 %offset) #2

; barrier shouldn't be duplicated.
; The shared %call block contains the s.barrier intrinsic; the output must
; contain a single s_barrier for this function.

; GCN-LABEL: {{^}}taildup_barrier:
; GCN: s_barrier
; GCN-NOT: s_barrier
define void @taildup_barrier(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #0 {
entry:
  br i1 %cond, label %bb1, label %bb2

bb1:
  store i32 0, i32 addrspace(1)* %a
  br label %call

bb2:
  store i32 1, i32 addrspace(1)* %a
  br label %call

; Common successor of bb1 and bb2; holds the convergent intrinsic call.
call:
  call void @llvm.amdgcn.s.barrier()
  br label %ret

ret:
  ret void
}

; Same CFG as above, but the shared block calls a function declared
; convergent (#1). The output must contain a single s_swappc_b64 for this
; function.
; GCN-LABEL: {{^}}taildup_convergent_call:
; GCN: s_swappc_b64
; GCN-NOT: s_swappc_b64
define void @taildup_convergent_call(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #1 {
entry:
  br i1 %cond, label %bb1, label %bb2

bb1:
  store i32 0, i32 addrspace(1)* %a
  br label %call

bb2:
  store i32 1, i32 addrspace(1)* %a
  br label %call

; Common successor of bb1 and bb2; holds the call to the convergent callee.
call:
  call void @convergent_func()
  br label %ret

ret:
  ret void
}

; TODO: Currently there is only one convergent call pseudo, but this
; theoretically could use a nonconvergent variant.
; The callee here, @nonconvergent_func, is declared with attribute group #0
; (no convergent), while the enclosing function itself uses group #1. The
; check lines still require a single s_swappc_b64 for this function.
; GCN-LABEL: {{^}}taildup_nonconvergent_call:
; GCN: s_swappc_b64
; GCN-NOT: s_swappc_b64
define void @taildup_nonconvergent_call(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #1 {
entry:
  br i1 %cond, label %bb1, label %bb2

bb1:
  store i32 0, i32 addrspace(1)* %a
  br label %call

bb2:
  store i32 1, i32 addrspace(1)* %a
  br label %call

; Common successor of bb1 and bb2; holds the call to the non-convergent callee.
call:
  call void @nonconvergent_func()
  br label %ret

ret:
  ret void
}

; Tail-call variant: the shared block ends in a `tail call` to the convergent
; callee followed directly by `ret void` (there is no separate %ret block).
; The output must contain a single s_setpc_b64 for this function.
; GCN-LABEL: {{^}}taildup_convergent_tailcall:
; GCN: s_setpc_b64
; GCN-NOT: s_setpc_b64
define void @taildup_convergent_tailcall(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #1 {
entry:
  br i1 %cond, label %bb1, label %bb2

bb1:
  store i32 0, i32 addrspace(1)* %a
  br label %call

bb2:
  store i32 1, i32 addrspace(1)* %a
  br label %call

; Common successor of bb1 and bb2; tail-calls the convergent callee.
call:
  tail call void @convergent_func()
  ret void
}

; Kernel variant using the ds.gws.init intrinsic (declared with group #2) in
; the shared block. The output must contain a single ds_gws_init for this
; kernel.
; GCN-LABEL: {{^}}taildup_gws_init:
; GCN: ds_gws_init
; GCN-NOT: ds_gws_init
define amdgpu_kernel void @taildup_gws_init(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %val, i32 %offset) #0 {
entry:
  br i1 %cond, label %bb1, label %bb2

bb1:
  store i32 0, i32 addrspace(1)* %a
  br label %call

bb2:
  store i32 1, i32 addrspace(1)* %a
  br label %call

; Common successor of bb1 and bb2; holds the GWS init intrinsic.
call:
  call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
  br label %ret

ret:
  ret void
}

; Kernel variant using the ds.gws.barrier intrinsic (declared with group #2)
; in the shared block. The output must contain a single ds_gws_barrier for
; this kernel.
; GCN-LABEL: {{^}}taildup_gws_barrier:
; GCN: ds_gws_barrier
; GCN-NOT: ds_gws_barrier
define amdgpu_kernel void @taildup_gws_barrier(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %val, i32 %offset) #0 {
entry:
  br i1 %cond, label %bb1, label %bb2

bb1:
  store i32 0, i32 addrspace(1)* %a
  br label %call

bb2:
  store i32 1, i32 addrspace(1)* %a
  br label %call

; Common successor of bb1 and bb2; holds the GWS barrier intrinsic.
call:
  call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset)
  br label %ret

ret:
  ret void
}

; Kernel variant using the ds.gws.sema.release.all intrinsic (declared with
; group #2) in the shared block. Unlike the other tests, the negative check
; here uses the broader `ds_gws` pattern, so no further ds_gws* instruction
; of any kind may follow the first match.
; GCN-LABEL: {{^}}taildup_gws_sema_release_all:
; GCN: ds_gws_sema_release_all
; GCN-NOT: ds_gws
define amdgpu_kernel void @taildup_gws_sema_release_all(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %offset) #0 {
entry:
  br i1 %cond, label %bb1, label %bb2

bb1:
  store i32 0, i32 addrspace(1)* %a
  br label %call

bb2:
  store i32 1, i32 addrspace(1)* %a
  br label %call

; Common successor of bb1 and bb2; holds the GWS semaphore release intrinsic.
call:
  call void @llvm.amdgcn.ds.gws.sema.release.all(i32 %offset)
  br label %ret

ret:
  ret void
}

; Attribute groups referenced above:
;   #0 - nounwind only (not convergent)
;   #1 - nounwind and convergent
;   #2 - convergent, inaccessiblememonly and nounwind (used by the GWS
;        intrinsic declarations)
attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { convergent inaccessiblememonly nounwind }