; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s

; Perform tail call optimization for global address.
; A direct call in tail position to a declared function must be emitted as
; `tail callee_tail`.
declare i32 @callee_tail(i32 %i)
define i32 @caller_tail(i32 %i) nounwind {
; CHECK-LABEL: caller_tail
; CHECK: tail callee_tail
entry:
  %r = tail call i32 @callee_tail(i32 %i)
  ret i32 %r
}

; Perform tail call optimization for external symbol.
; The memcpy intrinsic call below is expected to reach the assembly as a
; reference to the external `memcpy` symbol, and that reference must be a tail
; call rather than a regular call.
; The label check carries a trailing colon so that it anchors on this
; function only and cannot match inside `caller_extern_pgso`, whose name
; starts with the same characters.
; NOTE(review): the copy length (7) is larger than @dest ([2 x i8]); only the
; call lowering is being checked here - confirm this is intentional.
@dest = global [2 x i8] zeroinitializer
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
define void @caller_extern(i8* %src) optsize {
entry:
; CHECK-LABEL: caller_extern:
; CHECK-NOT: call memcpy
; CHECK: tail memcpy
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @dest, i32 0, i32 0), i8* %src, i32 7, i1 false)
  ret void
}

; Perform tail call optimization for external symbol.
; Same scenario as above, but instead of `optsize` the function carries
; `!prof !14` (a function entry count of 0, defined with the profile summary
; metadata at the end of this file) - presumably to exercise profile-guided
; size optimization.
@dest_pgso = global [2 x i8] zeroinitializer
define void @caller_extern_pgso(i8* %src) !prof !14 {
entry:
; CHECK-LABEL: caller_extern_pgso:
; CHECK-NOT: call memcpy
; CHECK: tail memcpy
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @dest_pgso, i32 0, i32 0), i8* %src, i32 7, i1 false)
  ret void
}

; Perform indirect tail call optimization (for function pointer call).
declare void @callee_indirect1()
declare void @callee_indirect2()
; The call target is chosen at run time with a select, so no direct
; `call`/`tail` to either symbol may appear; each candidate address is
; materialised with lui/addi and reached through `jr`.
define void @caller_indirect_tail(i32 %a) nounwind {
; CHECK-LABEL: caller_indirect_tail
; CHECK-NOT: call callee_indirect1
; CHECK-NOT: call callee_indirect2
; CHECK-NOT: tail callee_indirect1
; CHECK-NOT: tail callee_indirect2

; CHECK: lui a0, %hi(callee_indirect2)
; CHECK-NEXT: addi a5, a0, %lo(callee_indirect2)
; CHECK-NEXT: jr a5

; CHECK: lui a0, %hi(callee_indirect1)
; CHECK-NEXT: addi a5, a0, %lo(callee_indirect1)
; CHECK-NEXT: jr a5
entry:
  %is_zero = icmp eq i32 %a, 0
  %target = select i1 %is_zero, void ()* @callee_indirect1, void ()* @callee_indirect2
  tail call void %target()
  ret void
}

; Variadic callees whose arguments spill onto the stack must be reached with
; an ordinary call, never a tail call.
declare i32 @callee_varargs(i32, ...)
define void @caller_varargs(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: caller_varargs
; CHECK-NOT: tail callee_varargs
; CHECK: call callee_varargs
entry:
  ; Nine i32 arguments are passed to the variadic callee.
  %ignored = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
  ret void
}

; A callee that receives some of its parameters on the stack must be reached
; with an ordinary call, never a tail call.
declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
; CHECK-LABEL: caller_args
; CHECK-NOT: tail callee_args
; CHECK: call callee_args
entry:
  ; All fourteen incoming arguments are forwarded unchanged.
  %result = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
  ret i32 %result
}

; Do not tail call optimize if parameters need to be passed indirectly.
declare i32 @callee_indirect_args(fp128 %a)
; The callee takes a single fp128 argument; the checks require a regular call
; and forbid a tail call.
define void @caller_indirect_args() nounwind {
; CHECK-LABEL: caller_indirect_args
; CHECK-NOT: tail callee_indirect_args
; CHECK: call callee_indirect_args
entry:
  %ignored = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
  ret void
}

; A function that is only declared here and has weak linkage must not be
; tail-called. Tail calls are emitted as branch instructions, and what a
; branch does in this situation is implementation-defined, so the linker
; cannot be relied upon to turn the tail call into a plain return.
declare extern_weak void @callee_weak()
define void @caller_weak() nounwind {
; CHECK-LABEL: caller_weak
; CHECK-NOT: tail callee_weak
; CHECK: call callee_weak
entry:
  tail call void @callee_weak()
  ret void
}

; Exception handlers have to finish with a special instruction sequence that
; signals the return to the hardware; tail-calling another function from one
; would probably break that. The caller is marked as a handler through
; attribute group #0 ("interrupt"="machine").
declare void @callee_irq()
define void @caller_irq() #0 {
; CHECK-LABEL: caller_irq
; CHECK-NOT: tail callee_irq
; CHECK: call callee_irq
entry:
  tail call void @callee_irq()
  ret void
}
attributes #0 = { "interrupt"="machine" }

; A byval parameter gives the callee a pointer straight into the stack area
; that a tail call would want to reuse, so calls with byval parameters must
; not be turned into tail calls.
declare i32 @callee_byval(i32** byval(i32*) %a)
define i32 @caller_byval() nounwind {
; CHECK-LABEL: caller_byval
; CHECK-NOT: tail callee_byval
; CHECK: call callee_byval
entry:
  ; Stack slot whose address is handed to the callee as the byval argument.
  %slot = alloca i32*
  %result = tail call i32 @callee_byval(i32** byval(i32*) %slot)
  ret i32 %result
}

; Do not tail call optimize if callee uses structret semantics.
%struct.A = type { i32 }
@a = global %struct.A zeroinitializer

; The callee takes an sret pointer while the caller does not; the checks
; require a regular call and forbid a tail call.
declare void @callee_struct(%struct.A* sret(%struct.A) %a)
define void @caller_nostruct() nounwind {
; CHECK-LABEL: caller_nostruct
; CHECK-NOT: tail callee_struct
; CHECK: call callee_struct
entry:
  tail call void @callee_struct(%struct.A* sret(%struct.A) @a)
  ret void
}

; Do not tail call optimize if caller uses structret semantics.
; Mirror image of the case above: here the caller has the sret parameter and
; the callee takes no arguments.
declare void @callee_nostruct()
define void @caller_struct(%struct.A* sret(%struct.A) %a) nounwind {
; CHECK-LABEL: caller_struct
; CHECK-NOT: tail callee_nostruct
; CHECK: call callee_nostruct
entry:
  tail call void @callee_nostruct()
  ret void
}

; Do not tail call optimize if disabled.
; The caller carries "disable-tail-calls"="true", so the call to @callee_tail
; (declared earlier in this file) must stay a regular call. The negative
; check names callee_tail, the symbol this function actually calls; checking
; for any other symbol could never fail and would hide a regression.
define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
; CHECK-LABEL: disable_tail_calls:
; CHECK-NOT: tail callee_tail
; CHECK: call callee_tail
entry:
  %rv = tail call i32 @callee_tail(i32 %i)
  ret i32 %rv
}

; Duplicate returns to enable tail call optimizations.
declare i32 @test()
declare i32 @test1()
declare i32 @test2()
declare i32 @test3()
; Four calls in separate blocks all flow into one shared return block through
; a phi. The checks expect every one of them to be emitted as a tail call, in
; the order test2, test, test1, test3.
define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: duplicate_returns:
; CHECK: tail test2
; CHECK: tail test
; CHECK: tail test1
; CHECK: tail test3
entry:
  %a.is.zero = icmp eq i32 %a, 0
  br i1 %a.is.zero, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  %res.test = tail call i32 @test()
  br label %return

if.else:                                          ; preds = %entry
  %b.is.zero = icmp eq i32 %b, 0
  br i1 %b.is.zero, label %if.then2, label %if.else4

if.then2:                                         ; preds = %if.else
  %res.test1 = tail call i32 @test1()
  br label %return

if.else4:                                         ; preds = %if.else
  %a.gt.b = icmp sgt i32 %a, %b
  br i1 %a.gt.b, label %if.then6, label %if.else8

if.then6:                                         ; preds = %if.else4
  %res.test2 = tail call i32 @test2()
  br label %return

if.else8:                                         ; preds = %if.else4
  %res.test3 = tail call i32 @test3()
  br label %return

return:                                           ; preds = %if.else8, %if.then6, %if.then2, %if.then
  ; Merge point: forwards whichever call result reached this block.
  %merged = phi i32 [ %res.test, %if.then ], [ %res.test1, %if.then2 ], [ %res.test2, %if.then6 ], [ %res.test3, %if.else8 ]
  ret i32 %merged
}

; Module-level profile summary (!0 - !13), followed by the zero entry count
; (!14) that @caller_extern_pgso references through !prof.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}