; REQUIRES: asserts
; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly
; based on inline decision, so post inline counts are accurate.

; Note that we need new pass manager to enable top-down processing for sample profile loader
; The first command below additionally passes -sample-profile-inline-size; the second one does not.
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT


; Test that we inlined the following in top-down order and promoted the remaining non-inlined context profiles into base profiles
;   main:3 @ _Z5funcAi
;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
;   _Z5funcBi:1 @ _Z8funcLeafi
; INLINE-ALL: Getting base profile for function: main
; INLINE-ALL-NEXT: Merging context profile into base profile: main
; INLINE-ALL-NEXT: Found context tree root to promote: external:12 @ main
; INLINE-ALL-NEXT: Context promoted and merged to: main
; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi
; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi
; INLINE-ALL-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi
; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi
; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi
; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi(
; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi
; INLINE-ALL-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z8funcLeafi
; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi
; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi
; INLINE-ALL-NEXT: Getting callee context for instr: %call.i1 = tail call i32 @_Z3fibi
; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi
; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcAi
; INLINE-ALL-NEXT: Merging context profile into base profile: _Z5funcAi
; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcBi
; INLINE-ALL-NEXT: Merging context profile into base profile: _Z5funcBi
; INLINE-ALL-NEXT: Found context tree root to promote: external:10 @ _Z5funcBi
; INLINE-ALL-NEXT: Context promoted to: _Z5funcBi
; INLINE-ALL-NEXT: Found context tree root to promote: main:3.1 @ _Z5funcBi
; INLINE-ALL-NEXT: Context promoted and merged to: _Z5funcBi
; INLINE-ALL-NEXT: Context promoted to: _Z5funcBi:1 @ _Z8funcLeafi
; INLINE-ALL-NEXT: Found context tree root to promote: externalA:17 @ _Z5funcBi
; INLINE-ALL-NEXT: Context promoted and merged to: _Z5funcBi
; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi
; INLINE-ALL-NEXT: Callee context found: _Z5funcBi:1 @ _Z8funcLeafi
; INLINE-ALL-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi
; INLINE-ALL-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi
; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi
; INLINE-ALL-NEXT: Getting base profile for function: _Z8funcLeafi
; INLINE-ALL-NEXT: Merging context profile into base profile: _Z8funcLeafi

; Test that we inlined the following in top-down order and promoted the remaining non-inlined context profiles into base profiles
;   main:3 @ _Z5funcAi
;   _Z5funcAi:1 @ _Z8funcLeafi
;   _Z5funcBi:1 @ _Z8funcLeafi
; NOTE(review): the hot-inlining checks below show "main:3 @ _Z5funcAi" being promoted
; to the _Z5funcAi base profile and never "Marking context profile as inlined" for it;
; confirm whether that first entry belongs in the list above.
; INLINE-HOT: Getting base profile for function: main
; INLINE-HOT-NEXT: Merging context profile into base profile: main
; INLINE-HOT-NEXT: Found context tree root to promote: external:12 @ main
; INLINE-HOT-NEXT: Context promoted and merged to: main
; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !58
; INLINE-HOT-NEXT: Callee context found: main:3.1 @ _Z5funcBi
; INLINE-HOT-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !63
; INLINE-HOT-NEXT: Callee context found: main:3 @ _Z5funcAi
; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcAi
; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcAi
; INLINE-HOT-NEXT: Found context tree root to promote: main:3 @ _Z5funcAi
; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi
; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi:1 @ _Z8funcLeafi
; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50
; INLINE-HOT-NEXT: Callee context found: _Z5funcAi:1 @ _Z8funcLeafi
; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcAi:1 @ _Z8funcLeafi
; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62
; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69
; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcBi
; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcBi
; INLINE-HOT-NEXT: Found context tree root to promote: external:10 @ _Z5funcBi
; INLINE-HOT-NEXT: Context promoted to: _Z5funcBi
; INLINE-HOT-NEXT: Found context tree root to promote: main:3.1 @ _Z5funcBi
; INLINE-HOT-NEXT: Context promoted and merged to: _Z5funcBi
; INLINE-HOT-NEXT: Context promoted to: _Z5funcBi:1 @ _Z8funcLeafi
; INLINE-HOT-NEXT: Found context tree root to promote: externalA:17 @ _Z5funcBi
; INLINE-HOT-NEXT: Context promoted and merged to: _Z5funcBi
; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !50
; INLINE-HOT-NEXT: Callee context found: _Z5funcBi:1 @ _Z8funcLeafi
; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi
; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62
; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69
; INLINE-HOT-NEXT: Getting base profile for function: _Z8funcLeafi
; INLINE-HOT-NEXT: Merging context profile into base profile: _Z8funcLeafi


; Global that _Z8funcLeafi reads with volatile loads and passes to _Z3fibi.
@factor = dso_local global i32 3, align 4, !dbg !0

; main: loop starting at 300000 that calls _Z5funcBi(x) and _Z5funcAi(x + 1) on every
; iteration and accumulates both results into the returned value. Uses attribute
; group #0, which includes noinline.
define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
entry:
  br label %for.body, !dbg !25

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add3, !dbg !27

for.body:                                         ; preds = %for.body, %entry
  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
  %add = add nuw nsw i32 %x.011, 1, !dbg !31
  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
  %add2 = add i32 %call, %r.010, !dbg !34
  %add3 = add i32 %add2, %call1, !dbg !35
  %dec = add nsw i32 %x.011, -1, !dbg !36
  %cmp = icmp eq i32 %x.011, 0, !dbg !38
  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
}

; _Z5funcAi: returns _Z8funcLeafi(x + 100000). Uses attribute group #1 (no noinline).
define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
entry:
  %add = add nsw i32 %x, 100000, !dbg !44
  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
  ret i32 %call, !dbg !46
}

; _Z8funcLeafi: for x > 0, repeatedly subtracts _Z3fibi(factor) until the value is no
; longer positive; for x < 0, repeatedly adds _Z3fibi(factor) until the value is no
; longer negative; for x == 0 returns 0. Uses attribute group #1 (no noinline).
define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
entry:
  %cmp = icmp sgt i32 %x, 0, !dbg !57
  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59

while.cond2.preheader:                            ; preds = %entry
  %cmp313 = icmp slt i32 %x, 0, !dbg !60
  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63

while.body:                                       ; preds = %while.body, %entry
  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
  br i1 %cmp1, label %while.body, label %if.end, !dbg !71

while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
  %cmp3 = icmp slt i32 %add, 0, !dbg !60
  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63

if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
  ret i32 %x.addr.2, !dbg !76
}

; _Z5funcBi: returns _Z8funcLeafi(x - 100000). Uses attribute group #0, which includes noinline.
define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
entry:
  %sub = add nsw i32 %x, -100000, !dbg !51
  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
  ret i32 %call, !dbg !53
}

; External callee; only declared in this module.
declare i32 @_Z3fibi(i32)

; The two attribute groups differ only in that #0 carries noinline; both carry "use-sample-profile".
attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14, !15, !16}
!llvm.ident = !{!17}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
!4 = !{}
!5 = !{!6, !10, !11}
!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!12 = !{!0}
!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
!14 = !{i32 7, !"Dwarf Version", i32 4}
!15 = !{i32 2, !"Debug Info Version", i32 3}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{!"clang version 11.0.0"}
!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
!19 = !DISubroutineType(types: !20)
!20 = !{!9}
!21 = !{!22, !23}
!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
!25 = !DILocation(line: 13, column: 3, scope: !26)
!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
!27 = !DILocation(line: 17, column: 3, scope: !18)
!28 = !DILocation(line: 14, column: 10, scope: !29)
!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
!31 = !DILocation(line: 14, column: 29, scope: !29)
!32 = !DILocation(line: 14, column: 21, scope: !33)
!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
!34 = !DILocation(line: 14, column: 19, scope: !29)
!35 = !DILocation(line: 14, column: 7, scope: !29)
!36 = !DILocation(line: 13, column: 33, scope: !37)
!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
!38 = !DILocation(line: 13, column: 26, scope: !39)
!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!44 = !DILocation(line: 27, column: 22, scope: !40)
!45 = !DILocation(line: 27, column: 11, scope: !40)
!46 = !DILocation(line: 29, column: 3, scope: !40)
!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!51 = !DILocation(line: 33, column: 22, scope: !47)
!52 = !DILocation(line: 33, column: 11, scope: !47)
!53 = !DILocation(line: 35, column: 3, scope: !47)
!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!57 = !DILocation(line: 49, column: 9, scope: !58)
!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
!59 = !DILocation(line: 49, column: 7, scope: !54)
!60 = !DILocation(line: 58, column: 14, scope: !61)
!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
!63 = !DILocation(line: 58, column: 5, scope: !61)
!64 = !DILocation(line: 52, column: 16, scope: !65)
!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
!67 = !DILocation(line: 52, column: 12, scope: !65)
!68 = !DILocation(line: 52, column: 9, scope: !65)
!69 = !DILocation(line: 51, column: 14, scope: !70)
!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
!71 = !DILocation(line: 51, column: 5, scope: !70)
!72 = !DILocation(line: 59, column: 16, scope: !73)
!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
!74 = !DILocation(line: 59, column: 12, scope: !73)
!75 = !DILocation(line: 59, column: 9, scope: !73)
!76 = !DILocation(line: 63, column: 3, scope: !54)