1 #include "llvm/ADT/DenseMap.h"
2 #include "llvm/DebugInfo/DIContext.h"
3 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
4 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
5 #include "llvm/Object/ObjectFile.h"
6 
7 #define DEBUG_TYPE "dwarfdump"
8 using namespace llvm;
9 using namespace object;
10 
11 /// Holds statistics for one function (or other entity that has a PC range and
12 /// contains variables, such as a compile unit).
13 struct PerFunctionStats {
14   /// Number of inlined instances of this function.
15   unsigned NumFnInlined = 0;
16   /// Number of variables with location across all inlined instances.
17   unsigned TotalVarWithLoc = 0;
18   /// Number of constants with location across all inlined instances.
19   unsigned ConstantMembers = 0;
20   /// List of all Variables in this function.
21   SmallDenseSet<uint32_t, 4> VarsInFunction;
22   /// Compile units also cover a PC range, but have this flag set to false.
23   bool IsFunction = false;
24 };
25 
/// Holds accumulated global statistics about local variables.
///
/// Both counters are sums of 64-bit PC-range sizes taken over every variable
/// in the object file, so they are kept as 64-bit values; a 32-bit counter
/// would silently wrap once the total passes 4 GiB.
struct GlobalStats {
  /// Total number of PC range bytes covered by DW_AT_locations.
  uint64_t ScopeBytesCovered = 0;
  /// Total number of PC range bytes in each variable's enclosing scope,
  /// starting from the first definition of the variable.
  uint64_t ScopeBytesFromFirstDefinition = 0;
};
34 
35 /// Extract the low pc from a Die.
getLowPC(DWARFDie Die)36 static uint64_t getLowPC(DWARFDie Die) {
37   auto RangesOrError = Die.getAddressRanges();
38   DWARFAddressRangesVector Ranges;
39   if (RangesOrError)
40     Ranges = RangesOrError.get();
41   else
42     llvm::consumeError(RangesOrError.takeError());
43   if (Ranges.size())
44     return Ranges[0].LowPC;
45   return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0);
46 }
47 
48 /// Collect debug info quality metrics for one DIE.
collectStatsForDie(DWARFDie Die,std::string Prefix,uint64_t ScopeLowPC,uint64_t BytesInScope,StringMap<PerFunctionStats> & FnStatMap,GlobalStats & GlobalStats)49 static void collectStatsForDie(DWARFDie Die, std::string Prefix,
50                                uint64_t ScopeLowPC, uint64_t BytesInScope,
51                                StringMap<PerFunctionStats> &FnStatMap,
52                                GlobalStats &GlobalStats) {
53   bool HasLoc = false;
54   uint64_t BytesCovered = 0;
55   uint64_t OffsetToFirstDefinition = 0;
56   if (Die.find(dwarf::DW_AT_const_value)) {
57     // This catches constant members *and* variables.
58     HasLoc = true;
59     BytesCovered = BytesInScope;
60   } else if (Die.getTag() == dwarf::DW_TAG_variable ||
61              Die.getTag() == dwarf::DW_TAG_formal_parameter) {
62     // Handle variables and function arguments.
63     auto FormValue = Die.find(dwarf::DW_AT_location);
64     HasLoc = FormValue.hasValue();
65     if (HasLoc) {
66       // Get PC coverage.
67       if (auto DebugLocOffset = FormValue->getAsSectionOffset()) {
68         auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc();
69         if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) {
70           for (auto Entry : List->Entries)
71             BytesCovered += Entry.End - Entry.Begin;
72           if (List->Entries.size()) {
73             uint64_t FirstDef = List->Entries[0].Begin;
74             uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE());
75             // Ranges sometimes start before the lexical scope.
76             if (UnitOfs + FirstDef >= ScopeLowPC)
77               OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC;
78             // Or even after it. Count that as a failure.
79             if (OffsetToFirstDefinition > BytesInScope)
80               OffsetToFirstDefinition = 0;
81           }
82         }
83         assert(BytesInScope);
84       } else {
85         // Assume the entire range is covered by a single location.
86         BytesCovered = BytesInScope;
87       }
88     }
89   } else {
90     // Not a variable or constant member.
91     return;
92   }
93 
94   // Collect PC range coverage data.
95   auto &FnStats = FnStatMap[Prefix];
96   if (DWARFDie D =
97           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin))
98     Die = D;
99   // This is a unique ID for the variable inside the current object file.
100   unsigned CanonicalDieOffset = Die.getOffset();
101   FnStats.VarsInFunction.insert(CanonicalDieOffset);
102   if (BytesInScope) {
103     FnStats.TotalVarWithLoc += (unsigned)HasLoc;
104     // Adjust for the fact the variables often start their lifetime in the
105     // middle of the scope.
106     BytesInScope -= OffsetToFirstDefinition;
107     // Turns out we have a lot of ranges that extend past the lexical scope.
108     GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered);
109     GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope;
110     assert(GlobalStats.ScopeBytesCovered <=
111            GlobalStats.ScopeBytesFromFirstDefinition);
112   } else {
113     FnStats.ConstantMembers++;
114   }
115 }
116 
117 /// Recursively collect debug info quality metrics.
collectStatsRecursive(DWARFDie Die,std::string Prefix,uint64_t ScopeLowPC,uint64_t BytesInScope,StringMap<PerFunctionStats> & FnStatMap,GlobalStats & GlobalStats)118 static void collectStatsRecursive(DWARFDie Die, std::string Prefix,
119                                   uint64_t ScopeLowPC, uint64_t BytesInScope,
120                                   StringMap<PerFunctionStats> &FnStatMap,
121                                   GlobalStats &GlobalStats) {
122   // Handle any kind of lexical scope.
123   if (Die.getTag() == dwarf::DW_TAG_subprogram ||
124       Die.getTag() == dwarf::DW_TAG_inlined_subroutine ||
125       Die.getTag() == dwarf::DW_TAG_lexical_block) {
126     // Ignore forward declarations.
127     if (Die.find(dwarf::DW_AT_declaration))
128       return;
129 
130     // Count the function.
131     if (Die.getTag() != dwarf::DW_TAG_lexical_block) {
132       StringRef Name = Die.getName(DINameKind::LinkageName);
133       if (Name.empty())
134         Name = Die.getName(DINameKind::ShortName);
135       Prefix = Name;
136       // Skip over abstract origins.
137       if (Die.find(dwarf::DW_AT_inline))
138         return;
139       // We've seen an (inlined) instance of this function.
140       auto &FnStats = FnStatMap[Name];
141       FnStats.NumFnInlined++;
142       FnStats.IsFunction = true;
143     }
144 
145     // PC Ranges.
146     auto RangesOrError = Die.getAddressRanges();
147     if (!RangesOrError) {
148       llvm::consumeError(RangesOrError.takeError());
149       return;
150     }
151 
152     auto Ranges = RangesOrError.get();
153     uint64_t BytesInThisScope = 0;
154     for (auto Range : Ranges)
155       BytesInThisScope += Range.HighPC - Range.LowPC;
156     ScopeLowPC = getLowPC(Die);
157 
158     if (BytesInThisScope)
159       BytesInScope = BytesInThisScope;
160   } else {
161     // Not a scope, visit the Die itself. It could be a variable.
162     collectStatsForDie(Die, Prefix, ScopeLowPC, BytesInScope, FnStatMap,
163                        GlobalStats);
164   }
165 
166   // Traverse children.
167   DWARFDie Child = Die.getFirstChild();
168   while (Child) {
169     collectStatsRecursive(Child, Prefix, ScopeLowPC, BytesInScope, FnStatMap,
170                           GlobalStats);
171     Child = Child.getSibling();
172   }
173 }
174 
175 /// Print machine-readable output.
176 /// The machine-readable format is single-line JSON output.
177 /// \{
printDatum(raw_ostream & OS,const char * Key,StringRef Value)178 static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) {
179   OS << ",\"" << Key << "\":\"" << Value << '"';
180   LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
181 }
printDatum(raw_ostream & OS,const char * Key,uint64_t Value)182 static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) {
183   OS << ",\"" << Key << "\":" << Value;
184   LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
185 }
186 /// \}
187 
188 /// Collect debug info quality metrics for an entire DIContext.
189 ///
190 /// Do the impossible and reduce the quality of the debug info down to a few
191 /// numbers. The idea is to condense the data into numbers that can be tracked
192 /// over time to identify trends in newer compiler versions and gauge the effect
193 /// of particular optimizations. The raw numbers themselves are not particularly
194 /// useful, only the delta between compiling the same program with different
195 /// compilers is.
collectStatsForObjectFile(ObjectFile & Obj,DWARFContext & DICtx,Twine Filename,raw_ostream & OS)196 bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
197                                Twine Filename, raw_ostream &OS) {
198   StringRef FormatName = Obj.getFileFormatName();
199   GlobalStats GlobalStats;
200   StringMap<PerFunctionStats> Statistics;
201   for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
202     if (DWARFDie CUDie = CU->getUnitDIE(false))
203       collectStatsRecursive(CUDie, "/", 0, 0, Statistics, GlobalStats);
204 
205   /// The version number should be increased every time the algorithm is changed
206   /// (including bug fixes). New metrics may be added without increasing the
207   /// version.
208   unsigned Version = 1;
209   unsigned VarTotal = 0;
210   unsigned VarUnique = 0;
211   unsigned VarWithLoc = 0;
212   unsigned NumFunctions = 0;
213   unsigned NumInlinedFunctions = 0;
214   for (auto &Entry : Statistics) {
215     PerFunctionStats &Stats = Entry.getValue();
216     unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined;
217     unsigned Constants = Stats.ConstantMembers;
218     VarWithLoc += Stats.TotalVarWithLoc + Constants;
219     VarTotal += TotalVars + Constants;
220     VarUnique += Stats.VarsInFunction.size();
221     LLVM_DEBUG(for (auto V
222                     : Stats.VarsInFunction) llvm::dbgs()
223                << Entry.getKey() << ": " << V << "\n");
224     NumFunctions += Stats.IsFunction;
225     NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
226   }
227 
228   // Print summary.
229   OS.SetBufferSize(1024);
230   OS << "{\"version\":\"" << Version << '"';
231   LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
232              llvm::dbgs() << "---------------------------------\n");
233   printDatum(OS, "file", Filename.str());
234   printDatum(OS, "format", FormatName);
235   printDatum(OS, "source functions", NumFunctions);
236   printDatum(OS, "inlined functions", NumInlinedFunctions);
237   printDatum(OS, "unique source variables", VarUnique);
238   printDatum(OS, "source variables", VarTotal);
239   printDatum(OS, "variables with location", VarWithLoc);
240   printDatum(OS, "scope bytes total",
241              GlobalStats.ScopeBytesFromFirstDefinition);
242   printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
243   OS << "}\n";
244   LLVM_DEBUG(
245       llvm::dbgs() << "Total Availability: "
246                    << (int)std::round((VarWithLoc * 100.0) / VarTotal) << "%\n";
247       llvm::dbgs() << "PC Ranges covered: "
248                    << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
249                                       GlobalStats.ScopeBytesFromFirstDefinition)
250                    << "%\n");
251   return true;
252 }
253