1 //===-- InstrProf.h - Instrumented profiling format support -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Instrumentation-based profiling data is generated by instrumented
11 // binaries through library functions in compiler-rt, and read by the clang
12 // frontend to feed PGO.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_PROFILEDATA_INSTRPROF_H
17 #define LLVM_PROFILEDATA_INSTRPROF_H
18 
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/ProfileData/InstrProfData.inc"
25 #include "llvm/ProfileData/ProfileCommon.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/MD5.h"
29 #include "llvm/Support/MathExtras.h"
30 #include <cstdint>
31 #include <list>
32 #include <system_error>
33 #include <vector>
34 
35 namespace llvm {
36 
37 class Function;
38 class GlobalVariable;
39 class Module;
40 
41 /// Return the name of data section containing profile counter variables.
getInstrProfCountersSectionName(bool AddSegment)42 inline StringRef getInstrProfCountersSectionName(bool AddSegment) {
43   return AddSegment ? "__DATA," INSTR_PROF_CNTS_SECT_NAME_STR
44                     : INSTR_PROF_CNTS_SECT_NAME_STR;
45 }
46 
47 /// Return the name of data section containing names of instrumented
48 /// functions.
getInstrProfNameSectionName(bool AddSegment)49 inline StringRef getInstrProfNameSectionName(bool AddSegment) {
50   return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR
51                     : INSTR_PROF_NAME_SECT_NAME_STR;
52 }
53 
54 /// Return the name of the data section containing per-function control
55 /// data.
getInstrProfDataSectionName(bool AddSegment)56 inline StringRef getInstrProfDataSectionName(bool AddSegment) {
57   return AddSegment ? "__DATA," INSTR_PROF_DATA_SECT_NAME_STR
58                     : INSTR_PROF_DATA_SECT_NAME_STR;
59 }
60 
61 /// Return the name of data section containing pointers to value profile
62 /// counters/nodes.
getInstrProfValuesSectionName(bool AddSegment)63 inline StringRef getInstrProfValuesSectionName(bool AddSegment) {
64   return AddSegment ? "__DATA," INSTR_PROF_VALS_SECT_NAME_STR
65                     : INSTR_PROF_VALS_SECT_NAME_STR;
66 }
67 
68 /// Return the name of data section containing nodes holdling value
69 /// profiling data.
getInstrProfVNodesSectionName(bool AddSegment)70 inline StringRef getInstrProfVNodesSectionName(bool AddSegment) {
71   return AddSegment ? "__DATA," INSTR_PROF_VNODES_SECT_NAME_STR
72                     : INSTR_PROF_VNODES_SECT_NAME_STR;
73 }
74 
75 /// Return the name profile runtime entry point to do value profiling
76 /// for a given site.
getInstrProfValueProfFuncName()77 inline StringRef getInstrProfValueProfFuncName() {
78   return INSTR_PROF_VALUE_PROF_FUNC_STR;
79 }
80 
81 /// Return the name of the section containing function coverage mapping
82 /// data.
getInstrProfCoverageSectionName(bool AddSegment)83 inline StringRef getInstrProfCoverageSectionName(bool AddSegment) {
84   return AddSegment ? "__DATA," INSTR_PROF_COVMAP_SECT_NAME_STR
85                     : INSTR_PROF_COVMAP_SECT_NAME_STR;
86 }
87 
88 /// Return the name prefix of variables containing instrumented function names.
getInstrProfNameVarPrefix()89 inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }
90 
91 /// Return the name prefix of variables containing per-function control data.
getInstrProfDataVarPrefix()92 inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }
93 
94 /// Return the name prefix of profile counter variables.
getInstrProfCountersVarPrefix()95 inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; }
96 
97 /// Return the name prefix of value profile variables.
getInstrProfValuesVarPrefix()98 inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; }
99 
100 /// Return the name of value profile node array variables:
getInstrProfVNodesVarName()101 inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; }
102 
103 /// Return the name prefix of the COMDAT group for instrumentation variables
104 /// associated with a COMDAT function.
getInstrProfComdatPrefix()105 inline StringRef getInstrProfComdatPrefix() { return "__profv_"; }
106 
107 /// Return the name of the variable holding the strings (possibly compressed)
108 /// of all function's PGO names.
getInstrProfNamesVarName()109 inline StringRef getInstrProfNamesVarName() {
110   return "__llvm_prf_nm";
111 }
112 
113 /// Return the name of a covarage mapping variable (internal linkage)
114 /// for each instrumented source module. Such variables are allocated
115 /// in the __llvm_covmap section.
getCoverageMappingVarName()116 inline StringRef getCoverageMappingVarName() {
117   return "__llvm_coverage_mapping";
118 }
119 
120 /// Return the name of the internal variable recording the array
121 /// of PGO name vars referenced by the coverage mapping. The owning
122 /// functions of those names are not emitted by FE (e.g, unused inline
123 /// functions.)
getCoverageUnusedNamesVarName()124 inline StringRef getCoverageUnusedNamesVarName() {
125   return "__llvm_coverage_names";
126 }
127 
128 /// Return the name of function that registers all the per-function control
129 /// data at program startup time by calling __llvm_register_function. This
130 /// function has internal linkage and is called by  __llvm_profile_init
131 /// runtime method. This function is not generated for these platforms:
132 /// Darwin, Linux, and FreeBSD.
getInstrProfRegFuncsName()133 inline StringRef getInstrProfRegFuncsName() {
134   return "__llvm_profile_register_functions";
135 }
136 
137 /// Return the name of the runtime interface that registers per-function control
138 /// data for one instrumented function.
getInstrProfRegFuncName()139 inline StringRef getInstrProfRegFuncName() {
140   return "__llvm_profile_register_function";
141 }
142 
143 /// Return the name of the runtime interface that registers the PGO name strings.
getInstrProfNamesRegFuncName()144 inline StringRef getInstrProfNamesRegFuncName() {
145   return "__llvm_profile_register_names_function";
146 }
147 
148 /// Return the name of the runtime initialization method that is generated by
149 /// the compiler. The function calls __llvm_profile_register_functions and
150 /// __llvm_profile_override_default_filename functions if needed. This function
151 /// has internal linkage and invoked at startup time via init_array.
getInstrProfInitFuncName()152 inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }
153 
154 /// Return the name of the hook variable defined in profile runtime library.
155 /// A reference to the variable causes the linker to link in the runtime
156 /// initialization module (which defines the hook variable).
getInstrProfRuntimeHookVarName()157 inline StringRef getInstrProfRuntimeHookVarName() {
158   return "__llvm_profile_runtime";
159 }
160 
161 /// Return the name of the compiler generated function that references the
162 /// runtime hook variable. The function is a weak global.
getInstrProfRuntimeHookVarUseFuncName()163 inline StringRef getInstrProfRuntimeHookVarUseFuncName() {
164   return "__llvm_profile_runtime_user";
165 }
166 
167 /// Return the name of the profile runtime interface that overrides the default
168 /// profile data file name.
getInstrProfFileOverriderFuncName()169 inline StringRef getInstrProfFileOverriderFuncName() {
170   return "__llvm_profile_override_default_filename";
171 }
172 
173 /// Return the marker used to separate PGO names during serialization.
getInstrProfNameSeparator()174 inline StringRef getInstrProfNameSeparator() { return "\01"; }
175 
/// Return the modified name for function \c F suitable to be
/// used as the key for profile lookup. Variable \c InLTO indicates if this
/// is called in LTO optimization passes.
179 std::string getPGOFuncName(const Function &F, bool InLTO = false,
180                            uint64_t Version = INSTR_PROF_INDEX_VERSION);
181 
/// Return the modified name for a function suitable to be
/// used as the key for profile lookup. The function's original
/// name is \c RawFuncName and has linkage of type \c Linkage.
/// The function is defined in module \c FileName.
186 std::string getPGOFuncName(StringRef RawFuncName,
187                            GlobalValue::LinkageTypes Linkage,
188                            StringRef FileName,
189                            uint64_t Version = INSTR_PROF_INDEX_VERSION);
190 
191 /// Return the name of the global variable used to store a function
192 /// name in PGO instrumentation. \c FuncName is the name of the function
193 /// returned by the \c getPGOFuncName call.
194 std::string getPGOFuncNameVarName(StringRef FuncName,
195                                   GlobalValue::LinkageTypes Linkage);
196 
/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function returned
/// by the \c getPGOFuncName call.
200 GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName);
201 
/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function
/// returned by the \c getPGOFuncName call, \c M is the owning module,
/// and \c Linkage is the linkage of the instrumented function.
206 GlobalVariable *createPGOFuncNameVar(Module &M,
207                                      GlobalValue::LinkageTypes Linkage,
208                                      StringRef PGOFuncName);
209 /// Return the initializer in string of the PGO name var \c NameVar.
210 StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
211 
212 /// Given a PGO function name, remove the filename prefix and return
213 /// the original (static) function name.
214 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
215                                    StringRef FileName = "<unknown>");
216 
/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized.  The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and the
/// second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128.  If \c doCompression is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
226 Error collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
227                                 bool doCompression, std::string &Result);
228 /// Produce \c Result string with the same format described above. The input
229 /// is vector of PGO function name variables that are referenced.
230 Error collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
231                                 std::string &Result, bool doCompression = true);
232 class InstrProfSymtab;
/// \c NameStrings is a string composed of one or more sub-strings encoded in
/// the format described above. The substrings are separated by 0 or more zero
/// bytes. This method decodes the string and populates the \c Symtab.
236 Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
237 
/// Value profiling kinds, with \c uint32_t as the underlying type. The
/// enumerators are not spelled out here: every
/// VALUE_PROF_KIND(Enumerator, Value) entry pulled in from InstrProfData.inc
/// expands to "Enumerator = Value,".
enum InstrProfValueKind : uint32_t {
#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value,
#include "llvm/ProfileData/InstrProfData.inc"
};
242 
243 struct InstrProfRecord;
244 
/// Get the value profile data for value site \p SiteIndx from \p InstrProfR
/// and annotate the instruction \p Inst with the value profile metadata.
/// Annotate up to \p MaxMDCount (default 3) records per value site.
248 void annotateValueSite(Module &M, Instruction &Inst,
249                        const InstrProfRecord &InstrProfR,
250                        InstrProfValueKind ValueKind, uint32_t SiteIndx,
251                        uint32_t MaxMDCount = 3);
252 /// Same as the above interface but using an ArrayRef, as well as \p Sum.
253 void annotateValueSite(Module &M, Instruction &Inst,
254                        ArrayRef<InstrProfValueData> VDs,
255                        uint64_t Sum, InstrProfValueKind ValueKind,
256                        uint32_t MaxMDCount);
257 
/// Extract the value profile data from \p Inst which is annotated with
/// value profile metadata. Return false if there is no value data annotated,
/// otherwise return true.
261 bool getValueProfDataFromInst(const Instruction &Inst,
262                               InstrProfValueKind ValueKind,
263                               uint32_t MaxNumValueData,
264                               InstrProfValueData ValueData[],
265                               uint32_t &ActualNumValueData, uint64_t &TotalC);
266 
getPGOFuncNameMetadataName()267 inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
268 
269 /// Return the PGOFuncName meta data associated with a function.
270 MDNode *getPGOFuncNameMetadata(const Function &F);
271 
/// Create the PGOFuncName metadata if PGOFuncName is different from the
/// function's raw name. This should only apply to internal linkage functions
/// declared by users.
275 void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
276 
277 const std::error_category &instrprof_category();
278 
/// Error conditions used by the instrumentation profiling code.
/// \c success is explicitly zero and the remaining enumerators take the
/// following consecutive values. A value is turned into a std::error_code by
/// make_error_code() and can be wrapped in an InstrProfError.
enum class instrprof_error {
  success = 0,
  eof,
  unrecognized_format,
  bad_magic,
  bad_header,
  unsupported_version,
  unsupported_hash_type,
  too_large,
  truncated,
  malformed,
  unknown_function,
  hash_mismatch,
  count_mismatch,
  counter_overflow,
  value_site_count_mismatch,
  compress_failed,
  uncompress_failed
};
298 
make_error_code(instrprof_error E)299 inline std::error_code make_error_code(instrprof_error E) {
300   return std::error_code(static_cast<int>(E), instrprof_category());
301 }
302 
303 class InstrProfError : public ErrorInfo<InstrProfError> {
304 public:
InstrProfError(instrprof_error Err)305   InstrProfError(instrprof_error Err) : Err(Err) {
306     assert(Err != instrprof_error::success && "Not an error");
307   }
308 
309   std::string message() const override;
310 
log(raw_ostream & OS)311   void log(raw_ostream &OS) const override { OS << message(); }
312 
convertToErrorCode()313   std::error_code convertToErrorCode() const override {
314     return make_error_code(Err);
315   }
316 
get()317   instrprof_error get() const { return Err; }
318 
319   /// Consume an Error and return the raw enum value contained within it. The
320   /// Error must either be a success value, or contain a single InstrProfError.
take(Error E)321   static instrprof_error take(Error E) {
322     auto Err = instrprof_error::success;
323     handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) {
324       assert(Err == instrprof_error::success && "Multiple errors encountered");
325       Err = IPE.get();
326     });
327     return Err;
328   }
329 
330   static char ID;
331 
332 private:
333   instrprof_error Err;
334 };
335 
336 class SoftInstrProfErrors {
337   /// Count the number of soft instrprof_errors encountered and keep track of
338   /// the first such error for reporting purposes.
339 
340   /// The first soft error encountered.
341   instrprof_error FirstError;
342 
343   /// The number of hash mismatches.
344   unsigned NumHashMismatches;
345 
346   /// The number of count mismatches.
347   unsigned NumCountMismatches;
348 
349   /// The number of counter overflows.
350   unsigned NumCounterOverflows;
351 
352   /// The number of value site count mismatches.
353   unsigned NumValueSiteCountMismatches;
354 
355 public:
SoftInstrProfErrors()356   SoftInstrProfErrors()
357       : FirstError(instrprof_error::success), NumHashMismatches(0),
358         NumCountMismatches(0), NumCounterOverflows(0),
359         NumValueSiteCountMismatches(0) {}
360 
~SoftInstrProfErrors()361   ~SoftInstrProfErrors() {
362     assert(FirstError == instrprof_error::success &&
363            "Unchecked soft error encountered");
364   }
365 
366   /// Track a soft error (\p IE) and increment its associated counter.
367   void addError(instrprof_error IE);
368 
369   /// Get the number of hash mismatches.
getNumHashMismatches()370   unsigned getNumHashMismatches() const { return NumHashMismatches; }
371 
372   /// Get the number of count mismatches.
getNumCountMismatches()373   unsigned getNumCountMismatches() const { return NumCountMismatches; }
374 
375   /// Get the number of counter overflows.
getNumCounterOverflows()376   unsigned getNumCounterOverflows() const { return NumCounterOverflows; }
377 
378   /// Get the number of value site count mismatches.
getNumValueSiteCountMismatches()379   unsigned getNumValueSiteCountMismatches() const {
380     return NumValueSiteCountMismatches;
381   }
382 
383   /// Return the first encountered error and reset FirstError to a success
384   /// value.
takeError()385   Error takeError() {
386     if (FirstError == instrprof_error::success)
387       return Error::success();
388     auto E = make_error<InstrProfError>(FirstError);
389     FirstError = instrprof_error::success;
390     return E;
391   }
392 };
393 
394 namespace object {
395 class SectionRef;
396 }
397 
398 namespace IndexedInstrProf {
399 uint64_t ComputeHash(StringRef K);
400 }
401 
402 /// A symbol table used for function PGO name look-up with keys
403 /// (such as pointers, md5hash values) to the function. A function's
404 /// PGO name or name's md5hash are used in retrieving the profile
405 /// data of the function. See \c getPGOFuncName() method for details
406 /// on how PGO name is formed.
407 class InstrProfSymtab {
408 public:
409   typedef std::vector<std::pair<uint64_t, uint64_t>> AddrHashMap;
410 
411 private:
412   StringRef Data;
413   uint64_t Address;
414   // Unique name strings.
415   StringSet<> NameTab;
416   // A map from MD5 keys to function name strings.
417   std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
418   // A map from MD5 keys to function define. We only populate this map
419   // when build the Symtab from a Module.
420   std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
421   // A map from function runtime address to function name MD5 hash.
422   // This map is only populated and used by raw instr profile reader.
423   AddrHashMap AddrToMD5Map;
424 
425 public:
InstrProfSymtab()426   InstrProfSymtab()
427       : Data(), Address(0), NameTab(), MD5NameMap(), MD5FuncMap(),
428       AddrToMD5Map() {}
429 
430   /// Create InstrProfSymtab from an object file section which
431   /// contains function PGO names. When section may contain raw
432   /// string data or string data in compressed form. This method
433   /// only initialize the symtab with reference to the data and
434   /// the section base address. The decompression will be delayed
435   /// until before it is used. See also \c create(StringRef) method.
436   Error create(object::SectionRef &Section);
437   /// This interface is used by reader of CoverageMapping test
438   /// format.
439   inline Error create(StringRef D, uint64_t BaseAddr);
440   /// \c NameStrings is a string composed of one of more sub-strings
441   ///  encoded in the format described in \c collectPGOFuncNameStrings.
442   /// This method is a wrapper to \c readPGOFuncNameStrings method.
443   inline Error create(StringRef NameStrings);
444   /// A wrapper interface to populate the PGO symtab with functions
445   /// decls from module \c M. This interface is used by transformation
446   /// passes such as indirect function call promotion. Variable \c InLTO
447   /// indicates if this is called from LTO optimization passes.
448   void create(Module &M, bool InLTO = false);
449   /// Create InstrProfSymtab from a set of names iteratable from
450   /// \p IterRange. This interface is used by IndexedProfReader.
451   template <typename NameIterRange> void create(const NameIterRange &IterRange);
452   // If the symtab is created by a series of calls to \c addFuncName, \c
453   // finalizeSymtab needs to be called before looking up function names.
454   // This is required because the underlying map is a vector (for space
455   // efficiency) which needs to be sorted.
456   inline void finalizeSymtab();
457   /// Update the symtab by adding \p FuncName to the table. This interface
458   /// is used by the raw and text profile readers.
addFuncName(StringRef FuncName)459   void addFuncName(StringRef FuncName) {
460     auto Ins = NameTab.insert(FuncName);
461     if (Ins.second)
462       MD5NameMap.push_back(std::make_pair(
463           IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey()));
464   }
465   /// Map a function address to its name's MD5 hash. This interface
466   /// is only used by the raw profiler reader.
mapAddress(uint64_t Addr,uint64_t MD5Val)467   void mapAddress(uint64_t Addr, uint64_t MD5Val) {
468     AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
469   }
getAddrHashMap()470   AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
471   /// Return function's PGO name from the function name's symbol
472   /// address in the object file. If an error occurs, return
473   /// an empty string.
474   StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
475   /// Return function's PGO name from the name's md5 hash value.
476   /// If not found, return an empty string.
477   inline StringRef getFuncName(uint64_t FuncMD5Hash);
478   /// Return function from the name's md5 hash. Return nullptr if not found.
479   inline Function *getFunction(uint64_t FuncMD5Hash);
480   /// Return the function's original assembly name by stripping off
481   /// the prefix attached (to symbols with priviate linkage). For
482   /// global functions, it returns the same string as getFuncName.
483   inline StringRef getOrigFuncName(uint64_t FuncMD5Hash);
484   /// Return the name section data.
getNameData()485   inline StringRef getNameData() const { return Data; }
486 };
487 
create(StringRef D,uint64_t BaseAddr)488 Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
489   Data = D;
490   Address = BaseAddr;
491   return Error::success();
492 }
493 
create(StringRef NameStrings)494 Error InstrProfSymtab::create(StringRef NameStrings) {
495   return readPGOFuncNameStrings(NameStrings, *this);
496 }
497 
498 template <typename NameIterRange>
create(const NameIterRange & IterRange)499 void InstrProfSymtab::create(const NameIterRange &IterRange) {
500   for (auto Name : IterRange)
501     addFuncName(Name);
502 
503   finalizeSymtab();
504 }
505 
finalizeSymtab()506 void InstrProfSymtab::finalizeSymtab() {
507   std::sort(MD5NameMap.begin(), MD5NameMap.end(), less_first());
508   std::sort(MD5FuncMap.begin(), MD5FuncMap.end(), less_first());
509   std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first());
510   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
511                      AddrToMD5Map.end());
512 }
513 
getFuncName(uint64_t FuncMD5Hash)514 StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
515   auto Result =
516       std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash,
517                        [](const std::pair<uint64_t, std::string> &LHS,
518                           uint64_t RHS) { return LHS.first < RHS; });
519   if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash)
520     return Result->second;
521   return StringRef();
522 }
523 
getFunction(uint64_t FuncMD5Hash)524 Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
525   auto Result =
526       std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash,
527                        [](const std::pair<uint64_t, Function*> &LHS,
528                           uint64_t RHS) { return LHS.first < RHS; });
529   if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash)
530     return Result->second;
531   return nullptr;
532 }
533 
534 // See also getPGOFuncName implementation. These two need to be
535 // matched.
getOrigFuncName(uint64_t FuncMD5Hash)536 StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
537   StringRef PGOName = getFuncName(FuncMD5Hash);
538   size_t S = PGOName.find_first_of(':');
539   if (S == StringRef::npos)
540     return PGOName;
541   return PGOName.drop_front(S + 1);
542 }
543 
544 struct InstrProfValueSiteRecord {
545   /// Value profiling data pairs at a given value site.
546   std::list<InstrProfValueData> ValueData;
547 
InstrProfValueSiteRecordInstrProfValueSiteRecord548   InstrProfValueSiteRecord() { ValueData.clear(); }
549   template <class InputIterator>
InstrProfValueSiteRecordInstrProfValueSiteRecord550   InstrProfValueSiteRecord(InputIterator F, InputIterator L)
551       : ValueData(F, L) {}
552 
553   /// Sort ValueData ascending by Value
sortByTargetValuesInstrProfValueSiteRecord554   void sortByTargetValues() {
555     ValueData.sort(
556         [](const InstrProfValueData &left, const InstrProfValueData &right) {
557           return left.Value < right.Value;
558         });
559   }
560   /// Sort ValueData Descending by Count
561   inline void sortByCount();
562 
563   /// Merge data from another InstrProfValueSiteRecord
564   /// Optionally scale merged counts by \p Weight.
565   void merge(SoftInstrProfErrors &SIPE, InstrProfValueSiteRecord &Input,
566              uint64_t Weight = 1);
567   /// Scale up value profile data counts.
568   void scale(SoftInstrProfErrors &SIPE, uint64_t Weight);
569 };
570 
571 /// Profiling information for a single function.
572 struct InstrProfRecord {
InstrProfRecordInstrProfRecord573   InstrProfRecord() : SIPE() {}
InstrProfRecordInstrProfRecord574   InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts)
575       : Name(Name), Hash(Hash), Counts(std::move(Counts)), SIPE() {}
576   StringRef Name;
577   uint64_t Hash;
578   std::vector<uint64_t> Counts;
579   SoftInstrProfErrors SIPE;
580 
581   typedef std::vector<std::pair<uint64_t, uint64_t>> ValueMapType;
582 
583   /// Return the number of value profile kinds with non-zero number
584   /// of profile sites.
585   inline uint32_t getNumValueKinds() const;
586   /// Return the number of instrumented sites for ValueKind.
587   inline uint32_t getNumValueSites(uint32_t ValueKind) const;
588   /// Return the total number of ValueData for ValueKind.
589   inline uint32_t getNumValueData(uint32_t ValueKind) const;
590   /// Return the number of value data collected for ValueKind at profiling
591   /// site: Site.
592   inline uint32_t getNumValueDataForSite(uint32_t ValueKind,
593                                          uint32_t Site) const;
594   /// Return the array of profiled values at \p Site. If \p TotalC
595   /// is not null, the total count of all target values at this site
596   /// will be stored in \c *TotalC.
597   inline std::unique_ptr<InstrProfValueData[]>
598   getValueForSite(uint32_t ValueKind, uint32_t Site,
599                   uint64_t *TotalC = 0) const;
600   /// Get the target value/counts of kind \p ValueKind collected at site
601   /// \p Site and store the result in array \p Dest. Return the total
602   /// counts of all target values at this site.
603   inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind,
604                                   uint32_t Site) const;
605   /// Reserve space for NumValueSites sites.
606   inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites);
607   /// Add ValueData for ValueKind at value Site.
608   void addValueData(uint32_t ValueKind, uint32_t Site,
609                     InstrProfValueData *VData, uint32_t N,
610                     ValueMapType *ValueMap);
611 
612   /// Merge the counts in \p Other into this one.
613   /// Optionally scale merged counts by \p Weight.
614   void merge(InstrProfRecord &Other, uint64_t Weight = 1);
615 
616   /// Scale up profile counts (including value profile data) by
617   /// \p Weight.
618   void scale(uint64_t Weight);
619 
620   /// Sort value profile data (per site) by count.
sortValueDataInstrProfRecord621   void sortValueData() {
622     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
623       std::vector<InstrProfValueSiteRecord> &SiteRecords =
624           getValueSitesForKind(Kind);
625       for (auto &SR : SiteRecords)
626         SR.sortByCount();
627     }
628   }
629   /// Clear value data entries
clearValueDataInstrProfRecord630   void clearValueData() {
631     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
632       getValueSitesForKind(Kind).clear();
633   }
634 
635   /// Get the error contained within the record's soft error counter.
takeErrorInstrProfRecord636   Error takeError() { return SIPE.takeError(); }
637 
638 private:
639   std::vector<InstrProfValueSiteRecord> IndirectCallSites;
640   const std::vector<InstrProfValueSiteRecord> &
getValueSitesForKindInstrProfRecord641   getValueSitesForKind(uint32_t ValueKind) const {
642     switch (ValueKind) {
643     case IPVK_IndirectCallTarget:
644       return IndirectCallSites;
645     default:
646       llvm_unreachable("Unknown value kind!");
647     }
648     return IndirectCallSites;
649   }
650 
651   std::vector<InstrProfValueSiteRecord> &
getValueSitesForKindInstrProfRecord652   getValueSitesForKind(uint32_t ValueKind) {
653     return const_cast<std::vector<InstrProfValueSiteRecord> &>(
654         const_cast<const InstrProfRecord *>(this)
655             ->getValueSitesForKind(ValueKind));
656   }
657 
658   // Map indirect call target name hash to name string.
659   uint64_t remapValue(uint64_t Value, uint32_t ValueKind,
660                       ValueMapType *HashKeys);
661 
662   // Merge Value Profile data from Src record to this record for ValueKind.
663   // Scale merged value counts by \p Weight.
664   void mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
665                           uint64_t Weight);
666   // Scale up value profile data count.
667   void scaleValueProfData(uint32_t ValueKind, uint64_t Weight);
668 };
669 
getNumValueKinds()670 uint32_t InstrProfRecord::getNumValueKinds() const {
671   uint32_t NumValueKinds = 0;
672   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
673     NumValueKinds += !(getValueSitesForKind(Kind).empty());
674   return NumValueKinds;
675 }
676 
getNumValueData(uint32_t ValueKind)677 uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const {
678   uint32_t N = 0;
679   const std::vector<InstrProfValueSiteRecord> &SiteRecords =
680       getValueSitesForKind(ValueKind);
681   for (auto &SR : SiteRecords) {
682     N += SR.ValueData.size();
683   }
684   return N;
685 }
686 
getNumValueSites(uint32_t ValueKind)687 uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const {
688   return getValueSitesForKind(ValueKind).size();
689 }
690 
getNumValueDataForSite(uint32_t ValueKind,uint32_t Site)691 uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind,
692                                                  uint32_t Site) const {
693   return getValueSitesForKind(ValueKind)[Site].ValueData.size();
694 }
695 
696 std::unique_ptr<InstrProfValueData[]>
getValueForSite(uint32_t ValueKind,uint32_t Site,uint64_t * TotalC)697 InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site,
698                                  uint64_t *TotalC) const {
699   uint64_t Dummy;
700   uint64_t &TotalCount = (TotalC == 0 ? Dummy : *TotalC);
701   uint32_t N = getNumValueDataForSite(ValueKind, Site);
702   if (N == 0) {
703     TotalCount = 0;
704     return std::unique_ptr<InstrProfValueData[]>(nullptr);
705   }
706 
707   auto VD = llvm::make_unique<InstrProfValueData[]>(N);
708   TotalCount = getValueForSite(VD.get(), ValueKind, Site);
709 
710   return VD;
711 }
712 
getValueForSite(InstrProfValueData Dest[],uint32_t ValueKind,uint32_t Site)713 uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[],
714                                           uint32_t ValueKind,
715                                           uint32_t Site) const {
716   uint32_t I = 0;
717   uint64_t TotalCount = 0;
718   for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) {
719     Dest[I].Value = V.Value;
720     Dest[I].Count = V.Count;
721     TotalCount = SaturatingAdd(TotalCount, V.Count);
722     I++;
723   }
724   return TotalCount;
725 }
726 
reserveSites(uint32_t ValueKind,uint32_t NumValueSites)727 void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) {
728   std::vector<InstrProfValueSiteRecord> &ValueSites =
729       getValueSitesForKind(ValueKind);
730   ValueSites.reserve(NumValueSites);
731 }
732 
getHostEndianness()733 inline support::endianness getHostEndianness() {
734   return sys::IsLittleEndianHost ? support::little : support::big;
735 }
736 
737 // Include definitions for value profile data
738 #define INSTR_PROF_VALUE_PROF_DATA
739 #include "llvm/ProfileData/InstrProfData.inc"
740 
sortByCount()741 void InstrProfValueSiteRecord::sortByCount() {
742   ValueData.sort(
743       [](const InstrProfValueData &left, const InstrProfValueData &right) {
744         return left.Count > right.Count;
745       });
746   // Now truncate
747   size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
748   if (ValueData.size() > max_s)
749     ValueData.resize(max_s);
750 }
751 
752 namespace IndexedInstrProf {
753 
/// Selects the hash function applied by ComputeHash. The underlying type is
/// fixed to uint32_t.
enum class HashT : uint32_t {
  MD5 = 0, ///< Hash with MD5Hash().

  Last = MD5 ///< Alias for the final enumerator (currently MD5).
};
759 
ComputeHash(HashT Type,StringRef K)760 inline uint64_t ComputeHash(HashT Type, StringRef K) {
761   switch (Type) {
762   case HashT::MD5:
763     return MD5Hash(K);
764   }
765   llvm_unreachable("Unhandled hash type");
766 }
767 
// Magic number for the indexed profile format. Read as bytes from least to
// most significant, the value spells "\xfflprofi\x81".
const uint64_t Magic = 0x8169666f72706cffULL;
769 
// Version numbers of the indexed profile format. Each enumerator's comment
// describes what that version added relative to the previous one.
enum ProfVersion {
  // Version 1 is the first version. In this version, the value of
  // a key/value pair can only include profile data of a single function.
  // Due to this restriction, the number of block counters for a given
  // function is not recorded but derived from the length of the value.
  Version1 = 1,
  // The version 2 format supports recording profile data of multiple
  // functions which share the same key in one value field. To support this,
  // the number of block counters is recorded as a uint64_t field right after
  // the function structural hash.
  Version2 = 2,
  // Version 3 supports value profile data. The value profile data is expected
  // to follow the block counter profile data.
  Version3 = 3,
  // In this version, profile summary data \c IndexedInstrProf::Summary is
  // stored after the profile header.
  Version4 = 4,
  // The current version is 4.
  CurrentVersion = INSTR_PROF_INDEX_VERSION
};
790 const uint64_t Version = ProfVersion::CurrentVersion;
791 
792 const HashT HashType = HashT::MD5;
793 
ComputeHash(StringRef K)794 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
795 
// This structure defines the file header of the LLVM profile
// data file in indexed-format.
//
// It consists of five consecutive uint64_t fields; their declaration order
// below is the order in which they appear in the header.
struct Header {
  uint64_t Magic;   // NOTE(review): presumably IndexedInstrProf::Magic - confirm
  uint64_t Version; // NOTE(review): presumably a ProfVersion value - confirm
  uint64_t Unused; // Becomes unused since version 4
  uint64_t HashType; // NOTE(review): presumably a HashT value - confirm
  uint64_t HashOffset;
};
805 
806 // Profile summary data recorded in the profile data file in indexed
807 // format. It is introduced in version 4. The summary data follows
808 // right after the profile file header.
809 struct Summary {
810 
811   struct Entry {
812     uint64_t Cutoff; ///< The required percentile of total execution count.
813     uint64_t
814         MinBlockCount;  ///< The minimum execution count for this percentile.
815     uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count.
816   };
817   // The field kind enumerator to assigned value mapping should remain
818   // unchanged  when a new kind is added or an old kind gets deleted in
819   // the future.
820   enum SummaryFieldKind {
821     /// The total number of functions instrumented.
822     TotalNumFunctions = 0,
823     /// Total number of instrumented blocks/edges.
824     TotalNumBlocks = 1,
825     /// The maximal execution count among all functions.
826     /// This field does not exist for profile data from IR based
827     /// instrumentation.
828     MaxFunctionCount = 2,
829     /// Max block count of the program.
830     MaxBlockCount = 3,
831     /// Max internal block count of the program (excluding entry blocks).
832     MaxInternalBlockCount = 4,
833     /// The sum of all instrumented block counts.
834     TotalBlockCount = 5,
835     NumKinds = TotalBlockCount + 1
836   };
837 
838   // The number of summmary fields following the summary header.
839   uint64_t NumSummaryFields;
840   // The number of Cutoff Entries (Summary::Entry) following summary fields.
841   uint64_t NumCutoffEntries;
842 
getSizeSummary843   static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) {
844     return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) +
845            NumSumFields * sizeof(uint64_t);
846   }
847 
getSummaryDataBaseSummary848   const uint64_t *getSummaryDataBase() const {
849     return reinterpret_cast<const uint64_t *>(this + 1);
850   }
getSummaryDataBaseSummary851   uint64_t *getSummaryDataBase() {
852     return reinterpret_cast<uint64_t *>(this + 1);
853   }
getCutoffEntryBaseSummary854   const Entry *getCutoffEntryBase() const {
855     return reinterpret_cast<const Entry *>(
856         &getSummaryDataBase()[NumSummaryFields]);
857   }
getCutoffEntryBaseSummary858   Entry *getCutoffEntryBase() {
859     return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]);
860   }
861 
getSummary862   uint64_t get(SummaryFieldKind K) const {
863     return getSummaryDataBase()[K];
864   }
865 
setSummary866   void set(SummaryFieldKind K, uint64_t V) {
867     getSummaryDataBase()[K] = V;
868   }
869 
getEntrySummary870   const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; }
setEntrySummary871   void setEntry(uint32_t I, const ProfileSummaryEntry &E) {
872     Entry &ER = getCutoffEntryBase()[I];
873     ER.Cutoff = E.Cutoff;
874     ER.MinBlockCount = E.MinCount;
875     ER.NumBlocks = E.NumCounts;
876   }
877 
SummarySummary878   Summary(uint32_t Size) { memset(this, 0, Size); }
deleteSummary879   void operator delete(void *ptr) { ::operator delete(ptr); }
880 
881   Summary() = delete;
882 };
883 
allocSummary(uint32_t TotalSize)884 inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) {
885   return std::unique_ptr<Summary>(new (::operator new(TotalSize))
886                                       Summary(TotalSize));
887 }
888 } // end namespace IndexedInstrProf
889 
890 namespace RawInstrProf {
891 
// Raw profile format version history:
// Version 1: First version
// Version 2: Added value profile data section. Per-function control data
// struct has more fields to describe value profile information.
// Version 3: Compressed name section support. Function PGO name reference
// from control data struct is changed from raw pointer to Name's MD5 value.
// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
// raw header.
//
// The raw format version, taken from the INSTR_PROF_RAW_VERSION macro.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
900 
// Return the raw profile magic number associated with the integer type
// IntPtrT. The primary template is only declared here; the two explicit
// specializations below provide the values for uint64_t and uint32_t.
template <class IntPtrT> inline uint64_t getMagic();
// uint64_t: INSTR_PROF_RAW_MAGIC_64.
template <> inline uint64_t getMagic<uint64_t>() {
  return INSTR_PROF_RAW_MAGIC_64;
}

// uint32_t: INSTR_PROF_RAW_MAGIC_32.
template <> inline uint64_t getMagic<uint32_t>() {
  return INSTR_PROF_RAW_MAGIC_32;
}
909 
// Per-function profile data header/control structure.
// The definition should match the structure defined in
// compiler-rt/lib/profile/InstrProfiling.h.
// It should also match the synthesized type in
// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
//
// The members are not spelled out here: INSTR_PROF_DATA is defined so that an
// INSTR_PROF_DATA(Type, LLVMType, Name, Init) entry expands to the member
// declaration `Type Name;`, and InstrProfData.inc is then included inside the
// struct body to supply the entries (presumably one per field - see that file
// for the actual list).
template <class IntPtrT> struct LLVM_ALIGNAS(8) ProfileData {
  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
  #include "llvm/ProfileData/InstrProfData.inc"
};
919 
// File header structure of the LLVM profile data in raw format.
// The definition should match the header referenced in
// compiler-rt/lib/profile/InstrProfilingFile.c  and
// InstrProfilingBuffer.c.
//
// As with ProfileData, the members come from InstrProfData.inc: an
// INSTR_PROF_RAW_HEADER(Type, Name, Init) entry expands to the const member
// declaration `const Type Name;`.
struct Header {
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
928 
929 } // end namespace RawInstrProf
930 
931 } // end namespace llvm
932 
933 #endif // LLVM_PROFILEDATA_INSTRPROF_H
934