1 //=-- InstrProfReader.h - Instrumented profiling readers ----------*- C++ -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for reading profiling data for instrumentation
11 // based PGO and coverage.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/EndianStream.h"
22 #include "llvm/Support/LineIterator.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/OnDiskHashTable.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <iterator>
27 
28 namespace llvm {
29 
30 class InstrProfReader;
31 
32 /// A file format agnostic iterator over profiling data.
33 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
34                                                InstrProfRecord> {
35   InstrProfReader *Reader;
36   InstrProfRecord Record;
37 
38   void Increment();
39 public:
InstrProfIterator()40   InstrProfIterator() : Reader(nullptr) {}
InstrProfIterator(InstrProfReader * Reader)41   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
42 
43   InstrProfIterator &operator++() { Increment(); return *this; }
44   bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
45   bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
46   InstrProfRecord &operator*() { return Record; }
47   InstrProfRecord *operator->() { return &Record; }
48 };
49 
50 /// Base class and interface for reading profiling data of any known instrprof
51 /// format. Provides an iterator over InstrProfRecords.
52 class InstrProfReader {
53   instrprof_error LastError;
54 
55 public:
InstrProfReader()56   InstrProfReader() : LastError(instrprof_error::success), Symtab() {}
~InstrProfReader()57   virtual ~InstrProfReader() {}
58 
59   /// Read the header.  Required before reading first record.
60   virtual Error readHeader() = 0;
61   /// Read a single record.
62   virtual Error readNextRecord(InstrProfRecord &Record) = 0;
63   /// Iterator over profile data.
begin()64   InstrProfIterator begin() { return InstrProfIterator(this); }
end()65   InstrProfIterator end() { return InstrProfIterator(); }
66   virtual bool isIRLevelProfile() const = 0;
67 
68   /// Return the PGO symtab. There are three different readers:
69   /// Raw, Text, and Indexed profile readers. The first two types
70   /// of readers are used only by llvm-profdata tool, while the indexed
71   /// profile reader is also used by llvm-cov tool and the compiler (
72   /// backend or frontend). Since creating PGO symtab can create
73   /// significant runtime and memory overhead (as it touches data
74   /// for the whole program), InstrProfSymtab for the indexed profile
75   /// reader should be created on demand and it is recommended to be
76   /// only used for dumping purpose with llvm-proftool, not with the
77   /// compiler.
78   virtual InstrProfSymtab &getSymtab() = 0;
79 
80 protected:
81   std::unique_ptr<InstrProfSymtab> Symtab;
82   /// Set the current error and return same.
error(instrprof_error Err)83   Error error(instrprof_error Err) {
84     LastError = Err;
85     if (Err == instrprof_error::success)
86       return Error::success();
87     return make_error<InstrProfError>(Err);
88   }
error(Error E)89   Error error(Error E) { return error(InstrProfError::take(std::move(E))); }
90 
91   /// Clear the current error and return a successful one.
success()92   Error success() { return error(instrprof_error::success); }
93 
94 public:
95   /// Return true if the reader has finished reading the profile data.
isEOF()96   bool isEOF() { return LastError == instrprof_error::eof; }
97   /// Return true if the reader encountered an error reading profiling data.
hasError()98   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
99   /// Get the current error.
getError()100   Error getError() {
101     if (hasError())
102       return make_error<InstrProfError>(LastError);
103     return Error::success();
104   }
105 
106   /// Factory method to create an appropriately typed reader for the given
107   /// instrprof file.
108   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
109 
110   static Expected<std::unique_ptr<InstrProfReader>>
111   create(std::unique_ptr<MemoryBuffer> Buffer);
112 };
113 
114 /// Reader for the simple text based instrprof format.
115 ///
116 /// This format is a simple text format that's suitable for test data. Records
117 /// are separated by one or more blank lines, and record fields are separated by
118 /// new lines.
119 ///
120 /// Each record consists of a function name, a function hash, a number of
121 /// counters, and then each counter value, in that order.
122 class TextInstrProfReader : public InstrProfReader {
123 private:
124   /// The profile data file contents.
125   std::unique_ptr<MemoryBuffer> DataBuffer;
126   /// Iterator over the profile data.
127   line_iterator Line;
128   bool IsIRLevelProfile;
129 
130   TextInstrProfReader(const TextInstrProfReader &) = delete;
131   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
132   Error readValueProfileData(InstrProfRecord &Record);
133 
134 public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)135   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
136       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#'),
137         IsIRLevelProfile(false) {}
138 
139   /// Return true if the given buffer is in text instrprof format.
140   static bool hasFormat(const MemoryBuffer &Buffer);
141 
isIRLevelProfile()142   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
143 
144   /// Read the header.
145   Error readHeader() override;
146   /// Read a single record.
147   Error readNextRecord(InstrProfRecord &Record) override;
148 
getSymtab()149   InstrProfSymtab &getSymtab() override {
150     assert(Symtab.get());
151     return *Symtab.get();
152   }
153 };
154 
155 /// Reader for the raw instrprof binary format from runtime.
156 ///
157 /// This format is a raw memory dump of the instrumentation-baed profiling data
158 /// from the runtime.  It has no index.
159 ///
160 /// Templated on the unsigned type whose size matches pointers on the platform
161 /// that wrote the profile.
162 template <class IntPtrT>
163 class RawInstrProfReader : public InstrProfReader {
164 private:
165   /// The profile data file contents.
166   std::unique_ptr<MemoryBuffer> DataBuffer;
167   bool ShouldSwapBytes;
168   // The value of the version field of the raw profile data header. The lower 56
169   // bits specifies the format version and the most significant 8 bits specify
170   // the variant types of the profile.
171   uint64_t Version;
172   uint64_t CountersDelta;
173   uint64_t NamesDelta;
174   const RawInstrProf::ProfileData<IntPtrT> *Data;
175   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
176   const uint64_t *CountersStart;
177   const char *NamesStart;
178   uint64_t NamesSize;
179   // After value profile is all read, this pointer points to
180   // the header of next profile data (if exists)
181   const uint8_t *ValueDataStart;
182   uint32_t ValueKindLast;
183   uint32_t CurValueDataSize;
184 
185   InstrProfRecord::ValueMapType FunctionPtrToNameMap;
186 
187   RawInstrProfReader(const RawInstrProfReader &) = delete;
188   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
189 public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)190   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
191       : DataBuffer(std::move(DataBuffer)) { }
192 
193   static bool hasFormat(const MemoryBuffer &DataBuffer);
194   Error readHeader() override;
195   Error readNextRecord(InstrProfRecord &Record) override;
isIRLevelProfile()196   bool isIRLevelProfile() const override {
197     return (Version & VARIANT_MASK_IR_PROF) != 0;
198   }
199 
getSymtab()200   InstrProfSymtab &getSymtab() override {
201     assert(Symtab.get());
202     return *Symtab.get();
203   }
204 
205 private:
206   Error createSymtab(InstrProfSymtab &Symtab);
207   Error readNextHeader(const char *CurrentPos);
208   Error readHeader(const RawInstrProf::Header &Header);
swap(IntT Int)209   template <class IntT> IntT swap(IntT Int) const {
210     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
211   }
getDataEndianness()212   support::endianness getDataEndianness() const {
213     support::endianness HostEndian = getHostEndianness();
214     if (!ShouldSwapBytes)
215       return HostEndian;
216     if (HostEndian == support::little)
217       return support::big;
218     else
219       return support::little;
220   }
221 
getNumPaddingBytes(uint64_t SizeInBytes)222   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
223     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
224   }
225   Error readName(InstrProfRecord &Record);
226   Error readFuncHash(InstrProfRecord &Record);
227   Error readRawCounts(InstrProfRecord &Record);
228   Error readValueProfilingData(InstrProfRecord &Record);
atEnd()229   bool atEnd() const { return Data == DataEnd; }
advanceData()230   void advanceData() {
231     Data++;
232     ValueDataStart += CurValueDataSize;
233   }
getNextHeaderPos()234   const char *getNextHeaderPos() const {
235       assert(atEnd());
236       return (const char *)ValueDataStart;
237   }
238 
getCounter(IntPtrT CounterPtr)239   const uint64_t *getCounter(IntPtrT CounterPtr) const {
240     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
241     return CountersStart + Offset;
242   }
getName(uint64_t NameRef)243   StringRef getName(uint64_t NameRef) const {
244     return Symtab->getFuncName(swap(NameRef));
245   }
246 };
247 
248 typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
249 typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
250 
namespace IndexedInstrProf {
/// Opaque declaration of the hash-type enumeration (underlying type
/// uint32_t); only the name is needed in this header.
enum class HashT : uint32_t;
} // end namespace IndexedInstrProf
254 
255 /// Trait for lookups into the on-disk hash table for the binary instrprof
256 /// format.
257 class InstrProfLookupTrait {
258   std::vector<InstrProfRecord> DataBuffer;
259   IndexedInstrProf::HashT HashType;
260   unsigned FormatVersion;
261   // Endianness of the input value profile data.
262   // It should be LE by default, but can be changed
263   // for testing purpose.
264   support::endianness ValueProfDataEndianness;
265 
266 public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)267   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
268       : HashType(HashType), FormatVersion(FormatVersion),
269         ValueProfDataEndianness(support::little) {}
270 
271   typedef ArrayRef<InstrProfRecord> data_type;
272 
273   typedef StringRef internal_key_type;
274   typedef StringRef external_key_type;
275   typedef uint64_t hash_value_type;
276   typedef uint64_t offset_type;
277 
EqualKey(StringRef A,StringRef B)278   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
GetInternalKey(StringRef K)279   static StringRef GetInternalKey(StringRef K) { return K; }
GetExternalKey(StringRef K)280   static StringRef GetExternalKey(StringRef K) { return K; }
281 
282   hash_value_type ComputeHash(StringRef K);
283 
284   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)285   ReadKeyDataLength(const unsigned char *&D) {
286     using namespace support;
287     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
288     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
289     return std::make_pair(KeyLen, DataLen);
290   }
291 
ReadKey(const unsigned char * D,offset_type N)292   StringRef ReadKey(const unsigned char *D, offset_type N) {
293     return StringRef((const char *)D, N);
294   }
295 
296   bool readValueProfilingData(const unsigned char *&D,
297                               const unsigned char *const End);
298   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
299 
300   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)301   void setValueProfDataEndianness(support::endianness Endianness) {
302     ValueProfDataEndianness = Endianness;
303   }
304 };
305 
306 struct InstrProfReaderIndexBase {
307   // Read all the profile records with the same key pointed to the current
308   // iterator.
309   virtual Error getRecords(ArrayRef<InstrProfRecord> &Data) = 0;
310   // Read all the profile records with the key equal to FuncName
311   virtual Error getRecords(StringRef FuncName,
312                                      ArrayRef<InstrProfRecord> &Data) = 0;
313   virtual void advanceToNextKey() = 0;
314   virtual bool atEnd() const = 0;
315   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
~InstrProfReaderIndexBaseInstrProfReaderIndexBase316   virtual ~InstrProfReaderIndexBase() {}
317   virtual uint64_t getVersion() const = 0;
318   virtual bool isIRLevelProfile() const = 0;
319   virtual void populateSymtab(InstrProfSymtab &) = 0;
320 };
321 
322 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
323     OnDiskHashTableImplV3;
324 
325 template <typename HashTableImpl>
326 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
327 
328 private:
329   std::unique_ptr<HashTableImpl> HashTable;
330   typename HashTableImpl::data_iterator RecordIterator;
331   uint64_t FormatVersion;
332 
333 public:
334   InstrProfReaderIndex(const unsigned char *Buckets,
335                        const unsigned char *const Payload,
336                        const unsigned char *const Base,
337                        IndexedInstrProf::HashT HashType, uint64_t Version);
338 
339   Error getRecords(ArrayRef<InstrProfRecord> &Data) override;
340   Error getRecords(StringRef FuncName,
341                    ArrayRef<InstrProfRecord> &Data) override;
advanceToNextKey()342   void advanceToNextKey() override { RecordIterator++; }
atEnd()343   bool atEnd() const override {
344     return RecordIterator == HashTable->data_end();
345   }
setValueProfDataEndianness(support::endianness Endianness)346   void setValueProfDataEndianness(support::endianness Endianness) override {
347     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
348   }
~InstrProfReaderIndex()349   ~InstrProfReaderIndex() override {}
getVersion()350   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
isIRLevelProfile()351   bool isIRLevelProfile() const override {
352     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
353   }
populateSymtab(InstrProfSymtab & Symtab)354   void populateSymtab(InstrProfSymtab &Symtab) override {
355     Symtab.create(HashTable->keys());
356   }
357 };
358 
359 /// Reader for the indexed binary instrprof format.
360 class IndexedInstrProfReader : public InstrProfReader {
361 private:
362   /// The profile data file contents.
363   std::unique_ptr<MemoryBuffer> DataBuffer;
364   /// The index into the profile data.
365   std::unique_ptr<InstrProfReaderIndexBase> Index;
366   /// Profile summary data.
367   std::unique_ptr<ProfileSummary> Summary;
368 
369   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
370   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
371 
372   // Read the profile summary. Return a pointer pointing to one byte past the
373   // end of the summary data if it exists or the input \c Cur.
374   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
375                                    const unsigned char *Cur);
376 
377 public:
378   /// Return the profile version.
getVersion()379   uint64_t getVersion() const { return Index->getVersion(); }
isIRLevelProfile()380   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)381   IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
382       : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
383 
384   /// Return true if the given buffer is in an indexed instrprof format.
385   static bool hasFormat(const MemoryBuffer &DataBuffer);
386 
387   /// Read the file header.
388   Error readHeader() override;
389   /// Read a single record.
390   Error readNextRecord(InstrProfRecord &Record) override;
391 
392   /// Return the pointer to InstrProfRecord associated with FuncName
393   /// and FuncHash
394   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
395                                                uint64_t FuncHash);
396 
397   /// Fill Counts with the profile data for the given function name.
398   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
399                           std::vector<uint64_t> &Counts);
400 
401   /// Return the maximum of all known function counts.
getMaximumFunctionCount()402   uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
403 
404   /// Factory method to create an indexed reader.
405   static Expected<std::unique_ptr<IndexedInstrProfReader>>
406   create(const Twine &Path);
407 
408   static Expected<std::unique_ptr<IndexedInstrProfReader>>
409   create(std::unique_ptr<MemoryBuffer> Buffer);
410 
411   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)412   void setValueProfDataEndianness(support::endianness Endianness) {
413     Index->setValueProfDataEndianness(Endianness);
414   }
415 
416   // See description in the base class. This interface is designed
417   // to be used by llvm-profdata (for dumping). Avoid using this when
418   // the client is the compiler.
419   InstrProfSymtab &getSymtab() override;
getSummary()420   ProfileSummary &getSummary() { return *(Summary.get()); }
421 };
422 
423 } // end namespace llvm
424 
425 #endif
426