1 //=-- InstrProfReader.h - Instrumented profiling readers ----------*- C++ -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for reading profiling data for instrumentation
11 // based PGO and coverage.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/EndianStream.h"
22 #include "llvm/Support/ErrorOr.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <iterator>
28 
29 namespace llvm {
30 
31 class InstrProfReader;
32 
33 /// A file format agnostic iterator over profiling data.
34 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
35                                                InstrProfRecord> {
36   InstrProfReader *Reader;
37   InstrProfRecord Record;
38 
39   void Increment();
40 public:
InstrProfIterator()41   InstrProfIterator() : Reader(nullptr) {}
InstrProfIterator(InstrProfReader * Reader)42   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
43 
44   InstrProfIterator &operator++() { Increment(); return *this; }
45   bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
46   bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
47   InstrProfRecord &operator*() { return Record; }
48   InstrProfRecord *operator->() { return &Record; }
49 };
50 
51 /// Base class and interface for reading profiling data of any known instrprof
52 /// format. Provides an iterator over InstrProfRecords.
53 class InstrProfReader {
54   std::error_code LastError;
55 
56 public:
InstrProfReader()57   InstrProfReader() : LastError(instrprof_error::success), Symtab() {}
~InstrProfReader()58   virtual ~InstrProfReader() {}
59 
60   /// Read the header.  Required before reading first record.
61   virtual std::error_code readHeader() = 0;
62   /// Read a single record.
63   virtual std::error_code readNextRecord(InstrProfRecord &Record) = 0;
64   /// Iterator over profile data.
begin()65   InstrProfIterator begin() { return InstrProfIterator(this); }
end()66   InstrProfIterator end() { return InstrProfIterator(); }
67 
68   /// Return the PGO symtab. There are three different readers:
69   /// Raw, Text, and Indexed profile readers. The first two types
70   /// of readers are used only by llvm-profdata tool, while the indexed
71   /// profile reader is also used by llvm-cov tool and the compiler (
72   /// backend or frontend). Since creating PGO symtab can create
73   /// significant runtime and memory overhead (as it touches data
74   /// for the whole program), InstrProfSymtab for the indexed profile
75   /// reader should be created on demand and it is recommended to be
76   /// only used for dumping purpose with llvm-proftool, not with the
77   /// compiler.
78   virtual InstrProfSymtab &getSymtab() = 0;
79 
80 protected:
81   std::unique_ptr<InstrProfSymtab> Symtab;
82   /// Set the current std::error_code and return same.
error(std::error_code EC)83   std::error_code error(std::error_code EC) {
84     LastError = EC;
85     return EC;
86   }
87 
88   /// Clear the current error code and return a successful one.
success()89   std::error_code success() { return error(instrprof_error::success); }
90 
91 public:
92   /// Return true if the reader has finished reading the profile data.
isEOF()93   bool isEOF() { return LastError == instrprof_error::eof; }
94   /// Return true if the reader encountered an error reading profiling data.
hasError()95   bool hasError() { return LastError && !isEOF(); }
96   /// Get the current error code.
getError()97   std::error_code getError() { return LastError; }
98 
99   /// Factory method to create an appropriately typed reader for the given
100   /// instrprof file.
101   static ErrorOr<std::unique_ptr<InstrProfReader>> create(std::string Path);
102 
103   static ErrorOr<std::unique_ptr<InstrProfReader>>
104   create(std::unique_ptr<MemoryBuffer> Buffer);
105 };
106 
107 /// Reader for the simple text based instrprof format.
108 ///
109 /// This format is a simple text format that's suitable for test data. Records
110 /// are separated by one or more blank lines, and record fields are separated by
111 /// new lines.
112 ///
113 /// Each record consists of a function name, a function hash, a number of
114 /// counters, and then each counter value, in that order.
115 class TextInstrProfReader : public InstrProfReader {
116 private:
117   /// The profile data file contents.
118   std::unique_ptr<MemoryBuffer> DataBuffer;
119   /// Iterator over the profile data.
120   line_iterator Line;
121 
122   TextInstrProfReader(const TextInstrProfReader &) = delete;
123   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
124   std::error_code readValueProfileData(InstrProfRecord &Record);
125 
126 public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)127   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
128       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
129 
130   /// Return true if the given buffer is in text instrprof format.
131   static bool hasFormat(const MemoryBuffer &Buffer);
132 
133   /// Read the header.
134   std::error_code readHeader() override;
135   /// Read a single record.
136   std::error_code readNextRecord(InstrProfRecord &Record) override;
137 
getSymtab()138   InstrProfSymtab &getSymtab() override {
139     assert(Symtab.get());
140     return *Symtab.get();
141   }
142 };
143 
144 /// Reader for the raw instrprof binary format from runtime.
145 ///
146 /// This format is a raw memory dump of the instrumentation-baed profiling data
147 /// from the runtime.  It has no index.
148 ///
149 /// Templated on the unsigned type whose size matches pointers on the platform
150 /// that wrote the profile.
151 template <class IntPtrT>
152 class RawInstrProfReader : public InstrProfReader {
153 private:
154   /// The profile data file contents.
155   std::unique_ptr<MemoryBuffer> DataBuffer;
156   bool ShouldSwapBytes;
157   uint64_t CountersDelta;
158   uint64_t NamesDelta;
159   const RawInstrProf::ProfileData<IntPtrT> *Data;
160   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
161   const uint64_t *CountersStart;
162   const char *NamesStart;
163   const uint8_t *ValueDataStart;
164   const char *ProfileEnd;
165   uint32_t ValueKindLast;
166   uint32_t CurValueDataSize;
167 
168   InstrProfRecord::ValueMapType FunctionPtrToNameMap;
169 
170   RawInstrProfReader(const RawInstrProfReader &) = delete;
171   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
172 public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)173   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
174       : DataBuffer(std::move(DataBuffer)) { }
175 
176   static bool hasFormat(const MemoryBuffer &DataBuffer);
177   std::error_code readHeader() override;
178   std::error_code readNextRecord(InstrProfRecord &Record) override;
179 
getSymtab()180   InstrProfSymtab &getSymtab() override {
181     assert(Symtab.get());
182     return *Symtab.get();
183   }
184 
185 private:
186   void createSymtab(InstrProfSymtab &Symtab);
187   std::error_code readNextHeader(const char *CurrentPos);
188   std::error_code readHeader(const RawInstrProf::Header &Header);
swap(IntT Int)189   template <class IntT> IntT swap(IntT Int) const {
190     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
191   }
getDataEndianness()192   support::endianness getDataEndianness() const {
193     support::endianness HostEndian = getHostEndianness();
194     if (!ShouldSwapBytes)
195       return HostEndian;
196     if (HostEndian == support::little)
197       return support::big;
198     else
199       return support::little;
200   }
201 
getNumPaddingBytes(uint64_t SizeInBytes)202   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
203     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
204   }
205   std::error_code readName(InstrProfRecord &Record);
206   std::error_code readFuncHash(InstrProfRecord &Record);
207   std::error_code readRawCounts(InstrProfRecord &Record);
208   std::error_code readValueProfilingData(InstrProfRecord &Record);
atEnd()209   bool atEnd() const { return Data == DataEnd; }
advanceData()210   void advanceData() {
211     Data++;
212     ValueDataStart += CurValueDataSize;
213   }
214 
getCounter(IntPtrT CounterPtr)215   const uint64_t *getCounter(IntPtrT CounterPtr) const {
216     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
217     return CountersStart + Offset;
218   }
getName(IntPtrT NamePtr)219   const char *getName(IntPtrT NamePtr) const {
220     ptrdiff_t Offset = (swap(NamePtr) - NamesDelta) / sizeof(char);
221     return NamesStart + Offset;
222   }
223 };
224 
225 typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
226 typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
227 
228 namespace IndexedInstrProf {
229 enum class HashT : uint32_t;
230 }
231 
232 /// Trait for lookups into the on-disk hash table for the binary instrprof
233 /// format.
234 class InstrProfLookupTrait {
235   std::vector<InstrProfRecord> DataBuffer;
236   IndexedInstrProf::HashT HashType;
237   unsigned FormatVersion;
238   // Endianness of the input value profile data.
239   // It should be LE by default, but can be changed
240   // for testing purpose.
241   support::endianness ValueProfDataEndianness;
242 
243 public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)244   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
245       : HashType(HashType), FormatVersion(FormatVersion),
246         ValueProfDataEndianness(support::little) {}
247 
248   typedef ArrayRef<InstrProfRecord> data_type;
249 
250   typedef StringRef internal_key_type;
251   typedef StringRef external_key_type;
252   typedef uint64_t hash_value_type;
253   typedef uint64_t offset_type;
254 
EqualKey(StringRef A,StringRef B)255   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
GetInternalKey(StringRef K)256   static StringRef GetInternalKey(StringRef K) { return K; }
GetExternalKey(StringRef K)257   static StringRef GetExternalKey(StringRef K) { return K; }
258 
259   hash_value_type ComputeHash(StringRef K);
260 
261   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)262   ReadKeyDataLength(const unsigned char *&D) {
263     using namespace support;
264     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
265     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
266     return std::make_pair(KeyLen, DataLen);
267   }
268 
ReadKey(const unsigned char * D,offset_type N)269   StringRef ReadKey(const unsigned char *D, offset_type N) {
270     return StringRef((const char *)D, N);
271   }
272 
273   bool readValueProfilingData(const unsigned char *&D,
274                               const unsigned char *const End);
275   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
276 
277   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)278   void setValueProfDataEndianness(support::endianness Endianness) {
279     ValueProfDataEndianness = Endianness;
280   }
281 };
282 
283 struct InstrProfReaderIndexBase {
284   // Read all the profile records with the same key pointed to the current
285   // iterator.
286   virtual std::error_code getRecords(ArrayRef<InstrProfRecord> &Data) = 0;
287   // Read all the profile records with the key equal to FuncName
288   virtual std::error_code getRecords(StringRef FuncName,
289                                      ArrayRef<InstrProfRecord> &Data) = 0;
290   virtual void advanceToNextKey() = 0;
291   virtual bool atEnd() const = 0;
292   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
~InstrProfReaderIndexBaseInstrProfReaderIndexBase293   virtual ~InstrProfReaderIndexBase() {}
294   virtual uint64_t getVersion() const = 0;
295   virtual void populateSymtab(InstrProfSymtab &) = 0;
296 };
297 
298 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
299     OnDiskHashTableImplV3;
300 
301 template <typename HashTableImpl>
302 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
303 
304 private:
305   std::unique_ptr<HashTableImpl> HashTable;
306   typename HashTableImpl::data_iterator RecordIterator;
307   uint64_t FormatVersion;
308 
309 public:
310   InstrProfReaderIndex(const unsigned char *Buckets,
311                        const unsigned char *const Payload,
312                        const unsigned char *const Base,
313                        IndexedInstrProf::HashT HashType, uint64_t Version);
314 
315   std::error_code getRecords(ArrayRef<InstrProfRecord> &Data) override;
316   std::error_code getRecords(StringRef FuncName,
317                              ArrayRef<InstrProfRecord> &Data) override;
advanceToNextKey()318   void advanceToNextKey() override { RecordIterator++; }
atEnd()319   bool atEnd() const override {
320     return RecordIterator == HashTable->data_end();
321   }
setValueProfDataEndianness(support::endianness Endianness)322   void setValueProfDataEndianness(support::endianness Endianness) override {
323     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
324   }
~InstrProfReaderIndex()325   ~InstrProfReaderIndex() override {}
getVersion()326   uint64_t getVersion() const override { return FormatVersion; }
populateSymtab(InstrProfSymtab & Symtab)327   void populateSymtab(InstrProfSymtab &Symtab) override {
328     Symtab.create(HashTable->keys());
329   }
330 };
331 
332 /// Reader for the indexed binary instrprof format.
333 class IndexedInstrProfReader : public InstrProfReader {
334 private:
335   /// The profile data file contents.
336   std::unique_ptr<MemoryBuffer> DataBuffer;
337   /// The index into the profile data.
338   std::unique_ptr<InstrProfReaderIndexBase> Index;
339   /// The maximal execution count among all functions.
340   uint64_t MaxFunctionCount;
341 
342   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
343   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
344 
345 public:
getVersion()346   uint64_t getVersion() const { return Index->getVersion(); }
IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)347   IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
348       : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
349 
350   /// Return true if the given buffer is in an indexed instrprof format.
351   static bool hasFormat(const MemoryBuffer &DataBuffer);
352 
353   /// Read the file header.
354   std::error_code readHeader() override;
355   /// Read a single record.
356   std::error_code readNextRecord(InstrProfRecord &Record) override;
357 
358   /// Return the pointer to InstrProfRecord associated with FuncName
359   /// and FuncHash
360   ErrorOr<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
361                                               uint64_t FuncHash);
362 
363   /// Fill Counts with the profile data for the given function name.
364   std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
365                                     std::vector<uint64_t> &Counts);
366 
367   /// Return the maximum of all known function counts.
getMaximumFunctionCount()368   uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
369 
370   /// Factory method to create an indexed reader.
371   static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
372   create(std::string Path);
373 
374   static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
375   create(std::unique_ptr<MemoryBuffer> Buffer);
376 
377   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)378   void setValueProfDataEndianness(support::endianness Endianness) {
379     Index->setValueProfDataEndianness(Endianness);
380   }
381 
382   // See description in the base class. This interface is designed
383   // to be used by llvm-profdata (for dumping). Avoid using this when
384   // the client is the compiler.
385   InstrProfSymtab &getSymtab() override;
386 };
387 
388 } // end namespace llvm
389 
390 #endif
391