1 //=-- CoverageMappingReader.cpp - Code coverage mapping reader ----*- C++ -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for reading coverage mapping data for
11 // instrumentation based coverage.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ProfileData/CoverageMappingReader.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/Object/MachOUniversal.h"
18 #include "llvm/Object/ObjectFile.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/LEB128.h"
22 #include "llvm/Support/raw_ostream.h"
23 
24 using namespace llvm;
25 using namespace coverage;
26 using namespace object;
27 
28 #define DEBUG_TYPE "coverage-mapping"
29 
increment()30 void CoverageMappingIterator::increment() {
31   // Check if all the records were read or if an error occurred while reading
32   // the next record.
33   if (Reader->readNextRecord(Record))
34     *this = CoverageMappingIterator();
35 }
36 
readULEB128(uint64_t & Result)37 std::error_code RawCoverageReader::readULEB128(uint64_t &Result) {
38   if (Data.size() < 1)
39     return error(instrprof_error::truncated);
40   unsigned N = 0;
41   Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
42   if (N > Data.size())
43     return error(instrprof_error::malformed);
44   Data = Data.substr(N);
45   return success();
46 }
47 
readIntMax(uint64_t & Result,uint64_t MaxPlus1)48 std::error_code RawCoverageReader::readIntMax(uint64_t &Result,
49                                               uint64_t MaxPlus1) {
50   if (auto Err = readULEB128(Result))
51     return Err;
52   if (Result >= MaxPlus1)
53     return error(instrprof_error::malformed);
54   return success();
55 }
56 
readSize(uint64_t & Result)57 std::error_code RawCoverageReader::readSize(uint64_t &Result) {
58   if (auto Err = readULEB128(Result))
59     return Err;
60   // Sanity check the number.
61   if (Result > Data.size())
62     return error(instrprof_error::malformed);
63   return success();
64 }
65 
readString(StringRef & Result)66 std::error_code RawCoverageReader::readString(StringRef &Result) {
67   uint64_t Length;
68   if (auto Err = readSize(Length))
69     return Err;
70   Result = Data.substr(0, Length);
71   Data = Data.substr(Length);
72   return success();
73 }
74 
read()75 std::error_code RawCoverageFilenamesReader::read() {
76   uint64_t NumFilenames;
77   if (auto Err = readSize(NumFilenames))
78     return Err;
79   for (size_t I = 0; I < NumFilenames; ++I) {
80     StringRef Filename;
81     if (auto Err = readString(Filename))
82       return Err;
83     Filenames.push_back(Filename);
84   }
85   return success();
86 }
87 
decodeCounter(unsigned Value,Counter & C)88 std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value,
89                                                         Counter &C) {
90   auto Tag = Value & Counter::EncodingTagMask;
91   switch (Tag) {
92   case Counter::Zero:
93     C = Counter::getZero();
94     return success();
95   case Counter::CounterValueReference:
96     C = Counter::getCounter(Value >> Counter::EncodingTagBits);
97     return success();
98   default:
99     break;
100   }
101   Tag -= Counter::Expression;
102   switch (Tag) {
103   case CounterExpression::Subtract:
104   case CounterExpression::Add: {
105     auto ID = Value >> Counter::EncodingTagBits;
106     if (ID >= Expressions.size())
107       return error(instrprof_error::malformed);
108     Expressions[ID].Kind = CounterExpression::ExprKind(Tag);
109     C = Counter::getExpression(ID);
110     break;
111   }
112   default:
113     return error(instrprof_error::malformed);
114   }
115   return success();
116 }
117 
readCounter(Counter & C)118 std::error_code RawCoverageMappingReader::readCounter(Counter &C) {
119   uint64_t EncodedCounter;
120   if (auto Err =
121           readIntMax(EncodedCounter, std::numeric_limits<unsigned>::max()))
122     return Err;
123   if (auto Err = decodeCounter(EncodedCounter, C))
124     return Err;
125   return success();
126 }
127 
128 static const unsigned EncodingExpansionRegionBit = 1
129                                                    << Counter::EncodingTagBits;
130 
131 /// \brief Read the sub-array of regions for the given inferred file id.
132 /// \param NumFileIDs the number of file ids that are defined for this
133 /// function.
readMappingRegionsSubArray(std::vector<CounterMappingRegion> & MappingRegions,unsigned InferredFileID,size_t NumFileIDs)134 std::error_code RawCoverageMappingReader::readMappingRegionsSubArray(
135     std::vector<CounterMappingRegion> &MappingRegions, unsigned InferredFileID,
136     size_t NumFileIDs) {
137   uint64_t NumRegions;
138   if (auto Err = readSize(NumRegions))
139     return Err;
140   unsigned LineStart = 0;
141   for (size_t I = 0; I < NumRegions; ++I) {
142     Counter C;
143     CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion;
144 
145     // Read the combined counter + region kind.
146     uint64_t EncodedCounterAndRegion;
147     if (auto Err = readIntMax(EncodedCounterAndRegion,
148                               std::numeric_limits<unsigned>::max()))
149       return Err;
150     unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask;
151     uint64_t ExpandedFileID = 0;
152     if (Tag != Counter::Zero) {
153       if (auto Err = decodeCounter(EncodedCounterAndRegion, C))
154         return Err;
155     } else {
156       // Is it an expansion region?
157       if (EncodedCounterAndRegion & EncodingExpansionRegionBit) {
158         Kind = CounterMappingRegion::ExpansionRegion;
159         ExpandedFileID = EncodedCounterAndRegion >>
160                          Counter::EncodingCounterTagAndExpansionRegionTagBits;
161         if (ExpandedFileID >= NumFileIDs)
162           return error(instrprof_error::malformed);
163       } else {
164         switch (EncodedCounterAndRegion >>
165                 Counter::EncodingCounterTagAndExpansionRegionTagBits) {
166         case CounterMappingRegion::CodeRegion:
167           // Don't do anything when we have a code region with a zero counter.
168           break;
169         case CounterMappingRegion::SkippedRegion:
170           Kind = CounterMappingRegion::SkippedRegion;
171           break;
172         default:
173           return error(instrprof_error::malformed);
174         }
175       }
176     }
177 
178     // Read the source range.
179     uint64_t LineStartDelta, ColumnStart, NumLines, ColumnEnd;
180     if (auto Err =
181             readIntMax(LineStartDelta, std::numeric_limits<unsigned>::max()))
182       return Err;
183     if (auto Err = readULEB128(ColumnStart))
184       return Err;
185     if (ColumnStart > std::numeric_limits<unsigned>::max())
186       return error(instrprof_error::malformed);
187     if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max()))
188       return Err;
189     if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max()))
190       return Err;
191     LineStart += LineStartDelta;
192     // Adjust the column locations for the empty regions that are supposed to
193     // cover whole lines. Those regions should be encoded with the
194     // column range (1 -> std::numeric_limits<unsigned>::max()), but because
195     // the encoded std::numeric_limits<unsigned>::max() is several bytes long,
196     // we set the column range to (0 -> 0) to ensure that the column start and
197     // column end take up one byte each.
198     // The std::numeric_limits<unsigned>::max() is used to represent a column
199     // position at the end of the line without knowing the length of that line.
200     if (ColumnStart == 0 && ColumnEnd == 0) {
201       ColumnStart = 1;
202       ColumnEnd = std::numeric_limits<unsigned>::max();
203     }
204 
205     DEBUG({
206       dbgs() << "Counter in file " << InferredFileID << " " << LineStart << ":"
207              << ColumnStart << " -> " << (LineStart + NumLines) << ":"
208              << ColumnEnd << ", ";
209       if (Kind == CounterMappingRegion::ExpansionRegion)
210         dbgs() << "Expands to file " << ExpandedFileID;
211       else
212         CounterMappingContext(Expressions).dump(C, dbgs());
213       dbgs() << "\n";
214     });
215 
216     MappingRegions.push_back(CounterMappingRegion(
217         C, InferredFileID, ExpandedFileID, LineStart, ColumnStart,
218         LineStart + NumLines, ColumnEnd, Kind));
219   }
220   return success();
221 }
222 
read()223 std::error_code RawCoverageMappingReader::read() {
224 
225   // Read the virtual file mapping.
226   llvm::SmallVector<unsigned, 8> VirtualFileMapping;
227   uint64_t NumFileMappings;
228   if (auto Err = readSize(NumFileMappings))
229     return Err;
230   for (size_t I = 0; I < NumFileMappings; ++I) {
231     uint64_t FilenameIndex;
232     if (auto Err = readIntMax(FilenameIndex, TranslationUnitFilenames.size()))
233       return Err;
234     VirtualFileMapping.push_back(FilenameIndex);
235   }
236 
237   // Construct the files using unique filenames and virtual file mapping.
238   for (auto I : VirtualFileMapping) {
239     Filenames.push_back(TranslationUnitFilenames[I]);
240   }
241 
242   // Read the expressions.
243   uint64_t NumExpressions;
244   if (auto Err = readSize(NumExpressions))
245     return Err;
246   // Create an array of dummy expressions that get the proper counters
247   // when the expressions are read, and the proper kinds when the counters
248   // are decoded.
249   Expressions.resize(
250       NumExpressions,
251       CounterExpression(CounterExpression::Subtract, Counter(), Counter()));
252   for (size_t I = 0; I < NumExpressions; ++I) {
253     if (auto Err = readCounter(Expressions[I].LHS))
254       return Err;
255     if (auto Err = readCounter(Expressions[I].RHS))
256       return Err;
257   }
258 
259   // Read the mapping regions sub-arrays.
260   for (unsigned InferredFileID = 0, S = VirtualFileMapping.size();
261        InferredFileID < S; ++InferredFileID) {
262     if (auto Err = readMappingRegionsSubArray(MappingRegions, InferredFileID,
263                                               VirtualFileMapping.size()))
264       return Err;
265   }
266 
267   // Set the counters for the expansion regions.
268   // i.e. Counter of expansion region = counter of the first region
269   // from the expanded file.
270   // Perform multiple passes to correctly propagate the counters through
271   // all the nested expansion regions.
272   SmallVector<CounterMappingRegion *, 8> FileIDExpansionRegionMapping;
273   FileIDExpansionRegionMapping.resize(VirtualFileMapping.size(), nullptr);
274   for (unsigned Pass = 1, S = VirtualFileMapping.size(); Pass < S; ++Pass) {
275     for (auto &R : MappingRegions) {
276       if (R.Kind != CounterMappingRegion::ExpansionRegion)
277         continue;
278       assert(!FileIDExpansionRegionMapping[R.ExpandedFileID]);
279       FileIDExpansionRegionMapping[R.ExpandedFileID] = &R;
280     }
281     for (auto &R : MappingRegions) {
282       if (FileIDExpansionRegionMapping[R.FileID]) {
283         FileIDExpansionRegionMapping[R.FileID]->Count = R.Count;
284         FileIDExpansionRegionMapping[R.FileID] = nullptr;
285       }
286     }
287   }
288 
289   return success();
290 }
291 
292 namespace {
293 
294 /// \brief A helper structure to access the data from a section
295 /// in an object file.
296 struct SectionData {
297   StringRef Data;
298   uint64_t Address;
299 
load__anon5c93d7f70111::SectionData300   std::error_code load(SectionRef &Section) {
301     if (auto Err = Section.getContents(Data))
302       return Err;
303     Address = Section.getAddress();
304     return instrprof_error::success;
305   }
306 
get__anon5c93d7f70111::SectionData307   std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) {
308     if (Pointer < Address)
309       return instrprof_error::malformed;
310     auto Offset = Pointer - Address;
311     if (Offset + Size > Data.size())
312       return instrprof_error::malformed;
313     Result = Data.substr(Pointer - Address, Size);
314     return instrprof_error::success;
315   }
316 };
317 }
318 
319 template <typename T, support::endianness Endian>
readCoverageMappingData(SectionData & ProfileNames,StringRef Data,std::vector<BinaryCoverageReader::ProfileMappingRecord> & Records,std::vector<StringRef> & Filenames)320 std::error_code readCoverageMappingData(
321     SectionData &ProfileNames, StringRef Data,
322     std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
323     std::vector<StringRef> &Filenames) {
324   using namespace support;
325   llvm::DenseSet<T> UniqueFunctionMappingData;
326 
327   // Read the records in the coverage data section.
328   for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
329     if (Buf + 4 * sizeof(uint32_t) > End)
330       return instrprof_error::malformed;
331     uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf);
332     uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
333     uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
334     uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf);
335 
336     switch (Version) {
337     case CoverageMappingVersion1:
338       break;
339     default:
340       return instrprof_error::unsupported_version;
341     }
342 
343     // Skip past the function records, saving the start and end for later.
344     const char *FunBuf = Buf;
345     Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t));
346     const char *FunEnd = Buf;
347 
348     // Get the filenames.
349     if (Buf + FilenamesSize > End)
350       return instrprof_error::malformed;
351     size_t FilenamesBegin = Filenames.size();
352     RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames);
353     if (auto Err = Reader.read())
354       return Err;
355     Buf += FilenamesSize;
356 
357     // We'll read the coverage mapping records in the loop below.
358     const char *CovBuf = Buf;
359     Buf += CoverageSize;
360     const char *CovEnd = Buf;
361     if (Buf > End)
362       return instrprof_error::malformed;
363 
364     while (FunBuf < FunEnd) {
365       // Read the function information
366       T NamePtr = endian::readNext<T, Endian, unaligned>(FunBuf);
367       uint32_t NameSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
368       uint32_t DataSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
369       uint64_t FuncHash = endian::readNext<uint64_t, Endian, unaligned>(FunBuf);
370 
371       // Now use that to read the coverage data.
372       if (CovBuf + DataSize > CovEnd)
373         return instrprof_error::malformed;
374       auto Mapping = StringRef(CovBuf, DataSize);
375       CovBuf += DataSize;
376 
377       // Ignore this record if we already have a record that points to the same
378       // function name. This is useful to ignore the redundant records for the
379       // functions with ODR linkage.
380       if (!UniqueFunctionMappingData.insert(NamePtr).second)
381         continue;
382 
383       // Finally, grab the name and create a record.
384       StringRef FuncName;
385       if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName))
386         return EC;
387       Records.push_back(BinaryCoverageReader::ProfileMappingRecord(
388           CoverageMappingVersion(Version), FuncName, FuncHash, Mapping,
389           FilenamesBegin, Filenames.size() - FilenamesBegin));
390     }
391   }
392 
393   return instrprof_error::success;
394 }
395 
396 static const char *TestingFormatMagic = "llvmcovmtestdata";
397 
loadTestingFormat(StringRef Data,SectionData & ProfileNames,StringRef & CoverageMapping,uint8_t & BytesInAddress,support::endianness & Endian)398 static std::error_code loadTestingFormat(StringRef Data,
399                                          SectionData &ProfileNames,
400                                          StringRef &CoverageMapping,
401                                          uint8_t &BytesInAddress,
402                                          support::endianness &Endian) {
403   BytesInAddress = 8;
404   Endian = support::endianness::little;
405 
406   Data = Data.substr(StringRef(TestingFormatMagic).size());
407   if (Data.size() < 1)
408     return instrprof_error::truncated;
409   unsigned N = 0;
410   auto ProfileNamesSize =
411       decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
412   if (N > Data.size())
413     return instrprof_error::malformed;
414   Data = Data.substr(N);
415   if (Data.size() < 1)
416     return instrprof_error::truncated;
417   N = 0;
418   ProfileNames.Address =
419       decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
420   if (N > Data.size())
421     return instrprof_error::malformed;
422   Data = Data.substr(N);
423   if (Data.size() < ProfileNamesSize)
424     return instrprof_error::malformed;
425   ProfileNames.Data = Data.substr(0, ProfileNamesSize);
426   CoverageMapping = Data.substr(ProfileNamesSize);
427   return instrprof_error::success;
428 }
429 
loadBinaryFormat(MemoryBufferRef ObjectBuffer,SectionData & ProfileNames,StringRef & CoverageMapping,uint8_t & BytesInAddress,support::endianness & Endian,Triple::ArchType Arch)430 static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
431                                         SectionData &ProfileNames,
432                                         StringRef &CoverageMapping,
433                                         uint8_t &BytesInAddress,
434                                         support::endianness &Endian,
435                                         Triple::ArchType Arch) {
436   auto BinOrErr = object::createBinary(ObjectBuffer);
437   if (std::error_code EC = BinOrErr.getError())
438     return EC;
439   auto Bin = std::move(BinOrErr.get());
440   std::unique_ptr<ObjectFile> OF;
441   if (auto *Universal = dyn_cast<object::MachOUniversalBinary>(Bin.get())) {
442     // If we have a universal binary, try to look up the object for the
443     // appropriate architecture.
444     auto ObjectFileOrErr = Universal->getObjectForArch(Arch);
445     if (std::error_code EC = ObjectFileOrErr.getError())
446       return EC;
447     OF = std::move(ObjectFileOrErr.get());
448   } else if (isa<object::ObjectFile>(Bin.get())) {
449     // For any other object file, upcast and take ownership.
450     OF.reset(cast<object::ObjectFile>(Bin.release()));
451     // If we've asked for a particular arch, make sure they match.
452     if (Arch != Triple::ArchType::UnknownArch && OF->getArch() != Arch)
453       return object_error::arch_not_found;
454   } else
455     // We can only handle object files.
456     return instrprof_error::malformed;
457 
458   // The coverage uses native pointer sizes for the object it's written in.
459   BytesInAddress = OF->getBytesInAddress();
460   Endian = OF->isLittleEndian() ? support::endianness::little
461                                 : support::endianness::big;
462 
463   // Look for the sections that we are interested in.
464   int FoundSectionCount = 0;
465   SectionRef NamesSection, CoverageSection;
466   for (const auto &Section : OF->sections()) {
467     StringRef Name;
468     if (auto Err = Section.getName(Name))
469       return Err;
470     if (Name == "__llvm_prf_names") {
471       NamesSection = Section;
472     } else if (Name == "__llvm_covmap") {
473       CoverageSection = Section;
474     } else
475       continue;
476     ++FoundSectionCount;
477   }
478   if (FoundSectionCount != 2)
479     return instrprof_error::bad_header;
480 
481   // Get the contents of the given sections.
482   if (std::error_code EC = CoverageSection.getContents(CoverageMapping))
483     return EC;
484   if (std::error_code EC = ProfileNames.load(NamesSection))
485     return EC;
486 
487   return std::error_code();
488 }
489 
490 ErrorOr<std::unique_ptr<BinaryCoverageReader>>
create(std::unique_ptr<MemoryBuffer> & ObjectBuffer,Triple::ArchType Arch)491 BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
492                              Triple::ArchType Arch) {
493   std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
494 
495   SectionData Profile;
496   StringRef Coverage;
497   uint8_t BytesInAddress;
498   support::endianness Endian;
499   std::error_code EC;
500   if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic))
501     // This is a special format used for testing.
502     EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage,
503                            BytesInAddress, Endian);
504   else
505     EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage,
506                           BytesInAddress, Endian, Arch);
507   if (EC)
508     return EC;
509 
510   if (BytesInAddress == 4 && Endian == support::endianness::little)
511     EC = readCoverageMappingData<uint32_t, support::endianness::little>(
512         Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
513   else if (BytesInAddress == 4 && Endian == support::endianness::big)
514     EC = readCoverageMappingData<uint32_t, support::endianness::big>(
515         Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
516   else if (BytesInAddress == 8 && Endian == support::endianness::little)
517     EC = readCoverageMappingData<uint64_t, support::endianness::little>(
518         Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
519   else if (BytesInAddress == 8 && Endian == support::endianness::big)
520     EC = readCoverageMappingData<uint64_t, support::endianness::big>(
521         Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
522   else
523     return instrprof_error::malformed;
524   if (EC)
525     return EC;
526   return std::move(Reader);
527 }
528 
529 std::error_code
readNextRecord(CoverageMappingRecord & Record)530 BinaryCoverageReader::readNextRecord(CoverageMappingRecord &Record) {
531   if (CurrentRecord >= MappingRecords.size())
532     return instrprof_error::eof;
533 
534   FunctionsFilenames.clear();
535   Expressions.clear();
536   MappingRegions.clear();
537   auto &R = MappingRecords[CurrentRecord];
538   RawCoverageMappingReader Reader(
539       R.CoverageMapping,
540       makeArrayRef(Filenames).slice(R.FilenamesBegin, R.FilenamesSize),
541       FunctionsFilenames, Expressions, MappingRegions);
542   if (auto Err = Reader.read())
543     return Err;
544 
545   Record.FunctionName = R.FunctionName;
546   Record.FunctionHash = R.FunctionHash;
547   Record.Filenames = FunctionsFilenames;
548   Record.Expressions = Expressions;
549   Record.MappingRegions = MappingRegions;
550 
551   ++CurrentRecord;
552   return std::error_code();
553 }
554