1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/DebugInfo/CodeView/StreamArray.h"
14 #include "llvm/DebugInfo/CodeView/StreamInterface.h"
15 #include "llvm/DebugInfo/CodeView/StreamReader.h"
16 #include "llvm/DebugInfo/CodeView/StreamWriter.h"
17 #include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
18 #include "llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h"
19 #include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
20 #include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
21 #include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
22 #include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
23 #include "llvm/DebugInfo/PDB/Raw/RawError.h"
24 #include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
25 #include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/FileOutputBuffer.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 
30 using namespace llvm;
31 using namespace llvm::codeview;
32 using namespace llvm::pdb;
33 
34 namespace {
35 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
36 }
37 
PDBFile(std::unique_ptr<StreamInterface> PdbFileBuffer)38 PDBFile::PDBFile(std::unique_ptr<StreamInterface> PdbFileBuffer)
39     : Buffer(std::move(PdbFileBuffer)), SB(nullptr) {}
40 
~PDBFile()41 PDBFile::~PDBFile() {}
42 
getBlockSize() const43 uint32_t PDBFile::getBlockSize() const { return SB->BlockSize; }
44 
getUnknown0() const45 uint32_t PDBFile::getUnknown0() const { return SB->Unknown0; }
46 
getBlockCount() const47 uint32_t PDBFile::getBlockCount() const { return SB->NumBlocks; }
48 
getNumDirectoryBytes() const49 uint32_t PDBFile::getNumDirectoryBytes() const { return SB->NumDirectoryBytes; }
50 
getBlockMapIndex() const51 uint32_t PDBFile::getBlockMapIndex() const { return SB->BlockMapAddr; }
52 
getUnknown1() const53 uint32_t PDBFile::getUnknown1() const { return SB->Unknown1; }
54 
getNumDirectoryBlocks() const55 uint32_t PDBFile::getNumDirectoryBlocks() const {
56   return bytesToBlocks(SB->NumDirectoryBytes, SB->BlockSize);
57 }
58 
getBlockMapOffset() const59 uint64_t PDBFile::getBlockMapOffset() const {
60   return (uint64_t)SB->BlockMapAddr * SB->BlockSize;
61 }
62 
getNumStreams() const63 uint32_t PDBFile::getNumStreams() const { return StreamSizes.size(); }
64 
getStreamByteSize(uint32_t StreamIndex) const65 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
66   return StreamSizes[StreamIndex];
67 }
68 
69 ArrayRef<support::ulittle32_t>
getStreamBlockList(uint32_t StreamIndex) const70 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
71   return StreamMap[StreamIndex];
72 }
73 
getFileSize() const74 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
75 
getBlockData(uint32_t BlockIndex,uint32_t NumBytes) const76 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
77                                                   uint32_t NumBytes) const {
78   uint64_t StreamBlockOffset = blockToOffset(BlockIndex, getBlockSize());
79 
80   ArrayRef<uint8_t> Result;
81   if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
82     return std::move(EC);
83   return Result;
84 }
85 
setBlockData(uint32_t BlockIndex,uint32_t Offset,ArrayRef<uint8_t> Data) const86 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
87                             ArrayRef<uint8_t> Data) const {
88   if (Offset >= getBlockSize())
89     return make_error<RawError>(
90         raw_error_code::invalid_block_address,
91         "setBlockData attempted to write out of block bounds.");
92   if (Data.size() > getBlockSize() - Offset)
93     return make_error<RawError>(
94         raw_error_code::invalid_block_address,
95         "setBlockData attempted to write out of block bounds.");
96 
97   uint64_t StreamBlockOffset = blockToOffset(BlockIndex, getBlockSize());
98   StreamBlockOffset += Offset;
99   return Buffer->writeBytes(StreamBlockOffset, Data);
100 }
101 
parseFileHeaders()102 Error PDBFile::parseFileHeaders() {
103   StreamReader Reader(*Buffer);
104 
105   if (auto EC = Reader.readObject(SB)) {
106     consumeError(std::move(EC));
107     return make_error<RawError>(raw_error_code::corrupt_file,
108                                 "Does not contain superblock");
109   }
110 
111   if (auto EC = setSuperBlock(SB))
112     return EC;
113 
114   Reader.setOffset(getBlockMapOffset());
115   if (auto EC = Reader.readArray(DirectoryBlocks, getNumDirectoryBlocks()))
116     return EC;
117 
118   return Error::success();
119 }
120 
parseStreamData()121 Error PDBFile::parseStreamData() {
122   assert(SB);
123   if (DirectoryStream)
124     return Error::success();
125 
126   uint32_t NumStreams = 0;
127 
128   // Normally you can't use a MappedBlockStream without having fully parsed the
129   // PDB file, because it accesses the directory and various other things, which
130   // is exactly what we are attempting to parse.  By specifying a custom
131   // subclass of IPDBStreamData which only accesses the fields that have already
132   // been parsed, we can avoid this and reuse MappedBlockStream.
133   auto DS = MappedBlockStream::createDirectoryStream(*this);
134   if (!DS)
135     return DS.takeError();
136   StreamReader Reader(**DS);
137   if (auto EC = Reader.readInteger(NumStreams))
138     return EC;
139 
140   if (auto EC = Reader.readArray(StreamSizes, NumStreams))
141     return EC;
142   for (uint32_t I = 0; I < NumStreams; ++I) {
143     uint32_t StreamSize = getStreamByteSize(I);
144     // FIXME: What does StreamSize ~0U mean?
145     uint64_t NumExpectedStreamBlocks =
146         StreamSize == UINT32_MAX ? 0 : bytesToBlocks(StreamSize, SB->BlockSize);
147 
148     // For convenience, we store the block array contiguously.  This is because
149     // if someone calls setStreamMap(), it is more convenient to be able to call
150     // it with an ArrayRef instead of setting up a StreamRef.  Since the
151     // DirectoryStream is cached in the class and thus lives for the life of the
152     // class, we can be guaranteed that readArray() will return a stable
153     // reference, even if it has to allocate from its internal pool.
154     ArrayRef<support::ulittle32_t> Blocks;
155     if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
156       return EC;
157     for (uint32_t Block : Blocks) {
158       uint64_t BlockEndOffset = (uint64_t)(Block + 1) * SB->BlockSize;
159       if (BlockEndOffset > getFileSize())
160         return make_error<RawError>(raw_error_code::corrupt_file,
161                                     "Stream block map is corrupt.");
162     }
163     StreamMap.push_back(Blocks);
164   }
165 
166   // We should have read exactly SB->NumDirectoryBytes bytes.
167   assert(Reader.bytesRemaining() == 0);
168   DirectoryStream = std::move(*DS);
169   return Error::success();
170 }
171 
getDirectoryBlockArray() const172 llvm::ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
173   return DirectoryBlocks;
174 }
175 
getPDBInfoStream()176 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
177   if (!Info) {
178     auto InfoS = MappedBlockStream::createIndexedStream(StreamPDB, *this);
179     if (!InfoS)
180       return InfoS.takeError();
181     auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
182     if (auto EC = TempInfo->reload())
183       return std::move(EC);
184     Info = std::move(TempInfo);
185   }
186   return *Info;
187 }
188 
getPDBDbiStream()189 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
190   if (!Dbi) {
191     auto DbiS = MappedBlockStream::createIndexedStream(StreamDBI, *this);
192     if (!DbiS)
193       return DbiS.takeError();
194     auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS));
195     if (auto EC = TempDbi->reload())
196       return std::move(EC);
197     Dbi = std::move(TempDbi);
198   }
199   return *Dbi;
200 }
201 
getPDBTpiStream()202 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
203   if (!Tpi) {
204     auto TpiS = MappedBlockStream::createIndexedStream(StreamTPI, *this);
205     if (!TpiS)
206       return TpiS.takeError();
207     auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
208     if (auto EC = TempTpi->reload())
209       return std::move(EC);
210     Tpi = std::move(TempTpi);
211   }
212   return *Tpi;
213 }
214 
getPDBIpiStream()215 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
216   if (!Ipi) {
217     auto IpiS = MappedBlockStream::createIndexedStream(StreamIPI, *this);
218     if (!IpiS)
219       return IpiS.takeError();
220     auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
221     if (auto EC = TempIpi->reload())
222       return std::move(EC);
223     Ipi = std::move(TempIpi);
224   }
225   return *Ipi;
226 }
227 
getPDBPublicsStream()228 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
229   if (!Publics) {
230     auto DbiS = getPDBDbiStream();
231     if (!DbiS)
232       return DbiS.takeError();
233 
234     uint32_t PublicsStreamNum = DbiS->getPublicSymbolStreamIndex();
235 
236     auto PublicS =
237         MappedBlockStream::createIndexedStream(PublicsStreamNum, *this);
238     if (!PublicS)
239       return PublicS.takeError();
240     auto TempPublics =
241         llvm::make_unique<PublicsStream>(*this, std::move(*PublicS));
242     if (auto EC = TempPublics->reload())
243       return std::move(EC);
244     Publics = std::move(TempPublics);
245   }
246   return *Publics;
247 }
248 
getPDBSymbolStream()249 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
250   if (!Symbols) {
251     auto DbiS = getPDBDbiStream();
252     if (!DbiS)
253       return DbiS.takeError();
254 
255     uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
256 
257     auto SymbolS =
258         MappedBlockStream::createIndexedStream(SymbolStreamNum, *this);
259     if (!SymbolS)
260       return SymbolS.takeError();
261     auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
262     if (auto EC = TempSymbols->reload())
263       return std::move(EC);
264     Symbols = std::move(TempSymbols);
265   }
266   return *Symbols;
267 }
268 
getStringTable()269 Expected<NameHashTable &> PDBFile::getStringTable() {
270   if (!StringTable || !StringTableStream) {
271     auto IS = getPDBInfoStream();
272     if (!IS)
273       return IS.takeError();
274 
275     uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names");
276 
277     if (NameStreamIndex == 0)
278       return make_error<RawError>(raw_error_code::no_stream);
279     if (NameStreamIndex >= getNumStreams())
280       return make_error<RawError>(raw_error_code::no_stream);
281 
282     auto NS = MappedBlockStream::createIndexedStream(NameStreamIndex, *this);
283     if (!NS)
284       return NS.takeError();
285 
286     StreamReader Reader(**NS);
287     auto N = llvm::make_unique<NameHashTable>();
288     if (auto EC = N->load(Reader))
289       return std::move(EC);
290     StringTable = std::move(N);
291     StringTableStream = std::move(*NS);
292   }
293   return *StringTable;
294 }
295 
setSuperBlock(const SuperBlock * Block)296 Error PDBFile::setSuperBlock(const SuperBlock *Block) {
297   SB = Block;
298 
299   // Check the magic bytes.
300   if (memcmp(SB->MagicBytes, MsfMagic, sizeof(MsfMagic)) != 0)
301     return make_error<RawError>(raw_error_code::corrupt_file,
302                                 "MSF magic header doesn't match");
303 
304   // We don't support blocksizes which aren't a multiple of four bytes.
305   if (SB->BlockSize % sizeof(support::ulittle32_t) != 0)
306     return make_error<RawError>(raw_error_code::corrupt_file,
307                                 "Block size is not multiple of 4.");
308 
309   switch (SB->BlockSize) {
310   case 512:
311   case 1024:
312   case 2048:
313   case 4096:
314     break;
315   default:
316     // An invalid block size suggests a corrupt PDB file.
317     return make_error<RawError>(raw_error_code::corrupt_file,
318                                 "Unsupported block size.");
319   }
320 
321   if (Buffer->getLength() % SB->BlockSize != 0)
322     return make_error<RawError>(raw_error_code::corrupt_file,
323                                 "File size is not a multiple of block size");
324 
325   // We don't support directories whose sizes aren't a multiple of four bytes.
326   if (SB->NumDirectoryBytes % sizeof(support::ulittle32_t) != 0)
327     return make_error<RawError>(raw_error_code::corrupt_file,
328                                 "Directory size is not multiple of 4.");
329 
330   // The number of blocks which comprise the directory is a simple function of
331   // the number of bytes it contains.
332   uint64_t NumDirectoryBlocks = getNumDirectoryBlocks();
333 
334   // The directory, as we understand it, is a block which consists of a list of
335   // block numbers.  It is unclear what would happen if the number of blocks
336   // couldn't fit on a single block.
337   if (NumDirectoryBlocks > SB->BlockSize / sizeof(support::ulittle32_t))
338     return make_error<RawError>(raw_error_code::corrupt_file,
339                                 "Too many directory blocks.");
340 
341   return Error::success();
342 }
343 
commit()344 Error PDBFile::commit() {
345   StreamWriter Writer(*Buffer);
346 
347   if (auto EC = Writer.writeObject(*SB))
348     return EC;
349   Writer.setOffset(getBlockMapOffset());
350   if (auto EC = Writer.writeArray(DirectoryBlocks))
351     return EC;
352 
353   auto DS = MappedBlockStream::createDirectoryStream(*this);
354   if (!DS)
355     return DS.takeError();
356   auto DirStream = std::move(*DS);
357   StreamWriter DW(*DirStream);
358   if (auto EC = DW.writeInteger(this->getNumStreams()))
359     return EC;
360 
361   if (auto EC = DW.writeArray(StreamSizes))
362     return EC;
363 
364   for (const auto &Blocks : StreamMap) {
365     if (auto EC = DW.writeArray(Blocks))
366       return EC;
367   }
368 
369   if (Info) {
370     if (auto EC = Info->commit())
371       return EC;
372   }
373 
374   if (Dbi) {
375     if (auto EC = Dbi->commit())
376       return EC;
377   }
378 
379   if (Symbols) {
380     if (auto EC = Symbols->commit())
381       return EC;
382   }
383 
384   if (Publics) {
385     if (auto EC = Publics->commit())
386       return EC;
387   }
388 
389   if (Tpi) {
390     if (auto EC = Tpi->commit())
391       return EC;
392   }
393 
394   if (Ipi) {
395     if (auto EC = Ipi->commit())
396       return EC;
397   }
398 
399   return Buffer->commit();
400 }
401