1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This header defines the BitstreamReader class.  This class can be used to
11 // read an arbitrary bitstream, regardless of its contents.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H
16 #define LLVM_BITCODE_BITSTREAMREADER_H
17 
18 #include "llvm/Bitcode/BitCodes.h"
19 #include "llvm/Support/Endian.h"
20 #include "llvm/Support/MathExtras.h"
21 #include "llvm/Support/StreamingMemoryObject.h"
22 #include <climits>
23 #include <string>
24 #include <vector>
25 
26 namespace llvm {
27 
28 /// This class is used to read from an LLVM bitcode stream, maintaining
29 /// information that is global to decoding the entire file. While a file is
30 /// being read, multiple cursors can be independently advanced or skipped around
31 /// within the file.  These are represented by the BitstreamCursor class.
32 class BitstreamReader {
33 public:
34   /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
35   /// describe abbreviations that all blocks of the specified ID inherit.
36   struct BlockInfo {
37     unsigned BlockID;
38     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
39     std::string Name;
40 
41     std::vector<std::pair<unsigned, std::string> > RecordNames;
42   };
43 private:
44   std::unique_ptr<MemoryObject> BitcodeBytes;
45 
46   std::vector<BlockInfo> BlockInfoRecords;
47 
48   /// This is set to true if we don't care about the block/record name
49   /// information in the BlockInfo block. Only llvm-bcanalyzer uses this.
50   bool IgnoreBlockInfoNames;
51 
52   BitstreamReader(const BitstreamReader&) = delete;
53   void operator=(const BitstreamReader&) = delete;
54 public:
BitstreamReader()55   BitstreamReader() : IgnoreBlockInfoNames(true) {
56   }
57 
BitstreamReader(const unsigned char * Start,const unsigned char * End)58   BitstreamReader(const unsigned char *Start, const unsigned char *End)
59       : IgnoreBlockInfoNames(true) {
60     init(Start, End);
61   }
62 
BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)63   BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)
64       : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {}
65 
BitstreamReader(BitstreamReader && Other)66   BitstreamReader(BitstreamReader &&Other) {
67     *this = std::move(Other);
68   }
69 
70   BitstreamReader &operator=(BitstreamReader &&Other) {
71     BitcodeBytes = std::move(Other.BitcodeBytes);
72     // Explicitly swap block info, so that nothing gets destroyed twice.
73     std::swap(BlockInfoRecords, Other.BlockInfoRecords);
74     IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames;
75     return *this;
76   }
77 
init(const unsigned char * Start,const unsigned char * End)78   void init(const unsigned char *Start, const unsigned char *End) {
79     assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
80     BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
81   }
82 
getBitcodeBytes()83   MemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
84 
85   /// This is called by clients that want block/record name information.
CollectBlockInfoNames()86   void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
isIgnoringBlockInfoNames()87   bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
88 
89   //===--------------------------------------------------------------------===//
90   // Block Manipulation
91   //===--------------------------------------------------------------------===//
92 
93   /// Return true if we've already read and processed the block info block for
94   /// this Bitstream. We only process it for the first cursor that walks over
95   /// it.
hasBlockInfoRecords()96   bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
97 
98   /// If there is block info for the specified ID, return it, otherwise return
99   /// null.
getBlockInfo(unsigned BlockID)100   const BlockInfo *getBlockInfo(unsigned BlockID) const {
101     // Common case, the most recent entry matches BlockID.
102     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
103       return &BlockInfoRecords.back();
104 
105     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
106          i != e; ++i)
107       if (BlockInfoRecords[i].BlockID == BlockID)
108         return &BlockInfoRecords[i];
109     return nullptr;
110   }
111 
getOrCreateBlockInfo(unsigned BlockID)112   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
113     if (const BlockInfo *BI = getBlockInfo(BlockID))
114       return *const_cast<BlockInfo*>(BI);
115 
116     // Otherwise, add a new record.
117     BlockInfoRecords.emplace_back();
118     BlockInfoRecords.back().BlockID = BlockID;
119     return BlockInfoRecords.back();
120   }
121 
122   /// Takes block info from the other bitstream reader.
123   ///
124   /// This is a "take" operation because BlockInfo records are non-trivial, and
125   /// indeed rather expensive.
takeBlockInfo(BitstreamReader && Other)126   void takeBlockInfo(BitstreamReader &&Other) {
127     assert(!hasBlockInfoRecords());
128     BlockInfoRecords = std::move(Other.BlockInfoRecords);
129   }
130 };
131 
132 /// This represents a position within a bitstream. There may be multiple
133 /// independent cursors reading within one bitstream, each maintaining their
134 /// own local state.
135 class SimpleBitstreamCursor {
136   BitstreamReader *R = nullptr;
137   size_t NextChar = 0;
138 
139   // The size of the bicode. 0 if we don't know it yet.
140   size_t Size = 0;
141 
142   /// This is the current data we have pulled from the stream but have not
143   /// returned to the client. This is specifically and intentionally defined to
144   /// follow the word size of the host machine for efficiency. We use word_t in
145   /// places that are aware of this to make it perfectly explicit what is going
146   /// on.
147 public:
148   typedef size_t word_t;
149 
150 private:
151   word_t CurWord = 0;
152 
153   /// This is the number of bits in CurWord that are valid. This is always from
154   /// [0...bits_of(size_t)-1] inclusive.
155   unsigned BitsInCurWord = 0;
156 
157 public:
158   static const size_t MaxChunkSize = sizeof(word_t) * 8;
159 
160   SimpleBitstreamCursor() = default;
161 
SimpleBitstreamCursor(BitstreamReader & R)162   explicit SimpleBitstreamCursor(BitstreamReader &R) : R(&R) {}
SimpleBitstreamCursor(BitstreamReader * R)163   explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {}
164 
canSkipToPos(size_t pos)165   bool canSkipToPos(size_t pos) const {
166     // pos can be skipped to if it is a valid address or one byte past the end.
167     return pos == 0 ||
168            R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1));
169   }
170 
AtEndOfStream()171   bool AtEndOfStream() {
172     if (BitsInCurWord != 0)
173       return false;
174     if (Size != 0)
175       return Size <= NextChar;
176     fillCurWord();
177     return BitsInCurWord == 0;
178   }
179 
180   /// Return the bit # of the bit we are reading.
GetCurrentBitNo()181   uint64_t GetCurrentBitNo() const {
182     return NextChar*CHAR_BIT - BitsInCurWord;
183   }
184 
185   // Return the byte # of the current bit.
getCurrentByteNo()186   uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
187 
getBitStreamReader()188   BitstreamReader *getBitStreamReader() { return R; }
getBitStreamReader()189   const BitstreamReader *getBitStreamReader() const { return R; }
190 
191   /// Reset the stream to the specified bit number.
JumpToBit(uint64_t BitNo)192   void JumpToBit(uint64_t BitNo) {
193     size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
194     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
195     assert(canSkipToPos(ByteNo) && "Invalid location");
196 
197     // Move the cursor to the right word.
198     NextChar = ByteNo;
199     BitsInCurWord = 0;
200 
201     // Skip over any bits that are already consumed.
202     if (WordBitNo)
203       Read(WordBitNo);
204   }
205 
206   /// Reset the stream to the bit pointed at by the specified pointer.
207   ///
208   /// The pointer must be a dereferenceable pointer into the bytes in the
209   /// underlying memory object.
jumpToPointer(const uint8_t * Pointer)210   void jumpToPointer(const uint8_t *Pointer) {
211     auto *Pointer0 = getPointerToByte(0, 1);
212     assert((intptr_t)Pointer0 <= (intptr_t)Pointer &&
213            "Expected pointer into bitstream");
214 
215     JumpToBit(8 * (Pointer - Pointer0));
216     assert((intptr_t)getPointerToByte(getCurrentByteNo(), 1) ==
217                (intptr_t)Pointer &&
218            "Expected to reach pointer");
219   }
jumpToPointer(const char * Pointer)220   void jumpToPointer(const char *Pointer) {
221     jumpToPointer((const uint8_t *)Pointer);
222   }
223 
224   /// Get a pointer into the bitstream at the specified byte offset.
getPointerToByte(uint64_t ByteNo,uint64_t NumBytes)225   const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
226     return R->getBitcodeBytes().getPointer(ByteNo, NumBytes);
227   }
228 
229   /// Get a pointer into the bitstream at the specified bit offset.
230   ///
231   /// The bit offset must be on a byte boundary.
getPointerToBit(uint64_t BitNo,uint64_t NumBytes)232   const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
233     assert(!(BitNo % 8) && "Expected bit on byte boundary");
234     return getPointerToByte(BitNo / 8, NumBytes);
235   }
236 
fillCurWord()237   void fillCurWord() {
238     if (Size != 0 && NextChar >= Size)
239       report_fatal_error("Unexpected end of file");
240 
241     // Read the next word from the stream.
242     uint8_t Array[sizeof(word_t)] = {0};
243 
244     uint64_t BytesRead =
245         R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
246 
247     // If we run out of data, stop at the end of the stream.
248     if (BytesRead == 0) {
249       CurWord = 0;
250       BitsInCurWord = 0;
251       Size = NextChar;
252       return;
253     }
254 
255     CurWord =
256         support::endian::read<word_t, support::little, support::unaligned>(
257             Array);
258     NextChar += BytesRead;
259     BitsInCurWord = BytesRead * 8;
260   }
261 
Read(unsigned NumBits)262   word_t Read(unsigned NumBits) {
263     static const unsigned BitsInWord = MaxChunkSize;
264 
265     assert(NumBits && NumBits <= BitsInWord &&
266            "Cannot return zero or more than BitsInWord bits!");
267 
268     static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
269 
270     // If the field is fully contained by CurWord, return it quickly.
271     if (BitsInCurWord >= NumBits) {
272       word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
273 
274       // Use a mask to avoid undefined behavior.
275       CurWord >>= (NumBits & Mask);
276 
277       BitsInCurWord -= NumBits;
278       return R;
279     }
280 
281     word_t R = BitsInCurWord ? CurWord : 0;
282     unsigned BitsLeft = NumBits - BitsInCurWord;
283 
284     fillCurWord();
285 
286     // If we run out of data, stop at the end of the stream.
287     if (BitsLeft > BitsInCurWord)
288       return 0;
289 
290     word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
291 
292     // Use a mask to avoid undefined behavior.
293     CurWord >>= (BitsLeft & Mask);
294 
295     BitsInCurWord -= BitsLeft;
296 
297     R |= R2 << (NumBits - BitsLeft);
298 
299     return R;
300   }
301 
ReadVBR(unsigned NumBits)302   uint32_t ReadVBR(unsigned NumBits) {
303     uint32_t Piece = Read(NumBits);
304     if ((Piece & (1U << (NumBits-1))) == 0)
305       return Piece;
306 
307     uint32_t Result = 0;
308     unsigned NextBit = 0;
309     while (1) {
310       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
311 
312       if ((Piece & (1U << (NumBits-1))) == 0)
313         return Result;
314 
315       NextBit += NumBits-1;
316       Piece = Read(NumBits);
317     }
318   }
319 
320   // Read a VBR that may have a value up to 64-bits in size. The chunk size of
321   // the VBR must still be <= 32 bits though.
ReadVBR64(unsigned NumBits)322   uint64_t ReadVBR64(unsigned NumBits) {
323     uint32_t Piece = Read(NumBits);
324     if ((Piece & (1U << (NumBits-1))) == 0)
325       return uint64_t(Piece);
326 
327     uint64_t Result = 0;
328     unsigned NextBit = 0;
329     while (1) {
330       Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
331 
332       if ((Piece & (1U << (NumBits-1))) == 0)
333         return Result;
334 
335       NextBit += NumBits-1;
336       Piece = Read(NumBits);
337     }
338   }
339 
SkipToFourByteBoundary()340   void SkipToFourByteBoundary() {
341     // If word_t is 64-bits and if we've read less than 32 bits, just dump
342     // the bits we have up to the next 32-bit boundary.
343     if (sizeof(word_t) > 4 &&
344         BitsInCurWord >= 32) {
345       CurWord >>= BitsInCurWord-32;
346       BitsInCurWord = 32;
347       return;
348     }
349 
350     BitsInCurWord = 0;
351   }
352 
353   /// Skip to the end of the file.
skipToEnd()354   void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); }
355 
356   /// Prevent the cursor from reading past a byte boundary.
357   ///
358   /// Prevent the cursor from requesting byte reads past \c Limit.  This is
359   /// useful when working with a cursor on a StreamingMemoryObject, when it's
360   /// desirable to avoid invalidating the result of getPointerToByte().
361   ///
362   /// If \c Limit is on a word boundary, AtEndOfStream() will return true if
363   /// the cursor position reaches or exceeds \c Limit, regardless of the true
364   /// number of available bytes.  Otherwise, AtEndOfStream() returns true when
365   /// it reaches or exceeds the next word boundary.
setArtificialByteLimit(uint64_t Limit)366   void setArtificialByteLimit(uint64_t Limit) {
367     assert(getCurrentByteNo() < Limit && "Move cursor before lowering limit");
368 
369     // Round to word boundary.
370     Limit = alignTo(Limit, sizeof(word_t));
371 
372     // Only change size if the new one is lower.
373     if (!Size || Size > Limit)
374       Size = Limit;
375   }
376 
377   /// Return the Size, if known.
getSizeIfKnown()378   uint64_t getSizeIfKnown() const { return Size; }
379 };
380 
381 /// When advancing through a bitstream cursor, each advance can discover a few
382 /// different kinds of entries:
383 struct BitstreamEntry {
384   enum {
385     Error,    // Malformed bitcode was found.
386     EndBlock, // We've reached the end of the current block, (or the end of the
387               // file, which is treated like a series of EndBlock records.
388     SubBlock, // This is the start of a new subblock of a specific ID.
389     Record    // This is a record with a specific AbbrevID.
390   } Kind;
391 
392   unsigned ID;
393 
getErrorBitstreamEntry394   static BitstreamEntry getError() {
395     BitstreamEntry E; E.Kind = Error; return E;
396   }
getEndBlockBitstreamEntry397   static BitstreamEntry getEndBlock() {
398     BitstreamEntry E; E.Kind = EndBlock; return E;
399   }
getSubBlockBitstreamEntry400   static BitstreamEntry getSubBlock(unsigned ID) {
401     BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
402   }
getRecordBitstreamEntry403   static BitstreamEntry getRecord(unsigned AbbrevID) {
404     BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
405   }
406 };
407 
408 /// This represents a position within a bitcode file, implemented on top of a
409 /// SimpleBitstreamCursor.
410 ///
411 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
412 /// be passed by value.
413 class BitstreamCursor : SimpleBitstreamCursor {
414   // This is the declared size of code values used for the current block, in
415   // bits.
416   unsigned CurCodeSize = 2;
417 
418   /// Abbrevs installed at in this block.
419   std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
420 
421   struct Block {
422     unsigned PrevCodeSize;
423     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
BlockBlock424     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
425   };
426 
427   /// This tracks the codesize of parent blocks.
428   SmallVector<Block, 8> BlockScope;
429 
430 
431 public:
432   static const size_t MaxChunkSize = sizeof(word_t) * 8;
433 
434   BitstreamCursor() = default;
435 
BitstreamCursor(BitstreamReader & R)436   explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
437 
init(BitstreamReader * R)438   void init(BitstreamReader *R) {
439     freeState();
440     SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R));
441     CurCodeSize = 2;
442   }
443 
444   void freeState();
445 
446   using SimpleBitstreamCursor::canSkipToPos;
447   using SimpleBitstreamCursor::AtEndOfStream;
448   using SimpleBitstreamCursor::GetCurrentBitNo;
449   using SimpleBitstreamCursor::getCurrentByteNo;
450   using SimpleBitstreamCursor::getPointerToByte;
451   using SimpleBitstreamCursor::getBitStreamReader;
452   using SimpleBitstreamCursor::JumpToBit;
453   using SimpleBitstreamCursor::fillCurWord;
454   using SimpleBitstreamCursor::Read;
455   using SimpleBitstreamCursor::ReadVBR;
456   using SimpleBitstreamCursor::ReadVBR64;
457 
458   /// Return the number of bits used to encode an abbrev #.
getAbbrevIDWidth()459   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
460 
461   /// Flags that modify the behavior of advance().
462   enum {
463     /// If this flag is used, the advance() method does not automatically pop
464     /// the block scope when the end of a block is reached.
465     AF_DontPopBlockAtEnd = 1,
466 
467     /// If this flag is used, abbrev entries are returned just like normal
468     /// records.
469     AF_DontAutoprocessAbbrevs = 2
470   };
471 
472   /// Advance the current bitstream, returning the next entry in the stream.
473   BitstreamEntry advance(unsigned Flags = 0) {
474     while (1) {
475       unsigned Code = ReadCode();
476       if (Code == bitc::END_BLOCK) {
477         // Pop the end of the block unless Flags tells us not to.
478         if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
479           return BitstreamEntry::getError();
480         return BitstreamEntry::getEndBlock();
481       }
482 
483       if (Code == bitc::ENTER_SUBBLOCK)
484         return BitstreamEntry::getSubBlock(ReadSubBlockID());
485 
486       if (Code == bitc::DEFINE_ABBREV &&
487           !(Flags & AF_DontAutoprocessAbbrevs)) {
488         // We read and accumulate abbrev's, the client can't do anything with
489         // them anyway.
490         ReadAbbrevRecord();
491         continue;
492       }
493 
494       return BitstreamEntry::getRecord(Code);
495     }
496   }
497 
498   /// This is a convenience function for clients that don't expect any
499   /// subblocks. This just skips over them automatically.
500   BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
501     while (1) {
502       // If we found a normal entry, return it.
503       BitstreamEntry Entry = advance(Flags);
504       if (Entry.Kind != BitstreamEntry::SubBlock)
505         return Entry;
506 
507       // If we found a sub-block, just skip over it and check the next entry.
508       if (SkipBlock())
509         return BitstreamEntry::getError();
510     }
511   }
512 
ReadCode()513   unsigned ReadCode() {
514     return Read(CurCodeSize);
515   }
516 
517 
518   // Block header:
519   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
520 
521   /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
ReadSubBlockID()522   unsigned ReadSubBlockID() {
523     return ReadVBR(bitc::BlockIDWidth);
524   }
525 
526   /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
527   /// of this block. If the block record is malformed, return true.
SkipBlock()528   bool SkipBlock() {
529     // Read and ignore the codelen value.  Since we are skipping this block, we
530     // don't care what code widths are used inside of it.
531     ReadVBR(bitc::CodeLenWidth);
532     SkipToFourByteBoundary();
533     unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
534 
535     // Check that the block wasn't partially defined, and that the offset isn't
536     // bogus.
537     size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
538     if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
539       return true;
540 
541     JumpToBit(SkipTo);
542     return false;
543   }
544 
545   /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
546   /// if the block has an error.
547   bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
548 
ReadBlockEnd()549   bool ReadBlockEnd() {
550     if (BlockScope.empty()) return true;
551 
552     // Block tail:
553     //    [END_BLOCK, <align4bytes>]
554     SkipToFourByteBoundary();
555 
556     popBlockScope();
557     return false;
558   }
559 
560 private:
561 
popBlockScope()562   void popBlockScope() {
563     CurCodeSize = BlockScope.back().PrevCodeSize;
564 
565     CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
566     BlockScope.pop_back();
567   }
568 
569   //===--------------------------------------------------------------------===//
570   // Record Processing
571   //===--------------------------------------------------------------------===//
572 
573 public:
574   /// Return the abbreviation for the specified AbbrevId.
getAbbrev(unsigned AbbrevID)575   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
576     unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
577     if (AbbrevNo >= CurAbbrevs.size())
578       report_fatal_error("Invalid abbrev number");
579     return CurAbbrevs[AbbrevNo].get();
580   }
581 
582   /// Read the current record and discard it.
583   void skipRecord(unsigned AbbrevID);
584 
585   unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
586                       StringRef *Blob = nullptr);
587 
588   //===--------------------------------------------------------------------===//
589   // Abbrev Processing
590   //===--------------------------------------------------------------------===//
591   void ReadAbbrevRecord();
592 
593   bool ReadBlockInfoBlock();
594 };
595 
596 } // End llvm namespace
597 
598 #endif
599