1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamReader class. This class can be used to 11 // read an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H 16 #define LLVM_BITCODE_BITSTREAMREADER_H 17 18 #include "llvm/Bitcode/BitCodes.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/Support/StreamingMemoryObject.h" 21 #include <climits> 22 #include <string> 23 #include <vector> 24 25 namespace llvm { 26 27 /// This class is used to read from an LLVM bitcode stream, maintaining 28 /// information that is global to decoding the entire file. While a file is 29 /// being read, multiple cursors can be independently advanced or skipped around 30 /// within the file. These are represented by the BitstreamCursor class. 31 class BitstreamReader { 32 public: 33 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These 34 /// describe abbreviations that all blocks of the specified ID inherit. 35 struct BlockInfo { 36 unsigned BlockID; 37 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; 38 std::string Name; 39 40 std::vector<std::pair<unsigned, std::string> > RecordNames; 41 }; 42 private: 43 std::unique_ptr<MemoryObject> BitcodeBytes; 44 45 std::vector<BlockInfo> BlockInfoRecords; 46 47 /// This is set to true if we don't care about the block/record name 48 /// information in the BlockInfo block. Only llvm-bcanalyzer uses this. 49 bool IgnoreBlockInfoNames; 50 51 BitstreamReader(const BitstreamReader&) = delete; 52 void operator=(const BitstreamReader&) = delete; 53 public: BitstreamReader()54 BitstreamReader() : IgnoreBlockInfoNames(true) { 55 } 56 BitstreamReader(const unsigned char * Start,const unsigned char * End)57 BitstreamReader(const unsigned char *Start, const unsigned char *End) 58 : IgnoreBlockInfoNames(true) { 59 init(Start, End); 60 } 61 BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)62 BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes) 63 : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {} 64 BitstreamReader(BitstreamReader && Other)65 BitstreamReader(BitstreamReader &&Other) { 66 *this = std::move(Other); 67 } 68 69 BitstreamReader &operator=(BitstreamReader &&Other) { 70 BitcodeBytes = std::move(Other.BitcodeBytes); 71 // Explicitly swap block info, so that nothing gets destroyed twice. 72 std::swap(BlockInfoRecords, Other.BlockInfoRecords); 73 IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames; 74 return *this; 75 } 76 init(const unsigned char * Start,const unsigned char * End)77 void init(const unsigned char *Start, const unsigned char *End) { 78 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 79 BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); 80 } 81 getBitcodeBytes()82 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 83 84 /// This is called by clients that want block/record name information. CollectBlockInfoNames()85 void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } isIgnoringBlockInfoNames()86 bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } 87 88 //===--------------------------------------------------------------------===// 89 // Block Manipulation 90 //===--------------------------------------------------------------------===// 91 92 /// Return true if we've already read and processed the block info block for 93 /// this Bitstream. We only process it for the first cursor that walks over 94 /// it. hasBlockInfoRecords()95 bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } 96 97 /// If there is block info for the specified ID, return it, otherwise return 98 /// null. getBlockInfo(unsigned BlockID)99 const BlockInfo *getBlockInfo(unsigned BlockID) const { 100 // Common case, the most recent entry matches BlockID. 101 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 102 return &BlockInfoRecords.back(); 103 104 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 105 i != e; ++i) 106 if (BlockInfoRecords[i].BlockID == BlockID) 107 return &BlockInfoRecords[i]; 108 return nullptr; 109 } 110 getOrCreateBlockInfo(unsigned BlockID)111 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 112 if (const BlockInfo *BI = getBlockInfo(BlockID)) 113 return *const_cast<BlockInfo*>(BI); 114 115 // Otherwise, add a new record. 116 BlockInfoRecords.push_back(BlockInfo()); 117 BlockInfoRecords.back().BlockID = BlockID; 118 return BlockInfoRecords.back(); 119 } 120 121 /// Takes block info from the other bitstream reader. 122 /// 123 /// This is a "take" operation because BlockInfo records are non-trivial, and 124 /// indeed rather expensive. takeBlockInfo(BitstreamReader && Other)125 void takeBlockInfo(BitstreamReader &&Other) { 126 assert(!hasBlockInfoRecords()); 127 BlockInfoRecords = std::move(Other.BlockInfoRecords); 128 } 129 }; 130 131 /// When advancing through a bitstream cursor, each advance can discover a few 132 /// different kinds of entries: 133 struct BitstreamEntry { 134 enum { 135 Error, // Malformed bitcode was found. 136 EndBlock, // We've reached the end of the current block, (or the end of the 137 // file, which is treated like a series of EndBlock records. 138 SubBlock, // This is the start of a new subblock of a specific ID. 139 Record // This is a record with a specific AbbrevID. 140 } Kind; 141 142 unsigned ID; 143 getErrorBitstreamEntry144 static BitstreamEntry getError() { 145 BitstreamEntry E; E.Kind = Error; return E; 146 } getEndBlockBitstreamEntry147 static BitstreamEntry getEndBlock() { 148 BitstreamEntry E; E.Kind = EndBlock; return E; 149 } getSubBlockBitstreamEntry150 static BitstreamEntry getSubBlock(unsigned ID) { 151 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 152 } getRecordBitstreamEntry153 static BitstreamEntry getRecord(unsigned AbbrevID) { 154 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 155 } 156 }; 157 158 /// This represents a position within a bitcode file. There may be multiple 159 /// independent cursors reading within one bitstream, each maintaining their own 160 /// local state. 161 /// 162 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 163 /// be passed by value. 164 class BitstreamCursor { 165 BitstreamReader *BitStream; 166 size_t NextChar; 167 168 // The size of the bicode. 0 if we don't know it yet. 169 size_t Size; 170 171 /// This is the current data we have pulled from the stream but have not 172 /// returned to the client. This is specifically and intentionally defined to 173 /// follow the word size of the host machine for efficiency. We use word_t in 174 /// places that are aware of this to make it perfectly explicit what is going 175 /// on. 176 typedef size_t word_t; 177 word_t CurWord; 178 179 /// This is the number of bits in CurWord that are valid. This is always from 180 /// [0...bits_of(size_t)-1] inclusive. 181 unsigned BitsInCurWord; 182 183 // This is the declared size of code values used for the current block, in 184 // bits. 185 unsigned CurCodeSize; 186 187 /// Abbrevs installed at in this block. 188 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs; 189 190 struct Block { 191 unsigned PrevCodeSize; 192 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; BlockBlock193 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 194 }; 195 196 /// This tracks the codesize of parent blocks. 197 SmallVector<Block, 8> BlockScope; 198 199 200 public: BitstreamCursor()201 BitstreamCursor() { init(nullptr); } 202 BitstreamCursor(BitstreamReader & R)203 explicit BitstreamCursor(BitstreamReader &R) { init(&R); } 204 init(BitstreamReader * R)205 void init(BitstreamReader *R) { 206 freeState(); 207 208 BitStream = R; 209 NextChar = 0; 210 Size = 0; 211 BitsInCurWord = 0; 212 CurCodeSize = 2; 213 } 214 215 void freeState(); 216 canSkipToPos(size_t pos)217 bool canSkipToPos(size_t pos) const { 218 // pos can be skipped to if it is a valid address or one byte past the end. 219 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( 220 static_cast<uint64_t>(pos - 1)); 221 } 222 AtEndOfStream()223 bool AtEndOfStream() { 224 if (BitsInCurWord != 0) 225 return false; 226 if (Size != 0) 227 return Size == NextChar; 228 fillCurWord(); 229 return BitsInCurWord == 0; 230 } 231 232 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()233 unsigned getAbbrevIDWidth() const { return CurCodeSize; } 234 235 /// Return the bit # of the bit we are reading. GetCurrentBitNo()236 uint64_t GetCurrentBitNo() const { 237 return NextChar*CHAR_BIT - BitsInCurWord; 238 } 239 getBitStreamReader()240 BitstreamReader *getBitStreamReader() { 241 return BitStream; 242 } getBitStreamReader()243 const BitstreamReader *getBitStreamReader() const { 244 return BitStream; 245 } 246 247 /// Flags that modify the behavior of advance(). 248 enum { 249 /// If this flag is used, the advance() method does not automatically pop 250 /// the block scope when the end of a block is reached. 251 AF_DontPopBlockAtEnd = 1, 252 253 /// If this flag is used, abbrev entries are returned just like normal 254 /// records. 255 AF_DontAutoprocessAbbrevs = 2 256 }; 257 258 /// Advance the current bitstream, returning the next entry in the stream. 259 BitstreamEntry advance(unsigned Flags = 0) { 260 while (1) { 261 unsigned Code = ReadCode(); 262 if (Code == bitc::END_BLOCK) { 263 // Pop the end of the block unless Flags tells us not to. 264 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 265 return BitstreamEntry::getError(); 266 return BitstreamEntry::getEndBlock(); 267 } 268 269 if (Code == bitc::ENTER_SUBBLOCK) 270 return BitstreamEntry::getSubBlock(ReadSubBlockID()); 271 272 if (Code == bitc::DEFINE_ABBREV && 273 !(Flags & AF_DontAutoprocessAbbrevs)) { 274 // We read and accumulate abbrev's, the client can't do anything with 275 // them anyway. 276 ReadAbbrevRecord(); 277 continue; 278 } 279 280 return BitstreamEntry::getRecord(Code); 281 } 282 } 283 284 /// This is a convenience function for clients that don't expect any 285 /// subblocks. This just skips over them automatically. 286 BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 287 while (1) { 288 // If we found a normal entry, return it. 289 BitstreamEntry Entry = advance(Flags); 290 if (Entry.Kind != BitstreamEntry::SubBlock) 291 return Entry; 292 293 // If we found a sub-block, just skip over it and check the next entry. 294 if (SkipBlock()) 295 return BitstreamEntry::getError(); 296 } 297 } 298 299 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)300 void JumpToBit(uint64_t BitNo) { 301 size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); 302 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 303 assert(canSkipToPos(ByteNo) && "Invalid location"); 304 305 // Move the cursor to the right word. 306 NextChar = ByteNo; 307 BitsInCurWord = 0; 308 309 // Skip over any bits that are already consumed. 310 if (WordBitNo) 311 Read(WordBitNo); 312 } 313 fillCurWord()314 void fillCurWord() { 315 if (Size != 0 && NextChar >= Size) 316 report_fatal_error("Unexpected end of file"); 317 318 // Read the next word from the stream. 319 uint8_t Array[sizeof(word_t)] = {0}; 320 321 uint64_t BytesRead = 322 BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); 323 324 // If we run out of data, stop at the end of the stream. 325 if (BytesRead == 0) { 326 Size = NextChar; 327 return; 328 } 329 330 CurWord = 331 support::endian::read<word_t, support::little, support::unaligned>( 332 Array); 333 NextChar += BytesRead; 334 BitsInCurWord = BytesRead * 8; 335 } 336 Read(unsigned NumBits)337 word_t Read(unsigned NumBits) { 338 static const unsigned BitsInWord = sizeof(word_t) * 8; 339 340 assert(NumBits && NumBits <= BitsInWord && 341 "Cannot return zero or more than BitsInWord bits!"); 342 343 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 344 345 // If the field is fully contained by CurWord, return it quickly. 346 if (BitsInCurWord >= NumBits) { 347 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 348 349 // Use a mask to avoid undefined behavior. 350 CurWord >>= (NumBits & Mask); 351 352 BitsInCurWord -= NumBits; 353 return R; 354 } 355 356 word_t R = BitsInCurWord ? CurWord : 0; 357 unsigned BitsLeft = NumBits - BitsInCurWord; 358 359 fillCurWord(); 360 361 // If we run out of data, stop at the end of the stream. 362 if (BitsLeft > BitsInCurWord) 363 return 0; 364 365 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 366 367 // Use a mask to avoid undefined behavior. 368 CurWord >>= (BitsLeft & Mask); 369 370 BitsInCurWord -= BitsLeft; 371 372 R |= R2 << (NumBits - BitsLeft); 373 374 return R; 375 } 376 ReadVBR(unsigned NumBits)377 uint32_t ReadVBR(unsigned NumBits) { 378 uint32_t Piece = Read(NumBits); 379 if ((Piece & (1U << (NumBits-1))) == 0) 380 return Piece; 381 382 uint32_t Result = 0; 383 unsigned NextBit = 0; 384 while (1) { 385 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 386 387 if ((Piece & (1U << (NumBits-1))) == 0) 388 return Result; 389 390 NextBit += NumBits-1; 391 Piece = Read(NumBits); 392 } 393 } 394 395 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 396 // the VBR must still be <= 32 bits though. ReadVBR64(unsigned NumBits)397 uint64_t ReadVBR64(unsigned NumBits) { 398 uint32_t Piece = Read(NumBits); 399 if ((Piece & (1U << (NumBits-1))) == 0) 400 return uint64_t(Piece); 401 402 uint64_t Result = 0; 403 unsigned NextBit = 0; 404 while (1) { 405 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; 406 407 if ((Piece & (1U << (NumBits-1))) == 0) 408 return Result; 409 410 NextBit += NumBits-1; 411 Piece = Read(NumBits); 412 } 413 } 414 415 private: SkipToFourByteBoundary()416 void SkipToFourByteBoundary() { 417 // If word_t is 64-bits and if we've read less than 32 bits, just dump 418 // the bits we have up to the next 32-bit boundary. 419 if (sizeof(word_t) > 4 && 420 BitsInCurWord >= 32) { 421 CurWord >>= BitsInCurWord-32; 422 BitsInCurWord = 32; 423 return; 424 } 425 426 BitsInCurWord = 0; 427 } 428 public: 429 ReadCode()430 unsigned ReadCode() { 431 return Read(CurCodeSize); 432 } 433 434 435 // Block header: 436 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 437 438 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()439 unsigned ReadSubBlockID() { 440 return ReadVBR(bitc::BlockIDWidth); 441 } 442 443 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 444 /// of this block. If the block record is malformed, return true. SkipBlock()445 bool SkipBlock() { 446 // Read and ignore the codelen value. Since we are skipping this block, we 447 // don't care what code widths are used inside of it. 448 ReadVBR(bitc::CodeLenWidth); 449 SkipToFourByteBoundary(); 450 unsigned NumFourBytes = Read(bitc::BlockSizeWidth); 451 452 // Check that the block wasn't partially defined, and that the offset isn't 453 // bogus. 454 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; 455 if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) 456 return true; 457 458 JumpToBit(SkipTo); 459 return false; 460 } 461 462 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true 463 /// if the block has an error. 464 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 465 ReadBlockEnd()466 bool ReadBlockEnd() { 467 if (BlockScope.empty()) return true; 468 469 // Block tail: 470 // [END_BLOCK, <align4bytes>] 471 SkipToFourByteBoundary(); 472 473 popBlockScope(); 474 return false; 475 } 476 477 private: 478 popBlockScope()479 void popBlockScope() { 480 CurCodeSize = BlockScope.back().PrevCodeSize; 481 482 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); 483 BlockScope.pop_back(); 484 } 485 486 //===--------------------------------------------------------------------===// 487 // Record Processing 488 //===--------------------------------------------------------------------===// 489 490 public: 491 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)492 const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { 493 unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; 494 if (AbbrevNo >= CurAbbrevs.size()) 495 report_fatal_error("Invalid abbrev number"); 496 return CurAbbrevs[AbbrevNo].get(); 497 } 498 499 /// Read the current record and discard it. 500 void skipRecord(unsigned AbbrevID); 501 502 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, 503 StringRef *Blob = nullptr); 504 505 //===--------------------------------------------------------------------===// 506 // Abbrev Processing 507 //===--------------------------------------------------------------------===// 508 void ReadAbbrevRecord(); 509 510 bool ReadBlockInfoBlock(); 511 }; 512 513 } // End llvm namespace 514 515 #endif 516