1 //===-- llvm-bcanalyzer.cpp - Bitcode Analyzer --------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tool may be invoked in the following manner:
11 //  llvm-bcanalyzer [options]      - Read LLVM bitcode from stdin
12 //  llvm-bcanalyzer [options] x.bc - Read LLVM bitcode from the x.bc file
13 //
14 //  Options:
15 //      --help      - Output information about command line switches
16 //      --dump      - Dump low-level bitcode structure in readable format
17 //
// This tool provides analytical information about a bitcode file. It is
// intended as an aid to developers of bitcode reading and writing software. It
// writes to standard output a summary of the bitcode file that shows various
// statistics about the contents of the file. By default this information is
// detailed and contains information about individual bitcode blocks and the
// functions in the module.
// The tool is also able to print a bitcode file in a straightforward text
// format that shows the containment and relationships of the information in
// the bitcode file (-dump option).
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitstreamReader.h"
32 #include "llvm/Bitcode/LLVMBitCodes.h"
33 #include "llvm/Bitcode/ReaderWriter.h"
34 #include "llvm/IR/Verifier.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Format.h"
37 #include "llvm/Support/ManagedStatic.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/PrettyStackTrace.h"
40 #include "llvm/Support/SHA1.h"
41 #include "llvm/Support/Signals.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <algorithm>
44 #include <cctype>
45 #include <map>
46 #include <system_error>
47 using namespace llvm;
48 
49 static cl::opt<std::string>
50   InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
51 
52 static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"));
53 
54 //===----------------------------------------------------------------------===//
55 // Bitcode specific analysis.
56 //===----------------------------------------------------------------------===//
57 
58 static cl::opt<bool> NoHistogram("disable-histogram",
59                                  cl::desc("Do not print per-code histogram"));
60 
61 static cl::opt<bool>
62 NonSymbolic("non-symbolic",
63             cl::desc("Emit numeric info in dump even if"
64                      " symbolic info is available"));
65 
66 static cl::opt<std::string>
67   BlockInfoFilename("block-info",
68                     cl::desc("Use the BLOCK_INFO from the given file"));
69 
70 static cl::opt<bool>
71   ShowBinaryBlobs("show-binary-blobs",
72                   cl::desc("Print binary blobs using hex escapes"));
73 
namespace {

/// CurStreamTypeType - The kind of bitstream being analyzed.  A stream is
/// classified as LLVM IR when its signature matches; anything else is unknown.
enum CurStreamTypeType { UnknownBitstream = 0, LLVMIRBitstream = 1 };

} // end anonymous namespace
83 
84 /// GetBlockName - Return a symbolic block name if known, otherwise return
85 /// null.
GetBlockName(unsigned BlockID,const BitstreamReader & StreamFile,CurStreamTypeType CurStreamType)86 static const char *GetBlockName(unsigned BlockID,
87                                 const BitstreamReader &StreamFile,
88                                 CurStreamTypeType CurStreamType) {
89   // Standard blocks for all bitcode files.
90   if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
91     if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
92       return "BLOCKINFO_BLOCK";
93     return nullptr;
94   }
95 
96   // Check to see if we have a blockinfo record for this block, with a name.
97   if (const BitstreamReader::BlockInfo *Info =
98         StreamFile.getBlockInfo(BlockID)) {
99     if (!Info->Name.empty())
100       return Info->Name.c_str();
101   }
102 
103 
104   if (CurStreamType != LLVMIRBitstream) return nullptr;
105 
106   switch (BlockID) {
107   default:                                 return nullptr;
108   case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: return "OPERAND_BUNDLE_TAGS_BLOCK";
109   case bitc::MODULE_BLOCK_ID:              return "MODULE_BLOCK";
110   case bitc::PARAMATTR_BLOCK_ID:           return "PARAMATTR_BLOCK";
111   case bitc::PARAMATTR_GROUP_BLOCK_ID:     return "PARAMATTR_GROUP_BLOCK_ID";
112   case bitc::TYPE_BLOCK_ID_NEW:            return "TYPE_BLOCK_ID";
113   case bitc::CONSTANTS_BLOCK_ID:           return "CONSTANTS_BLOCK";
114   case bitc::FUNCTION_BLOCK_ID:            return "FUNCTION_BLOCK";
115   case bitc::IDENTIFICATION_BLOCK_ID:
116                                            return "IDENTIFICATION_BLOCK_ID";
117   case bitc::VALUE_SYMTAB_BLOCK_ID:        return "VALUE_SYMTAB";
118   case bitc::METADATA_BLOCK_ID:            return "METADATA_BLOCK";
119   case bitc::METADATA_KIND_BLOCK_ID:       return "METADATA_KIND_BLOCK";
120   case bitc::METADATA_ATTACHMENT_ID:       return "METADATA_ATTACHMENT_BLOCK";
121   case bitc::USELIST_BLOCK_ID:             return "USELIST_BLOCK_ID";
122   case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
123                                            return "GLOBALVAL_SUMMARY_BLOCK";
124   case bitc::MODULE_STRTAB_BLOCK_ID:       return "MODULE_STRTAB_BLOCK";
125   }
126 }
127 
128 /// GetCodeName - Return a symbolic code name if known, otherwise return
129 /// null.
GetCodeName(unsigned CodeID,unsigned BlockID,const BitstreamReader & StreamFile,CurStreamTypeType CurStreamType)130 static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
131                                const BitstreamReader &StreamFile,
132                                CurStreamTypeType CurStreamType) {
133   // Standard blocks for all bitcode files.
134   if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
135     if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
136       switch (CodeID) {
137       default: return nullptr;
138       case bitc::BLOCKINFO_CODE_SETBID:        return "SETBID";
139       case bitc::BLOCKINFO_CODE_BLOCKNAME:     return "BLOCKNAME";
140       case bitc::BLOCKINFO_CODE_SETRECORDNAME: return "SETRECORDNAME";
141       }
142     }
143     return nullptr;
144   }
145 
146   // Check to see if we have a blockinfo record for this record, with a name.
147   if (const BitstreamReader::BlockInfo *Info =
148         StreamFile.getBlockInfo(BlockID)) {
149     for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
150       if (Info->RecordNames[i].first == CodeID)
151         return Info->RecordNames[i].second.c_str();
152   }
153 
154 
155   if (CurStreamType != LLVMIRBitstream) return nullptr;
156 
157 #define STRINGIFY_CODE(PREFIX, CODE)                                           \
158   case bitc::PREFIX##_##CODE:                                                  \
159     return #CODE;
160   switch (BlockID) {
161   default: return nullptr;
162   case bitc::MODULE_BLOCK_ID:
163     switch (CodeID) {
164     default: return nullptr;
165       STRINGIFY_CODE(MODULE_CODE, VERSION)
166       STRINGIFY_CODE(MODULE_CODE, TRIPLE)
167       STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
168       STRINGIFY_CODE(MODULE_CODE, ASM)
169       STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
170       STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0
171       STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
172       STRINGIFY_CODE(MODULE_CODE, FUNCTION)
173       STRINGIFY_CODE(MODULE_CODE, ALIAS)
174       STRINGIFY_CODE(MODULE_CODE, PURGEVALS)
175       STRINGIFY_CODE(MODULE_CODE, GCNAME)
176       STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
177       STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
178       STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
179       STRINGIFY_CODE(MODULE_CODE, HASH)
180     }
181   case bitc::IDENTIFICATION_BLOCK_ID:
182     switch (CodeID) {
183     default:
184       return nullptr;
185       STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
186       STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
187     }
188   case bitc::PARAMATTR_BLOCK_ID:
189     switch (CodeID) {
190     default: return nullptr;
191     // FIXME: Should these be different?
192     case bitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY";
193     case bitc::PARAMATTR_CODE_ENTRY:     return "ENTRY";
194     }
195   case bitc::PARAMATTR_GROUP_BLOCK_ID:
196     switch (CodeID) {
197     default: return nullptr;
198     case bitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY";
199     }
200   case bitc::TYPE_BLOCK_ID_NEW:
201     switch (CodeID) {
202     default: return nullptr;
203       STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
204       STRINGIFY_CODE(TYPE_CODE, VOID)
205       STRINGIFY_CODE(TYPE_CODE, FLOAT)
206       STRINGIFY_CODE(TYPE_CODE, DOUBLE)
207       STRINGIFY_CODE(TYPE_CODE, LABEL)
208       STRINGIFY_CODE(TYPE_CODE, OPAQUE)
209       STRINGIFY_CODE(TYPE_CODE, INTEGER)
210       STRINGIFY_CODE(TYPE_CODE, POINTER)
211       STRINGIFY_CODE(TYPE_CODE, ARRAY)
212       STRINGIFY_CODE(TYPE_CODE, VECTOR)
213       STRINGIFY_CODE(TYPE_CODE, X86_FP80)
214       STRINGIFY_CODE(TYPE_CODE, FP128)
215       STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
216       STRINGIFY_CODE(TYPE_CODE, METADATA)
217       STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
218       STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
219       STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
220       STRINGIFY_CODE(TYPE_CODE, FUNCTION)
221     }
222 
223   case bitc::CONSTANTS_BLOCK_ID:
224     switch (CodeID) {
225     default: return nullptr;
226       STRINGIFY_CODE(CST_CODE, SETTYPE)
227       STRINGIFY_CODE(CST_CODE, NULL)
228       STRINGIFY_CODE(CST_CODE, UNDEF)
229       STRINGIFY_CODE(CST_CODE, INTEGER)
230       STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
231       STRINGIFY_CODE(CST_CODE, FLOAT)
232       STRINGIFY_CODE(CST_CODE, AGGREGATE)
233       STRINGIFY_CODE(CST_CODE, STRING)
234       STRINGIFY_CODE(CST_CODE, CSTRING)
235       STRINGIFY_CODE(CST_CODE, CE_BINOP)
236       STRINGIFY_CODE(CST_CODE, CE_CAST)
237       STRINGIFY_CODE(CST_CODE, CE_GEP)
238       STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
239       STRINGIFY_CODE(CST_CODE, CE_SELECT)
240       STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
241       STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
242       STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
243       STRINGIFY_CODE(CST_CODE, CE_CMP)
244       STRINGIFY_CODE(CST_CODE, INLINEASM)
245       STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
246     case bitc::CST_CODE_BLOCKADDRESS:    return "CST_CODE_BLOCKADDRESS";
247       STRINGIFY_CODE(CST_CODE, DATA)
248     }
249   case bitc::FUNCTION_BLOCK_ID:
250     switch (CodeID) {
251     default: return nullptr;
252       STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
253       STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
254       STRINGIFY_CODE(FUNC_CODE, INST_CAST)
255       STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
256       STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
257       STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
258       STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
259       STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
260       STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
261       STRINGIFY_CODE(FUNC_CODE, INST_CMP)
262       STRINGIFY_CODE(FUNC_CODE, INST_RET)
263       STRINGIFY_CODE(FUNC_CODE, INST_BR)
264       STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
265       STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
266       STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
267       STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
268       STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
269       STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
270       STRINGIFY_CODE(FUNC_CODE, INST_PHI)
271       STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
272       STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
273       STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
274       STRINGIFY_CODE(FUNC_CODE, INST_STORE)
275       STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
276       STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
277       STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
278       STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
279       STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
280       STRINGIFY_CODE(FUNC_CODE, INST_CALL)
281       STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
282       STRINGIFY_CODE(FUNC_CODE, INST_GEP)
283       STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
284     }
285   case bitc::VALUE_SYMTAB_BLOCK_ID:
286     switch (CodeID) {
287     default: return nullptr;
288     STRINGIFY_CODE(VST_CODE, ENTRY)
289     STRINGIFY_CODE(VST_CODE, BBENTRY)
290     STRINGIFY_CODE(VST_CODE, FNENTRY)
291     STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
292     }
293   case bitc::MODULE_STRTAB_BLOCK_ID:
294     switch (CodeID) {
295     default:
296       return nullptr;
297       STRINGIFY_CODE(MST_CODE, ENTRY)
298       STRINGIFY_CODE(MST_CODE, HASH)
299     }
300   case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
301     switch (CodeID) {
302     default:
303       return nullptr;
304       STRINGIFY_CODE(FS, PERMODULE)
305       STRINGIFY_CODE(FS, PERMODULE_PROFILE)
306       STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
307       STRINGIFY_CODE(FS, COMBINED)
308       STRINGIFY_CODE(FS, COMBINED_PROFILE)
309       STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
310       STRINGIFY_CODE(FS, ALIAS)
311       STRINGIFY_CODE(FS, COMBINED_ALIAS)
312       STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
313       STRINGIFY_CODE(FS, VERSION)
314     }
315   case bitc::METADATA_ATTACHMENT_ID:
316     switch(CodeID) {
317     default:return nullptr;
318       STRINGIFY_CODE(METADATA, ATTACHMENT)
319     }
320   case bitc::METADATA_BLOCK_ID:
321     switch(CodeID) {
322     default:return nullptr;
323       STRINGIFY_CODE(METADATA, STRING_OLD)
324       STRINGIFY_CODE(METADATA, STRINGS)
325       STRINGIFY_CODE(METADATA, NAME)
326       STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
327       STRINGIFY_CODE(METADATA, NODE)
328       STRINGIFY_CODE(METADATA, VALUE)
329       STRINGIFY_CODE(METADATA, OLD_NODE)
330       STRINGIFY_CODE(METADATA, OLD_FN_NODE)
331       STRINGIFY_CODE(METADATA, NAMED_NODE)
332       STRINGIFY_CODE(METADATA, DISTINCT_NODE)
333       STRINGIFY_CODE(METADATA, LOCATION)
334       STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
335       STRINGIFY_CODE(METADATA, SUBRANGE)
336       STRINGIFY_CODE(METADATA, ENUMERATOR)
337       STRINGIFY_CODE(METADATA, BASIC_TYPE)
338       STRINGIFY_CODE(METADATA, FILE)
339       STRINGIFY_CODE(METADATA, DERIVED_TYPE)
340       STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
341       STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
342       STRINGIFY_CODE(METADATA, COMPILE_UNIT)
343       STRINGIFY_CODE(METADATA, SUBPROGRAM)
344       STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
345       STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
346       STRINGIFY_CODE(METADATA, NAMESPACE)
347       STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
348       STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
349       STRINGIFY_CODE(METADATA, GLOBAL_VAR)
350       STRINGIFY_CODE(METADATA, LOCAL_VAR)
351       STRINGIFY_CODE(METADATA, EXPRESSION)
352       STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
353       STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
354       STRINGIFY_CODE(METADATA, MODULE)
355     }
356   case bitc::METADATA_KIND_BLOCK_ID:
357     switch (CodeID) {
358     default:
359       return nullptr;
360       STRINGIFY_CODE(METADATA, KIND)
361     }
362   case bitc::USELIST_BLOCK_ID:
363     switch(CodeID) {
364     default:return nullptr;
365     case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT";
366     case bitc::USELIST_CODE_BB:      return "USELIST_CODE_BB";
367     }
368 
369   case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
370     switch(CodeID) {
371     default: return nullptr;
372     case bitc::OPERAND_BUNDLE_TAG: return "OPERAND_BUNDLE_TAG";
373     }
374   }
375 #undef STRINGIFY_CODE
376 }
377 
/// Counters kept for a single record code within one block ID.  All counters
/// start at zero.
struct PerRecordStats {
  /// Number of records seen with this code.
  unsigned NumInstances = 0;

  /// Number of those records that were encoded using an abbreviation.
  unsigned NumAbbrev = 0;

  /// Total size, in bits, of all records seen with this code.
  uint64_t TotalBits = 0;
};
385 
386 struct PerBlockIDStats {
387   /// NumInstances - This the number of times this block ID has been seen.
388   unsigned NumInstances;
389 
390   /// NumBits - The total size in bits of all of these blocks.
391   uint64_t NumBits;
392 
393   /// NumSubBlocks - The total number of blocks these blocks contain.
394   unsigned NumSubBlocks;
395 
396   /// NumAbbrevs - The total number of abbreviations.
397   unsigned NumAbbrevs;
398 
399   /// NumRecords - The total number of records these blocks contain, and the
400   /// number that are abbreviated.
401   unsigned NumRecords, NumAbbreviatedRecords;
402 
403   /// CodeFreq - Keep track of the number of times we see each code.
404   std::vector<PerRecordStats> CodeFreq;
405 
PerBlockIDStatsPerBlockIDStats406   PerBlockIDStats()
407     : NumInstances(0), NumBits(0),
408       NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {}
409 };
410 
411 static std::map<unsigned, PerBlockIDStats> BlockIDStats;
412 
413 
414 
415 /// Error - All bitcode analysis errors go through this function, making this a
416 /// good place to breakpoint if debugging.
Error(const Twine & Err)417 static bool Error(const Twine &Err) {
418   errs() << Err << "\n";
419   return true;
420 }
421 
decodeMetadataStringsBlob(BitstreamReader & Reader,StringRef Indent,ArrayRef<uint64_t> Record,StringRef Blob)422 static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent,
423                                       ArrayRef<uint64_t> Record,
424                                       StringRef Blob) {
425   if (Blob.empty())
426     return true;
427 
428   if (Record.size() != 2)
429     return true;
430 
431   unsigned NumStrings = Record[0];
432   unsigned StringsOffset = Record[1];
433   outs() << " num-strings = " << NumStrings << " {\n";
434 
435   StringRef Lengths = Blob.slice(0, StringsOffset);
436   SimpleBitstreamCursor R(Reader);
437   R.jumpToPointer(Lengths.begin());
438 
439   // Ensure that Blob doesn't get invalidated, even if this is reading from a
440   // StreamingMemoryObject with corrupt data.
441   R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
442 
443   StringRef Strings = Blob.drop_front(StringsOffset);
444   do {
445     if (R.AtEndOfStream())
446       return Error("bad length");
447 
448     unsigned Size = R.ReadVBR(6);
449     if (Strings.size() < Size)
450       return Error("truncated chars");
451 
452     outs() << Indent << "    '";
453     outs().write_escaped(Strings.slice(0, Size), /*hex=*/true);
454     outs() << "'\n";
455     Strings = Strings.drop_front(Size);
456   } while (--NumStrings);
457 
458   outs() << Indent << "  }";
459   return false;
460 }
461 
decodeBlob(unsigned Code,unsigned BlockID,BitstreamReader & Reader,StringRef Indent,ArrayRef<uint64_t> Record,StringRef Blob)462 static bool decodeBlob(unsigned Code, unsigned BlockID, BitstreamReader &Reader,
463                        StringRef Indent, ArrayRef<uint64_t> Record,
464                        StringRef Blob) {
465   if (BlockID != bitc::METADATA_BLOCK_ID)
466     return true;
467   if (Code != bitc::METADATA_STRINGS)
468     return true;
469 
470   return decodeMetadataStringsBlob(Reader, Indent, Record, Blob);
471 }
472 
473 /// ParseBlock - Read a block, updating statistics, etc.
ParseBlock(BitstreamCursor & Stream,unsigned BlockID,unsigned IndentLevel,CurStreamTypeType CurStreamType)474 static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
475                        unsigned IndentLevel, CurStreamTypeType CurStreamType) {
476   std::string Indent(IndentLevel*2, ' ');
477   uint64_t BlockBitStart = Stream.GetCurrentBitNo();
478 
479   // Get the statistics for this BlockID.
480   PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
481 
482   BlockStats.NumInstances++;
483 
484   // BLOCKINFO is a special part of the stream.
485   bool DumpRecords = Dump;
486   if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
487     if (Dump) outs() << Indent << "<BLOCKINFO_BLOCK/>\n";
488     if (BitstreamCursor(Stream).ReadBlockInfoBlock())
489       return Error("Malformed BlockInfoBlock");
490     // It's not really interesting to dump the contents of the blockinfo block.
491     DumpRecords = false;
492   }
493 
494   unsigned NumWords = 0;
495   if (Stream.EnterSubBlock(BlockID, &NumWords))
496     return Error("Malformed block record");
497 
498   // Keep it for later, when we see a MODULE_HASH record
499   uint64_t BlockEntryPos = Stream.getCurrentByteNo();
500 
501   const char *BlockName = nullptr;
502   if (DumpRecords) {
503     outs() << Indent << "<";
504     if ((BlockName = GetBlockName(BlockID, *Stream.getBitStreamReader(),
505                                   CurStreamType)))
506       outs() << BlockName;
507     else
508       outs() << "UnknownBlock" << BlockID;
509 
510     if (NonSymbolic && BlockName)
511       outs() << " BlockID=" << BlockID;
512 
513     outs() << " NumWords=" << NumWords
514            << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
515   }
516 
517   SmallVector<uint64_t, 64> Record;
518 
519   // Read all the records for this block.
520   while (1) {
521     if (Stream.AtEndOfStream())
522       return Error("Premature end of bitstream");
523 
524     uint64_t RecordStartBit = Stream.GetCurrentBitNo();
525 
526     BitstreamEntry Entry =
527       Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
528 
529     switch (Entry.Kind) {
530     case BitstreamEntry::Error:
531       return Error("malformed bitcode file");
532     case BitstreamEntry::EndBlock: {
533       uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
534       BlockStats.NumBits += BlockBitEnd-BlockBitStart;
535       if (DumpRecords) {
536         outs() << Indent << "</";
537         if (BlockName)
538           outs() << BlockName << ">\n";
539         else
540           outs() << "UnknownBlock" << BlockID << ">\n";
541       }
542       return false;
543     }
544 
545     case BitstreamEntry::SubBlock: {
546       uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
547       if (ParseBlock(Stream, Entry.ID, IndentLevel+1, CurStreamType))
548         return true;
549       ++BlockStats.NumSubBlocks;
550       uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
551 
552       // Don't include subblock sizes in the size of this block.
553       BlockBitStart += SubBlockBitEnd-SubBlockBitStart;
554       continue;
555     }
556     case BitstreamEntry::Record:
557       // The interesting case.
558       break;
559     }
560 
561     if (Entry.ID == bitc::DEFINE_ABBREV) {
562       Stream.ReadAbbrevRecord();
563       ++BlockStats.NumAbbrevs;
564       continue;
565     }
566 
567     Record.clear();
568 
569     ++BlockStats.NumRecords;
570 
571     StringRef Blob;
572     unsigned CurrentRecordPos = Stream.getCurrentByteNo();
573     unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
574 
575     // Increment the # occurrences of this code.
576     if (BlockStats.CodeFreq.size() <= Code)
577       BlockStats.CodeFreq.resize(Code+1);
578     BlockStats.CodeFreq[Code].NumInstances++;
579     BlockStats.CodeFreq[Code].TotalBits +=
580       Stream.GetCurrentBitNo()-RecordStartBit;
581     if (Entry.ID != bitc::UNABBREV_RECORD) {
582       BlockStats.CodeFreq[Code].NumAbbrev++;
583       ++BlockStats.NumAbbreviatedRecords;
584     }
585 
586     if (DumpRecords) {
587       outs() << Indent << "  <";
588       if (const char *CodeName =
589             GetCodeName(Code, BlockID, *Stream.getBitStreamReader(),
590                         CurStreamType))
591         outs() << CodeName;
592       else
593         outs() << "UnknownCode" << Code;
594       if (NonSymbolic &&
595           GetCodeName(Code, BlockID, *Stream.getBitStreamReader(),
596                       CurStreamType))
597         outs() << " codeid=" << Code;
598       const BitCodeAbbrev *Abbv = nullptr;
599       if (Entry.ID != bitc::UNABBREV_RECORD) {
600         Abbv = Stream.getAbbrev(Entry.ID);
601         outs() << " abbrevid=" << Entry.ID;
602       }
603 
604       for (unsigned i = 0, e = Record.size(); i != e; ++i)
605         outs() << " op" << i << "=" << (int64_t)Record[i];
606 
607       // If we found a module hash, let's verify that it matches!
608       if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
609         if (Record.size() != 5)
610           outs() << " (invalid)";
611         else {
612           // Recompute the hash and compare it to the one in the bitcode
613           SHA1 Hasher;
614           StringRef Hash;
615           {
616             int BlockSize = CurrentRecordPos - BlockEntryPos;
617             auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
618             Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
619             Hash = Hasher.result();
620           }
621           SmallString<20> RecordedHash;
622           RecordedHash.resize(20);
623           int Pos = 0;
624           for (auto &Val : Record) {
625             assert(!(Val >> 32) && "Unexpected high bits set");
626             RecordedHash[Pos++] = (Val >> 24) & 0xFF;
627             RecordedHash[Pos++] = (Val >> 16) & 0xFF;
628             RecordedHash[Pos++] = (Val >> 8) & 0xFF;
629             RecordedHash[Pos++] = (Val >> 0) & 0xFF;
630           }
631           if (Hash == RecordedHash)
632             outs() << " (match)";
633           else
634             outs() << " (!mismatch!)";
635         }
636       }
637 
638       outs() << "/>";
639 
640       if (Abbv) {
641         for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
642           const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
643           if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
644             continue;
645           assert(i + 2 == e && "Array op not second to last");
646           std::string Str;
647           bool ArrayIsPrintable = true;
648           for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
649             if (!isprint(static_cast<unsigned char>(Record[j]))) {
650               ArrayIsPrintable = false;
651               break;
652             }
653             Str += (char)Record[j];
654           }
655           if (ArrayIsPrintable)
656             outs() << " record string = '" << Str << "'";
657           break;
658         }
659       }
660 
661       if (Blob.data() && decodeBlob(Code, BlockID, *Stream.getBitStreamReader(),
662                                     Indent, Record, Blob)) {
663         outs() << " blob data = ";
664         if (ShowBinaryBlobs) {
665           outs() << "'";
666           outs().write_escaped(Blob, /*hex=*/true) << "'";
667         } else {
668           bool BlobIsPrintable = true;
669           for (unsigned i = 0, e = Blob.size(); i != e; ++i)
670             if (!isprint(static_cast<unsigned char>(Blob[i]))) {
671               BlobIsPrintable = false;
672               break;
673             }
674 
675           if (BlobIsPrintable)
676             outs() << "'" << Blob << "'";
677           else
678             outs() << "unprintable, " << Blob.size() << " bytes.";
679         }
680       }
681 
682       outs() << "\n";
683     }
684   }
685 }
686 
PrintSize(double Bits)687 static void PrintSize(double Bits) {
688   outs() << format("%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32));
689 }
PrintSize(uint64_t Bits)690 static void PrintSize(uint64_t Bits) {
691   outs() << format("%lub/%.2fB/%luW", (unsigned long)Bits,
692                    (double)Bits/8, (unsigned long)(Bits/32));
693 }
694 
openBitcodeFile(StringRef Path,std::unique_ptr<MemoryBuffer> & MemBuf,BitstreamReader & StreamFile,BitstreamCursor & Stream,CurStreamTypeType & CurStreamType)695 static bool openBitcodeFile(StringRef Path,
696                             std::unique_ptr<MemoryBuffer> &MemBuf,
697                             BitstreamReader &StreamFile,
698                             BitstreamCursor &Stream,
699                             CurStreamTypeType &CurStreamType) {
700   // Read the input file.
701   ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
702       MemoryBuffer::getFileOrSTDIN(Path);
703   if (std::error_code EC = MemBufOrErr.getError())
704     return Error(Twine("Error reading '") + Path + "': " + EC.message());
705   MemBuf = std::move(MemBufOrErr.get());
706 
707   if (MemBuf->getBufferSize() & 3)
708     return Error("Bitcode stream should be a multiple of 4 bytes in length");
709 
710   const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
711   const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize();
712 
713   // If we have a wrapper header, parse it and ignore the non-bc file contents.
714   // The magic number is 0x0B17C0DE stored in little endian.
715   if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
716     if (MemBuf->getBufferSize() < BWH_HeaderSize)
717       return Error("Invalid bitcode wrapper header");
718 
719     if (Dump) {
720       unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
721       unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
722       unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
723       unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
724       unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
725 
726       outs() << "<BITCODE_WRAPPER_HEADER"
727              << " Magic=" << format_hex(Magic, 10)
728              << " Version=" << format_hex(Version, 10)
729              << " Offset=" << format_hex(Offset, 10)
730              << " Size=" << format_hex(Size, 10)
731              << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
732     }
733 
734     if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
735       return Error("Invalid bitcode wrapper header");
736   }
737 
738   StreamFile = BitstreamReader(BufPtr, EndBufPtr);
739   Stream = BitstreamCursor(StreamFile);
740   StreamFile.CollectBlockInfoNames();
741 
742   // Read the stream signature.
743   char Signature[6];
744   Signature[0] = Stream.Read(8);
745   Signature[1] = Stream.Read(8);
746   Signature[2] = Stream.Read(4);
747   Signature[3] = Stream.Read(4);
748   Signature[4] = Stream.Read(4);
749   Signature[5] = Stream.Read(4);
750 
751   // Autodetect the file contents, if it is one we know.
752   CurStreamType = UnknownBitstream;
753   if (Signature[0] == 'B' && Signature[1] == 'C' &&
754       Signature[2] == 0x0 && Signature[3] == 0xC &&
755       Signature[4] == 0xE && Signature[5] == 0xD)
756     CurStreamType = LLVMIRBitstream;
757 
758   return false;
759 }
760 
761 /// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename.
AnalyzeBitcode()762 static int AnalyzeBitcode() {
763   std::unique_ptr<MemoryBuffer> StreamBuffer;
764   BitstreamReader StreamFile;
765   BitstreamCursor Stream;
766   CurStreamTypeType CurStreamType;
767   if (openBitcodeFile(InputFilename, StreamBuffer, StreamFile, Stream,
768                       CurStreamType))
769     return true;
770 
771   // Read block info from BlockInfoFilename, if specified.
772   // The block info must be a top-level block.
773   if (!BlockInfoFilename.empty()) {
774     std::unique_ptr<MemoryBuffer> BlockInfoBuffer;
775     BitstreamReader BlockInfoFile;
776     BitstreamCursor BlockInfoCursor;
777     CurStreamTypeType BlockInfoStreamType;
778     if (openBitcodeFile(BlockInfoFilename, BlockInfoBuffer, BlockInfoFile,
779                         BlockInfoCursor, BlockInfoStreamType))
780       return true;
781 
782     while (!BlockInfoCursor.AtEndOfStream()) {
783       unsigned Code = BlockInfoCursor.ReadCode();
784       if (Code != bitc::ENTER_SUBBLOCK)
785         return Error("Invalid record at top-level in block info file");
786 
787       unsigned BlockID = BlockInfoCursor.ReadSubBlockID();
788       if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
789         if (BlockInfoCursor.ReadBlockInfoBlock())
790           return Error("Malformed BlockInfoBlock in block info file");
791         break;
792       }
793 
794       BlockInfoCursor.SkipBlock();
795     }
796 
797     StreamFile.takeBlockInfo(std::move(BlockInfoFile));
798   }
799 
800   unsigned NumTopBlocks = 0;
801 
802   // Parse the top-level structure.  We only allow blocks at the top-level.
803   while (!Stream.AtEndOfStream()) {
804     unsigned Code = Stream.ReadCode();
805     if (Code != bitc::ENTER_SUBBLOCK)
806       return Error("Invalid record at top-level");
807 
808     unsigned BlockID = Stream.ReadSubBlockID();
809 
810     if (ParseBlock(Stream, BlockID, 0, CurStreamType))
811       return true;
812     ++NumTopBlocks;
813   }
814 
815   if (Dump) outs() << "\n\n";
816 
817   uint64_t BufferSizeBits = StreamFile.getBitcodeBytes().getExtent() * CHAR_BIT;
818   // Print a summary of the read file.
819   outs() << "Summary of " << InputFilename << ":\n";
820   outs() << "         Total size: ";
821   PrintSize(BufferSizeBits);
822   outs() << "\n";
823   outs() << "        Stream type: ";
824   switch (CurStreamType) {
825   case UnknownBitstream: outs() << "unknown\n"; break;
826   case LLVMIRBitstream:  outs() << "LLVM IR\n"; break;
827   }
828   outs() << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
829   outs() << "\n";
830 
831   // Emit per-block stats.
832   outs() << "Per-block Summary:\n";
833   for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
834        E = BlockIDStats.end(); I != E; ++I) {
835     outs() << "  Block ID #" << I->first;
836     if (const char *BlockName = GetBlockName(I->first, StreamFile,
837                                              CurStreamType))
838       outs() << " (" << BlockName << ")";
839     outs() << ":\n";
840 
841     const PerBlockIDStats &Stats = I->second;
842     outs() << "      Num Instances: " << Stats.NumInstances << "\n";
843     outs() << "         Total Size: ";
844     PrintSize(Stats.NumBits);
845     outs() << "\n";
846     double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
847     outs() << "    Percent of file: " << format("%2.4f%%", pct) << "\n";
848     if (Stats.NumInstances > 1) {
849       outs() << "       Average Size: ";
850       PrintSize(Stats.NumBits/(double)Stats.NumInstances);
851       outs() << "\n";
852       outs() << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
853              << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
854       outs() << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
855              << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
856       outs() << "    Tot/Avg Records: " << Stats.NumRecords << "/"
857              << Stats.NumRecords/(double)Stats.NumInstances << "\n";
858     } else {
859       outs() << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
860       outs() << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
861       outs() << "        Num Records: " << Stats.NumRecords << "\n";
862     }
863     if (Stats.NumRecords) {
864       double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
865       outs() << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
866     }
867     outs() << "\n";
868 
869     // Print a histogram of the codes we see.
870     if (!NoHistogram && !Stats.CodeFreq.empty()) {
871       std::vector<std::pair<unsigned, unsigned> > FreqPairs;  // <freq,code>
872       for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
873         if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
874           FreqPairs.push_back(std::make_pair(Freq, i));
875       std::stable_sort(FreqPairs.begin(), FreqPairs.end());
876       std::reverse(FreqPairs.begin(), FreqPairs.end());
877 
878       outs() << "\tRecord Histogram:\n";
879       outs() << "\t\t  Count    # Bits     b/Rec   % Abv  Record Kind\n";
880       for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
881         const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
882 
883         outs() << format("\t\t%7d %9lu",
884                          RecStats.NumInstances,
885                          (unsigned long)RecStats.TotalBits);
886 
887         if (RecStats.NumInstances > 1)
888           outs() << format(" %9.1f",
889                            (double)RecStats.TotalBits/RecStats.NumInstances);
890         else
891           outs() << "          ";
892 
893         if (RecStats.NumAbbrev)
894           outs() <<
895               format(" %7.2f",
896                      (double)RecStats.NumAbbrev/RecStats.NumInstances*100);
897         else
898           outs() << "        ";
899 
900         outs() << "  ";
901         if (const char *CodeName =
902               GetCodeName(FreqPairs[i].second, I->first, StreamFile,
903                           CurStreamType))
904           outs() << CodeName << "\n";
905         else
906           outs() << "UnknownCode" << FreqPairs[i].second << "\n";
907       }
908       outs() << "\n";
909 
910     }
911   }
912   return 0;
913 }
914 
915 
main(int argc,char ** argv)916 int main(int argc, char **argv) {
917   // Print a stack trace if we signal out.
918   sys::PrintStackTraceOnErrorSignal(argv[0]);
919   PrettyStackTraceProgram X(argc, argv);
920   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
921   cl::ParseCommandLineOptions(argc, argv, "llvm-bcanalyzer file analyzer\n");
922 
923   return AnalyzeBitcode();
924 }
925