1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "FileAnalysis.h"
11 #include "GraphBuilder.h"
12 
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrDesc.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCRegisterInfo.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/Object/Binary.h"
27 #include "llvm/Object/COFF.h"
28 #include "llvm/Object/ELFObjectFile.h"
29 #include "llvm/Object/ObjectFile.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 
39 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
41 
42 namespace llvm {
43 namespace cfi_verify {
44 
45 bool IgnoreDWARFFlag;
46 
47 static cl::opt<bool, true> IgnoreDWARFArg(
48     "ignore-dwarf",
49     cl::desc(
50         "Ignore all DWARF data. This relaxes the requirements for all "
51         "statically linked libraries to have been compiled with '-g', but "
52         "will result in false positives for 'CFI unprotected' instructions."),
53     cl::location(IgnoreDWARFFlag), cl::init(false));
54 
stringCFIProtectionStatus(CFIProtectionStatus Status)55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
56   switch (Status) {
57   case CFIProtectionStatus::PROTECTED:
58     return "PROTECTED";
59   case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
60     return "FAIL_NOT_INDIRECT_CF";
61   case CFIProtectionStatus::FAIL_ORPHANS:
62     return "FAIL_ORPHANS";
63   case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
64     return "FAIL_BAD_CONDITIONAL_BRANCH";
65   case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
66     return "FAIL_REGISTER_CLOBBERED";
67   case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
68     return "FAIL_INVALID_INSTRUCTION";
69   }
70   llvm_unreachable("Attempted to stringify an unknown enum value.");
71 }
72 
Create(StringRef Filename)73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
74   // Open the filename provided.
75   Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
76       object::createBinary(Filename);
77   if (!BinaryOrErr)
78     return BinaryOrErr.takeError();
79 
80   // Construct the object and allow it to take ownership of the binary.
81   object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
82   FileAnalysis Analysis(std::move(Binary));
83 
84   Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
85   if (!Analysis.Object)
86     return make_error<UnsupportedDisassembly>("Failed to cast object");
87 
88   switch (Analysis.Object->getArch()) {
89     case Triple::x86:
90     case Triple::x86_64:
91     case Triple::aarch64:
92     case Triple::aarch64_be:
93       break;
94     default:
95       return make_error<UnsupportedDisassembly>("Unsupported architecture.");
96   }
97 
98   Analysis.ObjectTriple = Analysis.Object->makeTriple();
99   Analysis.Features = Analysis.Object->getFeatures();
100 
101   // Init the rest of the object.
102   if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
103     return std::move(InitResponse);
104 
105   if (auto SectionParseResponse = Analysis.parseCodeSections())
106     return std::move(SectionParseResponse);
107 
108   return std::move(Analysis);
109 }
110 
FileAnalysis(object::OwningBinary<object::Binary> Binary)111 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
112     : Binary(std::move(Binary)) {}
113 
FileAnalysis(const Triple & ObjectTriple,const SubtargetFeatures & Features)114 FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
115                            const SubtargetFeatures &Features)
116     : ObjectTriple(ObjectTriple), Features(Features) {}
117 
118 const Instr *
getPrevInstructionSequential(const Instr & InstrMeta) const119 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
120   std::map<uint64_t, Instr>::const_iterator KV =
121       Instructions.find(InstrMeta.VMAddress);
122   if (KV == Instructions.end() || KV == Instructions.begin())
123     return nullptr;
124 
125   if (!(--KV)->second.Valid)
126     return nullptr;
127 
128   return &KV->second;
129 }
130 
131 const Instr *
getNextInstructionSequential(const Instr & InstrMeta) const132 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
133   std::map<uint64_t, Instr>::const_iterator KV =
134       Instructions.find(InstrMeta.VMAddress);
135   if (KV == Instructions.end() || ++KV == Instructions.end())
136     return nullptr;
137 
138   if (!KV->second.Valid)
139     return nullptr;
140 
141   return &KV->second;
142 }
143 
usesRegisterOperand(const Instr & InstrMeta) const144 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
145   for (const auto &Operand : InstrMeta.Instruction) {
146     if (Operand.isReg())
147       return true;
148   }
149   return false;
150 }
151 
getInstruction(uint64_t Address) const152 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
153   const auto &InstrKV = Instructions.find(Address);
154   if (InstrKV == Instructions.end())
155     return nullptr;
156 
157   return &InstrKV->second;
158 }
159 
getInstructionOrDie(uint64_t Address) const160 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
161   const auto &InstrKV = Instructions.find(Address);
162   assert(InstrKV != Instructions.end() && "Address doesn't exist.");
163   return InstrKV->second;
164 }
165 
isCFITrap(const Instr & InstrMeta) const166 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
167   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
168   return InstrDesc.isTrap();
169 }
170 
canFallThrough(const Instr & InstrMeta) const171 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
172   if (!InstrMeta.Valid)
173     return false;
174 
175   if (isCFITrap(InstrMeta))
176     return false;
177 
178   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
179   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
180     return InstrDesc.isConditionalBranch();
181 
182   return true;
183 }
184 
185 const Instr *
getDefiniteNextInstruction(const Instr & InstrMeta) const186 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
187   if (!InstrMeta.Valid)
188     return nullptr;
189 
190   if (isCFITrap(InstrMeta))
191     return nullptr;
192 
193   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
194   const Instr *NextMetaPtr;
195   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
196     if (InstrDesc.isConditionalBranch())
197       return nullptr;
198 
199     uint64_t Target;
200     if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
201                              InstrMeta.InstructionSize, Target))
202       return nullptr;
203 
204     NextMetaPtr = getInstruction(Target);
205   } else {
206     NextMetaPtr =
207         getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
208   }
209 
210   if (!NextMetaPtr || !NextMetaPtr->Valid)
211     return nullptr;
212 
213   return NextMetaPtr;
214 }
215 
216 std::set<const Instr *>
getDirectControlFlowXRefs(const Instr & InstrMeta) const217 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
218   std::set<const Instr *> CFCrossReferences;
219   const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
220 
221   if (PrevInstruction && canFallThrough(*PrevInstruction))
222     CFCrossReferences.insert(PrevInstruction);
223 
224   const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
225   if (TargetRefsKV == StaticBranchTargetings.end())
226     return CFCrossReferences;
227 
228   for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
229     const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
230     if (SourceInstrKV == Instructions.end()) {
231       errs() << "Failed to find source instruction at address "
232              << format_hex(SourceInstrAddress, 2)
233              << " for the cross-reference to instruction at address "
234              << format_hex(InstrMeta.VMAddress, 2) << ".\n";
235       continue;
236     }
237 
238     CFCrossReferences.insert(&SourceInstrKV->second);
239   }
240 
241   return CFCrossReferences;
242 }
243 
getIndirectInstructions() const244 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
245   return IndirectInstructions;
246 }
247 
getRegisterInfo() const248 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
249   return RegisterInfo.get();
250 }
251 
getMCInstrInfo() const252 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
253 
getMCInstrAnalysis() const254 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
255   return MIA.get();
256 }
257 
symbolizeInlinedCode(uint64_t Address)258 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
259   assert(Symbolizer != nullptr && "Symbolizer is invalid.");
260   return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
261 }
262 
263 CFIProtectionStatus
validateCFIProtection(const GraphResult & Graph) const264 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
265   const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
266   if (!InstrMetaPtr)
267     return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
268 
269   const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
270   if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
271     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
272 
273   if (!usesRegisterOperand(*InstrMetaPtr))
274     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
275 
276   if (!Graph.OrphanedNodes.empty())
277     return CFIProtectionStatus::FAIL_ORPHANS;
278 
279   for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
280     if (!BranchNode.CFIProtection)
281       return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
282   }
283 
284   if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
285     return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
286 
287   return CFIProtectionStatus::PROTECTED;
288 }
289 
indirectCFOperandClobber(const GraphResult & Graph) const290 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
291   assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
292 
293   // Get the set of registers we must check to ensure they're not clobbered.
294   const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
295   DenseSet<unsigned> RegisterNumbers;
296   for (const auto &Operand : IndirectCF.Instruction) {
297     if (Operand.isReg())
298       RegisterNumbers.insert(Operand.getReg());
299   }
300   assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
301 
302   // Now check all branches to indirect CFs and ensure no clobbering happens.
303   for (const auto &Branch : Graph.ConditionalBranchNodes) {
304     uint64_t Node;
305     if (Branch.IndirectCFIsOnTargetPath)
306       Node = Branch.Target;
307     else
308       Node = Branch.Fallthrough;
309 
310     // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
311     // we allow them one load.
312     bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();
313 
314     // We walk backwards from the indirect CF.  It is the last node returned by
315     // Graph.flattenAddress, so we skip it since we already handled it.
316     DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
317     std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
318     for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
319       Node = *I;
320       const Instr &NodeInstr = getInstructionOrDie(Node);
321       const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
322 
323       for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
324            RI != RE; ++RI) {
325         unsigned RegNum = *RI;
326         if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
327                                       *RegisterInfo)) {
328           if (!canLoad || !InstrDesc.mayLoad())
329             return Node;
330           canLoad = false;
331           CurRegisterNumbers.erase(RI);
332           // Add the registers this load reads to those we check for clobbers.
333           for (unsigned i = InstrDesc.getNumDefs(),
334                         e = InstrDesc.getNumOperands(); i != e; i++) {
335             const auto Operand = NodeInstr.Instruction.getOperand(i);
336             if (Operand.isReg())
337               CurRegisterNumbers.insert(Operand.getReg());
338           }
339           break;
340         }
341       }
342     }
343   }
344 
345   return Graph.BaseAddress;
346 }
347 
printInstruction(const Instr & InstrMeta,raw_ostream & OS) const348 void FileAnalysis::printInstruction(const Instr &InstrMeta,
349                                     raw_ostream &OS) const {
350   Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
351 }
352 
initialiseDisassemblyMembers()353 Error FileAnalysis::initialiseDisassemblyMembers() {
354   std::string TripleName = ObjectTriple.getTriple();
355   ArchName = "";
356   MCPU = "";
357   std::string ErrorString;
358 
359   Symbolizer.reset(new LLVMSymbolizer());
360 
361   ObjectTarget =
362       TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
363   if (!ObjectTarget)
364     return make_error<UnsupportedDisassembly>(
365         (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
366          "\", failed with error: " + ErrorString)
367             .str());
368 
369   RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
370   if (!RegisterInfo)
371     return make_error<UnsupportedDisassembly>(
372         "Failed to initialise RegisterInfo.");
373 
374   AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
375   if (!AsmInfo)
376     return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
377 
378   SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
379       TripleName, MCPU, Features.getString()));
380   if (!SubtargetInfo)
381     return make_error<UnsupportedDisassembly>(
382         "Failed to initialise SubtargetInfo.");
383 
384   MII.reset(ObjectTarget->createMCInstrInfo());
385   if (!MII)
386     return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
387 
388   Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
389 
390   Disassembler.reset(
391       ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
392 
393   if (!Disassembler)
394     return make_error<UnsupportedDisassembly>(
395         "No disassembler available for target");
396 
397   MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
398 
399   Printer.reset(ObjectTarget->createMCInstPrinter(
400       ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
401       *RegisterInfo));
402 
403   return Error::success();
404 }
405 
parseCodeSections()406 Error FileAnalysis::parseCodeSections() {
407   if (!IgnoreDWARFFlag) {
408     std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
409     if (!DWARF)
410       return make_error<StringError>("Could not create DWARF information.",
411                                      inconvertibleErrorCode());
412 
413     bool LineInfoValid = false;
414 
415     for (auto &Unit : DWARF->compile_units()) {
416       const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
417       if (LineTable && !LineTable->Rows.empty()) {
418         LineInfoValid = true;
419         break;
420       }
421     }
422 
423     if (!LineInfoValid)
424       return make_error<StringError>(
425           "DWARF line information missing. Did you compile with '-g'?",
426           inconvertibleErrorCode());
427   }
428 
429   for (const object::SectionRef &Section : Object->sections()) {
430     // Ensure only executable sections get analysed.
431     if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
432       continue;
433 
434     StringRef SectionContents;
435     if (Section.getContents(SectionContents))
436       return make_error<StringError>("Failed to retrieve section contents",
437                                      inconvertibleErrorCode());
438 
439     ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(),
440                                    Section.getSize());
441     parseSectionContents(SectionBytes, Section.getAddress());
442   }
443   return Error::success();
444 }
445 
parseSectionContents(ArrayRef<uint8_t> SectionBytes,uint64_t SectionAddress)446 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
447                                         uint64_t SectionAddress) {
448   assert(Symbolizer && "Symbolizer is uninitialised.");
449   MCInst Instruction;
450   Instr InstrMeta;
451   uint64_t InstructionSize;
452 
453   for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
454     bool ValidInstruction =
455         Disassembler->getInstruction(Instruction, InstructionSize,
456                                      SectionBytes.drop_front(Byte), 0, nulls(),
457                                      outs()) == MCDisassembler::Success;
458 
459     Byte += InstructionSize;
460 
461     uint64_t VMAddress = SectionAddress + Byte - InstructionSize;
462     InstrMeta.Instruction = Instruction;
463     InstrMeta.VMAddress = VMAddress;
464     InstrMeta.InstructionSize = InstructionSize;
465     InstrMeta.Valid = ValidInstruction;
466 
467     addInstruction(InstrMeta);
468 
469     if (!ValidInstruction)
470       continue;
471 
472     // Skip additional parsing for instructions that do not affect the control
473     // flow.
474     const auto &InstrDesc = MII->get(Instruction.getOpcode());
475     if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
476       continue;
477 
478     uint64_t Target;
479     if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
480       // If the target can be evaluated, it's not indirect.
481       StaticBranchTargetings[Target].push_back(VMAddress);
482       continue;
483     }
484 
485     if (!usesRegisterOperand(InstrMeta))
486       continue;
487 
488     if (InstrDesc.isReturn())
489       continue;
490 
491     // Check if this instruction exists in the range of the DWARF metadata.
492     if (!IgnoreDWARFFlag) {
493       auto LineInfo =
494           Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
495       if (!LineInfo) {
496         handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
497           errs() << "Symbolizer failed to get line: " << E.message() << "\n";
498         });
499         continue;
500       }
501 
502       if (LineInfo->FileName == "<invalid>")
503         continue;
504     }
505 
506     IndirectInstructions.insert(VMAddress);
507   }
508 }
509 
addInstruction(const Instr & Instruction)510 void FileAnalysis::addInstruction(const Instr &Instruction) {
511   const auto &KV =
512       Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
513   if (!KV.second) {
514     errs() << "Failed to add instruction at address "
515            << format_hex(Instruction.VMAddress, 2)
516            << ": Instruction at this address already exists.\n";
517     exit(EXIT_FAILURE);
518   }
519 }
520 
UnsupportedDisassembly(StringRef Text)521 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
522 
523 char UnsupportedDisassembly::ID;
log(raw_ostream & OS) const524 void UnsupportedDisassembly::log(raw_ostream &OS) const {
525   OS << "Could not initialise disassembler: " << Text;
526 }
527 
convertToErrorCode() const528 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
529   return std::error_code();
530 }
531 
532 } // namespace cfi_verify
533 } // namespace llvm
534