1 //===- FileAnalysis.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H 10 #define LLVM_CFI_VERIFY_FILE_ANALYSIS_H 11 12 #include "llvm/ADT/DenseMap.h" 13 #include "llvm/ADT/SmallSet.h" 14 #include "llvm/BinaryFormat/ELF.h" 15 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 16 #include "llvm/MC/MCAsmInfo.h" 17 #include "llvm/MC/MCContext.h" 18 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 19 #include "llvm/MC/MCInst.h" 20 #include "llvm/MC/MCInstPrinter.h" 21 #include "llvm/MC/MCInstrAnalysis.h" 22 #include "llvm/MC/MCInstrDesc.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCObjectFileInfo.h" 25 #include "llvm/MC/MCRegisterInfo.h" 26 #include "llvm/MC/MCSubtargetInfo.h" 27 #include "llvm/Object/Binary.h" 28 #include "llvm/Object/COFF.h" 29 #include "llvm/Object/ELFObjectFile.h" 30 #include "llvm/Object/ObjectFile.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Error.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/TargetRegistry.h" 36 #include "llvm/Support/TargetSelect.h" 37 #include "llvm/Support/raw_ostream.h" 38 39 #include <functional> 40 #include <set> 41 #include <string> 42 #include <unordered_map> 43 44 namespace llvm { 45 namespace cfi_verify { 46 47 struct GraphResult; 48 49 extern bool IgnoreDWARFFlag; 50 51 enum class CFIProtectionStatus { 52 // This instruction is protected by CFI. 53 PROTECTED, 54 // The instruction is not an indirect control flow instruction, and thus 55 // shouldn't be protected. 56 FAIL_NOT_INDIRECT_CF, 57 // There is a path to the instruction that was unexpected. 58 FAIL_ORPHANS, 59 // There is a path to the instruction from a conditional branch that does not 60 // properly check the destination for this vcall/icall. 61 FAIL_BAD_CONDITIONAL_BRANCH, 62 // One of the operands of the indirect CF instruction is modified between the 63 // CFI-check and execution. 64 FAIL_REGISTER_CLOBBERED, 65 // The instruction referenced does not exist. This normally indicates an 66 // error in the program, where you try and validate a graph that was created 67 // in a different FileAnalysis object. 68 FAIL_INVALID_INSTRUCTION, 69 }; 70 71 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status); 72 73 // Disassembler and analysis tool for machine code files. Keeps track of non- 74 // sequential control flows, including indirect control flow instructions. 75 class FileAnalysis { 76 public: 77 // A metadata struct for an instruction. 78 struct Instr { 79 uint64_t VMAddress; // Virtual memory address of this instruction. 80 MCInst Instruction; // Instruction. 81 uint64_t InstructionSize; // Size of this instruction. 82 bool Valid; // Is this a valid instruction? If false, Instr::Instruction is 83 // undefined. 84 }; 85 86 // Construct a FileAnalysis from a file path. 87 static Expected<FileAnalysis> Create(StringRef Filename); 88 89 // Construct and take ownership of the supplied object. Do not use this 90 // constructor, prefer to use FileAnalysis::Create instead. 91 FileAnalysis(object::OwningBinary<object::Binary> Binary); 92 FileAnalysis() = delete; 93 FileAnalysis(const FileAnalysis &) = delete; 94 FileAnalysis(FileAnalysis &&Other) = default; 95 96 // Returns the instruction at the provided address. Returns nullptr if there 97 // is no instruction at the provided address. 98 const Instr *getInstruction(uint64_t Address) const; 99 100 // Returns the instruction at the provided adress, dying if the instruction is 101 // not found. 102 const Instr &getInstructionOrDie(uint64_t Address) const; 103 104 // Returns a pointer to the previous/next instruction in sequence, 105 // respectively. Returns nullptr if the next/prev instruction doesn't exist, 106 // or if the provided instruction doesn't exist. 107 const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const; 108 const Instr *getNextInstructionSequential(const Instr &InstrMeta) const; 109 110 // Returns whether this instruction is used by CFI to trap the program. 111 bool isCFITrap(const Instr &InstrMeta) const; 112 113 // Returns whether this instruction is a call to a function that will trap on 114 // CFI violations (i.e., it serves as a trap in this instance). 115 bool willTrapOnCFIViolation(const Instr &InstrMeta) const; 116 117 // Returns whether this function can fall through to the next instruction. 118 // Undefined (and bad) instructions cannot fall through, and instruction that 119 // modify the control flow can only fall through if they are conditional 120 // branches or calls. 121 bool canFallThrough(const Instr &InstrMeta) const; 122 123 // Returns the definitive next instruction. This is different from the next 124 // instruction sequentially as it will follow unconditional branches (assuming 125 // they can be resolved at compile time, i.e. not indirect). This method 126 // returns nullptr if the provided instruction does not transfer control flow 127 // to exactly one instruction that is known deterministically at compile time. 128 // Also returns nullptr if the deterministic target does not exist in this 129 // file. 130 const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const; 131 132 // Get a list of deterministic control flows that lead to the provided 133 // instruction. This list includes all static control flow cross-references as 134 // well as the previous instruction if it can fall through. 135 std::set<const Instr *> 136 getDirectControlFlowXRefs(const Instr &InstrMeta) const; 137 138 // Returns whether this instruction uses a register operand. 139 bool usesRegisterOperand(const Instr &InstrMeta) const; 140 141 // Returns the list of indirect instructions. 142 const std::set<object::SectionedAddress> &getIndirectInstructions() const; 143 144 const MCRegisterInfo *getRegisterInfo() const; 145 const MCInstrInfo *getMCInstrInfo() const; 146 const MCInstrAnalysis *getMCInstrAnalysis() const; 147 148 // Returns the inlining information for the provided address. 149 Expected<DIInliningInfo> 150 symbolizeInlinedCode(object::SectionedAddress Address); 151 152 // Returns whether the provided Graph represents a protected indirect control 153 // flow instruction in this file. 154 CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const; 155 156 // Returns the first place the operand register is clobbered between the CFI- 157 // check and the indirect CF instruction execution. We do this by walking 158 // backwards from the indirect CF and ensuring there is at most one load 159 // involving the operand register (which is the indirect CF itself on x86). 160 // If the register is not modified, returns the address of the indirect CF 161 // instruction. The result is undefined if the provided graph does not fall 162 // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see 163 // CFIProtectionStatus). 164 uint64_t indirectCFOperandClobber(const GraphResult& Graph) const; 165 166 // Prints an instruction to the provided stream using this object's pretty- 167 // printers. 168 void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const; 169 170 protected: 171 // Construct a blank object with the provided triple and features. Used in 172 // testing, where a sub class will dependency inject protected methods to 173 // allow analysis of raw binary, without requiring a fully valid ELF file. 174 FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features); 175 176 // Add an instruction to this object. 177 void addInstruction(const Instr &Instruction); 178 179 // Disassemble and parse the provided bytes into this object. Instruction 180 // address calculation is done relative to the provided SectionAddress. 181 void parseSectionContents(ArrayRef<uint8_t> SectionBytes, 182 object::SectionedAddress Address); 183 184 // Constructs and initialises members required for disassembly. 185 Error initialiseDisassemblyMembers(); 186 187 // Parses code sections from the internal object file. Saves them into the 188 // internal members. Should only be called once by Create(). 189 Error parseCodeSections(); 190 191 // Parses the symbol table to look for the addresses of functions that will 192 // trap on CFI violations. 193 Error parseSymbolTable(); 194 195 private: 196 // Members that describe the input file. 197 object::OwningBinary<object::Binary> Binary; 198 const object::ObjectFile *Object = nullptr; 199 Triple ObjectTriple; 200 std::string ArchName; 201 std::string MCPU; 202 const Target *ObjectTarget = nullptr; 203 SubtargetFeatures Features; 204 205 // Members required for disassembly. 206 std::unique_ptr<const MCRegisterInfo> RegisterInfo; 207 std::unique_ptr<const MCAsmInfo> AsmInfo; 208 std::unique_ptr<MCSubtargetInfo> SubtargetInfo; 209 std::unique_ptr<const MCInstrInfo> MII; 210 MCObjectFileInfo MOFI; 211 std::unique_ptr<MCContext> Context; 212 std::unique_ptr<const MCDisassembler> Disassembler; 213 std::unique_ptr<const MCInstrAnalysis> MIA; 214 std::unique_ptr<MCInstPrinter> Printer; 215 216 // Symbolizer used for debug information parsing. 217 std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer; 218 219 // A mapping between the virtual memory address to the instruction metadata 220 // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per- 221 // insertion allocation. 222 std::map<uint64_t, Instr> Instructions; 223 224 // Contains a mapping between a specific address, and a list of instructions 225 // that use this address as a branch target (including call instructions). 226 DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings; 227 228 // A list of addresses of indirect control flow instructions. 229 std::set<object::SectionedAddress> IndirectInstructions; 230 231 // The addresses of functions that will trap on CFI violations. 232 SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses; 233 }; 234 235 class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> { 236 public: 237 static char ID; 238 std::string Text; 239 240 UnsupportedDisassembly(StringRef Text); 241 242 void log(raw_ostream &OS) const override; 243 std::error_code convertToErrorCode() const override; 244 }; 245 246 } // namespace cfi_verify 247 } // namespace llvm 248 249 #endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H 250