1 //===- GraphBuilder.cpp -----------------------------------------*- C++ -*-===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "GraphBuilder.h"
11 
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCContext.h"
15 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MC/MCInstPrinter.h"
18 #include "llvm/MC/MCInstrAnalysis.h"
19 #include "llvm/MC/MCInstrDesc.h"
20 #include "llvm/MC/MCInstrInfo.h"
21 #include "llvm/MC/MCObjectFileInfo.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCSubtargetInfo.h"
24 #include "llvm/Object/Binary.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Object/ELFObjectFile.h"
27 #include "llvm/Object/ObjectFile.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Error.h"
31 #include "llvm/Support/MemoryBuffer.h"
32 #include "llvm/Support/TargetRegistry.h"
33 #include "llvm/Support/TargetSelect.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 
37 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
38 
39 namespace llvm {
40 namespace cfi_verify {
41 
42 unsigned long long SearchLengthForUndef;
43 unsigned long long SearchLengthForConditionalBranch;
44 
45 static cl::opt<unsigned long long, true> SearchLengthForUndefArg(
46     "search-length-undef",
47     cl::desc("Specify the maximum amount of instructions "
48              "to inspect when searching for an undefined "
49              "instruction from a conditional branch."),
50     cl::location(SearchLengthForUndef), cl::init(2));
51 
52 static cl::opt<unsigned long long, true> SearchLengthForConditionalBranchArg(
53     "search-length-cb",
54     cl::desc("Specify the maximum amount of instructions "
55              "to inspect when searching for a conditional "
56              "branch from an indirect control flow."),
57     cl::location(SearchLengthForConditionalBranch), cl::init(20));
58 
flattenAddress(uint64_t Address) const59 std::vector<uint64_t> GraphResult::flattenAddress(uint64_t Address) const {
60   std::vector<uint64_t> Addresses;
61 
62   auto It = IntermediateNodes.find(Address);
63   Addresses.push_back(Address);
64 
65   while (It != IntermediateNodes.end()) {
66     Addresses.push_back(It->second);
67     It = IntermediateNodes.find(It->second);
68   }
69   return Addresses;
70 }
71 
printPairToDOT(const FileAnalysis & Analysis,raw_ostream & OS,uint64_t From,uint64_t To)72 void printPairToDOT(const FileAnalysis &Analysis, raw_ostream &OS,
73                           uint64_t From, uint64_t To) {
74   OS << "  \"" << format_hex(From, 2) << ": ";
75   Analysis.printInstruction(Analysis.getInstructionOrDie(From), OS);
76   OS << "\" -> \"" << format_hex(To, 2) << ": ";
77   Analysis.printInstruction(Analysis.getInstructionOrDie(To), OS);
78   OS << "\"\n";
79 }
80 
printToDOT(const FileAnalysis & Analysis,raw_ostream & OS) const81 void GraphResult::printToDOT(const FileAnalysis &Analysis,
82                              raw_ostream &OS) const {
83   std::map<uint64_t, uint64_t> SortedIntermediateNodes(
84       IntermediateNodes.begin(), IntermediateNodes.end());
85   OS << "digraph graph_" << format_hex(BaseAddress, 2) << " {\n";
86   for (const auto &KV : SortedIntermediateNodes)
87     printPairToDOT(Analysis, OS, KV.first, KV.second);
88 
89   for (auto &BranchNode : ConditionalBranchNodes) {
90     for (auto &V : {BranchNode.Target, BranchNode.Fallthrough})
91       printPairToDOT(Analysis, OS, BranchNode.Address, V);
92   }
93   OS << "}\n";
94 }
95 
buildFlowGraph(const FileAnalysis & Analysis,uint64_t Address)96 GraphResult GraphBuilder::buildFlowGraph(const FileAnalysis &Analysis,
97                                          uint64_t Address) {
98   GraphResult Result;
99   Result.BaseAddress = Address;
100   DenseSet<uint64_t> OpenedNodes;
101 
102   const auto &IndirectInstructions = Analysis.getIndirectInstructions();
103 
104   if (IndirectInstructions.find(Address) == IndirectInstructions.end())
105     return Result;
106 
107   buildFlowGraphImpl(Analysis, OpenedNodes, Result, Address, 0);
108   return Result;
109 }
110 
buildFlowsToUndefined(const FileAnalysis & Analysis,GraphResult & Result,ConditionalBranchNode & BranchNode,const Instr & BranchInstrMeta)111 void GraphBuilder::buildFlowsToUndefined(const FileAnalysis &Analysis,
112                                          GraphResult &Result,
113                                          ConditionalBranchNode &BranchNode,
114                                          const Instr &BranchInstrMeta) {
115   assert(SearchLengthForUndef > 0 &&
116          "Search length for undefined flow must be greater than zero.");
117 
118   // Start setting up the next node in the block.
119   uint64_t NextAddress = 0;
120   const Instr *NextMetaPtr;
121 
122   // Find out the next instruction in the block and add it to the new
123   // node.
124   if (BranchNode.Target && !BranchNode.Fallthrough) {
125     // We know the target of the branch, find the fallthrough.
126     NextMetaPtr = Analysis.getNextInstructionSequential(BranchInstrMeta);
127     if (!NextMetaPtr) {
128       errs() << "Failed to get next instruction from "
129              << format_hex(BranchNode.Address, 2) << ".\n";
130       return;
131     }
132 
133     NextAddress = NextMetaPtr->VMAddress;
134     BranchNode.Fallthrough =
135         NextMetaPtr->VMAddress; // Add the new node to the branch head.
136   } else if (BranchNode.Fallthrough && !BranchNode.Target) {
137     // We already know the fallthrough, evaluate the target.
138     uint64_t Target;
139     if (!Analysis.getMCInstrAnalysis()->evaluateBranch(
140             BranchInstrMeta.Instruction, BranchInstrMeta.VMAddress,
141             BranchInstrMeta.InstructionSize, Target)) {
142       errs() << "Failed to get branch target for conditional branch at address "
143              << format_hex(BranchInstrMeta.VMAddress, 2) << ".\n";
144       return;
145     }
146 
147     // Resolve the meta pointer for the target of this branch.
148     NextMetaPtr = Analysis.getInstruction(Target);
149     if (!NextMetaPtr) {
150       errs() << "Failed to find instruction at address "
151              << format_hex(Target, 2) << ".\n";
152       return;
153     }
154 
155     NextAddress = Target;
156     BranchNode.Target =
157         NextMetaPtr->VMAddress; // Add the new node to the branch head.
158   } else {
159     errs() << "ControlBranchNode supplied to buildFlowsToUndefined should "
160               "provide Target xor Fallthrough.\n";
161     return;
162   }
163 
164   uint64_t CurrentAddress = NextAddress;
165   const Instr *CurrentMetaPtr = NextMetaPtr;
166 
167   // Now the branch head has been set properly, complete the rest of the block.
168   for (uint64_t i = 1; i < SearchLengthForUndef; ++i) {
169     // Check to see whether the block should die.
170     if (Analysis.isCFITrap(*CurrentMetaPtr)) {
171       BranchNode.CFIProtection = true;
172       return;
173     }
174 
175     // Find the metadata of the next instruction.
176     NextMetaPtr = Analysis.getDefiniteNextInstruction(*CurrentMetaPtr);
177     if (!NextMetaPtr)
178       return;
179 
180     // Setup the next node.
181     NextAddress = NextMetaPtr->VMAddress;
182 
183     // Add this as an intermediate.
184     Result.IntermediateNodes[CurrentAddress] = NextAddress;
185 
186     // Move the 'current' pointers to the new tail of the block.
187     CurrentMetaPtr = NextMetaPtr;
188     CurrentAddress = NextAddress;
189   }
190 
191   // Final check of the last thing we added to the block.
192   if (Analysis.isCFITrap(*CurrentMetaPtr))
193     BranchNode.CFIProtection = true;
194 }
195 
buildFlowGraphImpl(const FileAnalysis & Analysis,DenseSet<uint64_t> & OpenedNodes,GraphResult & Result,uint64_t Address,uint64_t Depth)196 void GraphBuilder::buildFlowGraphImpl(const FileAnalysis &Analysis,
197                                       DenseSet<uint64_t> &OpenedNodes,
198                                       GraphResult &Result, uint64_t Address,
199                                       uint64_t Depth) {
200   // If we've exceeded the flow length, terminate.
201   if (Depth >= SearchLengthForConditionalBranch) {
202     Result.OrphanedNodes.push_back(Address);
203     return;
204   }
205 
206   // Ensure this flow is acyclic.
207   if (OpenedNodes.count(Address))
208     Result.OrphanedNodes.push_back(Address);
209 
210   // If this flow is already explored, stop here.
211   if (Result.IntermediateNodes.count(Address))
212     return;
213 
214   // Get the metadata for the node instruction.
215   const auto &InstrMetaPtr = Analysis.getInstruction(Address);
216   if (!InstrMetaPtr) {
217     errs() << "Failed to build flow graph for instruction at address "
218            << format_hex(Address, 2) << ".\n";
219     Result.OrphanedNodes.push_back(Address);
220     return;
221   }
222   const auto &ChildMeta = *InstrMetaPtr;
223 
224   OpenedNodes.insert(Address);
225   std::set<const Instr *> CFCrossRefs =
226       Analysis.getDirectControlFlowXRefs(ChildMeta);
227 
228   bool HasValidCrossRef = false;
229 
230   for (const auto *ParentMetaPtr : CFCrossRefs) {
231     assert(ParentMetaPtr && "CFCrossRefs returned nullptr.");
232     const auto &ParentMeta = *ParentMetaPtr;
233     const auto &ParentDesc =
234         Analysis.getMCInstrInfo()->get(ParentMeta.Instruction.getOpcode());
235 
236     if (!ParentDesc.mayAffectControlFlow(ParentMeta.Instruction,
237                                          *Analysis.getRegisterInfo())) {
238       // If this cross reference doesn't affect CF, continue the graph.
239       buildFlowGraphImpl(Analysis, OpenedNodes, Result, ParentMeta.VMAddress,
240                          Depth + 1);
241       Result.IntermediateNodes[ParentMeta.VMAddress] = Address;
242       HasValidCrossRef = true;
243       continue;
244     }
245 
246     // Call instructions are not valid in the upwards traversal.
247     if (ParentDesc.isCall()) {
248       Result.IntermediateNodes[ParentMeta.VMAddress] = Address;
249       Result.OrphanedNodes.push_back(ParentMeta.VMAddress);
250       continue;
251     }
252 
253     // Evaluate the branch target to ascertain whether this XRef is the result
254     // of a fallthrough or the target of a branch.
255     uint64_t BranchTarget;
256     if (!Analysis.getMCInstrAnalysis()->evaluateBranch(
257             ParentMeta.Instruction, ParentMeta.VMAddress,
258             ParentMeta.InstructionSize, BranchTarget)) {
259       errs() << "Failed to evaluate branch target for instruction at address "
260              << format_hex(ParentMeta.VMAddress, 2) << ".\n";
261       Result.IntermediateNodes[ParentMeta.VMAddress] = Address;
262       Result.OrphanedNodes.push_back(ParentMeta.VMAddress);
263       continue;
264     }
265 
266     // Allow unconditional branches to be part of the upwards traversal.
267     if (ParentDesc.isUnconditionalBranch()) {
268       // Ensures that the unconditional branch is actually an XRef to the child.
269       if (BranchTarget != Address) {
270         errs() << "Control flow to " << format_hex(Address, 2)
271                << ", but target resolution of "
272                << format_hex(ParentMeta.VMAddress, 2)
273                << " is not this address?\n";
274         Result.IntermediateNodes[ParentMeta.VMAddress] = Address;
275         Result.OrphanedNodes.push_back(ParentMeta.VMAddress);
276         continue;
277       }
278 
279       buildFlowGraphImpl(Analysis, OpenedNodes, Result, ParentMeta.VMAddress,
280                          Depth + 1);
281       Result.IntermediateNodes[ParentMeta.VMAddress] = Address;
282       HasValidCrossRef = true;
283       continue;
284     }
285 
286     // Ensure that any unknown CFs are caught.
287     if (!ParentDesc.isConditionalBranch()) {
288       errs() << "Unknown control flow encountered when building graph at "
289              << format_hex(Address, 2) << "\n.";
290       Result.IntermediateNodes[ParentMeta.VMAddress] = Address;
291       Result.OrphanedNodes.push_back(ParentMeta.VMAddress);
292       continue;
293     }
294 
295     // Only direct conditional branches should be present at this point. Setup
296     // a conditional branch node and build flows to the ud2.
297     ConditionalBranchNode BranchNode;
298     BranchNode.Address = ParentMeta.VMAddress;
299     BranchNode.Target = 0;
300     BranchNode.Fallthrough = 0;
301     BranchNode.CFIProtection = false;
302     BranchNode.IndirectCFIsOnTargetPath = (BranchTarget == Address);
303 
304     if (BranchTarget == Address)
305       BranchNode.Target = Address;
306     else
307       BranchNode.Fallthrough = Address;
308 
309     HasValidCrossRef = true;
310     buildFlowsToUndefined(Analysis, Result, BranchNode, ParentMeta);
311     Result.ConditionalBranchNodes.push_back(BranchNode);
312   }
313 
314   if (!HasValidCrossRef)
315     Result.OrphanedNodes.push_back(Address);
316 
317   OpenedNodes.erase(Address);
318 }
319 
320 } // namespace cfi_verify
321 } // namespace llvm
322