1 //===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The ShadowCallStack pass instruments function prologs/epilogs to check that
11 // the return address has not been corrupted during the execution of the
12 // function. The return address is stored in a 'shadow call stack' addressed
13 // using the %gs segment register.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "X86.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86Subtarget.h"
21 
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/CodeGen/TargetInstrInfo.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/raw_ostream.h"
31 
32 using namespace llvm;
33 
34 namespace llvm {
35 void initializeShadowCallStackPass(PassRegistry &);
36 }
37 
38 namespace {
39 
40 class ShadowCallStack : public MachineFunctionPass {
41 public:
42   static char ID;
43 
ShadowCallStack()44   ShadowCallStack() : MachineFunctionPass(ID) {
45     initializeShadowCallStackPass(*PassRegistry::getPassRegistry());
46   }
47 
getAnalysisUsage(AnalysisUsage & AU) const48   void getAnalysisUsage(AnalysisUsage &AU) const override {
49     MachineFunctionPass::getAnalysisUsage(AU);
50   }
51 
52   bool runOnMachineFunction(MachineFunction &Fn) override;
53 
54 private:
55   // Do not instrument leaf functions with this many or fewer instructions. The
56   // shadow call stack instrumented prolog/epilog are slightly race-y reading
57   // and checking the saved return address, so it is better to not instrument
58   // functions that have fewer instructions than the instrumented prolog/epilog
59   // race.
60   static const size_t SkipLeafInstructions = 3;
61 };
62 
63 char ShadowCallStack::ID = 0;
64 } // end anonymous namespace.
65 
66 static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
67                       MachineBasicBlock &MBB, const DebugLoc &DL);
68 static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
69                           MachineBasicBlock &MBB, const DebugLoc &DL,
70                           MCPhysReg FreeRegister);
71 
72 static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
73                       MachineInstr &MI, MachineBasicBlock &TrapBB);
74 static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
75                           MachineInstr &MI, MachineBasicBlock &TrapBB,
76                           MCPhysReg FreeRegister);
77 // Generate a longer epilog that only uses r10 when a tailcall branches to r11.
78 static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
79                              MachineInstr &MI, MachineBasicBlock &TrapBB);
80 
81 // Helper function to add ModR/M references for [Seg: Reg + Offset] memory
82 // accesses
83 static inline const MachineInstrBuilder &
addSegmentedMem(const MachineInstrBuilder & MIB,MCPhysReg Seg,MCPhysReg Reg,int Offset=0)84 addSegmentedMem(const MachineInstrBuilder &MIB, MCPhysReg Seg, MCPhysReg Reg,
85                 int Offset = 0) {
86   return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg);
87 }
88 
addProlog(MachineFunction & Fn,const TargetInstrInfo * TII,MachineBasicBlock & MBB,const DebugLoc & DL)89 static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
90                       MachineBasicBlock &MBB, const DebugLoc &DL) {
91   const MCPhysReg ReturnReg = X86::R10;
92   const MCPhysReg OffsetReg = X86::R11;
93 
94   auto MBBI = MBB.begin();
95   // mov r10, [rsp]
96   addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(ReturnReg),
97                X86::RSP);
98   // xor r11, r11
99   BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr))
100       .addDef(OffsetReg)
101       .addReg(OffsetReg, RegState::Undef)
102       .addReg(OffsetReg, RegState::Undef);
103   // add QWORD [gs:r11], 8
104   addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS,
105                   OffsetReg)
106       .addImm(8);
107   // mov r11, [gs:r11]
108   addSegmentedMem(
109       BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS,
110       OffsetReg);
111   // mov [gs:r11], r10
112   addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS,
113                   OffsetReg)
114       .addReg(ReturnReg);
115 }
116 
addPrologLeaf(MachineFunction & Fn,const TargetInstrInfo * TII,MachineBasicBlock & MBB,const DebugLoc & DL,MCPhysReg FreeRegister)117 static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
118                           MachineBasicBlock &MBB, const DebugLoc &DL,
119                           MCPhysReg FreeRegister) {
120   // mov REG, [rsp]
121   addDirectMem(BuildMI(MBB, MBB.begin(), DL, TII->get(X86::MOV64rm))
122                    .addDef(FreeRegister),
123                X86::RSP);
124 }
125 
addEpilog(const TargetInstrInfo * TII,MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock & TrapBB)126 static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
127                       MachineInstr &MI, MachineBasicBlock &TrapBB) {
128   const DebugLoc &DL = MI.getDebugLoc();
129 
130   // xor r11, r11
131   BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
132       .addDef(X86::R11)
133       .addReg(X86::R11, RegState::Undef)
134       .addReg(X86::R11, RegState::Undef);
135   // mov r10, [gs:r11]
136   addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
137                   X86::GS, X86::R11);
138   // mov r10, [gs:r10]
139   addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
140                   X86::GS, X86::R10);
141   // sub QWORD [gs:r11], 8
142   // This instruction should not be moved up to avoid a signal race.
143   addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)),
144                   X86::GS, X86::R11)
145       .addImm(8);
146   // cmp [rsp], r10
147   addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
148       .addReg(X86::R10);
149   // jne trap
150   BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
151   MBB.addSuccessor(&TrapBB);
152 }
153 
addEpilogLeaf(const TargetInstrInfo * TII,MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock & TrapBB,MCPhysReg FreeRegister)154 static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
155                           MachineInstr &MI, MachineBasicBlock &TrapBB,
156                           MCPhysReg FreeRegister) {
157   const DebugLoc &DL = MI.getDebugLoc();
158 
159   // cmp [rsp], REG
160   addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
161       .addReg(FreeRegister);
162   // jne trap
163   BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
164   MBB.addSuccessor(&TrapBB);
165 }
166 
addEpilogOnlyR10(const TargetInstrInfo * TII,MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock & TrapBB)167 static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
168                              MachineInstr &MI, MachineBasicBlock &TrapBB) {
169   const DebugLoc &DL = MI.getDebugLoc();
170 
171   // xor r10, r10
172   BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
173       .addDef(X86::R10)
174       .addReg(X86::R10, RegState::Undef)
175       .addReg(X86::R10, RegState::Undef);
176   // mov r10, [gs:r10]
177   addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
178                   X86::GS, X86::R10);
179   // mov r10, [gs:r10]
180   addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
181                   X86::GS, X86::R10);
182   // sub QWORD [gs:0], 8
183   // This instruction should not be moved up to avoid a signal race.
184   addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0)
185       .addImm(8);
186   // cmp [rsp], r10
187   addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
188       .addReg(X86::R10);
189   // jne trap
190   BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
191   MBB.addSuccessor(&TrapBB);
192 }
193 
runOnMachineFunction(MachineFunction & Fn)194 bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) {
195   if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) ||
196       Fn.getFunction().hasFnAttribute(Attribute::Naked))
197     return false;
198 
199   if (Fn.empty() || !Fn.getRegInfo().tracksLiveness())
200     return false;
201 
202   // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live
203   // on entry for parameters with the nest attribute.)
204   if (Fn.front().isLiveIn(X86::R10) || Fn.front().isLiveIn(X86::R11))
205     return false;
206 
207   // FIXME: Skip functions with conditional and r10 tail calls for now.
208   bool HasReturn = false;
209   for (auto &MBB : Fn) {
210     if (MBB.empty())
211       continue;
212 
213     const MachineInstr &MI = MBB.instr_back();
214     if (MI.isReturn())
215       HasReturn = true;
216 
217     if (MI.isReturn() && MI.isCall()) {
218       if (MI.findRegisterUseOperand(X86::EFLAGS))
219         return false;
220       // This should only be possible on Windows 64 (see GR64_TC versus
221       // GR64_TCW64.)
222       if (MI.findRegisterUseOperand(X86::R10) ||
223           MI.hasRegisterImplicitUseOperand(X86::R10))
224         return false;
225     }
226   }
227 
228   if (!HasReturn)
229     return false;
230 
231   // For leaf functions:
232   // 1. Do not instrument very short functions where it would not improve that
233   //    function's security.
234   // 2. Detect if there is an unused caller-saved register we can reserve to
235   //    hold the return address instead of writing/reading it from the shadow
236   //    call stack.
237   MCPhysReg LeafFuncRegister = X86::NoRegister;
238   if (!Fn.getFrameInfo().adjustsStack()) {
239     size_t InstructionCount = 0;
240     std::bitset<X86::NUM_TARGET_REGS> UsedRegs;
241     for (auto &MBB : Fn) {
242       for (auto &LiveIn : MBB.liveins())
243         UsedRegs.set(LiveIn.PhysReg);
244       for (auto &MI : MBB) {
245         if (!MI.isDebugValue() && !MI.isCFIInstruction() && !MI.isLabel())
246           InstructionCount++;
247         for (auto &Op : MI.operands())
248           if (Op.isReg() && Op.isDef())
249             UsedRegs.set(Op.getReg());
250       }
251     }
252 
253     if (InstructionCount <= SkipLeafInstructions)
254       return false;
255 
256     std::bitset<X86::NUM_TARGET_REGS> CalleeSavedRegs;
257     const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs();
258     for (size_t i = 0; CSRegs[i]; i++)
259       CalleeSavedRegs.set(CSRegs[i]);
260 
261     const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
262     for (auto &Reg : X86::GR64_NOSPRegClass.getRegisters()) {
263       // FIXME: Optimization opportunity: spill/restore a callee-saved register
264       // if a caller-saved register is unavailable.
265       if (CalleeSavedRegs.test(Reg))
266         continue;
267 
268       bool Used = false;
269       for (MCSubRegIterator SR(Reg, TRI, true); SR.isValid(); ++SR)
270         if ((Used = UsedRegs.test(*SR)))
271           break;
272 
273       if (!Used) {
274         LeafFuncRegister = Reg;
275         break;
276       }
277     }
278   }
279 
280   const bool LeafFuncOptimization = LeafFuncRegister != X86::NoRegister;
281   if (LeafFuncOptimization)
282     // Mark the leaf function register live-in for all MBBs except the entry MBB
283     for (auto I = ++Fn.begin(), E = Fn.end(); I != E; ++I)
284       I->addLiveIn(LeafFuncRegister);
285 
286   MachineBasicBlock &MBB = Fn.front();
287   const MachineBasicBlock *NonEmpty = MBB.empty() ? MBB.getFallThrough() : &MBB;
288   const DebugLoc &DL = NonEmpty->front().getDebugLoc();
289 
290   const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
291   if (LeafFuncOptimization)
292     addPrologLeaf(Fn, TII, MBB, DL, LeafFuncRegister);
293   else
294     addProlog(Fn, TII, MBB, DL);
295 
296   MachineBasicBlock *Trap = nullptr;
297   for (auto &MBB : Fn) {
298     if (MBB.empty())
299       continue;
300 
301     MachineInstr &MI = MBB.instr_back();
302     if (MI.isReturn()) {
303       if (!Trap) {
304         Trap = Fn.CreateMachineBasicBlock();
305         BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP));
306         Fn.push_back(Trap);
307       }
308 
309       if (LeafFuncOptimization)
310         addEpilogLeaf(TII, MBB, MI, *Trap, LeafFuncRegister);
311       else if (MI.findRegisterUseOperand(X86::R11))
312         addEpilogOnlyR10(TII, MBB, MI, *Trap);
313       else
314         addEpilog(TII, MBB, MI, *Trap);
315     }
316   }
317 
318   return true;
319 }
320 
321 INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack",
322                 false, false)
323 
createShadowCallStackPass()324 FunctionPass *llvm::createShadowCallStackPass() {
325   return new ShadowCallStack();
326 }
327