1 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Does various transformations for exception handling.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "WebAssembly.h"
17 #include "WebAssemblySubtarget.h"
18 #include "WebAssemblyUtilities.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/WasmEHFuncInfo.h"
21 #include "llvm/MC/MCAsmInfo.h"
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "wasm-exception-prepare"
25 
26 namespace {
27 class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
getPassName() const28   StringRef getPassName() const override {
29     return "WebAssembly Prepare Exception";
30   }
31 
32   bool runOnMachineFunction(MachineFunction &MF) override;
33 
34   bool replaceFuncletReturns(MachineFunction &MF);
35   bool hoistCatches(MachineFunction &MF);
36   bool addCatchAlls(MachineFunction &MF);
37   bool addRethrows(MachineFunction &MF);
38   bool ensureSingleBBTermPads(MachineFunction &MF);
39   bool mergeTerminatePads(MachineFunction &MF);
40   bool addCatchAllTerminatePads(MachineFunction &MF);
41 
42 public:
43   static char ID; // Pass identification, replacement for typeid
WebAssemblyLateEHPrepare()44   WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
45 };
46 } // end anonymous namespace
47 
48 char WebAssemblyLateEHPrepare::ID = 0;
49 INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
50                 "WebAssembly Exception Preparation", false, false)
51 
createWebAssemblyLateEHPrepare()52 FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
53   return new WebAssemblyLateEHPrepare();
54 }
55 
56 // Returns the nearest EH pad that dominates this instruction. This does not use
57 // dominator analysis; it just does BFS on its predecessors until arriving at an
58 // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all
59 // possible search paths should be the same.
60 // Returns nullptr in case it does not find any EH pad in the search, or finds
61 // multiple different EH pads.
GetMatchingEHPad(MachineInstr * MI)62 MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
63   MachineFunction *MF = MI->getParent()->getParent();
64   SmallVector<MachineBasicBlock *, 2> WL;
65   SmallPtrSet<MachineBasicBlock *, 2> Visited;
66   WL.push_back(MI->getParent());
67   MachineBasicBlock *EHPad = nullptr;
68   while (!WL.empty()) {
69     MachineBasicBlock *MBB = WL.pop_back_val();
70     if (Visited.count(MBB))
71       continue;
72     Visited.insert(MBB);
73     if (MBB->isEHPad()) {
74       if (EHPad && EHPad != MBB)
75         return nullptr;
76       EHPad = MBB;
77       continue;
78     }
79     if (MBB == &MF->front())
80       return nullptr;
81     WL.append(MBB->pred_begin(), MBB->pred_end());
82   }
83   return EHPad;
84 }
85 
86 // Erases the given BB and all its children from the function. If other BBs have
87 // this BB as a successor, the successor relationships will be deleted as well.
EraseBBAndChildren(MachineBasicBlock * MBB)88 static void EraseBBAndChildren(MachineBasicBlock *MBB) {
89   SmallVector<MachineBasicBlock *, 8> WL;
90   WL.push_back(MBB);
91   while (!WL.empty()) {
92     MachineBasicBlock *MBB = WL.pop_back_val();
93     for (auto *Pred : MBB->predecessors())
94       Pred->removeSuccessor(MBB);
95     for (auto *Succ : MBB->successors()) {
96       WL.push_back(Succ);
97       MBB->removeSuccessor(Succ);
98     }
99     MBB->eraseFromParent();
100   }
101 }
102 
runOnMachineFunction(MachineFunction & MF)103 bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
104   if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
105       ExceptionHandling::Wasm)
106     return false;
107 
108   bool Changed = false;
109   Changed |= addRethrows(MF);
110   if (!MF.getFunction().hasPersonalityFn())
111     return Changed;
112   Changed |= replaceFuncletReturns(MF);
113   Changed |= hoistCatches(MF);
114   Changed |= addCatchAlls(MF);
115   Changed |= ensureSingleBBTermPads(MF);
116   Changed |= mergeTerminatePads(MF);
117   Changed |= addCatchAllTerminatePads(MF);
118   return Changed;
119 }
120 
replaceFuncletReturns(MachineFunction & MF)121 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
122   bool Changed = false;
123   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
124   auto *EHInfo = MF.getWasmEHFuncInfo();
125 
126   for (auto &MBB : MF) {
127     auto Pos = MBB.getFirstTerminator();
128     if (Pos == MBB.end())
129       continue;
130     MachineInstr *TI = &*Pos;
131 
132     switch (TI->getOpcode()) {
133     case WebAssembly::CATCHRET: {
134       // Replace a catchret with a branch
135       MachineBasicBlock *TBB = TI->getOperand(0).getMBB();
136       if (!MBB.isLayoutSuccessor(TBB))
137         BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR))
138             .addMBB(TBB);
139       TI->eraseFromParent();
140       Changed = true;
141       break;
142     }
143     case WebAssembly::CLEANUPRET: {
144       // Replace a cleanupret with a rethrow
145       if (EHInfo->hasThrowUnwindDest(&MBB))
146         BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
147             .addMBB(EHInfo->getThrowUnwindDest(&MBB));
148       else
149         BuildMI(MBB, TI, TI->getDebugLoc(),
150                 TII.get(WebAssembly::RETHROW_TO_CALLER));
151 
152       TI->eraseFromParent();
153       Changed = true;
154       break;
155     }
156     }
157   }
158   return Changed;
159 }
160 
161 // Hoist catch instructions to the beginning of their matching EH pad BBs in
162 // case,
163 // (1) catch instruction is not the first instruction in EH pad.
164 // ehpad:
165 //   some_other_instruction
166 //   ...
167 //   %exn = catch 0
168 // (2) catch instruction is in a non-EH pad BB. For example,
169 // ehpad:
170 //   br bb0
171 // bb0:
172 //   %exn = catch 0
hoistCatches(MachineFunction & MF)173 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
174   bool Changed = false;
175   SmallVector<MachineInstr *, 16> Catches;
176   for (auto &MBB : MF)
177     for (auto &MI : MBB)
178       if (WebAssembly::isCatch(MI))
179         Catches.push_back(&MI);
180 
181   for (auto *Catch : Catches) {
182     MachineBasicBlock *EHPad = GetMatchingEHPad(Catch);
183     assert(EHPad && "No matching EH pad for catch");
184     if (EHPad->begin() == Catch)
185       continue;
186     Changed = true;
187     EHPad->insert(EHPad->begin(), Catch->removeFromParent());
188   }
189   return Changed;
190 }
191 
192 // Add catch_all to beginning of cleanup pads.
addCatchAlls(MachineFunction & MF)193 bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
194   bool Changed = false;
195   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
196 
197   for (auto &MBB : MF) {
198     if (!MBB.isEHPad())
199       continue;
200     // This runs after hoistCatches(), so we assume that if there is a catch,
201     // that should be the first instruction in an EH pad.
202     if (!WebAssembly::isCatch(*MBB.begin())) {
203       Changed = true;
204       BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(),
205               TII.get(WebAssembly::CATCH_ALL));
206     }
207   }
208   return Changed;
209 }
210 
211 // Add a 'rethrow' instruction after __cxa_rethrow() call
addRethrows(MachineFunction & MF)212 bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
213   bool Changed = false;
214   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
215   auto *EHInfo = MF.getWasmEHFuncInfo();
216 
217   for (auto &MBB : MF)
218     for (auto &MI : MBB) {
219       // Check if it is a call to __cxa_rethrow()
220       if (!MI.isCall())
221         continue;
222       MachineOperand &CalleeOp = MI.getOperand(0);
223       if (!CalleeOp.isGlobal() ||
224           CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn)
225         continue;
226 
227       // Now we have __cxa_rethrow() call
228       Changed = true;
229       auto InsertPt = std::next(MachineBasicBlock::iterator(MI));
230       while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs
231         ++InsertPt;
232       MachineInstr *Rethrow = nullptr;
233       if (EHInfo->hasThrowUnwindDest(&MBB))
234         Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
235                           TII.get(WebAssembly::RETHROW))
236                       .addMBB(EHInfo->getThrowUnwindDest(&MBB));
237       else
238         Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
239                           TII.get(WebAssembly::RETHROW_TO_CALLER));
240 
241       // Becasue __cxa_rethrow does not return, the instruction after the
242       // rethrow should be an unreachable or a branch to another BB that should
243       // eventually lead to an unreachable. Delete it because rethrow itself is
244       // a terminator, and also delete non-EH pad successors if any.
245       MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
246       for (auto *Succ : MBB.successors())
247         if (!Succ->isEHPad())
248           EraseBBAndChildren(Succ);
249     }
250   return Changed;
251 }
252 
253 // Terminate pads are an single-BB EH pad in the form of
254 // termpad:
255 //   %exn = catch 0
256 //   call @__clang_call_terminate(%exn)
257 //   unreachable
258 // (There can be set_local and get_locals before the call if we didn't run
259 // RegStackify)
260 // But code transformations can change or add more control flow, so the call to
261 // __clang_call_terminate() function may not be in the original EH pad anymore.
262 // This ensures every terminate pad is a single BB in the form illustrated
263 // above.
ensureSingleBBTermPads(MachineFunction & MF)264 bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
265   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
266 
267   // Find calls to __clang_call_terminate()
268   SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
269   for (auto &MBB : MF)
270     for (auto &MI : MBB)
271       if (MI.isCall()) {
272         const MachineOperand &CalleeOp = MI.getOperand(0);
273         if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
274                                        WebAssembly::ClangCallTerminateFn)
275           ClangCallTerminateCalls.push_back(&MI);
276       }
277 
278   bool Changed = false;
279   for (auto *Call : ClangCallTerminateCalls) {
280     MachineBasicBlock *EHPad = GetMatchingEHPad(Call);
281     assert(EHPad && "No matching EH pad for catch");
282 
283     // If it is already the form we want, skip it
284     if (Call->getParent() == EHPad &&
285         Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
286       continue;
287 
288     // In case the __clang_call_terminate() call is not in its matching EH pad,
289     // move the call to the end of EH pad and add an unreachable instruction
290     // after that. Delete all successors and their children if any, because here
291     // the program terminates.
292     Changed = true;
293     MachineInstr *Catch = &*EHPad->begin();
294     // This runs after hoistCatches(), so catch instruction should be at the top
295     assert(WebAssembly::isCatch(*Catch));
296     // Takes the result register of the catch instruction as argument. There may
297     // have been some other set_local/get_locals in between, but at this point
298     // we don't care.
299     Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
300     auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
301     EHPad->insert(InsertPos, Call->removeFromParent());
302     BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
303             TII.get(WebAssembly::UNREACHABLE));
304     EHPad->erase(InsertPos, EHPad->end());
305     for (auto *Succ : EHPad->successors())
306       EraseBBAndChildren(Succ);
307   }
308   return Changed;
309 }
310 
311 // In case there are multiple terminate pads, merge them into one for code size.
312 // This runs after ensureSingleBBTermPads() and assumes every terminate pad is a
313 // single BB.
314 // In principle this violates EH scope relationship because it can merge
315 // multiple inner EH scopes, each of which is in different outer EH scope. But
316 // getEHScopeMembership() function will not be called after this, so it is fine.
mergeTerminatePads(MachineFunction & MF)317 bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) {
318   SmallVector<MachineBasicBlock *, 8> TermPads;
319   for (auto &MBB : MF)
320     if (WebAssembly::isCatchTerminatePad(MBB))
321       TermPads.push_back(&MBB);
322   if (TermPads.empty())
323     return false;
324 
325   MachineBasicBlock *UniqueTermPad = TermPads.front();
326   for (auto *TermPad :
327        llvm::make_range(std::next(TermPads.begin()), TermPads.end())) {
328     SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(),
329                                               TermPad->pred_end());
330     for (auto *Pred : Preds)
331       Pred->replaceSuccessor(TermPad, UniqueTermPad);
332     TermPad->eraseFromParent();
333   }
334   return true;
335 }
336 
337 // Terminate pads are cleanup pads, so they should start with a 'catch_all'
338 // instruction. But in the Itanium model, when we have a C++ exception object,
339 // we pass them to __clang_call_terminate function, which calls __cxa_end_catch
340 // with the passed exception pointer and then std::terminate. This is the reason
341 // that terminate pads are generated with not a catch_all but a catch
342 // instruction in clang and earlier llvm passes. Here we append a terminate pad
343 // with a catch_all after each existing terminate pad so we can also catch
344 // foreign exceptions. For every terminate pad:
345 //   %exn = catch 0
346 //   call @__clang_call_terminate(%exn)
347 //   unreachable
348 // We append this BB right after that:
349 //   catch_all
350 //   call @std::terminate()
351 //   unreachable
addCatchAllTerminatePads(MachineFunction & MF)352 bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) {
353   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
354   SmallVector<MachineBasicBlock *, 8> TermPads;
355   for (auto &MBB : MF)
356     if (WebAssembly::isCatchTerminatePad(MBB))
357       TermPads.push_back(&MBB);
358   if (TermPads.empty())
359     return false;
360 
361   Function *StdTerminateFn =
362       MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn);
363   assert(StdTerminateFn && "There is no std::terminate() function");
364   for (auto *CatchTermPad : TermPads) {
365     DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin());
366     auto *CatchAllTermPad = MF.CreateMachineBasicBlock();
367     MF.insert(std::next(MachineFunction::iterator(CatchTermPad)),
368               CatchAllTermPad);
369     CatchAllTermPad->setIsEHPad();
370     BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL));
371     BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID))
372         .addGlobalAddress(StdTerminateFn);
373     BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE));
374 
375     // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not
376     // a successor of an existing terminate pad. CatchAllTermPad should have all
377     // predecessors CatchTermPad has instead. This is a hack to force
378     // CatchAllTermPad be always sorted right after CatchTermPad; the correct
379     // predecessor-successor relationships will be restored in CFGStackify pass.
380     CatchTermPad->addSuccessor(CatchAllTermPad);
381   }
382   return true;
383 }
384