1 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a register stacking pass.
12 ///
13 /// This pass reorders instructions to put register uses and defs in an order
14 /// such that they form single-use expression trees. Registers fitting this form
15 /// are then marked as "stackified", meaning references to them are replaced by
16 /// "push" and "pop" from the stack.
17 ///
/// This is primarily a code size optimization, since temporary values on the
/// expression stack don't need to be named.
20 ///
21 //===----------------------------------------------------------------------===//
22 
23 #include "WebAssembly.h"
24 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
25 #include "WebAssemblyMachineFunctionInfo.h"
26 #include "WebAssemblySubtarget.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
29 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
30 #include "llvm/CodeGen/MachineDominators.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/Passes.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "wasm-reg-stackify"
39 
40 namespace {
41 class WebAssemblyRegStackify final : public MachineFunctionPass {
getPassName() const42   const char *getPassName() const override {
43     return "WebAssembly Register Stackify";
44   }
45 
getAnalysisUsage(AnalysisUsage & AU) const46   void getAnalysisUsage(AnalysisUsage &AU) const override {
47     AU.setPreservesCFG();
48     AU.addRequired<AAResultsWrapperPass>();
49     AU.addRequired<MachineDominatorTree>();
50     AU.addRequired<LiveIntervals>();
51     AU.addPreserved<MachineBlockFrequencyInfo>();
52     AU.addPreserved<SlotIndexes>();
53     AU.addPreserved<LiveIntervals>();
54     AU.addPreservedID(LiveVariablesID);
55     AU.addPreserved<MachineDominatorTree>();
56     MachineFunctionPass::getAnalysisUsage(AU);
57   }
58 
59   bool runOnMachineFunction(MachineFunction &MF) override;
60 
61 public:
62   static char ID; // Pass identification, replacement for typeid
WebAssemblyRegStackify()63   WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
64 };
65 } // end anonymous namespace
66 
67 char WebAssemblyRegStackify::ID = 0;
createWebAssemblyRegStackify()68 FunctionPass *llvm::createWebAssemblyRegStackify() {
69   return new WebAssemblyRegStackify();
70 }
71 
72 // Decorate the given instruction with implicit operands that enforce the
73 // expression stack ordering constraints for an instruction which is on
74 // the expression stack.
ImposeStackOrdering(MachineInstr * MI)75 static void ImposeStackOrdering(MachineInstr *MI) {
76   // Write the opaque EXPR_STACK register.
77   if (!MI->definesRegister(WebAssembly::EXPR_STACK))
78     MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
79                                              /*isDef=*/true,
80                                              /*isImp=*/true));
81 
82   // Also read the opaque EXPR_STACK register.
83   if (!MI->readsRegister(WebAssembly::EXPR_STACK))
84     MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
85                                              /*isDef=*/false,
86                                              /*isImp=*/true));
87 }
88 
89 // Determine whether a call to the callee referenced by
90 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
91 // effects.
QueryCallee(const MachineInstr & MI,unsigned CalleeOpNo,bool & Read,bool & Write,bool & Effects,bool & StackPointer)92 static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
93                         bool &Write, bool &Effects, bool &StackPointer) {
94   // All calls can use the stack pointer.
95   StackPointer = true;
96 
97   const MachineOperand &MO = MI.getOperand(CalleeOpNo);
98   if (MO.isGlobal()) {
99     const Constant *GV = MO.getGlobal();
100     if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
101       if (!GA->isInterposable())
102         GV = GA->getAliasee();
103 
104     if (const Function *F = dyn_cast<Function>(GV)) {
105       if (!F->doesNotThrow())
106         Effects = true;
107       if (F->doesNotAccessMemory())
108         return;
109       if (F->onlyReadsMemory()) {
110         Read = true;
111         return;
112       }
113     }
114   }
115 
116   // Assume the worst.
117   Write = true;
118   Read = true;
119   Effects = true;
120 }
121 
122 // Determine whether MI reads memory, writes memory, has side effects,
123 // and/or uses the __stack_pointer value.
Query(const MachineInstr & MI,AliasAnalysis & AA,bool & Read,bool & Write,bool & Effects,bool & StackPointer)124 static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
125                   bool &Write, bool &Effects, bool &StackPointer) {
126   assert(!MI.isPosition());
127   assert(!MI.isTerminator());
128 
129   if (MI.isDebugValue())
130     return;
131 
132   // Check for loads.
133   if (MI.mayLoad() && !MI.isInvariantLoad(&AA))
134     Read = true;
135 
136   // Check for stores.
137   if (MI.mayStore()) {
138     Write = true;
139 
140     // Check for stores to __stack_pointer.
141     for (auto MMO : MI.memoperands()) {
142       const MachinePointerInfo &MPI = MMO->getPointerInfo();
143       if (MPI.V.is<const PseudoSourceValue *>()) {
144         auto PSV = MPI.V.get<const PseudoSourceValue *>();
145         if (const ExternalSymbolPseudoSourceValue *EPSV =
146                 dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
147           if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
148             StackPointer = true;
149       }
150     }
151   } else if (MI.hasOrderedMemoryRef()) {
152     switch (MI.getOpcode()) {
153     case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
154     case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
155     case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
156     case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
157     case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
158     case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
159     case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
160     case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
161       // These instruction have hasUnmodeledSideEffects() returning true
162       // because they trap on overflow and invalid so they can't be arbitrarily
163       // moved, however hasOrderedMemoryRef() interprets this plus their lack
164       // of memoperands as having a potential unknown memory reference.
165       break;
166     default:
167       // Record volatile accesses, unless it's a call, as calls are handled
168       // specially below.
169       if (!MI.isCall()) {
170         Write = true;
171         Effects = true;
172       }
173       break;
174     }
175   }
176 
177   // Check for side effects.
178   if (MI.hasUnmodeledSideEffects()) {
179     switch (MI.getOpcode()) {
180     case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
181     case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
182     case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
183     case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
184     case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
185     case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
186     case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
187     case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
188       // These instructions have hasUnmodeledSideEffects() returning true
189       // because they trap on overflow and invalid so they can't be arbitrarily
190       // moved, however in the specific case of register stackifying, it is safe
191       // to move them because overflow and invalid are Undefined Behavior.
192       break;
193     default:
194       Effects = true;
195       break;
196     }
197   }
198 
199   // Analyze calls.
200   if (MI.isCall()) {
201     switch (MI.getOpcode()) {
202     case WebAssembly::CALL_VOID:
203     case WebAssembly::CALL_INDIRECT_VOID:
204       QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
205       break;
206     case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
207     case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
208     case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
209     case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
210       QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
211       break;
212     default:
213       llvm_unreachable("unexpected call opcode");
214     }
215   }
216 }
217 
218 // Test whether Def is safe and profitable to rematerialize.
ShouldRematerialize(const MachineInstr & Def,AliasAnalysis & AA,const WebAssemblyInstrInfo * TII)219 static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
220                                 const WebAssemblyInstrInfo *TII) {
221   return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
222 }
223 
224 // Identify the definition for this register at this point. This is a
225 // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
226 // LiveIntervals to handle complex cases.
GetVRegDef(unsigned Reg,const MachineInstr * Insert,const MachineRegisterInfo & MRI,const LiveIntervals & LIS)227 static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
228                                 const MachineRegisterInfo &MRI,
229                                 const LiveIntervals &LIS)
230 {
231   // Most registers are in SSA form here so we try a quick MRI query first.
232   if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
233     return Def;
234 
235   // MRI doesn't know what the Def is. Try asking LIS.
236   if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
237           LIS.getInstructionIndex(*Insert)))
238     return LIS.getInstructionFromIndex(ValNo->def);
239 
240   return nullptr;
241 }
242 
243 // Test whether Reg, as defined at Def, has exactly one use. This is a
244 // generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
245 // to handle complex cases.
HasOneUse(unsigned Reg,MachineInstr * Def,MachineRegisterInfo & MRI,MachineDominatorTree & MDT,LiveIntervals & LIS)246 static bool HasOneUse(unsigned Reg, MachineInstr *Def,
247                       MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
248                       LiveIntervals &LIS) {
249   // Most registers are in SSA form here so we try a quick MRI query first.
250   if (MRI.hasOneUse(Reg))
251     return true;
252 
253   bool HasOne = false;
254   const LiveInterval &LI = LIS.getInterval(Reg);
255   const VNInfo *DefVNI = LI.getVNInfoAt(
256       LIS.getInstructionIndex(*Def).getRegSlot());
257   assert(DefVNI);
258   for (auto I : MRI.use_nodbg_operands(Reg)) {
259     const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
260     if (Result.valueIn() == DefVNI) {
261       if (!Result.isKill())
262         return false;
263       if (HasOne)
264         return false;
265       HasOne = true;
266     }
267   }
268   return HasOne;
269 }
270 
271 // Test whether it's safe to move Def to just before Insert.
272 // TODO: Compute memory dependencies in a way that doesn't require always
273 // walking the block.
274 // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
275 // more precise.
IsSafeToMove(const MachineInstr * Def,const MachineInstr * Insert,AliasAnalysis & AA,const LiveIntervals & LIS,const MachineRegisterInfo & MRI)276 static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
277                          AliasAnalysis &AA, const LiveIntervals &LIS,
278                          const MachineRegisterInfo &MRI) {
279   assert(Def->getParent() == Insert->getParent());
280 
281   // Check for register dependencies.
282   for (const MachineOperand &MO : Def->operands()) {
283     if (!MO.isReg() || MO.isUndef())
284       continue;
285     unsigned Reg = MO.getReg();
286 
287     // If the register is dead here and at Insert, ignore it.
288     if (MO.isDead() && Insert->definesRegister(Reg) &&
289         !Insert->readsRegister(Reg))
290       continue;
291 
292     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
293       // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
294       // from moving down, and we've already checked for that.
295       if (Reg == WebAssembly::ARGUMENTS)
296         continue;
297       // If the physical register is never modified, ignore it.
298       if (!MRI.isPhysRegModified(Reg))
299         continue;
300       // Otherwise, it's a physical register with unknown liveness.
301       return false;
302     }
303 
304     // Ask LiveIntervals whether moving this virtual register use or def to
305     // Insert will change which value numbers are seen.
306     //
307     // If the operand is a use of a register that is also defined in the same
308     // instruction, test that the newly defined value reaches the insert point,
309     // since the operand will be moving along with the def.
310     const LiveInterval &LI = LIS.getInterval(Reg);
311     VNInfo *DefVNI =
312         (MO.isDef() || Def->definesRegister(Reg)) ?
313         LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) :
314         LI.getVNInfoBefore(LIS.getInstructionIndex(*Def));
315     assert(DefVNI && "Instruction input missing value number");
316     VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert));
317     if (InsVNI && DefVNI != InsVNI)
318       return false;
319   }
320 
321   bool Read = false, Write = false, Effects = false, StackPointer = false;
322   Query(*Def, AA, Read, Write, Effects, StackPointer);
323 
324   // If the instruction does not access memory and has no side effects, it has
325   // no additional dependencies.
326   if (!Read && !Write && !Effects && !StackPointer)
327     return true;
328 
329   // Scan through the intervening instructions between Def and Insert.
330   MachineBasicBlock::const_iterator D(Def), I(Insert);
331   for (--I; I != D; --I) {
332     bool InterveningRead = false;
333     bool InterveningWrite = false;
334     bool InterveningEffects = false;
335     bool InterveningStackPointer = false;
336     Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
337           InterveningStackPointer);
338     if (Effects && InterveningEffects)
339       return false;
340     if (Read && InterveningWrite)
341       return false;
342     if (Write && (InterveningRead || InterveningWrite))
343       return false;
344     if (StackPointer && InterveningStackPointer)
345       return false;
346   }
347 
348   return true;
349 }
350 
351 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
OneUseDominatesOtherUses(unsigned Reg,const MachineOperand & OneUse,const MachineBasicBlock & MBB,const MachineRegisterInfo & MRI,const MachineDominatorTree & MDT,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI)352 static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
353                                      const MachineBasicBlock &MBB,
354                                      const MachineRegisterInfo &MRI,
355                                      const MachineDominatorTree &MDT,
356                                      LiveIntervals &LIS,
357                                      WebAssemblyFunctionInfo &MFI) {
358   const LiveInterval &LI = LIS.getInterval(Reg);
359 
360   const MachineInstr *OneUseInst = OneUse.getParent();
361   VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
362 
363   for (const MachineOperand &Use : MRI.use_operands(Reg)) {
364     if (&Use == &OneUse)
365       continue;
366 
367     const MachineInstr *UseInst = Use.getParent();
368     VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
369 
370     if (UseVNI != OneUseVNI)
371       continue;
372 
373     const MachineInstr *OneUseInst = OneUse.getParent();
374     if (UseInst == OneUseInst) {
375       // Another use in the same instruction. We need to ensure that the one
376       // selected use happens "before" it.
377       if (&OneUse > &Use)
378         return false;
379     } else {
380       // Test that the use is dominated by the one selected use.
381       while (!MDT.dominates(OneUseInst, UseInst)) {
382         // Actually, dominating is over-conservative. Test that the use would
383         // happen after the one selected use in the stack evaluation order.
384         //
385         // This is needed as a consequence of using implicit get_locals for
386         // uses and implicit set_locals for defs.
387         if (UseInst->getDesc().getNumDefs() == 0)
388           return false;
389         const MachineOperand &MO = UseInst->getOperand(0);
390         if (!MO.isReg())
391           return false;
392         unsigned DefReg = MO.getReg();
393         if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
394             !MFI.isVRegStackified(DefReg))
395           return false;
396         assert(MRI.hasOneUse(DefReg));
397         const MachineOperand &NewUse = *MRI.use_begin(DefReg);
398         const MachineInstr *NewUseInst = NewUse.getParent();
399         if (NewUseInst == OneUseInst) {
400           if (&OneUse > &NewUse)
401             return false;
402           break;
403         }
404         UseInst = NewUseInst;
405       }
406     }
407   }
408   return true;
409 }
410 
411 /// Get the appropriate tee_local opcode for the given register class.
GetTeeLocalOpcode(const TargetRegisterClass * RC)412 static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
413   if (RC == &WebAssembly::I32RegClass)
414     return WebAssembly::TEE_LOCAL_I32;
415   if (RC == &WebAssembly::I64RegClass)
416     return WebAssembly::TEE_LOCAL_I64;
417   if (RC == &WebAssembly::F32RegClass)
418     return WebAssembly::TEE_LOCAL_F32;
419   if (RC == &WebAssembly::F64RegClass)
420     return WebAssembly::TEE_LOCAL_F64;
421   llvm_unreachable("Unexpected register class");
422 }
423 
424 // Shrink LI to its uses, cleaning up LI.
ShrinkToUses(LiveInterval & LI,LiveIntervals & LIS)425 static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
426   if (LIS.shrinkToUses(&LI)) {
427     SmallVector<LiveInterval*, 4> SplitLIs;
428     LIS.splitSeparateComponents(LI, SplitLIs);
429   }
430 }
431 
432 /// A single-use def in the same block with no intervening memory or register
433 /// dependencies; move the def down and nest it with the current instruction.
MoveForSingleUse(unsigned Reg,MachineOperand & Op,MachineInstr * Def,MachineBasicBlock & MBB,MachineInstr * Insert,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI,MachineRegisterInfo & MRI)434 static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
435                                       MachineInstr *Def,
436                                       MachineBasicBlock &MBB,
437                                       MachineInstr *Insert, LiveIntervals &LIS,
438                                       WebAssemblyFunctionInfo &MFI,
439                                       MachineRegisterInfo &MRI) {
440   DEBUG(dbgs() << "Move for single use: "; Def->dump());
441 
442   MBB.splice(Insert, &MBB, Def);
443   LIS.handleMove(*Def);
444 
445   if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
446     // No one else is using this register for anything so we can just stackify
447     // it in place.
448     MFI.stackifyVReg(Reg);
449   } else {
450     // The register may have unrelated uses or defs; create a new register for
451     // just our one def and use so that we can stackify it.
452     unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
453     Def->getOperand(0).setReg(NewReg);
454     Op.setReg(NewReg);
455 
456     // Tell LiveIntervals about the new register.
457     LIS.createAndComputeVirtRegInterval(NewReg);
458 
459     // Tell LiveIntervals about the changes to the old register.
460     LiveInterval &LI = LIS.getInterval(Reg);
461     LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
462                      LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
463                      /*RemoveDeadValNo=*/true);
464 
465     MFI.stackifyVReg(NewReg);
466 
467     DEBUG(dbgs() << " - Replaced register: "; Def->dump());
468   }
469 
470   ImposeStackOrdering(Def);
471   return Def;
472 }
473 
474 /// A trivially cloneable instruction; clone it and nest the new copy with the
475 /// current instruction.
RematerializeCheapDef(unsigned Reg,MachineOperand & Op,MachineInstr & Def,MachineBasicBlock & MBB,MachineBasicBlock::instr_iterator Insert,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI,MachineRegisterInfo & MRI,const WebAssemblyInstrInfo * TII,const WebAssemblyRegisterInfo * TRI)476 static MachineInstr *RematerializeCheapDef(
477     unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
478     MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
479     WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
480     const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
481   DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
482   DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
483 
484   unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
485   TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
486   Op.setReg(NewReg);
487   MachineInstr *Clone = &*std::prev(Insert);
488   LIS.InsertMachineInstrInMaps(*Clone);
489   LIS.createAndComputeVirtRegInterval(NewReg);
490   MFI.stackifyVReg(NewReg);
491   ImposeStackOrdering(Clone);
492 
493   DEBUG(dbgs() << " - Cloned to "; Clone->dump());
494 
495   // Shrink the interval.
496   bool IsDead = MRI.use_empty(Reg);
497   if (!IsDead) {
498     LiveInterval &LI = LIS.getInterval(Reg);
499     ShrinkToUses(LI, LIS);
500     IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
501   }
502 
503   // If that was the last use of the original, delete the original.
504   if (IsDead) {
505     DEBUG(dbgs() << " - Deleting original\n");
506     SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
507     LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
508     LIS.removeInterval(Reg);
509     LIS.RemoveMachineInstrFromMaps(Def);
510     Def.eraseFromParent();
511   }
512 
513   return Clone;
514 }
515 
516 /// A multiple-use def in the same block with no intervening memory or register
517 /// dependencies; move the def down, nest it with the current instruction, and
518 /// insert a tee_local to satisfy the rest of the uses. As an illustration,
519 /// rewrite this:
520 ///
521 ///    Reg = INST ...        // Def
522 ///    INST ..., Reg, ...    // Insert
523 ///    INST ..., Reg, ...
524 ///    INST ..., Reg, ...
525 ///
526 /// to this:
527 ///
528 ///    DefReg = INST ...     // Def (to become the new Insert)
529 ///    TeeReg, Reg = TEE_LOCAL_... DefReg
530 ///    INST ..., TeeReg, ... // Insert
531 ///    INST ..., Reg, ...
532 ///    INST ..., Reg, ...
533 ///
534 /// with DefReg and TeeReg stackified. This eliminates a get_local from the
535 /// resulting code.
MoveAndTeeForMultiUse(unsigned Reg,MachineOperand & Op,MachineInstr * Def,MachineBasicBlock & MBB,MachineInstr * Insert,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI,MachineRegisterInfo & MRI,const WebAssemblyInstrInfo * TII)536 static MachineInstr *MoveAndTeeForMultiUse(
537     unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
538     MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
539     MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
540   DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
541 
542   // Move Def into place.
543   MBB.splice(Insert, &MBB, Def);
544   LIS.handleMove(*Def);
545 
546   // Create the Tee and attach the registers.
547   const auto *RegClass = MRI.getRegClass(Reg);
548   unsigned TeeReg = MRI.createVirtualRegister(RegClass);
549   unsigned DefReg = MRI.createVirtualRegister(RegClass);
550   MachineOperand &DefMO = Def->getOperand(0);
551   MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
552                               TII->get(GetTeeLocalOpcode(RegClass)), TeeReg)
553                           .addReg(Reg, RegState::Define)
554                           .addReg(DefReg, getUndefRegState(DefMO.isDead()));
555   Op.setReg(TeeReg);
556   DefMO.setReg(DefReg);
557   SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
558   SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
559 
560   // Tell LiveIntervals we moved the original vreg def from Def to Tee.
561   LiveInterval &LI = LIS.getInterval(Reg);
562   LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
563   VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
564   I->start = TeeIdx;
565   ValNo->def = TeeIdx;
566   ShrinkToUses(LI, LIS);
567 
568   // Finish stackifying the new regs.
569   LIS.createAndComputeVirtRegInterval(TeeReg);
570   LIS.createAndComputeVirtRegInterval(DefReg);
571   MFI.stackifyVReg(DefReg);
572   MFI.stackifyVReg(TeeReg);
573   ImposeStackOrdering(Def);
574   ImposeStackOrdering(Tee);
575 
576   DEBUG(dbgs() << " - Replaced register: "; Def->dump());
577   DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
578   return Def;
579 }
580 
581 namespace {
582 /// A stack for walking the tree of instructions being built, visiting the
583 /// MachineOperands in DFS order.
584 class TreeWalkerState {
585   typedef MachineInstr::mop_iterator mop_iterator;
586   typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
587   typedef iterator_range<mop_reverse_iterator> RangeTy;
588   SmallVector<RangeTy, 4> Worklist;
589 
590 public:
TreeWalkerState(MachineInstr * Insert)591   explicit TreeWalkerState(MachineInstr *Insert) {
592     const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
593     if (Range.begin() != Range.end())
594       Worklist.push_back(reverse(Range));
595   }
596 
Done() const597   bool Done() const { return Worklist.empty(); }
598 
Pop()599   MachineOperand &Pop() {
600     RangeTy &Range = Worklist.back();
601     MachineOperand &Op = *Range.begin();
602     Range = drop_begin(Range, 1);
603     if (Range.begin() == Range.end())
604       Worklist.pop_back();
605     assert((Worklist.empty() ||
606             Worklist.back().begin() != Worklist.back().end()) &&
607            "Empty ranges shouldn't remain in the worklist");
608     return Op;
609   }
610 
611   /// Push Instr's operands onto the stack to be visited.
PushOperands(MachineInstr * Instr)612   void PushOperands(MachineInstr *Instr) {
613     const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
614     if (Range.begin() != Range.end())
615       Worklist.push_back(reverse(Range));
616   }
617 
618   /// Some of Instr's operands are on the top of the stack; remove them and
619   /// re-insert them starting from the beginning (because we've commuted them).
ResetTopOperands(MachineInstr * Instr)620   void ResetTopOperands(MachineInstr *Instr) {
621     assert(HasRemainingOperands(Instr) &&
622            "Reseting operands should only be done when the instruction has "
623            "an operand still on the stack");
624     Worklist.back() = reverse(Instr->explicit_uses());
625   }
626 
627   /// Test whether Instr has operands remaining to be visited at the top of
628   /// the stack.
HasRemainingOperands(const MachineInstr * Instr) const629   bool HasRemainingOperands(const MachineInstr *Instr) const {
630     if (Worklist.empty())
631       return false;
632     const RangeTy &Range = Worklist.back();
633     return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
634   }
635 
636   /// Test whether the given register is present on the stack, indicating an
637   /// operand in the tree that we haven't visited yet. Moving a definition of
638   /// Reg to a point in the tree after that would change its value.
639   ///
640   /// This is needed as a consequence of using implicit get_locals for
641   /// uses and implicit set_locals for defs.
IsOnStack(unsigned Reg) const642   bool IsOnStack(unsigned Reg) const {
643     for (const RangeTy &Range : Worklist)
644       for (const MachineOperand &MO : Range)
645         if (MO.isReg() && MO.getReg() == Reg)
646           return true;
647     return false;
648   }
649 };
650 
651 /// State to keep track of whether commuting is in flight or whether it's been
652 /// tried for the current instruction and didn't work.
653 class CommutingState {
654   /// There are effectively three states: the initial state where we haven't
655   /// started commuting anything and we don't know anything yet, the tenative
656   /// state where we've commuted the operands of the current instruction and are
657   /// revisting it, and the declined state where we've reverted the operands
658   /// back to their original order and will no longer commute it further.
659   bool TentativelyCommuting;
660   bool Declined;
661 
662   /// During the tentative state, these hold the operand indices of the commuted
663   /// operands.
664   unsigned Operand0, Operand1;
665 
666 public:
CommutingState()667   CommutingState() : TentativelyCommuting(false), Declined(false) {}
668 
669   /// Stackification for an operand was not successful due to ordering
670   /// constraints. If possible, and if we haven't already tried it and declined
671   /// it, commute Insert's operands and prepare to revisit it.
MaybeCommute(MachineInstr * Insert,TreeWalkerState & TreeWalker,const WebAssemblyInstrInfo * TII)672   void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
673                     const WebAssemblyInstrInfo *TII) {
674     if (TentativelyCommuting) {
675       assert(!Declined &&
676              "Don't decline commuting until you've finished trying it");
677       // Commuting didn't help. Revert it.
678       TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
679       TentativelyCommuting = false;
680       Declined = true;
681     } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
682       Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
683       Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
684       if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
685         // Tentatively commute the operands and try again.
686         TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
687         TreeWalker.ResetTopOperands(Insert);
688         TentativelyCommuting = true;
689         Declined = false;
690       }
691     }
692   }
693 
694   /// Stackification for some operand was successful. Reset to the default
695   /// state.
Reset()696   void Reset() {
697     TentativelyCommuting = false;
698     Declined = false;
699   }
700 };
701 } // end anonymous namespace
702 
runOnMachineFunction(MachineFunction & MF)703 bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
704   DEBUG(dbgs() << "********** Register Stackifying **********\n"
705                   "********** Function: "
706                << MF.getName() << '\n');
707 
708   bool Changed = false;
709   MachineRegisterInfo &MRI = MF.getRegInfo();
710   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
711   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
712   const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
713   AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
714   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
715   LiveIntervals &LIS = getAnalysis<LiveIntervals>();
716 
717   // Walk the instructions from the bottom up. Currently we don't look past
718   // block boundaries, and the blocks aren't ordered so the block visitation
719   // order isn't significant, but we may want to change this in the future.
720   for (MachineBasicBlock &MBB : MF) {
721     // Don't use a range-based for loop, because we modify the list as we're
722     // iterating over it and the end iterator may change.
723     for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
724       MachineInstr *Insert = &*MII;
725       // Don't nest anything inside an inline asm, because we don't have
726       // constraints for $push inputs.
727       if (Insert->getOpcode() == TargetOpcode::INLINEASM)
728         continue;
729 
730       // Ignore debugging intrinsics.
731       if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
732         continue;
733 
734       // Iterate through the inputs in reverse order, since we'll be pulling
735       // operands off the stack in LIFO order.
736       CommutingState Commuting;
737       TreeWalkerState TreeWalker(Insert);
738       while (!TreeWalker.Done()) {
739         MachineOperand &Op = TreeWalker.Pop();
740 
741         // We're only interested in explicit virtual register operands.
742         if (!Op.isReg())
743           continue;
744 
745         unsigned Reg = Op.getReg();
746         assert(Op.isUse() && "explicit_uses() should only iterate over uses");
747         assert(!Op.isImplicit() &&
748                "explicit_uses() should only iterate over explicit operands");
749         if (TargetRegisterInfo::isPhysicalRegister(Reg))
750           continue;
751 
752         // Identify the definition for this register at this point. Most
753         // registers are in SSA form here so we try a quick MRI query first.
754         MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
755         if (!Def)
756           continue;
757 
758         // Don't nest an INLINE_ASM def into anything, because we don't have
759         // constraints for $pop outputs.
760         if (Def->getOpcode() == TargetOpcode::INLINEASM)
761           continue;
762 
763         // Argument instructions represent live-in registers and not real
764         // instructions.
765         if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
766             Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
767             Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
768             Def->getOpcode() == WebAssembly::ARGUMENT_F64)
769           continue;
770 
771         // Decide which strategy to take. Prefer to move a single-use value
772         // over cloning it, and prefer cloning over introducing a tee_local.
773         // For moving, we require the def to be in the same block as the use;
774         // this makes things simpler (LiveIntervals' handleMove function only
775         // supports intra-block moves) and it's MachineSink's job to catch all
776         // the sinking opportunities anyway.
777         bool SameBlock = Def->getParent() == &MBB;
778         bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
779                        !TreeWalker.IsOnStack(Reg);
780         if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
781           Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
782         } else if (ShouldRematerialize(*Def, AA, TII)) {
783           Insert =
784               RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
785                                     LIS, MFI, MRI, TII, TRI);
786         } else if (CanMove &&
787                    OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
788           Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
789                                          MRI, TII);
790         } else {
791           // We failed to stackify the operand. If the problem was ordering
792           // constraints, Commuting may be able to help.
793           if (!CanMove && SameBlock)
794             Commuting.MaybeCommute(Insert, TreeWalker, TII);
795           // Proceed to the next operand.
796           continue;
797         }
798 
799         // We stackified an operand. Add the defining instruction's operands to
800         // the worklist stack now to continue to build an ever deeper tree.
801         Commuting.Reset();
802         TreeWalker.PushOperands(Insert);
803       }
804 
805       // If we stackified any operands, skip over the tree to start looking for
806       // the next instruction we can build a tree on.
807       if (Insert != &*MII) {
808         ImposeStackOrdering(&*MII);
809         MII = std::prev(
810             llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert)));
811         Changed = true;
812       }
813     }
814   }
815 
816   // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so
817   // that it never looks like a use-before-def.
818   if (Changed) {
819     MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
820     for (MachineBasicBlock &MBB : MF)
821       MBB.addLiveIn(WebAssembly::EXPR_STACK);
822   }
823 
824 #ifndef NDEBUG
825   // Verify that pushes and pops are performed in LIFO order.
826   SmallVector<unsigned, 0> Stack;
827   for (MachineBasicBlock &MBB : MF) {
828     for (MachineInstr &MI : MBB) {
829       if (MI.isDebugValue())
830         continue;
831       for (MachineOperand &MO : reverse(MI.explicit_operands())) {
832         if (!MO.isReg())
833           continue;
834         unsigned Reg = MO.getReg();
835 
836         if (MFI.isVRegStackified(Reg)) {
837           if (MO.isDef())
838             Stack.push_back(Reg);
839           else
840             assert(Stack.pop_back_val() == Reg &&
841                    "Register stack pop should be paired with a push");
842         }
843       }
844     }
845     // TODO: Generalize this code to support keeping values on the stack across
846     // basic block boundaries.
847     assert(Stack.empty() &&
848            "Register stack pushes and pops should be balanced");
849   }
850 #endif
851 
852   return Changed;
853 }
854