1 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a register stacking pass.
12 ///
13 /// This pass reorders instructions to put register uses and defs in an order
14 /// such that they form single-use expression trees. Registers fitting this form
15 /// are then marked as "stackified", meaning references to them are replaced by
16 /// "push" and "pop" from the stack.
17 ///
/// This is primarily a code size optimization, since temporary values on the
/// expression stack don't need to be named.
20 ///
21 //===----------------------------------------------------------------------===//
22
23 #include "WebAssembly.h"
24 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
25 #include "WebAssemblyMachineFunctionInfo.h"
26 #include "WebAssemblySubtarget.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
29 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
30 #include "llvm/CodeGen/MachineDominators.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/Passes.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 using namespace llvm;
37
38 #define DEBUG_TYPE "wasm-reg-stackify"
39
40 namespace {
41 class WebAssemblyRegStackify final : public MachineFunctionPass {
getPassName() const42 const char *getPassName() const override {
43 return "WebAssembly Register Stackify";
44 }
45
getAnalysisUsage(AnalysisUsage & AU) const46 void getAnalysisUsage(AnalysisUsage &AU) const override {
47 AU.setPreservesCFG();
48 AU.addRequired<AAResultsWrapperPass>();
49 AU.addRequired<MachineDominatorTree>();
50 AU.addRequired<LiveIntervals>();
51 AU.addPreserved<MachineBlockFrequencyInfo>();
52 AU.addPreserved<SlotIndexes>();
53 AU.addPreserved<LiveIntervals>();
54 AU.addPreservedID(LiveVariablesID);
55 AU.addPreserved<MachineDominatorTree>();
56 MachineFunctionPass::getAnalysisUsage(AU);
57 }
58
59 bool runOnMachineFunction(MachineFunction &MF) override;
60
61 public:
62 static char ID; // Pass identification, replacement for typeid
WebAssemblyRegStackify()63 WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
64 };
65 } // end anonymous namespace
66
67 char WebAssemblyRegStackify::ID = 0;
createWebAssemblyRegStackify()68 FunctionPass *llvm::createWebAssemblyRegStackify() {
69 return new WebAssemblyRegStackify();
70 }
71
72 // Decorate the given instruction with implicit operands that enforce the
73 // expression stack ordering constraints for an instruction which is on
74 // the expression stack.
ImposeStackOrdering(MachineInstr * MI)75 static void ImposeStackOrdering(MachineInstr *MI) {
76 // Write the opaque EXPR_STACK register.
77 if (!MI->definesRegister(WebAssembly::EXPR_STACK))
78 MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
79 /*isDef=*/true,
80 /*isImp=*/true));
81
82 // Also read the opaque EXPR_STACK register.
83 if (!MI->readsRegister(WebAssembly::EXPR_STACK))
84 MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
85 /*isDef=*/false,
86 /*isImp=*/true));
87 }
88
89 // Determine whether a call to the callee referenced by
90 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
91 // effects.
QueryCallee(const MachineInstr & MI,unsigned CalleeOpNo,bool & Read,bool & Write,bool & Effects,bool & StackPointer)92 static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
93 bool &Write, bool &Effects, bool &StackPointer) {
94 // All calls can use the stack pointer.
95 StackPointer = true;
96
97 const MachineOperand &MO = MI.getOperand(CalleeOpNo);
98 if (MO.isGlobal()) {
99 const Constant *GV = MO.getGlobal();
100 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
101 if (!GA->isInterposable())
102 GV = GA->getAliasee();
103
104 if (const Function *F = dyn_cast<Function>(GV)) {
105 if (!F->doesNotThrow())
106 Effects = true;
107 if (F->doesNotAccessMemory())
108 return;
109 if (F->onlyReadsMemory()) {
110 Read = true;
111 return;
112 }
113 }
114 }
115
116 // Assume the worst.
117 Write = true;
118 Read = true;
119 Effects = true;
120 }
121
122 // Determine whether MI reads memory, writes memory, has side effects,
123 // and/or uses the __stack_pointer value.
Query(const MachineInstr & MI,AliasAnalysis & AA,bool & Read,bool & Write,bool & Effects,bool & StackPointer)124 static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
125 bool &Write, bool &Effects, bool &StackPointer) {
126 assert(!MI.isPosition());
127 assert(!MI.isTerminator());
128
129 if (MI.isDebugValue())
130 return;
131
132 // Check for loads.
133 if (MI.mayLoad() && !MI.isInvariantLoad(&AA))
134 Read = true;
135
136 // Check for stores.
137 if (MI.mayStore()) {
138 Write = true;
139
140 // Check for stores to __stack_pointer.
141 for (auto MMO : MI.memoperands()) {
142 const MachinePointerInfo &MPI = MMO->getPointerInfo();
143 if (MPI.V.is<const PseudoSourceValue *>()) {
144 auto PSV = MPI.V.get<const PseudoSourceValue *>();
145 if (const ExternalSymbolPseudoSourceValue *EPSV =
146 dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
147 if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
148 StackPointer = true;
149 }
150 }
151 } else if (MI.hasOrderedMemoryRef()) {
152 switch (MI.getOpcode()) {
153 case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
154 case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
155 case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
156 case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
157 case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
158 case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
159 case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
160 case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
161 // These instruction have hasUnmodeledSideEffects() returning true
162 // because they trap on overflow and invalid so they can't be arbitrarily
163 // moved, however hasOrderedMemoryRef() interprets this plus their lack
164 // of memoperands as having a potential unknown memory reference.
165 break;
166 default:
167 // Record volatile accesses, unless it's a call, as calls are handled
168 // specially below.
169 if (!MI.isCall()) {
170 Write = true;
171 Effects = true;
172 }
173 break;
174 }
175 }
176
177 // Check for side effects.
178 if (MI.hasUnmodeledSideEffects()) {
179 switch (MI.getOpcode()) {
180 case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
181 case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
182 case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
183 case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
184 case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
185 case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
186 case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
187 case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
188 // These instructions have hasUnmodeledSideEffects() returning true
189 // because they trap on overflow and invalid so they can't be arbitrarily
190 // moved, however in the specific case of register stackifying, it is safe
191 // to move them because overflow and invalid are Undefined Behavior.
192 break;
193 default:
194 Effects = true;
195 break;
196 }
197 }
198
199 // Analyze calls.
200 if (MI.isCall()) {
201 switch (MI.getOpcode()) {
202 case WebAssembly::CALL_VOID:
203 case WebAssembly::CALL_INDIRECT_VOID:
204 QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
205 break;
206 case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
207 case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
208 case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
209 case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
210 QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
211 break;
212 default:
213 llvm_unreachable("unexpected call opcode");
214 }
215 }
216 }
217
218 // Test whether Def is safe and profitable to rematerialize.
ShouldRematerialize(const MachineInstr & Def,AliasAnalysis & AA,const WebAssemblyInstrInfo * TII)219 static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
220 const WebAssemblyInstrInfo *TII) {
221 return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
222 }
223
224 // Identify the definition for this register at this point. This is a
225 // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
226 // LiveIntervals to handle complex cases.
GetVRegDef(unsigned Reg,const MachineInstr * Insert,const MachineRegisterInfo & MRI,const LiveIntervals & LIS)227 static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
228 const MachineRegisterInfo &MRI,
229 const LiveIntervals &LIS)
230 {
231 // Most registers are in SSA form here so we try a quick MRI query first.
232 if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
233 return Def;
234
235 // MRI doesn't know what the Def is. Try asking LIS.
236 if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
237 LIS.getInstructionIndex(*Insert)))
238 return LIS.getInstructionFromIndex(ValNo->def);
239
240 return nullptr;
241 }
242
243 // Test whether Reg, as defined at Def, has exactly one use. This is a
244 // generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
245 // to handle complex cases.
HasOneUse(unsigned Reg,MachineInstr * Def,MachineRegisterInfo & MRI,MachineDominatorTree & MDT,LiveIntervals & LIS)246 static bool HasOneUse(unsigned Reg, MachineInstr *Def,
247 MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
248 LiveIntervals &LIS) {
249 // Most registers are in SSA form here so we try a quick MRI query first.
250 if (MRI.hasOneUse(Reg))
251 return true;
252
253 bool HasOne = false;
254 const LiveInterval &LI = LIS.getInterval(Reg);
255 const VNInfo *DefVNI = LI.getVNInfoAt(
256 LIS.getInstructionIndex(*Def).getRegSlot());
257 assert(DefVNI);
258 for (auto I : MRI.use_nodbg_operands(Reg)) {
259 const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
260 if (Result.valueIn() == DefVNI) {
261 if (!Result.isKill())
262 return false;
263 if (HasOne)
264 return false;
265 HasOne = true;
266 }
267 }
268 return HasOne;
269 }
270
271 // Test whether it's safe to move Def to just before Insert.
272 // TODO: Compute memory dependencies in a way that doesn't require always
273 // walking the block.
274 // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
275 // more precise.
IsSafeToMove(const MachineInstr * Def,const MachineInstr * Insert,AliasAnalysis & AA,const LiveIntervals & LIS,const MachineRegisterInfo & MRI)276 static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
277 AliasAnalysis &AA, const LiveIntervals &LIS,
278 const MachineRegisterInfo &MRI) {
279 assert(Def->getParent() == Insert->getParent());
280
281 // Check for register dependencies.
282 for (const MachineOperand &MO : Def->operands()) {
283 if (!MO.isReg() || MO.isUndef())
284 continue;
285 unsigned Reg = MO.getReg();
286
287 // If the register is dead here and at Insert, ignore it.
288 if (MO.isDead() && Insert->definesRegister(Reg) &&
289 !Insert->readsRegister(Reg))
290 continue;
291
292 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
293 // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
294 // from moving down, and we've already checked for that.
295 if (Reg == WebAssembly::ARGUMENTS)
296 continue;
297 // If the physical register is never modified, ignore it.
298 if (!MRI.isPhysRegModified(Reg))
299 continue;
300 // Otherwise, it's a physical register with unknown liveness.
301 return false;
302 }
303
304 // Ask LiveIntervals whether moving this virtual register use or def to
305 // Insert will change which value numbers are seen.
306 //
307 // If the operand is a use of a register that is also defined in the same
308 // instruction, test that the newly defined value reaches the insert point,
309 // since the operand will be moving along with the def.
310 const LiveInterval &LI = LIS.getInterval(Reg);
311 VNInfo *DefVNI =
312 (MO.isDef() || Def->definesRegister(Reg)) ?
313 LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) :
314 LI.getVNInfoBefore(LIS.getInstructionIndex(*Def));
315 assert(DefVNI && "Instruction input missing value number");
316 VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert));
317 if (InsVNI && DefVNI != InsVNI)
318 return false;
319 }
320
321 bool Read = false, Write = false, Effects = false, StackPointer = false;
322 Query(*Def, AA, Read, Write, Effects, StackPointer);
323
324 // If the instruction does not access memory and has no side effects, it has
325 // no additional dependencies.
326 if (!Read && !Write && !Effects && !StackPointer)
327 return true;
328
329 // Scan through the intervening instructions between Def and Insert.
330 MachineBasicBlock::const_iterator D(Def), I(Insert);
331 for (--I; I != D; --I) {
332 bool InterveningRead = false;
333 bool InterveningWrite = false;
334 bool InterveningEffects = false;
335 bool InterveningStackPointer = false;
336 Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
337 InterveningStackPointer);
338 if (Effects && InterveningEffects)
339 return false;
340 if (Read && InterveningWrite)
341 return false;
342 if (Write && (InterveningRead || InterveningWrite))
343 return false;
344 if (StackPointer && InterveningStackPointer)
345 return false;
346 }
347
348 return true;
349 }
350
351 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
OneUseDominatesOtherUses(unsigned Reg,const MachineOperand & OneUse,const MachineBasicBlock & MBB,const MachineRegisterInfo & MRI,const MachineDominatorTree & MDT,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI)352 static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
353 const MachineBasicBlock &MBB,
354 const MachineRegisterInfo &MRI,
355 const MachineDominatorTree &MDT,
356 LiveIntervals &LIS,
357 WebAssemblyFunctionInfo &MFI) {
358 const LiveInterval &LI = LIS.getInterval(Reg);
359
360 const MachineInstr *OneUseInst = OneUse.getParent();
361 VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
362
363 for (const MachineOperand &Use : MRI.use_operands(Reg)) {
364 if (&Use == &OneUse)
365 continue;
366
367 const MachineInstr *UseInst = Use.getParent();
368 VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
369
370 if (UseVNI != OneUseVNI)
371 continue;
372
373 const MachineInstr *OneUseInst = OneUse.getParent();
374 if (UseInst == OneUseInst) {
375 // Another use in the same instruction. We need to ensure that the one
376 // selected use happens "before" it.
377 if (&OneUse > &Use)
378 return false;
379 } else {
380 // Test that the use is dominated by the one selected use.
381 while (!MDT.dominates(OneUseInst, UseInst)) {
382 // Actually, dominating is over-conservative. Test that the use would
383 // happen after the one selected use in the stack evaluation order.
384 //
385 // This is needed as a consequence of using implicit get_locals for
386 // uses and implicit set_locals for defs.
387 if (UseInst->getDesc().getNumDefs() == 0)
388 return false;
389 const MachineOperand &MO = UseInst->getOperand(0);
390 if (!MO.isReg())
391 return false;
392 unsigned DefReg = MO.getReg();
393 if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
394 !MFI.isVRegStackified(DefReg))
395 return false;
396 assert(MRI.hasOneUse(DefReg));
397 const MachineOperand &NewUse = *MRI.use_begin(DefReg);
398 const MachineInstr *NewUseInst = NewUse.getParent();
399 if (NewUseInst == OneUseInst) {
400 if (&OneUse > &NewUse)
401 return false;
402 break;
403 }
404 UseInst = NewUseInst;
405 }
406 }
407 }
408 return true;
409 }
410
411 /// Get the appropriate tee_local opcode for the given register class.
GetTeeLocalOpcode(const TargetRegisterClass * RC)412 static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
413 if (RC == &WebAssembly::I32RegClass)
414 return WebAssembly::TEE_LOCAL_I32;
415 if (RC == &WebAssembly::I64RegClass)
416 return WebAssembly::TEE_LOCAL_I64;
417 if (RC == &WebAssembly::F32RegClass)
418 return WebAssembly::TEE_LOCAL_F32;
419 if (RC == &WebAssembly::F64RegClass)
420 return WebAssembly::TEE_LOCAL_F64;
421 llvm_unreachable("Unexpected register class");
422 }
423
424 // Shrink LI to its uses, cleaning up LI.
ShrinkToUses(LiveInterval & LI,LiveIntervals & LIS)425 static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
426 if (LIS.shrinkToUses(&LI)) {
427 SmallVector<LiveInterval*, 4> SplitLIs;
428 LIS.splitSeparateComponents(LI, SplitLIs);
429 }
430 }
431
432 /// A single-use def in the same block with no intervening memory or register
433 /// dependencies; move the def down and nest it with the current instruction.
MoveForSingleUse(unsigned Reg,MachineOperand & Op,MachineInstr * Def,MachineBasicBlock & MBB,MachineInstr * Insert,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI,MachineRegisterInfo & MRI)434 static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
435 MachineInstr *Def,
436 MachineBasicBlock &MBB,
437 MachineInstr *Insert, LiveIntervals &LIS,
438 WebAssemblyFunctionInfo &MFI,
439 MachineRegisterInfo &MRI) {
440 DEBUG(dbgs() << "Move for single use: "; Def->dump());
441
442 MBB.splice(Insert, &MBB, Def);
443 LIS.handleMove(*Def);
444
445 if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
446 // No one else is using this register for anything so we can just stackify
447 // it in place.
448 MFI.stackifyVReg(Reg);
449 } else {
450 // The register may have unrelated uses or defs; create a new register for
451 // just our one def and use so that we can stackify it.
452 unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
453 Def->getOperand(0).setReg(NewReg);
454 Op.setReg(NewReg);
455
456 // Tell LiveIntervals about the new register.
457 LIS.createAndComputeVirtRegInterval(NewReg);
458
459 // Tell LiveIntervals about the changes to the old register.
460 LiveInterval &LI = LIS.getInterval(Reg);
461 LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
462 LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
463 /*RemoveDeadValNo=*/true);
464
465 MFI.stackifyVReg(NewReg);
466
467 DEBUG(dbgs() << " - Replaced register: "; Def->dump());
468 }
469
470 ImposeStackOrdering(Def);
471 return Def;
472 }
473
474 /// A trivially cloneable instruction; clone it and nest the new copy with the
475 /// current instruction.
RematerializeCheapDef(unsigned Reg,MachineOperand & Op,MachineInstr & Def,MachineBasicBlock & MBB,MachineBasicBlock::instr_iterator Insert,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI,MachineRegisterInfo & MRI,const WebAssemblyInstrInfo * TII,const WebAssemblyRegisterInfo * TRI)476 static MachineInstr *RematerializeCheapDef(
477 unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
478 MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
479 WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
480 const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
481 DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
482 DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
483
484 unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
485 TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
486 Op.setReg(NewReg);
487 MachineInstr *Clone = &*std::prev(Insert);
488 LIS.InsertMachineInstrInMaps(*Clone);
489 LIS.createAndComputeVirtRegInterval(NewReg);
490 MFI.stackifyVReg(NewReg);
491 ImposeStackOrdering(Clone);
492
493 DEBUG(dbgs() << " - Cloned to "; Clone->dump());
494
495 // Shrink the interval.
496 bool IsDead = MRI.use_empty(Reg);
497 if (!IsDead) {
498 LiveInterval &LI = LIS.getInterval(Reg);
499 ShrinkToUses(LI, LIS);
500 IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
501 }
502
503 // If that was the last use of the original, delete the original.
504 if (IsDead) {
505 DEBUG(dbgs() << " - Deleting original\n");
506 SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
507 LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
508 LIS.removeInterval(Reg);
509 LIS.RemoveMachineInstrFromMaps(Def);
510 Def.eraseFromParent();
511 }
512
513 return Clone;
514 }
515
516 /// A multiple-use def in the same block with no intervening memory or register
517 /// dependencies; move the def down, nest it with the current instruction, and
518 /// insert a tee_local to satisfy the rest of the uses. As an illustration,
519 /// rewrite this:
520 ///
521 /// Reg = INST ... // Def
522 /// INST ..., Reg, ... // Insert
523 /// INST ..., Reg, ...
524 /// INST ..., Reg, ...
525 ///
526 /// to this:
527 ///
528 /// DefReg = INST ... // Def (to become the new Insert)
529 /// TeeReg, Reg = TEE_LOCAL_... DefReg
530 /// INST ..., TeeReg, ... // Insert
531 /// INST ..., Reg, ...
532 /// INST ..., Reg, ...
533 ///
534 /// with DefReg and TeeReg stackified. This eliminates a get_local from the
535 /// resulting code.
MoveAndTeeForMultiUse(unsigned Reg,MachineOperand & Op,MachineInstr * Def,MachineBasicBlock & MBB,MachineInstr * Insert,LiveIntervals & LIS,WebAssemblyFunctionInfo & MFI,MachineRegisterInfo & MRI,const WebAssemblyInstrInfo * TII)536 static MachineInstr *MoveAndTeeForMultiUse(
537 unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
538 MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
539 MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
540 DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
541
542 // Move Def into place.
543 MBB.splice(Insert, &MBB, Def);
544 LIS.handleMove(*Def);
545
546 // Create the Tee and attach the registers.
547 const auto *RegClass = MRI.getRegClass(Reg);
548 unsigned TeeReg = MRI.createVirtualRegister(RegClass);
549 unsigned DefReg = MRI.createVirtualRegister(RegClass);
550 MachineOperand &DefMO = Def->getOperand(0);
551 MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
552 TII->get(GetTeeLocalOpcode(RegClass)), TeeReg)
553 .addReg(Reg, RegState::Define)
554 .addReg(DefReg, getUndefRegState(DefMO.isDead()));
555 Op.setReg(TeeReg);
556 DefMO.setReg(DefReg);
557 SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
558 SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
559
560 // Tell LiveIntervals we moved the original vreg def from Def to Tee.
561 LiveInterval &LI = LIS.getInterval(Reg);
562 LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
563 VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
564 I->start = TeeIdx;
565 ValNo->def = TeeIdx;
566 ShrinkToUses(LI, LIS);
567
568 // Finish stackifying the new regs.
569 LIS.createAndComputeVirtRegInterval(TeeReg);
570 LIS.createAndComputeVirtRegInterval(DefReg);
571 MFI.stackifyVReg(DefReg);
572 MFI.stackifyVReg(TeeReg);
573 ImposeStackOrdering(Def);
574 ImposeStackOrdering(Tee);
575
576 DEBUG(dbgs() << " - Replaced register: "; Def->dump());
577 DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
578 return Def;
579 }
580
581 namespace {
582 /// A stack for walking the tree of instructions being built, visiting the
583 /// MachineOperands in DFS order.
584 class TreeWalkerState {
585 typedef MachineInstr::mop_iterator mop_iterator;
586 typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
587 typedef iterator_range<mop_reverse_iterator> RangeTy;
588 SmallVector<RangeTy, 4> Worklist;
589
590 public:
TreeWalkerState(MachineInstr * Insert)591 explicit TreeWalkerState(MachineInstr *Insert) {
592 const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
593 if (Range.begin() != Range.end())
594 Worklist.push_back(reverse(Range));
595 }
596
Done() const597 bool Done() const { return Worklist.empty(); }
598
Pop()599 MachineOperand &Pop() {
600 RangeTy &Range = Worklist.back();
601 MachineOperand &Op = *Range.begin();
602 Range = drop_begin(Range, 1);
603 if (Range.begin() == Range.end())
604 Worklist.pop_back();
605 assert((Worklist.empty() ||
606 Worklist.back().begin() != Worklist.back().end()) &&
607 "Empty ranges shouldn't remain in the worklist");
608 return Op;
609 }
610
611 /// Push Instr's operands onto the stack to be visited.
PushOperands(MachineInstr * Instr)612 void PushOperands(MachineInstr *Instr) {
613 const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
614 if (Range.begin() != Range.end())
615 Worklist.push_back(reverse(Range));
616 }
617
618 /// Some of Instr's operands are on the top of the stack; remove them and
619 /// re-insert them starting from the beginning (because we've commuted them).
ResetTopOperands(MachineInstr * Instr)620 void ResetTopOperands(MachineInstr *Instr) {
621 assert(HasRemainingOperands(Instr) &&
622 "Reseting operands should only be done when the instruction has "
623 "an operand still on the stack");
624 Worklist.back() = reverse(Instr->explicit_uses());
625 }
626
627 /// Test whether Instr has operands remaining to be visited at the top of
628 /// the stack.
HasRemainingOperands(const MachineInstr * Instr) const629 bool HasRemainingOperands(const MachineInstr *Instr) const {
630 if (Worklist.empty())
631 return false;
632 const RangeTy &Range = Worklist.back();
633 return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
634 }
635
636 /// Test whether the given register is present on the stack, indicating an
637 /// operand in the tree that we haven't visited yet. Moving a definition of
638 /// Reg to a point in the tree after that would change its value.
639 ///
640 /// This is needed as a consequence of using implicit get_locals for
641 /// uses and implicit set_locals for defs.
IsOnStack(unsigned Reg) const642 bool IsOnStack(unsigned Reg) const {
643 for (const RangeTy &Range : Worklist)
644 for (const MachineOperand &MO : Range)
645 if (MO.isReg() && MO.getReg() == Reg)
646 return true;
647 return false;
648 }
649 };
650
651 /// State to keep track of whether commuting is in flight or whether it's been
652 /// tried for the current instruction and didn't work.
653 class CommutingState {
654 /// There are effectively three states: the initial state where we haven't
655 /// started commuting anything and we don't know anything yet, the tenative
656 /// state where we've commuted the operands of the current instruction and are
657 /// revisting it, and the declined state where we've reverted the operands
658 /// back to their original order and will no longer commute it further.
659 bool TentativelyCommuting;
660 bool Declined;
661
662 /// During the tentative state, these hold the operand indices of the commuted
663 /// operands.
664 unsigned Operand0, Operand1;
665
666 public:
CommutingState()667 CommutingState() : TentativelyCommuting(false), Declined(false) {}
668
669 /// Stackification for an operand was not successful due to ordering
670 /// constraints. If possible, and if we haven't already tried it and declined
671 /// it, commute Insert's operands and prepare to revisit it.
MaybeCommute(MachineInstr * Insert,TreeWalkerState & TreeWalker,const WebAssemblyInstrInfo * TII)672 void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
673 const WebAssemblyInstrInfo *TII) {
674 if (TentativelyCommuting) {
675 assert(!Declined &&
676 "Don't decline commuting until you've finished trying it");
677 // Commuting didn't help. Revert it.
678 TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
679 TentativelyCommuting = false;
680 Declined = true;
681 } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
682 Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
683 Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
684 if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
685 // Tentatively commute the operands and try again.
686 TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
687 TreeWalker.ResetTopOperands(Insert);
688 TentativelyCommuting = true;
689 Declined = false;
690 }
691 }
692 }
693
694 /// Stackification for some operand was successful. Reset to the default
695 /// state.
Reset()696 void Reset() {
697 TentativelyCommuting = false;
698 Declined = false;
699 }
700 };
701 } // end anonymous namespace
702
runOnMachineFunction(MachineFunction & MF)703 bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
704 DEBUG(dbgs() << "********** Register Stackifying **********\n"
705 "********** Function: "
706 << MF.getName() << '\n');
707
708 bool Changed = false;
709 MachineRegisterInfo &MRI = MF.getRegInfo();
710 WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
711 const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
712 const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
713 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
714 MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
715 LiveIntervals &LIS = getAnalysis<LiveIntervals>();
716
717 // Walk the instructions from the bottom up. Currently we don't look past
718 // block boundaries, and the blocks aren't ordered so the block visitation
719 // order isn't significant, but we may want to change this in the future.
720 for (MachineBasicBlock &MBB : MF) {
721 // Don't use a range-based for loop, because we modify the list as we're
722 // iterating over it and the end iterator may change.
723 for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
724 MachineInstr *Insert = &*MII;
725 // Don't nest anything inside an inline asm, because we don't have
726 // constraints for $push inputs.
727 if (Insert->getOpcode() == TargetOpcode::INLINEASM)
728 continue;
729
730 // Ignore debugging intrinsics.
731 if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
732 continue;
733
734 // Iterate through the inputs in reverse order, since we'll be pulling
735 // operands off the stack in LIFO order.
736 CommutingState Commuting;
737 TreeWalkerState TreeWalker(Insert);
738 while (!TreeWalker.Done()) {
739 MachineOperand &Op = TreeWalker.Pop();
740
741 // We're only interested in explicit virtual register operands.
742 if (!Op.isReg())
743 continue;
744
745 unsigned Reg = Op.getReg();
746 assert(Op.isUse() && "explicit_uses() should only iterate over uses");
747 assert(!Op.isImplicit() &&
748 "explicit_uses() should only iterate over explicit operands");
749 if (TargetRegisterInfo::isPhysicalRegister(Reg))
750 continue;
751
752 // Identify the definition for this register at this point. Most
753 // registers are in SSA form here so we try a quick MRI query first.
754 MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
755 if (!Def)
756 continue;
757
758 // Don't nest an INLINE_ASM def into anything, because we don't have
759 // constraints for $pop outputs.
760 if (Def->getOpcode() == TargetOpcode::INLINEASM)
761 continue;
762
763 // Argument instructions represent live-in registers and not real
764 // instructions.
765 if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
766 Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
767 Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
768 Def->getOpcode() == WebAssembly::ARGUMENT_F64)
769 continue;
770
771 // Decide which strategy to take. Prefer to move a single-use value
772 // over cloning it, and prefer cloning over introducing a tee_local.
773 // For moving, we require the def to be in the same block as the use;
774 // this makes things simpler (LiveIntervals' handleMove function only
775 // supports intra-block moves) and it's MachineSink's job to catch all
776 // the sinking opportunities anyway.
777 bool SameBlock = Def->getParent() == &MBB;
778 bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
779 !TreeWalker.IsOnStack(Reg);
780 if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
781 Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
782 } else if (ShouldRematerialize(*Def, AA, TII)) {
783 Insert =
784 RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
785 LIS, MFI, MRI, TII, TRI);
786 } else if (CanMove &&
787 OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
788 Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
789 MRI, TII);
790 } else {
791 // We failed to stackify the operand. If the problem was ordering
792 // constraints, Commuting may be able to help.
793 if (!CanMove && SameBlock)
794 Commuting.MaybeCommute(Insert, TreeWalker, TII);
795 // Proceed to the next operand.
796 continue;
797 }
798
799 // We stackified an operand. Add the defining instruction's operands to
800 // the worklist stack now to continue to build an ever deeper tree.
801 Commuting.Reset();
802 TreeWalker.PushOperands(Insert);
803 }
804
805 // If we stackified any operands, skip over the tree to start looking for
806 // the next instruction we can build a tree on.
807 if (Insert != &*MII) {
808 ImposeStackOrdering(&*MII);
809 MII = std::prev(
810 llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert)));
811 Changed = true;
812 }
813 }
814 }
815
816 // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so
817 // that it never looks like a use-before-def.
818 if (Changed) {
819 MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
820 for (MachineBasicBlock &MBB : MF)
821 MBB.addLiveIn(WebAssembly::EXPR_STACK);
822 }
823
824 #ifndef NDEBUG
825 // Verify that pushes and pops are performed in LIFO order.
826 SmallVector<unsigned, 0> Stack;
827 for (MachineBasicBlock &MBB : MF) {
828 for (MachineInstr &MI : MBB) {
829 if (MI.isDebugValue())
830 continue;
831 for (MachineOperand &MO : reverse(MI.explicit_operands())) {
832 if (!MO.isReg())
833 continue;
834 unsigned Reg = MO.getReg();
835
836 if (MFI.isVRegStackified(Reg)) {
837 if (MO.isDef())
838 Stack.push_back(Reg);
839 else
840 assert(Stack.pop_back_val() == Reg &&
841 "Register stack pop should be paired with a push");
842 }
843 }
844 }
845 // TODO: Generalize this code to support keeping values on the stack across
846 // basic block boundaries.
847 assert(Stack.empty() &&
848 "Register stack pushes and pops should be balanced");
849 }
850 #endif
851
852 return Changed;
853 }
854