• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  //===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===//
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // This file is distributed under the University of Illinois Open Source
6  // License. See LICENSE.TXT for details.
7  //
8  //
9  //===----------------------------------------------------------------------===//
10  
11  #define DEBUG_TYPE "hexagon-pei"
12  
13  #include "HexagonBlockRanges.h"
14  #include "HexagonFrameLowering.h"
15  #include "HexagonInstrInfo.h"
16  #include "HexagonMachineFunctionInfo.h"
17  #include "HexagonRegisterInfo.h"
18  #include "HexagonSubtarget.h"
19  #include "HexagonTargetMachine.h"
20  #include "llvm/ADT/BitVector.h"
21  #include "llvm/ADT/PostOrderIterator.h"
22  #include "llvm/CodeGen/MachineDominators.h"
23  #include "llvm/CodeGen/MachineFunction.h"
24  #include "llvm/CodeGen/MachineFunctionPass.h"
25  #include "llvm/CodeGen/MachineInstrBuilder.h"
26  #include "llvm/CodeGen/MachineInstrBuilder.h"
27  #include "llvm/CodeGen/MachineModuleInfo.h"
28  #include "llvm/CodeGen/MachinePostDominators.h"
29  #include "llvm/CodeGen/MachineRegisterInfo.h"
30  #include "llvm/CodeGen/RegisterScavenging.h"
31  #include "llvm/IR/Function.h"
32  #include "llvm/IR/Type.h"
33  #include "llvm/Support/CommandLine.h"
34  #include "llvm/Support/Debug.h"
35  #include "llvm/Support/raw_ostream.h"
36  #include "llvm/Target/TargetInstrInfo.h"
37  #include "llvm/Target/TargetMachine.h"
38  #include "llvm/Target/TargetOptions.h"
39  
40  // Hexagon stack frame layout as defined by the ABI:
41  //
42  //                                                       Incoming arguments
43  //                                                       passed via stack
44  //                                                                      |
45  //                                                                      |
46  //        SP during function's                 FP during function's     |
47  //    +-- runtime (top of stack)               runtime (bottom) --+     |
48  //    |                                                           |     |
49  // --++---------------------+------------------+-----------------++-+-------
50  //   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
51  //   |   called functions   |  local objects   |  local objects  |FP|
52  // --+----------------------+------------------+-----------------+--+-------
53  //    <-    size known    -> <- size unknown -> <- size known  ->
54  //
55  // Low address                                                 High address
56  //
57  // <--- stack growth
58  //
59  //
60  // - In any circumstances, the outgoing function arguments are always accessi-
61  //   ble using the SP, and the incoming arguments are accessible using the FP.
62  // - If the local objects are not aligned, they can always be accessed using
63  //   the FP.
64  // - If there are no variable-sized objects, the local objects can always be
65  //   accessed using the SP, regardless of whether they are aligned or not.
66  //   (The alignment padding will be at the bottom of the stack (highest
67  //   address), and so the offset with respect to the SP will be known at
68  //   compile time.)
69  //
70  // The only complication occurs if there are both local aligned objects and
71  // dynamically allocated (variable-sized) objects. The alignment pad will be
72  // placed between the FP and the local objects, thus preventing the use of the
73  // FP to access the local objects. At the same time, the variable-sized objects
74  // will be between the SP and the local objects, thus introducing an unknown
75  // distance from the SP to the locals.
76  //
77  // To avoid this problem, a new register is created that holds the aligned
78  // address of the bottom of the stack, referred to in the sources as AP (aligned
79  // pointer). The AP will be equal to "FP-p", where "p" is the smallest pad
80  // that aligns AP to the required boundary (a maximum of the alignments of
81  // all stack objects, fixed- and variable-sized). All local objects[1] will
82  // then use AP as the base pointer.
83  // [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
84  // their name from being allocated at fixed locations on the stack, relative
85  // to the FP. In the presence of dynamic allocation and local alignment, such
86  // objects can only be accessed through the FP.
87  //
88  // Illustration of the AP:
89  //                                                                FP --+
90  //                                                                     |
91  // ---------------+---------------------+-----+-----------------------++-+--
92  //   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
93  //   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
94  // ---------------+---------------------+-----+-----------------+-----+--+--
95  //                                      |<-- Multiple of the -->|
96  //                                           stack alignment    +-- AP
97  //
98  // The AP is set up at the beginning of the function. Since it is not a dedi-
99  // cated (reserved) register, it needs to be kept live throughout the function
100  // to be available as the base register for local object accesses.
101  // Normally, the address of a stack object is obtained by a pseudo-instruction
102  // TFR_FI. To access local objects with the AP register present, a different
103  // pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra
104  // argument compared to TFR_FI: the first input register is the AP register.
105  // This keeps the register live between its definition and its uses.
106  
107  // The AP register is originally set up using pseudo-instruction ALIGNA:
108  //   AP = ALIGNA A
109  // where
110  //   A  - required stack alignment
111  // The alignment value must be the maximum of all alignments required by
112  // any stack object.
113  
114  // The dynamic allocation uses a pseudo-instruction ALLOCA:
115  //   Rd = ALLOCA Rs, A
116  // where
117  //   Rd - address of the allocated space
118  //   Rs - minimum size (the actual allocated size can be larger to
119  //        accommodate alignment)
120  //   A  - required alignment
121  
122  
123  using namespace llvm;
124  
125  static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
126      cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
127  
128  static cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots",
129      cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
130      cl::ZeroOrMore);
131  
132  static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
133      cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
134      cl::init(6), cl::ZeroOrMore);
135  
136  static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
137      cl::Hidden, cl::desc("Specify Os spill func threshold"),
138      cl::init(1), cl::ZeroOrMore);
139  
140  static cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer",
141      cl::Hidden, cl::desc("Enable runtime checks for stack overflow."),
142      cl::init(false), cl::ZeroOrMore);
143  
144  static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
145      cl::init(true), cl::Hidden, cl::ZeroOrMore,
146      cl::desc("Enable stack frame shrink wrapping"));
147  
148  static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
149      cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
150      "shrink-wraps"));
151  
152  static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
153      cl::Hidden, cl::desc("Use allocframe more conservatively"));
154  
155  static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
156      cl::init(true), cl::desc("Optimize spill slots"));
157  
158  
159  namespace llvm {
160    void initializeHexagonCallFrameInformationPass(PassRegistry&);
161    FunctionPass *createHexagonCallFrameInformation();
162  }
163  
164  namespace {
165    class HexagonCallFrameInformation : public MachineFunctionPass {
166    public:
167      static char ID;
HexagonCallFrameInformation()168      HexagonCallFrameInformation() : MachineFunctionPass(ID) {
169        PassRegistry &PR = *PassRegistry::getPassRegistry();
170        initializeHexagonCallFrameInformationPass(PR);
171      }
172      bool runOnMachineFunction(MachineFunction &MF) override;
getRequiredProperties() const173      MachineFunctionProperties getRequiredProperties() const override {
174        return MachineFunctionProperties().set(
175            MachineFunctionProperties::Property::AllVRegsAllocated);
176      }
177    };
178  
179    char HexagonCallFrameInformation::ID = 0;
180  }
181  
runOnMachineFunction(MachineFunction & MF)182  bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
183    auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
184    bool NeedCFI = MF.getMMI().hasDebugInfo() ||
185                   MF.getFunction()->needsUnwindTableEntry();
186  
187    if (!NeedCFI)
188      return false;
189    HFI.insertCFIInstructions(MF);
190    return true;
191  }
192  
193  INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
194                  "Hexagon call frame information", false, false)
195  
createHexagonCallFrameInformation()196  FunctionPass *llvm::createHexagonCallFrameInformation() {
197    return new HexagonCallFrameInformation();
198  }
199  
200  
201  namespace {
202    /// Map a register pair Reg to the subregister that has the greater "number",
203    /// i.e. D3 (aka R7:6) will be mapped to R7, etc.
getMax32BitSubRegister(unsigned Reg,const TargetRegisterInfo & TRI,bool hireg=true)204    unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI,
205                                    bool hireg = true) {
206      if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
207        return Reg;
208  
209      unsigned RegNo = 0;
210      for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
211        if (hireg) {
212          if (*SubRegs > RegNo)
213            RegNo = *SubRegs;
214        } else {
215          if (!RegNo || *SubRegs < RegNo)
216            RegNo = *SubRegs;
217        }
218      }
219      return RegNo;
220    }
221  
222    /// Returns the callee saved register with the largest id in the vector.
getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> & CSI,const TargetRegisterInfo & TRI)223    unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
224                                  const TargetRegisterInfo &TRI) {
225      static_assert(Hexagon::R1 > 0,
226                    "Assume physical registers are encoded as positive integers");
227      if (CSI.empty())
228        return 0;
229  
230      unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
231      for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
232        unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
233        if (Reg > Max)
234          Max = Reg;
235      }
236      return Max;
237    }
238  
239    /// Checks if the basic block contains any instruction that needs a stack
240    /// frame to be already in place.
needsStackFrame(const MachineBasicBlock & MBB,const BitVector & CSR,const HexagonRegisterInfo & HRI)241    bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
242          const HexagonRegisterInfo &HRI) {
243      for (auto &I : MBB) {
244        const MachineInstr *MI = &I;
245        if (MI->isCall())
246          return true;
247        unsigned Opc = MI->getOpcode();
248        switch (Opc) {
249          case Hexagon::ALLOCA:
250          case Hexagon::ALIGNA:
251            return true;
252          default:
253            break;
254        }
255        // Check individual operands.
256        for (const MachineOperand &MO : MI->operands()) {
257          // While the presence of a frame index does not prove that a stack
258          // frame will be required, all frame indexes should be within alloc-
259          // frame/deallocframe. Otherwise, the code that translates a frame
260          // index into an offset would have to be aware of the placement of
261          // the frame creation/destruction instructions.
262          if (MO.isFI())
263            return true;
264          if (!MO.isReg())
265            continue;
266          unsigned R = MO.getReg();
267          // Virtual registers will need scavenging, which then may require
268          // a stack slot.
269          if (TargetRegisterInfo::isVirtualRegister(R))
270            return true;
271          for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
272            if (CSR[*S])
273              return true;
274        }
275      }
276      return false;
277    }
278  
279    /// Returns true if MBB has a machine instructions that indicates a tail call
280    /// in the block.
hasTailCall(const MachineBasicBlock & MBB)281    bool hasTailCall(const MachineBasicBlock &MBB) {
282      MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
283      unsigned RetOpc = I->getOpcode();
284      return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr;
285    }
286  
287    /// Returns true if MBB contains an instruction that returns.
hasReturn(const MachineBasicBlock & MBB)288    bool hasReturn(const MachineBasicBlock &MBB) {
289      for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
290        if (I->isReturn())
291          return true;
292      return false;
293    }
294  
295    /// Returns the "return" instruction from this block, or nullptr if there
296    /// isn't any.
getReturn(MachineBasicBlock & MBB)297    MachineInstr *getReturn(MachineBasicBlock &MBB) {
298      for (auto &I : MBB)
299        if (I.isReturn())
300          return &I;
301      return nullptr;
302    }
303  
isRestoreCall(unsigned Opc)304    bool isRestoreCall(unsigned Opc) {
305      switch (Opc) {
306        case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
307        case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
308        case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
309        case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
310          return true;
311      }
312      return false;
313    }
314  
isOptNone(const MachineFunction & MF)315    inline bool isOptNone(const MachineFunction &MF) {
316      return MF.getFunction()->hasFnAttribute(Attribute::OptimizeNone) ||
317             MF.getTarget().getOptLevel() == CodeGenOpt::None;
318    }
319  
isOptSize(const MachineFunction & MF)320    inline bool isOptSize(const MachineFunction &MF) {
321      const Function &F = *MF.getFunction();
322      return F.optForSize() && !F.optForMinSize();
323    }
324  
isMinSize(const MachineFunction & MF)325    inline bool isMinSize(const MachineFunction &MF) {
326      return MF.getFunction()->optForMinSize();
327    }
328  }
329  
330  
331  /// Implements shrink-wrapping of the stack frame. By default, stack frame
332  /// is created in the function entry block, and is cleaned up in every block
333  /// that returns. This function finds alternate blocks: one for the frame
334  /// setup (prolog) and one for the cleanup (epilog).
findShrunkPrologEpilog(MachineFunction & MF,MachineBasicBlock * & PrologB,MachineBasicBlock * & EpilogB) const335  void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
336        MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
337    static unsigned ShrinkCounter = 0;
338  
339    if (ShrinkLimit.getPosition()) {
340      if (ShrinkCounter >= ShrinkLimit)
341        return;
342      ShrinkCounter++;
343    }
344  
345    auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
346    auto &HRI = *HST.getRegisterInfo();
347  
348    MachineDominatorTree MDT;
349    MDT.runOnMachineFunction(MF);
350    MachinePostDominatorTree MPT;
351    MPT.runOnMachineFunction(MF);
352  
353    typedef DenseMap<unsigned,unsigned> UnsignedMap;
354    UnsignedMap RPO;
355    typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
356    RPOTType RPOT(&MF);
357    unsigned RPON = 0;
358    for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
359      RPO[(*I)->getNumber()] = RPON++;
360  
361    // Don't process functions that have loops, at least for now. Placement
362    // of prolog and epilog must take loop structure into account. For simpli-
363    // city don't do it right now.
364    for (auto &I : MF) {
365      unsigned BN = RPO[I.getNumber()];
366      for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
367        // If found a back-edge, return.
368        if (RPO[(*SI)->getNumber()] <= BN)
369          return;
370      }
371    }
372  
373    // Collect the set of blocks that need a stack frame to execute. Scan
374    // each block for uses/defs of callee-saved registers, calls, etc.
375    SmallVector<MachineBasicBlock*,16> SFBlocks;
376    BitVector CSR(Hexagon::NUM_TARGET_REGS);
377    for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
378      for (MCSubRegIterator S(*P, &HRI, true); S.isValid(); ++S)
379        CSR[*S] = true;
380  
381    for (auto &I : MF)
382      if (needsStackFrame(I, CSR, HRI))
383        SFBlocks.push_back(&I);
384  
385    DEBUG({
386      dbgs() << "Blocks needing SF: {";
387      for (auto &B : SFBlocks)
388        dbgs() << " BB#" << B->getNumber();
389      dbgs() << " }\n";
390    });
391    // No frame needed?
392    if (SFBlocks.empty())
393      return;
394  
395    // Pick a common dominator and a common post-dominator.
396    MachineBasicBlock *DomB = SFBlocks[0];
397    for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
398      DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
399      if (!DomB)
400        break;
401    }
402    MachineBasicBlock *PDomB = SFBlocks[0];
403    for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
404      PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
405      if (!PDomB)
406        break;
407    }
408    DEBUG({
409      dbgs() << "Computed dom block: BB#";
410      if (DomB) dbgs() << DomB->getNumber();
411      else      dbgs() << "<null>";
412      dbgs() << ", computed pdom block: BB#";
413      if (PDomB) dbgs() << PDomB->getNumber();
414      else       dbgs() << "<null>";
415      dbgs() << "\n";
416    });
417    if (!DomB || !PDomB)
418      return;
419  
420    // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
421    if (!MDT.dominates(DomB, PDomB)) {
422      DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
423      return;
424    }
425    if (!MPT.dominates(PDomB, DomB)) {
426      DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
427      return;
428    }
429  
430    // Finally, everything seems right.
431    PrologB = DomB;
432    EpilogB = PDomB;
433  }
434  
435  
436  /// Perform most of the PEI work here:
437  /// - saving/restoring of the callee-saved registers,
438  /// - stack frame creation and destruction.
439  /// Normally, this work is distributed among various functions, but doing it
440  /// in one place allows shrink-wrapping of the stack frame.
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const441  void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
442                                          MachineBasicBlock &MBB) const {
443    auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
444    auto &HRI = *HST.getRegisterInfo();
445  
446    MachineFrameInfo *MFI = MF.getFrameInfo();
447    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
448  
449    MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
450    if (EnableShrinkWrapping)
451      findShrunkPrologEpilog(MF, PrologB, EpilogB);
452  
453    bool PrologueStubs = false;
454    insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
455    insertPrologueInBlock(*PrologB, PrologueStubs);
456  
457    if (EpilogB) {
458      insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
459      insertEpilogueInBlock(*EpilogB);
460    } else {
461      for (auto &B : MF)
462        if (B.isReturnBlock())
463          insertCSRRestoresInBlock(B, CSI, HRI);
464  
465      for (auto &B : MF)
466        if (B.isReturnBlock())
467          insertEpilogueInBlock(B);
468  
469      for (auto &B : MF) {
470        if (B.empty())
471          continue;
472        MachineInstr *RetI = getReturn(B);
473        if (!RetI || isRestoreCall(RetI->getOpcode()))
474          continue;
475        for (auto &R : CSI)
476          RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
477      }
478    }
479  
480    if (EpilogB) {
481      // If there is an epilog block, it may not have a return instruction.
482      // In such case, we need to add the callee-saved registers as live-ins
483      // in all blocks on all paths from the epilog to any return block.
484      unsigned MaxBN = 0;
485      for (auto &B : MF)
486        if (B.getNumber() >= 0)
487          MaxBN = std::max(MaxBN, unsigned(B.getNumber()));
488      BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
489      updateExitPaths(*EpilogB, EpilogB, DoneT, DoneF, Path);
490    }
491  }
492  
493  
insertPrologueInBlock(MachineBasicBlock & MBB,bool PrologueStubs) const494  void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
495        bool PrologueStubs) const {
496    MachineFunction &MF = *MBB.getParent();
497    MachineFrameInfo *MFI = MF.getFrameInfo();
498    auto &HST = MF.getSubtarget<HexagonSubtarget>();
499    auto &HII = *HST.getInstrInfo();
500    auto &HRI = *HST.getRegisterInfo();
501    DebugLoc dl;
502  
503    unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment());
504  
505    // Calculate the total stack frame size.
506    // Get the number of bytes to allocate from the FrameInfo.
507    unsigned FrameSize = MFI->getStackSize();
508    // Round up the max call frame size to the max alignment on the stack.
509    unsigned MaxCFA = alignTo(MFI->getMaxCallFrameSize(), MaxAlign);
510    MFI->setMaxCallFrameSize(MaxCFA);
511  
512    FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
513    MFI->setStackSize(FrameSize);
514  
515    bool AlignStack = (MaxAlign > getStackAlignment());
516  
517    // Get the number of bytes to allocate from the FrameInfo.
518    unsigned NumBytes = MFI->getStackSize();
519    unsigned SP = HRI.getStackRegister();
520    unsigned MaxCF = MFI->getMaxCallFrameSize();
521    MachineBasicBlock::iterator InsertPt = MBB.begin();
522  
523    auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>();
524    auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts();
525  
526    for (auto MI : AdjustRegs) {
527      assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca");
528      expandAlloca(MI, HII, SP, MaxCF);
529      MI->eraseFromParent();
530    }
531  
532    if (!hasFP(MF))
533      return;
534  
535    // Check for overflow.
536    // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
537    const unsigned int ALLOCFRAME_MAX = 16384;
538  
539    // Create a dummy memory operand to avoid allocframe from being treated as
540    // a volatile memory reference.
541    MachineMemOperand *MMO =
542      MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
543                              4, 4);
544  
545    if (NumBytes >= ALLOCFRAME_MAX) {
546      // Emit allocframe(#0).
547      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
548        .addImm(0)
549        .addMemOperand(MMO);
550  
551      // Subtract offset from frame pointer.
552      // We use a caller-saved non-parameter register for that.
553      unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
554      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real),
555              CallerSavedReg).addImm(NumBytes);
556      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
557        .addReg(SP)
558        .addReg(CallerSavedReg);
559    } else {
560      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
561        .addImm(NumBytes)
562        .addMemOperand(MMO);
563    }
564  
565    if (AlignStack) {
566      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
567          .addReg(SP)
568          .addImm(-int64_t(MaxAlign));
569    }
570  
571    // If the stack-checking is enabled, and we spilled the callee-saved
572    // registers inline (i.e. did not use a spill function), then call
573    // the stack checker directly.
574    if (EnableStackOVFSanitizer && !PrologueStubs)
575      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CALLstk))
576             .addExternalSymbol("__runtime_stack_check");
577  }
578  
insertEpilogueInBlock(MachineBasicBlock & MBB) const579  void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
580    MachineFunction &MF = *MBB.getParent();
581    if (!hasFP(MF))
582      return;
583  
584    auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
585    auto &HII = *HST.getInstrInfo();
586    auto &HRI = *HST.getRegisterInfo();
587    unsigned SP = HRI.getStackRegister();
588  
589    MachineInstr *RetI = getReturn(MBB);
590    unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
591  
592    MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
593    DebugLoc DL;
594    if (InsertPt != MBB.end())
595      DL = InsertPt->getDebugLoc();
596    else if (!MBB.empty())
597      DL = std::prev(MBB.end())->getDebugLoc();
598  
599    // Handle EH_RETURN.
600    if (RetOpc == Hexagon::EH_RETURN_JMPR) {
601      BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
602      BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
603          .addReg(SP)
604          .addReg(Hexagon::R28);
605      return;
606    }
607  
608    // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
609    // frame instruction if we encounter it.
610    if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
611        RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC) {
612      MachineBasicBlock::iterator It = RetI;
613      ++It;
614      // Delete all instructions after the RESTORE (except labels).
615      while (It != MBB.end()) {
616        if (!It->isLabel())
617          It = MBB.erase(It);
618        else
619          ++It;
620      }
621      return;
622    }
623  
624    // It is possible that the restoring code is a call to a library function.
625    // All of the restore* functions include "deallocframe", so we need to make
626    // sure that we don't add an extra one.
627    bool NeedsDeallocframe = true;
628    if (!MBB.empty() && InsertPt != MBB.begin()) {
629      MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
630      unsigned COpc = PrevIt->getOpcode();
631      if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
632          COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC)
633        NeedsDeallocframe = false;
634    }
635  
636    if (!NeedsDeallocframe)
637      return;
638    // If the returning instruction is JMPret, replace it with dealloc_return,
639    // otherwise just add deallocframe. The function could be returning via a
640    // tail call.
641    if (RetOpc != Hexagon::JMPret || DisableDeallocRet) {
642      BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
643      return;
644    }
645    unsigned NewOpc = Hexagon::L4_return;
646    MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
647    // Transfer the function live-out registers.
648    NewI->copyImplicitOps(MF, *RetI);
649    MBB.erase(RetI);
650  }
651  
652  
updateExitPaths(MachineBasicBlock & MBB,MachineBasicBlock * RestoreB,BitVector & DoneT,BitVector & DoneF,BitVector & Path) const653  bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
654        MachineBasicBlock *RestoreB, BitVector &DoneT, BitVector &DoneF,
655        BitVector &Path) const {
656    assert(MBB.getNumber() >= 0);
657    unsigned BN = MBB.getNumber();
658    if (Path[BN] || DoneF[BN])
659      return false;
660    if (DoneT[BN])
661      return true;
662  
663    auto &CSI = MBB.getParent()->getFrameInfo()->getCalleeSavedInfo();
664  
665    Path[BN] = true;
666    bool ReachedExit = false;
667    for (auto &SB : MBB.successors())
668      ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);
669  
670    if (!MBB.empty() && MBB.back().isReturn()) {
671      // Add implicit uses of all callee-saved registers to the reached
672      // return instructions. This is to prevent the anti-dependency breaker
673      // from renaming these registers.
674      MachineInstr &RetI = MBB.back();
675      if (!isRestoreCall(RetI.getOpcode()))
676        for (auto &R : CSI)
677          RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
678      ReachedExit = true;
679    }
680  
681    // We don't want to add unnecessary live-ins to the restore block: since
682    // the callee-saved registers are being defined in it, the entry of the
683    // restore block cannot be on the path from the definitions to any exit.
684    if (ReachedExit && &MBB != RestoreB) {
685      for (auto &R : CSI)
686        if (!MBB.isLiveIn(R.getReg()))
687          MBB.addLiveIn(R.getReg());
688      DoneT[BN] = true;
689    }
690    if (!ReachedExit)
691      DoneF[BN] = true;
692  
693    Path[BN] = false;
694    return ReachedExit;
695  }
696  
697  
698  namespace {
IsAllocFrame(MachineBasicBlock::const_iterator It)699    bool IsAllocFrame(MachineBasicBlock::const_iterator It) {
700      if (!It->isBundle())
701        return It->getOpcode() == Hexagon::S2_allocframe;
702      auto End = It->getParent()->instr_end();
703      MachineBasicBlock::const_instr_iterator I = It.getInstrIterator();
704      while (++I != End && I->isBundled())
705        if (I->getOpcode() == Hexagon::S2_allocframe)
706          return true;
707      return false;
708    }
709  
FindAllocFrame(MachineBasicBlock & B)710    MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) {
711      for (auto &I : B)
712        if (IsAllocFrame(I))
713          return I;
714      return B.end();
715    }
716  }
717  
718  
insertCFIInstructions(MachineFunction & MF) const719  void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
720    for (auto &B : MF) {
721      auto AF = FindAllocFrame(B);
722      if (AF == B.end())
723        continue;
724      insertCFIInstructionsAt(B, ++AF);
725    }
726  }
727  
728  
insertCFIInstructionsAt(MachineBasicBlock & MBB,MachineBasicBlock::iterator At) const729  void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
730        MachineBasicBlock::iterator At) const {
731    MachineFunction &MF = *MBB.getParent();
732    MachineFrameInfo &MFI = *MF.getFrameInfo();
733    MachineModuleInfo &MMI = MF.getMMI();
734    auto &HST = MF.getSubtarget<HexagonSubtarget>();
735    auto &HII = *HST.getInstrInfo();
736    auto &HRI = *HST.getRegisterInfo();
737  
738    // If CFI instructions have debug information attached, something goes
739    // wrong with the final assembly generation: the prolog_end is placed
740    // in a wrong location.
741    DebugLoc DL;
742    const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
743  
744    MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
745    bool HasFP = hasFP(MF);
746  
747    if (HasFP) {
748      unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
749      unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
750  
751      // Define CFA via an offset from the value of FP.
752      //
753      //  -8   -4    0 (SP)
754      // --+----+----+---------------------
755      //   | FP | LR |          increasing addresses -->
756      // --+----+----+---------------------
757      //   |         +-- Old SP (before allocframe)
758      //   +-- New FP (after allocframe)
759      //
760      // MCCFIInstruction::createDefCfa subtracts the offset from the register.
761      // MCCFIInstruction::createOffset takes the offset without sign change.
762      auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
763      BuildMI(MBB, At, DL, CFID)
764          .addCFIIndex(MMI.addFrameInst(DefCfa));
765      // R31 (return addr) = CFA - 4
766      auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
767      BuildMI(MBB, At, DL, CFID)
768          .addCFIIndex(MMI.addFrameInst(OffR31));
769      // R30 (frame ptr) = CFA - 8
770      auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
771      BuildMI(MBB, At, DL, CFID)
772          .addCFIIndex(MMI.addFrameInst(OffR30));
773    }
774  
775    static unsigned int RegsToMove[] = {
776      Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
777      Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
778      Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
779      Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
780      Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
781      Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
782      Hexagon::NoRegister
783    };
784  
785    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
786  
787    for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
788      unsigned Reg = RegsToMove[i];
789      auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
790        return C.getReg() == Reg;
791      };
792      auto F = std::find_if(CSI.begin(), CSI.end(), IfR);
793      if (F == CSI.end())
794        continue;
795  
796      int64_t Offset;
797      if (HasFP) {
798        // If the function has a frame pointer (i.e. has an allocframe),
799        // then the CFA has been defined in terms of FP. Any offsets in
800        // the following CFI instructions have to be defined relative
801        // to FP, which points to the bottom of the stack frame.
802        // The function getFrameIndexReference can still choose to use SP
803        // for the offset calculation, so we cannot simply call it here.
804        // Instead, get the offset (relative to the FP) directly.
805        Offset = MFI.getObjectOffset(F->getFrameIdx());
806      } else {
807        unsigned FrameReg;
808        Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
809      }
810      // Subtract 8 to make room for R30 and R31, which are added above.
811      Offset -= 8;
812  
813      if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
814        unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
815        auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
816                                                     Offset);
817        BuildMI(MBB, At, DL, CFID)
818            .addCFIIndex(MMI.addFrameInst(OffReg));
819      } else {
820        // Split the double regs into subregs, and generate appropriate
821        // cfi_offsets.
822        // The only reason, we are split double regs is, llvm-mc does not
823        // understand paired registers for cfi_offset.
824        // Eg .cfi_offset r1:0, -64
825  
826        unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
827        unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
828        unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
829        unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
830        auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
831                                                    Offset+4);
832        BuildMI(MBB, At, DL, CFID)
833            .addCFIIndex(MMI.addFrameInst(OffHi));
834        auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
835                                                    Offset);
836        BuildMI(MBB, At, DL, CFID)
837            .addCFIIndex(MMI.addFrameInst(OffLo));
838      }
839    }
840  }
841  
842  
hasFP(const MachineFunction & MF) const843  bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
844    auto &MFI = *MF.getFrameInfo();
845    auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
846  
847    bool HasFixed = MFI.getNumFixedObjects();
848    bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
849                          .getLocalFrameObjectCount();
850    bool HasExtraAlign = HRI.needsStackRealignment(MF);
851    bool HasAlloca = MFI.hasVarSizedObjects();
852  
853    // Insert ALLOCFRAME if we need to or at -O0 for the debugger.  Think
854    // that this shouldn't be required, but doing so now because gcc does and
855    // gdb can't break at the start of the function without it.  Will remove if
856    // this turns out to be a gdb bug.
857    //
858    if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
859      return true;
860  
861    // By default we want to use SP (since it's always there). FP requires
862    // some setup (i.e. ALLOCFRAME).
863    // Fixed and preallocated objects need FP if the distance from them to
864    // the SP is unknown (as is with alloca or aligna).
865    if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
866      return true;
867  
868    if (MFI.getStackSize() > 0) {
869      if (EnableStackOVFSanitizer || UseAllocframe)
870        return true;
871    }
872  
873    if (MFI.hasCalls() ||
874        MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
875      return true;
876  
877    return false;
878  }
879  
880  
// Selects which family of out-of-line callee-saved register save/restore
// routines getSpillFunctionFor picks a name from.
enum SpillKind {
  SK_ToMem,           // "__save_r16_through_rN" (optionally "_stkchk")
  SK_FromMem,         // "__restore_r16_through_rN_and_deallocframe"
  SK_FromMemTailcall  // "..._and_deallocframe_before_tailcall"
};
886  
getSpillFunctionFor(unsigned MaxReg,SpillKind SpillType,bool Stkchk=false)887  static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
888        bool Stkchk = false) {
889    const char * V4SpillToMemoryFunctions[] = {
890      "__save_r16_through_r17",
891      "__save_r16_through_r19",
892      "__save_r16_through_r21",
893      "__save_r16_through_r23",
894      "__save_r16_through_r25",
895      "__save_r16_through_r27" };
896  
897    const char * V4SpillToMemoryStkchkFunctions[] = {
898      "__save_r16_through_r17_stkchk",
899      "__save_r16_through_r19_stkchk",
900      "__save_r16_through_r21_stkchk",
901      "__save_r16_through_r23_stkchk",
902      "__save_r16_through_r25_stkchk",
903      "__save_r16_through_r27_stkchk" };
904  
905    const char * V4SpillFromMemoryFunctions[] = {
906      "__restore_r16_through_r17_and_deallocframe",
907      "__restore_r16_through_r19_and_deallocframe",
908      "__restore_r16_through_r21_and_deallocframe",
909      "__restore_r16_through_r23_and_deallocframe",
910      "__restore_r16_through_r25_and_deallocframe",
911      "__restore_r16_through_r27_and_deallocframe" };
912  
913    const char * V4SpillFromMemoryTailcallFunctions[] = {
914      "__restore_r16_through_r17_and_deallocframe_before_tailcall",
915      "__restore_r16_through_r19_and_deallocframe_before_tailcall",
916      "__restore_r16_through_r21_and_deallocframe_before_tailcall",
917      "__restore_r16_through_r23_and_deallocframe_before_tailcall",
918      "__restore_r16_through_r25_and_deallocframe_before_tailcall",
919      "__restore_r16_through_r27_and_deallocframe_before_tailcall"
920    };
921  
922    const char **SpillFunc = nullptr;
923  
924    switch(SpillType) {
925    case SK_ToMem:
926      SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions
927                         : V4SpillToMemoryFunctions;
928      break;
929    case SK_FromMem:
930      SpillFunc = V4SpillFromMemoryFunctions;
931      break;
932    case SK_FromMemTailcall:
933      SpillFunc = V4SpillFromMemoryTailcallFunctions;
934      break;
935    }
936    assert(SpillFunc && "Unknown spill kind");
937  
938    // Spill all callee-saved registers up to the highest register used.
939    switch (MaxReg) {
940    case Hexagon::R17:
941      return SpillFunc[0];
942    case Hexagon::R19:
943      return SpillFunc[1];
944    case Hexagon::R21:
945      return SpillFunc[2];
946    case Hexagon::R23:
947      return SpillFunc[3];
948    case Hexagon::R25:
949      return SpillFunc[4];
950    case Hexagon::R27:
951      return SpillFunc[5];
952    default:
953      llvm_unreachable("Unhandled maximum callee save register");
954    }
955    return 0;
956  }
957  
958  
getFrameIndexReference(const MachineFunction & MF,int FI,unsigned & FrameReg) const959  int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
960        int FI, unsigned &FrameReg) const {
961    auto &MFI = *MF.getFrameInfo();
962    auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
963  
964    int Offset = MFI.getObjectOffset(FI);
965    bool HasAlloca = MFI.hasVarSizedObjects();
966    bool HasExtraAlign = HRI.needsStackRealignment(MF);
967    bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
968  
969    unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
970    auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
971    unsigned AP = HMFI.getStackAlignBasePhysReg();
972    unsigned FrameSize = MFI.getStackSize();
973  
974    bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
975    // Use FP at -O0, except when there are objects with extra alignment.
976    // That additional alignment requirement may cause a pad to be inserted,
977    // which will make it impossible to use FP to access objects located
978    // past the pad.
979    if (NoOpt && !HasExtraAlign)
980      UseFP = true;
981    if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
982      // Fixed and preallocated objects will be located before any padding
983      // so FP must be used to access them.
984      UseFP |= (HasAlloca || HasExtraAlign);
985    } else {
986      if (HasAlloca) {
987        if (HasExtraAlign)
988          UseAP = true;
989        else
990          UseFP = true;
991      }
992    }
993  
994    // If FP was picked, then there had better be FP.
995    bool HasFP = hasFP(MF);
996    assert((HasFP || !UseFP) && "This function must have frame pointer");
997  
998    // Having FP implies allocframe. Allocframe will store extra 8 bytes:
999    // FP/LR. If the base register is used to access an object across these
1000    // 8 bytes, then the offset will need to be adjusted by 8.
1001    //
1002    // After allocframe:
1003    //                    HexagonISelLowering adds 8 to ---+
1004    //                    the offsets of all stack-based   |
1005    //                    arguments (*)                    |
1006    //                                                     |
1007    //   getObjectOffset < 0   0     8  getObjectOffset >= 8
1008    // ------------------------+-----+------------------------> increasing
1009    //     <local objects>     |FP/LR|    <input arguments>     addresses
1010    // -----------------+------+-----+------------------------>
1011    //                  |      |
1012    //    SP/AP point --+      +-- FP points here (**)
1013    //    somewhere on
1014    //    this side of FP/LR
1015    //
1016    // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
1017    // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
1018  
1019    // The lowering assumes that FP/LR is present, and so the offsets of
1020    // the formal arguments start at 8. If FP/LR is not there we need to
1021    // reduce the offset by 8.
1022    if (Offset > 0 && !HasFP)
1023      Offset -= 8;
1024  
1025    if (UseFP)
1026      FrameReg = FP;
1027    else if (UseAP)
1028      FrameReg = AP;
1029    else
1030      FrameReg = SP;
1031  
1032    // Calculate the actual offset in the instruction. If there is no FP
1033    // (in other words, no allocframe), then SP will not be adjusted (i.e.
1034    // there will be no SP -= FrameSize), so the frame size should not be
1035    // added to the calculated offset.
1036    int RealOffset = Offset;
1037    if (!UseFP && !UseAP && HasFP)
1038      RealOffset = FrameSize+Offset;
1039    return RealOffset;
1040  }
1041  
1042  
insertCSRSpillsInBlock(MachineBasicBlock & MBB,const CSIVect & CSI,const HexagonRegisterInfo & HRI,bool & PrologueStubs) const1043  bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
1044        const CSIVect &CSI, const HexagonRegisterInfo &HRI,
1045        bool &PrologueStubs) const {
1046    if (CSI.empty())
1047      return true;
1048  
1049    MachineBasicBlock::iterator MI = MBB.begin();
1050    PrologueStubs = false;
1051    MachineFunction &MF = *MBB.getParent();
1052    auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
1053  
1054    if (useSpillFunction(MF, CSI)) {
1055      PrologueStubs = true;
1056      unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
1057      bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
1058      const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
1059                                                 StkOvrFlowEnabled);
1060      auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1061      bool IsPIC = HTM.isPositionIndependent();
1062  
1063      // Call spill function.
1064      DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1065      unsigned SpillOpc;
1066      if (StkOvrFlowEnabled)
1067        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
1068                         : Hexagon::SAVE_REGISTERS_CALL_V4STK;
1069      else
1070        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
1071                         : Hexagon::SAVE_REGISTERS_CALL_V4;
1072  
1073      MachineInstr *SaveRegsCall =
1074          BuildMI(MBB, MI, DL, HII.get(SpillOpc))
1075            .addExternalSymbol(SpillFun);
1076      // Add callee-saved registers as use.
1077      addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
1078      // Add live in registers.
1079      for (unsigned I = 0; I < CSI.size(); ++I)
1080        MBB.addLiveIn(CSI[I].getReg());
1081      return true;
1082    }
1083  
1084    for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1085      unsigned Reg = CSI[i].getReg();
1086      // Add live in registers. We treat eh_return callee saved register r0 - r3
1087      // specially. They are not really callee saved registers as they are not
1088      // supposed to be killed.
1089      bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
1090      int FI = CSI[i].getFrameIdx();
1091      const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1092      HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
1093      if (IsKill)
1094        MBB.addLiveIn(Reg);
1095    }
1096    return true;
1097  }
1098  
1099  
insertCSRRestoresInBlock(MachineBasicBlock & MBB,const CSIVect & CSI,const HexagonRegisterInfo & HRI) const1100  bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
1101        const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
1102    if (CSI.empty())
1103      return false;
1104  
1105    MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
1106    MachineFunction &MF = *MBB.getParent();
1107    auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
1108  
1109    if (useRestoreFunction(MF, CSI)) {
1110      bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
1111      unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
1112      SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
1113      const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
1114      auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1115      bool IsPIC = HTM.isPositionIndependent();
1116  
1117      // Call spill function.
1118      DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
1119                                    : MBB.getLastNonDebugInstr()->getDebugLoc();
1120      MachineInstr *DeallocCall = nullptr;
1121  
1122      if (HasTC) {
1123        unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
1124                              : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
1125        DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
1126            .addExternalSymbol(RestoreFn);
1127      } else {
1128        // The block has a return.
1129        MachineBasicBlock::iterator It = MBB.getFirstTerminator();
1130        assert(It->isReturn() && std::next(It) == MBB.end());
1131        unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
1132                              : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
1133        DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
1134            .addExternalSymbol(RestoreFn);
1135        // Transfer the function live-out registers.
1136        DeallocCall->copyImplicitOps(MF, *It);
1137      }
1138      addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);
1139      return true;
1140    }
1141  
1142    for (unsigned i = 0; i < CSI.size(); ++i) {
1143      unsigned Reg = CSI[i].getReg();
1144      const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1145      int FI = CSI[i].getFrameIdx();
1146      HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
1147    }
1148  
1149    return true;
1150  }
1151  
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const1152  MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(
1153      MachineFunction &MF, MachineBasicBlock &MBB,
1154      MachineBasicBlock::iterator I) const {
1155    MachineInstr &MI = *I;
1156    unsigned Opc = MI.getOpcode();
1157    (void)Opc; // Silence compiler warning.
1158    assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
1159           "Cannot handle this call frame pseudo instruction");
1160    return MBB.erase(I);
1161  }
1162  
1163  
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const1164  void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
1165      MachineFunction &MF, RegScavenger *RS) const {
1166    // If this function has uses aligned stack and also has variable sized stack
1167    // objects, then we need to map all spill slots to fixed positions, so that
1168    // they can be accessed through FP. Otherwise they would have to be accessed
1169    // via AP, which may not be available at the particular place in the program.
1170    MachineFrameInfo *MFI = MF.getFrameInfo();
1171    bool HasAlloca = MFI->hasVarSizedObjects();
1172    bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment());
1173  
1174    if (!HasAlloca || !NeedsAlign)
1175      return;
1176  
1177    unsigned LFS = MFI->getLocalFrameSize();
1178    for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
1179      if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i))
1180        continue;
1181      unsigned S = MFI->getObjectSize(i);
1182      // Reduce the alignment to at most 8. This will require unaligned vector
1183      // stores if they happen here.
1184      unsigned A = std::max(MFI->getObjectAlignment(i), 8U);
1185      MFI->setObjectAlignment(i, 8);
1186      LFS = alignTo(LFS+S, A);
1187      MFI->mapLocalFrameObject(i, -LFS);
1188    }
1189  
1190    MFI->setLocalFrameSize(LFS);
1191    unsigned A = MFI->getLocalFrameMaxAlign();
1192    assert(A <= 8 && "Unexpected local frame alignment");
1193    if (A == 0)
1194      MFI->setLocalFrameMaxAlign(8);
1195    MFI->setUseLocalStackAllocationBlock(true);
1196  
1197    // Set the physical aligned-stack base address register.
1198    unsigned AP = 0;
1199    if (const MachineInstr *AI = getAlignaInstr(MF))
1200      AP = AI->getOperand(0).getReg();
1201    auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1202    HMFI.setStackAlignBasePhysReg(AP);
1203  }
1204  
1205  /// Returns true if there are no caller-saved registers available in class RC.
needToReserveScavengingSpillSlots(MachineFunction & MF,const HexagonRegisterInfo & HRI,const TargetRegisterClass * RC)1206  static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
1207        const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {
1208    MachineRegisterInfo &MRI = MF.getRegInfo();
1209  
1210    auto IsUsed = [&HRI,&MRI] (unsigned Reg) -> bool {
1211      for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); ++AI)
1212        if (MRI.isPhysRegUsed(*AI))
1213          return true;
1214      return false;
1215    };
1216  
1217    // Check for an unused caller-saved register. Callee-saved registers
1218    // have become pristine by now.
1219    for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; ++P)
1220      if (!IsUsed(*P))
1221        return false;
1222  
1223    // All caller-saved registers are used.
1224    return true;
1225  }
1226  
1227  
1228  #ifndef NDEBUG
dump_registers(BitVector & Regs,const TargetRegisterInfo & TRI)1229  static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
1230    dbgs() << '{';
1231    for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
1232      unsigned R = x;
1233      dbgs() << ' ' << PrintReg(R, &TRI);
1234    }
1235    dbgs() << " }";
1236  }
1237  #endif
1238  
1239  
/// Rebuild the callee-saved info \p CSI and create the stack objects for it.
///
/// The incoming registers are first normalized into a set in which no
/// register has a proper super-register that is also in the set (preferring
/// whole super-registers unless one of their parts is reserved). \p CSI is
/// then cleared and refilled: registers that have an entry in the table
/// returned by getCalleeSavedSpillSlots get a fixed spill object at that
/// entry's offset, and all remaining registers get fixed spill objects at
/// offsets below the lowest offset used so far.
///
/// \returns Always true.
bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
      const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
  DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on "
               << MF.getFunction()->getName() << '\n');
  MachineFrameInfo *MFI = MF.getFrameInfo();
  BitVector SRegs(Hexagon::NUM_TARGET_REGS);

  // Generate a set of unique, callee-saved registers (SRegs), where each
  // register in the set is maximal in terms of sub-/super-register relation,
  // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.

  // (1) For each callee-saved register, add that register and all of its
  // sub-registers to SRegs.
  DEBUG(dbgs() << "Initial CS registers: {");
  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
    unsigned R = CSI[i].getReg();
    DEBUG(dbgs() << ' ' << PrintReg(R, TRI));
    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
      SRegs[*SR] = true;
  }
  DEBUG(dbgs() << " }\n");
  DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");

  // (2) For each reserved register, remove that register and all of its
  // super-registers from SRegs. (Only the register itself and its
  // super-registers are visited by this loop.)
  BitVector Reserved = TRI->getReservedRegs(MF);
  for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
    unsigned R = x;
    for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
      SRegs[*SR] = false;
  }
  DEBUG(dbgs() << "Res:     "; dump_registers(Reserved, *TRI); dbgs() << "\n");
  DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");

  // (3) Collect all registers that have at least one sub-register in SRegs,
  // and also have no sub-registers that are reserved. These will be the can-
  // didates for saving as a whole instead of their individual sub-registers.
  // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
  BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
  // First gather every proper super-register of a register in SRegs...
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
      TmpSup[*SR] = true;
  }
  // ...then drop those that are reserved or contain a reserved sub-register.
  for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
    unsigned R = x;
    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
      if (!Reserved[*SR])
        continue;
      TmpSup[R] = false;
      break;
    }
  }
  DEBUG(dbgs() << "TmpSup:  "; dump_registers(TmpSup, *TRI); dbgs() << "\n");

  // (4) Include all super-registers found in (3) into SRegs.
  SRegs |= TmpSup;
  DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");

  // (5) For each register R in SRegs, if any super-register of R is in SRegs,
  // remove R from SRegs.
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
      if (!SRegs[*SR])
        continue;
      SRegs[R] = false;
      break;
    }
  }
  DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");

  // Now, for each register that has a fixed stack slot, create the stack
  // object for it.
  CSI.clear();

  typedef TargetFrameLowering::SpillSlot SpillSlot;
  unsigned NumFixed;
  int MinOffset = 0;  // CS offsets are negative.
  const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
  for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
    if (!SRegs[S->Reg])
      continue;
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
    int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset);
    // Track the lowest offset handed out so far.
    MinOffset = std::min(MinOffset, S->Offset);
    CSI.push_back(CalleeSavedInfo(S->Reg, FI));
    // Mark the register as handled.
    SRegs[S->Reg] = false;
  }

  // There can be some registers that don't have fixed slots. For example,
  // we need to store R0-R3 in functions with exception handling. For each
  // such register, create a non-fixed stack object.
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
    // Place the object directly below everything allocated so far...
    int Off = MinOffset - RC->getSize();
    unsigned Align = std::min(RC->getAlignment(), getStackAlignment());
    assert(isPowerOf2_32(Align));
    // ...and round the (negative) offset down to a multiple of Align.
    Off &= -Align;
    int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off);
    MinOffset = std::min(MinOffset, Off);
    CSI.push_back(CalleeSavedInfo(R, FI));
    SRegs[R] = false;
  }

  DEBUG({
    dbgs() << "CS information: {";
    for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
      int FI = CSI[i].getFrameIdx();
      int Off = MFI->getObjectOffset(FI);
      dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
      if (Off >= 0)
        dbgs() << '+';
      dbgs() << Off;
    }
    dbgs() << " }\n";
  });

#ifndef NDEBUG
  // Verify that all registers were handled. Any bit still set in SRegs at
  // this point names a register that received no stack object.
  bool MissedReg = false;
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    dbgs() << PrintReg(R, TRI) << ' ';
    MissedReg = true;
  }
  if (MissedReg)
    llvm_unreachable("...there are unhandled callee-saved registers!");
#endif

  return true;
}
1373  
1374  
expandCopy(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1375  bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
1376        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1377        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1378    MachineInstr *MI = &*It;
1379    DebugLoc DL = MI->getDebugLoc();
1380    unsigned DstR = MI->getOperand(0).getReg();
1381    unsigned SrcR = MI->getOperand(1).getReg();
1382    if (!Hexagon::ModRegsRegClass.contains(DstR) ||
1383        !Hexagon::ModRegsRegClass.contains(SrcR))
1384      return false;
1385  
1386    unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1387    BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR)
1388      .addOperand(MI->getOperand(1));
1389    BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
1390      .addReg(TmpR, RegState::Kill);
1391  
1392    NewRegs.push_back(TmpR);
1393    B.erase(It);
1394    return true;
1395  }
1396  
expandStoreInt(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1397  bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
1398        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1399        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1400    MachineInstr *MI = &*It;
1401    DebugLoc DL = MI->getDebugLoc();
1402    unsigned Opc = MI->getOpcode();
1403    unsigned SrcR = MI->getOperand(2).getReg();
1404    bool IsKill = MI->getOperand(2).isKill();
1405  
1406    assert(MI->getOperand(0).isFI() && "Expect a frame index");
1407    int FI = MI->getOperand(0).getIndex();
1408  
1409    // TmpR = C2_tfrpr SrcR   if SrcR is a predicate register
1410    // TmpR = A2_tfrcrr SrcR  if SrcR is a modifier register
1411    unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1412    unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
1413                                                   : Hexagon::A2_tfrcrr;
1414    BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
1415      .addReg(SrcR, getKillRegState(IsKill));
1416  
1417    // S2_storeri_io FI, 0, TmpR
1418    BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
1419      .addFrameIndex(FI)
1420      .addImm(0)
1421      .addReg(TmpR, RegState::Kill)
1422      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1423  
1424    NewRegs.push_back(TmpR);
1425    B.erase(It);
1426    return true;
1427  }
1428  
expandLoadInt(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1429  bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
1430        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1431        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1432    MachineInstr *MI = &*It;
1433    DebugLoc DL = MI->getDebugLoc();
1434    unsigned Opc = MI->getOpcode();
1435    unsigned DstR = MI->getOperand(0).getReg();
1436  
1437    assert(MI->getOperand(1).isFI() && "Expect a frame index");
1438    int FI = MI->getOperand(1).getIndex();
1439  
1440    // TmpR = L2_loadri_io FI, 0
1441    unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1442    BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
1443      .addFrameIndex(FI)
1444      .addImm(0)
1445      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1446  
1447    // DstR = C2_tfrrp TmpR   if DstR is a predicate register
1448    // DstR = A2_tfrrcr TmpR  if DstR is a modifier register
1449    unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp
1450                                                   : Hexagon::A2_tfrrcr;
1451    BuildMI(B, It, DL, HII.get(TfrOpc), DstR)
1452      .addReg(TmpR, RegState::Kill);
1453  
1454    NewRegs.push_back(TmpR);
1455    B.erase(It);
1456    return true;
1457  }
1458  
1459  
expandStoreVecPred(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1460  bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
1461        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1462        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1463    auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
1464    MachineInstr *MI = &*It;
1465    DebugLoc DL = MI->getDebugLoc();
1466    unsigned SrcR = MI->getOperand(2).getReg();
1467    bool IsKill = MI->getOperand(2).isKill();
1468  
1469    assert(MI->getOperand(0).isFI() && "Expect a frame index");
1470    int FI = MI->getOperand(0).getIndex();
1471  
1472    bool Is128B = HST.useHVXDblOps();
1473    auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
1474                       : &Hexagon::VectorRegs128BRegClass;
1475  
1476    // Insert transfer to general vector register.
1477    //   TmpR0 = A2_tfrsi 0x01010101
1478    //   TmpR1 = V6_vandqrt Qx, TmpR0
1479    //   store FI, 0, TmpR1
1480    unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1481    unsigned TmpR1 = MRI.createVirtualRegister(RC);
1482  
1483    BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1484      .addImm(0x01010101);
1485  
1486    unsigned VandOpc = !Is128B ? Hexagon::V6_vandqrt : Hexagon::V6_vandqrt_128B;
1487    BuildMI(B, It, DL, HII.get(VandOpc), TmpR1)
1488      .addReg(SrcR, getKillRegState(IsKill))
1489      .addReg(TmpR0, RegState::Kill);
1490  
1491    auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
1492    HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI);
1493    expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);
1494  
1495    NewRegs.push_back(TmpR0);
1496    NewRegs.push_back(TmpR1);
1497    B.erase(It);
1498    return true;
1499  }
1500  
expandLoadVecPred(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1501  bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
1502        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1503        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1504    auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
1505    MachineInstr *MI = &*It;
1506    DebugLoc DL = MI->getDebugLoc();
1507    unsigned DstR = MI->getOperand(0).getReg();
1508  
1509    assert(MI->getOperand(1).isFI() && "Expect a frame index");
1510    int FI = MI->getOperand(1).getIndex();
1511  
1512    bool Is128B = HST.useHVXDblOps();
1513    auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
1514                       : &Hexagon::VectorRegs128BRegClass;
1515  
1516    // TmpR0 = A2_tfrsi 0x01010101
1517    // TmpR1 = load FI, 0
1518    // DstR = V6_vandvrt TmpR1, TmpR0
1519    unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1520    unsigned TmpR1 = MRI.createVirtualRegister(RC);
1521  
1522    BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1523      .addImm(0x01010101);
1524    auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
1525    HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI);
1526    expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);
1527  
1528    unsigned VandOpc = !Is128B ? Hexagon::V6_vandvrt : Hexagon::V6_vandvrt_128B;
1529    BuildMI(B, It, DL, HII.get(VandOpc), DstR)
1530      .addReg(TmpR1, RegState::Kill)
1531      .addReg(TmpR0, RegState::Kill);
1532  
1533    NewRegs.push_back(TmpR0);
1534    NewRegs.push_back(TmpR1);
1535    B.erase(It);
1536    return true;
1537  }
1538  
expandStoreVec2(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1539  bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
1540        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1541        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1542    MachineFunction &MF = *B.getParent();
1543    auto &HST = MF.getSubtarget<HexagonSubtarget>();
1544    auto &MFI = *MF.getFrameInfo();
1545    auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1546    MachineInstr *MI = &*It;
1547    DebugLoc DL = MI->getDebugLoc();
1548  
1549    unsigned SrcR = MI->getOperand(2).getReg();
1550    unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::subreg_loreg);
1551    unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::subreg_hireg);
1552    bool IsKill = MI->getOperand(2).isKill();
1553  
1554    assert(MI->getOperand(0).isFI() && "Expect a frame index");
1555    int FI = MI->getOperand(0).getIndex();
1556  
1557    bool Is128B = HST.useHVXDblOps();
1558    auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
1559                       : &Hexagon::VectorRegs128BRegClass;
1560    unsigned Size = RC->getSize();
1561    unsigned NeedAlign = RC->getAlignment();
1562    unsigned HasAlign = MFI.getObjectAlignment(FI);
1563    unsigned StoreOpc;
1564  
1565    // Store low part.
1566    if (NeedAlign <= HasAlign)
1567      StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai  : Hexagon::V6_vS32b_ai_128B;
1568    else
1569      StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B;
1570  
1571    BuildMI(B, It, DL, HII.get(StoreOpc))
1572      .addFrameIndex(FI)
1573      .addImm(0)
1574      .addReg(SrcLo, getKillRegState(IsKill))
1575      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1576  
1577    // Load high part.
1578    if (NeedAlign <= MinAlign(HasAlign, Size))
1579      StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai  : Hexagon::V6_vS32b_ai_128B;
1580    else
1581      StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B;
1582  
1583    BuildMI(B, It, DL, HII.get(StoreOpc))
1584      .addFrameIndex(FI)
1585      .addImm(Size)
1586      .addReg(SrcHi, getKillRegState(IsKill))
1587      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1588  
1589    B.erase(It);
1590    return true;
1591  }
1592  
expandLoadVec2(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1593  bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
1594        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1595        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1596    MachineFunction &MF = *B.getParent();
1597    auto &HST = MF.getSubtarget<HexagonSubtarget>();
1598    auto &MFI = *MF.getFrameInfo();
1599    auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1600    MachineInstr *MI = &*It;
1601    DebugLoc DL = MI->getDebugLoc();
1602  
1603    unsigned DstR = MI->getOperand(0).getReg();
1604    unsigned DstHi = HRI.getSubReg(DstR, Hexagon::subreg_hireg);
1605    unsigned DstLo = HRI.getSubReg(DstR, Hexagon::subreg_loreg);
1606  
1607    assert(MI->getOperand(1).isFI() && "Expect a frame index");
1608    int FI = MI->getOperand(1).getIndex();
1609  
1610    bool Is128B = HST.useHVXDblOps();
1611    auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
1612                       : &Hexagon::VectorRegs128BRegClass;
1613    unsigned Size = RC->getSize();
1614    unsigned NeedAlign = RC->getAlignment();
1615    unsigned HasAlign = MFI.getObjectAlignment(FI);
1616    unsigned LoadOpc;
1617  
1618    // Load low part.
1619    if (NeedAlign <= HasAlign)
1620      LoadOpc = !Is128B ? Hexagon::V6_vL32b_ai  : Hexagon::V6_vL32b_ai_128B;
1621    else
1622      LoadOpc = !Is128B ? Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32Ub_ai_128B;
1623  
1624    BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
1625      .addFrameIndex(FI)
1626      .addImm(0)
1627      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1628  
1629    // Load high part.
1630    if (NeedAlign <= MinAlign(HasAlign, Size))
1631      LoadOpc = !Is128B ? Hexagon::V6_vL32b_ai  : Hexagon::V6_vL32b_ai_128B;
1632    else
1633      LoadOpc = !Is128B ? Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32Ub_ai_128B;
1634  
1635    BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
1636      .addFrameIndex(FI)
1637      .addImm(Size)
1638      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1639  
1640    B.erase(It);
1641    return true;
1642  }
1643  
expandStoreVec(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1644  bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
1645        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1646        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1647    MachineFunction &MF = *B.getParent();
1648    auto &HST = MF.getSubtarget<HexagonSubtarget>();
1649    auto &MFI = *MF.getFrameInfo();
1650    MachineInstr *MI = &*It;
1651    DebugLoc DL = MI->getDebugLoc();
1652  
1653    unsigned SrcR = MI->getOperand(2).getReg();
1654    bool IsKill = MI->getOperand(2).isKill();
1655  
1656    assert(MI->getOperand(0).isFI() && "Expect a frame index");
1657    int FI = MI->getOperand(0).getIndex();
1658  
1659    bool Is128B = HST.useHVXDblOps();
1660    auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
1661                       : &Hexagon::VectorRegs128BRegClass;
1662  
1663    unsigned NeedAlign = RC->getAlignment();
1664    unsigned HasAlign = MFI.getObjectAlignment(FI);
1665    unsigned StoreOpc;
1666  
1667    if (NeedAlign <= HasAlign)
1668      StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B;
1669    else
1670      StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B;
1671  
1672    BuildMI(B, It, DL, HII.get(StoreOpc))
1673      .addFrameIndex(FI)
1674      .addImm(0)
1675      .addReg(SrcR, getKillRegState(IsKill))
1676      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1677  
1678    B.erase(It);
1679    return true;
1680  }
1681  
expandLoadVec(MachineBasicBlock & B,MachineBasicBlock::iterator It,MachineRegisterInfo & MRI,const HexagonInstrInfo & HII,SmallVectorImpl<unsigned> & NewRegs) const1682  bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
1683        MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1684        const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1685    MachineFunction &MF = *B.getParent();
1686    auto &HST = MF.getSubtarget<HexagonSubtarget>();
1687    auto &MFI = *MF.getFrameInfo();
1688    MachineInstr *MI = &*It;
1689    DebugLoc DL = MI->getDebugLoc();
1690  
1691    unsigned DstR = MI->getOperand(0).getReg();
1692  
1693    assert(MI->getOperand(1).isFI() && "Expect a frame index");
1694    int FI = MI->getOperand(1).getIndex();
1695  
1696    bool Is128B = HST.useHVXDblOps();
1697    auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
1698                       : &Hexagon::VectorRegs128BRegClass;
1699  
1700    unsigned NeedAlign = RC->getAlignment();
1701    unsigned HasAlign = MFI.getObjectAlignment(FI);
1702    unsigned LoadOpc;
1703  
1704    if (NeedAlign <= HasAlign)
1705      LoadOpc = !Is128B ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32b_ai_128B;
1706    else
1707      LoadOpc = !Is128B ? Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32Ub_ai_128B;
1708  
1709    BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
1710      .addFrameIndex(FI)
1711      .addImm(0)
1712      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1713  
1714    B.erase(It);
1715    return true;
1716  }
1717  
1718  
expandSpillMacros(MachineFunction & MF,SmallVectorImpl<unsigned> & NewRegs) const1719  bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
1720        SmallVectorImpl<unsigned> &NewRegs) const {
1721    auto &HST = MF.getSubtarget<HexagonSubtarget>();
1722    auto &HII = *HST.getInstrInfo();
1723    MachineRegisterInfo &MRI = MF.getRegInfo();
1724    bool Changed = false;
1725  
1726    for (auto &B : MF) {
1727      // Traverse the basic block.
1728      MachineBasicBlock::iterator NextI;
1729      for (auto I = B.begin(), E = B.end(); I != E; I = NextI) {
1730        MachineInstr *MI = &*I;
1731        NextI = std::next(I);
1732        unsigned Opc = MI->getOpcode();
1733  
1734        switch (Opc) {
1735          case TargetOpcode::COPY:
1736            Changed |= expandCopy(B, I, MRI, HII, NewRegs);
1737            break;
1738          case Hexagon::STriw_pred:
1739          case Hexagon::STriw_mod:
1740            Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
1741            break;
1742          case Hexagon::LDriw_pred:
1743          case Hexagon::LDriw_mod:
1744            Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
1745            break;
1746          case Hexagon::STriq_pred_V6:
1747          case Hexagon::STriq_pred_V6_128B:
1748            Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
1749            break;
1750          case Hexagon::LDriq_pred_V6:
1751          case Hexagon::LDriq_pred_V6_128B:
1752            Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
1753            break;
1754          case Hexagon::LDrivv_pseudo_V6:
1755          case Hexagon::LDrivv_pseudo_V6_128B:
1756            Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
1757            break;
1758          case Hexagon::STrivv_pseudo_V6:
1759          case Hexagon::STrivv_pseudo_V6_128B:
1760            Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
1761            break;
1762          case Hexagon::STriv_pseudo_V6:
1763          case Hexagon::STriv_pseudo_V6_128B:
1764            Changed |= expandStoreVec(B, I, MRI, HII, NewRegs);
1765            break;
1766          case Hexagon::LDriv_pseudo_V6:
1767          case Hexagon::LDriv_pseudo_V6_128B:
1768            Changed |= expandLoadVec(B, I, MRI, HII, NewRegs);
1769            break;
1770        }
1771      }
1772    }
1773  
1774    return Changed;
1775  }
1776  
1777  
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const1778  void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
1779                                                  BitVector &SavedRegs,
1780                                                  RegScavenger *RS) const {
1781    auto &HST = MF.getSubtarget<HexagonSubtarget>();
1782    auto &HRI = *HST.getRegisterInfo();
1783  
1784    SavedRegs.resize(HRI.getNumRegs());
1785  
1786    // If we have a function containing __builtin_eh_return we want to spill and
1787    // restore all callee saved registers. Pretend that they are used.
1788    if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
1789      for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R)
1790        SavedRegs.set(*R);
1791  
1792    // Replace predicate register pseudo spill code.
1793    SmallVector<unsigned,8> NewRegs;
1794    expandSpillMacros(MF, NewRegs);
1795    if (OptimizeSpillSlots && !isOptNone(MF))
1796      optimizeSpillSlots(MF, NewRegs);
1797  
1798    // We need to reserve a a spill slot if scavenging could potentially require
1799    // spilling a scavenged register.
1800    if (!NewRegs.empty()) {
1801      MachineFrameInfo &MFI = *MF.getFrameInfo();
1802      MachineRegisterInfo &MRI = MF.getRegInfo();
1803      SetVector<const TargetRegisterClass*> SpillRCs;
1804      // Reserve an int register in any case, because it could be used to hold
1805      // the stack offset in case it does not fit into a spill instruction.
1806      SpillRCs.insert(&Hexagon::IntRegsRegClass);
1807  
1808      for (unsigned VR : NewRegs)
1809        SpillRCs.insert(MRI.getRegClass(VR));
1810  
1811      for (auto *RC : SpillRCs) {
1812        if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
1813          continue;
1814        unsigned Num = RC == &Hexagon::IntRegsRegClass ? NumberScavengerSlots : 1;
1815        unsigned S = RC->getSize(), A = RC->getAlignment();
1816        for (unsigned i = 0; i < Num; i++) {
1817          int NewFI = MFI.CreateSpillStackObject(S, A);
1818          RS->addScavengingFrameIndex(NewFI);
1819        }
1820      }
1821    }
1822  
1823    TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1824  }
1825  
1826  
findPhysReg(MachineFunction & MF,HexagonBlockRanges::IndexRange & FIR,HexagonBlockRanges::InstrIndexMap & IndexMap,HexagonBlockRanges::RegToRangeMap & DeadMap,const TargetRegisterClass * RC) const1827  unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
1828        HexagonBlockRanges::IndexRange &FIR,
1829        HexagonBlockRanges::InstrIndexMap &IndexMap,
1830        HexagonBlockRanges::RegToRangeMap &DeadMap,
1831        const TargetRegisterClass *RC) const {
1832    auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1833    auto &MRI = MF.getRegInfo();
1834  
1835    auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
1836      auto F = DeadMap.find({Reg,0});
1837      if (F == DeadMap.end())
1838        return false;
1839      for (auto &DR : F->second)
1840        if (DR.contains(FIR))
1841          return true;
1842      return false;
1843    };
1844  
1845    for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
1846      bool Dead = true;
1847      for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
1848        if (isDead(R.Reg))
1849          continue;
1850        Dead = false;
1851        break;
1852      }
1853      if (Dead)
1854        return Reg;
1855    }
1856    return 0;
1857  }
1858  
optimizeSpillSlots(MachineFunction & MF,SmallVectorImpl<unsigned> & VRegs) const1859  void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
1860        SmallVectorImpl<unsigned> &VRegs) const {
1861    auto &HST = MF.getSubtarget<HexagonSubtarget>();
1862    auto &HII = *HST.getInstrInfo();
1863    auto &HRI = *HST.getRegisterInfo();
1864    auto &MRI = MF.getRegInfo();
1865    HexagonBlockRanges HBR(MF);
1866  
1867    typedef std::map<MachineBasicBlock*,HexagonBlockRanges::InstrIndexMap>
1868        BlockIndexMap;
1869    typedef std::map<MachineBasicBlock*,HexagonBlockRanges::RangeList>
1870        BlockRangeMap;
1871    typedef HexagonBlockRanges::IndexType IndexType;
1872  
1873    struct SlotInfo {
1874      BlockRangeMap Map;
1875      unsigned Size;
1876      const TargetRegisterClass *RC;
1877  
1878      SlotInfo() : Map(), Size(0), RC(nullptr) {}
1879    };
1880  
1881    BlockIndexMap BlockIndexes;
1882    SmallSet<int,4> BadFIs;
1883    std::map<int,SlotInfo> FIRangeMap;
1884  
1885    auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R)
1886          -> const TargetRegisterClass* {
1887      if (TargetRegisterInfo::isPhysicalRegister(R.Reg))
1888        assert(R.Sub == 0);
1889      if (TargetRegisterInfo::isVirtualRegister(R.Reg)) {
1890        auto *RCR = MRI.getRegClass(R.Reg);
1891        if (R.Sub == 0)
1892          return RCR;
1893        unsigned PR = *RCR->begin();
1894        R.Reg = HRI.getSubReg(PR, R.Sub);
1895      }
1896      return HRI.getMinimalPhysRegClass(R.Reg);
1897    };
1898    // Accumulate register classes: get a common class for a pre-existing
1899    // class HaveRC and a new class NewRC. Return nullptr if a common class
1900    // cannot be found, otherwise return the resulting class. If HaveRC is
1901    // nullptr, assume that it is still unset.
1902    auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC,
1903                               const TargetRegisterClass *NewRC)
1904          -> const TargetRegisterClass* {
1905      if (HaveRC == nullptr || HaveRC == NewRC)
1906        return NewRC;
1907      // Different classes, both non-null. Pick the more general one.
1908      if (HaveRC->hasSubClassEq(NewRC))
1909        return HaveRC;
1910      if (NewRC->hasSubClassEq(HaveRC))
1911        return NewRC;
1912      return nullptr;
1913    };
1914  
1915    // Scan all blocks in the function. Check all occurrences of frame indexes,
1916    // and collect relevant information.
1917    for (auto &B : MF) {
1918      std::map<int,IndexType> LastStore, LastLoad;
1919      // Emplace appears not to be supported in gcc 4.7.2-4.
1920      //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
1921      auto P = BlockIndexes.insert(
1922                  std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));
1923      auto &IndexMap = P.first->second;
1924      DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n"
1925                   << IndexMap << '\n');
1926  
1927      for (auto &In : B) {
1928        int LFI, SFI;
1929        bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In);
1930        bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In);
1931        if (Load && Store) {
1932          // If it's both a load and a store, then we won't handle it.
1933          BadFIs.insert(LFI);
1934          BadFIs.insert(SFI);
1935          continue;
1936        }
1937        // Check for register classes of the register used as the source for
1938        // the store, and the register used as the destination for the load.
1939        // Also, only accept base+imm_offset addressing modes. Other addressing
1940        // modes can have side-effects (post-increments, etc.). For stack
1941        // slots they are very unlikely, so there is not much loss due to
1942        // this restriction.
1943        if (Load || Store) {
1944          int TFI = Load ? LFI : SFI;
1945          unsigned AM = HII.getAddrMode(&In);
1946          SlotInfo &SI = FIRangeMap[TFI];
1947          bool Bad = (AM != HexagonII::BaseImmOffset);
1948          if (!Bad) {
1949            // If the addressing mode is ok, check the register class.
1950            const TargetRegisterClass *RC = nullptr;
1951            if (Load) {
1952              MachineOperand &DataOp = In.getOperand(0);
1953              RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
1954            } else {
1955              MachineOperand &DataOp = In.getOperand(2);
1956              RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
1957            }
1958            RC = getCommonRC(SI.RC, RC);
1959            if (RC == nullptr)
1960              Bad = true;
1961            else
1962              SI.RC = RC;
1963          }
1964          if (!Bad) {
1965            // Check sizes.
1966            unsigned S = (1U << (HII.getMemAccessSize(&In) - 1));
1967            if (SI.Size != 0 && SI.Size != S)
1968              Bad = true;
1969            else
1970              SI.Size = S;
1971          }
1972          if (Bad)
1973            BadFIs.insert(TFI);
1974        }
1975  
1976        // Locate uses of frame indices.
1977        for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
1978          const MachineOperand &Op = In.getOperand(i);
1979          if (!Op.isFI())
1980            continue;
1981          int FI = Op.getIndex();
1982          // Make sure that the following operand is an immediate and that
1983          // it is 0. This is the offset in the stack object.
1984          if (i+1 >= n || !In.getOperand(i+1).isImm() ||
1985              In.getOperand(i+1).getImm() != 0)
1986            BadFIs.insert(FI);
1987          if (BadFIs.count(FI))
1988            continue;
1989  
1990          IndexType Index = IndexMap.getIndex(&In);
1991          if (Load) {
1992            if (LastStore[FI] == IndexType::None)
1993              LastStore[FI] = IndexType::Entry;
1994            LastLoad[FI] = Index;
1995          } else if (Store) {
1996            HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
1997            if (LastStore[FI] != IndexType::None)
1998              RL.add(LastStore[FI], LastLoad[FI], false, false);
1999            else if (LastLoad[FI] != IndexType::None)
2000              RL.add(IndexType::Entry, LastLoad[FI], false, false);
2001            LastLoad[FI] = IndexType::None;
2002            LastStore[FI] = Index;
2003          } else {
2004            BadFIs.insert(FI);
2005          }
2006        }
2007      }
2008  
2009      for (auto &I : LastLoad) {
2010        IndexType LL = I.second;
2011        if (LL == IndexType::None)
2012          continue;
2013        auto &RL = FIRangeMap[I.first].Map[&B];
2014        IndexType &LS = LastStore[I.first];
2015        if (LS != IndexType::None)
2016          RL.add(LS, LL, false, false);
2017        else
2018          RL.add(IndexType::Entry, LL, false, false);
2019        LS = IndexType::None;
2020      }
2021      for (auto &I : LastStore) {
2022        IndexType LS = I.second;
2023        if (LS == IndexType::None)
2024          continue;
2025        auto &RL = FIRangeMap[I.first].Map[&B];
2026        RL.add(LS, IndexType::None, false, false);
2027      }
2028    }
2029  
2030    DEBUG({
2031      for (auto &P : FIRangeMap) {
2032        dbgs() << "fi#" << P.first;
2033        if (BadFIs.count(P.first))
2034          dbgs() << " (bad)";
2035        dbgs() << "  RC: ";
2036        if (P.second.RC != nullptr)
2037          dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
2038        else
2039          dbgs() << "<null>\n";
2040        for (auto &R : P.second.Map)
2041          dbgs() << "  BB#" << R.first->getNumber() << " { " << R.second << "}\n";
2042      }
2043    });
2044  
2045    // When a slot is loaded from in a block without being stored to in the
2046    // same block, it is live-on-entry to this block. To avoid CFG analysis,
2047    // consider this slot to be live-on-exit from all blocks.
2048    SmallSet<int,4> LoxFIs;
2049  
2050    std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
2051  
2052    for (auto &P : FIRangeMap) {
2053      // P = pair(FI, map: BB->RangeList)
2054      if (BadFIs.count(P.first))
2055        continue;
2056      for (auto &B : MF) {
2057        auto F = P.second.Map.find(&B);
2058        // F = pair(BB, RangeList)
2059        if (F == P.second.Map.end() || F->second.empty())
2060          continue;
2061        HexagonBlockRanges::IndexRange &IR = F->second.front();
2062        if (IR.start() == IndexType::Entry)
2063          LoxFIs.insert(P.first);
2064        BlockFIMap[&B].push_back(P.first);
2065      }
2066    }
2067  
2068    DEBUG({
2069      dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
2070      for (auto &P : BlockFIMap) {
2071        auto &FIs = P.second;
2072        if (FIs.empty())
2073          continue;
2074        dbgs() << "  BB#" << P.first->getNumber() << ": {";
2075        for (auto I : FIs) {
2076          dbgs() << " fi#" << I;
2077          if (LoxFIs.count(I))
2078            dbgs() << '*';
2079        }
2080        dbgs() << " }\n";
2081      }
2082    });
2083  
2084    // eliminate loads, when all loads eliminated, eliminate all stores.
2085    for (auto &B : MF) {
2086      auto F = BlockIndexes.find(&B);
2087      assert(F != BlockIndexes.end());
2088      HexagonBlockRanges::InstrIndexMap &IM = F->second;
2089      HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
2090      HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
2091      DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n"
2092                   << HexagonBlockRanges::PrintRangeMap(DM, HRI));
2093  
2094      for (auto FI : BlockFIMap[&B]) {
2095        if (BadFIs.count(FI))
2096          continue;
2097        DEBUG(dbgs() << "Working on fi#" << FI << '\n');
2098        HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2099        for (auto &Range : RL) {
2100          DEBUG(dbgs() << "--Examining range:" << RL << '\n');
2101          if (!IndexType::isInstr(Range.start()) ||
2102              !IndexType::isInstr(Range.end()))
2103            continue;
2104          MachineInstr *SI = IM.getInstr(Range.start());
2105          MachineInstr *EI = IM.getInstr(Range.end());
2106          assert(SI->mayStore() && "Unexpected start instruction");
2107          assert(EI->mayLoad() && "Unexpected end instruction");
2108          MachineOperand &SrcOp = SI->getOperand(2);
2109  
2110          HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
2111                                                    SrcOp.getSubReg() };
2112          auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()});
2113          // The this-> is needed to unconfuse MSVC.
2114          unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
2115          DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n');
2116          if (FoundR == 0)
2117            continue;
2118  
2119          // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
2120          MachineBasicBlock::iterator StartIt = SI, NextIt;
2121          MachineInstr *CopyIn = nullptr;
2122          if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
2123            const DebugLoc &DL = SI->getDebugLoc();
2124            CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
2125                        .addOperand(SrcOp);
2126          }
2127  
2128          ++StartIt;
2129          // Check if this is a last store and the FI is live-on-exit.
2130          if (LoxFIs.count(FI) && (&Range == &RL.back())) {
2131            // Update store's source register.
2132            if (unsigned SR = SrcOp.getSubReg())
2133              SrcOp.setReg(HRI.getSubReg(FoundR, SR));
2134            else
2135              SrcOp.setReg(FoundR);
2136            SrcOp.setSubReg(0);
2137            // We are keeping this register live.
2138            SrcOp.setIsKill(false);
2139          } else {
2140            B.erase(SI);
2141            IM.replaceInstr(SI, CopyIn);
2142          }
2143  
2144          auto EndIt = std::next(MachineBasicBlock::iterator(EI));
2145          for (auto It = StartIt; It != EndIt; It = NextIt) {
2146            MachineInstr *MI = &*It;
2147            NextIt = std::next(It);
2148            int TFI;
2149            if (!HII.isLoadFromStackSlot(*MI, TFI) || TFI != FI)
2150              continue;
2151            unsigned DstR = MI->getOperand(0).getReg();
2152            assert(MI->getOperand(0).getSubReg() == 0);
2153            MachineInstr *CopyOut = nullptr;
2154            if (DstR != FoundR) {
2155              DebugLoc DL = MI->getDebugLoc();
2156              unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1));
2157              assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
2158              unsigned CopyOpc = TargetOpcode::COPY;
2159              if (HII.isSignExtendingLoad(*MI))
2160                CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
2161              else if (HII.isZeroExtendingLoad(*MI))
2162                CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
2163              CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
2164                          .addReg(FoundR, getKillRegState(MI == EI));
2165            }
2166            IM.replaceInstr(MI, CopyOut);
2167            B.erase(It);
2168          }
2169  
2170          // Update the dead map.
2171          HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
2172          for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
2173            DM[RR].subtract(Range);
2174        } // for Range in range list
2175      }
2176    }
2177  }
2178  
2179  
expandAlloca(MachineInstr * AI,const HexagonInstrInfo & HII,unsigned SP,unsigned CF) const2180  void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
2181        const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
2182    MachineBasicBlock &MB = *AI->getParent();
2183    DebugLoc DL = AI->getDebugLoc();
2184    unsigned A = AI->getOperand(2).getImm();
2185  
2186    // Have
2187    //    Rd  = alloca Rs, #A
2188    //
2189    // If Rs and Rd are different registers, use this sequence:
2190    //    Rd  = sub(r29, Rs)
2191    //    r29 = sub(r29, Rs)
2192    //    Rd  = and(Rd, #-A)    ; if necessary
2193    //    r29 = and(r29, #-A)   ; if necessary
2194    //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2195    // otherwise, do
2196    //    Rd  = sub(r29, Rs)
2197    //    Rd  = and(Rd, #-A)    ; if necessary
2198    //    r29 = Rd
2199    //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2200  
2201    MachineOperand &RdOp = AI->getOperand(0);
2202    MachineOperand &RsOp = AI->getOperand(1);
2203    unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
2204  
2205    // Rd = sub(r29, Rs)
2206    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
2207        .addReg(SP)
2208        .addReg(Rs);
2209    if (Rs != Rd) {
2210      // r29 = sub(r29, Rs)
2211      BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
2212          .addReg(SP)
2213          .addReg(Rs);
2214    }
2215    if (A > 8) {
2216      // Rd  = and(Rd, #-A)
2217      BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2218          .addReg(Rd)
2219          .addImm(-int64_t(A));
2220      if (Rs != Rd)
2221        BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
2222            .addReg(SP)
2223            .addImm(-int64_t(A));
2224    }
2225    if (Rs == Rd) {
2226      // r29 = Rd
2227      BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
2228          .addReg(Rd);
2229    }
2230    if (CF > 0) {
2231      // Rd = add(Rd, #CF)
2232      BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
2233          .addReg(Rd)
2234          .addImm(CF);
2235    }
2236  }
2237  
2238  
needsAligna(const MachineFunction & MF) const2239  bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
2240    const MachineFrameInfo *MFI = MF.getFrameInfo();
2241    if (!MFI->hasVarSizedObjects())
2242      return false;
2243    unsigned MaxA = MFI->getMaxAlignment();
2244    if (MaxA <= getStackAlignment())
2245      return false;
2246    return true;
2247  }
2248  
2249  
getAlignaInstr(const MachineFunction & MF) const2250  const MachineInstr *HexagonFrameLowering::getAlignaInstr(
2251        const MachineFunction &MF) const {
2252    for (auto &B : MF)
2253      for (auto &I : B)
2254        if (I.getOpcode() == Hexagon::ALIGNA)
2255          return &I;
2256    return nullptr;
2257  }
2258  
2259  
2260  /// Adds all callee-saved registers as implicit uses or defs to the
2261  /// instruction.
addCalleeSaveRegistersAsImpOperand(MachineInstr * MI,const CSIVect & CSI,bool IsDef,bool IsKill) const2262  void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
2263        const CSIVect &CSI, bool IsDef, bool IsKill) const {
2264    // Add the callee-saved registers as implicit uses.
2265    for (auto &R : CSI)
2266      MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
2267  }
2268  
2269  
2270  /// Determine whether the callee-saved register saves and restores should
2271  /// be generated via inline code. If this function returns "true", inline
2272  /// code will be generated. If this function returns "false", additional
2273  /// checks are performed, which may still lead to the inline code.
shouldInlineCSR(MachineFunction & MF,const CSIVect & CSI) const2274  bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
2275        const CSIVect &CSI) const {
2276    if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
2277      return true;
2278    if (!isOptSize(MF) && !isMinSize(MF))
2279      if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
2280        return true;
2281  
2282    // Check if CSI only has double registers, and if the registers form
2283    // a contiguous block starting from D8.
2284    BitVector Regs(Hexagon::NUM_TARGET_REGS);
2285    for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
2286      unsigned R = CSI[i].getReg();
2287      if (!Hexagon::DoubleRegsRegClass.contains(R))
2288        return true;
2289      Regs[R] = true;
2290    }
2291    int F = Regs.find_first();
2292    if (F != Hexagon::D8)
2293      return true;
2294    while (F >= 0) {
2295      int N = Regs.find_next(F);
2296      if (N >= 0 && N != F+1)
2297        return true;
2298      F = N;
2299    }
2300  
2301    return false;
2302  }
2303  
2304  
useSpillFunction(MachineFunction & MF,const CSIVect & CSI) const2305  bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
2306        const CSIVect &CSI) const {
2307    if (shouldInlineCSR(MF, CSI))
2308      return false;
2309    unsigned NumCSI = CSI.size();
2310    if (NumCSI <= 1)
2311      return false;
2312  
2313    unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
2314                                       : SpillFuncThreshold;
2315    return Threshold < NumCSI;
2316  }
2317  
2318  
useRestoreFunction(MachineFunction & MF,const CSIVect & CSI) const2319  bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
2320        const CSIVect &CSI) const {
2321    if (shouldInlineCSR(MF, CSI))
2322      return false;
2323    // The restore functions do a bit more than just restoring registers.
2324    // The non-returning versions will go back directly to the caller's
2325    // caller, others will clean up the stack frame in preparation for
2326    // a tail call. Using them can still save code size even if only one
2327    // register is getting restores. Make the decision based on -Oz:
2328    // using -Os will use inline restore for a single register.
2329    if (isMinSize(MF))
2330      return true;
2331    unsigned NumCSI = CSI.size();
2332    if (NumCSI <= 1)
2333      return false;
2334  
2335    unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
2336                                       : SpillFuncThreshold;
2337    return Threshold < NumCSI;
2338  }
2339