1 //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The QPX vector registers overlay the scalar floating-point registers, and
11 // any scalar floating-point loads splat their value across all vector lanes.
12 // Thus, if we have a scalar load followed by a splat, we can remove the splat
13 // (i.e. replace the load with a load-and-splat pseudo instruction).
14 //
15 // This pass must run after anything that might do store-to-load forwarding.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "PPC.h"
20 #include "PPCInstrBuilder.h"
21 #include "PPCInstrInfo.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Target/TargetMachine.h"
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "ppc-qpx-load-splat"
31 
32 STATISTIC(NumSimplified, "Number of QPX load splats simplified");
33 
34 namespace llvm {
35   void initializePPCQPXLoadSplatPass(PassRegistry&);
36 }
37 
38 namespace {
39   struct PPCQPXLoadSplat : public MachineFunctionPass {
40     static char ID;
PPCQPXLoadSplat__anon7190338f0111::PPCQPXLoadSplat41     PPCQPXLoadSplat() : MachineFunctionPass(ID) {
42       initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
43     }
44 
45     bool runOnMachineFunction(MachineFunction &Fn) override;
46 
getPassName__anon7190338f0111::PPCQPXLoadSplat47     StringRef getPassName() const override {
48       return "PowerPC QPX Load Splat Simplification";
49     }
50   };
51   char PPCQPXLoadSplat::ID = 0;
52 }
53 
54 INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
55                 "PowerPC QPX Load Splat Simplification",
56                 false, false)
57 
createPPCQPXLoadSplatPass()58 FunctionPass *llvm::createPPCQPXLoadSplatPass() {
59   return new PPCQPXLoadSplat();
60 }
61 
runOnMachineFunction(MachineFunction & MF)62 bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
63   if (skipFunction(MF.getFunction()))
64     return false;
65 
66   bool MadeChange = false;
67   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
68 
69   for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
70     MachineBasicBlock *MBB = &*MFI;
71     SmallVector<MachineInstr *, 4> Splats;
72 
73     for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
74       MachineInstr *MI = &*MBBI;
75 
76       if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
77         Splats.clear();
78         continue;
79       }
80 
81       // We're looking for a sequence like this:
82       // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
83       // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
84 
85       for (auto SI = Splats.begin(); SI != Splats.end();) {
86         MachineInstr *SMI = *SI;
87         unsigned SplatReg = SMI->getOperand(0).getReg();
88         unsigned SrcReg = SMI->getOperand(1).getReg();
89 
90         if (MI->modifiesRegister(SrcReg, TRI)) {
91           switch (MI->getOpcode()) {
92           default:
93             SI = Splats.erase(SI);
94             continue;
95           case PPC::LFS:
96           case PPC::LFD:
97           case PPC::LFSU:
98           case PPC::LFDU:
99           case PPC::LFSUX:
100           case PPC::LFDUX:
101           case PPC::LFSX:
102           case PPC::LFDX:
103           case PPC::LFIWAX:
104           case PPC::LFIWZX:
105             if (SplatReg != SrcReg) {
106               // We need to change the load to define the scalar subregister of
107               // the QPX splat source register.
108               unsigned SubRegIndex =
109                 TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
110               unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
111 
112               // Substitute both the explicit defined register, and also the
113               // implicit def of the containing QPX register.
114               MI->getOperand(0).setReg(SplatSubReg);
115               MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
116             }
117 
118             SI = Splats.erase(SI);
119 
120             // If SMI is directly after MI, then MBBI's base iterator is
121             // pointing at SMI.  Adjust MBBI around the call to erase SMI to
122             // avoid invalidating MBBI.
123             ++MBBI;
124             SMI->eraseFromParent();
125             --MBBI;
126 
127             ++NumSimplified;
128             MadeChange = true;
129             continue;
130           }
131         }
132 
133         // If this instruction defines the splat register, then we cannot move
134         // the previous definition above it. If it reads from the splat
135         // register, then it must already be alive from some previous
136         // definition, and if the splat register is different from the source
137         // register, then this definition must not be the load for which we're
138         // searching.
139         if (MI->modifiesRegister(SplatReg, TRI) ||
140             (SrcReg != SplatReg &&
141              MI->readsRegister(SplatReg, TRI))) {
142           SI = Splats.erase(SI);
143           continue;
144         }
145 
146         ++SI;
147       }
148 
149       if (MI->getOpcode() != PPC::QVESPLATI &&
150           MI->getOpcode() != PPC::QVESPLATIs &&
151           MI->getOpcode() != PPC::QVESPLATIb)
152         continue;
153       if (MI->getOperand(2).getImm() != 0)
154         continue;
155 
156       // If there are other uses of the scalar value after this, replacing
157       // those uses might be non-trivial.
158       if (!MI->getOperand(1).isKill())
159         continue;
160 
161       Splats.push_back(MI);
162     }
163   }
164 
165   return MadeChange;
166 }
167