1 //===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Local-dynamic access to thread-local variables proceeds in three stages.
11 //
12 // 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated
13 //    in much the same way as a general-dynamic TLS-descriptor access against
//    the special symbol _TLS_MODULE_BASE_.
15 // 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using
16 //    instructions with "dtprel" modifiers.
17 // 3. These two are added, together with TPIDR_EL0, to obtain the variable's
18 //    true address.
19 //
20 // This is only better than general-dynamic access to the variable if two or
21 // more of the first stage TLS-descriptor calculations can be combined. This
22 // pass looks through a function and performs such combinations.
23 //
24 //===----------------------------------------------------------------------===//
25 #include "AArch64.h"
26 #include "AArch64InstrInfo.h"
27 #include "AArch64MachineFunctionInfo.h"
28 #include "AArch64TargetMachine.h"
29 #include "llvm/CodeGen/MachineDominators.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 using namespace llvm;
35 
36 namespace {
37 struct LDTLSCleanup : public MachineFunctionPass {
38   static char ID;
LDTLSCleanup__anonfdaa28eb0111::LDTLSCleanup39   LDTLSCleanup() : MachineFunctionPass(ID) {}
40 
runOnMachineFunction__anonfdaa28eb0111::LDTLSCleanup41   bool runOnMachineFunction(MachineFunction &MF) override {
42     if (skipFunction(*MF.getFunction()))
43       return false;
44 
45     AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
46     if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
47       // No point folding accesses if there isn't at least two.
48       return false;
49     }
50 
51     MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
52     return VisitNode(DT->getRootNode(), 0);
53   }
54 
55   // Visit the dominator subtree rooted at Node in pre-order.
56   // If TLSBaseAddrReg is non-null, then use that to replace any
57   // TLS_base_addr instructions. Otherwise, create the register
58   // when the first such instruction is seen, and then use it
59   // as we encounter more instructions.
VisitNode__anonfdaa28eb0111::LDTLSCleanup60   bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
61     MachineBasicBlock *BB = Node->getBlock();
62     bool Changed = false;
63 
64     // Traverse the current block.
65     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
66          ++I) {
67       switch (I->getOpcode()) {
68       case AArch64::TLSDESC_CALLSEQ:
69         // Make sure it's a local dynamic access.
70         if (!I->getOperand(0).isSymbol() ||
71             strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
72           break;
73 
74         if (TLSBaseAddrReg)
75           I = replaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
76         else
77           I = setRegister(*I, &TLSBaseAddrReg);
78         Changed = true;
79         break;
80       default:
81         break;
82       }
83     }
84 
85     // Visit the children of this block in the dominator tree.
86     for (MachineDomTreeNode *N : *Node) {
87       Changed |= VisitNode(N, TLSBaseAddrReg);
88     }
89 
90     return Changed;
91   }
92 
93   // Replace the TLS_base_addr instruction I with a copy from
94   // TLSBaseAddrReg, returning the new instruction.
replaceTLSBaseAddrCall__anonfdaa28eb0111::LDTLSCleanup95   MachineInstr *replaceTLSBaseAddrCall(MachineInstr &I,
96                                        unsigned TLSBaseAddrReg) {
97     MachineFunction *MF = I.getParent()->getParent();
98     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
99 
100     // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
101     // code sequence assumes the address will be.
102     MachineInstr *Copy = BuildMI(*I.getParent(), I, I.getDebugLoc(),
103                                  TII->get(TargetOpcode::COPY), AArch64::X0)
104                              .addReg(TLSBaseAddrReg);
105 
106     // Erase the TLS_base_addr instruction.
107     I.eraseFromParent();
108 
109     return Copy;
110   }
111 
112   // Create a virtal register in *TLSBaseAddrReg, and populate it by
113   // inserting a copy instruction after I. Returns the new instruction.
setRegister__anonfdaa28eb0111::LDTLSCleanup114   MachineInstr *setRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
115     MachineFunction *MF = I.getParent()->getParent();
116     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
117 
118     // Create a virtual register for the TLS base address.
119     MachineRegisterInfo &RegInfo = MF->getRegInfo();
120     *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
121 
122     // Insert a copy from X0 to TLSBaseAddrReg for later.
123     MachineInstr *Copy =
124         BuildMI(*I.getParent(), ++I.getIterator(), I.getDebugLoc(),
125                 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
126             .addReg(AArch64::X0);
127 
128     return Copy;
129   }
130 
getPassName__anonfdaa28eb0111::LDTLSCleanup131   const char *getPassName() const override {
132     return "Local Dynamic TLS Access Clean-up";
133   }
134 
getAnalysisUsage__anonfdaa28eb0111::LDTLSCleanup135   void getAnalysisUsage(AnalysisUsage &AU) const override {
136     AU.setPreservesCFG();
137     AU.addRequired<MachineDominatorTree>();
138     MachineFunctionPass::getAnalysisUsage(AU);
139   }
140 };
141 }
142 
// Definition of the pass's static ID member, which the LDTLSCleanup
// constructor hands to MachineFunctionPass.
// NOTE(review): LLVM conventionally identifies a pass by this variable's
// address rather than its value, so the initial 0 is presumably arbitrary.
char LDTLSCleanup::ID = 0;
createAArch64CleanupLocalDynamicTLSPass()144 FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
145   return new LDTLSCleanup();
146 }
147