1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the ARMSelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMTargetMachine.h"
15 #include "llvm/CodeGen/SelectionDAG.h"
16 #include "llvm/IR/DerivedTypes.h"
17 using namespace llvm;
18 
19 #define DEBUG_TYPE "arm-selectiondag-info"
20 
21 // Emit, if possible, a specialized version of the given Libcall. Typically this
22 // means selecting the appropriately aligned version, but we also convert memset
23 // of 0 into memclr.
EmitSpecializedLibcall(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,RTLIB::Libcall LC) const24 SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
25     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
26     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
27   const ARMSubtarget &Subtarget =
28       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
29   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
30 
31   // Only use a specialized AEABI function if the default version of this
32   // Libcall is an AEABI function.
33   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
34     return SDValue();
35 
36   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
37   // able to translate memset to memclr and use the value to index the function
38   // name array.
39   enum {
40     AEABI_MEMCPY = 0,
41     AEABI_MEMMOVE,
42     AEABI_MEMSET,
43     AEABI_MEMCLR
44   } AEABILibcall;
45   switch (LC) {
46   case RTLIB::MEMCPY:
47     AEABILibcall = AEABI_MEMCPY;
48     break;
49   case RTLIB::MEMMOVE:
50     AEABILibcall = AEABI_MEMMOVE;
51     break;
52   case RTLIB::MEMSET:
53     AEABILibcall = AEABI_MEMSET;
54     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
55       if (ConstantSrc->getZExtValue() == 0)
56         AEABILibcall = AEABI_MEMCLR;
57     break;
58   default:
59     return SDValue();
60   }
61 
62   // Choose the most-aligned libcall variant that we can
63   enum {
64     ALIGN1 = 0,
65     ALIGN4,
66     ALIGN8
67   } AlignVariant;
68   if ((Align & 7) == 0)
69     AlignVariant = ALIGN8;
70   else if ((Align & 3) == 0)
71     AlignVariant = ALIGN4;
72   else
73     AlignVariant = ALIGN1;
74 
75   TargetLowering::ArgListTy Args;
76   TargetLowering::ArgListEntry Entry;
77   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
78   Entry.Node = Dst;
79   Args.push_back(Entry);
80   if (AEABILibcall == AEABI_MEMCLR) {
81     Entry.Node = Size;
82     Args.push_back(Entry);
83   } else if (AEABILibcall == AEABI_MEMSET) {
84     // Adjust parameters for memset, EABI uses format (ptr, size, value),
85     // GNU library uses (ptr, value, size)
86     // See RTABI section 4.3.4
87     Entry.Node = Size;
88     Args.push_back(Entry);
89 
90     // Extend or truncate the argument to be an i32 value for the call.
91     if (Src.getValueType().bitsGT(MVT::i32))
92       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
93     else if (Src.getValueType().bitsLT(MVT::i32))
94       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
95 
96     Entry.Node = Src;
97     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
98     Entry.isSExt = false;
99     Args.push_back(Entry);
100   } else {
101     Entry.Node = Src;
102     Args.push_back(Entry);
103 
104     Entry.Node = Size;
105     Args.push_back(Entry);
106   }
107 
108   char const *FunctionNames[4][3] = {
109     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
110     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
111     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
112     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
113   };
114   TargetLowering::CallLoweringInfo CLI(DAG);
115   CLI.setDebugLoc(dl)
116       .setChain(Chain)
117       .setCallee(
118            TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
119            DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
120                                  TLI->getPointerTy(DAG.getDataLayout())),
121            std::move(Args))
122       .setDiscardResult();
123   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
124 
125   return CallResult.second;
126 }
127 
EmitTargetCodeForMemcpy(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const128 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
129     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
130     SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
131     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
132   const ARMSubtarget &Subtarget =
133       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
134   // Do repeated 4-byte loads and stores. To be improved.
135   // This requires 4-byte alignment.
136   if ((Align & 3) != 0)
137     return SDValue();
138   // This requires the copy size to be a constant, preferably
139   // within a subtarget-specific limit.
140   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
141   if (!ConstantSize)
142     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
143                                   RTLIB::MEMCPY);
144   uint64_t SizeVal = ConstantSize->getZExtValue();
145   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
146     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
147                                   RTLIB::MEMCPY);
148 
149   unsigned BytesLeft = SizeVal & 3;
150   unsigned NumMemOps = SizeVal >> 2;
151   unsigned EmittedNumMemOps = 0;
152   EVT VT = MVT::i32;
153   unsigned VTSize = 4;
154   unsigned i = 0;
155   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
156   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
157   SDValue TFOps[6];
158   SDValue Loads[6];
159   uint64_t SrcOff = 0, DstOff = 0;
160 
161   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
162   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
163   // pressure on the general purpose registers. However this seems harder to map
164   // onto the register allocator's view of the world.
165 
166   // The number of MEMCPY pseudo-instructions to emit. We use up to
167   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
168   // later on. This is a lower bound on the number of MEMCPY operations we must
169   // emit.
170   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
171 
172   // Code size optimisation: do not inline memcpy if expansion results in
173   // more instructions than the libary call.
174   if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) {
175     return SDValue();
176   }
177 
178   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
179 
180   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
181     // Evenly distribute registers among MEMCPY operations to reduce register
182     // pressure.
183     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
184     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
185 
186     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
187                       DAG.getConstant(NumRegs, dl, MVT::i32));
188     Src = Dst.getValue(1);
189     Chain = Dst.getValue(2);
190 
191     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
192     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
193 
194     EmittedNumMemOps = NextEmittedNumMemOps;
195   }
196 
197   if (BytesLeft == 0)
198     return Chain;
199 
200   // Issue loads / stores for the trailing (1 - 3) bytes.
201   unsigned BytesLeftSave = BytesLeft;
202   i = 0;
203   while (BytesLeft) {
204     if (BytesLeft >= 2) {
205       VT = MVT::i16;
206       VTSize = 2;
207     } else {
208       VT = MVT::i8;
209       VTSize = 1;
210     }
211 
212     Loads[i] = DAG.getLoad(VT, dl, Chain,
213                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
214                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
215                            SrcPtrInfo.getWithOffset(SrcOff),
216                            false, false, false, 0);
217     TFOps[i] = Loads[i].getValue(1);
218     ++i;
219     SrcOff += VTSize;
220     BytesLeft -= VTSize;
221   }
222   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
223                       makeArrayRef(TFOps, i));
224 
225   i = 0;
226   BytesLeft = BytesLeftSave;
227   while (BytesLeft) {
228     if (BytesLeft >= 2) {
229       VT = MVT::i16;
230       VTSize = 2;
231     } else {
232       VT = MVT::i8;
233       VTSize = 1;
234     }
235 
236     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
237                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
238                                         DAG.getConstant(DstOff, dl, MVT::i32)),
239                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
240     ++i;
241     DstOff += VTSize;
242     BytesLeft -= VTSize;
243   }
244   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
245                      makeArrayRef(TFOps, i));
246 }
247 
EmitTargetCodeForMemmove(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const248 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
249     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
250     SDValue Size, unsigned Align, bool isVolatile,
251     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
252   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
253                                 RTLIB::MEMMOVE);
254 }
255 
EmitTargetCodeForMemset(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo) const256 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
257     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
258     SDValue Size, unsigned Align, bool isVolatile,
259     MachinePointerInfo DstPtrInfo) const {
260   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
261                                 RTLIB::MEMSET);
262 }
263