1 //===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PTXSelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define DEBUG_TYPE "ptx-selectiondag-info"
15 #include "PTXTargetMachine.h"
16 #include "llvm/DerivedTypes.h"
17 #include "llvm/CodeGen/SelectionDAG.h"
18 using namespace llvm;
19 
PTXSelectionDAGInfo(const TargetMachine & TM)20 PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
21   : TargetSelectionDAGInfo(TM),
22     Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
23 }
24 
~PTXSelectionDAGInfo()25 PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
26 }
27 
28 SDValue
EmitTargetCodeForMemcpy(SelectionDAG & DAG,DebugLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const29 PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
30                                              SDValue Chain,
31                                              SDValue Dst, SDValue Src,
32                                              SDValue Size, unsigned Align,
33                                              bool isVolatile, bool AlwaysInline,
34                                              MachinePointerInfo DstPtrInfo,
35                                           MachinePointerInfo SrcPtrInfo) const {
36   // Do repeated 4-byte loads and stores. To be improved.
37   // This requires 4-byte alignment.
38   if ((Align & 3) != 0)
39     return SDValue();
40   // This requires the copy size to be a constant, preferably
41   // within a subtarget-specific limit.
42   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
43   if (!ConstantSize)
44     return SDValue();
45   uint64_t SizeVal = ConstantSize->getZExtValue();
46   // Always inline memcpys. In PTX, we do not have a C library that provides
47   // a memcpy function.
48   //if (!AlwaysInline)
49   //  return SDValue();
50 
51   unsigned BytesLeft = SizeVal & 3;
52   unsigned NumMemOps = SizeVal >> 2;
53   unsigned EmittedNumMemOps = 0;
54   EVT VT = MVT::i32;
55   unsigned VTSize = 4;
56   unsigned i = 0;
57   const unsigned MAX_LOADS_IN_LDM = 6;
58   SDValue TFOps[MAX_LOADS_IN_LDM];
59   SDValue Loads[MAX_LOADS_IN_LDM];
60   uint64_t SrcOff = 0, DstOff = 0;
61   EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
62 
63   // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
64   // same number of stores.  The loads and stores will get combined into
65   // ldm/stm later on.
66   while (EmittedNumMemOps < NumMemOps) {
67     for (i = 0;
68          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
69       Loads[i] = DAG.getLoad(VT, dl, Chain,
70                              DAG.getNode(ISD::ADD, dl, PointerType, Src,
71                                          DAG.getConstant(SrcOff, PointerType)),
72                              SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
73                              false, 0);
74       TFOps[i] = Loads[i].getValue(1);
75       SrcOff += VTSize;
76     }
77     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
78 
79     for (i = 0;
80          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
81       TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
82                               DAG.getNode(ISD::ADD, dl, PointerType, Dst,
83                                           DAG.getConstant(DstOff, PointerType)),
84                               DstPtrInfo.getWithOffset(DstOff),
85                               isVolatile, false, 0);
86       DstOff += VTSize;
87     }
88     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
89 
90     EmittedNumMemOps += i;
91   }
92 
93   if (BytesLeft == 0)
94     return Chain;
95 
96   // Issue loads / stores for the trailing (1 - 3) bytes.
97   unsigned BytesLeftSave = BytesLeft;
98   i = 0;
99   while (BytesLeft) {
100     if (BytesLeft >= 2) {
101       VT = MVT::i16;
102       VTSize = 2;
103     } else {
104       VT = MVT::i8;
105       VTSize = 1;
106     }
107 
108     Loads[i] = DAG.getLoad(VT, dl, Chain,
109                            DAG.getNode(ISD::ADD, dl, PointerType, Src,
110                                        DAG.getConstant(SrcOff, PointerType)),
111                            SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
112     TFOps[i] = Loads[i].getValue(1);
113     ++i;
114     SrcOff += VTSize;
115     BytesLeft -= VTSize;
116   }
117   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
118 
119   i = 0;
120   BytesLeft = BytesLeftSave;
121   while (BytesLeft) {
122     if (BytesLeft >= 2) {
123       VT = MVT::i16;
124       VTSize = 2;
125     } else {
126       VT = MVT::i8;
127       VTSize = 1;
128     }
129 
130     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
131                             DAG.getNode(ISD::ADD, dl, PointerType, Dst,
132                                         DAG.getConstant(DstOff, PointerType)),
133                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
134     ++i;
135     DstOff += VTSize;
136     BytesLeft -= VTSize;
137   }
138   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
139 }
140 
141 SDValue PTXSelectionDAGInfo::
EmitTargetCodeForMemset(SelectionDAG & DAG,DebugLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo) const142 EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
143                         SDValue Chain, SDValue Dst,
144                         SDValue Src, SDValue Size,
145                         unsigned Align, bool isVolatile,
146                         MachinePointerInfo DstPtrInfo) const {
147   llvm_unreachable("memset lowering not implemented for PTX yet");
148 }
149 
150