1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "nvptx-isel"
26 
27 static cl::opt<int> UsePrecDivF32(
28     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
29     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30              " IEEE Compliant F32 div.rnd if available."),
31     cl::init(2));
32 
33 static cl::opt<bool>
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
36           cl::init(true));
37 
38 static cl::opt<bool>
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
41            cl::init(false));
42 
43 
44 /// createNVPTXISelDag - This pass converts a legalized DAG into a
45 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOpt::Level OptLevel)46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
47                                        llvm::CodeGenOpt::Level OptLevel) {
48   return new NVPTXDAGToDAGISel(TM, OptLevel);
49 }
50 
NVPTXDAGToDAGISel(NVPTXTargetMachine & tm,CodeGenOpt::Level OptLevel)51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52                                      CodeGenOpt::Level OptLevel)
53     : SelectionDAGISel(tm, OptLevel), TM(tm) {
54   doMulWide = (OptLevel > 0);
55 }
56 
runOnMachineFunction(MachineFunction & MF)57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
58     Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
59     return SelectionDAGISel::runOnMachineFunction(MF);
60 }
61 
getDivF32Level() const62 int NVPTXDAGToDAGISel::getDivF32Level() const {
63   if (UsePrecDivF32.getNumOccurrences() > 0) {
64     // If nvptx-prec-div32=N is used on the command-line, always honor it
65     return UsePrecDivF32;
66   } else {
67     // Otherwise, use div.approx if fast math is enabled
68     if (TM.Options.UnsafeFPMath)
69       return 0;
70     else
71       return 2;
72   }
73 }
74 
usePrecSqrtF32() const75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
76   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
77     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
78     return UsePrecSqrtF32;
79   } else {
80     // Otherwise, use sqrt.approx if fast math is enabled
81     return !TM.Options.UnsafeFPMath;
82   }
83 }
84 
useF32FTZ() const85 bool NVPTXDAGToDAGISel::useF32FTZ() const {
86   if (FtzEnabled.getNumOccurrences() > 0) {
87     // If nvptx-f32ftz is used on the command-line, always honor it
88     return FtzEnabled;
89   } else {
90     const Function *F = MF->getFunction();
91     // Otherwise, check for an nvptx-f32ftz attribute on the function
92     if (F->hasFnAttribute("nvptx-f32ftz"))
93       return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
94     else
95       return false;
96   }
97 }
98 
allowFMA() const99 bool NVPTXDAGToDAGISel::allowFMA() const {
100   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
101   return TL->allowFMA(*MF, OptLevel);
102 }
103 
104 /// Select - Select instructions not customized! Used for
105 /// expanded, promoted and normal instructions.
Select(SDNode * N)106 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
107 
108   if (N->isMachineOpcode()) {
109     N->setNodeId(-1);
110     return nullptr; // Already selected.
111   }
112 
113   SDNode *ResNode = nullptr;
114   switch (N->getOpcode()) {
115   case ISD::LOAD:
116     ResNode = SelectLoad(N);
117     break;
118   case ISD::STORE:
119     ResNode = SelectStore(N);
120     break;
121   case NVPTXISD::LoadV2:
122   case NVPTXISD::LoadV4:
123     ResNode = SelectLoadVector(N);
124     break;
125   case NVPTXISD::LDGV2:
126   case NVPTXISD::LDGV4:
127   case NVPTXISD::LDUV2:
128   case NVPTXISD::LDUV4:
129     ResNode = SelectLDGLDU(N);
130     break;
131   case NVPTXISD::StoreV2:
132   case NVPTXISD::StoreV4:
133     ResNode = SelectStoreVector(N);
134     break;
135   case NVPTXISD::LoadParam:
136   case NVPTXISD::LoadParamV2:
137   case NVPTXISD::LoadParamV4:
138     ResNode = SelectLoadParam(N);
139     break;
140   case NVPTXISD::StoreRetval:
141   case NVPTXISD::StoreRetvalV2:
142   case NVPTXISD::StoreRetvalV4:
143     ResNode = SelectStoreRetval(N);
144     break;
145   case NVPTXISD::StoreParam:
146   case NVPTXISD::StoreParamV2:
147   case NVPTXISD::StoreParamV4:
148   case NVPTXISD::StoreParamS32:
149   case NVPTXISD::StoreParamU32:
150     ResNode = SelectStoreParam(N);
151     break;
152   case ISD::INTRINSIC_WO_CHAIN:
153     ResNode = SelectIntrinsicNoChain(N);
154     break;
155   case ISD::INTRINSIC_W_CHAIN:
156     ResNode = SelectIntrinsicChain(N);
157     break;
158   case NVPTXISD::Tex1DFloatS32:
159   case NVPTXISD::Tex1DFloatFloat:
160   case NVPTXISD::Tex1DFloatFloatLevel:
161   case NVPTXISD::Tex1DFloatFloatGrad:
162   case NVPTXISD::Tex1DS32S32:
163   case NVPTXISD::Tex1DS32Float:
164   case NVPTXISD::Tex1DS32FloatLevel:
165   case NVPTXISD::Tex1DS32FloatGrad:
166   case NVPTXISD::Tex1DU32S32:
167   case NVPTXISD::Tex1DU32Float:
168   case NVPTXISD::Tex1DU32FloatLevel:
169   case NVPTXISD::Tex1DU32FloatGrad:
170   case NVPTXISD::Tex1DArrayFloatS32:
171   case NVPTXISD::Tex1DArrayFloatFloat:
172   case NVPTXISD::Tex1DArrayFloatFloatLevel:
173   case NVPTXISD::Tex1DArrayFloatFloatGrad:
174   case NVPTXISD::Tex1DArrayS32S32:
175   case NVPTXISD::Tex1DArrayS32Float:
176   case NVPTXISD::Tex1DArrayS32FloatLevel:
177   case NVPTXISD::Tex1DArrayS32FloatGrad:
178   case NVPTXISD::Tex1DArrayU32S32:
179   case NVPTXISD::Tex1DArrayU32Float:
180   case NVPTXISD::Tex1DArrayU32FloatLevel:
181   case NVPTXISD::Tex1DArrayU32FloatGrad:
182   case NVPTXISD::Tex2DFloatS32:
183   case NVPTXISD::Tex2DFloatFloat:
184   case NVPTXISD::Tex2DFloatFloatLevel:
185   case NVPTXISD::Tex2DFloatFloatGrad:
186   case NVPTXISD::Tex2DS32S32:
187   case NVPTXISD::Tex2DS32Float:
188   case NVPTXISD::Tex2DS32FloatLevel:
189   case NVPTXISD::Tex2DS32FloatGrad:
190   case NVPTXISD::Tex2DU32S32:
191   case NVPTXISD::Tex2DU32Float:
192   case NVPTXISD::Tex2DU32FloatLevel:
193   case NVPTXISD::Tex2DU32FloatGrad:
194   case NVPTXISD::Tex2DArrayFloatS32:
195   case NVPTXISD::Tex2DArrayFloatFloat:
196   case NVPTXISD::Tex2DArrayFloatFloatLevel:
197   case NVPTXISD::Tex2DArrayFloatFloatGrad:
198   case NVPTXISD::Tex2DArrayS32S32:
199   case NVPTXISD::Tex2DArrayS32Float:
200   case NVPTXISD::Tex2DArrayS32FloatLevel:
201   case NVPTXISD::Tex2DArrayS32FloatGrad:
202   case NVPTXISD::Tex2DArrayU32S32:
203   case NVPTXISD::Tex2DArrayU32Float:
204   case NVPTXISD::Tex2DArrayU32FloatLevel:
205   case NVPTXISD::Tex2DArrayU32FloatGrad:
206   case NVPTXISD::Tex3DFloatS32:
207   case NVPTXISD::Tex3DFloatFloat:
208   case NVPTXISD::Tex3DFloatFloatLevel:
209   case NVPTXISD::Tex3DFloatFloatGrad:
210   case NVPTXISD::Tex3DS32S32:
211   case NVPTXISD::Tex3DS32Float:
212   case NVPTXISD::Tex3DS32FloatLevel:
213   case NVPTXISD::Tex3DS32FloatGrad:
214   case NVPTXISD::Tex3DU32S32:
215   case NVPTXISD::Tex3DU32Float:
216   case NVPTXISD::Tex3DU32FloatLevel:
217   case NVPTXISD::Tex3DU32FloatGrad:
218   case NVPTXISD::TexCubeFloatFloat:
219   case NVPTXISD::TexCubeFloatFloatLevel:
220   case NVPTXISD::TexCubeS32Float:
221   case NVPTXISD::TexCubeS32FloatLevel:
222   case NVPTXISD::TexCubeU32Float:
223   case NVPTXISD::TexCubeU32FloatLevel:
224   case NVPTXISD::TexCubeArrayFloatFloat:
225   case NVPTXISD::TexCubeArrayFloatFloatLevel:
226   case NVPTXISD::TexCubeArrayS32Float:
227   case NVPTXISD::TexCubeArrayS32FloatLevel:
228   case NVPTXISD::TexCubeArrayU32Float:
229   case NVPTXISD::TexCubeArrayU32FloatLevel:
230   case NVPTXISD::Tld4R2DFloatFloat:
231   case NVPTXISD::Tld4G2DFloatFloat:
232   case NVPTXISD::Tld4B2DFloatFloat:
233   case NVPTXISD::Tld4A2DFloatFloat:
234   case NVPTXISD::Tld4R2DS64Float:
235   case NVPTXISD::Tld4G2DS64Float:
236   case NVPTXISD::Tld4B2DS64Float:
237   case NVPTXISD::Tld4A2DS64Float:
238   case NVPTXISD::Tld4R2DU64Float:
239   case NVPTXISD::Tld4G2DU64Float:
240   case NVPTXISD::Tld4B2DU64Float:
241   case NVPTXISD::Tld4A2DU64Float:
242   case NVPTXISD::TexUnified1DFloatS32:
243   case NVPTXISD::TexUnified1DFloatFloat:
244   case NVPTXISD::TexUnified1DFloatFloatLevel:
245   case NVPTXISD::TexUnified1DFloatFloatGrad:
246   case NVPTXISD::TexUnified1DS32S32:
247   case NVPTXISD::TexUnified1DS32Float:
248   case NVPTXISD::TexUnified1DS32FloatLevel:
249   case NVPTXISD::TexUnified1DS32FloatGrad:
250   case NVPTXISD::TexUnified1DU32S32:
251   case NVPTXISD::TexUnified1DU32Float:
252   case NVPTXISD::TexUnified1DU32FloatLevel:
253   case NVPTXISD::TexUnified1DU32FloatGrad:
254   case NVPTXISD::TexUnified1DArrayFloatS32:
255   case NVPTXISD::TexUnified1DArrayFloatFloat:
256   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
257   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
258   case NVPTXISD::TexUnified1DArrayS32S32:
259   case NVPTXISD::TexUnified1DArrayS32Float:
260   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
261   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
262   case NVPTXISD::TexUnified1DArrayU32S32:
263   case NVPTXISD::TexUnified1DArrayU32Float:
264   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
265   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
266   case NVPTXISD::TexUnified2DFloatS32:
267   case NVPTXISD::TexUnified2DFloatFloat:
268   case NVPTXISD::TexUnified2DFloatFloatLevel:
269   case NVPTXISD::TexUnified2DFloatFloatGrad:
270   case NVPTXISD::TexUnified2DS32S32:
271   case NVPTXISD::TexUnified2DS32Float:
272   case NVPTXISD::TexUnified2DS32FloatLevel:
273   case NVPTXISD::TexUnified2DS32FloatGrad:
274   case NVPTXISD::TexUnified2DU32S32:
275   case NVPTXISD::TexUnified2DU32Float:
276   case NVPTXISD::TexUnified2DU32FloatLevel:
277   case NVPTXISD::TexUnified2DU32FloatGrad:
278   case NVPTXISD::TexUnified2DArrayFloatS32:
279   case NVPTXISD::TexUnified2DArrayFloatFloat:
280   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
281   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
282   case NVPTXISD::TexUnified2DArrayS32S32:
283   case NVPTXISD::TexUnified2DArrayS32Float:
284   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
285   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
286   case NVPTXISD::TexUnified2DArrayU32S32:
287   case NVPTXISD::TexUnified2DArrayU32Float:
288   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
289   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
290   case NVPTXISD::TexUnified3DFloatS32:
291   case NVPTXISD::TexUnified3DFloatFloat:
292   case NVPTXISD::TexUnified3DFloatFloatLevel:
293   case NVPTXISD::TexUnified3DFloatFloatGrad:
294   case NVPTXISD::TexUnified3DS32S32:
295   case NVPTXISD::TexUnified3DS32Float:
296   case NVPTXISD::TexUnified3DS32FloatLevel:
297   case NVPTXISD::TexUnified3DS32FloatGrad:
298   case NVPTXISD::TexUnified3DU32S32:
299   case NVPTXISD::TexUnified3DU32Float:
300   case NVPTXISD::TexUnified3DU32FloatLevel:
301   case NVPTXISD::TexUnified3DU32FloatGrad:
302   case NVPTXISD::TexUnifiedCubeFloatFloat:
303   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
304   case NVPTXISD::TexUnifiedCubeS32Float:
305   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
306   case NVPTXISD::TexUnifiedCubeU32Float:
307   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
308   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
309   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
310   case NVPTXISD::TexUnifiedCubeArrayS32Float:
311   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
312   case NVPTXISD::TexUnifiedCubeArrayU32Float:
313   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
314   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
315   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
316   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
317   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
318   case NVPTXISD::Tld4UnifiedR2DS64Float:
319   case NVPTXISD::Tld4UnifiedG2DS64Float:
320   case NVPTXISD::Tld4UnifiedB2DS64Float:
321   case NVPTXISD::Tld4UnifiedA2DS64Float:
322   case NVPTXISD::Tld4UnifiedR2DU64Float:
323   case NVPTXISD::Tld4UnifiedG2DU64Float:
324   case NVPTXISD::Tld4UnifiedB2DU64Float:
325   case NVPTXISD::Tld4UnifiedA2DU64Float:
326     ResNode = SelectTextureIntrinsic(N);
327     break;
328   case NVPTXISD::Suld1DI8Clamp:
329   case NVPTXISD::Suld1DI16Clamp:
330   case NVPTXISD::Suld1DI32Clamp:
331   case NVPTXISD::Suld1DI64Clamp:
332   case NVPTXISD::Suld1DV2I8Clamp:
333   case NVPTXISD::Suld1DV2I16Clamp:
334   case NVPTXISD::Suld1DV2I32Clamp:
335   case NVPTXISD::Suld1DV2I64Clamp:
336   case NVPTXISD::Suld1DV4I8Clamp:
337   case NVPTXISD::Suld1DV4I16Clamp:
338   case NVPTXISD::Suld1DV4I32Clamp:
339   case NVPTXISD::Suld1DArrayI8Clamp:
340   case NVPTXISD::Suld1DArrayI16Clamp:
341   case NVPTXISD::Suld1DArrayI32Clamp:
342   case NVPTXISD::Suld1DArrayI64Clamp:
343   case NVPTXISD::Suld1DArrayV2I8Clamp:
344   case NVPTXISD::Suld1DArrayV2I16Clamp:
345   case NVPTXISD::Suld1DArrayV2I32Clamp:
346   case NVPTXISD::Suld1DArrayV2I64Clamp:
347   case NVPTXISD::Suld1DArrayV4I8Clamp:
348   case NVPTXISD::Suld1DArrayV4I16Clamp:
349   case NVPTXISD::Suld1DArrayV4I32Clamp:
350   case NVPTXISD::Suld2DI8Clamp:
351   case NVPTXISD::Suld2DI16Clamp:
352   case NVPTXISD::Suld2DI32Clamp:
353   case NVPTXISD::Suld2DI64Clamp:
354   case NVPTXISD::Suld2DV2I8Clamp:
355   case NVPTXISD::Suld2DV2I16Clamp:
356   case NVPTXISD::Suld2DV2I32Clamp:
357   case NVPTXISD::Suld2DV2I64Clamp:
358   case NVPTXISD::Suld2DV4I8Clamp:
359   case NVPTXISD::Suld2DV4I16Clamp:
360   case NVPTXISD::Suld2DV4I32Clamp:
361   case NVPTXISD::Suld2DArrayI8Clamp:
362   case NVPTXISD::Suld2DArrayI16Clamp:
363   case NVPTXISD::Suld2DArrayI32Clamp:
364   case NVPTXISD::Suld2DArrayI64Clamp:
365   case NVPTXISD::Suld2DArrayV2I8Clamp:
366   case NVPTXISD::Suld2DArrayV2I16Clamp:
367   case NVPTXISD::Suld2DArrayV2I32Clamp:
368   case NVPTXISD::Suld2DArrayV2I64Clamp:
369   case NVPTXISD::Suld2DArrayV4I8Clamp:
370   case NVPTXISD::Suld2DArrayV4I16Clamp:
371   case NVPTXISD::Suld2DArrayV4I32Clamp:
372   case NVPTXISD::Suld3DI8Clamp:
373   case NVPTXISD::Suld3DI16Clamp:
374   case NVPTXISD::Suld3DI32Clamp:
375   case NVPTXISD::Suld3DI64Clamp:
376   case NVPTXISD::Suld3DV2I8Clamp:
377   case NVPTXISD::Suld3DV2I16Clamp:
378   case NVPTXISD::Suld3DV2I32Clamp:
379   case NVPTXISD::Suld3DV2I64Clamp:
380   case NVPTXISD::Suld3DV4I8Clamp:
381   case NVPTXISD::Suld3DV4I16Clamp:
382   case NVPTXISD::Suld3DV4I32Clamp:
383   case NVPTXISD::Suld1DI8Trap:
384   case NVPTXISD::Suld1DI16Trap:
385   case NVPTXISD::Suld1DI32Trap:
386   case NVPTXISD::Suld1DI64Trap:
387   case NVPTXISD::Suld1DV2I8Trap:
388   case NVPTXISD::Suld1DV2I16Trap:
389   case NVPTXISD::Suld1DV2I32Trap:
390   case NVPTXISD::Suld1DV2I64Trap:
391   case NVPTXISD::Suld1DV4I8Trap:
392   case NVPTXISD::Suld1DV4I16Trap:
393   case NVPTXISD::Suld1DV4I32Trap:
394   case NVPTXISD::Suld1DArrayI8Trap:
395   case NVPTXISD::Suld1DArrayI16Trap:
396   case NVPTXISD::Suld1DArrayI32Trap:
397   case NVPTXISD::Suld1DArrayI64Trap:
398   case NVPTXISD::Suld1DArrayV2I8Trap:
399   case NVPTXISD::Suld1DArrayV2I16Trap:
400   case NVPTXISD::Suld1DArrayV2I32Trap:
401   case NVPTXISD::Suld1DArrayV2I64Trap:
402   case NVPTXISD::Suld1DArrayV4I8Trap:
403   case NVPTXISD::Suld1DArrayV4I16Trap:
404   case NVPTXISD::Suld1DArrayV4I32Trap:
405   case NVPTXISD::Suld2DI8Trap:
406   case NVPTXISD::Suld2DI16Trap:
407   case NVPTXISD::Suld2DI32Trap:
408   case NVPTXISD::Suld2DI64Trap:
409   case NVPTXISD::Suld2DV2I8Trap:
410   case NVPTXISD::Suld2DV2I16Trap:
411   case NVPTXISD::Suld2DV2I32Trap:
412   case NVPTXISD::Suld2DV2I64Trap:
413   case NVPTXISD::Suld2DV4I8Trap:
414   case NVPTXISD::Suld2DV4I16Trap:
415   case NVPTXISD::Suld2DV4I32Trap:
416   case NVPTXISD::Suld2DArrayI8Trap:
417   case NVPTXISD::Suld2DArrayI16Trap:
418   case NVPTXISD::Suld2DArrayI32Trap:
419   case NVPTXISD::Suld2DArrayI64Trap:
420   case NVPTXISD::Suld2DArrayV2I8Trap:
421   case NVPTXISD::Suld2DArrayV2I16Trap:
422   case NVPTXISD::Suld2DArrayV2I32Trap:
423   case NVPTXISD::Suld2DArrayV2I64Trap:
424   case NVPTXISD::Suld2DArrayV4I8Trap:
425   case NVPTXISD::Suld2DArrayV4I16Trap:
426   case NVPTXISD::Suld2DArrayV4I32Trap:
427   case NVPTXISD::Suld3DI8Trap:
428   case NVPTXISD::Suld3DI16Trap:
429   case NVPTXISD::Suld3DI32Trap:
430   case NVPTXISD::Suld3DI64Trap:
431   case NVPTXISD::Suld3DV2I8Trap:
432   case NVPTXISD::Suld3DV2I16Trap:
433   case NVPTXISD::Suld3DV2I32Trap:
434   case NVPTXISD::Suld3DV2I64Trap:
435   case NVPTXISD::Suld3DV4I8Trap:
436   case NVPTXISD::Suld3DV4I16Trap:
437   case NVPTXISD::Suld3DV4I32Trap:
438   case NVPTXISD::Suld1DI8Zero:
439   case NVPTXISD::Suld1DI16Zero:
440   case NVPTXISD::Suld1DI32Zero:
441   case NVPTXISD::Suld1DI64Zero:
442   case NVPTXISD::Suld1DV2I8Zero:
443   case NVPTXISD::Suld1DV2I16Zero:
444   case NVPTXISD::Suld1DV2I32Zero:
445   case NVPTXISD::Suld1DV2I64Zero:
446   case NVPTXISD::Suld1DV4I8Zero:
447   case NVPTXISD::Suld1DV4I16Zero:
448   case NVPTXISD::Suld1DV4I32Zero:
449   case NVPTXISD::Suld1DArrayI8Zero:
450   case NVPTXISD::Suld1DArrayI16Zero:
451   case NVPTXISD::Suld1DArrayI32Zero:
452   case NVPTXISD::Suld1DArrayI64Zero:
453   case NVPTXISD::Suld1DArrayV2I8Zero:
454   case NVPTXISD::Suld1DArrayV2I16Zero:
455   case NVPTXISD::Suld1DArrayV2I32Zero:
456   case NVPTXISD::Suld1DArrayV2I64Zero:
457   case NVPTXISD::Suld1DArrayV4I8Zero:
458   case NVPTXISD::Suld1DArrayV4I16Zero:
459   case NVPTXISD::Suld1DArrayV4I32Zero:
460   case NVPTXISD::Suld2DI8Zero:
461   case NVPTXISD::Suld2DI16Zero:
462   case NVPTXISD::Suld2DI32Zero:
463   case NVPTXISD::Suld2DI64Zero:
464   case NVPTXISD::Suld2DV2I8Zero:
465   case NVPTXISD::Suld2DV2I16Zero:
466   case NVPTXISD::Suld2DV2I32Zero:
467   case NVPTXISD::Suld2DV2I64Zero:
468   case NVPTXISD::Suld2DV4I8Zero:
469   case NVPTXISD::Suld2DV4I16Zero:
470   case NVPTXISD::Suld2DV4I32Zero:
471   case NVPTXISD::Suld2DArrayI8Zero:
472   case NVPTXISD::Suld2DArrayI16Zero:
473   case NVPTXISD::Suld2DArrayI32Zero:
474   case NVPTXISD::Suld2DArrayI64Zero:
475   case NVPTXISD::Suld2DArrayV2I8Zero:
476   case NVPTXISD::Suld2DArrayV2I16Zero:
477   case NVPTXISD::Suld2DArrayV2I32Zero:
478   case NVPTXISD::Suld2DArrayV2I64Zero:
479   case NVPTXISD::Suld2DArrayV4I8Zero:
480   case NVPTXISD::Suld2DArrayV4I16Zero:
481   case NVPTXISD::Suld2DArrayV4I32Zero:
482   case NVPTXISD::Suld3DI8Zero:
483   case NVPTXISD::Suld3DI16Zero:
484   case NVPTXISD::Suld3DI32Zero:
485   case NVPTXISD::Suld3DI64Zero:
486   case NVPTXISD::Suld3DV2I8Zero:
487   case NVPTXISD::Suld3DV2I16Zero:
488   case NVPTXISD::Suld3DV2I32Zero:
489   case NVPTXISD::Suld3DV2I64Zero:
490   case NVPTXISD::Suld3DV4I8Zero:
491   case NVPTXISD::Suld3DV4I16Zero:
492   case NVPTXISD::Suld3DV4I32Zero:
493     ResNode = SelectSurfaceIntrinsic(N);
494     break;
495   case ISD::AND:
496   case ISD::SRA:
497   case ISD::SRL:
498     // Try to select BFE
499     ResNode = SelectBFE(N);
500     break;
501   case ISD::ADDRSPACECAST:
502     ResNode = SelectAddrSpaceCast(N);
503     break;
504   default:
505     break;
506   }
507   if (ResNode)
508     return ResNode;
509   return SelectCode(N);
510 }
511 
SelectIntrinsicChain(SDNode * N)512 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
513   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
514   switch (IID) {
515   default:
516     return NULL;
517   case Intrinsic::nvvm_ldg_global_f:
518   case Intrinsic::nvvm_ldg_global_i:
519   case Intrinsic::nvvm_ldg_global_p:
520   case Intrinsic::nvvm_ldu_global_f:
521   case Intrinsic::nvvm_ldu_global_i:
522   case Intrinsic::nvvm_ldu_global_p:
523     return SelectLDGLDU(N);
524   }
525 }
526 
getCodeAddrSpace(MemSDNode * N)527 static unsigned int getCodeAddrSpace(MemSDNode *N) {
528   const Value *Src = N->getMemOperand()->getValue();
529 
530   if (!Src)
531     return NVPTX::PTXLdStInstCode::GENERIC;
532 
533   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
534     switch (PT->getAddressSpace()) {
535     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
536     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
537     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
538     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
539     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
540     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
541     default: break;
542     }
543   }
544   return NVPTX::PTXLdStInstCode::GENERIC;
545 }
546 
SelectIntrinsicNoChain(SDNode * N)547 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
548   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
549   switch (IID) {
550   default:
551     return nullptr;
552   case Intrinsic::nvvm_texsurf_handle_internal:
553     return SelectTexSurfHandle(N);
554   }
555 }
556 
SelectTexSurfHandle(SDNode * N)557 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
558   // Op 0 is the intrinsic ID
559   SDValue Wrapper = N->getOperand(1);
560   SDValue GlobalVal = Wrapper.getOperand(0);
561   return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
562                                 GlobalVal);
563 }
564 
SelectAddrSpaceCast(SDNode * N)565 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
566   SDValue Src = N->getOperand(0);
567   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
568   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
569   unsigned DstAddrSpace = CastN->getDestAddressSpace();
570 
571   assert(SrcAddrSpace != DstAddrSpace &&
572          "addrspacecast must be between different address spaces");
573 
574   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
575     // Specific to generic
576     unsigned Opc;
577     switch (SrcAddrSpace) {
578     default: report_fatal_error("Bad address space in addrspacecast");
579     case ADDRESS_SPACE_GLOBAL:
580       Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
581       break;
582     case ADDRESS_SPACE_SHARED:
583       Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
584       break;
585     case ADDRESS_SPACE_CONST:
586       Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
587       break;
588     case ADDRESS_SPACE_LOCAL:
589       Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
590       break;
591     }
592     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
593   } else {
594     // Generic to specific
595     if (SrcAddrSpace != 0)
596       report_fatal_error("Cannot cast between two non-generic address spaces");
597     unsigned Opc;
598     switch (DstAddrSpace) {
599     default: report_fatal_error("Bad address space in addrspacecast");
600     case ADDRESS_SPACE_GLOBAL:
601       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
602                          : NVPTX::cvta_to_global_yes;
603       break;
604     case ADDRESS_SPACE_SHARED:
605       Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
606                          : NVPTX::cvta_to_shared_yes;
607       break;
608     case ADDRESS_SPACE_CONST:
609       Opc =
610           TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
611       break;
612     case ADDRESS_SPACE_LOCAL:
613       Opc =
614           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
615       break;
616     }
617     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
618   }
619 }
620 
SelectLoad(SDNode * N)621 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
622   SDLoc dl(N);
623   LoadSDNode *LD = cast<LoadSDNode>(N);
624   EVT LoadedVT = LD->getMemoryVT();
625   SDNode *NVPTXLD = nullptr;
626 
627   // do not support pre/post inc/dec
628   if (LD->isIndexed())
629     return nullptr;
630 
631   if (!LoadedVT.isSimple())
632     return nullptr;
633 
634   // Address Space Setting
635   unsigned int codeAddrSpace = getCodeAddrSpace(LD);
636 
637   // Volatile Setting
638   // - .volatile is only availalble for .global and .shared
639   bool isVolatile = LD->isVolatile();
640   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
641       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
642       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
643     isVolatile = false;
644 
645   // Vector Setting
646   MVT SimpleVT = LoadedVT.getSimpleVT();
647   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
648   if (SimpleVT.isVector()) {
649     unsigned num = SimpleVT.getVectorNumElements();
650     if (num == 2)
651       vecType = NVPTX::PTXLdStInstCode::V2;
652     else if (num == 4)
653       vecType = NVPTX::PTXLdStInstCode::V4;
654     else
655       return nullptr;
656   }
657 
658   // Type Setting: fromType + fromTypeWidth
659   //
660   // Sign   : ISD::SEXTLOAD
661   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
662   //          type is integer
663   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
664   MVT ScalarVT = SimpleVT.getScalarType();
665   // Read at least 8 bits (predicates are stored as 8-bit values)
666   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
667   unsigned int fromType;
668   if ((LD->getExtensionType() == ISD::SEXTLOAD))
669     fromType = NVPTX::PTXLdStInstCode::Signed;
670   else if (ScalarVT.isFloatingPoint())
671     fromType = NVPTX::PTXLdStInstCode::Float;
672   else
673     fromType = NVPTX::PTXLdStInstCode::Unsigned;
674 
675   // Create the machine instruction DAG
676   SDValue Chain = N->getOperand(0);
677   SDValue N1 = N->getOperand(1);
678   SDValue Addr;
679   SDValue Offset, Base;
680   unsigned Opcode;
681   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
682 
683   if (SelectDirectAddr(N1, Addr)) {
684     switch (TargetVT) {
685     case MVT::i8:
686       Opcode = NVPTX::LD_i8_avar;
687       break;
688     case MVT::i16:
689       Opcode = NVPTX::LD_i16_avar;
690       break;
691     case MVT::i32:
692       Opcode = NVPTX::LD_i32_avar;
693       break;
694     case MVT::i64:
695       Opcode = NVPTX::LD_i64_avar;
696       break;
697     case MVT::f32:
698       Opcode = NVPTX::LD_f32_avar;
699       break;
700     case MVT::f64:
701       Opcode = NVPTX::LD_f64_avar;
702       break;
703     default:
704       return nullptr;
705     }
706     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
707                       getI32Imm(vecType), getI32Imm(fromType),
708                       getI32Imm(fromTypeWidth), Addr, Chain };
709     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
710   } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
711                           : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
712     switch (TargetVT) {
713     case MVT::i8:
714       Opcode = NVPTX::LD_i8_asi;
715       break;
716     case MVT::i16:
717       Opcode = NVPTX::LD_i16_asi;
718       break;
719     case MVT::i32:
720       Opcode = NVPTX::LD_i32_asi;
721       break;
722     case MVT::i64:
723       Opcode = NVPTX::LD_i64_asi;
724       break;
725     case MVT::f32:
726       Opcode = NVPTX::LD_f32_asi;
727       break;
728     case MVT::f64:
729       Opcode = NVPTX::LD_f64_asi;
730       break;
731     default:
732       return nullptr;
733     }
734     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
735                       getI32Imm(vecType), getI32Imm(fromType),
736                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
737     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
738   } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
739                           : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
740     if (TM.is64Bit()) {
741       switch (TargetVT) {
742       case MVT::i8:
743         Opcode = NVPTX::LD_i8_ari_64;
744         break;
745       case MVT::i16:
746         Opcode = NVPTX::LD_i16_ari_64;
747         break;
748       case MVT::i32:
749         Opcode = NVPTX::LD_i32_ari_64;
750         break;
751       case MVT::i64:
752         Opcode = NVPTX::LD_i64_ari_64;
753         break;
754       case MVT::f32:
755         Opcode = NVPTX::LD_f32_ari_64;
756         break;
757       case MVT::f64:
758         Opcode = NVPTX::LD_f64_ari_64;
759         break;
760       default:
761         return nullptr;
762       }
763     } else {
764       switch (TargetVT) {
765       case MVT::i8:
766         Opcode = NVPTX::LD_i8_ari;
767         break;
768       case MVT::i16:
769         Opcode = NVPTX::LD_i16_ari;
770         break;
771       case MVT::i32:
772         Opcode = NVPTX::LD_i32_ari;
773         break;
774       case MVT::i64:
775         Opcode = NVPTX::LD_i64_ari;
776         break;
777       case MVT::f32:
778         Opcode = NVPTX::LD_f32_ari;
779         break;
780       case MVT::f64:
781         Opcode = NVPTX::LD_f64_ari;
782         break;
783       default:
784         return nullptr;
785       }
786     }
787     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
788                       getI32Imm(vecType), getI32Imm(fromType),
789                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
790     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
791   } else {
792     if (TM.is64Bit()) {
793       switch (TargetVT) {
794       case MVT::i8:
795         Opcode = NVPTX::LD_i8_areg_64;
796         break;
797       case MVT::i16:
798         Opcode = NVPTX::LD_i16_areg_64;
799         break;
800       case MVT::i32:
801         Opcode = NVPTX::LD_i32_areg_64;
802         break;
803       case MVT::i64:
804         Opcode = NVPTX::LD_i64_areg_64;
805         break;
806       case MVT::f32:
807         Opcode = NVPTX::LD_f32_areg_64;
808         break;
809       case MVT::f64:
810         Opcode = NVPTX::LD_f64_areg_64;
811         break;
812       default:
813         return nullptr;
814       }
815     } else {
816       switch (TargetVT) {
817       case MVT::i8:
818         Opcode = NVPTX::LD_i8_areg;
819         break;
820       case MVT::i16:
821         Opcode = NVPTX::LD_i16_areg;
822         break;
823       case MVT::i32:
824         Opcode = NVPTX::LD_i32_areg;
825         break;
826       case MVT::i64:
827         Opcode = NVPTX::LD_i64_areg;
828         break;
829       case MVT::f32:
830         Opcode = NVPTX::LD_f32_areg;
831         break;
832       case MVT::f64:
833         Opcode = NVPTX::LD_f64_areg;
834         break;
835       default:
836         return nullptr;
837       }
838     }
839     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
840                       getI32Imm(vecType), getI32Imm(fromType),
841                       getI32Imm(fromTypeWidth), N1, Chain };
842     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
843   }
844 
845   if (NVPTXLD) {
846     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
847     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
848     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
849   }
850 
851   return NVPTXLD;
852 }
853 
SelectLoadVector(SDNode * N)854 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
855 
856   SDValue Chain = N->getOperand(0);
857   SDValue Op1 = N->getOperand(1);
858   SDValue Addr, Offset, Base;
859   unsigned Opcode;
860   SDLoc DL(N);
861   SDNode *LD;
862   MemSDNode *MemSD = cast<MemSDNode>(N);
863   EVT LoadedVT = MemSD->getMemoryVT();
864 
865   if (!LoadedVT.isSimple())
866     return nullptr;
867 
868   // Address Space Setting
869   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
870 
871   // Volatile Setting
872   // - .volatile is only availalble for .global and .shared
873   bool IsVolatile = MemSD->isVolatile();
874   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
876       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
877     IsVolatile = false;
878 
879   // Vector Setting
880   MVT SimpleVT = LoadedVT.getSimpleVT();
881 
882   // Type Setting: fromType + fromTypeWidth
883   //
884   // Sign   : ISD::SEXTLOAD
885   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
886   //          type is integer
887   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
888   MVT ScalarVT = SimpleVT.getScalarType();
889   // Read at least 8 bits (predicates are stored as 8-bit values)
890   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
891   unsigned int FromType;
892   // The last operand holds the original LoadSDNode::getExtensionType() value
893   unsigned ExtensionType = cast<ConstantSDNode>(
894       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
895   if (ExtensionType == ISD::SEXTLOAD)
896     FromType = NVPTX::PTXLdStInstCode::Signed;
897   else if (ScalarVT.isFloatingPoint())
898     FromType = NVPTX::PTXLdStInstCode::Float;
899   else
900     FromType = NVPTX::PTXLdStInstCode::Unsigned;
901 
902   unsigned VecType;
903 
904   switch (N->getOpcode()) {
905   case NVPTXISD::LoadV2:
906     VecType = NVPTX::PTXLdStInstCode::V2;
907     break;
908   case NVPTXISD::LoadV4:
909     VecType = NVPTX::PTXLdStInstCode::V4;
910     break;
911   default:
912     return nullptr;
913   }
914 
915   EVT EltVT = N->getValueType(0);
916 
917   if (SelectDirectAddr(Op1, Addr)) {
918     switch (N->getOpcode()) {
919     default:
920       return nullptr;
921     case NVPTXISD::LoadV2:
922       switch (EltVT.getSimpleVT().SimpleTy) {
923       default:
924         return nullptr;
925       case MVT::i8:
926         Opcode = NVPTX::LDV_i8_v2_avar;
927         break;
928       case MVT::i16:
929         Opcode = NVPTX::LDV_i16_v2_avar;
930         break;
931       case MVT::i32:
932         Opcode = NVPTX::LDV_i32_v2_avar;
933         break;
934       case MVT::i64:
935         Opcode = NVPTX::LDV_i64_v2_avar;
936         break;
937       case MVT::f32:
938         Opcode = NVPTX::LDV_f32_v2_avar;
939         break;
940       case MVT::f64:
941         Opcode = NVPTX::LDV_f64_v2_avar;
942         break;
943       }
944       break;
945     case NVPTXISD::LoadV4:
946       switch (EltVT.getSimpleVT().SimpleTy) {
947       default:
948         return nullptr;
949       case MVT::i8:
950         Opcode = NVPTX::LDV_i8_v4_avar;
951         break;
952       case MVT::i16:
953         Opcode = NVPTX::LDV_i16_v4_avar;
954         break;
955       case MVT::i32:
956         Opcode = NVPTX::LDV_i32_v4_avar;
957         break;
958       case MVT::f32:
959         Opcode = NVPTX::LDV_f32_v4_avar;
960         break;
961       }
962       break;
963     }
964 
965     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
966                       getI32Imm(VecType), getI32Imm(FromType),
967                       getI32Imm(FromTypeWidth), Addr, Chain };
968     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
969   } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
970                           : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
971     switch (N->getOpcode()) {
972     default:
973       return nullptr;
974     case NVPTXISD::LoadV2:
975       switch (EltVT.getSimpleVT().SimpleTy) {
976       default:
977         return nullptr;
978       case MVT::i8:
979         Opcode = NVPTX::LDV_i8_v2_asi;
980         break;
981       case MVT::i16:
982         Opcode = NVPTX::LDV_i16_v2_asi;
983         break;
984       case MVT::i32:
985         Opcode = NVPTX::LDV_i32_v2_asi;
986         break;
987       case MVT::i64:
988         Opcode = NVPTX::LDV_i64_v2_asi;
989         break;
990       case MVT::f32:
991         Opcode = NVPTX::LDV_f32_v2_asi;
992         break;
993       case MVT::f64:
994         Opcode = NVPTX::LDV_f64_v2_asi;
995         break;
996       }
997       break;
998     case NVPTXISD::LoadV4:
999       switch (EltVT.getSimpleVT().SimpleTy) {
1000       default:
1001         return nullptr;
1002       case MVT::i8:
1003         Opcode = NVPTX::LDV_i8_v4_asi;
1004         break;
1005       case MVT::i16:
1006         Opcode = NVPTX::LDV_i16_v4_asi;
1007         break;
1008       case MVT::i32:
1009         Opcode = NVPTX::LDV_i32_v4_asi;
1010         break;
1011       case MVT::f32:
1012         Opcode = NVPTX::LDV_f32_v4_asi;
1013         break;
1014       }
1015       break;
1016     }
1017 
1018     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1019                       getI32Imm(VecType), getI32Imm(FromType),
1020                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
1021     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1022   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1023                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1024     if (TM.is64Bit()) {
1025       switch (N->getOpcode()) {
1026       default:
1027         return nullptr;
1028       case NVPTXISD::LoadV2:
1029         switch (EltVT.getSimpleVT().SimpleTy) {
1030         default:
1031           return nullptr;
1032         case MVT::i8:
1033           Opcode = NVPTX::LDV_i8_v2_ari_64;
1034           break;
1035         case MVT::i16:
1036           Opcode = NVPTX::LDV_i16_v2_ari_64;
1037           break;
1038         case MVT::i32:
1039           Opcode = NVPTX::LDV_i32_v2_ari_64;
1040           break;
1041         case MVT::i64:
1042           Opcode = NVPTX::LDV_i64_v2_ari_64;
1043           break;
1044         case MVT::f32:
1045           Opcode = NVPTX::LDV_f32_v2_ari_64;
1046           break;
1047         case MVT::f64:
1048           Opcode = NVPTX::LDV_f64_v2_ari_64;
1049           break;
1050         }
1051         break;
1052       case NVPTXISD::LoadV4:
1053         switch (EltVT.getSimpleVT().SimpleTy) {
1054         default:
1055           return nullptr;
1056         case MVT::i8:
1057           Opcode = NVPTX::LDV_i8_v4_ari_64;
1058           break;
1059         case MVT::i16:
1060           Opcode = NVPTX::LDV_i16_v4_ari_64;
1061           break;
1062         case MVT::i32:
1063           Opcode = NVPTX::LDV_i32_v4_ari_64;
1064           break;
1065         case MVT::f32:
1066           Opcode = NVPTX::LDV_f32_v4_ari_64;
1067           break;
1068         }
1069         break;
1070       }
1071     } else {
1072       switch (N->getOpcode()) {
1073       default:
1074         return nullptr;
1075       case NVPTXISD::LoadV2:
1076         switch (EltVT.getSimpleVT().SimpleTy) {
1077         default:
1078           return nullptr;
1079         case MVT::i8:
1080           Opcode = NVPTX::LDV_i8_v2_ari;
1081           break;
1082         case MVT::i16:
1083           Opcode = NVPTX::LDV_i16_v2_ari;
1084           break;
1085         case MVT::i32:
1086           Opcode = NVPTX::LDV_i32_v2_ari;
1087           break;
1088         case MVT::i64:
1089           Opcode = NVPTX::LDV_i64_v2_ari;
1090           break;
1091         case MVT::f32:
1092           Opcode = NVPTX::LDV_f32_v2_ari;
1093           break;
1094         case MVT::f64:
1095           Opcode = NVPTX::LDV_f64_v2_ari;
1096           break;
1097         }
1098         break;
1099       case NVPTXISD::LoadV4:
1100         switch (EltVT.getSimpleVT().SimpleTy) {
1101         default:
1102           return nullptr;
1103         case MVT::i8:
1104           Opcode = NVPTX::LDV_i8_v4_ari;
1105           break;
1106         case MVT::i16:
1107           Opcode = NVPTX::LDV_i16_v4_ari;
1108           break;
1109         case MVT::i32:
1110           Opcode = NVPTX::LDV_i32_v4_ari;
1111           break;
1112         case MVT::f32:
1113           Opcode = NVPTX::LDV_f32_v4_ari;
1114           break;
1115         }
1116         break;
1117       }
1118     }
1119 
1120     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1121                       getI32Imm(VecType), getI32Imm(FromType),
1122                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
1123 
1124     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1125   } else {
1126     if (TM.is64Bit()) {
1127       switch (N->getOpcode()) {
1128       default:
1129         return nullptr;
1130       case NVPTXISD::LoadV2:
1131         switch (EltVT.getSimpleVT().SimpleTy) {
1132         default:
1133           return nullptr;
1134         case MVT::i8:
1135           Opcode = NVPTX::LDV_i8_v2_areg_64;
1136           break;
1137         case MVT::i16:
1138           Opcode = NVPTX::LDV_i16_v2_areg_64;
1139           break;
1140         case MVT::i32:
1141           Opcode = NVPTX::LDV_i32_v2_areg_64;
1142           break;
1143         case MVT::i64:
1144           Opcode = NVPTX::LDV_i64_v2_areg_64;
1145           break;
1146         case MVT::f32:
1147           Opcode = NVPTX::LDV_f32_v2_areg_64;
1148           break;
1149         case MVT::f64:
1150           Opcode = NVPTX::LDV_f64_v2_areg_64;
1151           break;
1152         }
1153         break;
1154       case NVPTXISD::LoadV4:
1155         switch (EltVT.getSimpleVT().SimpleTy) {
1156         default:
1157           return nullptr;
1158         case MVT::i8:
1159           Opcode = NVPTX::LDV_i8_v4_areg_64;
1160           break;
1161         case MVT::i16:
1162           Opcode = NVPTX::LDV_i16_v4_areg_64;
1163           break;
1164         case MVT::i32:
1165           Opcode = NVPTX::LDV_i32_v4_areg_64;
1166           break;
1167         case MVT::f32:
1168           Opcode = NVPTX::LDV_f32_v4_areg_64;
1169           break;
1170         }
1171         break;
1172       }
1173     } else {
1174       switch (N->getOpcode()) {
1175       default:
1176         return nullptr;
1177       case NVPTXISD::LoadV2:
1178         switch (EltVT.getSimpleVT().SimpleTy) {
1179         default:
1180           return nullptr;
1181         case MVT::i8:
1182           Opcode = NVPTX::LDV_i8_v2_areg;
1183           break;
1184         case MVT::i16:
1185           Opcode = NVPTX::LDV_i16_v2_areg;
1186           break;
1187         case MVT::i32:
1188           Opcode = NVPTX::LDV_i32_v2_areg;
1189           break;
1190         case MVT::i64:
1191           Opcode = NVPTX::LDV_i64_v2_areg;
1192           break;
1193         case MVT::f32:
1194           Opcode = NVPTX::LDV_f32_v2_areg;
1195           break;
1196         case MVT::f64:
1197           Opcode = NVPTX::LDV_f64_v2_areg;
1198           break;
1199         }
1200         break;
1201       case NVPTXISD::LoadV4:
1202         switch (EltVT.getSimpleVT().SimpleTy) {
1203         default:
1204           return nullptr;
1205         case MVT::i8:
1206           Opcode = NVPTX::LDV_i8_v4_areg;
1207           break;
1208         case MVT::i16:
1209           Opcode = NVPTX::LDV_i16_v4_areg;
1210           break;
1211         case MVT::i32:
1212           Opcode = NVPTX::LDV_i32_v4_areg;
1213           break;
1214         case MVT::f32:
1215           Opcode = NVPTX::LDV_f32_v4_areg;
1216           break;
1217         }
1218         break;
1219       }
1220     }
1221 
1222     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1223                       getI32Imm(VecType), getI32Imm(FromType),
1224                       getI32Imm(FromTypeWidth), Op1, Chain };
1225     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1226   }
1227 
1228   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1229   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1230   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1231 
1232   return LD;
1233 }
1234 
SelectLDGLDU(SDNode * N)1235 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1236 
1237   SDValue Chain = N->getOperand(0);
1238   SDValue Op1;
1239   MemSDNode *Mem;
1240   bool IsLDG = true;
1241 
1242   // If this is an LDG intrinsic, the address is the third operand. Its its an
1243   // LDG/LDU SD node (from custom vector handling), then its the second operand
1244   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1245     Op1 = N->getOperand(2);
1246     Mem = cast<MemIntrinsicSDNode>(N);
1247     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1248     switch (IID) {
1249     default:
1250       return NULL;
1251     case Intrinsic::nvvm_ldg_global_f:
1252     case Intrinsic::nvvm_ldg_global_i:
1253     case Intrinsic::nvvm_ldg_global_p:
1254       IsLDG = true;
1255       break;
1256     case Intrinsic::nvvm_ldu_global_f:
1257     case Intrinsic::nvvm_ldu_global_i:
1258     case Intrinsic::nvvm_ldu_global_p:
1259       IsLDG = false;
1260       break;
1261     }
1262   } else {
1263     Op1 = N->getOperand(1);
1264     Mem = cast<MemSDNode>(N);
1265   }
1266 
1267   unsigned Opcode;
1268   SDLoc DL(N);
1269   SDNode *LD;
1270   SDValue Base, Offset, Addr;
1271 
1272   EVT EltVT = Mem->getMemoryVT();
1273   if (EltVT.isVector()) {
1274     EltVT = EltVT.getVectorElementType();
1275   }
1276 
1277   if (SelectDirectAddr(Op1, Addr)) {
1278     switch (N->getOpcode()) {
1279     default:
1280       return nullptr;
1281     case ISD::INTRINSIC_W_CHAIN:
1282       if (IsLDG) {
1283         switch (EltVT.getSimpleVT().SimpleTy) {
1284         default:
1285           return nullptr;
1286         case MVT::i8:
1287           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1288           break;
1289         case MVT::i16:
1290           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1291           break;
1292         case MVT::i32:
1293           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1294           break;
1295         case MVT::i64:
1296           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1297           break;
1298         case MVT::f32:
1299           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1300           break;
1301         case MVT::f64:
1302           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1303           break;
1304         }
1305       } else {
1306         switch (EltVT.getSimpleVT().SimpleTy) {
1307         default:
1308           return nullptr;
1309         case MVT::i8:
1310           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1311           break;
1312         case MVT::i16:
1313           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1314           break;
1315         case MVT::i32:
1316           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1317           break;
1318         case MVT::i64:
1319           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1320           break;
1321         case MVT::f32:
1322           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1323           break;
1324         case MVT::f64:
1325           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1326           break;
1327         }
1328       }
1329       break;
1330     case NVPTXISD::LDGV2:
1331       switch (EltVT.getSimpleVT().SimpleTy) {
1332       default:
1333         return nullptr;
1334       case MVT::i8:
1335         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1336         break;
1337       case MVT::i16:
1338         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1339         break;
1340       case MVT::i32:
1341         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1342         break;
1343       case MVT::i64:
1344         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1345         break;
1346       case MVT::f32:
1347         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1348         break;
1349       case MVT::f64:
1350         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1351         break;
1352       }
1353       break;
1354     case NVPTXISD::LDUV2:
1355       switch (EltVT.getSimpleVT().SimpleTy) {
1356       default:
1357         return nullptr;
1358       case MVT::i8:
1359         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1360         break;
1361       case MVT::i16:
1362         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1363         break;
1364       case MVT::i32:
1365         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1366         break;
1367       case MVT::i64:
1368         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1369         break;
1370       case MVT::f32:
1371         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1372         break;
1373       case MVT::f64:
1374         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1375         break;
1376       }
1377       break;
1378     case NVPTXISD::LDGV4:
1379       switch (EltVT.getSimpleVT().SimpleTy) {
1380       default:
1381         return nullptr;
1382       case MVT::i8:
1383         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1384         break;
1385       case MVT::i16:
1386         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1387         break;
1388       case MVT::i32:
1389         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1390         break;
1391       case MVT::f32:
1392         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1393         break;
1394       }
1395       break;
1396     case NVPTXISD::LDUV4:
1397       switch (EltVT.getSimpleVT().SimpleTy) {
1398       default:
1399         return nullptr;
1400       case MVT::i8:
1401         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1402         break;
1403       case MVT::i16:
1404         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1405         break;
1406       case MVT::i32:
1407         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1408         break;
1409       case MVT::f32:
1410         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1411         break;
1412       }
1413       break;
1414     }
1415 
1416     SDValue Ops[] = { Addr, Chain };
1417     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1418   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1419                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1420     if (TM.is64Bit()) {
1421       switch (N->getOpcode()) {
1422       default:
1423         return nullptr;
1424       case ISD::INTRINSIC_W_CHAIN:
1425         if (IsLDG) {
1426           switch (EltVT.getSimpleVT().SimpleTy) {
1427           default:
1428             return nullptr;
1429           case MVT::i8:
1430             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1431             break;
1432           case MVT::i16:
1433             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1434             break;
1435           case MVT::i32:
1436             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1437             break;
1438           case MVT::i64:
1439             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1440             break;
1441           case MVT::f32:
1442             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1443             break;
1444           case MVT::f64:
1445             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1446             break;
1447           }
1448         } else {
1449           switch (EltVT.getSimpleVT().SimpleTy) {
1450           default:
1451             return nullptr;
1452           case MVT::i8:
1453             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1454             break;
1455           case MVT::i16:
1456             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1457             break;
1458           case MVT::i32:
1459             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1460             break;
1461           case MVT::i64:
1462             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1463             break;
1464           case MVT::f32:
1465             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1466             break;
1467           case MVT::f64:
1468             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1469             break;
1470           }
1471         }
1472         break;
1473       case NVPTXISD::LDGV2:
1474         switch (EltVT.getSimpleVT().SimpleTy) {
1475         default:
1476           return nullptr;
1477         case MVT::i8:
1478           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1479           break;
1480         case MVT::i16:
1481           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1482           break;
1483         case MVT::i32:
1484           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1485           break;
1486         case MVT::i64:
1487           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1488           break;
1489         case MVT::f32:
1490           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1491           break;
1492         case MVT::f64:
1493           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1494           break;
1495         }
1496         break;
1497       case NVPTXISD::LDUV2:
1498         switch (EltVT.getSimpleVT().SimpleTy) {
1499         default:
1500           return nullptr;
1501         case MVT::i8:
1502           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1503           break;
1504         case MVT::i16:
1505           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1506           break;
1507         case MVT::i32:
1508           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1509           break;
1510         case MVT::i64:
1511           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1512           break;
1513         case MVT::f32:
1514           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1515           break;
1516         case MVT::f64:
1517           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1518           break;
1519         }
1520         break;
1521       case NVPTXISD::LDGV4:
1522         switch (EltVT.getSimpleVT().SimpleTy) {
1523         default:
1524           return nullptr;
1525         case MVT::i8:
1526           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1527           break;
1528         case MVT::i16:
1529           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1530           break;
1531         case MVT::i32:
1532           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1533           break;
1534         case MVT::f32:
1535           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1536           break;
1537         }
1538         break;
1539       case NVPTXISD::LDUV4:
1540         switch (EltVT.getSimpleVT().SimpleTy) {
1541         default:
1542           return nullptr;
1543         case MVT::i8:
1544           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1545           break;
1546         case MVT::i16:
1547           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1548           break;
1549         case MVT::i32:
1550           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1551           break;
1552         case MVT::f32:
1553           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1554           break;
1555         }
1556         break;
1557       }
1558     } else {
1559       switch (N->getOpcode()) {
1560       default:
1561         return nullptr;
1562       case ISD::INTRINSIC_W_CHAIN:
1563         if (IsLDG) {
1564           switch (EltVT.getSimpleVT().SimpleTy) {
1565           default:
1566             return nullptr;
1567           case MVT::i8:
1568             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1569             break;
1570           case MVT::i16:
1571             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1572             break;
1573           case MVT::i32:
1574             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1575             break;
1576           case MVT::i64:
1577             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1578             break;
1579           case MVT::f32:
1580             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1581             break;
1582           case MVT::f64:
1583             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1584             break;
1585           }
1586         } else {
1587           switch (EltVT.getSimpleVT().SimpleTy) {
1588           default:
1589             return nullptr;
1590           case MVT::i8:
1591             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1592             break;
1593           case MVT::i16:
1594             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1595             break;
1596           case MVT::i32:
1597             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1598             break;
1599           case MVT::i64:
1600             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1601             break;
1602           case MVT::f32:
1603             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1604             break;
1605           case MVT::f64:
1606             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1607             break;
1608           }
1609         }
1610         break;
1611       case NVPTXISD::LDGV2:
1612         switch (EltVT.getSimpleVT().SimpleTy) {
1613         default:
1614           return nullptr;
1615         case MVT::i8:
1616           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1617           break;
1618         case MVT::i16:
1619           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1620           break;
1621         case MVT::i32:
1622           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1623           break;
1624         case MVT::i64:
1625           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1626           break;
1627         case MVT::f32:
1628           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1629           break;
1630         case MVT::f64:
1631           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1632           break;
1633         }
1634         break;
1635       case NVPTXISD::LDUV2:
1636         switch (EltVT.getSimpleVT().SimpleTy) {
1637         default:
1638           return nullptr;
1639         case MVT::i8:
1640           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1641           break;
1642         case MVT::i16:
1643           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1644           break;
1645         case MVT::i32:
1646           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1647           break;
1648         case MVT::i64:
1649           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1650           break;
1651         case MVT::f32:
1652           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1653           break;
1654         case MVT::f64:
1655           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1656           break;
1657         }
1658         break;
1659       case NVPTXISD::LDGV4:
1660         switch (EltVT.getSimpleVT().SimpleTy) {
1661         default:
1662           return nullptr;
1663         case MVT::i8:
1664           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1665           break;
1666         case MVT::i16:
1667           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1668           break;
1669         case MVT::i32:
1670           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1671           break;
1672         case MVT::f32:
1673           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1674           break;
1675         }
1676         break;
1677       case NVPTXISD::LDUV4:
1678         switch (EltVT.getSimpleVT().SimpleTy) {
1679         default:
1680           return nullptr;
1681         case MVT::i8:
1682           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1683           break;
1684         case MVT::i16:
1685           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1686           break;
1687         case MVT::i32:
1688           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1689           break;
1690         case MVT::f32:
1691           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1692           break;
1693         }
1694         break;
1695       }
1696     }
1697 
1698     SDValue Ops[] = { Base, Offset, Chain };
1699 
1700     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1701   } else {
1702     if (TM.is64Bit()) {
1703       switch (N->getOpcode()) {
1704       default:
1705         return nullptr;
1706       case ISD::INTRINSIC_W_CHAIN:
1707         if (IsLDG) {
1708           switch (EltVT.getSimpleVT().SimpleTy) {
1709           default:
1710             return nullptr;
1711           case MVT::i8:
1712             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1713             break;
1714           case MVT::i16:
1715             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1716             break;
1717           case MVT::i32:
1718             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1719             break;
1720           case MVT::i64:
1721             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1722             break;
1723           case MVT::f32:
1724             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1725             break;
1726           case MVT::f64:
1727             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1728             break;
1729           }
1730         } else {
1731           switch (EltVT.getSimpleVT().SimpleTy) {
1732           default:
1733             return nullptr;
1734           case MVT::i8:
1735             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1736             break;
1737           case MVT::i16:
1738             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1739             break;
1740           case MVT::i32:
1741             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1742             break;
1743           case MVT::i64:
1744             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1745             break;
1746           case MVT::f32:
1747             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1748             break;
1749           case MVT::f64:
1750             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1751             break;
1752           }
1753         }
1754         break;
1755       case NVPTXISD::LDGV2:
1756         switch (EltVT.getSimpleVT().SimpleTy) {
1757         default:
1758           return nullptr;
1759         case MVT::i8:
1760           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1761           break;
1762         case MVT::i16:
1763           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1764           break;
1765         case MVT::i32:
1766           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1767           break;
1768         case MVT::i64:
1769           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1770           break;
1771         case MVT::f32:
1772           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1773           break;
1774         case MVT::f64:
1775           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1776           break;
1777         }
1778         break;
1779       case NVPTXISD::LDUV2:
1780         switch (EltVT.getSimpleVT().SimpleTy) {
1781         default:
1782           return nullptr;
1783         case MVT::i8:
1784           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1785           break;
1786         case MVT::i16:
1787           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1788           break;
1789         case MVT::i32:
1790           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1791           break;
1792         case MVT::i64:
1793           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1794           break;
1795         case MVT::f32:
1796           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1797           break;
1798         case MVT::f64:
1799           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1800           break;
1801         }
1802         break;
1803       case NVPTXISD::LDGV4:
1804         switch (EltVT.getSimpleVT().SimpleTy) {
1805         default:
1806           return nullptr;
1807         case MVT::i8:
1808           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1809           break;
1810         case MVT::i16:
1811           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1812           break;
1813         case MVT::i32:
1814           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1815           break;
1816         case MVT::f32:
1817           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1818           break;
1819         }
1820         break;
1821       case NVPTXISD::LDUV4:
1822         switch (EltVT.getSimpleVT().SimpleTy) {
1823         default:
1824           return nullptr;
1825         case MVT::i8:
1826           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1827           break;
1828         case MVT::i16:
1829           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1830           break;
1831         case MVT::i32:
1832           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1833           break;
1834         case MVT::f32:
1835           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1836           break;
1837         }
1838         break;
1839       }
1840     } else {
1841       switch (N->getOpcode()) {
1842       default:
1843         return nullptr;
1844       case ISD::INTRINSIC_W_CHAIN:
1845         if (IsLDG) {
1846           switch (EltVT.getSimpleVT().SimpleTy) {
1847           default:
1848             return nullptr;
1849           case MVT::i8:
1850             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1851             break;
1852           case MVT::i16:
1853             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1854             break;
1855           case MVT::i32:
1856             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1857             break;
1858           case MVT::i64:
1859             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1860             break;
1861           case MVT::f32:
1862             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1863             break;
1864           case MVT::f64:
1865             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1866             break;
1867           }
1868         } else {
1869           switch (EltVT.getSimpleVT().SimpleTy) {
1870           default:
1871             return nullptr;
1872           case MVT::i8:
1873             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1874             break;
1875           case MVT::i16:
1876             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1877             break;
1878           case MVT::i32:
1879             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1880             break;
1881           case MVT::i64:
1882             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1883             break;
1884           case MVT::f32:
1885             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1886             break;
1887           case MVT::f64:
1888             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1889             break;
1890           }
1891         }
1892         break;
1893       case NVPTXISD::LDGV2:
1894         switch (EltVT.getSimpleVT().SimpleTy) {
1895         default:
1896           return nullptr;
1897         case MVT::i8:
1898           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1899           break;
1900         case MVT::i16:
1901           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1902           break;
1903         case MVT::i32:
1904           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1905           break;
1906         case MVT::i64:
1907           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1908           break;
1909         case MVT::f32:
1910           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1911           break;
1912         case MVT::f64:
1913           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1914           break;
1915         }
1916         break;
1917       case NVPTXISD::LDUV2:
1918         switch (EltVT.getSimpleVT().SimpleTy) {
1919         default:
1920           return nullptr;
1921         case MVT::i8:
1922           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1923           break;
1924         case MVT::i16:
1925           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1926           break;
1927         case MVT::i32:
1928           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1929           break;
1930         case MVT::i64:
1931           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1932           break;
1933         case MVT::f32:
1934           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1935           break;
1936         case MVT::f64:
1937           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1938           break;
1939         }
1940         break;
1941       case NVPTXISD::LDGV4:
1942         switch (EltVT.getSimpleVT().SimpleTy) {
1943         default:
1944           return nullptr;
1945         case MVT::i8:
1946           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1947           break;
1948         case MVT::i16:
1949           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1950           break;
1951         case MVT::i32:
1952           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1953           break;
1954         case MVT::f32:
1955           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1956           break;
1957         }
1958         break;
1959       case NVPTXISD::LDUV4:
1960         switch (EltVT.getSimpleVT().SimpleTy) {
1961         default:
1962           return nullptr;
1963         case MVT::i8:
1964           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1965           break;
1966         case MVT::i16:
1967           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1968           break;
1969         case MVT::i32:
1970           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1971           break;
1972         case MVT::f32:
1973           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1974           break;
1975         }
1976         break;
1977       }
1978     }
1979 
1980     SDValue Ops[] = { Op1, Chain };
1981     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1982   }
1983 
1984   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1985   MemRefs0[0] = Mem->getMemOperand();
1986   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1987 
1988   return LD;
1989 }
1990 
SelectStore(SDNode * N)1991 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1992   SDLoc dl(N);
1993   StoreSDNode *ST = cast<StoreSDNode>(N);
1994   EVT StoreVT = ST->getMemoryVT();
1995   SDNode *NVPTXST = nullptr;
1996 
1997   // do not support pre/post inc/dec
1998   if (ST->isIndexed())
1999     return nullptr;
2000 
2001   if (!StoreVT.isSimple())
2002     return nullptr;
2003 
2004   // Address Space Setting
2005   unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2006 
2007   // Volatile Setting
2008   // - .volatile is only availalble for .global and .shared
2009   bool isVolatile = ST->isVolatile();
2010   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2011       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2012       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2013     isVolatile = false;
2014 
2015   // Vector Setting
2016   MVT SimpleVT = StoreVT.getSimpleVT();
2017   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2018   if (SimpleVT.isVector()) {
2019     unsigned num = SimpleVT.getVectorNumElements();
2020     if (num == 2)
2021       vecType = NVPTX::PTXLdStInstCode::V2;
2022     else if (num == 4)
2023       vecType = NVPTX::PTXLdStInstCode::V4;
2024     else
2025       return nullptr;
2026   }
2027 
2028   // Type Setting: toType + toTypeWidth
2029   // - for integer type, always use 'u'
2030   //
2031   MVT ScalarVT = SimpleVT.getScalarType();
2032   unsigned toTypeWidth = ScalarVT.getSizeInBits();
2033   unsigned int toType;
2034   if (ScalarVT.isFloatingPoint())
2035     toType = NVPTX::PTXLdStInstCode::Float;
2036   else
2037     toType = NVPTX::PTXLdStInstCode::Unsigned;
2038 
2039   // Create the machine instruction DAG
2040   SDValue Chain = N->getOperand(0);
2041   SDValue N1 = N->getOperand(1);
2042   SDValue N2 = N->getOperand(2);
2043   SDValue Addr;
2044   SDValue Offset, Base;
2045   unsigned Opcode;
2046   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2047 
2048   if (SelectDirectAddr(N2, Addr)) {
2049     switch (SourceVT) {
2050     case MVT::i8:
2051       Opcode = NVPTX::ST_i8_avar;
2052       break;
2053     case MVT::i16:
2054       Opcode = NVPTX::ST_i16_avar;
2055       break;
2056     case MVT::i32:
2057       Opcode = NVPTX::ST_i32_avar;
2058       break;
2059     case MVT::i64:
2060       Opcode = NVPTX::ST_i64_avar;
2061       break;
2062     case MVT::f32:
2063       Opcode = NVPTX::ST_f32_avar;
2064       break;
2065     case MVT::f64:
2066       Opcode = NVPTX::ST_f64_avar;
2067       break;
2068     default:
2069       return nullptr;
2070     }
2071     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2072                       getI32Imm(vecType), getI32Imm(toType),
2073                       getI32Imm(toTypeWidth), Addr, Chain };
2074     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2075   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2076                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2077     switch (SourceVT) {
2078     case MVT::i8:
2079       Opcode = NVPTX::ST_i8_asi;
2080       break;
2081     case MVT::i16:
2082       Opcode = NVPTX::ST_i16_asi;
2083       break;
2084     case MVT::i32:
2085       Opcode = NVPTX::ST_i32_asi;
2086       break;
2087     case MVT::i64:
2088       Opcode = NVPTX::ST_i64_asi;
2089       break;
2090     case MVT::f32:
2091       Opcode = NVPTX::ST_f32_asi;
2092       break;
2093     case MVT::f64:
2094       Opcode = NVPTX::ST_f64_asi;
2095       break;
2096     default:
2097       return nullptr;
2098     }
2099     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2100                       getI32Imm(vecType), getI32Imm(toType),
2101                       getI32Imm(toTypeWidth), Base, Offset, Chain };
2102     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2103   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2104                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2105     if (TM.is64Bit()) {
2106       switch (SourceVT) {
2107       case MVT::i8:
2108         Opcode = NVPTX::ST_i8_ari_64;
2109         break;
2110       case MVT::i16:
2111         Opcode = NVPTX::ST_i16_ari_64;
2112         break;
2113       case MVT::i32:
2114         Opcode = NVPTX::ST_i32_ari_64;
2115         break;
2116       case MVT::i64:
2117         Opcode = NVPTX::ST_i64_ari_64;
2118         break;
2119       case MVT::f32:
2120         Opcode = NVPTX::ST_f32_ari_64;
2121         break;
2122       case MVT::f64:
2123         Opcode = NVPTX::ST_f64_ari_64;
2124         break;
2125       default:
2126         return nullptr;
2127       }
2128     } else {
2129       switch (SourceVT) {
2130       case MVT::i8:
2131         Opcode = NVPTX::ST_i8_ari;
2132         break;
2133       case MVT::i16:
2134         Opcode = NVPTX::ST_i16_ari;
2135         break;
2136       case MVT::i32:
2137         Opcode = NVPTX::ST_i32_ari;
2138         break;
2139       case MVT::i64:
2140         Opcode = NVPTX::ST_i64_ari;
2141         break;
2142       case MVT::f32:
2143         Opcode = NVPTX::ST_f32_ari;
2144         break;
2145       case MVT::f64:
2146         Opcode = NVPTX::ST_f64_ari;
2147         break;
2148       default:
2149         return nullptr;
2150       }
2151     }
2152     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2153                       getI32Imm(vecType), getI32Imm(toType),
2154                       getI32Imm(toTypeWidth), Base, Offset, Chain };
2155     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2156   } else {
2157     if (TM.is64Bit()) {
2158       switch (SourceVT) {
2159       case MVT::i8:
2160         Opcode = NVPTX::ST_i8_areg_64;
2161         break;
2162       case MVT::i16:
2163         Opcode = NVPTX::ST_i16_areg_64;
2164         break;
2165       case MVT::i32:
2166         Opcode = NVPTX::ST_i32_areg_64;
2167         break;
2168       case MVT::i64:
2169         Opcode = NVPTX::ST_i64_areg_64;
2170         break;
2171       case MVT::f32:
2172         Opcode = NVPTX::ST_f32_areg_64;
2173         break;
2174       case MVT::f64:
2175         Opcode = NVPTX::ST_f64_areg_64;
2176         break;
2177       default:
2178         return nullptr;
2179       }
2180     } else {
2181       switch (SourceVT) {
2182       case MVT::i8:
2183         Opcode = NVPTX::ST_i8_areg;
2184         break;
2185       case MVT::i16:
2186         Opcode = NVPTX::ST_i16_areg;
2187         break;
2188       case MVT::i32:
2189         Opcode = NVPTX::ST_i32_areg;
2190         break;
2191       case MVT::i64:
2192         Opcode = NVPTX::ST_i64_areg;
2193         break;
2194       case MVT::f32:
2195         Opcode = NVPTX::ST_f32_areg;
2196         break;
2197       case MVT::f64:
2198         Opcode = NVPTX::ST_f64_areg;
2199         break;
2200       default:
2201         return nullptr;
2202       }
2203     }
2204     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2205                       getI32Imm(vecType), getI32Imm(toType),
2206                       getI32Imm(toTypeWidth), N2, Chain };
2207     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2208   }
2209 
2210   if (NVPTXST) {
2211     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2212     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2213     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2214   }
2215 
2216   return NVPTXST;
2217 }
2218 
SelectStoreVector(SDNode * N)2219 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2220   SDValue Chain = N->getOperand(0);
2221   SDValue Op1 = N->getOperand(1);
2222   SDValue Addr, Offset, Base;
2223   unsigned Opcode;
2224   SDLoc DL(N);
2225   SDNode *ST;
2226   EVT EltVT = Op1.getValueType();
2227   MemSDNode *MemSD = cast<MemSDNode>(N);
2228   EVT StoreVT = MemSD->getMemoryVT();
2229 
2230   // Address Space Setting
2231   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2232 
2233   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2234     report_fatal_error("Cannot store to pointer that points to constant "
2235                        "memory space");
2236   }
2237 
2238   // Volatile Setting
2239   // - .volatile is only availalble for .global and .shared
2240   bool IsVolatile = MemSD->isVolatile();
2241   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2242       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2243       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2244     IsVolatile = false;
2245 
2246   // Type Setting: toType + toTypeWidth
2247   // - for integer type, always use 'u'
2248   assert(StoreVT.isSimple() && "Store value is not simple");
2249   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2250   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2251   unsigned ToType;
2252   if (ScalarVT.isFloatingPoint())
2253     ToType = NVPTX::PTXLdStInstCode::Float;
2254   else
2255     ToType = NVPTX::PTXLdStInstCode::Unsigned;
2256 
2257   SmallVector<SDValue, 12> StOps;
2258   SDValue N2;
2259   unsigned VecType;
2260 
2261   switch (N->getOpcode()) {
2262   case NVPTXISD::StoreV2:
2263     VecType = NVPTX::PTXLdStInstCode::V2;
2264     StOps.push_back(N->getOperand(1));
2265     StOps.push_back(N->getOperand(2));
2266     N2 = N->getOperand(3);
2267     break;
2268   case NVPTXISD::StoreV4:
2269     VecType = NVPTX::PTXLdStInstCode::V4;
2270     StOps.push_back(N->getOperand(1));
2271     StOps.push_back(N->getOperand(2));
2272     StOps.push_back(N->getOperand(3));
2273     StOps.push_back(N->getOperand(4));
2274     N2 = N->getOperand(5);
2275     break;
2276   default:
2277     return nullptr;
2278   }
2279 
2280   StOps.push_back(getI32Imm(IsVolatile));
2281   StOps.push_back(getI32Imm(CodeAddrSpace));
2282   StOps.push_back(getI32Imm(VecType));
2283   StOps.push_back(getI32Imm(ToType));
2284   StOps.push_back(getI32Imm(ToTypeWidth));
2285 
2286   if (SelectDirectAddr(N2, Addr)) {
2287     switch (N->getOpcode()) {
2288     default:
2289       return nullptr;
2290     case NVPTXISD::StoreV2:
2291       switch (EltVT.getSimpleVT().SimpleTy) {
2292       default:
2293         return nullptr;
2294       case MVT::i8:
2295         Opcode = NVPTX::STV_i8_v2_avar;
2296         break;
2297       case MVT::i16:
2298         Opcode = NVPTX::STV_i16_v2_avar;
2299         break;
2300       case MVT::i32:
2301         Opcode = NVPTX::STV_i32_v2_avar;
2302         break;
2303       case MVT::i64:
2304         Opcode = NVPTX::STV_i64_v2_avar;
2305         break;
2306       case MVT::f32:
2307         Opcode = NVPTX::STV_f32_v2_avar;
2308         break;
2309       case MVT::f64:
2310         Opcode = NVPTX::STV_f64_v2_avar;
2311         break;
2312       }
2313       break;
2314     case NVPTXISD::StoreV4:
2315       switch (EltVT.getSimpleVT().SimpleTy) {
2316       default:
2317         return nullptr;
2318       case MVT::i8:
2319         Opcode = NVPTX::STV_i8_v4_avar;
2320         break;
2321       case MVT::i16:
2322         Opcode = NVPTX::STV_i16_v4_avar;
2323         break;
2324       case MVT::i32:
2325         Opcode = NVPTX::STV_i32_v4_avar;
2326         break;
2327       case MVT::f32:
2328         Opcode = NVPTX::STV_f32_v4_avar;
2329         break;
2330       }
2331       break;
2332     }
2333     StOps.push_back(Addr);
2334   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2335                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2336     switch (N->getOpcode()) {
2337     default:
2338       return nullptr;
2339     case NVPTXISD::StoreV2:
2340       switch (EltVT.getSimpleVT().SimpleTy) {
2341       default:
2342         return nullptr;
2343       case MVT::i8:
2344         Opcode = NVPTX::STV_i8_v2_asi;
2345         break;
2346       case MVT::i16:
2347         Opcode = NVPTX::STV_i16_v2_asi;
2348         break;
2349       case MVT::i32:
2350         Opcode = NVPTX::STV_i32_v2_asi;
2351         break;
2352       case MVT::i64:
2353         Opcode = NVPTX::STV_i64_v2_asi;
2354         break;
2355       case MVT::f32:
2356         Opcode = NVPTX::STV_f32_v2_asi;
2357         break;
2358       case MVT::f64:
2359         Opcode = NVPTX::STV_f64_v2_asi;
2360         break;
2361       }
2362       break;
2363     case NVPTXISD::StoreV4:
2364       switch (EltVT.getSimpleVT().SimpleTy) {
2365       default:
2366         return nullptr;
2367       case MVT::i8:
2368         Opcode = NVPTX::STV_i8_v4_asi;
2369         break;
2370       case MVT::i16:
2371         Opcode = NVPTX::STV_i16_v4_asi;
2372         break;
2373       case MVT::i32:
2374         Opcode = NVPTX::STV_i32_v4_asi;
2375         break;
2376       case MVT::f32:
2377         Opcode = NVPTX::STV_f32_v4_asi;
2378         break;
2379       }
2380       break;
2381     }
2382     StOps.push_back(Base);
2383     StOps.push_back(Offset);
2384   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2385                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2386     if (TM.is64Bit()) {
2387       switch (N->getOpcode()) {
2388       default:
2389         return nullptr;
2390       case NVPTXISD::StoreV2:
2391         switch (EltVT.getSimpleVT().SimpleTy) {
2392         default:
2393           return nullptr;
2394         case MVT::i8:
2395           Opcode = NVPTX::STV_i8_v2_ari_64;
2396           break;
2397         case MVT::i16:
2398           Opcode = NVPTX::STV_i16_v2_ari_64;
2399           break;
2400         case MVT::i32:
2401           Opcode = NVPTX::STV_i32_v2_ari_64;
2402           break;
2403         case MVT::i64:
2404           Opcode = NVPTX::STV_i64_v2_ari_64;
2405           break;
2406         case MVT::f32:
2407           Opcode = NVPTX::STV_f32_v2_ari_64;
2408           break;
2409         case MVT::f64:
2410           Opcode = NVPTX::STV_f64_v2_ari_64;
2411           break;
2412         }
2413         break;
2414       case NVPTXISD::StoreV4:
2415         switch (EltVT.getSimpleVT().SimpleTy) {
2416         default:
2417           return nullptr;
2418         case MVT::i8:
2419           Opcode = NVPTX::STV_i8_v4_ari_64;
2420           break;
2421         case MVT::i16:
2422           Opcode = NVPTX::STV_i16_v4_ari_64;
2423           break;
2424         case MVT::i32:
2425           Opcode = NVPTX::STV_i32_v4_ari_64;
2426           break;
2427         case MVT::f32:
2428           Opcode = NVPTX::STV_f32_v4_ari_64;
2429           break;
2430         }
2431         break;
2432       }
2433     } else {
2434       switch (N->getOpcode()) {
2435       default:
2436         return nullptr;
2437       case NVPTXISD::StoreV2:
2438         switch (EltVT.getSimpleVT().SimpleTy) {
2439         default:
2440           return nullptr;
2441         case MVT::i8:
2442           Opcode = NVPTX::STV_i8_v2_ari;
2443           break;
2444         case MVT::i16:
2445           Opcode = NVPTX::STV_i16_v2_ari;
2446           break;
2447         case MVT::i32:
2448           Opcode = NVPTX::STV_i32_v2_ari;
2449           break;
2450         case MVT::i64:
2451           Opcode = NVPTX::STV_i64_v2_ari;
2452           break;
2453         case MVT::f32:
2454           Opcode = NVPTX::STV_f32_v2_ari;
2455           break;
2456         case MVT::f64:
2457           Opcode = NVPTX::STV_f64_v2_ari;
2458           break;
2459         }
2460         break;
2461       case NVPTXISD::StoreV4:
2462         switch (EltVT.getSimpleVT().SimpleTy) {
2463         default:
2464           return nullptr;
2465         case MVT::i8:
2466           Opcode = NVPTX::STV_i8_v4_ari;
2467           break;
2468         case MVT::i16:
2469           Opcode = NVPTX::STV_i16_v4_ari;
2470           break;
2471         case MVT::i32:
2472           Opcode = NVPTX::STV_i32_v4_ari;
2473           break;
2474         case MVT::f32:
2475           Opcode = NVPTX::STV_f32_v4_ari;
2476           break;
2477         }
2478         break;
2479       }
2480     }
2481     StOps.push_back(Base);
2482     StOps.push_back(Offset);
2483   } else {
2484     if (TM.is64Bit()) {
2485       switch (N->getOpcode()) {
2486       default:
2487         return nullptr;
2488       case NVPTXISD::StoreV2:
2489         switch (EltVT.getSimpleVT().SimpleTy) {
2490         default:
2491           return nullptr;
2492         case MVT::i8:
2493           Opcode = NVPTX::STV_i8_v2_areg_64;
2494           break;
2495         case MVT::i16:
2496           Opcode = NVPTX::STV_i16_v2_areg_64;
2497           break;
2498         case MVT::i32:
2499           Opcode = NVPTX::STV_i32_v2_areg_64;
2500           break;
2501         case MVT::i64:
2502           Opcode = NVPTX::STV_i64_v2_areg_64;
2503           break;
2504         case MVT::f32:
2505           Opcode = NVPTX::STV_f32_v2_areg_64;
2506           break;
2507         case MVT::f64:
2508           Opcode = NVPTX::STV_f64_v2_areg_64;
2509           break;
2510         }
2511         break;
2512       case NVPTXISD::StoreV4:
2513         switch (EltVT.getSimpleVT().SimpleTy) {
2514         default:
2515           return nullptr;
2516         case MVT::i8:
2517           Opcode = NVPTX::STV_i8_v4_areg_64;
2518           break;
2519         case MVT::i16:
2520           Opcode = NVPTX::STV_i16_v4_areg_64;
2521           break;
2522         case MVT::i32:
2523           Opcode = NVPTX::STV_i32_v4_areg_64;
2524           break;
2525         case MVT::f32:
2526           Opcode = NVPTX::STV_f32_v4_areg_64;
2527           break;
2528         }
2529         break;
2530       }
2531     } else {
2532       switch (N->getOpcode()) {
2533       default:
2534         return nullptr;
2535       case NVPTXISD::StoreV2:
2536         switch (EltVT.getSimpleVT().SimpleTy) {
2537         default:
2538           return nullptr;
2539         case MVT::i8:
2540           Opcode = NVPTX::STV_i8_v2_areg;
2541           break;
2542         case MVT::i16:
2543           Opcode = NVPTX::STV_i16_v2_areg;
2544           break;
2545         case MVT::i32:
2546           Opcode = NVPTX::STV_i32_v2_areg;
2547           break;
2548         case MVT::i64:
2549           Opcode = NVPTX::STV_i64_v2_areg;
2550           break;
2551         case MVT::f32:
2552           Opcode = NVPTX::STV_f32_v2_areg;
2553           break;
2554         case MVT::f64:
2555           Opcode = NVPTX::STV_f64_v2_areg;
2556           break;
2557         }
2558         break;
2559       case NVPTXISD::StoreV4:
2560         switch (EltVT.getSimpleVT().SimpleTy) {
2561         default:
2562           return nullptr;
2563         case MVT::i8:
2564           Opcode = NVPTX::STV_i8_v4_areg;
2565           break;
2566         case MVT::i16:
2567           Opcode = NVPTX::STV_i16_v4_areg;
2568           break;
2569         case MVT::i32:
2570           Opcode = NVPTX::STV_i32_v4_areg;
2571           break;
2572         case MVT::f32:
2573           Opcode = NVPTX::STV_f32_v4_areg;
2574           break;
2575         }
2576         break;
2577       }
2578     }
2579     StOps.push_back(N2);
2580   }
2581 
2582   StOps.push_back(Chain);
2583 
2584   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2585 
2586   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2587   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2588   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2589 
2590   return ST;
2591 }
2592 
SelectLoadParam(SDNode * Node)2593 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2594   SDValue Chain = Node->getOperand(0);
2595   SDValue Offset = Node->getOperand(2);
2596   SDValue Flag = Node->getOperand(3);
2597   SDLoc DL(Node);
2598   MemSDNode *Mem = cast<MemSDNode>(Node);
2599 
2600   unsigned VecSize;
2601   switch (Node->getOpcode()) {
2602   default:
2603     return nullptr;
2604   case NVPTXISD::LoadParam:
2605     VecSize = 1;
2606     break;
2607   case NVPTXISD::LoadParamV2:
2608     VecSize = 2;
2609     break;
2610   case NVPTXISD::LoadParamV4:
2611     VecSize = 4;
2612     break;
2613   }
2614 
2615   EVT EltVT = Node->getValueType(0);
2616   EVT MemVT = Mem->getMemoryVT();
2617 
2618   unsigned Opc = 0;
2619 
2620   switch (VecSize) {
2621   default:
2622     return nullptr;
2623   case 1:
2624     switch (MemVT.getSimpleVT().SimpleTy) {
2625     default:
2626       return nullptr;
2627     case MVT::i1:
2628       Opc = NVPTX::LoadParamMemI8;
2629       break;
2630     case MVT::i8:
2631       Opc = NVPTX::LoadParamMemI8;
2632       break;
2633     case MVT::i16:
2634       Opc = NVPTX::LoadParamMemI16;
2635       break;
2636     case MVT::i32:
2637       Opc = NVPTX::LoadParamMemI32;
2638       break;
2639     case MVT::i64:
2640       Opc = NVPTX::LoadParamMemI64;
2641       break;
2642     case MVT::f32:
2643       Opc = NVPTX::LoadParamMemF32;
2644       break;
2645     case MVT::f64:
2646       Opc = NVPTX::LoadParamMemF64;
2647       break;
2648     }
2649     break;
2650   case 2:
2651     switch (MemVT.getSimpleVT().SimpleTy) {
2652     default:
2653       return nullptr;
2654     case MVT::i1:
2655       Opc = NVPTX::LoadParamMemV2I8;
2656       break;
2657     case MVT::i8:
2658       Opc = NVPTX::LoadParamMemV2I8;
2659       break;
2660     case MVT::i16:
2661       Opc = NVPTX::LoadParamMemV2I16;
2662       break;
2663     case MVT::i32:
2664       Opc = NVPTX::LoadParamMemV2I32;
2665       break;
2666     case MVT::i64:
2667       Opc = NVPTX::LoadParamMemV2I64;
2668       break;
2669     case MVT::f32:
2670       Opc = NVPTX::LoadParamMemV2F32;
2671       break;
2672     case MVT::f64:
2673       Opc = NVPTX::LoadParamMemV2F64;
2674       break;
2675     }
2676     break;
2677   case 4:
2678     switch (MemVT.getSimpleVT().SimpleTy) {
2679     default:
2680       return nullptr;
2681     case MVT::i1:
2682       Opc = NVPTX::LoadParamMemV4I8;
2683       break;
2684     case MVT::i8:
2685       Opc = NVPTX::LoadParamMemV4I8;
2686       break;
2687     case MVT::i16:
2688       Opc = NVPTX::LoadParamMemV4I16;
2689       break;
2690     case MVT::i32:
2691       Opc = NVPTX::LoadParamMemV4I32;
2692       break;
2693     case MVT::f32:
2694       Opc = NVPTX::LoadParamMemV4F32;
2695       break;
2696     }
2697     break;
2698   }
2699 
2700   SDVTList VTs;
2701   if (VecSize == 1) {
2702     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2703   } else if (VecSize == 2) {
2704     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2705   } else {
2706     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2707     VTs = CurDAG->getVTList(EVTs);
2708   }
2709 
2710   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2711 
2712   SmallVector<SDValue, 2> Ops;
2713   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2714   Ops.push_back(Chain);
2715   Ops.push_back(Flag);
2716 
2717   SDNode *Ret =
2718       CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2719   return Ret;
2720 }
2721 
SelectStoreRetval(SDNode * N)2722 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2723   SDLoc DL(N);
2724   SDValue Chain = N->getOperand(0);
2725   SDValue Offset = N->getOperand(1);
2726   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2727   MemSDNode *Mem = cast<MemSDNode>(N);
2728 
2729   // How many elements do we have?
2730   unsigned NumElts = 1;
2731   switch (N->getOpcode()) {
2732   default:
2733     return nullptr;
2734   case NVPTXISD::StoreRetval:
2735     NumElts = 1;
2736     break;
2737   case NVPTXISD::StoreRetvalV2:
2738     NumElts = 2;
2739     break;
2740   case NVPTXISD::StoreRetvalV4:
2741     NumElts = 4;
2742     break;
2743   }
2744 
2745   // Build vector of operands
2746   SmallVector<SDValue, 6> Ops;
2747   for (unsigned i = 0; i < NumElts; ++i)
2748     Ops.push_back(N->getOperand(i + 2));
2749   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2750   Ops.push_back(Chain);
2751 
2752   // Determine target opcode
2753   // If we have an i1, use an 8-bit store. The lowering code in
2754   // NVPTXISelLowering will have already emitted an upcast.
2755   unsigned Opcode = 0;
2756   switch (NumElts) {
2757   default:
2758     return nullptr;
2759   case 1:
2760     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2761     default:
2762       return nullptr;
2763     case MVT::i1:
2764       Opcode = NVPTX::StoreRetvalI8;
2765       break;
2766     case MVT::i8:
2767       Opcode = NVPTX::StoreRetvalI8;
2768       break;
2769     case MVT::i16:
2770       Opcode = NVPTX::StoreRetvalI16;
2771       break;
2772     case MVT::i32:
2773       Opcode = NVPTX::StoreRetvalI32;
2774       break;
2775     case MVT::i64:
2776       Opcode = NVPTX::StoreRetvalI64;
2777       break;
2778     case MVT::f32:
2779       Opcode = NVPTX::StoreRetvalF32;
2780       break;
2781     case MVT::f64:
2782       Opcode = NVPTX::StoreRetvalF64;
2783       break;
2784     }
2785     break;
2786   case 2:
2787     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2788     default:
2789       return nullptr;
2790     case MVT::i1:
2791       Opcode = NVPTX::StoreRetvalV2I8;
2792       break;
2793     case MVT::i8:
2794       Opcode = NVPTX::StoreRetvalV2I8;
2795       break;
2796     case MVT::i16:
2797       Opcode = NVPTX::StoreRetvalV2I16;
2798       break;
2799     case MVT::i32:
2800       Opcode = NVPTX::StoreRetvalV2I32;
2801       break;
2802     case MVT::i64:
2803       Opcode = NVPTX::StoreRetvalV2I64;
2804       break;
2805     case MVT::f32:
2806       Opcode = NVPTX::StoreRetvalV2F32;
2807       break;
2808     case MVT::f64:
2809       Opcode = NVPTX::StoreRetvalV2F64;
2810       break;
2811     }
2812     break;
2813   case 4:
2814     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2815     default:
2816       return nullptr;
2817     case MVT::i1:
2818       Opcode = NVPTX::StoreRetvalV4I8;
2819       break;
2820     case MVT::i8:
2821       Opcode = NVPTX::StoreRetvalV4I8;
2822       break;
2823     case MVT::i16:
2824       Opcode = NVPTX::StoreRetvalV4I16;
2825       break;
2826     case MVT::i32:
2827       Opcode = NVPTX::StoreRetvalV4I32;
2828       break;
2829     case MVT::f32:
2830       Opcode = NVPTX::StoreRetvalV4F32;
2831       break;
2832     }
2833     break;
2834   }
2835 
2836   SDNode *Ret =
2837       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2838   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2839   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2840   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2841 
2842   return Ret;
2843 }
2844 
SelectStoreParam(SDNode * N)2845 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2846   SDLoc DL(N);
2847   SDValue Chain = N->getOperand(0);
2848   SDValue Param = N->getOperand(1);
2849   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2850   SDValue Offset = N->getOperand(2);
2851   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2852   MemSDNode *Mem = cast<MemSDNode>(N);
2853   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2854 
2855   // How many elements do we have?
2856   unsigned NumElts = 1;
2857   switch (N->getOpcode()) {
2858   default:
2859     return nullptr;
2860   case NVPTXISD::StoreParamU32:
2861   case NVPTXISD::StoreParamS32:
2862   case NVPTXISD::StoreParam:
2863     NumElts = 1;
2864     break;
2865   case NVPTXISD::StoreParamV2:
2866     NumElts = 2;
2867     break;
2868   case NVPTXISD::StoreParamV4:
2869     NumElts = 4;
2870     break;
2871   }
2872 
2873   // Build vector of operands
2874   SmallVector<SDValue, 8> Ops;
2875   for (unsigned i = 0; i < NumElts; ++i)
2876     Ops.push_back(N->getOperand(i + 3));
2877   Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2878   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2879   Ops.push_back(Chain);
2880   Ops.push_back(Flag);
2881 
2882   // Determine target opcode
2883   // If we have an i1, use an 8-bit store. The lowering code in
2884   // NVPTXISelLowering will have already emitted an upcast.
2885   unsigned Opcode = 0;
2886   switch (N->getOpcode()) {
2887   default:
2888     switch (NumElts) {
2889     default:
2890       return nullptr;
2891     case 1:
2892       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2893       default:
2894         return nullptr;
2895       case MVT::i1:
2896         Opcode = NVPTX::StoreParamI8;
2897         break;
2898       case MVT::i8:
2899         Opcode = NVPTX::StoreParamI8;
2900         break;
2901       case MVT::i16:
2902         Opcode = NVPTX::StoreParamI16;
2903         break;
2904       case MVT::i32:
2905         Opcode = NVPTX::StoreParamI32;
2906         break;
2907       case MVT::i64:
2908         Opcode = NVPTX::StoreParamI64;
2909         break;
2910       case MVT::f32:
2911         Opcode = NVPTX::StoreParamF32;
2912         break;
2913       case MVT::f64:
2914         Opcode = NVPTX::StoreParamF64;
2915         break;
2916       }
2917       break;
2918     case 2:
2919       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2920       default:
2921         return nullptr;
2922       case MVT::i1:
2923         Opcode = NVPTX::StoreParamV2I8;
2924         break;
2925       case MVT::i8:
2926         Opcode = NVPTX::StoreParamV2I8;
2927         break;
2928       case MVT::i16:
2929         Opcode = NVPTX::StoreParamV2I16;
2930         break;
2931       case MVT::i32:
2932         Opcode = NVPTX::StoreParamV2I32;
2933         break;
2934       case MVT::i64:
2935         Opcode = NVPTX::StoreParamV2I64;
2936         break;
2937       case MVT::f32:
2938         Opcode = NVPTX::StoreParamV2F32;
2939         break;
2940       case MVT::f64:
2941         Opcode = NVPTX::StoreParamV2F64;
2942         break;
2943       }
2944       break;
2945     case 4:
2946       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2947       default:
2948         return nullptr;
2949       case MVT::i1:
2950         Opcode = NVPTX::StoreParamV4I8;
2951         break;
2952       case MVT::i8:
2953         Opcode = NVPTX::StoreParamV4I8;
2954         break;
2955       case MVT::i16:
2956         Opcode = NVPTX::StoreParamV4I16;
2957         break;
2958       case MVT::i32:
2959         Opcode = NVPTX::StoreParamV4I32;
2960         break;
2961       case MVT::f32:
2962         Opcode = NVPTX::StoreParamV4F32;
2963         break;
2964       }
2965       break;
2966     }
2967     break;
2968   // Special case: if we have a sign-extend/zero-extend node, insert the
2969   // conversion instruction first, and use that as the value operand to
2970   // the selected StoreParam node.
2971   case NVPTXISD::StoreParamU32: {
2972     Opcode = NVPTX::StoreParamI32;
2973     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2974                                                 MVT::i32);
2975     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2976                                          MVT::i32, Ops[0], CvtNone);
2977     Ops[0] = SDValue(Cvt, 0);
2978     break;
2979   }
2980   case NVPTXISD::StoreParamS32: {
2981     Opcode = NVPTX::StoreParamI32;
2982     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2983                                                 MVT::i32);
2984     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2985                                          MVT::i32, Ops[0], CvtNone);
2986     Ops[0] = SDValue(Cvt, 0);
2987     break;
2988   }
2989   }
2990 
2991   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2992   SDNode *Ret =
2993       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2994   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2995   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2996   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2997 
2998   return Ret;
2999 }
3000 
SelectTextureIntrinsic(SDNode * N)3001 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3002   SDValue Chain = N->getOperand(0);
3003   SDNode *Ret = nullptr;
3004   unsigned Opc = 0;
3005   SmallVector<SDValue, 8> Ops;
3006 
3007   switch (N->getOpcode()) {
3008   default: return nullptr;
3009   case NVPTXISD::Tex1DFloatS32:
3010     Opc = NVPTX::TEX_1D_F32_S32;
3011     break;
3012   case NVPTXISD::Tex1DFloatFloat:
3013     Opc = NVPTX::TEX_1D_F32_F32;
3014     break;
3015   case NVPTXISD::Tex1DFloatFloatLevel:
3016     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3017     break;
3018   case NVPTXISD::Tex1DFloatFloatGrad:
3019     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3020     break;
3021   case NVPTXISD::Tex1DS32S32:
3022     Opc = NVPTX::TEX_1D_S32_S32;
3023     break;
3024   case NVPTXISD::Tex1DS32Float:
3025     Opc = NVPTX::TEX_1D_S32_F32;
3026     break;
3027   case NVPTXISD::Tex1DS32FloatLevel:
3028     Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3029     break;
3030   case NVPTXISD::Tex1DS32FloatGrad:
3031     Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3032     break;
3033   case NVPTXISD::Tex1DU32S32:
3034     Opc = NVPTX::TEX_1D_U32_S32;
3035     break;
3036   case NVPTXISD::Tex1DU32Float:
3037     Opc = NVPTX::TEX_1D_U32_F32;
3038     break;
3039   case NVPTXISD::Tex1DU32FloatLevel:
3040     Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3041     break;
3042   case NVPTXISD::Tex1DU32FloatGrad:
3043     Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3044     break;
3045   case NVPTXISD::Tex1DArrayFloatS32:
3046     Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3047     break;
3048   case NVPTXISD::Tex1DArrayFloatFloat:
3049     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3050     break;
3051   case NVPTXISD::Tex1DArrayFloatFloatLevel:
3052     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3053     break;
3054   case NVPTXISD::Tex1DArrayFloatFloatGrad:
3055     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3056     break;
3057   case NVPTXISD::Tex1DArrayS32S32:
3058     Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3059     break;
3060   case NVPTXISD::Tex1DArrayS32Float:
3061     Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3062     break;
3063   case NVPTXISD::Tex1DArrayS32FloatLevel:
3064     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3065     break;
3066   case NVPTXISD::Tex1DArrayS32FloatGrad:
3067     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3068     break;
3069   case NVPTXISD::Tex1DArrayU32S32:
3070     Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3071     break;
3072   case NVPTXISD::Tex1DArrayU32Float:
3073     Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3074     break;
3075   case NVPTXISD::Tex1DArrayU32FloatLevel:
3076     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3077     break;
3078   case NVPTXISD::Tex1DArrayU32FloatGrad:
3079     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3080     break;
3081   case NVPTXISD::Tex2DFloatS32:
3082     Opc = NVPTX::TEX_2D_F32_S32;
3083     break;
3084   case NVPTXISD::Tex2DFloatFloat:
3085     Opc = NVPTX::TEX_2D_F32_F32;
3086     break;
3087   case NVPTXISD::Tex2DFloatFloatLevel:
3088     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3089     break;
3090   case NVPTXISD::Tex2DFloatFloatGrad:
3091     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3092     break;
3093   case NVPTXISD::Tex2DS32S32:
3094     Opc = NVPTX::TEX_2D_S32_S32;
3095     break;
3096   case NVPTXISD::Tex2DS32Float:
3097     Opc = NVPTX::TEX_2D_S32_F32;
3098     break;
3099   case NVPTXISD::Tex2DS32FloatLevel:
3100     Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3101     break;
3102   case NVPTXISD::Tex2DS32FloatGrad:
3103     Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3104     break;
3105   case NVPTXISD::Tex2DU32S32:
3106     Opc = NVPTX::TEX_2D_U32_S32;
3107     break;
3108   case NVPTXISD::Tex2DU32Float:
3109     Opc = NVPTX::TEX_2D_U32_F32;
3110     break;
3111   case NVPTXISD::Tex2DU32FloatLevel:
3112     Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3113     break;
3114   case NVPTXISD::Tex2DU32FloatGrad:
3115     Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3116     break;
3117   case NVPTXISD::Tex2DArrayFloatS32:
3118     Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3119     break;
3120   case NVPTXISD::Tex2DArrayFloatFloat:
3121     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3122     break;
3123   case NVPTXISD::Tex2DArrayFloatFloatLevel:
3124     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3125     break;
3126   case NVPTXISD::Tex2DArrayFloatFloatGrad:
3127     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3128     break;
3129   case NVPTXISD::Tex2DArrayS32S32:
3130     Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3131     break;
3132   case NVPTXISD::Tex2DArrayS32Float:
3133     Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3134     break;
3135   case NVPTXISD::Tex2DArrayS32FloatLevel:
3136     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3137     break;
3138   case NVPTXISD::Tex2DArrayS32FloatGrad:
3139     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3140     break;
3141   case NVPTXISD::Tex2DArrayU32S32:
3142     Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3143     break;
3144   case NVPTXISD::Tex2DArrayU32Float:
3145     Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3146     break;
3147   case NVPTXISD::Tex2DArrayU32FloatLevel:
3148     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3149     break;
3150   case NVPTXISD::Tex2DArrayU32FloatGrad:
3151     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3152     break;
3153   case NVPTXISD::Tex3DFloatS32:
3154     Opc = NVPTX::TEX_3D_F32_S32;
3155     break;
3156   case NVPTXISD::Tex3DFloatFloat:
3157     Opc = NVPTX::TEX_3D_F32_F32;
3158     break;
3159   case NVPTXISD::Tex3DFloatFloatLevel:
3160     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3161     break;
3162   case NVPTXISD::Tex3DFloatFloatGrad:
3163     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3164     break;
3165   case NVPTXISD::Tex3DS32S32:
3166     Opc = NVPTX::TEX_3D_S32_S32;
3167     break;
3168   case NVPTXISD::Tex3DS32Float:
3169     Opc = NVPTX::TEX_3D_S32_F32;
3170     break;
3171   case NVPTXISD::Tex3DS32FloatLevel:
3172     Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3173     break;
3174   case NVPTXISD::Tex3DS32FloatGrad:
3175     Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3176     break;
3177   case NVPTXISD::Tex3DU32S32:
3178     Opc = NVPTX::TEX_3D_U32_S32;
3179     break;
3180   case NVPTXISD::Tex3DU32Float:
3181     Opc = NVPTX::TEX_3D_U32_F32;
3182     break;
3183   case NVPTXISD::Tex3DU32FloatLevel:
3184     Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3185     break;
3186   case NVPTXISD::Tex3DU32FloatGrad:
3187     Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3188     break;
3189   case NVPTXISD::TexCubeFloatFloat:
3190     Opc = NVPTX::TEX_CUBE_F32_F32;
3191     break;
3192   case NVPTXISD::TexCubeFloatFloatLevel:
3193     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3194     break;
3195   case NVPTXISD::TexCubeS32Float:
3196     Opc = NVPTX::TEX_CUBE_S32_F32;
3197     break;
3198   case NVPTXISD::TexCubeS32FloatLevel:
3199     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3200     break;
3201   case NVPTXISD::TexCubeU32Float:
3202     Opc = NVPTX::TEX_CUBE_U32_F32;
3203     break;
3204   case NVPTXISD::TexCubeU32FloatLevel:
3205     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3206     break;
3207   case NVPTXISD::TexCubeArrayFloatFloat:
3208     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3209     break;
3210   case NVPTXISD::TexCubeArrayFloatFloatLevel:
3211     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3212     break;
3213   case NVPTXISD::TexCubeArrayS32Float:
3214     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3215     break;
3216   case NVPTXISD::TexCubeArrayS32FloatLevel:
3217     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3218     break;
3219   case NVPTXISD::TexCubeArrayU32Float:
3220     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3221     break;
3222   case NVPTXISD::TexCubeArrayU32FloatLevel:
3223     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3224     break;
3225   case NVPTXISD::Tld4R2DFloatFloat:
3226     Opc = NVPTX::TLD4_R_2D_F32_F32;
3227     break;
3228   case NVPTXISD::Tld4G2DFloatFloat:
3229     Opc = NVPTX::TLD4_G_2D_F32_F32;
3230     break;
3231   case NVPTXISD::Tld4B2DFloatFloat:
3232     Opc = NVPTX::TLD4_B_2D_F32_F32;
3233     break;
3234   case NVPTXISD::Tld4A2DFloatFloat:
3235     Opc = NVPTX::TLD4_A_2D_F32_F32;
3236     break;
3237   case NVPTXISD::Tld4R2DS64Float:
3238     Opc = NVPTX::TLD4_R_2D_S32_F32;
3239     break;
3240   case NVPTXISD::Tld4G2DS64Float:
3241     Opc = NVPTX::TLD4_G_2D_S32_F32;
3242     break;
3243   case NVPTXISD::Tld4B2DS64Float:
3244     Opc = NVPTX::TLD4_B_2D_S32_F32;
3245     break;
3246   case NVPTXISD::Tld4A2DS64Float:
3247     Opc = NVPTX::TLD4_A_2D_S32_F32;
3248     break;
3249   case NVPTXISD::Tld4R2DU64Float:
3250     Opc = NVPTX::TLD4_R_2D_U32_F32;
3251     break;
3252   case NVPTXISD::Tld4G2DU64Float:
3253     Opc = NVPTX::TLD4_G_2D_U32_F32;
3254     break;
3255   case NVPTXISD::Tld4B2DU64Float:
3256     Opc = NVPTX::TLD4_B_2D_U32_F32;
3257     break;
3258   case NVPTXISD::Tld4A2DU64Float:
3259     Opc = NVPTX::TLD4_A_2D_U32_F32;
3260     break;
3261   case NVPTXISD::TexUnified1DFloatS32:
3262     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3263     break;
3264   case NVPTXISD::TexUnified1DFloatFloat:
3265     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3266     break;
3267   case NVPTXISD::TexUnified1DFloatFloatLevel:
3268     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3269     break;
3270   case NVPTXISD::TexUnified1DFloatFloatGrad:
3271     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3272     break;
3273   case NVPTXISD::TexUnified1DS32S32:
3274     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3275     break;
3276   case NVPTXISD::TexUnified1DS32Float:
3277     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3278     break;
3279   case NVPTXISD::TexUnified1DS32FloatLevel:
3280     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3281     break;
3282   case NVPTXISD::TexUnified1DS32FloatGrad:
3283     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3284     break;
3285   case NVPTXISD::TexUnified1DU32S32:
3286     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3287     break;
3288   case NVPTXISD::TexUnified1DU32Float:
3289     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3290     break;
3291   case NVPTXISD::TexUnified1DU32FloatLevel:
3292     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3293     break;
3294   case NVPTXISD::TexUnified1DU32FloatGrad:
3295     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3296     break;
3297   case NVPTXISD::TexUnified1DArrayFloatS32:
3298     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3299     break;
3300   case NVPTXISD::TexUnified1DArrayFloatFloat:
3301     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3302     break;
3303   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3304     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3305     break;
3306   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3307     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3308     break;
3309   case NVPTXISD::TexUnified1DArrayS32S32:
3310     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3311     break;
3312   case NVPTXISD::TexUnified1DArrayS32Float:
3313     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3314     break;
3315   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3316     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3317     break;
3318   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3319     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3320     break;
3321   case NVPTXISD::TexUnified1DArrayU32S32:
3322     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3323     break;
3324   case NVPTXISD::TexUnified1DArrayU32Float:
3325     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3326     break;
3327   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3328     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3329     break;
3330   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3331     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3332     break;
3333   case NVPTXISD::TexUnified2DFloatS32:
3334     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3335     break;
3336   case NVPTXISD::TexUnified2DFloatFloat:
3337     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3338     break;
3339   case NVPTXISD::TexUnified2DFloatFloatLevel:
3340     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3341     break;
3342   case NVPTXISD::TexUnified2DFloatFloatGrad:
3343     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3344     break;
3345   case NVPTXISD::TexUnified2DS32S32:
3346     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3347     break;
3348   case NVPTXISD::TexUnified2DS32Float:
3349     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3350     break;
3351   case NVPTXISD::TexUnified2DS32FloatLevel:
3352     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3353     break;
3354   case NVPTXISD::TexUnified2DS32FloatGrad:
3355     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3356     break;
3357   case NVPTXISD::TexUnified2DU32S32:
3358     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3359     break;
3360   case NVPTXISD::TexUnified2DU32Float:
3361     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3362     break;
3363   case NVPTXISD::TexUnified2DU32FloatLevel:
3364     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3365     break;
3366   case NVPTXISD::TexUnified2DU32FloatGrad:
3367     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3368     break;
3369   case NVPTXISD::TexUnified2DArrayFloatS32:
3370     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3371     break;
3372   case NVPTXISD::TexUnified2DArrayFloatFloat:
3373     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3374     break;
3375   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3376     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3377     break;
3378   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3379     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3380     break;
3381   case NVPTXISD::TexUnified2DArrayS32S32:
3382     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3383     break;
3384   case NVPTXISD::TexUnified2DArrayS32Float:
3385     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3386     break;
3387   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3388     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3389     break;
3390   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3391     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3392     break;
3393   case NVPTXISD::TexUnified2DArrayU32S32:
3394     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3395     break;
3396   case NVPTXISD::TexUnified2DArrayU32Float:
3397     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3398     break;
3399   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3400     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3401     break;
3402   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3403     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3404     break;
3405   case NVPTXISD::TexUnified3DFloatS32:
3406     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3407     break;
3408   case NVPTXISD::TexUnified3DFloatFloat:
3409     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3410     break;
3411   case NVPTXISD::TexUnified3DFloatFloatLevel:
3412     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3413     break;
3414   case NVPTXISD::TexUnified3DFloatFloatGrad:
3415     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3416     break;
3417   case NVPTXISD::TexUnified3DS32S32:
3418     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3419     break;
3420   case NVPTXISD::TexUnified3DS32Float:
3421     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3422     break;
3423   case NVPTXISD::TexUnified3DS32FloatLevel:
3424     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3425     break;
3426   case NVPTXISD::TexUnified3DS32FloatGrad:
3427     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3428     break;
3429   case NVPTXISD::TexUnified3DU32S32:
3430     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3431     break;
3432   case NVPTXISD::TexUnified3DU32Float:
3433     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3434     break;
3435   case NVPTXISD::TexUnified3DU32FloatLevel:
3436     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3437     break;
3438   case NVPTXISD::TexUnified3DU32FloatGrad:
3439     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3440     break;
3441   case NVPTXISD::TexUnifiedCubeFloatFloat:
3442     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3443     break;
3444   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3445     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3446     break;
3447   case NVPTXISD::TexUnifiedCubeS32Float:
3448     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3449     break;
3450   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3451     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3452     break;
3453   case NVPTXISD::TexUnifiedCubeU32Float:
3454     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3455     break;
3456   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3457     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3458     break;
3459   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3460     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3461     break;
3462   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3463     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3464     break;
3465   case NVPTXISD::TexUnifiedCubeArrayS32Float:
3466     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3467     break;
3468   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3469     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3470     break;
3471   case NVPTXISD::TexUnifiedCubeArrayU32Float:
3472     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3473     break;
3474   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3475     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3476     break;
3477   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3478     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3479     break;
3480   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3481     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3482     break;
3483   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3484     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3485     break;
3486   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3487     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3488     break;
3489   case NVPTXISD::Tld4UnifiedR2DS64Float:
3490     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3491     break;
3492   case NVPTXISD::Tld4UnifiedG2DS64Float:
3493     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3494     break;
3495   case NVPTXISD::Tld4UnifiedB2DS64Float:
3496     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3497     break;
3498   case NVPTXISD::Tld4UnifiedA2DS64Float:
3499     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3500     break;
3501   case NVPTXISD::Tld4UnifiedR2DU64Float:
3502     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3503     break;
3504   case NVPTXISD::Tld4UnifiedG2DU64Float:
3505     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3506     break;
3507   case NVPTXISD::Tld4UnifiedB2DU64Float:
3508     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3509     break;
3510   case NVPTXISD::Tld4UnifiedA2DU64Float:
3511     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3512     break;
3513   }
3514 
3515   // Copy over operands
3516   for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3517     Ops.push_back(N->getOperand(i));
3518   }
3519 
3520   Ops.push_back(Chain);
3521   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3522   return Ret;
3523 }
3524 
SelectSurfaceIntrinsic(SDNode * N)3525 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3526   SDValue Chain = N->getOperand(0);
3527   SDValue TexHandle = N->getOperand(1);
3528   SDNode *Ret = nullptr;
3529   unsigned Opc = 0;
3530   SmallVector<SDValue, 8> Ops;
3531   switch (N->getOpcode()) {
3532   default: return nullptr;
3533   case NVPTXISD::Suld1DI8Clamp:
3534     Opc = NVPTX::SULD_1D_I8_CLAMP;
3535     Ops.push_back(TexHandle);
3536     Ops.push_back(N->getOperand(2));
3537     Ops.push_back(Chain);
3538     break;
3539   case NVPTXISD::Suld1DI16Clamp:
3540     Opc = NVPTX::SULD_1D_I16_CLAMP;
3541     Ops.push_back(TexHandle);
3542     Ops.push_back(N->getOperand(2));
3543     Ops.push_back(Chain);
3544     break;
3545   case NVPTXISD::Suld1DI32Clamp:
3546     Opc = NVPTX::SULD_1D_I32_CLAMP;
3547     Ops.push_back(TexHandle);
3548     Ops.push_back(N->getOperand(2));
3549     Ops.push_back(Chain);
3550     break;
3551   case NVPTXISD::Suld1DI64Clamp:
3552     Opc = NVPTX::SULD_1D_I64_CLAMP;
3553     Ops.push_back(TexHandle);
3554     Ops.push_back(N->getOperand(2));
3555     Ops.push_back(Chain);
3556     break;
3557   case NVPTXISD::Suld1DV2I8Clamp:
3558     Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3559     Ops.push_back(TexHandle);
3560     Ops.push_back(N->getOperand(2));
3561     Ops.push_back(Chain);
3562     break;
3563   case NVPTXISD::Suld1DV2I16Clamp:
3564     Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3565     Ops.push_back(TexHandle);
3566     Ops.push_back(N->getOperand(2));
3567     Ops.push_back(Chain);
3568     break;
3569   case NVPTXISD::Suld1DV2I32Clamp:
3570     Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3571     Ops.push_back(TexHandle);
3572     Ops.push_back(N->getOperand(2));
3573     Ops.push_back(Chain);
3574     break;
3575   case NVPTXISD::Suld1DV2I64Clamp:
3576     Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3577     Ops.push_back(TexHandle);
3578     Ops.push_back(N->getOperand(2));
3579     Ops.push_back(Chain);
3580     break;
3581   case NVPTXISD::Suld1DV4I8Clamp:
3582     Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3583     Ops.push_back(TexHandle);
3584     Ops.push_back(N->getOperand(2));
3585     Ops.push_back(Chain);
3586     break;
3587   case NVPTXISD::Suld1DV4I16Clamp:
3588     Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3589     Ops.push_back(TexHandle);
3590     Ops.push_back(N->getOperand(2));
3591     Ops.push_back(Chain);
3592     break;
3593   case NVPTXISD::Suld1DV4I32Clamp:
3594     Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3595     Ops.push_back(TexHandle);
3596     Ops.push_back(N->getOperand(2));
3597     Ops.push_back(Chain);
3598     break;
3599   case NVPTXISD::Suld1DArrayI8Clamp:
3600     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3601     Ops.push_back(TexHandle);
3602     Ops.push_back(N->getOperand(2));
3603     Ops.push_back(N->getOperand(3));
3604     Ops.push_back(Chain);
3605     break;
3606   case NVPTXISD::Suld1DArrayI16Clamp:
3607     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3608     Ops.push_back(TexHandle);
3609     Ops.push_back(N->getOperand(2));
3610     Ops.push_back(N->getOperand(3));
3611     Ops.push_back(Chain);
3612     break;
3613   case NVPTXISD::Suld1DArrayI32Clamp:
3614     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3615     Ops.push_back(TexHandle);
3616     Ops.push_back(N->getOperand(2));
3617     Ops.push_back(N->getOperand(3));
3618     Ops.push_back(Chain);
3619     break;
3620   case NVPTXISD::Suld1DArrayI64Clamp:
3621     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3622     Ops.push_back(TexHandle);
3623     Ops.push_back(N->getOperand(2));
3624     Ops.push_back(N->getOperand(3));
3625     Ops.push_back(Chain);
3626     break;
3627   case NVPTXISD::Suld1DArrayV2I8Clamp:
3628     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3629     Ops.push_back(TexHandle);
3630     Ops.push_back(N->getOperand(2));
3631     Ops.push_back(N->getOperand(3));
3632     Ops.push_back(Chain);
3633     break;
3634   case NVPTXISD::Suld1DArrayV2I16Clamp:
3635     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3636     Ops.push_back(TexHandle);
3637     Ops.push_back(N->getOperand(2));
3638     Ops.push_back(N->getOperand(3));
3639     Ops.push_back(Chain);
3640     break;
3641   case NVPTXISD::Suld1DArrayV2I32Clamp:
3642     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3643     Ops.push_back(TexHandle);
3644     Ops.push_back(N->getOperand(2));
3645     Ops.push_back(N->getOperand(3));
3646     Ops.push_back(Chain);
3647     break;
3648   case NVPTXISD::Suld1DArrayV2I64Clamp:
3649     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3650     Ops.push_back(TexHandle);
3651     Ops.push_back(N->getOperand(2));
3652     Ops.push_back(N->getOperand(3));
3653     Ops.push_back(Chain);
3654     break;
3655   case NVPTXISD::Suld1DArrayV4I8Clamp:
3656     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3657     Ops.push_back(TexHandle);
3658     Ops.push_back(N->getOperand(2));
3659     Ops.push_back(N->getOperand(3));
3660     Ops.push_back(Chain);
3661     break;
3662   case NVPTXISD::Suld1DArrayV4I16Clamp:
3663     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3664     Ops.push_back(TexHandle);
3665     Ops.push_back(N->getOperand(2));
3666     Ops.push_back(N->getOperand(3));
3667     Ops.push_back(Chain);
3668     break;
3669   case NVPTXISD::Suld1DArrayV4I32Clamp:
3670     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3671     Ops.push_back(TexHandle);
3672     Ops.push_back(N->getOperand(2));
3673     Ops.push_back(N->getOperand(3));
3674     Ops.push_back(Chain);
3675     break;
3676   case NVPTXISD::Suld2DI8Clamp:
3677     Opc = NVPTX::SULD_2D_I8_CLAMP;
3678     Ops.push_back(TexHandle);
3679     Ops.push_back(N->getOperand(2));
3680     Ops.push_back(N->getOperand(3));
3681     Ops.push_back(Chain);
3682     break;
3683   case NVPTXISD::Suld2DI16Clamp:
3684     Opc = NVPTX::SULD_2D_I16_CLAMP;
3685     Ops.push_back(TexHandle);
3686     Ops.push_back(N->getOperand(2));
3687     Ops.push_back(N->getOperand(3));
3688     Ops.push_back(Chain);
3689     break;
3690   case NVPTXISD::Suld2DI32Clamp:
3691     Opc = NVPTX::SULD_2D_I32_CLAMP;
3692     Ops.push_back(TexHandle);
3693     Ops.push_back(N->getOperand(2));
3694     Ops.push_back(N->getOperand(3));
3695     Ops.push_back(Chain);
3696     break;
3697   case NVPTXISD::Suld2DI64Clamp:
3698     Opc = NVPTX::SULD_2D_I64_CLAMP;
3699     Ops.push_back(TexHandle);
3700     Ops.push_back(N->getOperand(2));
3701     Ops.push_back(N->getOperand(3));
3702     Ops.push_back(Chain);
3703     break;
3704   case NVPTXISD::Suld2DV2I8Clamp:
3705     Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3706     Ops.push_back(TexHandle);
3707     Ops.push_back(N->getOperand(2));
3708     Ops.push_back(N->getOperand(3));
3709     Ops.push_back(Chain);
3710     break;
3711   case NVPTXISD::Suld2DV2I16Clamp:
3712     Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3713     Ops.push_back(TexHandle);
3714     Ops.push_back(N->getOperand(2));
3715     Ops.push_back(N->getOperand(3));
3716     Ops.push_back(Chain);
3717     break;
3718   case NVPTXISD::Suld2DV2I32Clamp:
3719     Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3720     Ops.push_back(TexHandle);
3721     Ops.push_back(N->getOperand(2));
3722     Ops.push_back(N->getOperand(3));
3723     Ops.push_back(Chain);
3724     break;
3725   case NVPTXISD::Suld2DV2I64Clamp:
3726     Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3727     Ops.push_back(TexHandle);
3728     Ops.push_back(N->getOperand(2));
3729     Ops.push_back(N->getOperand(3));
3730     Ops.push_back(Chain);
3731     break;
3732   case NVPTXISD::Suld2DV4I8Clamp:
3733     Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3734     Ops.push_back(TexHandle);
3735     Ops.push_back(N->getOperand(2));
3736     Ops.push_back(N->getOperand(3));
3737     Ops.push_back(Chain);
3738     break;
3739   case NVPTXISD::Suld2DV4I16Clamp:
3740     Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3741     Ops.push_back(TexHandle);
3742     Ops.push_back(N->getOperand(2));
3743     Ops.push_back(N->getOperand(3));
3744     Ops.push_back(Chain);
3745     break;
3746   case NVPTXISD::Suld2DV4I32Clamp:
3747     Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3748     Ops.push_back(TexHandle);
3749     Ops.push_back(N->getOperand(2));
3750     Ops.push_back(N->getOperand(3));
3751     Ops.push_back(Chain);
3752     break;
3753   case NVPTXISD::Suld2DArrayI8Clamp:
3754     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3755     Ops.push_back(TexHandle);
3756     Ops.push_back(N->getOperand(2));
3757     Ops.push_back(N->getOperand(3));
3758     Ops.push_back(N->getOperand(4));
3759     Ops.push_back(Chain);
3760     break;
3761   case NVPTXISD::Suld2DArrayI16Clamp:
3762     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3763     Ops.push_back(TexHandle);
3764     Ops.push_back(N->getOperand(2));
3765     Ops.push_back(N->getOperand(3));
3766     Ops.push_back(N->getOperand(4));
3767     Ops.push_back(Chain);
3768     break;
3769   case NVPTXISD::Suld2DArrayI32Clamp:
3770     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3771     Ops.push_back(TexHandle);
3772     Ops.push_back(N->getOperand(2));
3773     Ops.push_back(N->getOperand(3));
3774     Ops.push_back(N->getOperand(4));
3775     Ops.push_back(Chain);
3776     break;
3777   case NVPTXISD::Suld2DArrayI64Clamp:
3778     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3779     Ops.push_back(TexHandle);
3780     Ops.push_back(N->getOperand(2));
3781     Ops.push_back(N->getOperand(3));
3782     Ops.push_back(N->getOperand(4));
3783     Ops.push_back(Chain);
3784     break;
3785   case NVPTXISD::Suld2DArrayV2I8Clamp:
3786     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3787     Ops.push_back(TexHandle);
3788     Ops.push_back(N->getOperand(2));
3789     Ops.push_back(N->getOperand(3));
3790     Ops.push_back(N->getOperand(4));
3791     Ops.push_back(Chain);
3792     break;
3793   case NVPTXISD::Suld2DArrayV2I16Clamp:
3794     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3795     Ops.push_back(TexHandle);
3796     Ops.push_back(N->getOperand(2));
3797     Ops.push_back(N->getOperand(3));
3798     Ops.push_back(N->getOperand(4));
3799     Ops.push_back(Chain);
3800     break;
3801   case NVPTXISD::Suld2DArrayV2I32Clamp:
3802     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3803     Ops.push_back(TexHandle);
3804     Ops.push_back(N->getOperand(2));
3805     Ops.push_back(N->getOperand(3));
3806     Ops.push_back(N->getOperand(4));
3807     Ops.push_back(Chain);
3808     break;
3809   case NVPTXISD::Suld2DArrayV2I64Clamp:
3810     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3811     Ops.push_back(TexHandle);
3812     Ops.push_back(N->getOperand(2));
3813     Ops.push_back(N->getOperand(3));
3814     Ops.push_back(N->getOperand(4));
3815     Ops.push_back(Chain);
3816     break;
3817   case NVPTXISD::Suld2DArrayV4I8Clamp:
3818     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3819     Ops.push_back(TexHandle);
3820     Ops.push_back(N->getOperand(2));
3821     Ops.push_back(N->getOperand(3));
3822     Ops.push_back(N->getOperand(4));
3823     Ops.push_back(Chain);
3824     break;
3825   case NVPTXISD::Suld2DArrayV4I16Clamp:
3826     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3827     Ops.push_back(TexHandle);
3828     Ops.push_back(N->getOperand(2));
3829     Ops.push_back(N->getOperand(3));
3830     Ops.push_back(N->getOperand(4));
3831     Ops.push_back(Chain);
3832     break;
3833   case NVPTXISD::Suld2DArrayV4I32Clamp:
3834     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3835     Ops.push_back(TexHandle);
3836     Ops.push_back(N->getOperand(2));
3837     Ops.push_back(N->getOperand(3));
3838     Ops.push_back(N->getOperand(4));
3839     Ops.push_back(Chain);
3840     break;
3841   case NVPTXISD::Suld3DI8Clamp:
3842     Opc = NVPTX::SULD_3D_I8_CLAMP;
3843     Ops.push_back(TexHandle);
3844     Ops.push_back(N->getOperand(2));
3845     Ops.push_back(N->getOperand(3));
3846     Ops.push_back(N->getOperand(4));
3847     Ops.push_back(Chain);
3848     break;
3849   case NVPTXISD::Suld3DI16Clamp:
3850     Opc = NVPTX::SULD_3D_I16_CLAMP;
3851     Ops.push_back(TexHandle);
3852     Ops.push_back(N->getOperand(2));
3853     Ops.push_back(N->getOperand(3));
3854     Ops.push_back(N->getOperand(4));
3855     Ops.push_back(Chain);
3856     break;
3857   case NVPTXISD::Suld3DI32Clamp:
3858     Opc = NVPTX::SULD_3D_I32_CLAMP;
3859     Ops.push_back(TexHandle);
3860     Ops.push_back(N->getOperand(2));
3861     Ops.push_back(N->getOperand(3));
3862     Ops.push_back(N->getOperand(4));
3863     Ops.push_back(Chain);
3864     break;
3865   case NVPTXISD::Suld3DI64Clamp:
3866     Opc = NVPTX::SULD_3D_I64_CLAMP;
3867     Ops.push_back(TexHandle);
3868     Ops.push_back(N->getOperand(2));
3869     Ops.push_back(N->getOperand(3));
3870     Ops.push_back(N->getOperand(4));
3871     Ops.push_back(Chain);
3872     break;
3873   case NVPTXISD::Suld3DV2I8Clamp:
3874     Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3875     Ops.push_back(TexHandle);
3876     Ops.push_back(N->getOperand(2));
3877     Ops.push_back(N->getOperand(3));
3878     Ops.push_back(N->getOperand(4));
3879     Ops.push_back(Chain);
3880     break;
3881   case NVPTXISD::Suld3DV2I16Clamp:
3882     Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3883     Ops.push_back(TexHandle);
3884     Ops.push_back(N->getOperand(2));
3885     Ops.push_back(N->getOperand(3));
3886     Ops.push_back(N->getOperand(4));
3887     Ops.push_back(Chain);
3888     break;
3889   case NVPTXISD::Suld3DV2I32Clamp:
3890     Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3891     Ops.push_back(TexHandle);
3892     Ops.push_back(N->getOperand(2));
3893     Ops.push_back(N->getOperand(3));
3894     Ops.push_back(N->getOperand(4));
3895     Ops.push_back(Chain);
3896     break;
3897   case NVPTXISD::Suld3DV2I64Clamp:
3898     Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3899     Ops.push_back(TexHandle);
3900     Ops.push_back(N->getOperand(2));
3901     Ops.push_back(N->getOperand(3));
3902     Ops.push_back(N->getOperand(4));
3903     Ops.push_back(Chain);
3904     break;
3905   case NVPTXISD::Suld3DV4I8Clamp:
3906     Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3907     Ops.push_back(TexHandle);
3908     Ops.push_back(N->getOperand(2));
3909     Ops.push_back(N->getOperand(3));
3910     Ops.push_back(N->getOperand(4));
3911     Ops.push_back(Chain);
3912     break;
3913   case NVPTXISD::Suld3DV4I16Clamp:
3914     Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3915     Ops.push_back(TexHandle);
3916     Ops.push_back(N->getOperand(2));
3917     Ops.push_back(N->getOperand(3));
3918     Ops.push_back(N->getOperand(4));
3919     Ops.push_back(Chain);
3920     break;
3921   case NVPTXISD::Suld3DV4I32Clamp:
3922     Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3923     Ops.push_back(TexHandle);
3924     Ops.push_back(N->getOperand(2));
3925     Ops.push_back(N->getOperand(3));
3926     Ops.push_back(N->getOperand(4));
3927     Ops.push_back(Chain);
3928     break;
3929   case NVPTXISD::Suld1DI8Trap:
3930     Opc = NVPTX::SULD_1D_I8_TRAP;
3931     Ops.push_back(TexHandle);
3932     Ops.push_back(N->getOperand(2));
3933     Ops.push_back(Chain);
3934     break;
3935   case NVPTXISD::Suld1DI16Trap:
3936     Opc = NVPTX::SULD_1D_I16_TRAP;
3937     Ops.push_back(TexHandle);
3938     Ops.push_back(N->getOperand(2));
3939     Ops.push_back(Chain);
3940     break;
3941   case NVPTXISD::Suld1DI32Trap:
3942     Opc = NVPTX::SULD_1D_I32_TRAP;
3943     Ops.push_back(TexHandle);
3944     Ops.push_back(N->getOperand(2));
3945     Ops.push_back(Chain);
3946     break;
3947   case NVPTXISD::Suld1DI64Trap:
3948     Opc = NVPTX::SULD_1D_I64_TRAP;
3949     Ops.push_back(TexHandle);
3950     Ops.push_back(N->getOperand(2));
3951     Ops.push_back(Chain);
3952     break;
3953   case NVPTXISD::Suld1DV2I8Trap:
3954     Opc = NVPTX::SULD_1D_V2I8_TRAP;
3955     Ops.push_back(TexHandle);
3956     Ops.push_back(N->getOperand(2));
3957     Ops.push_back(Chain);
3958     break;
3959   case NVPTXISD::Suld1DV2I16Trap:
3960     Opc = NVPTX::SULD_1D_V2I16_TRAP;
3961     Ops.push_back(TexHandle);
3962     Ops.push_back(N->getOperand(2));
3963     Ops.push_back(Chain);
3964     break;
3965   case NVPTXISD::Suld1DV2I32Trap:
3966     Opc = NVPTX::SULD_1D_V2I32_TRAP;
3967     Ops.push_back(TexHandle);
3968     Ops.push_back(N->getOperand(2));
3969     Ops.push_back(Chain);
3970     break;
3971   case NVPTXISD::Suld1DV2I64Trap:
3972     Opc = NVPTX::SULD_1D_V2I64_TRAP;
3973     Ops.push_back(TexHandle);
3974     Ops.push_back(N->getOperand(2));
3975     Ops.push_back(Chain);
3976     break;
3977   case NVPTXISD::Suld1DV4I8Trap:
3978     Opc = NVPTX::SULD_1D_V4I8_TRAP;
3979     Ops.push_back(TexHandle);
3980     Ops.push_back(N->getOperand(2));
3981     Ops.push_back(Chain);
3982     break;
3983   case NVPTXISD::Suld1DV4I16Trap:
3984     Opc = NVPTX::SULD_1D_V4I16_TRAP;
3985     Ops.push_back(TexHandle);
3986     Ops.push_back(N->getOperand(2));
3987     Ops.push_back(Chain);
3988     break;
3989   case NVPTXISD::Suld1DV4I32Trap:
3990     Opc = NVPTX::SULD_1D_V4I32_TRAP;
3991     Ops.push_back(TexHandle);
3992     Ops.push_back(N->getOperand(2));
3993     Ops.push_back(Chain);
3994     break;
3995   case NVPTXISD::Suld1DArrayI8Trap:
3996     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3997     Ops.push_back(TexHandle);
3998     Ops.push_back(N->getOperand(2));
3999     Ops.push_back(N->getOperand(3));
4000     Ops.push_back(Chain);
4001     break;
4002   case NVPTXISD::Suld1DArrayI16Trap:
4003     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4004     Ops.push_back(TexHandle);
4005     Ops.push_back(N->getOperand(2));
4006     Ops.push_back(N->getOperand(3));
4007     Ops.push_back(Chain);
4008     break;
4009   case NVPTXISD::Suld1DArrayI32Trap:
4010     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4011     Ops.push_back(TexHandle);
4012     Ops.push_back(N->getOperand(2));
4013     Ops.push_back(N->getOperand(3));
4014     Ops.push_back(Chain);
4015     break;
4016   case NVPTXISD::Suld1DArrayI64Trap:
4017     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4018     Ops.push_back(TexHandle);
4019     Ops.push_back(N->getOperand(2));
4020     Ops.push_back(N->getOperand(3));
4021     Ops.push_back(Chain);
4022     break;
4023   case NVPTXISD::Suld1DArrayV2I8Trap:
4024     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4025     Ops.push_back(TexHandle);
4026     Ops.push_back(N->getOperand(2));
4027     Ops.push_back(N->getOperand(3));
4028     Ops.push_back(Chain);
4029     break;
4030   case NVPTXISD::Suld1DArrayV2I16Trap:
4031     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4032     Ops.push_back(TexHandle);
4033     Ops.push_back(N->getOperand(2));
4034     Ops.push_back(N->getOperand(3));
4035     Ops.push_back(Chain);
4036     break;
4037   case NVPTXISD::Suld1DArrayV2I32Trap:
4038     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4039     Ops.push_back(TexHandle);
4040     Ops.push_back(N->getOperand(2));
4041     Ops.push_back(N->getOperand(3));
4042     Ops.push_back(Chain);
4043     break;
4044   case NVPTXISD::Suld1DArrayV2I64Trap:
4045     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4046     Ops.push_back(TexHandle);
4047     Ops.push_back(N->getOperand(2));
4048     Ops.push_back(N->getOperand(3));
4049     Ops.push_back(Chain);
4050     break;
4051   case NVPTXISD::Suld1DArrayV4I8Trap:
4052     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4053     Ops.push_back(TexHandle);
4054     Ops.push_back(N->getOperand(2));
4055     Ops.push_back(N->getOperand(3));
4056     Ops.push_back(Chain);
4057     break;
4058   case NVPTXISD::Suld1DArrayV4I16Trap:
4059     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4060     Ops.push_back(TexHandle);
4061     Ops.push_back(N->getOperand(2));
4062     Ops.push_back(N->getOperand(3));
4063     Ops.push_back(Chain);
4064     break;
4065   case NVPTXISD::Suld1DArrayV4I32Trap:
4066     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4067     Ops.push_back(TexHandle);
4068     Ops.push_back(N->getOperand(2));
4069     Ops.push_back(N->getOperand(3));
4070     Ops.push_back(Chain);
4071     break;
4072   case NVPTXISD::Suld2DI8Trap:
4073     Opc = NVPTX::SULD_2D_I8_TRAP;
4074     Ops.push_back(TexHandle);
4075     Ops.push_back(N->getOperand(2));
4076     Ops.push_back(N->getOperand(3));
4077     Ops.push_back(Chain);
4078     break;
4079   case NVPTXISD::Suld2DI16Trap:
4080     Opc = NVPTX::SULD_2D_I16_TRAP;
4081     Ops.push_back(TexHandle);
4082     Ops.push_back(N->getOperand(2));
4083     Ops.push_back(N->getOperand(3));
4084     Ops.push_back(Chain);
4085     break;
4086   case NVPTXISD::Suld2DI32Trap:
4087     Opc = NVPTX::SULD_2D_I32_TRAP;
4088     Ops.push_back(TexHandle);
4089     Ops.push_back(N->getOperand(2));
4090     Ops.push_back(N->getOperand(3));
4091     Ops.push_back(Chain);
4092     break;
4093   case NVPTXISD::Suld2DI64Trap:
4094     Opc = NVPTX::SULD_2D_I64_TRAP;
4095     Ops.push_back(TexHandle);
4096     Ops.push_back(N->getOperand(2));
4097     Ops.push_back(N->getOperand(3));
4098     Ops.push_back(Chain);
4099     break;
4100   case NVPTXISD::Suld2DV2I8Trap:
4101     Opc = NVPTX::SULD_2D_V2I8_TRAP;
4102     Ops.push_back(TexHandle);
4103     Ops.push_back(N->getOperand(2));
4104     Ops.push_back(N->getOperand(3));
4105     Ops.push_back(Chain);
4106     break;
4107   case NVPTXISD::Suld2DV2I16Trap:
4108     Opc = NVPTX::SULD_2D_V2I16_TRAP;
4109     Ops.push_back(TexHandle);
4110     Ops.push_back(N->getOperand(2));
4111     Ops.push_back(N->getOperand(3));
4112     Ops.push_back(Chain);
4113     break;
4114   case NVPTXISD::Suld2DV2I32Trap:
4115     Opc = NVPTX::SULD_2D_V2I32_TRAP;
4116     Ops.push_back(TexHandle);
4117     Ops.push_back(N->getOperand(2));
4118     Ops.push_back(N->getOperand(3));
4119     Ops.push_back(Chain);
4120     break;
4121   case NVPTXISD::Suld2DV2I64Trap:
4122     Opc = NVPTX::SULD_2D_V2I64_TRAP;
4123     Ops.push_back(TexHandle);
4124     Ops.push_back(N->getOperand(2));
4125     Ops.push_back(N->getOperand(3));
4126     Ops.push_back(Chain);
4127     break;
4128   case NVPTXISD::Suld2DV4I8Trap:
4129     Opc = NVPTX::SULD_2D_V4I8_TRAP;
4130     Ops.push_back(TexHandle);
4131     Ops.push_back(N->getOperand(2));
4132     Ops.push_back(N->getOperand(3));
4133     Ops.push_back(Chain);
4134     break;
4135   case NVPTXISD::Suld2DV4I16Trap:
4136     Opc = NVPTX::SULD_2D_V4I16_TRAP;
4137     Ops.push_back(TexHandle);
4138     Ops.push_back(N->getOperand(2));
4139     Ops.push_back(N->getOperand(3));
4140     Ops.push_back(Chain);
4141     break;
4142   case NVPTXISD::Suld2DV4I32Trap:
4143     Opc = NVPTX::SULD_2D_V4I32_TRAP;
4144     Ops.push_back(TexHandle);
4145     Ops.push_back(N->getOperand(2));
4146     Ops.push_back(N->getOperand(3));
4147     Ops.push_back(Chain);
4148     break;
4149   case NVPTXISD::Suld2DArrayI8Trap:
4150     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4151     Ops.push_back(TexHandle);
4152     Ops.push_back(N->getOperand(2));
4153     Ops.push_back(N->getOperand(3));
4154     Ops.push_back(N->getOperand(4));
4155     Ops.push_back(Chain);
4156     break;
4157   case NVPTXISD::Suld2DArrayI16Trap:
4158     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4159     Ops.push_back(TexHandle);
4160     Ops.push_back(N->getOperand(2));
4161     Ops.push_back(N->getOperand(3));
4162     Ops.push_back(N->getOperand(4));
4163     Ops.push_back(Chain);
4164     break;
4165   case NVPTXISD::Suld2DArrayI32Trap:
4166     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4167     Ops.push_back(TexHandle);
4168     Ops.push_back(N->getOperand(2));
4169     Ops.push_back(N->getOperand(3));
4170     Ops.push_back(N->getOperand(4));
4171     Ops.push_back(Chain);
4172     break;
4173   case NVPTXISD::Suld2DArrayI64Trap:
4174     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4175     Ops.push_back(TexHandle);
4176     Ops.push_back(N->getOperand(2));
4177     Ops.push_back(N->getOperand(3));
4178     Ops.push_back(N->getOperand(4));
4179     Ops.push_back(Chain);
4180     break;
4181   case NVPTXISD::Suld2DArrayV2I8Trap:
4182     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4183     Ops.push_back(TexHandle);
4184     Ops.push_back(N->getOperand(2));
4185     Ops.push_back(N->getOperand(3));
4186     Ops.push_back(N->getOperand(4));
4187     Ops.push_back(Chain);
4188     break;
4189   case NVPTXISD::Suld2DArrayV2I16Trap:
4190     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4191     Ops.push_back(TexHandle);
4192     Ops.push_back(N->getOperand(2));
4193     Ops.push_back(N->getOperand(3));
4194     Ops.push_back(N->getOperand(4));
4195     Ops.push_back(Chain);
4196     break;
4197   case NVPTXISD::Suld2DArrayV2I32Trap:
4198     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4199     Ops.push_back(TexHandle);
4200     Ops.push_back(N->getOperand(2));
4201     Ops.push_back(N->getOperand(3));
4202     Ops.push_back(N->getOperand(4));
4203     Ops.push_back(Chain);
4204     break;
4205   case NVPTXISD::Suld2DArrayV2I64Trap:
4206     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4207     Ops.push_back(TexHandle);
4208     Ops.push_back(N->getOperand(2));
4209     Ops.push_back(N->getOperand(3));
4210     Ops.push_back(N->getOperand(4));
4211     Ops.push_back(Chain);
4212     break;
4213   case NVPTXISD::Suld2DArrayV4I8Trap:
4214     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4215     Ops.push_back(TexHandle);
4216     Ops.push_back(N->getOperand(2));
4217     Ops.push_back(N->getOperand(3));
4218     Ops.push_back(N->getOperand(4));
4219     Ops.push_back(Chain);
4220     break;
4221   case NVPTXISD::Suld2DArrayV4I16Trap:
4222     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4223     Ops.push_back(TexHandle);
4224     Ops.push_back(N->getOperand(2));
4225     Ops.push_back(N->getOperand(3));
4226     Ops.push_back(N->getOperand(4));
4227     Ops.push_back(Chain);
4228     break;
4229   case NVPTXISD::Suld2DArrayV4I32Trap:
4230     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4231     Ops.push_back(TexHandle);
4232     Ops.push_back(N->getOperand(2));
4233     Ops.push_back(N->getOperand(3));
4234     Ops.push_back(N->getOperand(4));
4235     Ops.push_back(Chain);
4236     break;
4237   case NVPTXISD::Suld3DI8Trap:
4238     Opc = NVPTX::SULD_3D_I8_TRAP;
4239     Ops.push_back(TexHandle);
4240     Ops.push_back(N->getOperand(2));
4241     Ops.push_back(N->getOperand(3));
4242     Ops.push_back(N->getOperand(4));
4243     Ops.push_back(Chain);
4244     break;
4245   case NVPTXISD::Suld3DI16Trap:
4246     Opc = NVPTX::SULD_3D_I16_TRAP;
4247     Ops.push_back(TexHandle);
4248     Ops.push_back(N->getOperand(2));
4249     Ops.push_back(N->getOperand(3));
4250     Ops.push_back(N->getOperand(4));
4251     Ops.push_back(Chain);
4252     break;
4253   case NVPTXISD::Suld3DI32Trap:
4254     Opc = NVPTX::SULD_3D_I32_TRAP;
4255     Ops.push_back(TexHandle);
4256     Ops.push_back(N->getOperand(2));
4257     Ops.push_back(N->getOperand(3));
4258     Ops.push_back(N->getOperand(4));
4259     Ops.push_back(Chain);
4260     break;
4261   case NVPTXISD::Suld3DI64Trap:
4262     Opc = NVPTX::SULD_3D_I64_TRAP;
4263     Ops.push_back(TexHandle);
4264     Ops.push_back(N->getOperand(2));
4265     Ops.push_back(N->getOperand(3));
4266     Ops.push_back(N->getOperand(4));
4267     Ops.push_back(Chain);
4268     break;
4269   case NVPTXISD::Suld3DV2I8Trap:
4270     Opc = NVPTX::SULD_3D_V2I8_TRAP;
4271     Ops.push_back(TexHandle);
4272     Ops.push_back(N->getOperand(2));
4273     Ops.push_back(N->getOperand(3));
4274     Ops.push_back(N->getOperand(4));
4275     Ops.push_back(Chain);
4276     break;
4277   case NVPTXISD::Suld3DV2I16Trap:
4278     Opc = NVPTX::SULD_3D_V2I16_TRAP;
4279     Ops.push_back(TexHandle);
4280     Ops.push_back(N->getOperand(2));
4281     Ops.push_back(N->getOperand(3));
4282     Ops.push_back(N->getOperand(4));
4283     Ops.push_back(Chain);
4284     break;
4285   case NVPTXISD::Suld3DV2I32Trap:
4286     Opc = NVPTX::SULD_3D_V2I32_TRAP;
4287     Ops.push_back(TexHandle);
4288     Ops.push_back(N->getOperand(2));
4289     Ops.push_back(N->getOperand(3));
4290     Ops.push_back(N->getOperand(4));
4291     Ops.push_back(Chain);
4292     break;
4293   case NVPTXISD::Suld3DV2I64Trap:
4294     Opc = NVPTX::SULD_3D_V2I64_TRAP;
4295     Ops.push_back(TexHandle);
4296     Ops.push_back(N->getOperand(2));
4297     Ops.push_back(N->getOperand(3));
4298     Ops.push_back(N->getOperand(4));
4299     Ops.push_back(Chain);
4300     break;
4301   case NVPTXISD::Suld3DV4I8Trap:
4302     Opc = NVPTX::SULD_3D_V4I8_TRAP;
4303     Ops.push_back(TexHandle);
4304     Ops.push_back(N->getOperand(2));
4305     Ops.push_back(N->getOperand(3));
4306     Ops.push_back(N->getOperand(4));
4307     Ops.push_back(Chain);
4308     break;
4309   case NVPTXISD::Suld3DV4I16Trap:
4310     Opc = NVPTX::SULD_3D_V4I16_TRAP;
4311     Ops.push_back(TexHandle);
4312     Ops.push_back(N->getOperand(2));
4313     Ops.push_back(N->getOperand(3));
4314     Ops.push_back(N->getOperand(4));
4315     Ops.push_back(Chain);
4316     break;
4317   case NVPTXISD::Suld3DV4I32Trap:
4318     Opc = NVPTX::SULD_3D_V4I32_TRAP;
4319     Ops.push_back(TexHandle);
4320     Ops.push_back(N->getOperand(2));
4321     Ops.push_back(N->getOperand(3));
4322     Ops.push_back(N->getOperand(4));
4323     Ops.push_back(Chain);
4324     break;
4325   case NVPTXISD::Suld1DI8Zero:
4326     Opc = NVPTX::SULD_1D_I8_ZERO;
4327     Ops.push_back(TexHandle);
4328     Ops.push_back(N->getOperand(2));
4329     Ops.push_back(Chain);
4330     break;
4331   case NVPTXISD::Suld1DI16Zero:
4332     Opc = NVPTX::SULD_1D_I16_ZERO;
4333     Ops.push_back(TexHandle);
4334     Ops.push_back(N->getOperand(2));
4335     Ops.push_back(Chain);
4336     break;
4337   case NVPTXISD::Suld1DI32Zero:
4338     Opc = NVPTX::SULD_1D_I32_ZERO;
4339     Ops.push_back(TexHandle);
4340     Ops.push_back(N->getOperand(2));
4341     Ops.push_back(Chain);
4342     break;
4343   case NVPTXISD::Suld1DI64Zero:
4344     Opc = NVPTX::SULD_1D_I64_ZERO;
4345     Ops.push_back(TexHandle);
4346     Ops.push_back(N->getOperand(2));
4347     Ops.push_back(Chain);
4348     break;
4349   case NVPTXISD::Suld1DV2I8Zero:
4350     Opc = NVPTX::SULD_1D_V2I8_ZERO;
4351     Ops.push_back(TexHandle);
4352     Ops.push_back(N->getOperand(2));
4353     Ops.push_back(Chain);
4354     break;
4355   case NVPTXISD::Suld1DV2I16Zero:
4356     Opc = NVPTX::SULD_1D_V2I16_ZERO;
4357     Ops.push_back(TexHandle);
4358     Ops.push_back(N->getOperand(2));
4359     Ops.push_back(Chain);
4360     break;
4361   case NVPTXISD::Suld1DV2I32Zero:
4362     Opc = NVPTX::SULD_1D_V2I32_ZERO;
4363     Ops.push_back(TexHandle);
4364     Ops.push_back(N->getOperand(2));
4365     Ops.push_back(Chain);
4366     break;
4367   case NVPTXISD::Suld1DV2I64Zero:
4368     Opc = NVPTX::SULD_1D_V2I64_ZERO;
4369     Ops.push_back(TexHandle);
4370     Ops.push_back(N->getOperand(2));
4371     Ops.push_back(Chain);
4372     break;
4373   case NVPTXISD::Suld1DV4I8Zero:
4374     Opc = NVPTX::SULD_1D_V4I8_ZERO;
4375     Ops.push_back(TexHandle);
4376     Ops.push_back(N->getOperand(2));
4377     Ops.push_back(Chain);
4378     break;
4379   case NVPTXISD::Suld1DV4I16Zero:
4380     Opc = NVPTX::SULD_1D_V4I16_ZERO;
4381     Ops.push_back(TexHandle);
4382     Ops.push_back(N->getOperand(2));
4383     Ops.push_back(Chain);
4384     break;
4385   case NVPTXISD::Suld1DV4I32Zero:
4386     Opc = NVPTX::SULD_1D_V4I32_ZERO;
4387     Ops.push_back(TexHandle);
4388     Ops.push_back(N->getOperand(2));
4389     Ops.push_back(Chain);
4390     break;
4391   case NVPTXISD::Suld1DArrayI8Zero:
4392     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4393     Ops.push_back(TexHandle);
4394     Ops.push_back(N->getOperand(2));
4395     Ops.push_back(N->getOperand(3));
4396     Ops.push_back(Chain);
4397     break;
4398   case NVPTXISD::Suld1DArrayI16Zero:
4399     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4400     Ops.push_back(TexHandle);
4401     Ops.push_back(N->getOperand(2));
4402     Ops.push_back(N->getOperand(3));
4403     Ops.push_back(Chain);
4404     break;
4405   case NVPTXISD::Suld1DArrayI32Zero:
4406     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4407     Ops.push_back(TexHandle);
4408     Ops.push_back(N->getOperand(2));
4409     Ops.push_back(N->getOperand(3));
4410     Ops.push_back(Chain);
4411     break;
4412   case NVPTXISD::Suld1DArrayI64Zero:
4413     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4414     Ops.push_back(TexHandle);
4415     Ops.push_back(N->getOperand(2));
4416     Ops.push_back(N->getOperand(3));
4417     Ops.push_back(Chain);
4418     break;
4419   case NVPTXISD::Suld1DArrayV2I8Zero:
4420     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4421     Ops.push_back(TexHandle);
4422     Ops.push_back(N->getOperand(2));
4423     Ops.push_back(N->getOperand(3));
4424     Ops.push_back(Chain);
4425     break;
4426   case NVPTXISD::Suld1DArrayV2I16Zero:
4427     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4428     Ops.push_back(TexHandle);
4429     Ops.push_back(N->getOperand(2));
4430     Ops.push_back(N->getOperand(3));
4431     Ops.push_back(Chain);
4432     break;
4433   case NVPTXISD::Suld1DArrayV2I32Zero:
4434     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4435     Ops.push_back(TexHandle);
4436     Ops.push_back(N->getOperand(2));
4437     Ops.push_back(N->getOperand(3));
4438     Ops.push_back(Chain);
4439     break;
4440   case NVPTXISD::Suld1DArrayV2I64Zero:
4441     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4442     Ops.push_back(TexHandle);
4443     Ops.push_back(N->getOperand(2));
4444     Ops.push_back(N->getOperand(3));
4445     Ops.push_back(Chain);
4446     break;
4447   case NVPTXISD::Suld1DArrayV4I8Zero:
4448     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4449     Ops.push_back(TexHandle);
4450     Ops.push_back(N->getOperand(2));
4451     Ops.push_back(N->getOperand(3));
4452     Ops.push_back(Chain);
4453     break;
4454   case NVPTXISD::Suld1DArrayV4I16Zero:
4455     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4456     Ops.push_back(TexHandle);
4457     Ops.push_back(N->getOperand(2));
4458     Ops.push_back(N->getOperand(3));
4459     Ops.push_back(Chain);
4460     break;
4461   case NVPTXISD::Suld1DArrayV4I32Zero:
4462     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4463     Ops.push_back(TexHandle);
4464     Ops.push_back(N->getOperand(2));
4465     Ops.push_back(N->getOperand(3));
4466     Ops.push_back(Chain);
4467     break;
4468   case NVPTXISD::Suld2DI8Zero:
4469     Opc = NVPTX::SULD_2D_I8_ZERO;
4470     Ops.push_back(TexHandle);
4471     Ops.push_back(N->getOperand(2));
4472     Ops.push_back(N->getOperand(3));
4473     Ops.push_back(Chain);
4474     break;
4475   case NVPTXISD::Suld2DI16Zero:
4476     Opc = NVPTX::SULD_2D_I16_ZERO;
4477     Ops.push_back(TexHandle);
4478     Ops.push_back(N->getOperand(2));
4479     Ops.push_back(N->getOperand(3));
4480     Ops.push_back(Chain);
4481     break;
4482   case NVPTXISD::Suld2DI32Zero:
4483     Opc = NVPTX::SULD_2D_I32_ZERO;
4484     Ops.push_back(TexHandle);
4485     Ops.push_back(N->getOperand(2));
4486     Ops.push_back(N->getOperand(3));
4487     Ops.push_back(Chain);
4488     break;
4489   case NVPTXISD::Suld2DI64Zero:
4490     Opc = NVPTX::SULD_2D_I64_ZERO;
4491     Ops.push_back(TexHandle);
4492     Ops.push_back(N->getOperand(2));
4493     Ops.push_back(N->getOperand(3));
4494     Ops.push_back(Chain);
4495     break;
4496   case NVPTXISD::Suld2DV2I8Zero:
4497     Opc = NVPTX::SULD_2D_V2I8_ZERO;
4498     Ops.push_back(TexHandle);
4499     Ops.push_back(N->getOperand(2));
4500     Ops.push_back(N->getOperand(3));
4501     Ops.push_back(Chain);
4502     break;
4503   case NVPTXISD::Suld2DV2I16Zero:
4504     Opc = NVPTX::SULD_2D_V2I16_ZERO;
4505     Ops.push_back(TexHandle);
4506     Ops.push_back(N->getOperand(2));
4507     Ops.push_back(N->getOperand(3));
4508     Ops.push_back(Chain);
4509     break;
4510   case NVPTXISD::Suld2DV2I32Zero:
4511     Opc = NVPTX::SULD_2D_V2I32_ZERO;
4512     Ops.push_back(TexHandle);
4513     Ops.push_back(N->getOperand(2));
4514     Ops.push_back(N->getOperand(3));
4515     Ops.push_back(Chain);
4516     break;
4517   case NVPTXISD::Suld2DV2I64Zero:
4518     Opc = NVPTX::SULD_2D_V2I64_ZERO;
4519     Ops.push_back(TexHandle);
4520     Ops.push_back(N->getOperand(2));
4521     Ops.push_back(N->getOperand(3));
4522     Ops.push_back(Chain);
4523     break;
4524   case NVPTXISD::Suld2DV4I8Zero:
4525     Opc = NVPTX::SULD_2D_V4I8_ZERO;
4526     Ops.push_back(TexHandle);
4527     Ops.push_back(N->getOperand(2));
4528     Ops.push_back(N->getOperand(3));
4529     Ops.push_back(Chain);
4530     break;
4531   case NVPTXISD::Suld2DV4I16Zero:
4532     Opc = NVPTX::SULD_2D_V4I16_ZERO;
4533     Ops.push_back(TexHandle);
4534     Ops.push_back(N->getOperand(2));
4535     Ops.push_back(N->getOperand(3));
4536     Ops.push_back(Chain);
4537     break;
4538   case NVPTXISD::Suld2DV4I32Zero:
4539     Opc = NVPTX::SULD_2D_V4I32_ZERO;
4540     Ops.push_back(TexHandle);
4541     Ops.push_back(N->getOperand(2));
4542     Ops.push_back(N->getOperand(3));
4543     Ops.push_back(Chain);
4544     break;
4545   case NVPTXISD::Suld2DArrayI8Zero:
4546     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4547     Ops.push_back(TexHandle);
4548     Ops.push_back(N->getOperand(2));
4549     Ops.push_back(N->getOperand(3));
4550     Ops.push_back(N->getOperand(4));
4551     Ops.push_back(Chain);
4552     break;
4553   case NVPTXISD::Suld2DArrayI16Zero:
4554     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4555     Ops.push_back(TexHandle);
4556     Ops.push_back(N->getOperand(2));
4557     Ops.push_back(N->getOperand(3));
4558     Ops.push_back(N->getOperand(4));
4559     Ops.push_back(Chain);
4560     break;
4561   case NVPTXISD::Suld2DArrayI32Zero:
4562     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4563     Ops.push_back(TexHandle);
4564     Ops.push_back(N->getOperand(2));
4565     Ops.push_back(N->getOperand(3));
4566     Ops.push_back(N->getOperand(4));
4567     Ops.push_back(Chain);
4568     break;
4569   case NVPTXISD::Suld2DArrayI64Zero:
4570     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4571     Ops.push_back(TexHandle);
4572     Ops.push_back(N->getOperand(2));
4573     Ops.push_back(N->getOperand(3));
4574     Ops.push_back(N->getOperand(4));
4575     Ops.push_back(Chain);
4576     break;
4577   case NVPTXISD::Suld2DArrayV2I8Zero:
4578     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4579     Ops.push_back(TexHandle);
4580     Ops.push_back(N->getOperand(2));
4581     Ops.push_back(N->getOperand(3));
4582     Ops.push_back(N->getOperand(4));
4583     Ops.push_back(Chain);
4584     break;
4585   case NVPTXISD::Suld2DArrayV2I16Zero:
4586     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4587     Ops.push_back(TexHandle);
4588     Ops.push_back(N->getOperand(2));
4589     Ops.push_back(N->getOperand(3));
4590     Ops.push_back(N->getOperand(4));
4591     Ops.push_back(Chain);
4592     break;
4593   case NVPTXISD::Suld2DArrayV2I32Zero:
4594     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4595     Ops.push_back(TexHandle);
4596     Ops.push_back(N->getOperand(2));
4597     Ops.push_back(N->getOperand(3));
4598     Ops.push_back(N->getOperand(4));
4599     Ops.push_back(Chain);
4600     break;
4601   case NVPTXISD::Suld2DArrayV2I64Zero:
4602     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4603     Ops.push_back(TexHandle);
4604     Ops.push_back(N->getOperand(2));
4605     Ops.push_back(N->getOperand(3));
4606     Ops.push_back(N->getOperand(4));
4607     Ops.push_back(Chain);
4608     break;
4609   case NVPTXISD::Suld2DArrayV4I8Zero:
4610     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4611     Ops.push_back(TexHandle);
4612     Ops.push_back(N->getOperand(2));
4613     Ops.push_back(N->getOperand(3));
4614     Ops.push_back(N->getOperand(4));
4615     Ops.push_back(Chain);
4616     break;
4617   case NVPTXISD::Suld2DArrayV4I16Zero:
4618     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4619     Ops.push_back(TexHandle);
4620     Ops.push_back(N->getOperand(2));
4621     Ops.push_back(N->getOperand(3));
4622     Ops.push_back(N->getOperand(4));
4623     Ops.push_back(Chain);
4624     break;
4625   case NVPTXISD::Suld2DArrayV4I32Zero:
4626     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4627     Ops.push_back(TexHandle);
4628     Ops.push_back(N->getOperand(2));
4629     Ops.push_back(N->getOperand(3));
4630     Ops.push_back(N->getOperand(4));
4631     Ops.push_back(Chain);
4632     break;
4633   case NVPTXISD::Suld3DI8Zero:
4634     Opc = NVPTX::SULD_3D_I8_ZERO;
4635     Ops.push_back(TexHandle);
4636     Ops.push_back(N->getOperand(2));
4637     Ops.push_back(N->getOperand(3));
4638     Ops.push_back(N->getOperand(4));
4639     Ops.push_back(Chain);
4640     break;
4641   case NVPTXISD::Suld3DI16Zero:
4642     Opc = NVPTX::SULD_3D_I16_ZERO;
4643     Ops.push_back(TexHandle);
4644     Ops.push_back(N->getOperand(2));
4645     Ops.push_back(N->getOperand(3));
4646     Ops.push_back(N->getOperand(4));
4647     Ops.push_back(Chain);
4648     break;
4649   case NVPTXISD::Suld3DI32Zero:
4650     Opc = NVPTX::SULD_3D_I32_ZERO;
4651     Ops.push_back(TexHandle);
4652     Ops.push_back(N->getOperand(2));
4653     Ops.push_back(N->getOperand(3));
4654     Ops.push_back(N->getOperand(4));
4655     Ops.push_back(Chain);
4656     break;
4657   case NVPTXISD::Suld3DI64Zero:
4658     Opc = NVPTX::SULD_3D_I64_ZERO;
4659     Ops.push_back(TexHandle);
4660     Ops.push_back(N->getOperand(2));
4661     Ops.push_back(N->getOperand(3));
4662     Ops.push_back(N->getOperand(4));
4663     Ops.push_back(Chain);
4664     break;
4665   case NVPTXISD::Suld3DV2I8Zero:
4666     Opc = NVPTX::SULD_3D_V2I8_ZERO;
4667     Ops.push_back(TexHandle);
4668     Ops.push_back(N->getOperand(2));
4669     Ops.push_back(N->getOperand(3));
4670     Ops.push_back(N->getOperand(4));
4671     Ops.push_back(Chain);
4672     break;
4673   case NVPTXISD::Suld3DV2I16Zero:
4674     Opc = NVPTX::SULD_3D_V2I16_ZERO;
4675     Ops.push_back(TexHandle);
4676     Ops.push_back(N->getOperand(2));
4677     Ops.push_back(N->getOperand(3));
4678     Ops.push_back(N->getOperand(4));
4679     Ops.push_back(Chain);
4680     break;
4681   case NVPTXISD::Suld3DV2I32Zero:
4682     Opc = NVPTX::SULD_3D_V2I32_ZERO;
4683     Ops.push_back(TexHandle);
4684     Ops.push_back(N->getOperand(2));
4685     Ops.push_back(N->getOperand(3));
4686     Ops.push_back(N->getOperand(4));
4687     Ops.push_back(Chain);
4688     break;
4689   case NVPTXISD::Suld3DV2I64Zero:
4690     Opc = NVPTX::SULD_3D_V2I64_ZERO;
4691     Ops.push_back(TexHandle);
4692     Ops.push_back(N->getOperand(2));
4693     Ops.push_back(N->getOperand(3));
4694     Ops.push_back(N->getOperand(4));
4695     Ops.push_back(Chain);
4696     break;
4697   case NVPTXISD::Suld3DV4I8Zero:
4698     Opc = NVPTX::SULD_3D_V4I8_ZERO;
4699     Ops.push_back(TexHandle);
4700     Ops.push_back(N->getOperand(2));
4701     Ops.push_back(N->getOperand(3));
4702     Ops.push_back(N->getOperand(4));
4703     Ops.push_back(Chain);
4704     break;
4705   case NVPTXISD::Suld3DV4I16Zero:
4706     Opc = NVPTX::SULD_3D_V4I16_ZERO;
4707     Ops.push_back(TexHandle);
4708     Ops.push_back(N->getOperand(2));
4709     Ops.push_back(N->getOperand(3));
4710     Ops.push_back(N->getOperand(4));
4711     Ops.push_back(Chain);
4712     break;
4713   case NVPTXISD::Suld3DV4I32Zero:
4714     Opc = NVPTX::SULD_3D_V4I32_ZERO;
4715     Ops.push_back(TexHandle);
4716     Ops.push_back(N->getOperand(2));
4717     Ops.push_back(N->getOperand(3));
4718     Ops.push_back(N->getOperand(4));
4719     Ops.push_back(Chain);
4720     break;
4721   }
4722   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4723   return Ret;
4724 }
4725 
4726 
4727 /// SelectBFE - Look for instruction sequences that can be made more efficient
4728 /// by using the 'bfe' (bit-field extract) PTX instruction
SelectBFE(SDNode * N)4729 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4730   SDValue LHS = N->getOperand(0);
4731   SDValue RHS = N->getOperand(1);
4732   SDValue Len;
4733   SDValue Start;
4734   SDValue Val;
4735   bool IsSigned = false;
4736 
4737   if (N->getOpcode() == ISD::AND) {
4738     // Canonicalize the operands
4739     // We want 'and %val, %mask'
4740     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4741       std::swap(LHS, RHS);
4742     }
4743 
4744     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4745     if (!Mask) {
4746       // We need a constant mask on the RHS of the AND
4747       return NULL;
4748     }
4749 
4750     // Extract the mask bits
4751     uint64_t MaskVal = Mask->getZExtValue();
4752     if (!isMask_64(MaskVal)) {
4753       // We *could* handle shifted masks here, but doing so would require an
4754       // 'and' operation to fix up the low-order bits so we would trade
4755       // shr+and for bfe+and, which has the same throughput
4756       return NULL;
4757     }
4758 
4759     // How many bits are in our mask?
4760     uint64_t NumBits = countTrailingOnes(MaskVal);
4761     Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4762 
4763     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4764       // We have a 'srl/and' pair, extract the effective start bit and length
4765       Val = LHS.getNode()->getOperand(0);
4766       Start = LHS.getNode()->getOperand(1);
4767       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4768       if (StartConst) {
4769         uint64_t StartVal = StartConst->getZExtValue();
4770         // How many "good" bits do we have left?  "good" is defined here as bits
4771         // that exist in the original value, not shifted in.
4772         uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4773         if (NumBits > GoodBits) {
4774           // Do not handle the case where bits have been shifted in. In theory
4775           // we could handle this, but the cost is likely higher than just
4776           // emitting the srl/and pair.
4777           return NULL;
4778         }
4779         Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
4780       } else {
4781         // Do not handle the case where the shift amount (can be zero if no srl
4782         // was found) is not constant. We could handle this case, but it would
4783         // require run-time logic that would be more expensive than just
4784         // emitting the srl/and pair.
4785         return NULL;
4786       }
4787     } else {
4788       // Do not handle the case where the LHS of the and is not a shift. While
4789       // it would be trivial to handle this case, it would just transform
4790       // 'and' -> 'bfe', but 'and' has higher-throughput.
4791       return NULL;
4792     }
4793   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4794     if (LHS->getOpcode() == ISD::AND) {
4795       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4796       if (!ShiftCnst) {
4797         // Shift amount must be constant
4798         return NULL;
4799       }
4800 
4801       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4802 
4803       SDValue AndLHS = LHS->getOperand(0);
4804       SDValue AndRHS = LHS->getOperand(1);
4805 
4806       // Canonicalize the AND to have the mask on the RHS
4807       if (isa<ConstantSDNode>(AndLHS)) {
4808         std::swap(AndLHS, AndRHS);
4809       }
4810 
4811       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4812       if (!MaskCnst) {
4813         // Mask must be constant
4814         return NULL;
4815       }
4816 
4817       uint64_t MaskVal = MaskCnst->getZExtValue();
4818       uint64_t NumZeros;
4819       uint64_t NumBits;
4820       if (isMask_64(MaskVal)) {
4821         NumZeros = 0;
4822         // The number of bits in the result bitfield will be the number of
4823         // trailing ones (the AND) minus the number of bits we shift off
4824         NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4825       } else if (isShiftedMask_64(MaskVal)) {
4826         NumZeros = countTrailingZeros(MaskVal);
4827         unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4828         // The number of bits in the result bitfield will be the number of
4829         // trailing zeros plus the number of set bits in the mask minus the
4830         // number of bits we shift off
4831         NumBits = NumZeros + NumOnes - ShiftAmt;
4832       } else {
4833         // This is not a mask we can handle
4834         return NULL;
4835       }
4836 
4837       if (ShiftAmt < NumZeros) {
4838         // Handling this case would require extra logic that would make this
4839         // transformation non-profitable
4840         return NULL;
4841       }
4842 
4843       Val = AndLHS;
4844       Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
4845       Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4846     } else if (LHS->getOpcode() == ISD::SHL) {
4847       // Here, we have a pattern like:
4848       //
4849       // (sra (shl val, NN), MM)
4850       // or
4851       // (srl (shl val, NN), MM)
4852       //
4853       // If MM >= NN, we can efficiently optimize this with bfe
4854       Val = LHS->getOperand(0);
4855 
4856       SDValue ShlRHS = LHS->getOperand(1);
4857       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4858       if (!ShlCnst) {
4859         // Shift amount must be constant
4860         return NULL;
4861       }
4862       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4863 
4864       SDValue ShrRHS = RHS;
4865       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4866       if (!ShrCnst) {
4867         // Shift amount must be constant
4868         return NULL;
4869       }
4870       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4871 
4872       // To avoid extra codegen and be profitable, we need Outer >= Inner
4873       if (OuterShiftAmt < InnerShiftAmt) {
4874         return NULL;
4875       }
4876 
4877       // If the outer shift is more than the type size, we have no bitfield to
4878       // extract (since we also check that the inner shift is <= the outer shift
4879       // then this also implies that the inner shift is < the type size)
4880       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
4881         return NULL;
4882       }
4883 
4884       Start =
4885         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
4886       Len =
4887         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4888                                   OuterShiftAmt, MVT::i32);
4889 
4890       if (N->getOpcode() == ISD::SRA) {
4891         // If we have a arithmetic right shift, we need to use the signed bfe
4892         // variant
4893         IsSigned = true;
4894       }
4895     } else {
4896       // No can do...
4897       return NULL;
4898     }
4899   } else {
4900     // No can do...
4901     return NULL;
4902   }
4903 
4904 
4905   unsigned Opc;
4906   // For the BFE operations we form here from "and" and "srl", always use the
4907   // unsigned variants.
4908   if (Val.getValueType() == MVT::i32) {
4909     if (IsSigned) {
4910       Opc = NVPTX::BFE_S32rii;
4911     } else {
4912       Opc = NVPTX::BFE_U32rii;
4913     }
4914   } else if (Val.getValueType() == MVT::i64) {
4915     if (IsSigned) {
4916       Opc = NVPTX::BFE_S64rii;
4917     } else {
4918       Opc = NVPTX::BFE_U64rii;
4919     }
4920   } else {
4921     // We cannot handle this type
4922     return NULL;
4923   }
4924 
4925   SDValue Ops[] = {
4926     Val, Start, Len
4927   };
4928 
4929   SDNode *Ret =
4930     CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4931 
4932   return Ret;
4933 }
4934 
4935 // SelectDirectAddr - Match a direct address for DAG.
4936 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)4937 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4938   // Return true if TGA or ES.
4939   if (N.getOpcode() == ISD::TargetGlobalAddress ||
4940       N.getOpcode() == ISD::TargetExternalSymbol) {
4941     Address = N;
4942     return true;
4943   }
4944   if (N.getOpcode() == NVPTXISD::Wrapper) {
4945     Address = N.getOperand(0);
4946     return true;
4947   }
4948   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
4949     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4950     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4951       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4952         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
4953   }
4954   return false;
4955 }
4956 
4957 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)4958 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4959     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4960   if (Addr.getOpcode() == ISD::ADD) {
4961     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4962       SDValue base = Addr.getOperand(0);
4963       if (SelectDirectAddr(base, Base)) {
4964         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
4965         return true;
4966       }
4967     }
4968   }
4969   return false;
4970 }
4971 
4972 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)4973 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4974                                      SDValue &Base, SDValue &Offset) {
4975   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
4976 }
4977 
4978 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)4979 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4980                                        SDValue &Base, SDValue &Offset) {
4981   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
4982 }
4983 
4984 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)4985 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
4986     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4987   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
4988     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4989     Offset = CurDAG->getTargetConstant(0, mvt);
4990     return true;
4991   }
4992   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
4993       Addr.getOpcode() == ISD::TargetGlobalAddress)
4994     return false; // direct calls.
4995 
4996   if (Addr.getOpcode() == ISD::ADD) {
4997     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
4998       return false;
4999     }
5000     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5001       if (FrameIndexSDNode *FIN =
5002               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5003         // Constant offset from frame ref.
5004         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5005       else
5006         Base = Addr.getOperand(0);
5007       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
5008       return true;
5009     }
5010   }
5011   return false;
5012 }
5013 
5014 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5015 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5016                                      SDValue &Base, SDValue &Offset) {
5017   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5018 }
5019 
5020 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5021 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5022                                        SDValue &Base, SDValue &Offset) {
5023   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5024 }
5025 
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const5026 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5027                                                  unsigned int spN) const {
5028   const Value *Src = nullptr;
5029   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5030     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5031       return true;
5032     Src = mN->getMemOperand()->getValue();
5033   }
5034   if (!Src)
5035     return false;
5036   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5037     return (PT->getAddressSpace() == spN);
5038   return false;
5039 }
5040 
5041 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5042 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)5043 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5044     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5045   SDValue Op0, Op1;
5046   switch (ConstraintID) {
5047   default:
5048     return true;
5049   case InlineAsm::Constraint_m: // memory
5050     if (SelectDirectAddr(Op, Op0)) {
5051       OutOps.push_back(Op0);
5052       OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
5053       return false;
5054     }
5055     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5056       OutOps.push_back(Op0);
5057       OutOps.push_back(Op1);
5058       return false;
5059     }
5060     break;
5061   }
5062   return true;
5063 }
5064