1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetIntrinsicInfo.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "nvptx-isel"
28 
29 static cl::opt<int> UsePrecDivF32(
30     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
31     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
32              " IEEE Compliant F32 div.rnd if available."),
33     cl::init(2));
34 
35 static cl::opt<bool>
36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
37           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38           cl::init(true));
39 
40 static cl::opt<bool>
41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
42            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43            cl::init(false));
44 
45 
46 /// createNVPTXISelDag - This pass converts a legalized DAG into a
47 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOpt::Level OptLevel)48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49                                        llvm::CodeGenOpt::Level OptLevel) {
50   return new NVPTXDAGToDAGISel(TM, OptLevel);
51 }
52 
NVPTXDAGToDAGISel(NVPTXTargetMachine & tm,CodeGenOpt::Level OptLevel)53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54                                      CodeGenOpt::Level OptLevel)
55     : SelectionDAGISel(tm, OptLevel), TM(tm) {
56   doMulWide = (OptLevel > 0);
57 }
58 
runOnMachineFunction(MachineFunction & MF)59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60     Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61     return SelectionDAGISel::runOnMachineFunction(MF);
62 }
63 
getDivF32Level() const64 int NVPTXDAGToDAGISel::getDivF32Level() const {
65   if (UsePrecDivF32.getNumOccurrences() > 0) {
66     // If nvptx-prec-div32=N is used on the command-line, always honor it
67     return UsePrecDivF32;
68   } else {
69     // Otherwise, use div.approx if fast math is enabled
70     if (TM.Options.UnsafeFPMath)
71       return 0;
72     else
73       return 2;
74   }
75 }
76 
usePrecSqrtF32() const77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80     return UsePrecSqrtF32;
81   } else {
82     // Otherwise, use sqrt.approx if fast math is enabled
83     return !TM.Options.UnsafeFPMath;
84   }
85 }
86 
useF32FTZ() const87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
88   if (FtzEnabled.getNumOccurrences() > 0) {
89     // If nvptx-f32ftz is used on the command-line, always honor it
90     return FtzEnabled;
91   } else {
92     const Function *F = MF->getFunction();
93     // Otherwise, check for an nvptx-f32ftz attribute on the function
94     if (F->hasFnAttribute("nvptx-f32ftz"))
95       return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
96     else
97       return false;
98   }
99 }
100 
allowFMA() const101 bool NVPTXDAGToDAGISel::allowFMA() const {
102   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
103   return TL->allowFMA(*MF, OptLevel);
104 }
105 
106 /// Select - Select instructions not customized! Used for
107 /// expanded, promoted and normal instructions.
Select(SDNode * N)108 void NVPTXDAGToDAGISel::Select(SDNode *N) {
109 
110   if (N->isMachineOpcode()) {
111     N->setNodeId(-1);
112     return; // Already selected.
113   }
114 
115   switch (N->getOpcode()) {
116   case ISD::LOAD:
117     if (tryLoad(N))
118       return;
119     break;
120   case ISD::STORE:
121     if (tryStore(N))
122       return;
123     break;
124   case NVPTXISD::LoadV2:
125   case NVPTXISD::LoadV4:
126     if (tryLoadVector(N))
127       return;
128     break;
129   case NVPTXISD::LDGV2:
130   case NVPTXISD::LDGV4:
131   case NVPTXISD::LDUV2:
132   case NVPTXISD::LDUV4:
133     if (tryLDGLDU(N))
134       return;
135     break;
136   case NVPTXISD::StoreV2:
137   case NVPTXISD::StoreV4:
138     if (tryStoreVector(N))
139       return;
140     break;
141   case NVPTXISD::LoadParam:
142   case NVPTXISD::LoadParamV2:
143   case NVPTXISD::LoadParamV4:
144     if (tryLoadParam(N))
145       return;
146     break;
147   case NVPTXISD::StoreRetval:
148   case NVPTXISD::StoreRetvalV2:
149   case NVPTXISD::StoreRetvalV4:
150     if (tryStoreRetval(N))
151       return;
152     break;
153   case NVPTXISD::StoreParam:
154   case NVPTXISD::StoreParamV2:
155   case NVPTXISD::StoreParamV4:
156   case NVPTXISD::StoreParamS32:
157   case NVPTXISD::StoreParamU32:
158     if (tryStoreParam(N))
159       return;
160     break;
161   case ISD::INTRINSIC_WO_CHAIN:
162     if (tryIntrinsicNoChain(N))
163       return;
164     break;
165   case ISD::INTRINSIC_W_CHAIN:
166     if (tryIntrinsicChain(N))
167       return;
168     break;
169   case NVPTXISD::Tex1DFloatS32:
170   case NVPTXISD::Tex1DFloatFloat:
171   case NVPTXISD::Tex1DFloatFloatLevel:
172   case NVPTXISD::Tex1DFloatFloatGrad:
173   case NVPTXISD::Tex1DS32S32:
174   case NVPTXISD::Tex1DS32Float:
175   case NVPTXISD::Tex1DS32FloatLevel:
176   case NVPTXISD::Tex1DS32FloatGrad:
177   case NVPTXISD::Tex1DU32S32:
178   case NVPTXISD::Tex1DU32Float:
179   case NVPTXISD::Tex1DU32FloatLevel:
180   case NVPTXISD::Tex1DU32FloatGrad:
181   case NVPTXISD::Tex1DArrayFloatS32:
182   case NVPTXISD::Tex1DArrayFloatFloat:
183   case NVPTXISD::Tex1DArrayFloatFloatLevel:
184   case NVPTXISD::Tex1DArrayFloatFloatGrad:
185   case NVPTXISD::Tex1DArrayS32S32:
186   case NVPTXISD::Tex1DArrayS32Float:
187   case NVPTXISD::Tex1DArrayS32FloatLevel:
188   case NVPTXISD::Tex1DArrayS32FloatGrad:
189   case NVPTXISD::Tex1DArrayU32S32:
190   case NVPTXISD::Tex1DArrayU32Float:
191   case NVPTXISD::Tex1DArrayU32FloatLevel:
192   case NVPTXISD::Tex1DArrayU32FloatGrad:
193   case NVPTXISD::Tex2DFloatS32:
194   case NVPTXISD::Tex2DFloatFloat:
195   case NVPTXISD::Tex2DFloatFloatLevel:
196   case NVPTXISD::Tex2DFloatFloatGrad:
197   case NVPTXISD::Tex2DS32S32:
198   case NVPTXISD::Tex2DS32Float:
199   case NVPTXISD::Tex2DS32FloatLevel:
200   case NVPTXISD::Tex2DS32FloatGrad:
201   case NVPTXISD::Tex2DU32S32:
202   case NVPTXISD::Tex2DU32Float:
203   case NVPTXISD::Tex2DU32FloatLevel:
204   case NVPTXISD::Tex2DU32FloatGrad:
205   case NVPTXISD::Tex2DArrayFloatS32:
206   case NVPTXISD::Tex2DArrayFloatFloat:
207   case NVPTXISD::Tex2DArrayFloatFloatLevel:
208   case NVPTXISD::Tex2DArrayFloatFloatGrad:
209   case NVPTXISD::Tex2DArrayS32S32:
210   case NVPTXISD::Tex2DArrayS32Float:
211   case NVPTXISD::Tex2DArrayS32FloatLevel:
212   case NVPTXISD::Tex2DArrayS32FloatGrad:
213   case NVPTXISD::Tex2DArrayU32S32:
214   case NVPTXISD::Tex2DArrayU32Float:
215   case NVPTXISD::Tex2DArrayU32FloatLevel:
216   case NVPTXISD::Tex2DArrayU32FloatGrad:
217   case NVPTXISD::Tex3DFloatS32:
218   case NVPTXISD::Tex3DFloatFloat:
219   case NVPTXISD::Tex3DFloatFloatLevel:
220   case NVPTXISD::Tex3DFloatFloatGrad:
221   case NVPTXISD::Tex3DS32S32:
222   case NVPTXISD::Tex3DS32Float:
223   case NVPTXISD::Tex3DS32FloatLevel:
224   case NVPTXISD::Tex3DS32FloatGrad:
225   case NVPTXISD::Tex3DU32S32:
226   case NVPTXISD::Tex3DU32Float:
227   case NVPTXISD::Tex3DU32FloatLevel:
228   case NVPTXISD::Tex3DU32FloatGrad:
229   case NVPTXISD::TexCubeFloatFloat:
230   case NVPTXISD::TexCubeFloatFloatLevel:
231   case NVPTXISD::TexCubeS32Float:
232   case NVPTXISD::TexCubeS32FloatLevel:
233   case NVPTXISD::TexCubeU32Float:
234   case NVPTXISD::TexCubeU32FloatLevel:
235   case NVPTXISD::TexCubeArrayFloatFloat:
236   case NVPTXISD::TexCubeArrayFloatFloatLevel:
237   case NVPTXISD::TexCubeArrayS32Float:
238   case NVPTXISD::TexCubeArrayS32FloatLevel:
239   case NVPTXISD::TexCubeArrayU32Float:
240   case NVPTXISD::TexCubeArrayU32FloatLevel:
241   case NVPTXISD::Tld4R2DFloatFloat:
242   case NVPTXISD::Tld4G2DFloatFloat:
243   case NVPTXISD::Tld4B2DFloatFloat:
244   case NVPTXISD::Tld4A2DFloatFloat:
245   case NVPTXISD::Tld4R2DS64Float:
246   case NVPTXISD::Tld4G2DS64Float:
247   case NVPTXISD::Tld4B2DS64Float:
248   case NVPTXISD::Tld4A2DS64Float:
249   case NVPTXISD::Tld4R2DU64Float:
250   case NVPTXISD::Tld4G2DU64Float:
251   case NVPTXISD::Tld4B2DU64Float:
252   case NVPTXISD::Tld4A2DU64Float:
253   case NVPTXISD::TexUnified1DFloatS32:
254   case NVPTXISD::TexUnified1DFloatFloat:
255   case NVPTXISD::TexUnified1DFloatFloatLevel:
256   case NVPTXISD::TexUnified1DFloatFloatGrad:
257   case NVPTXISD::TexUnified1DS32S32:
258   case NVPTXISD::TexUnified1DS32Float:
259   case NVPTXISD::TexUnified1DS32FloatLevel:
260   case NVPTXISD::TexUnified1DS32FloatGrad:
261   case NVPTXISD::TexUnified1DU32S32:
262   case NVPTXISD::TexUnified1DU32Float:
263   case NVPTXISD::TexUnified1DU32FloatLevel:
264   case NVPTXISD::TexUnified1DU32FloatGrad:
265   case NVPTXISD::TexUnified1DArrayFloatS32:
266   case NVPTXISD::TexUnified1DArrayFloatFloat:
267   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
268   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
269   case NVPTXISD::TexUnified1DArrayS32S32:
270   case NVPTXISD::TexUnified1DArrayS32Float:
271   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
272   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
273   case NVPTXISD::TexUnified1DArrayU32S32:
274   case NVPTXISD::TexUnified1DArrayU32Float:
275   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
276   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
277   case NVPTXISD::TexUnified2DFloatS32:
278   case NVPTXISD::TexUnified2DFloatFloat:
279   case NVPTXISD::TexUnified2DFloatFloatLevel:
280   case NVPTXISD::TexUnified2DFloatFloatGrad:
281   case NVPTXISD::TexUnified2DS32S32:
282   case NVPTXISD::TexUnified2DS32Float:
283   case NVPTXISD::TexUnified2DS32FloatLevel:
284   case NVPTXISD::TexUnified2DS32FloatGrad:
285   case NVPTXISD::TexUnified2DU32S32:
286   case NVPTXISD::TexUnified2DU32Float:
287   case NVPTXISD::TexUnified2DU32FloatLevel:
288   case NVPTXISD::TexUnified2DU32FloatGrad:
289   case NVPTXISD::TexUnified2DArrayFloatS32:
290   case NVPTXISD::TexUnified2DArrayFloatFloat:
291   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
292   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
293   case NVPTXISD::TexUnified2DArrayS32S32:
294   case NVPTXISD::TexUnified2DArrayS32Float:
295   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
296   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
297   case NVPTXISD::TexUnified2DArrayU32S32:
298   case NVPTXISD::TexUnified2DArrayU32Float:
299   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
300   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
301   case NVPTXISD::TexUnified3DFloatS32:
302   case NVPTXISD::TexUnified3DFloatFloat:
303   case NVPTXISD::TexUnified3DFloatFloatLevel:
304   case NVPTXISD::TexUnified3DFloatFloatGrad:
305   case NVPTXISD::TexUnified3DS32S32:
306   case NVPTXISD::TexUnified3DS32Float:
307   case NVPTXISD::TexUnified3DS32FloatLevel:
308   case NVPTXISD::TexUnified3DS32FloatGrad:
309   case NVPTXISD::TexUnified3DU32S32:
310   case NVPTXISD::TexUnified3DU32Float:
311   case NVPTXISD::TexUnified3DU32FloatLevel:
312   case NVPTXISD::TexUnified3DU32FloatGrad:
313   case NVPTXISD::TexUnifiedCubeFloatFloat:
314   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
315   case NVPTXISD::TexUnifiedCubeS32Float:
316   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
317   case NVPTXISD::TexUnifiedCubeU32Float:
318   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
319   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
320   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
321   case NVPTXISD::TexUnifiedCubeArrayS32Float:
322   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
323   case NVPTXISD::TexUnifiedCubeArrayU32Float:
324   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
325   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
326   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
327   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
328   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
329   case NVPTXISD::Tld4UnifiedR2DS64Float:
330   case NVPTXISD::Tld4UnifiedG2DS64Float:
331   case NVPTXISD::Tld4UnifiedB2DS64Float:
332   case NVPTXISD::Tld4UnifiedA2DS64Float:
333   case NVPTXISD::Tld4UnifiedR2DU64Float:
334   case NVPTXISD::Tld4UnifiedG2DU64Float:
335   case NVPTXISD::Tld4UnifiedB2DU64Float:
336   case NVPTXISD::Tld4UnifiedA2DU64Float:
337     if (tryTextureIntrinsic(N))
338       return;
339     break;
340   case NVPTXISD::Suld1DI8Clamp:
341   case NVPTXISD::Suld1DI16Clamp:
342   case NVPTXISD::Suld1DI32Clamp:
343   case NVPTXISD::Suld1DI64Clamp:
344   case NVPTXISD::Suld1DV2I8Clamp:
345   case NVPTXISD::Suld1DV2I16Clamp:
346   case NVPTXISD::Suld1DV2I32Clamp:
347   case NVPTXISD::Suld1DV2I64Clamp:
348   case NVPTXISD::Suld1DV4I8Clamp:
349   case NVPTXISD::Suld1DV4I16Clamp:
350   case NVPTXISD::Suld1DV4I32Clamp:
351   case NVPTXISD::Suld1DArrayI8Clamp:
352   case NVPTXISD::Suld1DArrayI16Clamp:
353   case NVPTXISD::Suld1DArrayI32Clamp:
354   case NVPTXISD::Suld1DArrayI64Clamp:
355   case NVPTXISD::Suld1DArrayV2I8Clamp:
356   case NVPTXISD::Suld1DArrayV2I16Clamp:
357   case NVPTXISD::Suld1DArrayV2I32Clamp:
358   case NVPTXISD::Suld1DArrayV2I64Clamp:
359   case NVPTXISD::Suld1DArrayV4I8Clamp:
360   case NVPTXISD::Suld1DArrayV4I16Clamp:
361   case NVPTXISD::Suld1DArrayV4I32Clamp:
362   case NVPTXISD::Suld2DI8Clamp:
363   case NVPTXISD::Suld2DI16Clamp:
364   case NVPTXISD::Suld2DI32Clamp:
365   case NVPTXISD::Suld2DI64Clamp:
366   case NVPTXISD::Suld2DV2I8Clamp:
367   case NVPTXISD::Suld2DV2I16Clamp:
368   case NVPTXISD::Suld2DV2I32Clamp:
369   case NVPTXISD::Suld2DV2I64Clamp:
370   case NVPTXISD::Suld2DV4I8Clamp:
371   case NVPTXISD::Suld2DV4I16Clamp:
372   case NVPTXISD::Suld2DV4I32Clamp:
373   case NVPTXISD::Suld2DArrayI8Clamp:
374   case NVPTXISD::Suld2DArrayI16Clamp:
375   case NVPTXISD::Suld2DArrayI32Clamp:
376   case NVPTXISD::Suld2DArrayI64Clamp:
377   case NVPTXISD::Suld2DArrayV2I8Clamp:
378   case NVPTXISD::Suld2DArrayV2I16Clamp:
379   case NVPTXISD::Suld2DArrayV2I32Clamp:
380   case NVPTXISD::Suld2DArrayV2I64Clamp:
381   case NVPTXISD::Suld2DArrayV4I8Clamp:
382   case NVPTXISD::Suld2DArrayV4I16Clamp:
383   case NVPTXISD::Suld2DArrayV4I32Clamp:
384   case NVPTXISD::Suld3DI8Clamp:
385   case NVPTXISD::Suld3DI16Clamp:
386   case NVPTXISD::Suld3DI32Clamp:
387   case NVPTXISD::Suld3DI64Clamp:
388   case NVPTXISD::Suld3DV2I8Clamp:
389   case NVPTXISD::Suld3DV2I16Clamp:
390   case NVPTXISD::Suld3DV2I32Clamp:
391   case NVPTXISD::Suld3DV2I64Clamp:
392   case NVPTXISD::Suld3DV4I8Clamp:
393   case NVPTXISD::Suld3DV4I16Clamp:
394   case NVPTXISD::Suld3DV4I32Clamp:
395   case NVPTXISD::Suld1DI8Trap:
396   case NVPTXISD::Suld1DI16Trap:
397   case NVPTXISD::Suld1DI32Trap:
398   case NVPTXISD::Suld1DI64Trap:
399   case NVPTXISD::Suld1DV2I8Trap:
400   case NVPTXISD::Suld1DV2I16Trap:
401   case NVPTXISD::Suld1DV2I32Trap:
402   case NVPTXISD::Suld1DV2I64Trap:
403   case NVPTXISD::Suld1DV4I8Trap:
404   case NVPTXISD::Suld1DV4I16Trap:
405   case NVPTXISD::Suld1DV4I32Trap:
406   case NVPTXISD::Suld1DArrayI8Trap:
407   case NVPTXISD::Suld1DArrayI16Trap:
408   case NVPTXISD::Suld1DArrayI32Trap:
409   case NVPTXISD::Suld1DArrayI64Trap:
410   case NVPTXISD::Suld1DArrayV2I8Trap:
411   case NVPTXISD::Suld1DArrayV2I16Trap:
412   case NVPTXISD::Suld1DArrayV2I32Trap:
413   case NVPTXISD::Suld1DArrayV2I64Trap:
414   case NVPTXISD::Suld1DArrayV4I8Trap:
415   case NVPTXISD::Suld1DArrayV4I16Trap:
416   case NVPTXISD::Suld1DArrayV4I32Trap:
417   case NVPTXISD::Suld2DI8Trap:
418   case NVPTXISD::Suld2DI16Trap:
419   case NVPTXISD::Suld2DI32Trap:
420   case NVPTXISD::Suld2DI64Trap:
421   case NVPTXISD::Suld2DV2I8Trap:
422   case NVPTXISD::Suld2DV2I16Trap:
423   case NVPTXISD::Suld2DV2I32Trap:
424   case NVPTXISD::Suld2DV2I64Trap:
425   case NVPTXISD::Suld2DV4I8Trap:
426   case NVPTXISD::Suld2DV4I16Trap:
427   case NVPTXISD::Suld2DV4I32Trap:
428   case NVPTXISD::Suld2DArrayI8Trap:
429   case NVPTXISD::Suld2DArrayI16Trap:
430   case NVPTXISD::Suld2DArrayI32Trap:
431   case NVPTXISD::Suld2DArrayI64Trap:
432   case NVPTXISD::Suld2DArrayV2I8Trap:
433   case NVPTXISD::Suld2DArrayV2I16Trap:
434   case NVPTXISD::Suld2DArrayV2I32Trap:
435   case NVPTXISD::Suld2DArrayV2I64Trap:
436   case NVPTXISD::Suld2DArrayV4I8Trap:
437   case NVPTXISD::Suld2DArrayV4I16Trap:
438   case NVPTXISD::Suld2DArrayV4I32Trap:
439   case NVPTXISD::Suld3DI8Trap:
440   case NVPTXISD::Suld3DI16Trap:
441   case NVPTXISD::Suld3DI32Trap:
442   case NVPTXISD::Suld3DI64Trap:
443   case NVPTXISD::Suld3DV2I8Trap:
444   case NVPTXISD::Suld3DV2I16Trap:
445   case NVPTXISD::Suld3DV2I32Trap:
446   case NVPTXISD::Suld3DV2I64Trap:
447   case NVPTXISD::Suld3DV4I8Trap:
448   case NVPTXISD::Suld3DV4I16Trap:
449   case NVPTXISD::Suld3DV4I32Trap:
450   case NVPTXISD::Suld1DI8Zero:
451   case NVPTXISD::Suld1DI16Zero:
452   case NVPTXISD::Suld1DI32Zero:
453   case NVPTXISD::Suld1DI64Zero:
454   case NVPTXISD::Suld1DV2I8Zero:
455   case NVPTXISD::Suld1DV2I16Zero:
456   case NVPTXISD::Suld1DV2I32Zero:
457   case NVPTXISD::Suld1DV2I64Zero:
458   case NVPTXISD::Suld1DV4I8Zero:
459   case NVPTXISD::Suld1DV4I16Zero:
460   case NVPTXISD::Suld1DV4I32Zero:
461   case NVPTXISD::Suld1DArrayI8Zero:
462   case NVPTXISD::Suld1DArrayI16Zero:
463   case NVPTXISD::Suld1DArrayI32Zero:
464   case NVPTXISD::Suld1DArrayI64Zero:
465   case NVPTXISD::Suld1DArrayV2I8Zero:
466   case NVPTXISD::Suld1DArrayV2I16Zero:
467   case NVPTXISD::Suld1DArrayV2I32Zero:
468   case NVPTXISD::Suld1DArrayV2I64Zero:
469   case NVPTXISD::Suld1DArrayV4I8Zero:
470   case NVPTXISD::Suld1DArrayV4I16Zero:
471   case NVPTXISD::Suld1DArrayV4I32Zero:
472   case NVPTXISD::Suld2DI8Zero:
473   case NVPTXISD::Suld2DI16Zero:
474   case NVPTXISD::Suld2DI32Zero:
475   case NVPTXISD::Suld2DI64Zero:
476   case NVPTXISD::Suld2DV2I8Zero:
477   case NVPTXISD::Suld2DV2I16Zero:
478   case NVPTXISD::Suld2DV2I32Zero:
479   case NVPTXISD::Suld2DV2I64Zero:
480   case NVPTXISD::Suld2DV4I8Zero:
481   case NVPTXISD::Suld2DV4I16Zero:
482   case NVPTXISD::Suld2DV4I32Zero:
483   case NVPTXISD::Suld2DArrayI8Zero:
484   case NVPTXISD::Suld2DArrayI16Zero:
485   case NVPTXISD::Suld2DArrayI32Zero:
486   case NVPTXISD::Suld2DArrayI64Zero:
487   case NVPTXISD::Suld2DArrayV2I8Zero:
488   case NVPTXISD::Suld2DArrayV2I16Zero:
489   case NVPTXISD::Suld2DArrayV2I32Zero:
490   case NVPTXISD::Suld2DArrayV2I64Zero:
491   case NVPTXISD::Suld2DArrayV4I8Zero:
492   case NVPTXISD::Suld2DArrayV4I16Zero:
493   case NVPTXISD::Suld2DArrayV4I32Zero:
494   case NVPTXISD::Suld3DI8Zero:
495   case NVPTXISD::Suld3DI16Zero:
496   case NVPTXISD::Suld3DI32Zero:
497   case NVPTXISD::Suld3DI64Zero:
498   case NVPTXISD::Suld3DV2I8Zero:
499   case NVPTXISD::Suld3DV2I16Zero:
500   case NVPTXISD::Suld3DV2I32Zero:
501   case NVPTXISD::Suld3DV2I64Zero:
502   case NVPTXISD::Suld3DV4I8Zero:
503   case NVPTXISD::Suld3DV4I16Zero:
504   case NVPTXISD::Suld3DV4I32Zero:
505     if (trySurfaceIntrinsic(N))
506       return;
507     break;
508   case ISD::AND:
509   case ISD::SRA:
510   case ISD::SRL:
511     // Try to select BFE
512     if (tryBFE(N))
513       return;
514     break;
515   case ISD::ADDRSPACECAST:
516     SelectAddrSpaceCast(N);
517     return;
518   default:
519     break;
520   }
521   SelectCode(N);
522 }
523 
tryIntrinsicChain(SDNode * N)524 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
525   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
526   switch (IID) {
527   default:
528     return false;
529   case Intrinsic::nvvm_ldg_global_f:
530   case Intrinsic::nvvm_ldg_global_i:
531   case Intrinsic::nvvm_ldg_global_p:
532   case Intrinsic::nvvm_ldu_global_f:
533   case Intrinsic::nvvm_ldu_global_i:
534   case Intrinsic::nvvm_ldu_global_p:
535     return tryLDGLDU(N);
536   }
537 }
538 
getCodeAddrSpace(MemSDNode * N)539 static unsigned int getCodeAddrSpace(MemSDNode *N) {
540   const Value *Src = N->getMemOperand()->getValue();
541 
542   if (!Src)
543     return NVPTX::PTXLdStInstCode::GENERIC;
544 
545   if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
546     switch (PT->getAddressSpace()) {
547     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
548     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
549     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
550     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
551     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
552     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
553     default: break;
554     }
555   }
556   return NVPTX::PTXLdStInstCode::GENERIC;
557 }
558 
canLowerToLDG(MemSDNode * N,const NVPTXSubtarget & Subtarget,unsigned CodeAddrSpace,MachineFunction * F)559 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
560                           unsigned CodeAddrSpace, MachineFunction *F) {
561   // To use non-coherent caching, the load has to be from global
562   // memory and we have to prove that the memory area is not written
563   // to anywhere for the duration of the kernel call, not even after
564   // the load.
565   //
566   // To ensure that there are no writes to the memory, we require the
567   // underlying pointer to be a noalias (__restrict) kernel parameter
568   // that is never used for a write. We can only do this for kernel
569   // functions since from within a device function, we cannot know if
570   // there were or will be writes to the memory from the caller - or we
571   // could, but then we would have to do inter-procedural analysis.
572   if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
573       !isKernelFunction(*F->getFunction())) {
574     return false;
575   }
576 
577   // We use GetUnderlyingObjects() here instead of
578   // GetUnderlyingObject() mainly because the former looks through phi
579   // nodes while the latter does not. We need to look through phi
580   // nodes to handle pointer induction variables.
581   SmallVector<Value *, 8> Objs;
582   GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
583                        Objs, F->getDataLayout());
584   for (Value *Obj : Objs) {
585     auto *A = dyn_cast<const Argument>(Obj);
586     if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
587   }
588 
589   return true;
590 }
591 
tryIntrinsicNoChain(SDNode * N)592 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
593   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
594   switch (IID) {
595   default:
596     return false;
597   case Intrinsic::nvvm_texsurf_handle_internal:
598     SelectTexSurfHandle(N);
599     return true;
600   }
601 }
602 
SelectTexSurfHandle(SDNode * N)603 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
604   // Op 0 is the intrinsic ID
605   SDValue Wrapper = N->getOperand(1);
606   SDValue GlobalVal = Wrapper.getOperand(0);
607   ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
608                                         MVT::i64, GlobalVal));
609 }
610 
SelectAddrSpaceCast(SDNode * N)611 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
612   SDValue Src = N->getOperand(0);
613   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
614   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
615   unsigned DstAddrSpace = CastN->getDestAddressSpace();
616 
617   assert(SrcAddrSpace != DstAddrSpace &&
618          "addrspacecast must be between different address spaces");
619 
620   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
621     // Specific to generic
622     unsigned Opc;
623     switch (SrcAddrSpace) {
624     default: report_fatal_error("Bad address space in addrspacecast");
625     case ADDRESS_SPACE_GLOBAL:
626       Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
627       break;
628     case ADDRESS_SPACE_SHARED:
629       Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
630       break;
631     case ADDRESS_SPACE_CONST:
632       Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
633       break;
634     case ADDRESS_SPACE_LOCAL:
635       Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
636       break;
637     }
638     ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
639                                           Src));
640     return;
641   } else {
642     // Generic to specific
643     if (SrcAddrSpace != 0)
644       report_fatal_error("Cannot cast between two non-generic address spaces");
645     unsigned Opc;
646     switch (DstAddrSpace) {
647     default: report_fatal_error("Bad address space in addrspacecast");
648     case ADDRESS_SPACE_GLOBAL:
649       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
650                          : NVPTX::cvta_to_global_yes;
651       break;
652     case ADDRESS_SPACE_SHARED:
653       Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
654                          : NVPTX::cvta_to_shared_yes;
655       break;
656     case ADDRESS_SPACE_CONST:
657       Opc =
658           TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
659       break;
660     case ADDRESS_SPACE_LOCAL:
661       Opc =
662           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
663       break;
664     case ADDRESS_SPACE_PARAM:
665       Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
666                          : NVPTX::nvvm_ptr_gen_to_param;
667       break;
668     }
669     ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
670                                           Src));
671     return;
672   }
673 }
674 
tryLoad(SDNode * N)675 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
676   SDLoc dl(N);
677   LoadSDNode *LD = cast<LoadSDNode>(N);
678   EVT LoadedVT = LD->getMemoryVT();
679   SDNode *NVPTXLD = nullptr;
680 
681   // do not support pre/post inc/dec
682   if (LD->isIndexed())
683     return false;
684 
685   if (!LoadedVT.isSimple())
686     return false;
687 
688   // Address Space Setting
689   unsigned int codeAddrSpace = getCodeAddrSpace(LD);
690 
691   if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
692     return tryLDGLDU(N);
693   }
694 
695   // Volatile Setting
  // - .volatile is only available for .global and .shared
697   bool isVolatile = LD->isVolatile();
698   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
699       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
700       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
701     isVolatile = false;
702 
703   // Vector Setting
704   MVT SimpleVT = LoadedVT.getSimpleVT();
705   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
706   if (SimpleVT.isVector()) {
707     unsigned num = SimpleVT.getVectorNumElements();
708     if (num == 2)
709       vecType = NVPTX::PTXLdStInstCode::V2;
710     else if (num == 4)
711       vecType = NVPTX::PTXLdStInstCode::V4;
712     else
713       return false;
714   }
715 
716   // Type Setting: fromType + fromTypeWidth
717   //
718   // Sign   : ISD::SEXTLOAD
719   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
720   //          type is integer
721   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
722   MVT ScalarVT = SimpleVT.getScalarType();
723   // Read at least 8 bits (predicates are stored as 8-bit values)
724   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
725   unsigned int fromType;
726   if ((LD->getExtensionType() == ISD::SEXTLOAD))
727     fromType = NVPTX::PTXLdStInstCode::Signed;
728   else if (ScalarVT.isFloatingPoint())
729     fromType = NVPTX::PTXLdStInstCode::Float;
730   else
731     fromType = NVPTX::PTXLdStInstCode::Unsigned;
732 
733   // Create the machine instruction DAG
734   SDValue Chain = N->getOperand(0);
735   SDValue N1 = N->getOperand(1);
736   SDValue Addr;
737   SDValue Offset, Base;
738   unsigned Opcode;
739   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
740 
741   if (SelectDirectAddr(N1, Addr)) {
742     switch (TargetVT) {
743     case MVT::i8:
744       Opcode = NVPTX::LD_i8_avar;
745       break;
746     case MVT::i16:
747       Opcode = NVPTX::LD_i16_avar;
748       break;
749     case MVT::i32:
750       Opcode = NVPTX::LD_i32_avar;
751       break;
752     case MVT::i64:
753       Opcode = NVPTX::LD_i64_avar;
754       break;
755     case MVT::f32:
756       Opcode = NVPTX::LD_f32_avar;
757       break;
758     case MVT::f64:
759       Opcode = NVPTX::LD_f64_avar;
760       break;
761     default:
762       return false;
763     }
764     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
765                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
766                       getI32Imm(fromTypeWidth, dl), Addr, Chain };
767     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
768   } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
769                           : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
770     switch (TargetVT) {
771     case MVT::i8:
772       Opcode = NVPTX::LD_i8_asi;
773       break;
774     case MVT::i16:
775       Opcode = NVPTX::LD_i16_asi;
776       break;
777     case MVT::i32:
778       Opcode = NVPTX::LD_i32_asi;
779       break;
780     case MVT::i64:
781       Opcode = NVPTX::LD_i64_asi;
782       break;
783     case MVT::f32:
784       Opcode = NVPTX::LD_f32_asi;
785       break;
786     case MVT::f64:
787       Opcode = NVPTX::LD_f64_asi;
788       break;
789     default:
790       return false;
791     }
792     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
793                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
794                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
795     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
796   } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
797                           : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
798     if (TM.is64Bit()) {
799       switch (TargetVT) {
800       case MVT::i8:
801         Opcode = NVPTX::LD_i8_ari_64;
802         break;
803       case MVT::i16:
804         Opcode = NVPTX::LD_i16_ari_64;
805         break;
806       case MVT::i32:
807         Opcode = NVPTX::LD_i32_ari_64;
808         break;
809       case MVT::i64:
810         Opcode = NVPTX::LD_i64_ari_64;
811         break;
812       case MVT::f32:
813         Opcode = NVPTX::LD_f32_ari_64;
814         break;
815       case MVT::f64:
816         Opcode = NVPTX::LD_f64_ari_64;
817         break;
818       default:
819         return false;
820       }
821     } else {
822       switch (TargetVT) {
823       case MVT::i8:
824         Opcode = NVPTX::LD_i8_ari;
825         break;
826       case MVT::i16:
827         Opcode = NVPTX::LD_i16_ari;
828         break;
829       case MVT::i32:
830         Opcode = NVPTX::LD_i32_ari;
831         break;
832       case MVT::i64:
833         Opcode = NVPTX::LD_i64_ari;
834         break;
835       case MVT::f32:
836         Opcode = NVPTX::LD_f32_ari;
837         break;
838       case MVT::f64:
839         Opcode = NVPTX::LD_f64_ari;
840         break;
841       default:
842         return false;
843       }
844     }
845     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
846                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
847                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
848     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
849   } else {
850     if (TM.is64Bit()) {
851       switch (TargetVT) {
852       case MVT::i8:
853         Opcode = NVPTX::LD_i8_areg_64;
854         break;
855       case MVT::i16:
856         Opcode = NVPTX::LD_i16_areg_64;
857         break;
858       case MVT::i32:
859         Opcode = NVPTX::LD_i32_areg_64;
860         break;
861       case MVT::i64:
862         Opcode = NVPTX::LD_i64_areg_64;
863         break;
864       case MVT::f32:
865         Opcode = NVPTX::LD_f32_areg_64;
866         break;
867       case MVT::f64:
868         Opcode = NVPTX::LD_f64_areg_64;
869         break;
870       default:
871         return false;
872       }
873     } else {
874       switch (TargetVT) {
875       case MVT::i8:
876         Opcode = NVPTX::LD_i8_areg;
877         break;
878       case MVT::i16:
879         Opcode = NVPTX::LD_i16_areg;
880         break;
881       case MVT::i32:
882         Opcode = NVPTX::LD_i32_areg;
883         break;
884       case MVT::i64:
885         Opcode = NVPTX::LD_i64_areg;
886         break;
887       case MVT::f32:
888         Opcode = NVPTX::LD_f32_areg;
889         break;
890       case MVT::f64:
891         Opcode = NVPTX::LD_f64_areg;
892         break;
893       default:
894         return false;
895       }
896     }
897     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
898                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
899                       getI32Imm(fromTypeWidth, dl), N1, Chain };
900     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
901   }
902 
903   if (!NVPTXLD)
904     return false;
905 
906   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
907   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
908   cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
909 
910   ReplaceNode(N, NVPTXLD);
911   return true;
912 }
913 
tryLoadVector(SDNode * N)914 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
915 
916   SDValue Chain = N->getOperand(0);
917   SDValue Op1 = N->getOperand(1);
918   SDValue Addr, Offset, Base;
919   unsigned Opcode;
920   SDLoc DL(N);
921   SDNode *LD;
922   MemSDNode *MemSD = cast<MemSDNode>(N);
923   EVT LoadedVT = MemSD->getMemoryVT();
924 
925   if (!LoadedVT.isSimple())
926     return false;
927 
928   // Address Space Setting
929   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
930 
931   if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
932     return tryLDGLDU(N);
933   }
934 
935   // Volatile Setting
936   // - .volatile is only availalble for .global and .shared
937   bool IsVolatile = MemSD->isVolatile();
938   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
939       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
940       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
941     IsVolatile = false;
942 
943   // Vector Setting
944   MVT SimpleVT = LoadedVT.getSimpleVT();
945 
946   // Type Setting: fromType + fromTypeWidth
947   //
948   // Sign   : ISD::SEXTLOAD
949   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
950   //          type is integer
951   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
952   MVT ScalarVT = SimpleVT.getScalarType();
953   // Read at least 8 bits (predicates are stored as 8-bit values)
954   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
955   unsigned int FromType;
956   // The last operand holds the original LoadSDNode::getExtensionType() value
957   unsigned ExtensionType = cast<ConstantSDNode>(
958       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
959   if (ExtensionType == ISD::SEXTLOAD)
960     FromType = NVPTX::PTXLdStInstCode::Signed;
961   else if (ScalarVT.isFloatingPoint())
962     FromType = NVPTX::PTXLdStInstCode::Float;
963   else
964     FromType = NVPTX::PTXLdStInstCode::Unsigned;
965 
966   unsigned VecType;
967 
968   switch (N->getOpcode()) {
969   case NVPTXISD::LoadV2:
970     VecType = NVPTX::PTXLdStInstCode::V2;
971     break;
972   case NVPTXISD::LoadV4:
973     VecType = NVPTX::PTXLdStInstCode::V4;
974     break;
975   default:
976     return false;
977   }
978 
979   EVT EltVT = N->getValueType(0);
980 
981   if (SelectDirectAddr(Op1, Addr)) {
982     switch (N->getOpcode()) {
983     default:
984       return false;
985     case NVPTXISD::LoadV2:
986       switch (EltVT.getSimpleVT().SimpleTy) {
987       default:
988         return false;
989       case MVT::i8:
990         Opcode = NVPTX::LDV_i8_v2_avar;
991         break;
992       case MVT::i16:
993         Opcode = NVPTX::LDV_i16_v2_avar;
994         break;
995       case MVT::i32:
996         Opcode = NVPTX::LDV_i32_v2_avar;
997         break;
998       case MVT::i64:
999         Opcode = NVPTX::LDV_i64_v2_avar;
1000         break;
1001       case MVT::f32:
1002         Opcode = NVPTX::LDV_f32_v2_avar;
1003         break;
1004       case MVT::f64:
1005         Opcode = NVPTX::LDV_f64_v2_avar;
1006         break;
1007       }
1008       break;
1009     case NVPTXISD::LoadV4:
1010       switch (EltVT.getSimpleVT().SimpleTy) {
1011       default:
1012         return false;
1013       case MVT::i8:
1014         Opcode = NVPTX::LDV_i8_v4_avar;
1015         break;
1016       case MVT::i16:
1017         Opcode = NVPTX::LDV_i16_v4_avar;
1018         break;
1019       case MVT::i32:
1020         Opcode = NVPTX::LDV_i32_v4_avar;
1021         break;
1022       case MVT::f32:
1023         Opcode = NVPTX::LDV_f32_v4_avar;
1024         break;
1025       }
1026       break;
1027     }
1028 
1029     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1030                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1031                       getI32Imm(FromTypeWidth, DL), Addr, Chain };
1032     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1033   } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1034                           : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1035     switch (N->getOpcode()) {
1036     default:
1037       return false;
1038     case NVPTXISD::LoadV2:
1039       switch (EltVT.getSimpleVT().SimpleTy) {
1040       default:
1041         return false;
1042       case MVT::i8:
1043         Opcode = NVPTX::LDV_i8_v2_asi;
1044         break;
1045       case MVT::i16:
1046         Opcode = NVPTX::LDV_i16_v2_asi;
1047         break;
1048       case MVT::i32:
1049         Opcode = NVPTX::LDV_i32_v2_asi;
1050         break;
1051       case MVT::i64:
1052         Opcode = NVPTX::LDV_i64_v2_asi;
1053         break;
1054       case MVT::f32:
1055         Opcode = NVPTX::LDV_f32_v2_asi;
1056         break;
1057       case MVT::f64:
1058         Opcode = NVPTX::LDV_f64_v2_asi;
1059         break;
1060       }
1061       break;
1062     case NVPTXISD::LoadV4:
1063       switch (EltVT.getSimpleVT().SimpleTy) {
1064       default:
1065         return false;
1066       case MVT::i8:
1067         Opcode = NVPTX::LDV_i8_v4_asi;
1068         break;
1069       case MVT::i16:
1070         Opcode = NVPTX::LDV_i16_v4_asi;
1071         break;
1072       case MVT::i32:
1073         Opcode = NVPTX::LDV_i32_v4_asi;
1074         break;
1075       case MVT::f32:
1076         Opcode = NVPTX::LDV_f32_v4_asi;
1077         break;
1078       }
1079       break;
1080     }
1081 
1082     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1083                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1084                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1085     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1086   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1087                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1088     if (TM.is64Bit()) {
1089       switch (N->getOpcode()) {
1090       default:
1091         return false;
1092       case NVPTXISD::LoadV2:
1093         switch (EltVT.getSimpleVT().SimpleTy) {
1094         default:
1095           return false;
1096         case MVT::i8:
1097           Opcode = NVPTX::LDV_i8_v2_ari_64;
1098           break;
1099         case MVT::i16:
1100           Opcode = NVPTX::LDV_i16_v2_ari_64;
1101           break;
1102         case MVT::i32:
1103           Opcode = NVPTX::LDV_i32_v2_ari_64;
1104           break;
1105         case MVT::i64:
1106           Opcode = NVPTX::LDV_i64_v2_ari_64;
1107           break;
1108         case MVT::f32:
1109           Opcode = NVPTX::LDV_f32_v2_ari_64;
1110           break;
1111         case MVT::f64:
1112           Opcode = NVPTX::LDV_f64_v2_ari_64;
1113           break;
1114         }
1115         break;
1116       case NVPTXISD::LoadV4:
1117         switch (EltVT.getSimpleVT().SimpleTy) {
1118         default:
1119           return false;
1120         case MVT::i8:
1121           Opcode = NVPTX::LDV_i8_v4_ari_64;
1122           break;
1123         case MVT::i16:
1124           Opcode = NVPTX::LDV_i16_v4_ari_64;
1125           break;
1126         case MVT::i32:
1127           Opcode = NVPTX::LDV_i32_v4_ari_64;
1128           break;
1129         case MVT::f32:
1130           Opcode = NVPTX::LDV_f32_v4_ari_64;
1131           break;
1132         }
1133         break;
1134       }
1135     } else {
1136       switch (N->getOpcode()) {
1137       default:
1138         return false;
1139       case NVPTXISD::LoadV2:
1140         switch (EltVT.getSimpleVT().SimpleTy) {
1141         default:
1142           return false;
1143         case MVT::i8:
1144           Opcode = NVPTX::LDV_i8_v2_ari;
1145           break;
1146         case MVT::i16:
1147           Opcode = NVPTX::LDV_i16_v2_ari;
1148           break;
1149         case MVT::i32:
1150           Opcode = NVPTX::LDV_i32_v2_ari;
1151           break;
1152         case MVT::i64:
1153           Opcode = NVPTX::LDV_i64_v2_ari;
1154           break;
1155         case MVT::f32:
1156           Opcode = NVPTX::LDV_f32_v2_ari;
1157           break;
1158         case MVT::f64:
1159           Opcode = NVPTX::LDV_f64_v2_ari;
1160           break;
1161         }
1162         break;
1163       case NVPTXISD::LoadV4:
1164         switch (EltVT.getSimpleVT().SimpleTy) {
1165         default:
1166           return false;
1167         case MVT::i8:
1168           Opcode = NVPTX::LDV_i8_v4_ari;
1169           break;
1170         case MVT::i16:
1171           Opcode = NVPTX::LDV_i16_v4_ari;
1172           break;
1173         case MVT::i32:
1174           Opcode = NVPTX::LDV_i32_v4_ari;
1175           break;
1176         case MVT::f32:
1177           Opcode = NVPTX::LDV_f32_v4_ari;
1178           break;
1179         }
1180         break;
1181       }
1182     }
1183 
1184     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1185                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1186                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1187 
1188     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1189   } else {
1190     if (TM.is64Bit()) {
1191       switch (N->getOpcode()) {
1192       default:
1193         return false;
1194       case NVPTXISD::LoadV2:
1195         switch (EltVT.getSimpleVT().SimpleTy) {
1196         default:
1197           return false;
1198         case MVT::i8:
1199           Opcode = NVPTX::LDV_i8_v2_areg_64;
1200           break;
1201         case MVT::i16:
1202           Opcode = NVPTX::LDV_i16_v2_areg_64;
1203           break;
1204         case MVT::i32:
1205           Opcode = NVPTX::LDV_i32_v2_areg_64;
1206           break;
1207         case MVT::i64:
1208           Opcode = NVPTX::LDV_i64_v2_areg_64;
1209           break;
1210         case MVT::f32:
1211           Opcode = NVPTX::LDV_f32_v2_areg_64;
1212           break;
1213         case MVT::f64:
1214           Opcode = NVPTX::LDV_f64_v2_areg_64;
1215           break;
1216         }
1217         break;
1218       case NVPTXISD::LoadV4:
1219         switch (EltVT.getSimpleVT().SimpleTy) {
1220         default:
1221           return false;
1222         case MVT::i8:
1223           Opcode = NVPTX::LDV_i8_v4_areg_64;
1224           break;
1225         case MVT::i16:
1226           Opcode = NVPTX::LDV_i16_v4_areg_64;
1227           break;
1228         case MVT::i32:
1229           Opcode = NVPTX::LDV_i32_v4_areg_64;
1230           break;
1231         case MVT::f32:
1232           Opcode = NVPTX::LDV_f32_v4_areg_64;
1233           break;
1234         }
1235         break;
1236       }
1237     } else {
1238       switch (N->getOpcode()) {
1239       default:
1240         return false;
1241       case NVPTXISD::LoadV2:
1242         switch (EltVT.getSimpleVT().SimpleTy) {
1243         default:
1244           return false;
1245         case MVT::i8:
1246           Opcode = NVPTX::LDV_i8_v2_areg;
1247           break;
1248         case MVT::i16:
1249           Opcode = NVPTX::LDV_i16_v2_areg;
1250           break;
1251         case MVT::i32:
1252           Opcode = NVPTX::LDV_i32_v2_areg;
1253           break;
1254         case MVT::i64:
1255           Opcode = NVPTX::LDV_i64_v2_areg;
1256           break;
1257         case MVT::f32:
1258           Opcode = NVPTX::LDV_f32_v2_areg;
1259           break;
1260         case MVT::f64:
1261           Opcode = NVPTX::LDV_f64_v2_areg;
1262           break;
1263         }
1264         break;
1265       case NVPTXISD::LoadV4:
1266         switch (EltVT.getSimpleVT().SimpleTy) {
1267         default:
1268           return false;
1269         case MVT::i8:
1270           Opcode = NVPTX::LDV_i8_v4_areg;
1271           break;
1272         case MVT::i16:
1273           Opcode = NVPTX::LDV_i16_v4_areg;
1274           break;
1275         case MVT::i32:
1276           Opcode = NVPTX::LDV_i32_v4_areg;
1277           break;
1278         case MVT::f32:
1279           Opcode = NVPTX::LDV_f32_v4_areg;
1280           break;
1281         }
1282         break;
1283       }
1284     }
1285 
1286     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1287                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1288                       getI32Imm(FromTypeWidth, DL), Op1, Chain };
1289     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1290   }
1291 
1292   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1293   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1294   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1295 
1296   ReplaceNode(N, LD);
1297   return true;
1298 }
1299 
tryLDGLDU(SDNode * N)1300 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1301 
1302   SDValue Chain = N->getOperand(0);
1303   SDValue Op1;
1304   MemSDNode *Mem;
1305   bool IsLDG = true;
1306 
  // If this is an LDG intrinsic, the address is the third operand. If it is an
  // LDG/LDU SD node (from custom vector handling), then it is the second
  // operand.
1309   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1310     Op1 = N->getOperand(2);
1311     Mem = cast<MemIntrinsicSDNode>(N);
1312     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1313     switch (IID) {
1314     default:
1315       return false;
1316     case Intrinsic::nvvm_ldg_global_f:
1317     case Intrinsic::nvvm_ldg_global_i:
1318     case Intrinsic::nvvm_ldg_global_p:
1319       IsLDG = true;
1320       break;
1321     case Intrinsic::nvvm_ldu_global_f:
1322     case Intrinsic::nvvm_ldu_global_i:
1323     case Intrinsic::nvvm_ldu_global_p:
1324       IsLDG = false;
1325       break;
1326     }
1327   } else {
1328     Op1 = N->getOperand(1);
1329     Mem = cast<MemSDNode>(N);
1330   }
1331 
1332   unsigned Opcode;
1333   SDLoc DL(N);
1334   SDNode *LD;
1335   SDValue Base, Offset, Addr;
1336 
1337   EVT EltVT = Mem->getMemoryVT();
1338   unsigned NumElts = 1;
1339   if (EltVT.isVector()) {
1340     NumElts = EltVT.getVectorNumElements();
1341     EltVT = EltVT.getVectorElementType();
1342   }
1343 
1344   // Build the "promoted" result VTList for the load. If we are really loading
1345   // i8s, then the return type will be promoted to i16 since we do not expose
1346   // 8-bit registers in NVPTX.
1347   EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1348   SmallVector<EVT, 5> InstVTs;
1349   for (unsigned i = 0; i != NumElts; ++i) {
1350     InstVTs.push_back(NodeVT);
1351   }
1352   InstVTs.push_back(MVT::Other);
1353   SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1354 
1355   if (SelectDirectAddr(Op1, Addr)) {
1356     switch (N->getOpcode()) {
1357     default:
1358       return false;
1359     case ISD::INTRINSIC_W_CHAIN:
1360       if (IsLDG) {
1361         switch (EltVT.getSimpleVT().SimpleTy) {
1362         default:
1363           return false;
1364         case MVT::i8:
1365           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1366           break;
1367         case MVT::i16:
1368           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1369           break;
1370         case MVT::i32:
1371           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1372           break;
1373         case MVT::i64:
1374           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1375           break;
1376         case MVT::f32:
1377           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1378           break;
1379         case MVT::f64:
1380           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1381           break;
1382         }
1383       } else {
1384         switch (EltVT.getSimpleVT().SimpleTy) {
1385         default:
1386           return false;
1387         case MVT::i8:
1388           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1389           break;
1390         case MVT::i16:
1391           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1392           break;
1393         case MVT::i32:
1394           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1395           break;
1396         case MVT::i64:
1397           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1398           break;
1399         case MVT::f32:
1400           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1401           break;
1402         case MVT::f64:
1403           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1404           break;
1405         }
1406       }
1407       break;
1408     case NVPTXISD::LDGV2:
1409       switch (EltVT.getSimpleVT().SimpleTy) {
1410       default:
1411         return false;
1412       case MVT::i8:
1413         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1414         break;
1415       case MVT::i16:
1416         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1417         break;
1418       case MVT::i32:
1419         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1420         break;
1421       case MVT::i64:
1422         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1423         break;
1424       case MVT::f32:
1425         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1426         break;
1427       case MVT::f64:
1428         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1429         break;
1430       }
1431       break;
1432     case NVPTXISD::LDUV2:
1433       switch (EltVT.getSimpleVT().SimpleTy) {
1434       default:
1435         return false;
1436       case MVT::i8:
1437         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1438         break;
1439       case MVT::i16:
1440         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1441         break;
1442       case MVT::i32:
1443         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1444         break;
1445       case MVT::i64:
1446         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1447         break;
1448       case MVT::f32:
1449         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1450         break;
1451       case MVT::f64:
1452         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1453         break;
1454       }
1455       break;
1456     case NVPTXISD::LDGV4:
1457       switch (EltVT.getSimpleVT().SimpleTy) {
1458       default:
1459         return false;
1460       case MVT::i8:
1461         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1462         break;
1463       case MVT::i16:
1464         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1465         break;
1466       case MVT::i32:
1467         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1468         break;
1469       case MVT::f32:
1470         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1471         break;
1472       }
1473       break;
1474     case NVPTXISD::LDUV4:
1475       switch (EltVT.getSimpleVT().SimpleTy) {
1476       default:
1477         return false;
1478       case MVT::i8:
1479         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1480         break;
1481       case MVT::i16:
1482         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1483         break;
1484       case MVT::i32:
1485         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1486         break;
1487       case MVT::f32:
1488         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1489         break;
1490       }
1491       break;
1492     }
1493 
1494     SDValue Ops[] = { Addr, Chain };
1495     LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1496   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1497                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1498     if (TM.is64Bit()) {
1499       switch (N->getOpcode()) {
1500       default:
1501         return false;
1502       case ISD::LOAD:
1503       case ISD::INTRINSIC_W_CHAIN:
1504         if (IsLDG) {
1505           switch (EltVT.getSimpleVT().SimpleTy) {
1506           default:
1507             return false;
1508           case MVT::i8:
1509             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1510             break;
1511           case MVT::i16:
1512             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1513             break;
1514           case MVT::i32:
1515             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1516             break;
1517           case MVT::i64:
1518             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1519             break;
1520           case MVT::f32:
1521             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1522             break;
1523           case MVT::f64:
1524             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1525             break;
1526           }
1527         } else {
1528           switch (EltVT.getSimpleVT().SimpleTy) {
1529           default:
1530             return false;
1531           case MVT::i8:
1532             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1533             break;
1534           case MVT::i16:
1535             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1536             break;
1537           case MVT::i32:
1538             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1539             break;
1540           case MVT::i64:
1541             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1542             break;
1543           case MVT::f32:
1544             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1545             break;
1546           case MVT::f64:
1547             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1548             break;
1549           }
1550         }
1551         break;
1552       case NVPTXISD::LoadV2:
1553       case NVPTXISD::LDGV2:
1554         switch (EltVT.getSimpleVT().SimpleTy) {
1555         default:
1556           return false;
1557         case MVT::i8:
1558           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1559           break;
1560         case MVT::i16:
1561           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1562           break;
1563         case MVT::i32:
1564           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1565           break;
1566         case MVT::i64:
1567           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1568           break;
1569         case MVT::f32:
1570           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1571           break;
1572         case MVT::f64:
1573           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1574           break;
1575         }
1576         break;
1577       case NVPTXISD::LDUV2:
1578         switch (EltVT.getSimpleVT().SimpleTy) {
1579         default:
1580           return false;
1581         case MVT::i8:
1582           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1583           break;
1584         case MVT::i16:
1585           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1586           break;
1587         case MVT::i32:
1588           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1589           break;
1590         case MVT::i64:
1591           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1592           break;
1593         case MVT::f32:
1594           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1595           break;
1596         case MVT::f64:
1597           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1598           break;
1599         }
1600         break;
1601       case NVPTXISD::LoadV4:
1602       case NVPTXISD::LDGV4:
1603         switch (EltVT.getSimpleVT().SimpleTy) {
1604         default:
1605           return false;
1606         case MVT::i8:
1607           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1608           break;
1609         case MVT::i16:
1610           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1611           break;
1612         case MVT::i32:
1613           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1614           break;
1615         case MVT::f32:
1616           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1617           break;
1618         }
1619         break;
1620       case NVPTXISD::LDUV4:
1621         switch (EltVT.getSimpleVT().SimpleTy) {
1622         default:
1623           return false;
1624         case MVT::i8:
1625           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1626           break;
1627         case MVT::i16:
1628           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1629           break;
1630         case MVT::i32:
1631           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1632           break;
1633         case MVT::f32:
1634           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1635           break;
1636         }
1637         break;
1638       }
1639     } else {
1640       switch (N->getOpcode()) {
1641       default:
1642         return false;
1643       case ISD::LOAD:
1644       case ISD::INTRINSIC_W_CHAIN:
1645         if (IsLDG) {
1646           switch (EltVT.getSimpleVT().SimpleTy) {
1647           default:
1648             return false;
1649           case MVT::i8:
1650             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1651             break;
1652           case MVT::i16:
1653             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1654             break;
1655           case MVT::i32:
1656             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1657             break;
1658           case MVT::i64:
1659             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1660             break;
1661           case MVT::f32:
1662             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1663             break;
1664           case MVT::f64:
1665             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1666             break;
1667           }
1668         } else {
1669           switch (EltVT.getSimpleVT().SimpleTy) {
1670           default:
1671             return false;
1672           case MVT::i8:
1673             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1674             break;
1675           case MVT::i16:
1676             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1677             break;
1678           case MVT::i32:
1679             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1680             break;
1681           case MVT::i64:
1682             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1683             break;
1684           case MVT::f32:
1685             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1686             break;
1687           case MVT::f64:
1688             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1689             break;
1690           }
1691         }
1692         break;
1693       case NVPTXISD::LoadV2:
1694       case NVPTXISD::LDGV2:
1695         switch (EltVT.getSimpleVT().SimpleTy) {
1696         default:
1697           return false;
1698         case MVT::i8:
1699           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1700           break;
1701         case MVT::i16:
1702           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1703           break;
1704         case MVT::i32:
1705           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1706           break;
1707         case MVT::i64:
1708           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1709           break;
1710         case MVT::f32:
1711           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1712           break;
1713         case MVT::f64:
1714           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1715           break;
1716         }
1717         break;
1718       case NVPTXISD::LDUV2:
1719         switch (EltVT.getSimpleVT().SimpleTy) {
1720         default:
1721           return false;
1722         case MVT::i8:
1723           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1724           break;
1725         case MVT::i16:
1726           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1727           break;
1728         case MVT::i32:
1729           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1730           break;
1731         case MVT::i64:
1732           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1733           break;
1734         case MVT::f32:
1735           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1736           break;
1737         case MVT::f64:
1738           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1739           break;
1740         }
1741         break;
1742       case NVPTXISD::LoadV4:
1743       case NVPTXISD::LDGV4:
1744         switch (EltVT.getSimpleVT().SimpleTy) {
1745         default:
1746           return false;
1747         case MVT::i8:
1748           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1749           break;
1750         case MVT::i16:
1751           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1752           break;
1753         case MVT::i32:
1754           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1755           break;
1756         case MVT::f32:
1757           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1758           break;
1759         }
1760         break;
1761       case NVPTXISD::LDUV4:
1762         switch (EltVT.getSimpleVT().SimpleTy) {
1763         default:
1764           return false;
1765         case MVT::i8:
1766           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1767           break;
1768         case MVT::i16:
1769           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1770           break;
1771         case MVT::i32:
1772           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1773           break;
1774         case MVT::f32:
1775           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1776           break;
1777         }
1778         break;
1779       }
1780     }
1781 
1782     SDValue Ops[] = { Base, Offset, Chain };
1783 
1784     LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1785   } else {
1786     if (TM.is64Bit()) {
1787       switch (N->getOpcode()) {
1788       default:
1789         return false;
1790       case ISD::LOAD:
1791       case ISD::INTRINSIC_W_CHAIN:
1792         if (IsLDG) {
1793           switch (EltVT.getSimpleVT().SimpleTy) {
1794           default:
1795             return false;
1796           case MVT::i8:
1797             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1798             break;
1799           case MVT::i16:
1800             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1801             break;
1802           case MVT::i32:
1803             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1804             break;
1805           case MVT::i64:
1806             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1807             break;
1808           case MVT::f32:
1809             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1810             break;
1811           case MVT::f64:
1812             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1813             break;
1814           }
1815         } else {
1816           switch (EltVT.getSimpleVT().SimpleTy) {
1817           default:
1818             return false;
1819           case MVT::i8:
1820             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1821             break;
1822           case MVT::i16:
1823             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1824             break;
1825           case MVT::i32:
1826             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1827             break;
1828           case MVT::i64:
1829             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1830             break;
1831           case MVT::f32:
1832             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1833             break;
1834           case MVT::f64:
1835             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1836             break;
1837           }
1838         }
1839         break;
1840       case NVPTXISD::LoadV2:
1841       case NVPTXISD::LDGV2:
1842         switch (EltVT.getSimpleVT().SimpleTy) {
1843         default:
1844           return false;
1845         case MVT::i8:
1846           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1847           break;
1848         case MVT::i16:
1849           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1850           break;
1851         case MVT::i32:
1852           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1853           break;
1854         case MVT::i64:
1855           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1856           break;
1857         case MVT::f32:
1858           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1859           break;
1860         case MVT::f64:
1861           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1862           break;
1863         }
1864         break;
1865       case NVPTXISD::LDUV2:
1866         switch (EltVT.getSimpleVT().SimpleTy) {
1867         default:
1868           return false;
1869         case MVT::i8:
1870           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1871           break;
1872         case MVT::i16:
1873           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1874           break;
1875         case MVT::i32:
1876           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1877           break;
1878         case MVT::i64:
1879           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1880           break;
1881         case MVT::f32:
1882           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1883           break;
1884         case MVT::f64:
1885           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1886           break;
1887         }
1888         break;
1889       case NVPTXISD::LoadV4:
1890       case NVPTXISD::LDGV4:
1891         switch (EltVT.getSimpleVT().SimpleTy) {
1892         default:
1893           return false;
1894         case MVT::i8:
1895           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1896           break;
1897         case MVT::i16:
1898           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1899           break;
1900         case MVT::i32:
1901           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1902           break;
1903         case MVT::f32:
1904           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1905           break;
1906         }
1907         break;
1908       case NVPTXISD::LDUV4:
1909         switch (EltVT.getSimpleVT().SimpleTy) {
1910         default:
1911           return false;
1912         case MVT::i8:
1913           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1914           break;
1915         case MVT::i16:
1916           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1917           break;
1918         case MVT::i32:
1919           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1920           break;
1921         case MVT::f32:
1922           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1923           break;
1924         }
1925         break;
1926       }
1927     } else {
1928       switch (N->getOpcode()) {
1929       default:
1930         return false;
1931       case ISD::LOAD:
1932       case ISD::INTRINSIC_W_CHAIN:
1933         if (IsLDG) {
1934           switch (EltVT.getSimpleVT().SimpleTy) {
1935           default:
1936             return false;
1937           case MVT::i8:
1938             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1939             break;
1940           case MVT::i16:
1941             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1942             break;
1943           case MVT::i32:
1944             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1945             break;
1946           case MVT::i64:
1947             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1948             break;
1949           case MVT::f32:
1950             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1951             break;
1952           case MVT::f64:
1953             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1954             break;
1955           }
1956         } else {
1957           switch (EltVT.getSimpleVT().SimpleTy) {
1958           default:
1959             return false;
1960           case MVT::i8:
1961             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1962             break;
1963           case MVT::i16:
1964             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1965             break;
1966           case MVT::i32:
1967             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1968             break;
1969           case MVT::i64:
1970             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1971             break;
1972           case MVT::f32:
1973             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1974             break;
1975           case MVT::f64:
1976             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1977             break;
1978           }
1979         }
1980         break;
1981       case NVPTXISD::LoadV2:
1982       case NVPTXISD::LDGV2:
1983         switch (EltVT.getSimpleVT().SimpleTy) {
1984         default:
1985           return false;
1986         case MVT::i8:
1987           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1988           break;
1989         case MVT::i16:
1990           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1991           break;
1992         case MVT::i32:
1993           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1994           break;
1995         case MVT::i64:
1996           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1997           break;
1998         case MVT::f32:
1999           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
2000           break;
2001         case MVT::f64:
2002           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
2003           break;
2004         }
2005         break;
2006       case NVPTXISD::LDUV2:
2007         switch (EltVT.getSimpleVT().SimpleTy) {
2008         default:
2009           return false;
2010         case MVT::i8:
2011           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
2012           break;
2013         case MVT::i16:
2014           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
2015           break;
2016         case MVT::i32:
2017           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2018           break;
2019         case MVT::i64:
2020           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2021           break;
2022         case MVT::f32:
2023           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2024           break;
2025         case MVT::f64:
2026           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2027           break;
2028         }
2029         break;
2030       case NVPTXISD::LoadV4:
2031       case NVPTXISD::LDGV4:
2032         switch (EltVT.getSimpleVT().SimpleTy) {
2033         default:
2034           return false;
2035         case MVT::i8:
2036           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2037           break;
2038         case MVT::i16:
2039           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2040           break;
2041         case MVT::i32:
2042           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2043           break;
2044         case MVT::f32:
2045           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2046           break;
2047         }
2048         break;
2049       case NVPTXISD::LDUV4:
2050         switch (EltVT.getSimpleVT().SimpleTy) {
2051         default:
2052           return false;
2053         case MVT::i8:
2054           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2055           break;
2056         case MVT::i16:
2057           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2058           break;
2059         case MVT::i32:
2060           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2061           break;
2062         case MVT::f32:
2063           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2064           break;
2065         }
2066         break;
2067       }
2068     }
2069 
2070     SDValue Ops[] = { Op1, Chain };
2071     LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
2072   }
2073 
2074   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2075   MemRefs0[0] = Mem->getMemOperand();
2076   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2077 
2078   // For automatic generation of LDG (through SelectLoad[Vector], not the
2079   // intrinsics), we may have an extending load like:
2080   //
2081   //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2082   //
2083   // In this case, the matching logic above will select a load for the original
2084   // memory type (in this case, i8) and our types will not match (the node needs
2085   // to return an i32 in this case). Our LDG/LDU nodes do not support the
2086   // concept of sign-/zero-extension, so emulate it here by adding an explicit
2087   // CVT instruction. Ptxas should clean up any redundancies here.
2088 
2089   EVT OrigType = N->getValueType(0);
2090   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
2091 
2092   if (OrigType != EltVT && LdNode) {
2093     // We have an extending-load. The instruction we selected operates on the
2094     // smaller type, but the SDNode we are replacing has the larger type. We
2095     // need to emit a CVT to make the types match.
2096     bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2097     unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2098                                        EltVT.getSimpleVT(), IsSigned);
2099 
2100     // For each output value, apply the manual sign/zero-extension and make sure
2101     // all users of the load go through that CVT.
2102     for (unsigned i = 0; i != NumElts; ++i) {
2103       SDValue Res(LD, i);
2104       SDValue OrigVal(N, i);
2105 
2106       SDNode *CvtNode =
2107         CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
2108                                CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2109                                                          DL, MVT::i32));
2110       ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2111     }
2112   }
2113 
2114   ReplaceNode(N, LD);
2115   return true;
2116 }
2117 
tryStore(SDNode * N)2118 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
2119   SDLoc dl(N);
2120   StoreSDNode *ST = cast<StoreSDNode>(N);
2121   EVT StoreVT = ST->getMemoryVT();
2122   SDNode *NVPTXST = nullptr;
2123 
2124   // do not support pre/post inc/dec
2125   if (ST->isIndexed())
2126     return false;
2127 
2128   if (!StoreVT.isSimple())
2129     return false;
2130 
2131   // Address Space Setting
2132   unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2133 
2134   // Volatile Setting
2135   // - .volatile is only availalble for .global and .shared
2136   bool isVolatile = ST->isVolatile();
2137   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2138       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2139       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2140     isVolatile = false;
2141 
2142   // Vector Setting
2143   MVT SimpleVT = StoreVT.getSimpleVT();
2144   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2145   if (SimpleVT.isVector()) {
2146     unsigned num = SimpleVT.getVectorNumElements();
2147     if (num == 2)
2148       vecType = NVPTX::PTXLdStInstCode::V2;
2149     else if (num == 4)
2150       vecType = NVPTX::PTXLdStInstCode::V4;
2151     else
2152       return false;
2153   }
2154 
2155   // Type Setting: toType + toTypeWidth
2156   // - for integer type, always use 'u'
2157   //
2158   MVT ScalarVT = SimpleVT.getScalarType();
2159   unsigned toTypeWidth = ScalarVT.getSizeInBits();
2160   unsigned int toType;
2161   if (ScalarVT.isFloatingPoint())
2162     toType = NVPTX::PTXLdStInstCode::Float;
2163   else
2164     toType = NVPTX::PTXLdStInstCode::Unsigned;
2165 
2166   // Create the machine instruction DAG
2167   SDValue Chain = N->getOperand(0);
2168   SDValue N1 = N->getOperand(1);
2169   SDValue N2 = N->getOperand(2);
2170   SDValue Addr;
2171   SDValue Offset, Base;
2172   unsigned Opcode;
2173   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2174 
2175   if (SelectDirectAddr(N2, Addr)) {
2176     switch (SourceVT) {
2177     case MVT::i8:
2178       Opcode = NVPTX::ST_i8_avar;
2179       break;
2180     case MVT::i16:
2181       Opcode = NVPTX::ST_i16_avar;
2182       break;
2183     case MVT::i32:
2184       Opcode = NVPTX::ST_i32_avar;
2185       break;
2186     case MVT::i64:
2187       Opcode = NVPTX::ST_i64_avar;
2188       break;
2189     case MVT::f32:
2190       Opcode = NVPTX::ST_f32_avar;
2191       break;
2192     case MVT::f64:
2193       Opcode = NVPTX::ST_f64_avar;
2194       break;
2195     default:
2196       return false;
2197     }
2198     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2199                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2200                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2201                       Chain };
2202     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2203   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2204                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2205     switch (SourceVT) {
2206     case MVT::i8:
2207       Opcode = NVPTX::ST_i8_asi;
2208       break;
2209     case MVT::i16:
2210       Opcode = NVPTX::ST_i16_asi;
2211       break;
2212     case MVT::i32:
2213       Opcode = NVPTX::ST_i32_asi;
2214       break;
2215     case MVT::i64:
2216       Opcode = NVPTX::ST_i64_asi;
2217       break;
2218     case MVT::f32:
2219       Opcode = NVPTX::ST_f32_asi;
2220       break;
2221     case MVT::f64:
2222       Opcode = NVPTX::ST_f64_asi;
2223       break;
2224     default:
2225       return false;
2226     }
2227     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2228                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2229                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2230                       Offset, Chain };
2231     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2232   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2233                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2234     if (TM.is64Bit()) {
2235       switch (SourceVT) {
2236       case MVT::i8:
2237         Opcode = NVPTX::ST_i8_ari_64;
2238         break;
2239       case MVT::i16:
2240         Opcode = NVPTX::ST_i16_ari_64;
2241         break;
2242       case MVT::i32:
2243         Opcode = NVPTX::ST_i32_ari_64;
2244         break;
2245       case MVT::i64:
2246         Opcode = NVPTX::ST_i64_ari_64;
2247         break;
2248       case MVT::f32:
2249         Opcode = NVPTX::ST_f32_ari_64;
2250         break;
2251       case MVT::f64:
2252         Opcode = NVPTX::ST_f64_ari_64;
2253         break;
2254       default:
2255         return false;
2256       }
2257     } else {
2258       switch (SourceVT) {
2259       case MVT::i8:
2260         Opcode = NVPTX::ST_i8_ari;
2261         break;
2262       case MVT::i16:
2263         Opcode = NVPTX::ST_i16_ari;
2264         break;
2265       case MVT::i32:
2266         Opcode = NVPTX::ST_i32_ari;
2267         break;
2268       case MVT::i64:
2269         Opcode = NVPTX::ST_i64_ari;
2270         break;
2271       case MVT::f32:
2272         Opcode = NVPTX::ST_f32_ari;
2273         break;
2274       case MVT::f64:
2275         Opcode = NVPTX::ST_f64_ari;
2276         break;
2277       default:
2278         return false;
2279       }
2280     }
2281     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2282                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2283                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2284                       Offset, Chain };
2285     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2286   } else {
2287     if (TM.is64Bit()) {
2288       switch (SourceVT) {
2289       case MVT::i8:
2290         Opcode = NVPTX::ST_i8_areg_64;
2291         break;
2292       case MVT::i16:
2293         Opcode = NVPTX::ST_i16_areg_64;
2294         break;
2295       case MVT::i32:
2296         Opcode = NVPTX::ST_i32_areg_64;
2297         break;
2298       case MVT::i64:
2299         Opcode = NVPTX::ST_i64_areg_64;
2300         break;
2301       case MVT::f32:
2302         Opcode = NVPTX::ST_f32_areg_64;
2303         break;
2304       case MVT::f64:
2305         Opcode = NVPTX::ST_f64_areg_64;
2306         break;
2307       default:
2308         return false;
2309       }
2310     } else {
2311       switch (SourceVT) {
2312       case MVT::i8:
2313         Opcode = NVPTX::ST_i8_areg;
2314         break;
2315       case MVT::i16:
2316         Opcode = NVPTX::ST_i16_areg;
2317         break;
2318       case MVT::i32:
2319         Opcode = NVPTX::ST_i32_areg;
2320         break;
2321       case MVT::i64:
2322         Opcode = NVPTX::ST_i64_areg;
2323         break;
2324       case MVT::f32:
2325         Opcode = NVPTX::ST_f32_areg;
2326         break;
2327       case MVT::f64:
2328         Opcode = NVPTX::ST_f64_areg;
2329         break;
2330       default:
2331         return false;
2332       }
2333     }
2334     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2335                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2336                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2337                       Chain };
2338     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2339   }
2340 
2341   if (!NVPTXST)
2342     return false;
2343 
2344   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2345   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2346   cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2347   ReplaceNode(N, NVPTXST);
2348   return true;
2349 }
2350 
tryStoreVector(SDNode * N)2351 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
2352   SDValue Chain = N->getOperand(0);
2353   SDValue Op1 = N->getOperand(1);
2354   SDValue Addr, Offset, Base;
2355   unsigned Opcode;
2356   SDLoc DL(N);
2357   SDNode *ST;
2358   EVT EltVT = Op1.getValueType();
2359   MemSDNode *MemSD = cast<MemSDNode>(N);
2360   EVT StoreVT = MemSD->getMemoryVT();
2361 
2362   // Address Space Setting
2363   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2364 
2365   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2366     report_fatal_error("Cannot store to pointer that points to constant "
2367                        "memory space");
2368   }
2369 
2370   // Volatile Setting
2371   // - .volatile is only availalble for .global and .shared
2372   bool IsVolatile = MemSD->isVolatile();
2373   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2374       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2375       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2376     IsVolatile = false;
2377 
2378   // Type Setting: toType + toTypeWidth
2379   // - for integer type, always use 'u'
2380   assert(StoreVT.isSimple() && "Store value is not simple");
2381   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2382   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2383   unsigned ToType;
2384   if (ScalarVT.isFloatingPoint())
2385     ToType = NVPTX::PTXLdStInstCode::Float;
2386   else
2387     ToType = NVPTX::PTXLdStInstCode::Unsigned;
2388 
2389   SmallVector<SDValue, 12> StOps;
2390   SDValue N2;
2391   unsigned VecType;
2392 
2393   switch (N->getOpcode()) {
2394   case NVPTXISD::StoreV2:
2395     VecType = NVPTX::PTXLdStInstCode::V2;
2396     StOps.push_back(N->getOperand(1));
2397     StOps.push_back(N->getOperand(2));
2398     N2 = N->getOperand(3);
2399     break;
2400   case NVPTXISD::StoreV4:
2401     VecType = NVPTX::PTXLdStInstCode::V4;
2402     StOps.push_back(N->getOperand(1));
2403     StOps.push_back(N->getOperand(2));
2404     StOps.push_back(N->getOperand(3));
2405     StOps.push_back(N->getOperand(4));
2406     N2 = N->getOperand(5);
2407     break;
2408   default:
2409     return false;
2410   }
2411 
2412   StOps.push_back(getI32Imm(IsVolatile, DL));
2413   StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2414   StOps.push_back(getI32Imm(VecType, DL));
2415   StOps.push_back(getI32Imm(ToType, DL));
2416   StOps.push_back(getI32Imm(ToTypeWidth, DL));
2417 
2418   if (SelectDirectAddr(N2, Addr)) {
2419     switch (N->getOpcode()) {
2420     default:
2421       return false;
2422     case NVPTXISD::StoreV2:
2423       switch (EltVT.getSimpleVT().SimpleTy) {
2424       default:
2425         return false;
2426       case MVT::i8:
2427         Opcode = NVPTX::STV_i8_v2_avar;
2428         break;
2429       case MVT::i16:
2430         Opcode = NVPTX::STV_i16_v2_avar;
2431         break;
2432       case MVT::i32:
2433         Opcode = NVPTX::STV_i32_v2_avar;
2434         break;
2435       case MVT::i64:
2436         Opcode = NVPTX::STV_i64_v2_avar;
2437         break;
2438       case MVT::f32:
2439         Opcode = NVPTX::STV_f32_v2_avar;
2440         break;
2441       case MVT::f64:
2442         Opcode = NVPTX::STV_f64_v2_avar;
2443         break;
2444       }
2445       break;
2446     case NVPTXISD::StoreV4:
2447       switch (EltVT.getSimpleVT().SimpleTy) {
2448       default:
2449         return false;
2450       case MVT::i8:
2451         Opcode = NVPTX::STV_i8_v4_avar;
2452         break;
2453       case MVT::i16:
2454         Opcode = NVPTX::STV_i16_v4_avar;
2455         break;
2456       case MVT::i32:
2457         Opcode = NVPTX::STV_i32_v4_avar;
2458         break;
2459       case MVT::f32:
2460         Opcode = NVPTX::STV_f32_v4_avar;
2461         break;
2462       }
2463       break;
2464     }
2465     StOps.push_back(Addr);
2466   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2467                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2468     switch (N->getOpcode()) {
2469     default:
2470       return false;
2471     case NVPTXISD::StoreV2:
2472       switch (EltVT.getSimpleVT().SimpleTy) {
2473       default:
2474         return false;
2475       case MVT::i8:
2476         Opcode = NVPTX::STV_i8_v2_asi;
2477         break;
2478       case MVT::i16:
2479         Opcode = NVPTX::STV_i16_v2_asi;
2480         break;
2481       case MVT::i32:
2482         Opcode = NVPTX::STV_i32_v2_asi;
2483         break;
2484       case MVT::i64:
2485         Opcode = NVPTX::STV_i64_v2_asi;
2486         break;
2487       case MVT::f32:
2488         Opcode = NVPTX::STV_f32_v2_asi;
2489         break;
2490       case MVT::f64:
2491         Opcode = NVPTX::STV_f64_v2_asi;
2492         break;
2493       }
2494       break;
2495     case NVPTXISD::StoreV4:
2496       switch (EltVT.getSimpleVT().SimpleTy) {
2497       default:
2498         return false;
2499       case MVT::i8:
2500         Opcode = NVPTX::STV_i8_v4_asi;
2501         break;
2502       case MVT::i16:
2503         Opcode = NVPTX::STV_i16_v4_asi;
2504         break;
2505       case MVT::i32:
2506         Opcode = NVPTX::STV_i32_v4_asi;
2507         break;
2508       case MVT::f32:
2509         Opcode = NVPTX::STV_f32_v4_asi;
2510         break;
2511       }
2512       break;
2513     }
2514     StOps.push_back(Base);
2515     StOps.push_back(Offset);
2516   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2517                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2518     if (TM.is64Bit()) {
2519       switch (N->getOpcode()) {
2520       default:
2521         return false;
2522       case NVPTXISD::StoreV2:
2523         switch (EltVT.getSimpleVT().SimpleTy) {
2524         default:
2525           return false;
2526         case MVT::i8:
2527           Opcode = NVPTX::STV_i8_v2_ari_64;
2528           break;
2529         case MVT::i16:
2530           Opcode = NVPTX::STV_i16_v2_ari_64;
2531           break;
2532         case MVT::i32:
2533           Opcode = NVPTX::STV_i32_v2_ari_64;
2534           break;
2535         case MVT::i64:
2536           Opcode = NVPTX::STV_i64_v2_ari_64;
2537           break;
2538         case MVT::f32:
2539           Opcode = NVPTX::STV_f32_v2_ari_64;
2540           break;
2541         case MVT::f64:
2542           Opcode = NVPTX::STV_f64_v2_ari_64;
2543           break;
2544         }
2545         break;
2546       case NVPTXISD::StoreV4:
2547         switch (EltVT.getSimpleVT().SimpleTy) {
2548         default:
2549           return false;
2550         case MVT::i8:
2551           Opcode = NVPTX::STV_i8_v4_ari_64;
2552           break;
2553         case MVT::i16:
2554           Opcode = NVPTX::STV_i16_v4_ari_64;
2555           break;
2556         case MVT::i32:
2557           Opcode = NVPTX::STV_i32_v4_ari_64;
2558           break;
2559         case MVT::f32:
2560           Opcode = NVPTX::STV_f32_v4_ari_64;
2561           break;
2562         }
2563         break;
2564       }
2565     } else {
2566       switch (N->getOpcode()) {
2567       default:
2568         return false;
2569       case NVPTXISD::StoreV2:
2570         switch (EltVT.getSimpleVT().SimpleTy) {
2571         default:
2572           return false;
2573         case MVT::i8:
2574           Opcode = NVPTX::STV_i8_v2_ari;
2575           break;
2576         case MVT::i16:
2577           Opcode = NVPTX::STV_i16_v2_ari;
2578           break;
2579         case MVT::i32:
2580           Opcode = NVPTX::STV_i32_v2_ari;
2581           break;
2582         case MVT::i64:
2583           Opcode = NVPTX::STV_i64_v2_ari;
2584           break;
2585         case MVT::f32:
2586           Opcode = NVPTX::STV_f32_v2_ari;
2587           break;
2588         case MVT::f64:
2589           Opcode = NVPTX::STV_f64_v2_ari;
2590           break;
2591         }
2592         break;
2593       case NVPTXISD::StoreV4:
2594         switch (EltVT.getSimpleVT().SimpleTy) {
2595         default:
2596           return false;
2597         case MVT::i8:
2598           Opcode = NVPTX::STV_i8_v4_ari;
2599           break;
2600         case MVT::i16:
2601           Opcode = NVPTX::STV_i16_v4_ari;
2602           break;
2603         case MVT::i32:
2604           Opcode = NVPTX::STV_i32_v4_ari;
2605           break;
2606         case MVT::f32:
2607           Opcode = NVPTX::STV_f32_v4_ari;
2608           break;
2609         }
2610         break;
2611       }
2612     }
2613     StOps.push_back(Base);
2614     StOps.push_back(Offset);
2615   } else {
2616     if (TM.is64Bit()) {
2617       switch (N->getOpcode()) {
2618       default:
2619         return false;
2620       case NVPTXISD::StoreV2:
2621         switch (EltVT.getSimpleVT().SimpleTy) {
2622         default:
2623           return false;
2624         case MVT::i8:
2625           Opcode = NVPTX::STV_i8_v2_areg_64;
2626           break;
2627         case MVT::i16:
2628           Opcode = NVPTX::STV_i16_v2_areg_64;
2629           break;
2630         case MVT::i32:
2631           Opcode = NVPTX::STV_i32_v2_areg_64;
2632           break;
2633         case MVT::i64:
2634           Opcode = NVPTX::STV_i64_v2_areg_64;
2635           break;
2636         case MVT::f32:
2637           Opcode = NVPTX::STV_f32_v2_areg_64;
2638           break;
2639         case MVT::f64:
2640           Opcode = NVPTX::STV_f64_v2_areg_64;
2641           break;
2642         }
2643         break;
2644       case NVPTXISD::StoreV4:
2645         switch (EltVT.getSimpleVT().SimpleTy) {
2646         default:
2647           return false;
2648         case MVT::i8:
2649           Opcode = NVPTX::STV_i8_v4_areg_64;
2650           break;
2651         case MVT::i16:
2652           Opcode = NVPTX::STV_i16_v4_areg_64;
2653           break;
2654         case MVT::i32:
2655           Opcode = NVPTX::STV_i32_v4_areg_64;
2656           break;
2657         case MVT::f32:
2658           Opcode = NVPTX::STV_f32_v4_areg_64;
2659           break;
2660         }
2661         break;
2662       }
2663     } else {
2664       switch (N->getOpcode()) {
2665       default:
2666         return false;
2667       case NVPTXISD::StoreV2:
2668         switch (EltVT.getSimpleVT().SimpleTy) {
2669         default:
2670           return false;
2671         case MVT::i8:
2672           Opcode = NVPTX::STV_i8_v2_areg;
2673           break;
2674         case MVT::i16:
2675           Opcode = NVPTX::STV_i16_v2_areg;
2676           break;
2677         case MVT::i32:
2678           Opcode = NVPTX::STV_i32_v2_areg;
2679           break;
2680         case MVT::i64:
2681           Opcode = NVPTX::STV_i64_v2_areg;
2682           break;
2683         case MVT::f32:
2684           Opcode = NVPTX::STV_f32_v2_areg;
2685           break;
2686         case MVT::f64:
2687           Opcode = NVPTX::STV_f64_v2_areg;
2688           break;
2689         }
2690         break;
2691       case NVPTXISD::StoreV4:
2692         switch (EltVT.getSimpleVT().SimpleTy) {
2693         default:
2694           return false;
2695         case MVT::i8:
2696           Opcode = NVPTX::STV_i8_v4_areg;
2697           break;
2698         case MVT::i16:
2699           Opcode = NVPTX::STV_i16_v4_areg;
2700           break;
2701         case MVT::i32:
2702           Opcode = NVPTX::STV_i32_v4_areg;
2703           break;
2704         case MVT::f32:
2705           Opcode = NVPTX::STV_f32_v4_areg;
2706           break;
2707         }
2708         break;
2709       }
2710     }
2711     StOps.push_back(N2);
2712   }
2713 
2714   StOps.push_back(Chain);
2715 
2716   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2717 
2718   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2719   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2720   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2721 
2722   ReplaceNode(N, ST);
2723   return true;
2724 }
2725 
tryLoadParam(SDNode * Node)2726 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2727   SDValue Chain = Node->getOperand(0);
2728   SDValue Offset = Node->getOperand(2);
2729   SDValue Flag = Node->getOperand(3);
2730   SDLoc DL(Node);
2731   MemSDNode *Mem = cast<MemSDNode>(Node);
2732 
2733   unsigned VecSize;
2734   switch (Node->getOpcode()) {
2735   default:
2736     return false;
2737   case NVPTXISD::LoadParam:
2738     VecSize = 1;
2739     break;
2740   case NVPTXISD::LoadParamV2:
2741     VecSize = 2;
2742     break;
2743   case NVPTXISD::LoadParamV4:
2744     VecSize = 4;
2745     break;
2746   }
2747 
2748   EVT EltVT = Node->getValueType(0);
2749   EVT MemVT = Mem->getMemoryVT();
2750 
2751   unsigned Opc = 0;
2752 
2753   switch (VecSize) {
2754   default:
2755     return false;
2756   case 1:
2757     switch (MemVT.getSimpleVT().SimpleTy) {
2758     default:
2759       return false;
2760     case MVT::i1:
2761       Opc = NVPTX::LoadParamMemI8;
2762       break;
2763     case MVT::i8:
2764       Opc = NVPTX::LoadParamMemI8;
2765       break;
2766     case MVT::i16:
2767       Opc = NVPTX::LoadParamMemI16;
2768       break;
2769     case MVT::i32:
2770       Opc = NVPTX::LoadParamMemI32;
2771       break;
2772     case MVT::i64:
2773       Opc = NVPTX::LoadParamMemI64;
2774       break;
2775     case MVT::f32:
2776       Opc = NVPTX::LoadParamMemF32;
2777       break;
2778     case MVT::f64:
2779       Opc = NVPTX::LoadParamMemF64;
2780       break;
2781     }
2782     break;
2783   case 2:
2784     switch (MemVT.getSimpleVT().SimpleTy) {
2785     default:
2786       return false;
2787     case MVT::i1:
2788       Opc = NVPTX::LoadParamMemV2I8;
2789       break;
2790     case MVT::i8:
2791       Opc = NVPTX::LoadParamMemV2I8;
2792       break;
2793     case MVT::i16:
2794       Opc = NVPTX::LoadParamMemV2I16;
2795       break;
2796     case MVT::i32:
2797       Opc = NVPTX::LoadParamMemV2I32;
2798       break;
2799     case MVT::i64:
2800       Opc = NVPTX::LoadParamMemV2I64;
2801       break;
2802     case MVT::f32:
2803       Opc = NVPTX::LoadParamMemV2F32;
2804       break;
2805     case MVT::f64:
2806       Opc = NVPTX::LoadParamMemV2F64;
2807       break;
2808     }
2809     break;
2810   case 4:
2811     switch (MemVT.getSimpleVT().SimpleTy) {
2812     default:
2813       return false;
2814     case MVT::i1:
2815       Opc = NVPTX::LoadParamMemV4I8;
2816       break;
2817     case MVT::i8:
2818       Opc = NVPTX::LoadParamMemV4I8;
2819       break;
2820     case MVT::i16:
2821       Opc = NVPTX::LoadParamMemV4I16;
2822       break;
2823     case MVT::i32:
2824       Opc = NVPTX::LoadParamMemV4I32;
2825       break;
2826     case MVT::f32:
2827       Opc = NVPTX::LoadParamMemV4F32;
2828       break;
2829     }
2830     break;
2831   }
2832 
2833   SDVTList VTs;
2834   if (VecSize == 1) {
2835     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2836   } else if (VecSize == 2) {
2837     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2838   } else {
2839     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2840     VTs = CurDAG->getVTList(EVTs);
2841   }
2842 
2843   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2844 
2845   SmallVector<SDValue, 2> Ops;
2846   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2847   Ops.push_back(Chain);
2848   Ops.push_back(Flag);
2849 
2850   ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
2851   return true;
2852 }
2853 
tryStoreRetval(SDNode * N)2854 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2855   SDLoc DL(N);
2856   SDValue Chain = N->getOperand(0);
2857   SDValue Offset = N->getOperand(1);
2858   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2859   MemSDNode *Mem = cast<MemSDNode>(N);
2860 
2861   // How many elements do we have?
2862   unsigned NumElts = 1;
2863   switch (N->getOpcode()) {
2864   default:
2865     return false;
2866   case NVPTXISD::StoreRetval:
2867     NumElts = 1;
2868     break;
2869   case NVPTXISD::StoreRetvalV2:
2870     NumElts = 2;
2871     break;
2872   case NVPTXISD::StoreRetvalV4:
2873     NumElts = 4;
2874     break;
2875   }
2876 
2877   // Build vector of operands
2878   SmallVector<SDValue, 6> Ops;
2879   for (unsigned i = 0; i < NumElts; ++i)
2880     Ops.push_back(N->getOperand(i + 2));
2881   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2882   Ops.push_back(Chain);
2883 
2884   // Determine target opcode
2885   // If we have an i1, use an 8-bit store. The lowering code in
2886   // NVPTXISelLowering will have already emitted an upcast.
2887   unsigned Opcode = 0;
2888   switch (NumElts) {
2889   default:
2890     return false;
2891   case 1:
2892     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2893     default:
2894       return false;
2895     case MVT::i1:
2896       Opcode = NVPTX::StoreRetvalI8;
2897       break;
2898     case MVT::i8:
2899       Opcode = NVPTX::StoreRetvalI8;
2900       break;
2901     case MVT::i16:
2902       Opcode = NVPTX::StoreRetvalI16;
2903       break;
2904     case MVT::i32:
2905       Opcode = NVPTX::StoreRetvalI32;
2906       break;
2907     case MVT::i64:
2908       Opcode = NVPTX::StoreRetvalI64;
2909       break;
2910     case MVT::f32:
2911       Opcode = NVPTX::StoreRetvalF32;
2912       break;
2913     case MVT::f64:
2914       Opcode = NVPTX::StoreRetvalF64;
2915       break;
2916     }
2917     break;
2918   case 2:
2919     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2920     default:
2921       return false;
2922     case MVT::i1:
2923       Opcode = NVPTX::StoreRetvalV2I8;
2924       break;
2925     case MVT::i8:
2926       Opcode = NVPTX::StoreRetvalV2I8;
2927       break;
2928     case MVT::i16:
2929       Opcode = NVPTX::StoreRetvalV2I16;
2930       break;
2931     case MVT::i32:
2932       Opcode = NVPTX::StoreRetvalV2I32;
2933       break;
2934     case MVT::i64:
2935       Opcode = NVPTX::StoreRetvalV2I64;
2936       break;
2937     case MVT::f32:
2938       Opcode = NVPTX::StoreRetvalV2F32;
2939       break;
2940     case MVT::f64:
2941       Opcode = NVPTX::StoreRetvalV2F64;
2942       break;
2943     }
2944     break;
2945   case 4:
2946     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2947     default:
2948       return false;
2949     case MVT::i1:
2950       Opcode = NVPTX::StoreRetvalV4I8;
2951       break;
2952     case MVT::i8:
2953       Opcode = NVPTX::StoreRetvalV4I8;
2954       break;
2955     case MVT::i16:
2956       Opcode = NVPTX::StoreRetvalV4I16;
2957       break;
2958     case MVT::i32:
2959       Opcode = NVPTX::StoreRetvalV4I32;
2960       break;
2961     case MVT::f32:
2962       Opcode = NVPTX::StoreRetvalV4F32;
2963       break;
2964     }
2965     break;
2966   }
2967 
2968   SDNode *Ret =
2969       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2970   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2971   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2972   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2973 
2974   ReplaceNode(N, Ret);
2975   return true;
2976 }
2977 
tryStoreParam(SDNode * N)2978 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2979   SDLoc DL(N);
2980   SDValue Chain = N->getOperand(0);
2981   SDValue Param = N->getOperand(1);
2982   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2983   SDValue Offset = N->getOperand(2);
2984   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2985   MemSDNode *Mem = cast<MemSDNode>(N);
2986   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2987 
2988   // How many elements do we have?
2989   unsigned NumElts = 1;
2990   switch (N->getOpcode()) {
2991   default:
2992     return false;
2993   case NVPTXISD::StoreParamU32:
2994   case NVPTXISD::StoreParamS32:
2995   case NVPTXISD::StoreParam:
2996     NumElts = 1;
2997     break;
2998   case NVPTXISD::StoreParamV2:
2999     NumElts = 2;
3000     break;
3001   case NVPTXISD::StoreParamV4:
3002     NumElts = 4;
3003     break;
3004   }
3005 
3006   // Build vector of operands
3007   SmallVector<SDValue, 8> Ops;
3008   for (unsigned i = 0; i < NumElts; ++i)
3009     Ops.push_back(N->getOperand(i + 3));
3010   Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
3011   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
3012   Ops.push_back(Chain);
3013   Ops.push_back(Flag);
3014 
3015   // Determine target opcode
3016   // If we have an i1, use an 8-bit store. The lowering code in
3017   // NVPTXISelLowering will have already emitted an upcast.
3018   unsigned Opcode = 0;
3019   switch (N->getOpcode()) {
3020   default:
3021     switch (NumElts) {
3022     default:
3023       return false;
3024     case 1:
3025       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3026       default:
3027         return false;
3028       case MVT::i1:
3029         Opcode = NVPTX::StoreParamI8;
3030         break;
3031       case MVT::i8:
3032         Opcode = NVPTX::StoreParamI8;
3033         break;
3034       case MVT::i16:
3035         Opcode = NVPTX::StoreParamI16;
3036         break;
3037       case MVT::i32:
3038         Opcode = NVPTX::StoreParamI32;
3039         break;
3040       case MVT::i64:
3041         Opcode = NVPTX::StoreParamI64;
3042         break;
3043       case MVT::f32:
3044         Opcode = NVPTX::StoreParamF32;
3045         break;
3046       case MVT::f64:
3047         Opcode = NVPTX::StoreParamF64;
3048         break;
3049       }
3050       break;
3051     case 2:
3052       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3053       default:
3054         return false;
3055       case MVT::i1:
3056         Opcode = NVPTX::StoreParamV2I8;
3057         break;
3058       case MVT::i8:
3059         Opcode = NVPTX::StoreParamV2I8;
3060         break;
3061       case MVT::i16:
3062         Opcode = NVPTX::StoreParamV2I16;
3063         break;
3064       case MVT::i32:
3065         Opcode = NVPTX::StoreParamV2I32;
3066         break;
3067       case MVT::i64:
3068         Opcode = NVPTX::StoreParamV2I64;
3069         break;
3070       case MVT::f32:
3071         Opcode = NVPTX::StoreParamV2F32;
3072         break;
3073       case MVT::f64:
3074         Opcode = NVPTX::StoreParamV2F64;
3075         break;
3076       }
3077       break;
3078     case 4:
3079       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3080       default:
3081         return false;
3082       case MVT::i1:
3083         Opcode = NVPTX::StoreParamV4I8;
3084         break;
3085       case MVT::i8:
3086         Opcode = NVPTX::StoreParamV4I8;
3087         break;
3088       case MVT::i16:
3089         Opcode = NVPTX::StoreParamV4I16;
3090         break;
3091       case MVT::i32:
3092         Opcode = NVPTX::StoreParamV4I32;
3093         break;
3094       case MVT::f32:
3095         Opcode = NVPTX::StoreParamV4F32;
3096         break;
3097       }
3098       break;
3099     }
3100     break;
3101   // Special case: if we have a sign-extend/zero-extend node, insert the
3102   // conversion instruction first, and use that as the value operand to
3103   // the selected StoreParam node.
3104   case NVPTXISD::StoreParamU32: {
3105     Opcode = NVPTX::StoreParamI32;
3106     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3107                                                 MVT::i32);
3108     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3109                                          MVT::i32, Ops[0], CvtNone);
3110     Ops[0] = SDValue(Cvt, 0);
3111     break;
3112   }
3113   case NVPTXISD::StoreParamS32: {
3114     Opcode = NVPTX::StoreParamI32;
3115     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3116                                                 MVT::i32);
3117     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3118                                          MVT::i32, Ops[0], CvtNone);
3119     Ops[0] = SDValue(Cvt, 0);
3120     break;
3121   }
3122   }
3123 
3124   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3125   SDNode *Ret =
3126       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3127   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3128   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3129   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3130 
3131   ReplaceNode(N, Ret);
3132   return true;
3133 }
3134 
tryTextureIntrinsic(SDNode * N)3135 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
3136   SDValue Chain = N->getOperand(0);
3137   unsigned Opc = 0;
3138   SmallVector<SDValue, 8> Ops;
3139 
3140   switch (N->getOpcode()) {
3141   default: return false;
3142   case NVPTXISD::Tex1DFloatS32:
3143     Opc = NVPTX::TEX_1D_F32_S32;
3144     break;
3145   case NVPTXISD::Tex1DFloatFloat:
3146     Opc = NVPTX::TEX_1D_F32_F32;
3147     break;
3148   case NVPTXISD::Tex1DFloatFloatLevel:
3149     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3150     break;
3151   case NVPTXISD::Tex1DFloatFloatGrad:
3152     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3153     break;
3154   case NVPTXISD::Tex1DS32S32:
3155     Opc = NVPTX::TEX_1D_S32_S32;
3156     break;
3157   case NVPTXISD::Tex1DS32Float:
3158     Opc = NVPTX::TEX_1D_S32_F32;
3159     break;
3160   case NVPTXISD::Tex1DS32FloatLevel:
3161     Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3162     break;
3163   case NVPTXISD::Tex1DS32FloatGrad:
3164     Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3165     break;
3166   case NVPTXISD::Tex1DU32S32:
3167     Opc = NVPTX::TEX_1D_U32_S32;
3168     break;
3169   case NVPTXISD::Tex1DU32Float:
3170     Opc = NVPTX::TEX_1D_U32_F32;
3171     break;
3172   case NVPTXISD::Tex1DU32FloatLevel:
3173     Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3174     break;
3175   case NVPTXISD::Tex1DU32FloatGrad:
3176     Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3177     break;
3178   case NVPTXISD::Tex1DArrayFloatS32:
3179     Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3180     break;
3181   case NVPTXISD::Tex1DArrayFloatFloat:
3182     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3183     break;
3184   case NVPTXISD::Tex1DArrayFloatFloatLevel:
3185     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3186     break;
3187   case NVPTXISD::Tex1DArrayFloatFloatGrad:
3188     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3189     break;
3190   case NVPTXISD::Tex1DArrayS32S32:
3191     Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3192     break;
3193   case NVPTXISD::Tex1DArrayS32Float:
3194     Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3195     break;
3196   case NVPTXISD::Tex1DArrayS32FloatLevel:
3197     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3198     break;
3199   case NVPTXISD::Tex1DArrayS32FloatGrad:
3200     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3201     break;
3202   case NVPTXISD::Tex1DArrayU32S32:
3203     Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3204     break;
3205   case NVPTXISD::Tex1DArrayU32Float:
3206     Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3207     break;
3208   case NVPTXISD::Tex1DArrayU32FloatLevel:
3209     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3210     break;
3211   case NVPTXISD::Tex1DArrayU32FloatGrad:
3212     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3213     break;
3214   case NVPTXISD::Tex2DFloatS32:
3215     Opc = NVPTX::TEX_2D_F32_S32;
3216     break;
3217   case NVPTXISD::Tex2DFloatFloat:
3218     Opc = NVPTX::TEX_2D_F32_F32;
3219     break;
3220   case NVPTXISD::Tex2DFloatFloatLevel:
3221     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3222     break;
3223   case NVPTXISD::Tex2DFloatFloatGrad:
3224     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3225     break;
3226   case NVPTXISD::Tex2DS32S32:
3227     Opc = NVPTX::TEX_2D_S32_S32;
3228     break;
3229   case NVPTXISD::Tex2DS32Float:
3230     Opc = NVPTX::TEX_2D_S32_F32;
3231     break;
3232   case NVPTXISD::Tex2DS32FloatLevel:
3233     Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3234     break;
3235   case NVPTXISD::Tex2DS32FloatGrad:
3236     Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3237     break;
3238   case NVPTXISD::Tex2DU32S32:
3239     Opc = NVPTX::TEX_2D_U32_S32;
3240     break;
3241   case NVPTXISD::Tex2DU32Float:
3242     Opc = NVPTX::TEX_2D_U32_F32;
3243     break;
3244   case NVPTXISD::Tex2DU32FloatLevel:
3245     Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3246     break;
3247   case NVPTXISD::Tex2DU32FloatGrad:
3248     Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3249     break;
3250   case NVPTXISD::Tex2DArrayFloatS32:
3251     Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3252     break;
3253   case NVPTXISD::Tex2DArrayFloatFloat:
3254     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3255     break;
3256   case NVPTXISD::Tex2DArrayFloatFloatLevel:
3257     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3258     break;
3259   case NVPTXISD::Tex2DArrayFloatFloatGrad:
3260     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3261     break;
3262   case NVPTXISD::Tex2DArrayS32S32:
3263     Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3264     break;
3265   case NVPTXISD::Tex2DArrayS32Float:
3266     Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3267     break;
3268   case NVPTXISD::Tex2DArrayS32FloatLevel:
3269     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3270     break;
3271   case NVPTXISD::Tex2DArrayS32FloatGrad:
3272     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3273     break;
3274   case NVPTXISD::Tex2DArrayU32S32:
3275     Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3276     break;
3277   case NVPTXISD::Tex2DArrayU32Float:
3278     Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3279     break;
3280   case NVPTXISD::Tex2DArrayU32FloatLevel:
3281     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3282     break;
3283   case NVPTXISD::Tex2DArrayU32FloatGrad:
3284     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3285     break;
3286   case NVPTXISD::Tex3DFloatS32:
3287     Opc = NVPTX::TEX_3D_F32_S32;
3288     break;
3289   case NVPTXISD::Tex3DFloatFloat:
3290     Opc = NVPTX::TEX_3D_F32_F32;
3291     break;
3292   case NVPTXISD::Tex3DFloatFloatLevel:
3293     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3294     break;
3295   case NVPTXISD::Tex3DFloatFloatGrad:
3296     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3297     break;
3298   case NVPTXISD::Tex3DS32S32:
3299     Opc = NVPTX::TEX_3D_S32_S32;
3300     break;
3301   case NVPTXISD::Tex3DS32Float:
3302     Opc = NVPTX::TEX_3D_S32_F32;
3303     break;
3304   case NVPTXISD::Tex3DS32FloatLevel:
3305     Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3306     break;
3307   case NVPTXISD::Tex3DS32FloatGrad:
3308     Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3309     break;
3310   case NVPTXISD::Tex3DU32S32:
3311     Opc = NVPTX::TEX_3D_U32_S32;
3312     break;
3313   case NVPTXISD::Tex3DU32Float:
3314     Opc = NVPTX::TEX_3D_U32_F32;
3315     break;
3316   case NVPTXISD::Tex3DU32FloatLevel:
3317     Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3318     break;
3319   case NVPTXISD::Tex3DU32FloatGrad:
3320     Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3321     break;
3322   case NVPTXISD::TexCubeFloatFloat:
3323     Opc = NVPTX::TEX_CUBE_F32_F32;
3324     break;
3325   case NVPTXISD::TexCubeFloatFloatLevel:
3326     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3327     break;
3328   case NVPTXISD::TexCubeS32Float:
3329     Opc = NVPTX::TEX_CUBE_S32_F32;
3330     break;
3331   case NVPTXISD::TexCubeS32FloatLevel:
3332     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3333     break;
3334   case NVPTXISD::TexCubeU32Float:
3335     Opc = NVPTX::TEX_CUBE_U32_F32;
3336     break;
3337   case NVPTXISD::TexCubeU32FloatLevel:
3338     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3339     break;
3340   case NVPTXISD::TexCubeArrayFloatFloat:
3341     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3342     break;
3343   case NVPTXISD::TexCubeArrayFloatFloatLevel:
3344     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3345     break;
3346   case NVPTXISD::TexCubeArrayS32Float:
3347     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3348     break;
3349   case NVPTXISD::TexCubeArrayS32FloatLevel:
3350     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3351     break;
3352   case NVPTXISD::TexCubeArrayU32Float:
3353     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3354     break;
3355   case NVPTXISD::TexCubeArrayU32FloatLevel:
3356     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3357     break;
3358   case NVPTXISD::Tld4R2DFloatFloat:
3359     Opc = NVPTX::TLD4_R_2D_F32_F32;
3360     break;
3361   case NVPTXISD::Tld4G2DFloatFloat:
3362     Opc = NVPTX::TLD4_G_2D_F32_F32;
3363     break;
3364   case NVPTXISD::Tld4B2DFloatFloat:
3365     Opc = NVPTX::TLD4_B_2D_F32_F32;
3366     break;
3367   case NVPTXISD::Tld4A2DFloatFloat:
3368     Opc = NVPTX::TLD4_A_2D_F32_F32;
3369     break;
3370   case NVPTXISD::Tld4R2DS64Float:
3371     Opc = NVPTX::TLD4_R_2D_S32_F32;
3372     break;
3373   case NVPTXISD::Tld4G2DS64Float:
3374     Opc = NVPTX::TLD4_G_2D_S32_F32;
3375     break;
3376   case NVPTXISD::Tld4B2DS64Float:
3377     Opc = NVPTX::TLD4_B_2D_S32_F32;
3378     break;
3379   case NVPTXISD::Tld4A2DS64Float:
3380     Opc = NVPTX::TLD4_A_2D_S32_F32;
3381     break;
3382   case NVPTXISD::Tld4R2DU64Float:
3383     Opc = NVPTX::TLD4_R_2D_U32_F32;
3384     break;
3385   case NVPTXISD::Tld4G2DU64Float:
3386     Opc = NVPTX::TLD4_G_2D_U32_F32;
3387     break;
3388   case NVPTXISD::Tld4B2DU64Float:
3389     Opc = NVPTX::TLD4_B_2D_U32_F32;
3390     break;
3391   case NVPTXISD::Tld4A2DU64Float:
3392     Opc = NVPTX::TLD4_A_2D_U32_F32;
3393     break;
3394   case NVPTXISD::TexUnified1DFloatS32:
3395     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3396     break;
3397   case NVPTXISD::TexUnified1DFloatFloat:
3398     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3399     break;
3400   case NVPTXISD::TexUnified1DFloatFloatLevel:
3401     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3402     break;
3403   case NVPTXISD::TexUnified1DFloatFloatGrad:
3404     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3405     break;
3406   case NVPTXISD::TexUnified1DS32S32:
3407     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3408     break;
3409   case NVPTXISD::TexUnified1DS32Float:
3410     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3411     break;
3412   case NVPTXISD::TexUnified1DS32FloatLevel:
3413     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3414     break;
3415   case NVPTXISD::TexUnified1DS32FloatGrad:
3416     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3417     break;
3418   case NVPTXISD::TexUnified1DU32S32:
3419     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3420     break;
3421   case NVPTXISD::TexUnified1DU32Float:
3422     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3423     break;
3424   case NVPTXISD::TexUnified1DU32FloatLevel:
3425     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3426     break;
3427   case NVPTXISD::TexUnified1DU32FloatGrad:
3428     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3429     break;
3430   case NVPTXISD::TexUnified1DArrayFloatS32:
3431     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3432     break;
3433   case NVPTXISD::TexUnified1DArrayFloatFloat:
3434     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3435     break;
3436   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3437     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3438     break;
3439   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3440     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3441     break;
3442   case NVPTXISD::TexUnified1DArrayS32S32:
3443     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3444     break;
3445   case NVPTXISD::TexUnified1DArrayS32Float:
3446     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3447     break;
3448   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3449     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3450     break;
3451   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3452     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3453     break;
3454   case NVPTXISD::TexUnified1DArrayU32S32:
3455     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3456     break;
3457   case NVPTXISD::TexUnified1DArrayU32Float:
3458     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3459     break;
3460   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3461     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3462     break;
3463   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3464     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3465     break;
3466   case NVPTXISD::TexUnified2DFloatS32:
3467     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3468     break;
3469   case NVPTXISD::TexUnified2DFloatFloat:
3470     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3471     break;
3472   case NVPTXISD::TexUnified2DFloatFloatLevel:
3473     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3474     break;
3475   case NVPTXISD::TexUnified2DFloatFloatGrad:
3476     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3477     break;
3478   case NVPTXISD::TexUnified2DS32S32:
3479     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3480     break;
3481   case NVPTXISD::TexUnified2DS32Float:
3482     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3483     break;
3484   case NVPTXISD::TexUnified2DS32FloatLevel:
3485     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3486     break;
3487   case NVPTXISD::TexUnified2DS32FloatGrad:
3488     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3489     break;
3490   case NVPTXISD::TexUnified2DU32S32:
3491     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3492     break;
3493   case NVPTXISD::TexUnified2DU32Float:
3494     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3495     break;
3496   case NVPTXISD::TexUnified2DU32FloatLevel:
3497     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3498     break;
3499   case NVPTXISD::TexUnified2DU32FloatGrad:
3500     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3501     break;
3502   case NVPTXISD::TexUnified2DArrayFloatS32:
3503     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3504     break;
3505   case NVPTXISD::TexUnified2DArrayFloatFloat:
3506     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3507     break;
3508   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3509     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3510     break;
3511   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3512     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3513     break;
3514   case NVPTXISD::TexUnified2DArrayS32S32:
3515     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3516     break;
3517   case NVPTXISD::TexUnified2DArrayS32Float:
3518     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3519     break;
3520   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3521     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3522     break;
3523   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3524     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3525     break;
3526   case NVPTXISD::TexUnified2DArrayU32S32:
3527     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3528     break;
3529   case NVPTXISD::TexUnified2DArrayU32Float:
3530     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3531     break;
3532   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3533     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3534     break;
3535   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3536     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3537     break;
3538   case NVPTXISD::TexUnified3DFloatS32:
3539     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3540     break;
3541   case NVPTXISD::TexUnified3DFloatFloat:
3542     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3543     break;
3544   case NVPTXISD::TexUnified3DFloatFloatLevel:
3545     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3546     break;
3547   case NVPTXISD::TexUnified3DFloatFloatGrad:
3548     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3549     break;
3550   case NVPTXISD::TexUnified3DS32S32:
3551     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3552     break;
3553   case NVPTXISD::TexUnified3DS32Float:
3554     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3555     break;
3556   case NVPTXISD::TexUnified3DS32FloatLevel:
3557     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3558     break;
3559   case NVPTXISD::TexUnified3DS32FloatGrad:
3560     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3561     break;
3562   case NVPTXISD::TexUnified3DU32S32:
3563     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3564     break;
3565   case NVPTXISD::TexUnified3DU32Float:
3566     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3567     break;
3568   case NVPTXISD::TexUnified3DU32FloatLevel:
3569     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3570     break;
3571   case NVPTXISD::TexUnified3DU32FloatGrad:
3572     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3573     break;
3574   case NVPTXISD::TexUnifiedCubeFloatFloat:
3575     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3576     break;
3577   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3578     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3579     break;
3580   case NVPTXISD::TexUnifiedCubeS32Float:
3581     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3582     break;
3583   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3584     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3585     break;
3586   case NVPTXISD::TexUnifiedCubeU32Float:
3587     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3588     break;
3589   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3590     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3591     break;
3592   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3593     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3594     break;
3595   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3596     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3597     break;
3598   case NVPTXISD::TexUnifiedCubeArrayS32Float:
3599     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3600     break;
3601   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3602     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3603     break;
3604   case NVPTXISD::TexUnifiedCubeArrayU32Float:
3605     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3606     break;
3607   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3608     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3609     break;
3610   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3611     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3612     break;
3613   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3614     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3615     break;
3616   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3617     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3618     break;
3619   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3620     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3621     break;
3622   case NVPTXISD::Tld4UnifiedR2DS64Float:
3623     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3624     break;
3625   case NVPTXISD::Tld4UnifiedG2DS64Float:
3626     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3627     break;
3628   case NVPTXISD::Tld4UnifiedB2DS64Float:
3629     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3630     break;
3631   case NVPTXISD::Tld4UnifiedA2DS64Float:
3632     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3633     break;
3634   case NVPTXISD::Tld4UnifiedR2DU64Float:
3635     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3636     break;
3637   case NVPTXISD::Tld4UnifiedG2DU64Float:
3638     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3639     break;
3640   case NVPTXISD::Tld4UnifiedB2DU64Float:
3641     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3642     break;
3643   case NVPTXISD::Tld4UnifiedA2DU64Float:
3644     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3645     break;
3646   }
3647 
3648   // Copy over operands
3649   for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3650     Ops.push_back(N->getOperand(i));
3651   }
3652 
3653   Ops.push_back(Chain);
3654   ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3655   return true;
3656 }
3657 
trySurfaceIntrinsic(SDNode * N)3658 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
3659   SDValue Chain = N->getOperand(0);
3660   SDValue TexHandle = N->getOperand(1);
3661   unsigned Opc = 0;
3662   SmallVector<SDValue, 8> Ops;
3663   switch (N->getOpcode()) {
3664   default: return false;
3665   case NVPTXISD::Suld1DI8Clamp:
3666     Opc = NVPTX::SULD_1D_I8_CLAMP;
3667     Ops.push_back(TexHandle);
3668     Ops.push_back(N->getOperand(2));
3669     Ops.push_back(Chain);
3670     break;
3671   case NVPTXISD::Suld1DI16Clamp:
3672     Opc = NVPTX::SULD_1D_I16_CLAMP;
3673     Ops.push_back(TexHandle);
3674     Ops.push_back(N->getOperand(2));
3675     Ops.push_back(Chain);
3676     break;
3677   case NVPTXISD::Suld1DI32Clamp:
3678     Opc = NVPTX::SULD_1D_I32_CLAMP;
3679     Ops.push_back(TexHandle);
3680     Ops.push_back(N->getOperand(2));
3681     Ops.push_back(Chain);
3682     break;
3683   case NVPTXISD::Suld1DI64Clamp:
3684     Opc = NVPTX::SULD_1D_I64_CLAMP;
3685     Ops.push_back(TexHandle);
3686     Ops.push_back(N->getOperand(2));
3687     Ops.push_back(Chain);
3688     break;
3689   case NVPTXISD::Suld1DV2I8Clamp:
3690     Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3691     Ops.push_back(TexHandle);
3692     Ops.push_back(N->getOperand(2));
3693     Ops.push_back(Chain);
3694     break;
3695   case NVPTXISD::Suld1DV2I16Clamp:
3696     Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3697     Ops.push_back(TexHandle);
3698     Ops.push_back(N->getOperand(2));
3699     Ops.push_back(Chain);
3700     break;
3701   case NVPTXISD::Suld1DV2I32Clamp:
3702     Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3703     Ops.push_back(TexHandle);
3704     Ops.push_back(N->getOperand(2));
3705     Ops.push_back(Chain);
3706     break;
3707   case NVPTXISD::Suld1DV2I64Clamp:
3708     Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3709     Ops.push_back(TexHandle);
3710     Ops.push_back(N->getOperand(2));
3711     Ops.push_back(Chain);
3712     break;
3713   case NVPTXISD::Suld1DV4I8Clamp:
3714     Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3715     Ops.push_back(TexHandle);
3716     Ops.push_back(N->getOperand(2));
3717     Ops.push_back(Chain);
3718     break;
3719   case NVPTXISD::Suld1DV4I16Clamp:
3720     Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3721     Ops.push_back(TexHandle);
3722     Ops.push_back(N->getOperand(2));
3723     Ops.push_back(Chain);
3724     break;
3725   case NVPTXISD::Suld1DV4I32Clamp:
3726     Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3727     Ops.push_back(TexHandle);
3728     Ops.push_back(N->getOperand(2));
3729     Ops.push_back(Chain);
3730     break;
3731   case NVPTXISD::Suld1DArrayI8Clamp:
3732     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3733     Ops.push_back(TexHandle);
3734     Ops.push_back(N->getOperand(2));
3735     Ops.push_back(N->getOperand(3));
3736     Ops.push_back(Chain);
3737     break;
3738   case NVPTXISD::Suld1DArrayI16Clamp:
3739     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3740     Ops.push_back(TexHandle);
3741     Ops.push_back(N->getOperand(2));
3742     Ops.push_back(N->getOperand(3));
3743     Ops.push_back(Chain);
3744     break;
3745   case NVPTXISD::Suld1DArrayI32Clamp:
3746     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3747     Ops.push_back(TexHandle);
3748     Ops.push_back(N->getOperand(2));
3749     Ops.push_back(N->getOperand(3));
3750     Ops.push_back(Chain);
3751     break;
3752   case NVPTXISD::Suld1DArrayI64Clamp:
3753     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3754     Ops.push_back(TexHandle);
3755     Ops.push_back(N->getOperand(2));
3756     Ops.push_back(N->getOperand(3));
3757     Ops.push_back(Chain);
3758     break;
3759   case NVPTXISD::Suld1DArrayV2I8Clamp:
3760     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3761     Ops.push_back(TexHandle);
3762     Ops.push_back(N->getOperand(2));
3763     Ops.push_back(N->getOperand(3));
3764     Ops.push_back(Chain);
3765     break;
3766   case NVPTXISD::Suld1DArrayV2I16Clamp:
3767     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3768     Ops.push_back(TexHandle);
3769     Ops.push_back(N->getOperand(2));
3770     Ops.push_back(N->getOperand(3));
3771     Ops.push_back(Chain);
3772     break;
3773   case NVPTXISD::Suld1DArrayV2I32Clamp:
3774     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3775     Ops.push_back(TexHandle);
3776     Ops.push_back(N->getOperand(2));
3777     Ops.push_back(N->getOperand(3));
3778     Ops.push_back(Chain);
3779     break;
3780   case NVPTXISD::Suld1DArrayV2I64Clamp:
3781     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3782     Ops.push_back(TexHandle);
3783     Ops.push_back(N->getOperand(2));
3784     Ops.push_back(N->getOperand(3));
3785     Ops.push_back(Chain);
3786     break;
3787   case NVPTXISD::Suld1DArrayV4I8Clamp:
3788     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3789     Ops.push_back(TexHandle);
3790     Ops.push_back(N->getOperand(2));
3791     Ops.push_back(N->getOperand(3));
3792     Ops.push_back(Chain);
3793     break;
3794   case NVPTXISD::Suld1DArrayV4I16Clamp:
3795     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3796     Ops.push_back(TexHandle);
3797     Ops.push_back(N->getOperand(2));
3798     Ops.push_back(N->getOperand(3));
3799     Ops.push_back(Chain);
3800     break;
3801   case NVPTXISD::Suld1DArrayV4I32Clamp:
3802     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3803     Ops.push_back(TexHandle);
3804     Ops.push_back(N->getOperand(2));
3805     Ops.push_back(N->getOperand(3));
3806     Ops.push_back(Chain);
3807     break;
3808   case NVPTXISD::Suld2DI8Clamp:
3809     Opc = NVPTX::SULD_2D_I8_CLAMP;
3810     Ops.push_back(TexHandle);
3811     Ops.push_back(N->getOperand(2));
3812     Ops.push_back(N->getOperand(3));
3813     Ops.push_back(Chain);
3814     break;
3815   case NVPTXISD::Suld2DI16Clamp:
3816     Opc = NVPTX::SULD_2D_I16_CLAMP;
3817     Ops.push_back(TexHandle);
3818     Ops.push_back(N->getOperand(2));
3819     Ops.push_back(N->getOperand(3));
3820     Ops.push_back(Chain);
3821     break;
3822   case NVPTXISD::Suld2DI32Clamp:
3823     Opc = NVPTX::SULD_2D_I32_CLAMP;
3824     Ops.push_back(TexHandle);
3825     Ops.push_back(N->getOperand(2));
3826     Ops.push_back(N->getOperand(3));
3827     Ops.push_back(Chain);
3828     break;
3829   case NVPTXISD::Suld2DI64Clamp:
3830     Opc = NVPTX::SULD_2D_I64_CLAMP;
3831     Ops.push_back(TexHandle);
3832     Ops.push_back(N->getOperand(2));
3833     Ops.push_back(N->getOperand(3));
3834     Ops.push_back(Chain);
3835     break;
3836   case NVPTXISD::Suld2DV2I8Clamp:
3837     Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3838     Ops.push_back(TexHandle);
3839     Ops.push_back(N->getOperand(2));
3840     Ops.push_back(N->getOperand(3));
3841     Ops.push_back(Chain);
3842     break;
3843   case NVPTXISD::Suld2DV2I16Clamp:
3844     Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3845     Ops.push_back(TexHandle);
3846     Ops.push_back(N->getOperand(2));
3847     Ops.push_back(N->getOperand(3));
3848     Ops.push_back(Chain);
3849     break;
3850   case NVPTXISD::Suld2DV2I32Clamp:
3851     Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3852     Ops.push_back(TexHandle);
3853     Ops.push_back(N->getOperand(2));
3854     Ops.push_back(N->getOperand(3));
3855     Ops.push_back(Chain);
3856     break;
3857   case NVPTXISD::Suld2DV2I64Clamp:
3858     Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3859     Ops.push_back(TexHandle);
3860     Ops.push_back(N->getOperand(2));
3861     Ops.push_back(N->getOperand(3));
3862     Ops.push_back(Chain);
3863     break;
3864   case NVPTXISD::Suld2DV4I8Clamp:
3865     Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3866     Ops.push_back(TexHandle);
3867     Ops.push_back(N->getOperand(2));
3868     Ops.push_back(N->getOperand(3));
3869     Ops.push_back(Chain);
3870     break;
3871   case NVPTXISD::Suld2DV4I16Clamp:
3872     Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3873     Ops.push_back(TexHandle);
3874     Ops.push_back(N->getOperand(2));
3875     Ops.push_back(N->getOperand(3));
3876     Ops.push_back(Chain);
3877     break;
3878   case NVPTXISD::Suld2DV4I32Clamp:
3879     Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3880     Ops.push_back(TexHandle);
3881     Ops.push_back(N->getOperand(2));
3882     Ops.push_back(N->getOperand(3));
3883     Ops.push_back(Chain);
3884     break;
3885   case NVPTXISD::Suld2DArrayI8Clamp:
3886     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3887     Ops.push_back(TexHandle);
3888     Ops.push_back(N->getOperand(2));
3889     Ops.push_back(N->getOperand(3));
3890     Ops.push_back(N->getOperand(4));
3891     Ops.push_back(Chain);
3892     break;
3893   case NVPTXISD::Suld2DArrayI16Clamp:
3894     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3895     Ops.push_back(TexHandle);
3896     Ops.push_back(N->getOperand(2));
3897     Ops.push_back(N->getOperand(3));
3898     Ops.push_back(N->getOperand(4));
3899     Ops.push_back(Chain);
3900     break;
3901   case NVPTXISD::Suld2DArrayI32Clamp:
3902     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3903     Ops.push_back(TexHandle);
3904     Ops.push_back(N->getOperand(2));
3905     Ops.push_back(N->getOperand(3));
3906     Ops.push_back(N->getOperand(4));
3907     Ops.push_back(Chain);
3908     break;
3909   case NVPTXISD::Suld2DArrayI64Clamp:
3910     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3911     Ops.push_back(TexHandle);
3912     Ops.push_back(N->getOperand(2));
3913     Ops.push_back(N->getOperand(3));
3914     Ops.push_back(N->getOperand(4));
3915     Ops.push_back(Chain);
3916     break;
3917   case NVPTXISD::Suld2DArrayV2I8Clamp:
3918     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3919     Ops.push_back(TexHandle);
3920     Ops.push_back(N->getOperand(2));
3921     Ops.push_back(N->getOperand(3));
3922     Ops.push_back(N->getOperand(4));
3923     Ops.push_back(Chain);
3924     break;
3925   case NVPTXISD::Suld2DArrayV2I16Clamp:
3926     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3927     Ops.push_back(TexHandle);
3928     Ops.push_back(N->getOperand(2));
3929     Ops.push_back(N->getOperand(3));
3930     Ops.push_back(N->getOperand(4));
3931     Ops.push_back(Chain);
3932     break;
3933   case NVPTXISD::Suld2DArrayV2I32Clamp:
3934     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3935     Ops.push_back(TexHandle);
3936     Ops.push_back(N->getOperand(2));
3937     Ops.push_back(N->getOperand(3));
3938     Ops.push_back(N->getOperand(4));
3939     Ops.push_back(Chain);
3940     break;
3941   case NVPTXISD::Suld2DArrayV2I64Clamp:
3942     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3943     Ops.push_back(TexHandle);
3944     Ops.push_back(N->getOperand(2));
3945     Ops.push_back(N->getOperand(3));
3946     Ops.push_back(N->getOperand(4));
3947     Ops.push_back(Chain);
3948     break;
3949   case NVPTXISD::Suld2DArrayV4I8Clamp:
3950     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3951     Ops.push_back(TexHandle);
3952     Ops.push_back(N->getOperand(2));
3953     Ops.push_back(N->getOperand(3));
3954     Ops.push_back(N->getOperand(4));
3955     Ops.push_back(Chain);
3956     break;
3957   case NVPTXISD::Suld2DArrayV4I16Clamp:
3958     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3959     Ops.push_back(TexHandle);
3960     Ops.push_back(N->getOperand(2));
3961     Ops.push_back(N->getOperand(3));
3962     Ops.push_back(N->getOperand(4));
3963     Ops.push_back(Chain);
3964     break;
3965   case NVPTXISD::Suld2DArrayV4I32Clamp:
3966     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3967     Ops.push_back(TexHandle);
3968     Ops.push_back(N->getOperand(2));
3969     Ops.push_back(N->getOperand(3));
3970     Ops.push_back(N->getOperand(4));
3971     Ops.push_back(Chain);
3972     break;
3973   case NVPTXISD::Suld3DI8Clamp:
3974     Opc = NVPTX::SULD_3D_I8_CLAMP;
3975     Ops.push_back(TexHandle);
3976     Ops.push_back(N->getOperand(2));
3977     Ops.push_back(N->getOperand(3));
3978     Ops.push_back(N->getOperand(4));
3979     Ops.push_back(Chain);
3980     break;
3981   case NVPTXISD::Suld3DI16Clamp:
3982     Opc = NVPTX::SULD_3D_I16_CLAMP;
3983     Ops.push_back(TexHandle);
3984     Ops.push_back(N->getOperand(2));
3985     Ops.push_back(N->getOperand(3));
3986     Ops.push_back(N->getOperand(4));
3987     Ops.push_back(Chain);
3988     break;
3989   case NVPTXISD::Suld3DI32Clamp:
3990     Opc = NVPTX::SULD_3D_I32_CLAMP;
3991     Ops.push_back(TexHandle);
3992     Ops.push_back(N->getOperand(2));
3993     Ops.push_back(N->getOperand(3));
3994     Ops.push_back(N->getOperand(4));
3995     Ops.push_back(Chain);
3996     break;
3997   case NVPTXISD::Suld3DI64Clamp:
3998     Opc = NVPTX::SULD_3D_I64_CLAMP;
3999     Ops.push_back(TexHandle);
4000     Ops.push_back(N->getOperand(2));
4001     Ops.push_back(N->getOperand(3));
4002     Ops.push_back(N->getOperand(4));
4003     Ops.push_back(Chain);
4004     break;
4005   case NVPTXISD::Suld3DV2I8Clamp:
4006     Opc = NVPTX::SULD_3D_V2I8_CLAMP;
4007     Ops.push_back(TexHandle);
4008     Ops.push_back(N->getOperand(2));
4009     Ops.push_back(N->getOperand(3));
4010     Ops.push_back(N->getOperand(4));
4011     Ops.push_back(Chain);
4012     break;
4013   case NVPTXISD::Suld3DV2I16Clamp:
4014     Opc = NVPTX::SULD_3D_V2I16_CLAMP;
4015     Ops.push_back(TexHandle);
4016     Ops.push_back(N->getOperand(2));
4017     Ops.push_back(N->getOperand(3));
4018     Ops.push_back(N->getOperand(4));
4019     Ops.push_back(Chain);
4020     break;
4021   case NVPTXISD::Suld3DV2I32Clamp:
4022     Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4023     Ops.push_back(TexHandle);
4024     Ops.push_back(N->getOperand(2));
4025     Ops.push_back(N->getOperand(3));
4026     Ops.push_back(N->getOperand(4));
4027     Ops.push_back(Chain);
4028     break;
4029   case NVPTXISD::Suld3DV2I64Clamp:
4030     Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4031     Ops.push_back(TexHandle);
4032     Ops.push_back(N->getOperand(2));
4033     Ops.push_back(N->getOperand(3));
4034     Ops.push_back(N->getOperand(4));
4035     Ops.push_back(Chain);
4036     break;
4037   case NVPTXISD::Suld3DV4I8Clamp:
4038     Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4039     Ops.push_back(TexHandle);
4040     Ops.push_back(N->getOperand(2));
4041     Ops.push_back(N->getOperand(3));
4042     Ops.push_back(N->getOperand(4));
4043     Ops.push_back(Chain);
4044     break;
4045   case NVPTXISD::Suld3DV4I16Clamp:
4046     Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4047     Ops.push_back(TexHandle);
4048     Ops.push_back(N->getOperand(2));
4049     Ops.push_back(N->getOperand(3));
4050     Ops.push_back(N->getOperand(4));
4051     Ops.push_back(Chain);
4052     break;
4053   case NVPTXISD::Suld3DV4I32Clamp:
4054     Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4055     Ops.push_back(TexHandle);
4056     Ops.push_back(N->getOperand(2));
4057     Ops.push_back(N->getOperand(3));
4058     Ops.push_back(N->getOperand(4));
4059     Ops.push_back(Chain);
4060     break;
4061   case NVPTXISD::Suld1DI8Trap:
4062     Opc = NVPTX::SULD_1D_I8_TRAP;
4063     Ops.push_back(TexHandle);
4064     Ops.push_back(N->getOperand(2));
4065     Ops.push_back(Chain);
4066     break;
4067   case NVPTXISD::Suld1DI16Trap:
4068     Opc = NVPTX::SULD_1D_I16_TRAP;
4069     Ops.push_back(TexHandle);
4070     Ops.push_back(N->getOperand(2));
4071     Ops.push_back(Chain);
4072     break;
4073   case NVPTXISD::Suld1DI32Trap:
4074     Opc = NVPTX::SULD_1D_I32_TRAP;
4075     Ops.push_back(TexHandle);
4076     Ops.push_back(N->getOperand(2));
4077     Ops.push_back(Chain);
4078     break;
4079   case NVPTXISD::Suld1DI64Trap:
4080     Opc = NVPTX::SULD_1D_I64_TRAP;
4081     Ops.push_back(TexHandle);
4082     Ops.push_back(N->getOperand(2));
4083     Ops.push_back(Chain);
4084     break;
4085   case NVPTXISD::Suld1DV2I8Trap:
4086     Opc = NVPTX::SULD_1D_V2I8_TRAP;
4087     Ops.push_back(TexHandle);
4088     Ops.push_back(N->getOperand(2));
4089     Ops.push_back(Chain);
4090     break;
4091   case NVPTXISD::Suld1DV2I16Trap:
4092     Opc = NVPTX::SULD_1D_V2I16_TRAP;
4093     Ops.push_back(TexHandle);
4094     Ops.push_back(N->getOperand(2));
4095     Ops.push_back(Chain);
4096     break;
4097   case NVPTXISD::Suld1DV2I32Trap:
4098     Opc = NVPTX::SULD_1D_V2I32_TRAP;
4099     Ops.push_back(TexHandle);
4100     Ops.push_back(N->getOperand(2));
4101     Ops.push_back(Chain);
4102     break;
4103   case NVPTXISD::Suld1DV2I64Trap:
4104     Opc = NVPTX::SULD_1D_V2I64_TRAP;
4105     Ops.push_back(TexHandle);
4106     Ops.push_back(N->getOperand(2));
4107     Ops.push_back(Chain);
4108     break;
4109   case NVPTXISD::Suld1DV4I8Trap:
4110     Opc = NVPTX::SULD_1D_V4I8_TRAP;
4111     Ops.push_back(TexHandle);
4112     Ops.push_back(N->getOperand(2));
4113     Ops.push_back(Chain);
4114     break;
4115   case NVPTXISD::Suld1DV4I16Trap:
4116     Opc = NVPTX::SULD_1D_V4I16_TRAP;
4117     Ops.push_back(TexHandle);
4118     Ops.push_back(N->getOperand(2));
4119     Ops.push_back(Chain);
4120     break;
4121   case NVPTXISD::Suld1DV4I32Trap:
4122     Opc = NVPTX::SULD_1D_V4I32_TRAP;
4123     Ops.push_back(TexHandle);
4124     Ops.push_back(N->getOperand(2));
4125     Ops.push_back(Chain);
4126     break;
4127   case NVPTXISD::Suld1DArrayI8Trap:
4128     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4129     Ops.push_back(TexHandle);
4130     Ops.push_back(N->getOperand(2));
4131     Ops.push_back(N->getOperand(3));
4132     Ops.push_back(Chain);
4133     break;
4134   case NVPTXISD::Suld1DArrayI16Trap:
4135     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4136     Ops.push_back(TexHandle);
4137     Ops.push_back(N->getOperand(2));
4138     Ops.push_back(N->getOperand(3));
4139     Ops.push_back(Chain);
4140     break;
4141   case NVPTXISD::Suld1DArrayI32Trap:
4142     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4143     Ops.push_back(TexHandle);
4144     Ops.push_back(N->getOperand(2));
4145     Ops.push_back(N->getOperand(3));
4146     Ops.push_back(Chain);
4147     break;
4148   case NVPTXISD::Suld1DArrayI64Trap:
4149     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4150     Ops.push_back(TexHandle);
4151     Ops.push_back(N->getOperand(2));
4152     Ops.push_back(N->getOperand(3));
4153     Ops.push_back(Chain);
4154     break;
4155   case NVPTXISD::Suld1DArrayV2I8Trap:
4156     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4157     Ops.push_back(TexHandle);
4158     Ops.push_back(N->getOperand(2));
4159     Ops.push_back(N->getOperand(3));
4160     Ops.push_back(Chain);
4161     break;
4162   case NVPTXISD::Suld1DArrayV2I16Trap:
4163     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4164     Ops.push_back(TexHandle);
4165     Ops.push_back(N->getOperand(2));
4166     Ops.push_back(N->getOperand(3));
4167     Ops.push_back(Chain);
4168     break;
4169   case NVPTXISD::Suld1DArrayV2I32Trap:
4170     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4171     Ops.push_back(TexHandle);
4172     Ops.push_back(N->getOperand(2));
4173     Ops.push_back(N->getOperand(3));
4174     Ops.push_back(Chain);
4175     break;
4176   case NVPTXISD::Suld1DArrayV2I64Trap:
4177     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4178     Ops.push_back(TexHandle);
4179     Ops.push_back(N->getOperand(2));
4180     Ops.push_back(N->getOperand(3));
4181     Ops.push_back(Chain);
4182     break;
4183   case NVPTXISD::Suld1DArrayV4I8Trap:
4184     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4185     Ops.push_back(TexHandle);
4186     Ops.push_back(N->getOperand(2));
4187     Ops.push_back(N->getOperand(3));
4188     Ops.push_back(Chain);
4189     break;
4190   case NVPTXISD::Suld1DArrayV4I16Trap:
4191     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4192     Ops.push_back(TexHandle);
4193     Ops.push_back(N->getOperand(2));
4194     Ops.push_back(N->getOperand(3));
4195     Ops.push_back(Chain);
4196     break;
4197   case NVPTXISD::Suld1DArrayV4I32Trap:
4198     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4199     Ops.push_back(TexHandle);
4200     Ops.push_back(N->getOperand(2));
4201     Ops.push_back(N->getOperand(3));
4202     Ops.push_back(Chain);
4203     break;
4204   case NVPTXISD::Suld2DI8Trap:
4205     Opc = NVPTX::SULD_2D_I8_TRAP;
4206     Ops.push_back(TexHandle);
4207     Ops.push_back(N->getOperand(2));
4208     Ops.push_back(N->getOperand(3));
4209     Ops.push_back(Chain);
4210     break;
4211   case NVPTXISD::Suld2DI16Trap:
4212     Opc = NVPTX::SULD_2D_I16_TRAP;
4213     Ops.push_back(TexHandle);
4214     Ops.push_back(N->getOperand(2));
4215     Ops.push_back(N->getOperand(3));
4216     Ops.push_back(Chain);
4217     break;
4218   case NVPTXISD::Suld2DI32Trap:
4219     Opc = NVPTX::SULD_2D_I32_TRAP;
4220     Ops.push_back(TexHandle);
4221     Ops.push_back(N->getOperand(2));
4222     Ops.push_back(N->getOperand(3));
4223     Ops.push_back(Chain);
4224     break;
4225   case NVPTXISD::Suld2DI64Trap:
4226     Opc = NVPTX::SULD_2D_I64_TRAP;
4227     Ops.push_back(TexHandle);
4228     Ops.push_back(N->getOperand(2));
4229     Ops.push_back(N->getOperand(3));
4230     Ops.push_back(Chain);
4231     break;
4232   case NVPTXISD::Suld2DV2I8Trap:
4233     Opc = NVPTX::SULD_2D_V2I8_TRAP;
4234     Ops.push_back(TexHandle);
4235     Ops.push_back(N->getOperand(2));
4236     Ops.push_back(N->getOperand(3));
4237     Ops.push_back(Chain);
4238     break;
4239   case NVPTXISD::Suld2DV2I16Trap:
4240     Opc = NVPTX::SULD_2D_V2I16_TRAP;
4241     Ops.push_back(TexHandle);
4242     Ops.push_back(N->getOperand(2));
4243     Ops.push_back(N->getOperand(3));
4244     Ops.push_back(Chain);
4245     break;
4246   case NVPTXISD::Suld2DV2I32Trap:
4247     Opc = NVPTX::SULD_2D_V2I32_TRAP;
4248     Ops.push_back(TexHandle);
4249     Ops.push_back(N->getOperand(2));
4250     Ops.push_back(N->getOperand(3));
4251     Ops.push_back(Chain);
4252     break;
4253   case NVPTXISD::Suld2DV2I64Trap:
4254     Opc = NVPTX::SULD_2D_V2I64_TRAP;
4255     Ops.push_back(TexHandle);
4256     Ops.push_back(N->getOperand(2));
4257     Ops.push_back(N->getOperand(3));
4258     Ops.push_back(Chain);
4259     break;
4260   case NVPTXISD::Suld2DV4I8Trap:
4261     Opc = NVPTX::SULD_2D_V4I8_TRAP;
4262     Ops.push_back(TexHandle);
4263     Ops.push_back(N->getOperand(2));
4264     Ops.push_back(N->getOperand(3));
4265     Ops.push_back(Chain);
4266     break;
4267   case NVPTXISD::Suld2DV4I16Trap:
4268     Opc = NVPTX::SULD_2D_V4I16_TRAP;
4269     Ops.push_back(TexHandle);
4270     Ops.push_back(N->getOperand(2));
4271     Ops.push_back(N->getOperand(3));
4272     Ops.push_back(Chain);
4273     break;
4274   case NVPTXISD::Suld2DV4I32Trap:
4275     Opc = NVPTX::SULD_2D_V4I32_TRAP;
4276     Ops.push_back(TexHandle);
4277     Ops.push_back(N->getOperand(2));
4278     Ops.push_back(N->getOperand(3));
4279     Ops.push_back(Chain);
4280     break;
4281   case NVPTXISD::Suld2DArrayI8Trap:
4282     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4283     Ops.push_back(TexHandle);
4284     Ops.push_back(N->getOperand(2));
4285     Ops.push_back(N->getOperand(3));
4286     Ops.push_back(N->getOperand(4));
4287     Ops.push_back(Chain);
4288     break;
4289   case NVPTXISD::Suld2DArrayI16Trap:
4290     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4291     Ops.push_back(TexHandle);
4292     Ops.push_back(N->getOperand(2));
4293     Ops.push_back(N->getOperand(3));
4294     Ops.push_back(N->getOperand(4));
4295     Ops.push_back(Chain);
4296     break;
4297   case NVPTXISD::Suld2DArrayI32Trap:
4298     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4299     Ops.push_back(TexHandle);
4300     Ops.push_back(N->getOperand(2));
4301     Ops.push_back(N->getOperand(3));
4302     Ops.push_back(N->getOperand(4));
4303     Ops.push_back(Chain);
4304     break;
4305   case NVPTXISD::Suld2DArrayI64Trap:
4306     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4307     Ops.push_back(TexHandle);
4308     Ops.push_back(N->getOperand(2));
4309     Ops.push_back(N->getOperand(3));
4310     Ops.push_back(N->getOperand(4));
4311     Ops.push_back(Chain);
4312     break;
4313   case NVPTXISD::Suld2DArrayV2I8Trap:
4314     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4315     Ops.push_back(TexHandle);
4316     Ops.push_back(N->getOperand(2));
4317     Ops.push_back(N->getOperand(3));
4318     Ops.push_back(N->getOperand(4));
4319     Ops.push_back(Chain);
4320     break;
4321   case NVPTXISD::Suld2DArrayV2I16Trap:
4322     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4323     Ops.push_back(TexHandle);
4324     Ops.push_back(N->getOperand(2));
4325     Ops.push_back(N->getOperand(3));
4326     Ops.push_back(N->getOperand(4));
4327     Ops.push_back(Chain);
4328     break;
4329   case NVPTXISD::Suld2DArrayV2I32Trap:
4330     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4331     Ops.push_back(TexHandle);
4332     Ops.push_back(N->getOperand(2));
4333     Ops.push_back(N->getOperand(3));
4334     Ops.push_back(N->getOperand(4));
4335     Ops.push_back(Chain);
4336     break;
4337   case NVPTXISD::Suld2DArrayV2I64Trap:
4338     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4339     Ops.push_back(TexHandle);
4340     Ops.push_back(N->getOperand(2));
4341     Ops.push_back(N->getOperand(3));
4342     Ops.push_back(N->getOperand(4));
4343     Ops.push_back(Chain);
4344     break;
4345   case NVPTXISD::Suld2DArrayV4I8Trap:
4346     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4347     Ops.push_back(TexHandle);
4348     Ops.push_back(N->getOperand(2));
4349     Ops.push_back(N->getOperand(3));
4350     Ops.push_back(N->getOperand(4));
4351     Ops.push_back(Chain);
4352     break;
4353   case NVPTXISD::Suld2DArrayV4I16Trap:
4354     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4355     Ops.push_back(TexHandle);
4356     Ops.push_back(N->getOperand(2));
4357     Ops.push_back(N->getOperand(3));
4358     Ops.push_back(N->getOperand(4));
4359     Ops.push_back(Chain);
4360     break;
4361   case NVPTXISD::Suld2DArrayV4I32Trap:
4362     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4363     Ops.push_back(TexHandle);
4364     Ops.push_back(N->getOperand(2));
4365     Ops.push_back(N->getOperand(3));
4366     Ops.push_back(N->getOperand(4));
4367     Ops.push_back(Chain);
4368     break;
4369   case NVPTXISD::Suld3DI8Trap:
4370     Opc = NVPTX::SULD_3D_I8_TRAP;
4371     Ops.push_back(TexHandle);
4372     Ops.push_back(N->getOperand(2));
4373     Ops.push_back(N->getOperand(3));
4374     Ops.push_back(N->getOperand(4));
4375     Ops.push_back(Chain);
4376     break;
4377   case NVPTXISD::Suld3DI16Trap:
4378     Opc = NVPTX::SULD_3D_I16_TRAP;
4379     Ops.push_back(TexHandle);
4380     Ops.push_back(N->getOperand(2));
4381     Ops.push_back(N->getOperand(3));
4382     Ops.push_back(N->getOperand(4));
4383     Ops.push_back(Chain);
4384     break;
4385   case NVPTXISD::Suld3DI32Trap:
4386     Opc = NVPTX::SULD_3D_I32_TRAP;
4387     Ops.push_back(TexHandle);
4388     Ops.push_back(N->getOperand(2));
4389     Ops.push_back(N->getOperand(3));
4390     Ops.push_back(N->getOperand(4));
4391     Ops.push_back(Chain);
4392     break;
4393   case NVPTXISD::Suld3DI64Trap:
4394     Opc = NVPTX::SULD_3D_I64_TRAP;
4395     Ops.push_back(TexHandle);
4396     Ops.push_back(N->getOperand(2));
4397     Ops.push_back(N->getOperand(3));
4398     Ops.push_back(N->getOperand(4));
4399     Ops.push_back(Chain);
4400     break;
4401   case NVPTXISD::Suld3DV2I8Trap:
4402     Opc = NVPTX::SULD_3D_V2I8_TRAP;
4403     Ops.push_back(TexHandle);
4404     Ops.push_back(N->getOperand(2));
4405     Ops.push_back(N->getOperand(3));
4406     Ops.push_back(N->getOperand(4));
4407     Ops.push_back(Chain);
4408     break;
4409   case NVPTXISD::Suld3DV2I16Trap:
4410     Opc = NVPTX::SULD_3D_V2I16_TRAP;
4411     Ops.push_back(TexHandle);
4412     Ops.push_back(N->getOperand(2));
4413     Ops.push_back(N->getOperand(3));
4414     Ops.push_back(N->getOperand(4));
4415     Ops.push_back(Chain);
4416     break;
4417   case NVPTXISD::Suld3DV2I32Trap:
4418     Opc = NVPTX::SULD_3D_V2I32_TRAP;
4419     Ops.push_back(TexHandle);
4420     Ops.push_back(N->getOperand(2));
4421     Ops.push_back(N->getOperand(3));
4422     Ops.push_back(N->getOperand(4));
4423     Ops.push_back(Chain);
4424     break;
4425   case NVPTXISD::Suld3DV2I64Trap:
4426     Opc = NVPTX::SULD_3D_V2I64_TRAP;
4427     Ops.push_back(TexHandle);
4428     Ops.push_back(N->getOperand(2));
4429     Ops.push_back(N->getOperand(3));
4430     Ops.push_back(N->getOperand(4));
4431     Ops.push_back(Chain);
4432     break;
4433   case NVPTXISD::Suld3DV4I8Trap:
4434     Opc = NVPTX::SULD_3D_V4I8_TRAP;
4435     Ops.push_back(TexHandle);
4436     Ops.push_back(N->getOperand(2));
4437     Ops.push_back(N->getOperand(3));
4438     Ops.push_back(N->getOperand(4));
4439     Ops.push_back(Chain);
4440     break;
4441   case NVPTXISD::Suld3DV4I16Trap:
4442     Opc = NVPTX::SULD_3D_V4I16_TRAP;
4443     Ops.push_back(TexHandle);
4444     Ops.push_back(N->getOperand(2));
4445     Ops.push_back(N->getOperand(3));
4446     Ops.push_back(N->getOperand(4));
4447     Ops.push_back(Chain);
4448     break;
4449   case NVPTXISD::Suld3DV4I32Trap:
4450     Opc = NVPTX::SULD_3D_V4I32_TRAP;
4451     Ops.push_back(TexHandle);
4452     Ops.push_back(N->getOperand(2));
4453     Ops.push_back(N->getOperand(3));
4454     Ops.push_back(N->getOperand(4));
4455     Ops.push_back(Chain);
4456     break;
4457   case NVPTXISD::Suld1DI8Zero:
4458     Opc = NVPTX::SULD_1D_I8_ZERO;
4459     Ops.push_back(TexHandle);
4460     Ops.push_back(N->getOperand(2));
4461     Ops.push_back(Chain);
4462     break;
4463   case NVPTXISD::Suld1DI16Zero:
4464     Opc = NVPTX::SULD_1D_I16_ZERO;
4465     Ops.push_back(TexHandle);
4466     Ops.push_back(N->getOperand(2));
4467     Ops.push_back(Chain);
4468     break;
4469   case NVPTXISD::Suld1DI32Zero:
4470     Opc = NVPTX::SULD_1D_I32_ZERO;
4471     Ops.push_back(TexHandle);
4472     Ops.push_back(N->getOperand(2));
4473     Ops.push_back(Chain);
4474     break;
4475   case NVPTXISD::Suld1DI64Zero:
4476     Opc = NVPTX::SULD_1D_I64_ZERO;
4477     Ops.push_back(TexHandle);
4478     Ops.push_back(N->getOperand(2));
4479     Ops.push_back(Chain);
4480     break;
4481   case NVPTXISD::Suld1DV2I8Zero:
4482     Opc = NVPTX::SULD_1D_V2I8_ZERO;
4483     Ops.push_back(TexHandle);
4484     Ops.push_back(N->getOperand(2));
4485     Ops.push_back(Chain);
4486     break;
4487   case NVPTXISD::Suld1DV2I16Zero:
4488     Opc = NVPTX::SULD_1D_V2I16_ZERO;
4489     Ops.push_back(TexHandle);
4490     Ops.push_back(N->getOperand(2));
4491     Ops.push_back(Chain);
4492     break;
4493   case NVPTXISD::Suld1DV2I32Zero:
4494     Opc = NVPTX::SULD_1D_V2I32_ZERO;
4495     Ops.push_back(TexHandle);
4496     Ops.push_back(N->getOperand(2));
4497     Ops.push_back(Chain);
4498     break;
4499   case NVPTXISD::Suld1DV2I64Zero:
4500     Opc = NVPTX::SULD_1D_V2I64_ZERO;
4501     Ops.push_back(TexHandle);
4502     Ops.push_back(N->getOperand(2));
4503     Ops.push_back(Chain);
4504     break;
4505   case NVPTXISD::Suld1DV4I8Zero:
4506     Opc = NVPTX::SULD_1D_V4I8_ZERO;
4507     Ops.push_back(TexHandle);
4508     Ops.push_back(N->getOperand(2));
4509     Ops.push_back(Chain);
4510     break;
4511   case NVPTXISD::Suld1DV4I16Zero:
4512     Opc = NVPTX::SULD_1D_V4I16_ZERO;
4513     Ops.push_back(TexHandle);
4514     Ops.push_back(N->getOperand(2));
4515     Ops.push_back(Chain);
4516     break;
4517   case NVPTXISD::Suld1DV4I32Zero:
4518     Opc = NVPTX::SULD_1D_V4I32_ZERO;
4519     Ops.push_back(TexHandle);
4520     Ops.push_back(N->getOperand(2));
4521     Ops.push_back(Chain);
4522     break;
4523   case NVPTXISD::Suld1DArrayI8Zero:
4524     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4525     Ops.push_back(TexHandle);
4526     Ops.push_back(N->getOperand(2));
4527     Ops.push_back(N->getOperand(3));
4528     Ops.push_back(Chain);
4529     break;
4530   case NVPTXISD::Suld1DArrayI16Zero:
4531     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4532     Ops.push_back(TexHandle);
4533     Ops.push_back(N->getOperand(2));
4534     Ops.push_back(N->getOperand(3));
4535     Ops.push_back(Chain);
4536     break;
4537   case NVPTXISD::Suld1DArrayI32Zero:
4538     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4539     Ops.push_back(TexHandle);
4540     Ops.push_back(N->getOperand(2));
4541     Ops.push_back(N->getOperand(3));
4542     Ops.push_back(Chain);
4543     break;
4544   case NVPTXISD::Suld1DArrayI64Zero:
4545     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4546     Ops.push_back(TexHandle);
4547     Ops.push_back(N->getOperand(2));
4548     Ops.push_back(N->getOperand(3));
4549     Ops.push_back(Chain);
4550     break;
4551   case NVPTXISD::Suld1DArrayV2I8Zero:
4552     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4553     Ops.push_back(TexHandle);
4554     Ops.push_back(N->getOperand(2));
4555     Ops.push_back(N->getOperand(3));
4556     Ops.push_back(Chain);
4557     break;
4558   case NVPTXISD::Suld1DArrayV2I16Zero:
4559     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4560     Ops.push_back(TexHandle);
4561     Ops.push_back(N->getOperand(2));
4562     Ops.push_back(N->getOperand(3));
4563     Ops.push_back(Chain);
4564     break;
4565   case NVPTXISD::Suld1DArrayV2I32Zero:
4566     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4567     Ops.push_back(TexHandle);
4568     Ops.push_back(N->getOperand(2));
4569     Ops.push_back(N->getOperand(3));
4570     Ops.push_back(Chain);
4571     break;
4572   case NVPTXISD::Suld1DArrayV2I64Zero:
4573     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4574     Ops.push_back(TexHandle);
4575     Ops.push_back(N->getOperand(2));
4576     Ops.push_back(N->getOperand(3));
4577     Ops.push_back(Chain);
4578     break;
4579   case NVPTXISD::Suld1DArrayV4I8Zero:
4580     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4581     Ops.push_back(TexHandle);
4582     Ops.push_back(N->getOperand(2));
4583     Ops.push_back(N->getOperand(3));
4584     Ops.push_back(Chain);
4585     break;
4586   case NVPTXISD::Suld1DArrayV4I16Zero:
4587     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4588     Ops.push_back(TexHandle);
4589     Ops.push_back(N->getOperand(2));
4590     Ops.push_back(N->getOperand(3));
4591     Ops.push_back(Chain);
4592     break;
4593   case NVPTXISD::Suld1DArrayV4I32Zero:
4594     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4595     Ops.push_back(TexHandle);
4596     Ops.push_back(N->getOperand(2));
4597     Ops.push_back(N->getOperand(3));
4598     Ops.push_back(Chain);
4599     break;
4600   case NVPTXISD::Suld2DI8Zero:
4601     Opc = NVPTX::SULD_2D_I8_ZERO;
4602     Ops.push_back(TexHandle);
4603     Ops.push_back(N->getOperand(2));
4604     Ops.push_back(N->getOperand(3));
4605     Ops.push_back(Chain);
4606     break;
4607   case NVPTXISD::Suld2DI16Zero:
4608     Opc = NVPTX::SULD_2D_I16_ZERO;
4609     Ops.push_back(TexHandle);
4610     Ops.push_back(N->getOperand(2));
4611     Ops.push_back(N->getOperand(3));
4612     Ops.push_back(Chain);
4613     break;
4614   case NVPTXISD::Suld2DI32Zero:
4615     Opc = NVPTX::SULD_2D_I32_ZERO;
4616     Ops.push_back(TexHandle);
4617     Ops.push_back(N->getOperand(2));
4618     Ops.push_back(N->getOperand(3));
4619     Ops.push_back(Chain);
4620     break;
4621   case NVPTXISD::Suld2DI64Zero:
4622     Opc = NVPTX::SULD_2D_I64_ZERO;
4623     Ops.push_back(TexHandle);
4624     Ops.push_back(N->getOperand(2));
4625     Ops.push_back(N->getOperand(3));
4626     Ops.push_back(Chain);
4627     break;
4628   case NVPTXISD::Suld2DV2I8Zero:
4629     Opc = NVPTX::SULD_2D_V2I8_ZERO;
4630     Ops.push_back(TexHandle);
4631     Ops.push_back(N->getOperand(2));
4632     Ops.push_back(N->getOperand(3));
4633     Ops.push_back(Chain);
4634     break;
4635   case NVPTXISD::Suld2DV2I16Zero:
4636     Opc = NVPTX::SULD_2D_V2I16_ZERO;
4637     Ops.push_back(TexHandle);
4638     Ops.push_back(N->getOperand(2));
4639     Ops.push_back(N->getOperand(3));
4640     Ops.push_back(Chain);
4641     break;
4642   case NVPTXISD::Suld2DV2I32Zero:
4643     Opc = NVPTX::SULD_2D_V2I32_ZERO;
4644     Ops.push_back(TexHandle);
4645     Ops.push_back(N->getOperand(2));
4646     Ops.push_back(N->getOperand(3));
4647     Ops.push_back(Chain);
4648     break;
4649   case NVPTXISD::Suld2DV2I64Zero:
4650     Opc = NVPTX::SULD_2D_V2I64_ZERO;
4651     Ops.push_back(TexHandle);
4652     Ops.push_back(N->getOperand(2));
4653     Ops.push_back(N->getOperand(3));
4654     Ops.push_back(Chain);
4655     break;
4656   case NVPTXISD::Suld2DV4I8Zero:
4657     Opc = NVPTX::SULD_2D_V4I8_ZERO;
4658     Ops.push_back(TexHandle);
4659     Ops.push_back(N->getOperand(2));
4660     Ops.push_back(N->getOperand(3));
4661     Ops.push_back(Chain);
4662     break;
4663   case NVPTXISD::Suld2DV4I16Zero:
4664     Opc = NVPTX::SULD_2D_V4I16_ZERO;
4665     Ops.push_back(TexHandle);
4666     Ops.push_back(N->getOperand(2));
4667     Ops.push_back(N->getOperand(3));
4668     Ops.push_back(Chain);
4669     break;
4670   case NVPTXISD::Suld2DV4I32Zero:
4671     Opc = NVPTX::SULD_2D_V4I32_ZERO;
4672     Ops.push_back(TexHandle);
4673     Ops.push_back(N->getOperand(2));
4674     Ops.push_back(N->getOperand(3));
4675     Ops.push_back(Chain);
4676     break;
4677   case NVPTXISD::Suld2DArrayI8Zero:
4678     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4679     Ops.push_back(TexHandle);
4680     Ops.push_back(N->getOperand(2));
4681     Ops.push_back(N->getOperand(3));
4682     Ops.push_back(N->getOperand(4));
4683     Ops.push_back(Chain);
4684     break;
4685   case NVPTXISD::Suld2DArrayI16Zero:
4686     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4687     Ops.push_back(TexHandle);
4688     Ops.push_back(N->getOperand(2));
4689     Ops.push_back(N->getOperand(3));
4690     Ops.push_back(N->getOperand(4));
4691     Ops.push_back(Chain);
4692     break;
4693   case NVPTXISD::Suld2DArrayI32Zero:
4694     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4695     Ops.push_back(TexHandle);
4696     Ops.push_back(N->getOperand(2));
4697     Ops.push_back(N->getOperand(3));
4698     Ops.push_back(N->getOperand(4));
4699     Ops.push_back(Chain);
4700     break;
4701   case NVPTXISD::Suld2DArrayI64Zero:
4702     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4703     Ops.push_back(TexHandle);
4704     Ops.push_back(N->getOperand(2));
4705     Ops.push_back(N->getOperand(3));
4706     Ops.push_back(N->getOperand(4));
4707     Ops.push_back(Chain);
4708     break;
4709   case NVPTXISD::Suld2DArrayV2I8Zero:
4710     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4711     Ops.push_back(TexHandle);
4712     Ops.push_back(N->getOperand(2));
4713     Ops.push_back(N->getOperand(3));
4714     Ops.push_back(N->getOperand(4));
4715     Ops.push_back(Chain);
4716     break;
4717   case NVPTXISD::Suld2DArrayV2I16Zero:
4718     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4719     Ops.push_back(TexHandle);
4720     Ops.push_back(N->getOperand(2));
4721     Ops.push_back(N->getOperand(3));
4722     Ops.push_back(N->getOperand(4));
4723     Ops.push_back(Chain);
4724     break;
4725   case NVPTXISD::Suld2DArrayV2I32Zero:
4726     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4727     Ops.push_back(TexHandle);
4728     Ops.push_back(N->getOperand(2));
4729     Ops.push_back(N->getOperand(3));
4730     Ops.push_back(N->getOperand(4));
4731     Ops.push_back(Chain);
4732     break;
4733   case NVPTXISD::Suld2DArrayV2I64Zero:
4734     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4735     Ops.push_back(TexHandle);
4736     Ops.push_back(N->getOperand(2));
4737     Ops.push_back(N->getOperand(3));
4738     Ops.push_back(N->getOperand(4));
4739     Ops.push_back(Chain);
4740     break;
4741   case NVPTXISD::Suld2DArrayV4I8Zero:
4742     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4743     Ops.push_back(TexHandle);
4744     Ops.push_back(N->getOperand(2));
4745     Ops.push_back(N->getOperand(3));
4746     Ops.push_back(N->getOperand(4));
4747     Ops.push_back(Chain);
4748     break;
4749   case NVPTXISD::Suld2DArrayV4I16Zero:
4750     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4751     Ops.push_back(TexHandle);
4752     Ops.push_back(N->getOperand(2));
4753     Ops.push_back(N->getOperand(3));
4754     Ops.push_back(N->getOperand(4));
4755     Ops.push_back(Chain);
4756     break;
4757   case NVPTXISD::Suld2DArrayV4I32Zero:
4758     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4759     Ops.push_back(TexHandle);
4760     Ops.push_back(N->getOperand(2));
4761     Ops.push_back(N->getOperand(3));
4762     Ops.push_back(N->getOperand(4));
4763     Ops.push_back(Chain);
4764     break;
4765   case NVPTXISD::Suld3DI8Zero:
4766     Opc = NVPTX::SULD_3D_I8_ZERO;
4767     Ops.push_back(TexHandle);
4768     Ops.push_back(N->getOperand(2));
4769     Ops.push_back(N->getOperand(3));
4770     Ops.push_back(N->getOperand(4));
4771     Ops.push_back(Chain);
4772     break;
4773   case NVPTXISD::Suld3DI16Zero:
4774     Opc = NVPTX::SULD_3D_I16_ZERO;
4775     Ops.push_back(TexHandle);
4776     Ops.push_back(N->getOperand(2));
4777     Ops.push_back(N->getOperand(3));
4778     Ops.push_back(N->getOperand(4));
4779     Ops.push_back(Chain);
4780     break;
4781   case NVPTXISD::Suld3DI32Zero:
4782     Opc = NVPTX::SULD_3D_I32_ZERO;
4783     Ops.push_back(TexHandle);
4784     Ops.push_back(N->getOperand(2));
4785     Ops.push_back(N->getOperand(3));
4786     Ops.push_back(N->getOperand(4));
4787     Ops.push_back(Chain);
4788     break;
4789   case NVPTXISD::Suld3DI64Zero:
4790     Opc = NVPTX::SULD_3D_I64_ZERO;
4791     Ops.push_back(TexHandle);
4792     Ops.push_back(N->getOperand(2));
4793     Ops.push_back(N->getOperand(3));
4794     Ops.push_back(N->getOperand(4));
4795     Ops.push_back(Chain);
4796     break;
4797   case NVPTXISD::Suld3DV2I8Zero:
4798     Opc = NVPTX::SULD_3D_V2I8_ZERO;
4799     Ops.push_back(TexHandle);
4800     Ops.push_back(N->getOperand(2));
4801     Ops.push_back(N->getOperand(3));
4802     Ops.push_back(N->getOperand(4));
4803     Ops.push_back(Chain);
4804     break;
4805   case NVPTXISD::Suld3DV2I16Zero:
4806     Opc = NVPTX::SULD_3D_V2I16_ZERO;
4807     Ops.push_back(TexHandle);
4808     Ops.push_back(N->getOperand(2));
4809     Ops.push_back(N->getOperand(3));
4810     Ops.push_back(N->getOperand(4));
4811     Ops.push_back(Chain);
4812     break;
4813   case NVPTXISD::Suld3DV2I32Zero:
4814     Opc = NVPTX::SULD_3D_V2I32_ZERO;
4815     Ops.push_back(TexHandle);
4816     Ops.push_back(N->getOperand(2));
4817     Ops.push_back(N->getOperand(3));
4818     Ops.push_back(N->getOperand(4));
4819     Ops.push_back(Chain);
4820     break;
4821   case NVPTXISD::Suld3DV2I64Zero:
4822     Opc = NVPTX::SULD_3D_V2I64_ZERO;
4823     Ops.push_back(TexHandle);
4824     Ops.push_back(N->getOperand(2));
4825     Ops.push_back(N->getOperand(3));
4826     Ops.push_back(N->getOperand(4));
4827     Ops.push_back(Chain);
4828     break;
4829   case NVPTXISD::Suld3DV4I8Zero:
4830     Opc = NVPTX::SULD_3D_V4I8_ZERO;
4831     Ops.push_back(TexHandle);
4832     Ops.push_back(N->getOperand(2));
4833     Ops.push_back(N->getOperand(3));
4834     Ops.push_back(N->getOperand(4));
4835     Ops.push_back(Chain);
4836     break;
4837   case NVPTXISD::Suld3DV4I16Zero:
4838     Opc = NVPTX::SULD_3D_V4I16_ZERO;
4839     Ops.push_back(TexHandle);
4840     Ops.push_back(N->getOperand(2));
4841     Ops.push_back(N->getOperand(3));
4842     Ops.push_back(N->getOperand(4));
4843     Ops.push_back(Chain);
4844     break;
4845   case NVPTXISD::Suld3DV4I32Zero:
4846     Opc = NVPTX::SULD_3D_V4I32_ZERO;
4847     Ops.push_back(TexHandle);
4848     Ops.push_back(N->getOperand(2));
4849     Ops.push_back(N->getOperand(3));
4850     Ops.push_back(N->getOperand(4));
4851     Ops.push_back(Chain);
4852     break;
4853   }
4854   ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
4855   return true;
4856 }
4857 
4858 
4859 /// SelectBFE - Look for instruction sequences that can be made more efficient
4860 /// by using the 'bfe' (bit-field extract) PTX instruction
tryBFE(SDNode * N)4861 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
4862   SDLoc DL(N);
4863   SDValue LHS = N->getOperand(0);
4864   SDValue RHS = N->getOperand(1);
4865   SDValue Len;
4866   SDValue Start;
4867   SDValue Val;
4868   bool IsSigned = false;
4869 
4870   if (N->getOpcode() == ISD::AND) {
4871     // Canonicalize the operands
4872     // We want 'and %val, %mask'
4873     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4874       std::swap(LHS, RHS);
4875     }
4876 
4877     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4878     if (!Mask) {
4879       // We need a constant mask on the RHS of the AND
4880       return false;
4881     }
4882 
4883     // Extract the mask bits
4884     uint64_t MaskVal = Mask->getZExtValue();
4885     if (!isMask_64(MaskVal)) {
4886       // We *could* handle shifted masks here, but doing so would require an
4887       // 'and' operation to fix up the low-order bits so we would trade
4888       // shr+and for bfe+and, which has the same throughput
4889       return false;
4890     }
4891 
4892     // How many bits are in our mask?
4893     uint64_t NumBits = countTrailingOnes(MaskVal);
4894     Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4895 
4896     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4897       // We have a 'srl/and' pair, extract the effective start bit and length
4898       Val = LHS.getNode()->getOperand(0);
4899       Start = LHS.getNode()->getOperand(1);
4900       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4901       if (StartConst) {
4902         uint64_t StartVal = StartConst->getZExtValue();
4903         // How many "good" bits do we have left?  "good" is defined here as bits
4904         // that exist in the original value, not shifted in.
4905         uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4906         if (NumBits > GoodBits) {
4907           // Do not handle the case where bits have been shifted in. In theory
4908           // we could handle this, but the cost is likely higher than just
4909           // emitting the srl/and pair.
4910           return false;
4911         }
4912         Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4913       } else {
4914         // Do not handle the case where the shift amount (can be zero if no srl
4915         // was found) is not constant. We could handle this case, but it would
4916         // require run-time logic that would be more expensive than just
4917         // emitting the srl/and pair.
4918         return false;
4919       }
4920     } else {
4921       // Do not handle the case where the LHS of the and is not a shift. While
4922       // it would be trivial to handle this case, it would just transform
4923       // 'and' -> 'bfe', but 'and' has higher-throughput.
4924       return false;
4925     }
4926   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4927     if (LHS->getOpcode() == ISD::AND) {
4928       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4929       if (!ShiftCnst) {
4930         // Shift amount must be constant
4931         return false;
4932       }
4933 
4934       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4935 
4936       SDValue AndLHS = LHS->getOperand(0);
4937       SDValue AndRHS = LHS->getOperand(1);
4938 
4939       // Canonicalize the AND to have the mask on the RHS
4940       if (isa<ConstantSDNode>(AndLHS)) {
4941         std::swap(AndLHS, AndRHS);
4942       }
4943 
4944       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4945       if (!MaskCnst) {
4946         // Mask must be constant
4947         return false;
4948       }
4949 
4950       uint64_t MaskVal = MaskCnst->getZExtValue();
4951       uint64_t NumZeros;
4952       uint64_t NumBits;
4953       if (isMask_64(MaskVal)) {
4954         NumZeros = 0;
4955         // The number of bits in the result bitfield will be the number of
4956         // trailing ones (the AND) minus the number of bits we shift off
4957         NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4958       } else if (isShiftedMask_64(MaskVal)) {
4959         NumZeros = countTrailingZeros(MaskVal);
4960         unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4961         // The number of bits in the result bitfield will be the number of
4962         // trailing zeros plus the number of set bits in the mask minus the
4963         // number of bits we shift off
4964         NumBits = NumZeros + NumOnes - ShiftAmt;
4965       } else {
4966         // This is not a mask we can handle
4967         return false;
4968       }
4969 
4970       if (ShiftAmt < NumZeros) {
4971         // Handling this case would require extra logic that would make this
4972         // transformation non-profitable
4973         return false;
4974       }
4975 
4976       Val = AndLHS;
4977       Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4978       Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4979     } else if (LHS->getOpcode() == ISD::SHL) {
4980       // Here, we have a pattern like:
4981       //
4982       // (sra (shl val, NN), MM)
4983       // or
4984       // (srl (shl val, NN), MM)
4985       //
4986       // If MM >= NN, we can efficiently optimize this with bfe
4987       Val = LHS->getOperand(0);
4988 
4989       SDValue ShlRHS = LHS->getOperand(1);
4990       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4991       if (!ShlCnst) {
4992         // Shift amount must be constant
4993         return false;
4994       }
4995       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4996 
4997       SDValue ShrRHS = RHS;
4998       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4999       if (!ShrCnst) {
5000         // Shift amount must be constant
5001         return false;
5002       }
5003       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
5004 
5005       // To avoid extra codegen and be profitable, we need Outer >= Inner
5006       if (OuterShiftAmt < InnerShiftAmt) {
5007         return false;
5008       }
5009 
5010       // If the outer shift is more than the type size, we have no bitfield to
5011       // extract (since we also check that the inner shift is <= the outer shift
5012       // then this also implies that the inner shift is < the type size)
5013       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
5014         return false;
5015       }
5016 
5017       Start =
5018         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
5019       Len =
5020         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
5021                                   OuterShiftAmt, DL, MVT::i32);
5022 
5023       if (N->getOpcode() == ISD::SRA) {
5024         // If we have a arithmetic right shift, we need to use the signed bfe
5025         // variant
5026         IsSigned = true;
5027       }
5028     } else {
5029       // No can do...
5030       return false;
5031     }
5032   } else {
5033     // No can do...
5034     return false;
5035   }
5036 
5037 
5038   unsigned Opc;
5039   // For the BFE operations we form here from "and" and "srl", always use the
5040   // unsigned variants.
5041   if (Val.getValueType() == MVT::i32) {
5042     if (IsSigned) {
5043       Opc = NVPTX::BFE_S32rii;
5044     } else {
5045       Opc = NVPTX::BFE_U32rii;
5046     }
5047   } else if (Val.getValueType() == MVT::i64) {
5048     if (IsSigned) {
5049       Opc = NVPTX::BFE_S64rii;
5050     } else {
5051       Opc = NVPTX::BFE_U64rii;
5052     }
5053   } else {
5054     // We cannot handle this type
5055     return false;
5056   }
5057 
5058   SDValue Ops[] = {
5059     Val, Start, Len
5060   };
5061 
5062   ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
5063   return true;
5064 }
5065 
5066 // SelectDirectAddr - Match a direct address for DAG.
5067 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)5068 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5069   // Return true if TGA or ES.
5070   if (N.getOpcode() == ISD::TargetGlobalAddress ||
5071       N.getOpcode() == ISD::TargetExternalSymbol) {
5072     Address = N;
5073     return true;
5074   }
5075   if (N.getOpcode() == NVPTXISD::Wrapper) {
5076     Address = N.getOperand(0);
5077     return true;
5078   }
5079   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5080     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5081     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5082       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5083         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5084   }
5085   return false;
5086 }
5087 
5088 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5089 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5090     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5091   if (Addr.getOpcode() == ISD::ADD) {
5092     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5093       SDValue base = Addr.getOperand(0);
5094       if (SelectDirectAddr(base, Base)) {
5095         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5096                                            mvt);
5097         return true;
5098       }
5099     }
5100   }
5101   return false;
5102 }
5103 
5104 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5105 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5106                                      SDValue &Base, SDValue &Offset) {
5107   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5108 }
5109 
5110 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5111 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5112                                        SDValue &Base, SDValue &Offset) {
5113   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5114 }
5115 
5116 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5117 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5118     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5119   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5120     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5121     Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5122     return true;
5123   }
5124   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5125       Addr.getOpcode() == ISD::TargetGlobalAddress)
5126     return false; // direct calls.
5127 
5128   if (Addr.getOpcode() == ISD::ADD) {
5129     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5130       return false;
5131     }
5132     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5133       if (FrameIndexSDNode *FIN =
5134               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5135         // Constant offset from frame ref.
5136         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5137       else
5138         Base = Addr.getOperand(0);
5139       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5140                                          mvt);
5141       return true;
5142     }
5143   }
5144   return false;
5145 }
5146 
5147 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5148 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5149                                      SDValue &Base, SDValue &Offset) {
5150   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5151 }
5152 
5153 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5154 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5155                                        SDValue &Base, SDValue &Offset) {
5156   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5157 }
5158 
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const5159 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5160                                                  unsigned int spN) const {
5161   const Value *Src = nullptr;
5162   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5163     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5164       return true;
5165     Src = mN->getMemOperand()->getValue();
5166   }
5167   if (!Src)
5168     return false;
5169   if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5170     return (PT->getAddressSpace() == spN);
5171   return false;
5172 }
5173 
5174 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5175 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)5176 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5177     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5178   SDValue Op0, Op1;
5179   switch (ConstraintID) {
5180   default:
5181     return true;
5182   case InlineAsm::Constraint_m: // memory
5183     if (SelectDirectAddr(Op, Op0)) {
5184       OutOps.push_back(Op0);
5185       OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5186       return false;
5187     }
5188     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5189       OutOps.push_back(Op0);
5190       OutOps.push_back(Op1);
5191       return false;
5192     }
5193     break;
5194   }
5195   return true;
5196 }
5197 
5198 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
5199 /// conversion from \p SrcTy to \p DestTy.
GetConvertOpcode(MVT DestTy,MVT SrcTy,bool IsSigned)5200 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
5201                                              bool IsSigned) {
5202   switch (SrcTy.SimpleTy) {
5203   default:
5204     llvm_unreachable("Unhandled source type");
5205   case MVT::i8:
5206     switch (DestTy.SimpleTy) {
5207     default:
5208       llvm_unreachable("Unhandled dest type");
5209     case MVT::i16:
5210       return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
5211     case MVT::i32:
5212       return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
5213     case MVT::i64:
5214       return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
5215     }
5216   case MVT::i16:
5217     switch (DestTy.SimpleTy) {
5218     default:
5219       llvm_unreachable("Unhandled dest type");
5220     case MVT::i8:
5221       return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
5222     case MVT::i32:
5223       return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
5224     case MVT::i64:
5225       return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
5226     }
5227   case MVT::i32:
5228     switch (DestTy.SimpleTy) {
5229     default:
5230       llvm_unreachable("Unhandled dest type");
5231     case MVT::i8:
5232       return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
5233     case MVT::i16:
5234       return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
5235     case MVT::i64:
5236       return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
5237     }
5238   case MVT::i64:
5239     switch (DestTy.SimpleTy) {
5240     default:
5241       llvm_unreachable("Unhandled dest type");
5242     case MVT::i8:
5243       return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
5244     case MVT::i16:
5245       return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
5246     case MVT::i32:
5247       return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
5248     }
5249   }
5250 }
5251