1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetIntrinsicInfo.h"
24
25 using namespace llvm;
26
27 #define DEBUG_TYPE "nvptx-isel"
28
// Hidden command-line override for how f32 division is lowered. It is
// consulted by NVPTXDAGToDAGISel::getDivF32Level(), which only honors it when
// the flag was actually given on the command line. Defaults to 2.
static cl::opt<int> UsePrecDivF32(
    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE Compliant F32 div.rnd if available."),
    cl::init(2));

// Hidden command-line override for f32 square-root precision. It is consulted
// by NVPTXDAGToDAGISel::usePrecSqrtF32(). Defaults to true (sqrt.rn).
static cl::opt<bool>
UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
               cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
               cl::init(true));

// Hidden command-line override for flushing f32 subnormals to zero. It is
// consulted by NVPTXDAGToDAGISel::useF32FTZ(), which otherwise falls back to
// the "nvptx-f32ftz" function attribute. Defaults to false.
static cl::opt<bool>
FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
           cl::init(false));
44
45
46 /// createNVPTXISelDag - This pass converts a legalized DAG into a
47 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOpt::Level OptLevel)48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49 llvm::CodeGenOpt::Level OptLevel) {
50 return new NVPTXDAGToDAGISel(TM, OptLevel);
51 }
52
NVPTXDAGToDAGISel(NVPTXTargetMachine & tm,CodeGenOpt::Level OptLevel)53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54 CodeGenOpt::Level OptLevel)
55 : SelectionDAGISel(tm, OptLevel), TM(tm) {
56 doMulWide = (OptLevel > 0);
57 }
58
runOnMachineFunction(MachineFunction & MF)59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61 return SelectionDAGISel::runOnMachineFunction(MF);
62 }
63
getDivF32Level() const64 int NVPTXDAGToDAGISel::getDivF32Level() const {
65 if (UsePrecDivF32.getNumOccurrences() > 0) {
66 // If nvptx-prec-div32=N is used on the command-line, always honor it
67 return UsePrecDivF32;
68 } else {
69 // Otherwise, use div.approx if fast math is enabled
70 if (TM.Options.UnsafeFPMath)
71 return 0;
72 else
73 return 2;
74 }
75 }
76
usePrecSqrtF32() const77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80 return UsePrecSqrtF32;
81 } else {
82 // Otherwise, use sqrt.approx if fast math is enabled
83 return !TM.Options.UnsafeFPMath;
84 }
85 }
86
useF32FTZ() const87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
88 if (FtzEnabled.getNumOccurrences() > 0) {
89 // If nvptx-f32ftz is used on the command-line, always honor it
90 return FtzEnabled;
91 } else {
92 const Function *F = MF->getFunction();
93 // Otherwise, check for an nvptx-f32ftz attribute on the function
94 if (F->hasFnAttribute("nvptx-f32ftz"))
95 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
96 else
97 return false;
98 }
99 }
100
allowFMA() const101 bool NVPTXDAGToDAGISel::allowFMA() const {
102 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
103 return TL->allowFMA(*MF, OptLevel);
104 }
105
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
///
/// Dispatches on the node's opcode to one of the hand-written selection
/// helpers. If the helper returns a node, that node is the result; if it
/// returns null (or no helper applies to the opcode), selection falls back to
/// SelectCode(N).
///
/// \param N  the DAG node to select.
/// \returns nullptr when \p N is already a machine node, otherwise the result
///          of the custom helper or of SelectCode(N).
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  // Stays null unless a custom helper below produces a node.
  SDNode *ResNode = nullptr;
  switch (N->getOpcode()) {
  // Generic loads and stores.
  case ISD::LOAD:
    ResNode = SelectLoad(N);
    break;
  case ISD::STORE:
    ResNode = SelectStore(N);
    break;
  // NVPTX vector loads.
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    ResNode = SelectLoadVector(N);
    break;
  // NVPTX LDG/LDU vector loads.
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    ResNode = SelectLDGLDU(N);
    break;
  // NVPTX vector stores.
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    ResNode = SelectStoreVector(N);
    break;
  // Parameter loads.
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    ResNode = SelectLoadParam(N);
    break;
  // Return-value stores.
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    ResNode = SelectStoreRetval(N);
    break;
  // Parameter stores.
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    ResNode = SelectStoreParam(N);
    break;
  // Intrinsics, without and with a chain operand.
  case ISD::INTRINSIC_WO_CHAIN:
    ResNode = SelectIntrinsicNoChain(N);
    break;
  case ISD::INTRINSIC_W_CHAIN:
    ResNode = SelectIntrinsicChain(N);
    break;
  // Texture (Tex*/Tld4*) nodes, including the *Unified* variants.
  case NVPTXISD::Tex1DFloatS32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DS32S32:
  case NVPTXISD::Tex1DS32Float:
  case NVPTXISD::Tex1DS32FloatLevel:
  case NVPTXISD::Tex1DS32FloatGrad:
  case NVPTXISD::Tex1DU32S32:
  case NVPTXISD::Tex1DU32Float:
  case NVPTXISD::Tex1DU32FloatLevel:
  case NVPTXISD::Tex1DU32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatS32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayS32S32:
  case NVPTXISD::Tex1DArrayS32Float:
  case NVPTXISD::Tex1DArrayS32FloatLevel:
  case NVPTXISD::Tex1DArrayS32FloatGrad:
  case NVPTXISD::Tex1DArrayU32S32:
  case NVPTXISD::Tex1DArrayU32Float:
  case NVPTXISD::Tex1DArrayU32FloatLevel:
  case NVPTXISD::Tex1DArrayU32FloatGrad:
  case NVPTXISD::Tex2DFloatS32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DS32S32:
  case NVPTXISD::Tex2DS32Float:
  case NVPTXISD::Tex2DS32FloatLevel:
  case NVPTXISD::Tex2DS32FloatGrad:
  case NVPTXISD::Tex2DU32S32:
  case NVPTXISD::Tex2DU32Float:
  case NVPTXISD::Tex2DU32FloatLevel:
  case NVPTXISD::Tex2DU32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatS32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayS32S32:
  case NVPTXISD::Tex2DArrayS32Float:
  case NVPTXISD::Tex2DArrayS32FloatLevel:
  case NVPTXISD::Tex2DArrayS32FloatGrad:
  case NVPTXISD::Tex2DArrayU32S32:
  case NVPTXISD::Tex2DArrayU32Float:
  case NVPTXISD::Tex2DArrayU32FloatLevel:
  case NVPTXISD::Tex2DArrayU32FloatGrad:
  case NVPTXISD::Tex3DFloatS32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DS32S32:
  case NVPTXISD::Tex3DS32Float:
  case NVPTXISD::Tex3DS32FloatLevel:
  case NVPTXISD::Tex3DS32FloatGrad:
  case NVPTXISD::Tex3DU32S32:
  case NVPTXISD::Tex3DU32Float:
  case NVPTXISD::Tex3DU32FloatLevel:
  case NVPTXISD::Tex3DU32FloatGrad:
  case NVPTXISD::TexCubeFloatFloat:
  case NVPTXISD::TexCubeFloatFloatLevel:
  case NVPTXISD::TexCubeS32Float:
  case NVPTXISD::TexCubeS32FloatLevel:
  case NVPTXISD::TexCubeU32Float:
  case NVPTXISD::TexCubeU32FloatLevel:
  case NVPTXISD::TexCubeArrayFloatFloat:
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
  case NVPTXISD::TexCubeArrayS32Float:
  case NVPTXISD::TexCubeArrayS32FloatLevel:
  case NVPTXISD::TexCubeArrayU32Float:
  case NVPTXISD::TexCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4R2DFloatFloat:
  case NVPTXISD::Tld4G2DFloatFloat:
  case NVPTXISD::Tld4B2DFloatFloat:
  case NVPTXISD::Tld4A2DFloatFloat:
  case NVPTXISD::Tld4R2DS64Float:
  case NVPTXISD::Tld4G2DS64Float:
  case NVPTXISD::Tld4B2DS64Float:
  case NVPTXISD::Tld4A2DS64Float:
  case NVPTXISD::Tld4R2DU64Float:
  case NVPTXISD::Tld4G2DU64Float:
  case NVPTXISD::Tld4B2DU64Float:
  case NVPTXISD::Tld4A2DU64Float:
  case NVPTXISD::TexUnified1DFloatS32:
  case NVPTXISD::TexUnified1DFloatFloat:
  case NVPTXISD::TexUnified1DFloatFloatLevel:
  case NVPTXISD::TexUnified1DFloatFloatGrad:
  case NVPTXISD::TexUnified1DS32S32:
  case NVPTXISD::TexUnified1DS32Float:
  case NVPTXISD::TexUnified1DS32FloatLevel:
  case NVPTXISD::TexUnified1DS32FloatGrad:
  case NVPTXISD::TexUnified1DU32S32:
  case NVPTXISD::TexUnified1DU32Float:
  case NVPTXISD::TexUnified1DU32FloatLevel:
  case NVPTXISD::TexUnified1DU32FloatGrad:
  case NVPTXISD::TexUnified1DArrayFloatS32:
  case NVPTXISD::TexUnified1DArrayFloatFloat:
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified1DArrayS32S32:
  case NVPTXISD::TexUnified1DArrayS32Float:
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
  case NVPTXISD::TexUnified1DArrayU32S32:
  case NVPTXISD::TexUnified1DArrayU32Float:
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
  case NVPTXISD::TexUnified2DFloatS32:
  case NVPTXISD::TexUnified2DFloatFloat:
  case NVPTXISD::TexUnified2DFloatFloatLevel:
  case NVPTXISD::TexUnified2DFloatFloatGrad:
  case NVPTXISD::TexUnified2DS32S32:
  case NVPTXISD::TexUnified2DS32Float:
  case NVPTXISD::TexUnified2DS32FloatLevel:
  case NVPTXISD::TexUnified2DS32FloatGrad:
  case NVPTXISD::TexUnified2DU32S32:
  case NVPTXISD::TexUnified2DU32Float:
  case NVPTXISD::TexUnified2DU32FloatLevel:
  case NVPTXISD::TexUnified2DU32FloatGrad:
  case NVPTXISD::TexUnified2DArrayFloatS32:
  case NVPTXISD::TexUnified2DArrayFloatFloat:
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified2DArrayS32S32:
  case NVPTXISD::TexUnified2DArrayS32Float:
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
  case NVPTXISD::TexUnified2DArrayU32S32:
  case NVPTXISD::TexUnified2DArrayU32Float:
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
  case NVPTXISD::TexUnified3DFloatS32:
  case NVPTXISD::TexUnified3DFloatFloat:
  case NVPTXISD::TexUnified3DFloatFloatLevel:
  case NVPTXISD::TexUnified3DFloatFloatGrad:
  case NVPTXISD::TexUnified3DS32S32:
  case NVPTXISD::TexUnified3DS32Float:
  case NVPTXISD::TexUnified3DS32FloatLevel:
  case NVPTXISD::TexUnified3DS32FloatGrad:
  case NVPTXISD::TexUnified3DU32S32:
  case NVPTXISD::TexUnified3DU32Float:
  case NVPTXISD::TexUnified3DU32FloatLevel:
  case NVPTXISD::TexUnified3DU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeFloatFloat:
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeS32Float:
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeU32Float:
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
  case NVPTXISD::Tld4UnifiedR2DS64Float:
  case NVPTXISD::Tld4UnifiedG2DS64Float:
  case NVPTXISD::Tld4UnifiedB2DS64Float:
  case NVPTXISD::Tld4UnifiedA2DS64Float:
  case NVPTXISD::Tld4UnifiedR2DU64Float:
  case NVPTXISD::Tld4UnifiedG2DU64Float:
  case NVPTXISD::Tld4UnifiedB2DU64Float:
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    ResNode = SelectTextureIntrinsic(N);
    break;
  // Surface load (Suld*) nodes in their Clamp, Trap and Zero variants.
  case NVPTXISD::Suld1DI8Clamp:
  case NVPTXISD::Suld1DI16Clamp:
  case NVPTXISD::Suld1DI32Clamp:
  case NVPTXISD::Suld1DI64Clamp:
  case NVPTXISD::Suld1DV2I8Clamp:
  case NVPTXISD::Suld1DV2I16Clamp:
  case NVPTXISD::Suld1DV2I32Clamp:
  case NVPTXISD::Suld1DV2I64Clamp:
  case NVPTXISD::Suld1DV4I8Clamp:
  case NVPTXISD::Suld1DV4I16Clamp:
  case NVPTXISD::Suld1DV4I32Clamp:
  case NVPTXISD::Suld1DArrayI8Clamp:
  case NVPTXISD::Suld1DArrayI16Clamp:
  case NVPTXISD::Suld1DArrayI32Clamp:
  case NVPTXISD::Suld1DArrayI64Clamp:
  case NVPTXISD::Suld1DArrayV2I8Clamp:
  case NVPTXISD::Suld1DArrayV2I16Clamp:
  case NVPTXISD::Suld1DArrayV2I32Clamp:
  case NVPTXISD::Suld1DArrayV2I64Clamp:
  case NVPTXISD::Suld1DArrayV4I8Clamp:
  case NVPTXISD::Suld1DArrayV4I16Clamp:
  case NVPTXISD::Suld1DArrayV4I32Clamp:
  case NVPTXISD::Suld2DI8Clamp:
  case NVPTXISD::Suld2DI16Clamp:
  case NVPTXISD::Suld2DI32Clamp:
  case NVPTXISD::Suld2DI64Clamp:
  case NVPTXISD::Suld2DV2I8Clamp:
  case NVPTXISD::Suld2DV2I16Clamp:
  case NVPTXISD::Suld2DV2I32Clamp:
  case NVPTXISD::Suld2DV2I64Clamp:
  case NVPTXISD::Suld2DV4I8Clamp:
  case NVPTXISD::Suld2DV4I16Clamp:
  case NVPTXISD::Suld2DV4I32Clamp:
  case NVPTXISD::Suld2DArrayI8Clamp:
  case NVPTXISD::Suld2DArrayI16Clamp:
  case NVPTXISD::Suld2DArrayI32Clamp:
  case NVPTXISD::Suld2DArrayI64Clamp:
  case NVPTXISD::Suld2DArrayV2I8Clamp:
  case NVPTXISD::Suld2DArrayV2I16Clamp:
  case NVPTXISD::Suld2DArrayV2I32Clamp:
  case NVPTXISD::Suld2DArrayV2I64Clamp:
  case NVPTXISD::Suld2DArrayV4I8Clamp:
  case NVPTXISD::Suld2DArrayV4I16Clamp:
  case NVPTXISD::Suld2DArrayV4I32Clamp:
  case NVPTXISD::Suld3DI8Clamp:
  case NVPTXISD::Suld3DI16Clamp:
  case NVPTXISD::Suld3DI32Clamp:
  case NVPTXISD::Suld3DI64Clamp:
  case NVPTXISD::Suld3DV2I8Clamp:
  case NVPTXISD::Suld3DV2I16Clamp:
  case NVPTXISD::Suld3DV2I32Clamp:
  case NVPTXISD::Suld3DV2I64Clamp:
  case NVPTXISD::Suld3DV4I8Clamp:
  case NVPTXISD::Suld3DV4I16Clamp:
  case NVPTXISD::Suld3DV4I32Clamp:
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DI64Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV2I64Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayI64Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV2I64Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DI64Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV2I64Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayI64Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV2I64Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DI64Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV2I64Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
  case NVPTXISD::Suld1DI8Zero:
  case NVPTXISD::Suld1DI16Zero:
  case NVPTXISD::Suld1DI32Zero:
  case NVPTXISD::Suld1DI64Zero:
  case NVPTXISD::Suld1DV2I8Zero:
  case NVPTXISD::Suld1DV2I16Zero:
  case NVPTXISD::Suld1DV2I32Zero:
  case NVPTXISD::Suld1DV2I64Zero:
  case NVPTXISD::Suld1DV4I8Zero:
  case NVPTXISD::Suld1DV4I16Zero:
  case NVPTXISD::Suld1DV4I32Zero:
  case NVPTXISD::Suld1DArrayI8Zero:
  case NVPTXISD::Suld1DArrayI16Zero:
  case NVPTXISD::Suld1DArrayI32Zero:
  case NVPTXISD::Suld1DArrayI64Zero:
  case NVPTXISD::Suld1DArrayV2I8Zero:
  case NVPTXISD::Suld1DArrayV2I16Zero:
  case NVPTXISD::Suld1DArrayV2I32Zero:
  case NVPTXISD::Suld1DArrayV2I64Zero:
  case NVPTXISD::Suld1DArrayV4I8Zero:
  case NVPTXISD::Suld1DArrayV4I16Zero:
  case NVPTXISD::Suld1DArrayV4I32Zero:
  case NVPTXISD::Suld2DI8Zero:
  case NVPTXISD::Suld2DI16Zero:
  case NVPTXISD::Suld2DI32Zero:
  case NVPTXISD::Suld2DI64Zero:
  case NVPTXISD::Suld2DV2I8Zero:
  case NVPTXISD::Suld2DV2I16Zero:
  case NVPTXISD::Suld2DV2I32Zero:
  case NVPTXISD::Suld2DV2I64Zero:
  case NVPTXISD::Suld2DV4I8Zero:
  case NVPTXISD::Suld2DV4I16Zero:
  case NVPTXISD::Suld2DV4I32Zero:
  case NVPTXISD::Suld2DArrayI8Zero:
  case NVPTXISD::Suld2DArrayI16Zero:
  case NVPTXISD::Suld2DArrayI32Zero:
  case NVPTXISD::Suld2DArrayI64Zero:
  case NVPTXISD::Suld2DArrayV2I8Zero:
  case NVPTXISD::Suld2DArrayV2I16Zero:
  case NVPTXISD::Suld2DArrayV2I32Zero:
  case NVPTXISD::Suld2DArrayV2I64Zero:
  case NVPTXISD::Suld2DArrayV4I8Zero:
  case NVPTXISD::Suld2DArrayV4I16Zero:
  case NVPTXISD::Suld2DArrayV4I32Zero:
  case NVPTXISD::Suld3DI8Zero:
  case NVPTXISD::Suld3DI16Zero:
  case NVPTXISD::Suld3DI32Zero:
  case NVPTXISD::Suld3DI64Zero:
  case NVPTXISD::Suld3DV2I8Zero:
  case NVPTXISD::Suld3DV2I16Zero:
  case NVPTXISD::Suld3DV2I32Zero:
  case NVPTXISD::Suld3DV2I64Zero:
  case NVPTXISD::Suld3DV4I8Zero:
  case NVPTXISD::Suld3DV4I16Zero:
  case NVPTXISD::Suld3DV4I32Zero:
    ResNode = SelectSurfaceIntrinsic(N);
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    ResNode = SelectBFE(N);
    break;
  case ISD::ADDRSPACECAST:
    ResNode = SelectAddrSpaceCast(N);
    break;
  default:
    break;
  }
  // A custom helper succeeded; otherwise fall back to SelectCode(N).
  if (ResNode)
    return ResNode;
  return SelectCode(N);
}
513
SelectIntrinsicChain(SDNode * N)514 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
515 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
516 switch (IID) {
517 default:
518 return NULL;
519 case Intrinsic::nvvm_ldg_global_f:
520 case Intrinsic::nvvm_ldg_global_i:
521 case Intrinsic::nvvm_ldg_global_p:
522 case Intrinsic::nvvm_ldu_global_f:
523 case Intrinsic::nvvm_ldu_global_i:
524 case Intrinsic::nvvm_ldu_global_p:
525 return SelectLDGLDU(N);
526 }
527 }
528
getCodeAddrSpace(MemSDNode * N)529 static unsigned int getCodeAddrSpace(MemSDNode *N) {
530 const Value *Src = N->getMemOperand()->getValue();
531
532 if (!Src)
533 return NVPTX::PTXLdStInstCode::GENERIC;
534
535 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
536 switch (PT->getAddressSpace()) {
537 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
538 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
539 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
540 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
541 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
542 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
543 default: break;
544 }
545 }
546 return NVPTX::PTXLdStInstCode::GENERIC;
547 }
548
canLowerToLDG(MemSDNode * N,const NVPTXSubtarget & Subtarget,unsigned CodeAddrSpace,MachineFunction * F)549 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
550 unsigned CodeAddrSpace, MachineFunction *F) {
551 // To use non-coherent caching, the load has to be from global
552 // memory and we have to prove that the memory area is not written
553 // to anywhere for the duration of the kernel call, not even after
554 // the load.
555 //
556 // To ensure that there are no writes to the memory, we require the
557 // underlying pointer to be a noalias (__restrict) kernel parameter
558 // that is never used for a write. We can only do this for kernel
559 // functions since from within a device function, we cannot know if
560 // there were or will be writes to the memory from the caller - or we
561 // could, but then we would have to do inter-procedural analysis.
562 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
563 !isKernelFunction(*F->getFunction())) {
564 return false;
565 }
566
567 // We use GetUnderlyingObjects() here instead of
568 // GetUnderlyingObject() mainly because the former looks through phi
569 // nodes while the latter does not. We need to look through phi
570 // nodes to handle pointer induction variables.
571 SmallVector<Value *, 8> Objs;
572 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
573 Objs, F->getDataLayout());
574 for (Value *Obj : Objs) {
575 auto *A = dyn_cast<const Argument>(Obj);
576 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
577 }
578
579 return true;
580 }
581
SelectIntrinsicNoChain(SDNode * N)582 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
583 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
584 switch (IID) {
585 default:
586 return nullptr;
587 case Intrinsic::nvvm_texsurf_handle_internal:
588 return SelectTexSurfHandle(N);
589 }
590 }
591
SelectTexSurfHandle(SDNode * N)592 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
593 // Op 0 is the intrinsic ID
594 SDValue Wrapper = N->getOperand(1);
595 SDValue GlobalVal = Wrapper.getOperand(0);
596 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
597 GlobalVal);
598 }
599
SelectAddrSpaceCast(SDNode * N)600 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
601 SDValue Src = N->getOperand(0);
602 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
603 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
604 unsigned DstAddrSpace = CastN->getDestAddressSpace();
605
606 assert(SrcAddrSpace != DstAddrSpace &&
607 "addrspacecast must be between different address spaces");
608
609 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
610 // Specific to generic
611 unsigned Opc;
612 switch (SrcAddrSpace) {
613 default: report_fatal_error("Bad address space in addrspacecast");
614 case ADDRESS_SPACE_GLOBAL:
615 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
616 break;
617 case ADDRESS_SPACE_SHARED:
618 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
619 break;
620 case ADDRESS_SPACE_CONST:
621 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
622 break;
623 case ADDRESS_SPACE_LOCAL:
624 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
625 break;
626 }
627 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
628 } else {
629 // Generic to specific
630 if (SrcAddrSpace != 0)
631 report_fatal_error("Cannot cast between two non-generic address spaces");
632 unsigned Opc;
633 switch (DstAddrSpace) {
634 default: report_fatal_error("Bad address space in addrspacecast");
635 case ADDRESS_SPACE_GLOBAL:
636 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
637 : NVPTX::cvta_to_global_yes;
638 break;
639 case ADDRESS_SPACE_SHARED:
640 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
641 : NVPTX::cvta_to_shared_yes;
642 break;
643 case ADDRESS_SPACE_CONST:
644 Opc =
645 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
646 break;
647 case ADDRESS_SPACE_LOCAL:
648 Opc =
649 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
650 break;
651 case ADDRESS_SPACE_PARAM:
652 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
653 : NVPTX::nvvm_ptr_gen_to_param;
654 break;
655 }
656 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
657 }
658 }
659
SelectLoad(SDNode * N)660 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
661 SDLoc dl(N);
662 LoadSDNode *LD = cast<LoadSDNode>(N);
663 EVT LoadedVT = LD->getMemoryVT();
664 SDNode *NVPTXLD = nullptr;
665
666 // do not support pre/post inc/dec
667 if (LD->isIndexed())
668 return nullptr;
669
670 if (!LoadedVT.isSimple())
671 return nullptr;
672
673 // Address Space Setting
674 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
675
676 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
677 return SelectLDGLDU(N);
678 }
679
680 // Volatile Setting
681 // - .volatile is only availalble for .global and .shared
682 bool isVolatile = LD->isVolatile();
683 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
684 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
685 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
686 isVolatile = false;
687
688 // Vector Setting
689 MVT SimpleVT = LoadedVT.getSimpleVT();
690 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
691 if (SimpleVT.isVector()) {
692 unsigned num = SimpleVT.getVectorNumElements();
693 if (num == 2)
694 vecType = NVPTX::PTXLdStInstCode::V2;
695 else if (num == 4)
696 vecType = NVPTX::PTXLdStInstCode::V4;
697 else
698 return nullptr;
699 }
700
701 // Type Setting: fromType + fromTypeWidth
702 //
703 // Sign : ISD::SEXTLOAD
704 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
705 // type is integer
706 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
707 MVT ScalarVT = SimpleVT.getScalarType();
708 // Read at least 8 bits (predicates are stored as 8-bit values)
709 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
710 unsigned int fromType;
711 if ((LD->getExtensionType() == ISD::SEXTLOAD))
712 fromType = NVPTX::PTXLdStInstCode::Signed;
713 else if (ScalarVT.isFloatingPoint())
714 fromType = NVPTX::PTXLdStInstCode::Float;
715 else
716 fromType = NVPTX::PTXLdStInstCode::Unsigned;
717
718 // Create the machine instruction DAG
719 SDValue Chain = N->getOperand(0);
720 SDValue N1 = N->getOperand(1);
721 SDValue Addr;
722 SDValue Offset, Base;
723 unsigned Opcode;
724 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
725
726 if (SelectDirectAddr(N1, Addr)) {
727 switch (TargetVT) {
728 case MVT::i8:
729 Opcode = NVPTX::LD_i8_avar;
730 break;
731 case MVT::i16:
732 Opcode = NVPTX::LD_i16_avar;
733 break;
734 case MVT::i32:
735 Opcode = NVPTX::LD_i32_avar;
736 break;
737 case MVT::i64:
738 Opcode = NVPTX::LD_i64_avar;
739 break;
740 case MVT::f32:
741 Opcode = NVPTX::LD_f32_avar;
742 break;
743 case MVT::f64:
744 Opcode = NVPTX::LD_f64_avar;
745 break;
746 default:
747 return nullptr;
748 }
749 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
750 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
751 getI32Imm(fromTypeWidth, dl), Addr, Chain };
752 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
753 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
754 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
755 switch (TargetVT) {
756 case MVT::i8:
757 Opcode = NVPTX::LD_i8_asi;
758 break;
759 case MVT::i16:
760 Opcode = NVPTX::LD_i16_asi;
761 break;
762 case MVT::i32:
763 Opcode = NVPTX::LD_i32_asi;
764 break;
765 case MVT::i64:
766 Opcode = NVPTX::LD_i64_asi;
767 break;
768 case MVT::f32:
769 Opcode = NVPTX::LD_f32_asi;
770 break;
771 case MVT::f64:
772 Opcode = NVPTX::LD_f64_asi;
773 break;
774 default:
775 return nullptr;
776 }
777 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
778 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
779 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
780 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
781 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
782 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
783 if (TM.is64Bit()) {
784 switch (TargetVT) {
785 case MVT::i8:
786 Opcode = NVPTX::LD_i8_ari_64;
787 break;
788 case MVT::i16:
789 Opcode = NVPTX::LD_i16_ari_64;
790 break;
791 case MVT::i32:
792 Opcode = NVPTX::LD_i32_ari_64;
793 break;
794 case MVT::i64:
795 Opcode = NVPTX::LD_i64_ari_64;
796 break;
797 case MVT::f32:
798 Opcode = NVPTX::LD_f32_ari_64;
799 break;
800 case MVT::f64:
801 Opcode = NVPTX::LD_f64_ari_64;
802 break;
803 default:
804 return nullptr;
805 }
806 } else {
807 switch (TargetVT) {
808 case MVT::i8:
809 Opcode = NVPTX::LD_i8_ari;
810 break;
811 case MVT::i16:
812 Opcode = NVPTX::LD_i16_ari;
813 break;
814 case MVT::i32:
815 Opcode = NVPTX::LD_i32_ari;
816 break;
817 case MVT::i64:
818 Opcode = NVPTX::LD_i64_ari;
819 break;
820 case MVT::f32:
821 Opcode = NVPTX::LD_f32_ari;
822 break;
823 case MVT::f64:
824 Opcode = NVPTX::LD_f64_ari;
825 break;
826 default:
827 return nullptr;
828 }
829 }
830 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
831 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
832 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
833 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
834 } else {
835 if (TM.is64Bit()) {
836 switch (TargetVT) {
837 case MVT::i8:
838 Opcode = NVPTX::LD_i8_areg_64;
839 break;
840 case MVT::i16:
841 Opcode = NVPTX::LD_i16_areg_64;
842 break;
843 case MVT::i32:
844 Opcode = NVPTX::LD_i32_areg_64;
845 break;
846 case MVT::i64:
847 Opcode = NVPTX::LD_i64_areg_64;
848 break;
849 case MVT::f32:
850 Opcode = NVPTX::LD_f32_areg_64;
851 break;
852 case MVT::f64:
853 Opcode = NVPTX::LD_f64_areg_64;
854 break;
855 default:
856 return nullptr;
857 }
858 } else {
859 switch (TargetVT) {
860 case MVT::i8:
861 Opcode = NVPTX::LD_i8_areg;
862 break;
863 case MVT::i16:
864 Opcode = NVPTX::LD_i16_areg;
865 break;
866 case MVT::i32:
867 Opcode = NVPTX::LD_i32_areg;
868 break;
869 case MVT::i64:
870 Opcode = NVPTX::LD_i64_areg;
871 break;
872 case MVT::f32:
873 Opcode = NVPTX::LD_f32_areg;
874 break;
875 case MVT::f64:
876 Opcode = NVPTX::LD_f64_areg;
877 break;
878 default:
879 return nullptr;
880 }
881 }
882 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
883 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
884 getI32Imm(fromTypeWidth, dl), N1, Chain };
885 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
886 }
887
888 if (NVPTXLD) {
889 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
890 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
891 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
892 }
893
894 return NVPTXLD;
895 }
896
SelectLoadVector(SDNode * N)897 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
898
899 SDValue Chain = N->getOperand(0);
900 SDValue Op1 = N->getOperand(1);
901 SDValue Addr, Offset, Base;
902 unsigned Opcode;
903 SDLoc DL(N);
904 SDNode *LD;
905 MemSDNode *MemSD = cast<MemSDNode>(N);
906 EVT LoadedVT = MemSD->getMemoryVT();
907
908 if (!LoadedVT.isSimple())
909 return nullptr;
910
911 // Address Space Setting
912 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
913
914 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
915 return SelectLDGLDU(N);
916 }
917
918 // Volatile Setting
919 // - .volatile is only availalble for .global and .shared
920 bool IsVolatile = MemSD->isVolatile();
921 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
922 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
923 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
924 IsVolatile = false;
925
926 // Vector Setting
927 MVT SimpleVT = LoadedVT.getSimpleVT();
928
929 // Type Setting: fromType + fromTypeWidth
930 //
931 // Sign : ISD::SEXTLOAD
932 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
933 // type is integer
934 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
935 MVT ScalarVT = SimpleVT.getScalarType();
936 // Read at least 8 bits (predicates are stored as 8-bit values)
937 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
938 unsigned int FromType;
939 // The last operand holds the original LoadSDNode::getExtensionType() value
940 unsigned ExtensionType = cast<ConstantSDNode>(
941 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
942 if (ExtensionType == ISD::SEXTLOAD)
943 FromType = NVPTX::PTXLdStInstCode::Signed;
944 else if (ScalarVT.isFloatingPoint())
945 FromType = NVPTX::PTXLdStInstCode::Float;
946 else
947 FromType = NVPTX::PTXLdStInstCode::Unsigned;
948
949 unsigned VecType;
950
951 switch (N->getOpcode()) {
952 case NVPTXISD::LoadV2:
953 VecType = NVPTX::PTXLdStInstCode::V2;
954 break;
955 case NVPTXISD::LoadV4:
956 VecType = NVPTX::PTXLdStInstCode::V4;
957 break;
958 default:
959 return nullptr;
960 }
961
962 EVT EltVT = N->getValueType(0);
963
964 if (SelectDirectAddr(Op1, Addr)) {
965 switch (N->getOpcode()) {
966 default:
967 return nullptr;
968 case NVPTXISD::LoadV2:
969 switch (EltVT.getSimpleVT().SimpleTy) {
970 default:
971 return nullptr;
972 case MVT::i8:
973 Opcode = NVPTX::LDV_i8_v2_avar;
974 break;
975 case MVT::i16:
976 Opcode = NVPTX::LDV_i16_v2_avar;
977 break;
978 case MVT::i32:
979 Opcode = NVPTX::LDV_i32_v2_avar;
980 break;
981 case MVT::i64:
982 Opcode = NVPTX::LDV_i64_v2_avar;
983 break;
984 case MVT::f32:
985 Opcode = NVPTX::LDV_f32_v2_avar;
986 break;
987 case MVT::f64:
988 Opcode = NVPTX::LDV_f64_v2_avar;
989 break;
990 }
991 break;
992 case NVPTXISD::LoadV4:
993 switch (EltVT.getSimpleVT().SimpleTy) {
994 default:
995 return nullptr;
996 case MVT::i8:
997 Opcode = NVPTX::LDV_i8_v4_avar;
998 break;
999 case MVT::i16:
1000 Opcode = NVPTX::LDV_i16_v4_avar;
1001 break;
1002 case MVT::i32:
1003 Opcode = NVPTX::LDV_i32_v4_avar;
1004 break;
1005 case MVT::f32:
1006 Opcode = NVPTX::LDV_f32_v4_avar;
1007 break;
1008 }
1009 break;
1010 }
1011
1012 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1013 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1014 getI32Imm(FromTypeWidth, DL), Addr, Chain };
1015 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1016 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1017 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1018 switch (N->getOpcode()) {
1019 default:
1020 return nullptr;
1021 case NVPTXISD::LoadV2:
1022 switch (EltVT.getSimpleVT().SimpleTy) {
1023 default:
1024 return nullptr;
1025 case MVT::i8:
1026 Opcode = NVPTX::LDV_i8_v2_asi;
1027 break;
1028 case MVT::i16:
1029 Opcode = NVPTX::LDV_i16_v2_asi;
1030 break;
1031 case MVT::i32:
1032 Opcode = NVPTX::LDV_i32_v2_asi;
1033 break;
1034 case MVT::i64:
1035 Opcode = NVPTX::LDV_i64_v2_asi;
1036 break;
1037 case MVT::f32:
1038 Opcode = NVPTX::LDV_f32_v2_asi;
1039 break;
1040 case MVT::f64:
1041 Opcode = NVPTX::LDV_f64_v2_asi;
1042 break;
1043 }
1044 break;
1045 case NVPTXISD::LoadV4:
1046 switch (EltVT.getSimpleVT().SimpleTy) {
1047 default:
1048 return nullptr;
1049 case MVT::i8:
1050 Opcode = NVPTX::LDV_i8_v4_asi;
1051 break;
1052 case MVT::i16:
1053 Opcode = NVPTX::LDV_i16_v4_asi;
1054 break;
1055 case MVT::i32:
1056 Opcode = NVPTX::LDV_i32_v4_asi;
1057 break;
1058 case MVT::f32:
1059 Opcode = NVPTX::LDV_f32_v4_asi;
1060 break;
1061 }
1062 break;
1063 }
1064
1065 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1066 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1067 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1068 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1069 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1070 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1071 if (TM.is64Bit()) {
1072 switch (N->getOpcode()) {
1073 default:
1074 return nullptr;
1075 case NVPTXISD::LoadV2:
1076 switch (EltVT.getSimpleVT().SimpleTy) {
1077 default:
1078 return nullptr;
1079 case MVT::i8:
1080 Opcode = NVPTX::LDV_i8_v2_ari_64;
1081 break;
1082 case MVT::i16:
1083 Opcode = NVPTX::LDV_i16_v2_ari_64;
1084 break;
1085 case MVT::i32:
1086 Opcode = NVPTX::LDV_i32_v2_ari_64;
1087 break;
1088 case MVT::i64:
1089 Opcode = NVPTX::LDV_i64_v2_ari_64;
1090 break;
1091 case MVT::f32:
1092 Opcode = NVPTX::LDV_f32_v2_ari_64;
1093 break;
1094 case MVT::f64:
1095 Opcode = NVPTX::LDV_f64_v2_ari_64;
1096 break;
1097 }
1098 break;
1099 case NVPTXISD::LoadV4:
1100 switch (EltVT.getSimpleVT().SimpleTy) {
1101 default:
1102 return nullptr;
1103 case MVT::i8:
1104 Opcode = NVPTX::LDV_i8_v4_ari_64;
1105 break;
1106 case MVT::i16:
1107 Opcode = NVPTX::LDV_i16_v4_ari_64;
1108 break;
1109 case MVT::i32:
1110 Opcode = NVPTX::LDV_i32_v4_ari_64;
1111 break;
1112 case MVT::f32:
1113 Opcode = NVPTX::LDV_f32_v4_ari_64;
1114 break;
1115 }
1116 break;
1117 }
1118 } else {
1119 switch (N->getOpcode()) {
1120 default:
1121 return nullptr;
1122 case NVPTXISD::LoadV2:
1123 switch (EltVT.getSimpleVT().SimpleTy) {
1124 default:
1125 return nullptr;
1126 case MVT::i8:
1127 Opcode = NVPTX::LDV_i8_v2_ari;
1128 break;
1129 case MVT::i16:
1130 Opcode = NVPTX::LDV_i16_v2_ari;
1131 break;
1132 case MVT::i32:
1133 Opcode = NVPTX::LDV_i32_v2_ari;
1134 break;
1135 case MVT::i64:
1136 Opcode = NVPTX::LDV_i64_v2_ari;
1137 break;
1138 case MVT::f32:
1139 Opcode = NVPTX::LDV_f32_v2_ari;
1140 break;
1141 case MVT::f64:
1142 Opcode = NVPTX::LDV_f64_v2_ari;
1143 break;
1144 }
1145 break;
1146 case NVPTXISD::LoadV4:
1147 switch (EltVT.getSimpleVT().SimpleTy) {
1148 default:
1149 return nullptr;
1150 case MVT::i8:
1151 Opcode = NVPTX::LDV_i8_v4_ari;
1152 break;
1153 case MVT::i16:
1154 Opcode = NVPTX::LDV_i16_v4_ari;
1155 break;
1156 case MVT::i32:
1157 Opcode = NVPTX::LDV_i32_v4_ari;
1158 break;
1159 case MVT::f32:
1160 Opcode = NVPTX::LDV_f32_v4_ari;
1161 break;
1162 }
1163 break;
1164 }
1165 }
1166
1167 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1168 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1169 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1170
1171 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1172 } else {
1173 if (TM.is64Bit()) {
1174 switch (N->getOpcode()) {
1175 default:
1176 return nullptr;
1177 case NVPTXISD::LoadV2:
1178 switch (EltVT.getSimpleVT().SimpleTy) {
1179 default:
1180 return nullptr;
1181 case MVT::i8:
1182 Opcode = NVPTX::LDV_i8_v2_areg_64;
1183 break;
1184 case MVT::i16:
1185 Opcode = NVPTX::LDV_i16_v2_areg_64;
1186 break;
1187 case MVT::i32:
1188 Opcode = NVPTX::LDV_i32_v2_areg_64;
1189 break;
1190 case MVT::i64:
1191 Opcode = NVPTX::LDV_i64_v2_areg_64;
1192 break;
1193 case MVT::f32:
1194 Opcode = NVPTX::LDV_f32_v2_areg_64;
1195 break;
1196 case MVT::f64:
1197 Opcode = NVPTX::LDV_f64_v2_areg_64;
1198 break;
1199 }
1200 break;
1201 case NVPTXISD::LoadV4:
1202 switch (EltVT.getSimpleVT().SimpleTy) {
1203 default:
1204 return nullptr;
1205 case MVT::i8:
1206 Opcode = NVPTX::LDV_i8_v4_areg_64;
1207 break;
1208 case MVT::i16:
1209 Opcode = NVPTX::LDV_i16_v4_areg_64;
1210 break;
1211 case MVT::i32:
1212 Opcode = NVPTX::LDV_i32_v4_areg_64;
1213 break;
1214 case MVT::f32:
1215 Opcode = NVPTX::LDV_f32_v4_areg_64;
1216 break;
1217 }
1218 break;
1219 }
1220 } else {
1221 switch (N->getOpcode()) {
1222 default:
1223 return nullptr;
1224 case NVPTXISD::LoadV2:
1225 switch (EltVT.getSimpleVT().SimpleTy) {
1226 default:
1227 return nullptr;
1228 case MVT::i8:
1229 Opcode = NVPTX::LDV_i8_v2_areg;
1230 break;
1231 case MVT::i16:
1232 Opcode = NVPTX::LDV_i16_v2_areg;
1233 break;
1234 case MVT::i32:
1235 Opcode = NVPTX::LDV_i32_v2_areg;
1236 break;
1237 case MVT::i64:
1238 Opcode = NVPTX::LDV_i64_v2_areg;
1239 break;
1240 case MVT::f32:
1241 Opcode = NVPTX::LDV_f32_v2_areg;
1242 break;
1243 case MVT::f64:
1244 Opcode = NVPTX::LDV_f64_v2_areg;
1245 break;
1246 }
1247 break;
1248 case NVPTXISD::LoadV4:
1249 switch (EltVT.getSimpleVT().SimpleTy) {
1250 default:
1251 return nullptr;
1252 case MVT::i8:
1253 Opcode = NVPTX::LDV_i8_v4_areg;
1254 break;
1255 case MVT::i16:
1256 Opcode = NVPTX::LDV_i16_v4_areg;
1257 break;
1258 case MVT::i32:
1259 Opcode = NVPTX::LDV_i32_v4_areg;
1260 break;
1261 case MVT::f32:
1262 Opcode = NVPTX::LDV_f32_v4_areg;
1263 break;
1264 }
1265 break;
1266 }
1267 }
1268
1269 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1270 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1271 getI32Imm(FromTypeWidth, DL), Op1, Chain };
1272 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1273 }
1274
1275 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1276 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1277 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1278
1279 return LD;
1280 }
1281
SelectLDGLDU(SDNode * N)1282 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1283
1284 SDValue Chain = N->getOperand(0);
1285 SDValue Op1;
1286 MemSDNode *Mem;
1287 bool IsLDG = true;
1288
1289 // If this is an LDG intrinsic, the address is the third operand. Its its an
1290 // LDG/LDU SD node (from custom vector handling), then its the second operand
1291 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1292 Op1 = N->getOperand(2);
1293 Mem = cast<MemIntrinsicSDNode>(N);
1294 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1295 switch (IID) {
1296 default:
1297 return NULL;
1298 case Intrinsic::nvvm_ldg_global_f:
1299 case Intrinsic::nvvm_ldg_global_i:
1300 case Intrinsic::nvvm_ldg_global_p:
1301 IsLDG = true;
1302 break;
1303 case Intrinsic::nvvm_ldu_global_f:
1304 case Intrinsic::nvvm_ldu_global_i:
1305 case Intrinsic::nvvm_ldu_global_p:
1306 IsLDG = false;
1307 break;
1308 }
1309 } else {
1310 Op1 = N->getOperand(1);
1311 Mem = cast<MemSDNode>(N);
1312 }
1313
1314 unsigned Opcode;
1315 SDLoc DL(N);
1316 SDNode *LD;
1317 SDValue Base, Offset, Addr;
1318
1319 EVT EltVT = Mem->getMemoryVT();
1320 if (EltVT.isVector()) {
1321 EltVT = EltVT.getVectorElementType();
1322 }
1323
1324 if (SelectDirectAddr(Op1, Addr)) {
1325 switch (N->getOpcode()) {
1326 default:
1327 return nullptr;
1328 case ISD::INTRINSIC_W_CHAIN:
1329 if (IsLDG) {
1330 switch (EltVT.getSimpleVT().SimpleTy) {
1331 default:
1332 return nullptr;
1333 case MVT::i8:
1334 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1335 break;
1336 case MVT::i16:
1337 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1338 break;
1339 case MVT::i32:
1340 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1341 break;
1342 case MVT::i64:
1343 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1344 break;
1345 case MVT::f32:
1346 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1347 break;
1348 case MVT::f64:
1349 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1350 break;
1351 }
1352 } else {
1353 switch (EltVT.getSimpleVT().SimpleTy) {
1354 default:
1355 return nullptr;
1356 case MVT::i8:
1357 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1358 break;
1359 case MVT::i16:
1360 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1361 break;
1362 case MVT::i32:
1363 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1364 break;
1365 case MVT::i64:
1366 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1367 break;
1368 case MVT::f32:
1369 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1370 break;
1371 case MVT::f64:
1372 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1373 break;
1374 }
1375 }
1376 break;
1377 case NVPTXISD::LDGV2:
1378 switch (EltVT.getSimpleVT().SimpleTy) {
1379 default:
1380 return nullptr;
1381 case MVT::i8:
1382 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1383 break;
1384 case MVT::i16:
1385 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1386 break;
1387 case MVT::i32:
1388 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1389 break;
1390 case MVT::i64:
1391 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1392 break;
1393 case MVT::f32:
1394 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1395 break;
1396 case MVT::f64:
1397 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1398 break;
1399 }
1400 break;
1401 case NVPTXISD::LDUV2:
1402 switch (EltVT.getSimpleVT().SimpleTy) {
1403 default:
1404 return nullptr;
1405 case MVT::i8:
1406 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1407 break;
1408 case MVT::i16:
1409 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1410 break;
1411 case MVT::i32:
1412 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1413 break;
1414 case MVT::i64:
1415 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1416 break;
1417 case MVT::f32:
1418 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1419 break;
1420 case MVT::f64:
1421 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1422 break;
1423 }
1424 break;
1425 case NVPTXISD::LDGV4:
1426 switch (EltVT.getSimpleVT().SimpleTy) {
1427 default:
1428 return nullptr;
1429 case MVT::i8:
1430 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1431 break;
1432 case MVT::i16:
1433 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1434 break;
1435 case MVT::i32:
1436 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1437 break;
1438 case MVT::f32:
1439 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1440 break;
1441 }
1442 break;
1443 case NVPTXISD::LDUV4:
1444 switch (EltVT.getSimpleVT().SimpleTy) {
1445 default:
1446 return nullptr;
1447 case MVT::i8:
1448 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1449 break;
1450 case MVT::i16:
1451 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1452 break;
1453 case MVT::i32:
1454 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1455 break;
1456 case MVT::f32:
1457 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1458 break;
1459 }
1460 break;
1461 }
1462
1463 SDValue Ops[] = { Addr, Chain };
1464 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1465 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1466 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1467 if (TM.is64Bit()) {
1468 switch (N->getOpcode()) {
1469 default:
1470 return nullptr;
1471 case ISD::LOAD:
1472 case ISD::INTRINSIC_W_CHAIN:
1473 if (IsLDG) {
1474 switch (EltVT.getSimpleVT().SimpleTy) {
1475 default:
1476 return nullptr;
1477 case MVT::i8:
1478 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1479 break;
1480 case MVT::i16:
1481 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1482 break;
1483 case MVT::i32:
1484 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1485 break;
1486 case MVT::i64:
1487 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1488 break;
1489 case MVT::f32:
1490 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1491 break;
1492 case MVT::f64:
1493 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1494 break;
1495 }
1496 } else {
1497 switch (EltVT.getSimpleVT().SimpleTy) {
1498 default:
1499 return nullptr;
1500 case MVT::i8:
1501 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1502 break;
1503 case MVT::i16:
1504 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1505 break;
1506 case MVT::i32:
1507 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1508 break;
1509 case MVT::i64:
1510 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1511 break;
1512 case MVT::f32:
1513 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1514 break;
1515 case MVT::f64:
1516 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1517 break;
1518 }
1519 }
1520 break;
1521 case NVPTXISD::LoadV2:
1522 case NVPTXISD::LDGV2:
1523 switch (EltVT.getSimpleVT().SimpleTy) {
1524 default:
1525 return nullptr;
1526 case MVT::i8:
1527 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1528 break;
1529 case MVT::i16:
1530 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1531 break;
1532 case MVT::i32:
1533 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1534 break;
1535 case MVT::i64:
1536 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1537 break;
1538 case MVT::f32:
1539 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1540 break;
1541 case MVT::f64:
1542 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1543 break;
1544 }
1545 break;
1546 case NVPTXISD::LDUV2:
1547 switch (EltVT.getSimpleVT().SimpleTy) {
1548 default:
1549 return nullptr;
1550 case MVT::i8:
1551 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1552 break;
1553 case MVT::i16:
1554 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1555 break;
1556 case MVT::i32:
1557 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1558 break;
1559 case MVT::i64:
1560 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1561 break;
1562 case MVT::f32:
1563 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1564 break;
1565 case MVT::f64:
1566 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1567 break;
1568 }
1569 break;
1570 case NVPTXISD::LoadV4:
1571 case NVPTXISD::LDGV4:
1572 switch (EltVT.getSimpleVT().SimpleTy) {
1573 default:
1574 return nullptr;
1575 case MVT::i8:
1576 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1577 break;
1578 case MVT::i16:
1579 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1580 break;
1581 case MVT::i32:
1582 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1583 break;
1584 case MVT::f32:
1585 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1586 break;
1587 }
1588 break;
1589 case NVPTXISD::LDUV4:
1590 switch (EltVT.getSimpleVT().SimpleTy) {
1591 default:
1592 return nullptr;
1593 case MVT::i8:
1594 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1595 break;
1596 case MVT::i16:
1597 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1598 break;
1599 case MVT::i32:
1600 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1601 break;
1602 case MVT::f32:
1603 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1604 break;
1605 }
1606 break;
1607 }
1608 } else {
1609 switch (N->getOpcode()) {
1610 default:
1611 return nullptr;
1612 case ISD::LOAD:
1613 case ISD::INTRINSIC_W_CHAIN:
1614 if (IsLDG) {
1615 switch (EltVT.getSimpleVT().SimpleTy) {
1616 default:
1617 return nullptr;
1618 case MVT::i8:
1619 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1620 break;
1621 case MVT::i16:
1622 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1623 break;
1624 case MVT::i32:
1625 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1626 break;
1627 case MVT::i64:
1628 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1629 break;
1630 case MVT::f32:
1631 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1632 break;
1633 case MVT::f64:
1634 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1635 break;
1636 }
1637 } else {
1638 switch (EltVT.getSimpleVT().SimpleTy) {
1639 default:
1640 return nullptr;
1641 case MVT::i8:
1642 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1643 break;
1644 case MVT::i16:
1645 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1646 break;
1647 case MVT::i32:
1648 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1649 break;
1650 case MVT::i64:
1651 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1652 break;
1653 case MVT::f32:
1654 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1655 break;
1656 case MVT::f64:
1657 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1658 break;
1659 }
1660 }
1661 break;
1662 case NVPTXISD::LoadV2:
1663 case NVPTXISD::LDGV2:
1664 switch (EltVT.getSimpleVT().SimpleTy) {
1665 default:
1666 return nullptr;
1667 case MVT::i8:
1668 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1669 break;
1670 case MVT::i16:
1671 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1672 break;
1673 case MVT::i32:
1674 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1675 break;
1676 case MVT::i64:
1677 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1678 break;
1679 case MVT::f32:
1680 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1681 break;
1682 case MVT::f64:
1683 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1684 break;
1685 }
1686 break;
1687 case NVPTXISD::LDUV2:
1688 switch (EltVT.getSimpleVT().SimpleTy) {
1689 default:
1690 return nullptr;
1691 case MVT::i8:
1692 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1693 break;
1694 case MVT::i16:
1695 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1696 break;
1697 case MVT::i32:
1698 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1699 break;
1700 case MVT::i64:
1701 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1702 break;
1703 case MVT::f32:
1704 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1705 break;
1706 case MVT::f64:
1707 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1708 break;
1709 }
1710 break;
1711 case NVPTXISD::LoadV4:
1712 case NVPTXISD::LDGV4:
1713 switch (EltVT.getSimpleVT().SimpleTy) {
1714 default:
1715 return nullptr;
1716 case MVT::i8:
1717 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1718 break;
1719 case MVT::i16:
1720 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1721 break;
1722 case MVT::i32:
1723 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1724 break;
1725 case MVT::f32:
1726 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1727 break;
1728 }
1729 break;
1730 case NVPTXISD::LDUV4:
1731 switch (EltVT.getSimpleVT().SimpleTy) {
1732 default:
1733 return nullptr;
1734 case MVT::i8:
1735 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1736 break;
1737 case MVT::i16:
1738 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1739 break;
1740 case MVT::i32:
1741 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1742 break;
1743 case MVT::f32:
1744 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1745 break;
1746 }
1747 break;
1748 }
1749 }
1750
1751 SDValue Ops[] = { Base, Offset, Chain };
1752
1753 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1754 } else {
1755 if (TM.is64Bit()) {
1756 switch (N->getOpcode()) {
1757 default:
1758 return nullptr;
1759 case ISD::LOAD:
1760 case ISD::INTRINSIC_W_CHAIN:
1761 if (IsLDG) {
1762 switch (EltVT.getSimpleVT().SimpleTy) {
1763 default:
1764 return nullptr;
1765 case MVT::i8:
1766 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1767 break;
1768 case MVT::i16:
1769 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1770 break;
1771 case MVT::i32:
1772 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1773 break;
1774 case MVT::i64:
1775 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1776 break;
1777 case MVT::f32:
1778 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1779 break;
1780 case MVT::f64:
1781 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1782 break;
1783 }
1784 } else {
1785 switch (EltVT.getSimpleVT().SimpleTy) {
1786 default:
1787 return nullptr;
1788 case MVT::i8:
1789 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1790 break;
1791 case MVT::i16:
1792 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1793 break;
1794 case MVT::i32:
1795 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1796 break;
1797 case MVT::i64:
1798 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1799 break;
1800 case MVT::f32:
1801 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1802 break;
1803 case MVT::f64:
1804 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1805 break;
1806 }
1807 }
1808 break;
1809 case NVPTXISD::LoadV2:
1810 case NVPTXISD::LDGV2:
1811 switch (EltVT.getSimpleVT().SimpleTy) {
1812 default:
1813 return nullptr;
1814 case MVT::i8:
1815 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1816 break;
1817 case MVT::i16:
1818 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1819 break;
1820 case MVT::i32:
1821 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1822 break;
1823 case MVT::i64:
1824 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1825 break;
1826 case MVT::f32:
1827 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1828 break;
1829 case MVT::f64:
1830 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1831 break;
1832 }
1833 break;
1834 case NVPTXISD::LDUV2:
1835 switch (EltVT.getSimpleVT().SimpleTy) {
1836 default:
1837 return nullptr;
1838 case MVT::i8:
1839 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1840 break;
1841 case MVT::i16:
1842 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1843 break;
1844 case MVT::i32:
1845 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1846 break;
1847 case MVT::i64:
1848 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1849 break;
1850 case MVT::f32:
1851 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1852 break;
1853 case MVT::f64:
1854 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1855 break;
1856 }
1857 break;
1858 case NVPTXISD::LoadV4:
1859 case NVPTXISD::LDGV4:
1860 switch (EltVT.getSimpleVT().SimpleTy) {
1861 default:
1862 return nullptr;
1863 case MVT::i8:
1864 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1865 break;
1866 case MVT::i16:
1867 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1868 break;
1869 case MVT::i32:
1870 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1871 break;
1872 case MVT::f32:
1873 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1874 break;
1875 }
1876 break;
1877 case NVPTXISD::LDUV4:
1878 switch (EltVT.getSimpleVT().SimpleTy) {
1879 default:
1880 return nullptr;
1881 case MVT::i8:
1882 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1883 break;
1884 case MVT::i16:
1885 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1886 break;
1887 case MVT::i32:
1888 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1889 break;
1890 case MVT::f32:
1891 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1892 break;
1893 }
1894 break;
1895 }
1896 } else {
1897 switch (N->getOpcode()) {
1898 default:
1899 return nullptr;
1900 case ISD::LOAD:
1901 case ISD::INTRINSIC_W_CHAIN:
1902 if (IsLDG) {
1903 switch (EltVT.getSimpleVT().SimpleTy) {
1904 default:
1905 return nullptr;
1906 case MVT::i8:
1907 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1908 break;
1909 case MVT::i16:
1910 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1911 break;
1912 case MVT::i32:
1913 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1914 break;
1915 case MVT::i64:
1916 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1917 break;
1918 case MVT::f32:
1919 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1920 break;
1921 case MVT::f64:
1922 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1923 break;
1924 }
1925 } else {
1926 switch (EltVT.getSimpleVT().SimpleTy) {
1927 default:
1928 return nullptr;
1929 case MVT::i8:
1930 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1931 break;
1932 case MVT::i16:
1933 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1934 break;
1935 case MVT::i32:
1936 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1937 break;
1938 case MVT::i64:
1939 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1940 break;
1941 case MVT::f32:
1942 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1943 break;
1944 case MVT::f64:
1945 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1946 break;
1947 }
1948 }
1949 break;
1950 case NVPTXISD::LoadV2:
1951 case NVPTXISD::LDGV2:
1952 switch (EltVT.getSimpleVT().SimpleTy) {
1953 default:
1954 return nullptr;
1955 case MVT::i8:
1956 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1957 break;
1958 case MVT::i16:
1959 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1960 break;
1961 case MVT::i32:
1962 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1963 break;
1964 case MVT::i64:
1965 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1966 break;
1967 case MVT::f32:
1968 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1969 break;
1970 case MVT::f64:
1971 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1972 break;
1973 }
1974 break;
1975 case NVPTXISD::LDUV2:
1976 switch (EltVT.getSimpleVT().SimpleTy) {
1977 default:
1978 return nullptr;
1979 case MVT::i8:
1980 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1981 break;
1982 case MVT::i16:
1983 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1984 break;
1985 case MVT::i32:
1986 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1987 break;
1988 case MVT::i64:
1989 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1990 break;
1991 case MVT::f32:
1992 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1993 break;
1994 case MVT::f64:
1995 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1996 break;
1997 }
1998 break;
1999 case NVPTXISD::LoadV4:
2000 case NVPTXISD::LDGV4:
2001 switch (EltVT.getSimpleVT().SimpleTy) {
2002 default:
2003 return nullptr;
2004 case MVT::i8:
2005 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2006 break;
2007 case MVT::i16:
2008 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2009 break;
2010 case MVT::i32:
2011 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2012 break;
2013 case MVT::f32:
2014 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2015 break;
2016 }
2017 break;
2018 case NVPTXISD::LDUV4:
2019 switch (EltVT.getSimpleVT().SimpleTy) {
2020 default:
2021 return nullptr;
2022 case MVT::i8:
2023 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2024 break;
2025 case MVT::i16:
2026 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2027 break;
2028 case MVT::i32:
2029 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2030 break;
2031 case MVT::f32:
2032 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2033 break;
2034 }
2035 break;
2036 }
2037 }
2038
2039 SDValue Ops[] = { Op1, Chain };
2040 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
2041 }
2042
2043 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2044 MemRefs0[0] = Mem->getMemOperand();
2045 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2046
2047 return LD;
2048 }
2049
SelectStore(SDNode * N)2050 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2051 SDLoc dl(N);
2052 StoreSDNode *ST = cast<StoreSDNode>(N);
2053 EVT StoreVT = ST->getMemoryVT();
2054 SDNode *NVPTXST = nullptr;
2055
2056 // do not support pre/post inc/dec
2057 if (ST->isIndexed())
2058 return nullptr;
2059
2060 if (!StoreVT.isSimple())
2061 return nullptr;
2062
2063 // Address Space Setting
2064 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2065
2066 // Volatile Setting
2067 // - .volatile is only availalble for .global and .shared
2068 bool isVolatile = ST->isVolatile();
2069 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2070 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2071 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2072 isVolatile = false;
2073
2074 // Vector Setting
2075 MVT SimpleVT = StoreVT.getSimpleVT();
2076 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2077 if (SimpleVT.isVector()) {
2078 unsigned num = SimpleVT.getVectorNumElements();
2079 if (num == 2)
2080 vecType = NVPTX::PTXLdStInstCode::V2;
2081 else if (num == 4)
2082 vecType = NVPTX::PTXLdStInstCode::V4;
2083 else
2084 return nullptr;
2085 }
2086
2087 // Type Setting: toType + toTypeWidth
2088 // - for integer type, always use 'u'
2089 //
2090 MVT ScalarVT = SimpleVT.getScalarType();
2091 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2092 unsigned int toType;
2093 if (ScalarVT.isFloatingPoint())
2094 toType = NVPTX::PTXLdStInstCode::Float;
2095 else
2096 toType = NVPTX::PTXLdStInstCode::Unsigned;
2097
2098 // Create the machine instruction DAG
2099 SDValue Chain = N->getOperand(0);
2100 SDValue N1 = N->getOperand(1);
2101 SDValue N2 = N->getOperand(2);
2102 SDValue Addr;
2103 SDValue Offset, Base;
2104 unsigned Opcode;
2105 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2106
2107 if (SelectDirectAddr(N2, Addr)) {
2108 switch (SourceVT) {
2109 case MVT::i8:
2110 Opcode = NVPTX::ST_i8_avar;
2111 break;
2112 case MVT::i16:
2113 Opcode = NVPTX::ST_i16_avar;
2114 break;
2115 case MVT::i32:
2116 Opcode = NVPTX::ST_i32_avar;
2117 break;
2118 case MVT::i64:
2119 Opcode = NVPTX::ST_i64_avar;
2120 break;
2121 case MVT::f32:
2122 Opcode = NVPTX::ST_f32_avar;
2123 break;
2124 case MVT::f64:
2125 Opcode = NVPTX::ST_f64_avar;
2126 break;
2127 default:
2128 return nullptr;
2129 }
2130 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2131 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2132 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2133 Chain };
2134 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2135 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2136 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2137 switch (SourceVT) {
2138 case MVT::i8:
2139 Opcode = NVPTX::ST_i8_asi;
2140 break;
2141 case MVT::i16:
2142 Opcode = NVPTX::ST_i16_asi;
2143 break;
2144 case MVT::i32:
2145 Opcode = NVPTX::ST_i32_asi;
2146 break;
2147 case MVT::i64:
2148 Opcode = NVPTX::ST_i64_asi;
2149 break;
2150 case MVT::f32:
2151 Opcode = NVPTX::ST_f32_asi;
2152 break;
2153 case MVT::f64:
2154 Opcode = NVPTX::ST_f64_asi;
2155 break;
2156 default:
2157 return nullptr;
2158 }
2159 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2160 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2161 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2162 Offset, Chain };
2163 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2164 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2165 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2166 if (TM.is64Bit()) {
2167 switch (SourceVT) {
2168 case MVT::i8:
2169 Opcode = NVPTX::ST_i8_ari_64;
2170 break;
2171 case MVT::i16:
2172 Opcode = NVPTX::ST_i16_ari_64;
2173 break;
2174 case MVT::i32:
2175 Opcode = NVPTX::ST_i32_ari_64;
2176 break;
2177 case MVT::i64:
2178 Opcode = NVPTX::ST_i64_ari_64;
2179 break;
2180 case MVT::f32:
2181 Opcode = NVPTX::ST_f32_ari_64;
2182 break;
2183 case MVT::f64:
2184 Opcode = NVPTX::ST_f64_ari_64;
2185 break;
2186 default:
2187 return nullptr;
2188 }
2189 } else {
2190 switch (SourceVT) {
2191 case MVT::i8:
2192 Opcode = NVPTX::ST_i8_ari;
2193 break;
2194 case MVT::i16:
2195 Opcode = NVPTX::ST_i16_ari;
2196 break;
2197 case MVT::i32:
2198 Opcode = NVPTX::ST_i32_ari;
2199 break;
2200 case MVT::i64:
2201 Opcode = NVPTX::ST_i64_ari;
2202 break;
2203 case MVT::f32:
2204 Opcode = NVPTX::ST_f32_ari;
2205 break;
2206 case MVT::f64:
2207 Opcode = NVPTX::ST_f64_ari;
2208 break;
2209 default:
2210 return nullptr;
2211 }
2212 }
2213 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2214 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2215 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2216 Offset, Chain };
2217 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2218 } else {
2219 if (TM.is64Bit()) {
2220 switch (SourceVT) {
2221 case MVT::i8:
2222 Opcode = NVPTX::ST_i8_areg_64;
2223 break;
2224 case MVT::i16:
2225 Opcode = NVPTX::ST_i16_areg_64;
2226 break;
2227 case MVT::i32:
2228 Opcode = NVPTX::ST_i32_areg_64;
2229 break;
2230 case MVT::i64:
2231 Opcode = NVPTX::ST_i64_areg_64;
2232 break;
2233 case MVT::f32:
2234 Opcode = NVPTX::ST_f32_areg_64;
2235 break;
2236 case MVT::f64:
2237 Opcode = NVPTX::ST_f64_areg_64;
2238 break;
2239 default:
2240 return nullptr;
2241 }
2242 } else {
2243 switch (SourceVT) {
2244 case MVT::i8:
2245 Opcode = NVPTX::ST_i8_areg;
2246 break;
2247 case MVT::i16:
2248 Opcode = NVPTX::ST_i16_areg;
2249 break;
2250 case MVT::i32:
2251 Opcode = NVPTX::ST_i32_areg;
2252 break;
2253 case MVT::i64:
2254 Opcode = NVPTX::ST_i64_areg;
2255 break;
2256 case MVT::f32:
2257 Opcode = NVPTX::ST_f32_areg;
2258 break;
2259 case MVT::f64:
2260 Opcode = NVPTX::ST_f64_areg;
2261 break;
2262 default:
2263 return nullptr;
2264 }
2265 }
2266 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2267 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2268 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2269 Chain };
2270 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2271 }
2272
2273 if (NVPTXST) {
2274 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2275 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2276 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2277 }
2278
2279 return NVPTXST;
2280 }
2281
SelectStoreVector(SDNode * N)2282 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2283 SDValue Chain = N->getOperand(0);
2284 SDValue Op1 = N->getOperand(1);
2285 SDValue Addr, Offset, Base;
2286 unsigned Opcode;
2287 SDLoc DL(N);
2288 SDNode *ST;
2289 EVT EltVT = Op1.getValueType();
2290 MemSDNode *MemSD = cast<MemSDNode>(N);
2291 EVT StoreVT = MemSD->getMemoryVT();
2292
2293 // Address Space Setting
2294 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2295
2296 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2297 report_fatal_error("Cannot store to pointer that points to constant "
2298 "memory space");
2299 }
2300
2301 // Volatile Setting
2302 // - .volatile is only availalble for .global and .shared
2303 bool IsVolatile = MemSD->isVolatile();
2304 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2305 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2306 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2307 IsVolatile = false;
2308
2309 // Type Setting: toType + toTypeWidth
2310 // - for integer type, always use 'u'
2311 assert(StoreVT.isSimple() && "Store value is not simple");
2312 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2313 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2314 unsigned ToType;
2315 if (ScalarVT.isFloatingPoint())
2316 ToType = NVPTX::PTXLdStInstCode::Float;
2317 else
2318 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2319
2320 SmallVector<SDValue, 12> StOps;
2321 SDValue N2;
2322 unsigned VecType;
2323
2324 switch (N->getOpcode()) {
2325 case NVPTXISD::StoreV2:
2326 VecType = NVPTX::PTXLdStInstCode::V2;
2327 StOps.push_back(N->getOperand(1));
2328 StOps.push_back(N->getOperand(2));
2329 N2 = N->getOperand(3);
2330 break;
2331 case NVPTXISD::StoreV4:
2332 VecType = NVPTX::PTXLdStInstCode::V4;
2333 StOps.push_back(N->getOperand(1));
2334 StOps.push_back(N->getOperand(2));
2335 StOps.push_back(N->getOperand(3));
2336 StOps.push_back(N->getOperand(4));
2337 N2 = N->getOperand(5);
2338 break;
2339 default:
2340 return nullptr;
2341 }
2342
2343 StOps.push_back(getI32Imm(IsVolatile, DL));
2344 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2345 StOps.push_back(getI32Imm(VecType, DL));
2346 StOps.push_back(getI32Imm(ToType, DL));
2347 StOps.push_back(getI32Imm(ToTypeWidth, DL));
2348
2349 if (SelectDirectAddr(N2, Addr)) {
2350 switch (N->getOpcode()) {
2351 default:
2352 return nullptr;
2353 case NVPTXISD::StoreV2:
2354 switch (EltVT.getSimpleVT().SimpleTy) {
2355 default:
2356 return nullptr;
2357 case MVT::i8:
2358 Opcode = NVPTX::STV_i8_v2_avar;
2359 break;
2360 case MVT::i16:
2361 Opcode = NVPTX::STV_i16_v2_avar;
2362 break;
2363 case MVT::i32:
2364 Opcode = NVPTX::STV_i32_v2_avar;
2365 break;
2366 case MVT::i64:
2367 Opcode = NVPTX::STV_i64_v2_avar;
2368 break;
2369 case MVT::f32:
2370 Opcode = NVPTX::STV_f32_v2_avar;
2371 break;
2372 case MVT::f64:
2373 Opcode = NVPTX::STV_f64_v2_avar;
2374 break;
2375 }
2376 break;
2377 case NVPTXISD::StoreV4:
2378 switch (EltVT.getSimpleVT().SimpleTy) {
2379 default:
2380 return nullptr;
2381 case MVT::i8:
2382 Opcode = NVPTX::STV_i8_v4_avar;
2383 break;
2384 case MVT::i16:
2385 Opcode = NVPTX::STV_i16_v4_avar;
2386 break;
2387 case MVT::i32:
2388 Opcode = NVPTX::STV_i32_v4_avar;
2389 break;
2390 case MVT::f32:
2391 Opcode = NVPTX::STV_f32_v4_avar;
2392 break;
2393 }
2394 break;
2395 }
2396 StOps.push_back(Addr);
2397 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2398 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2399 switch (N->getOpcode()) {
2400 default:
2401 return nullptr;
2402 case NVPTXISD::StoreV2:
2403 switch (EltVT.getSimpleVT().SimpleTy) {
2404 default:
2405 return nullptr;
2406 case MVT::i8:
2407 Opcode = NVPTX::STV_i8_v2_asi;
2408 break;
2409 case MVT::i16:
2410 Opcode = NVPTX::STV_i16_v2_asi;
2411 break;
2412 case MVT::i32:
2413 Opcode = NVPTX::STV_i32_v2_asi;
2414 break;
2415 case MVT::i64:
2416 Opcode = NVPTX::STV_i64_v2_asi;
2417 break;
2418 case MVT::f32:
2419 Opcode = NVPTX::STV_f32_v2_asi;
2420 break;
2421 case MVT::f64:
2422 Opcode = NVPTX::STV_f64_v2_asi;
2423 break;
2424 }
2425 break;
2426 case NVPTXISD::StoreV4:
2427 switch (EltVT.getSimpleVT().SimpleTy) {
2428 default:
2429 return nullptr;
2430 case MVT::i8:
2431 Opcode = NVPTX::STV_i8_v4_asi;
2432 break;
2433 case MVT::i16:
2434 Opcode = NVPTX::STV_i16_v4_asi;
2435 break;
2436 case MVT::i32:
2437 Opcode = NVPTX::STV_i32_v4_asi;
2438 break;
2439 case MVT::f32:
2440 Opcode = NVPTX::STV_f32_v4_asi;
2441 break;
2442 }
2443 break;
2444 }
2445 StOps.push_back(Base);
2446 StOps.push_back(Offset);
2447 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2448 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2449 if (TM.is64Bit()) {
2450 switch (N->getOpcode()) {
2451 default:
2452 return nullptr;
2453 case NVPTXISD::StoreV2:
2454 switch (EltVT.getSimpleVT().SimpleTy) {
2455 default:
2456 return nullptr;
2457 case MVT::i8:
2458 Opcode = NVPTX::STV_i8_v2_ari_64;
2459 break;
2460 case MVT::i16:
2461 Opcode = NVPTX::STV_i16_v2_ari_64;
2462 break;
2463 case MVT::i32:
2464 Opcode = NVPTX::STV_i32_v2_ari_64;
2465 break;
2466 case MVT::i64:
2467 Opcode = NVPTX::STV_i64_v2_ari_64;
2468 break;
2469 case MVT::f32:
2470 Opcode = NVPTX::STV_f32_v2_ari_64;
2471 break;
2472 case MVT::f64:
2473 Opcode = NVPTX::STV_f64_v2_ari_64;
2474 break;
2475 }
2476 break;
2477 case NVPTXISD::StoreV4:
2478 switch (EltVT.getSimpleVT().SimpleTy) {
2479 default:
2480 return nullptr;
2481 case MVT::i8:
2482 Opcode = NVPTX::STV_i8_v4_ari_64;
2483 break;
2484 case MVT::i16:
2485 Opcode = NVPTX::STV_i16_v4_ari_64;
2486 break;
2487 case MVT::i32:
2488 Opcode = NVPTX::STV_i32_v4_ari_64;
2489 break;
2490 case MVT::f32:
2491 Opcode = NVPTX::STV_f32_v4_ari_64;
2492 break;
2493 }
2494 break;
2495 }
2496 } else {
2497 switch (N->getOpcode()) {
2498 default:
2499 return nullptr;
2500 case NVPTXISD::StoreV2:
2501 switch (EltVT.getSimpleVT().SimpleTy) {
2502 default:
2503 return nullptr;
2504 case MVT::i8:
2505 Opcode = NVPTX::STV_i8_v2_ari;
2506 break;
2507 case MVT::i16:
2508 Opcode = NVPTX::STV_i16_v2_ari;
2509 break;
2510 case MVT::i32:
2511 Opcode = NVPTX::STV_i32_v2_ari;
2512 break;
2513 case MVT::i64:
2514 Opcode = NVPTX::STV_i64_v2_ari;
2515 break;
2516 case MVT::f32:
2517 Opcode = NVPTX::STV_f32_v2_ari;
2518 break;
2519 case MVT::f64:
2520 Opcode = NVPTX::STV_f64_v2_ari;
2521 break;
2522 }
2523 break;
2524 case NVPTXISD::StoreV4:
2525 switch (EltVT.getSimpleVT().SimpleTy) {
2526 default:
2527 return nullptr;
2528 case MVT::i8:
2529 Opcode = NVPTX::STV_i8_v4_ari;
2530 break;
2531 case MVT::i16:
2532 Opcode = NVPTX::STV_i16_v4_ari;
2533 break;
2534 case MVT::i32:
2535 Opcode = NVPTX::STV_i32_v4_ari;
2536 break;
2537 case MVT::f32:
2538 Opcode = NVPTX::STV_f32_v4_ari;
2539 break;
2540 }
2541 break;
2542 }
2543 }
2544 StOps.push_back(Base);
2545 StOps.push_back(Offset);
2546 } else {
2547 if (TM.is64Bit()) {
2548 switch (N->getOpcode()) {
2549 default:
2550 return nullptr;
2551 case NVPTXISD::StoreV2:
2552 switch (EltVT.getSimpleVT().SimpleTy) {
2553 default:
2554 return nullptr;
2555 case MVT::i8:
2556 Opcode = NVPTX::STV_i8_v2_areg_64;
2557 break;
2558 case MVT::i16:
2559 Opcode = NVPTX::STV_i16_v2_areg_64;
2560 break;
2561 case MVT::i32:
2562 Opcode = NVPTX::STV_i32_v2_areg_64;
2563 break;
2564 case MVT::i64:
2565 Opcode = NVPTX::STV_i64_v2_areg_64;
2566 break;
2567 case MVT::f32:
2568 Opcode = NVPTX::STV_f32_v2_areg_64;
2569 break;
2570 case MVT::f64:
2571 Opcode = NVPTX::STV_f64_v2_areg_64;
2572 break;
2573 }
2574 break;
2575 case NVPTXISD::StoreV4:
2576 switch (EltVT.getSimpleVT().SimpleTy) {
2577 default:
2578 return nullptr;
2579 case MVT::i8:
2580 Opcode = NVPTX::STV_i8_v4_areg_64;
2581 break;
2582 case MVT::i16:
2583 Opcode = NVPTX::STV_i16_v4_areg_64;
2584 break;
2585 case MVT::i32:
2586 Opcode = NVPTX::STV_i32_v4_areg_64;
2587 break;
2588 case MVT::f32:
2589 Opcode = NVPTX::STV_f32_v4_areg_64;
2590 break;
2591 }
2592 break;
2593 }
2594 } else {
2595 switch (N->getOpcode()) {
2596 default:
2597 return nullptr;
2598 case NVPTXISD::StoreV2:
2599 switch (EltVT.getSimpleVT().SimpleTy) {
2600 default:
2601 return nullptr;
2602 case MVT::i8:
2603 Opcode = NVPTX::STV_i8_v2_areg;
2604 break;
2605 case MVT::i16:
2606 Opcode = NVPTX::STV_i16_v2_areg;
2607 break;
2608 case MVT::i32:
2609 Opcode = NVPTX::STV_i32_v2_areg;
2610 break;
2611 case MVT::i64:
2612 Opcode = NVPTX::STV_i64_v2_areg;
2613 break;
2614 case MVT::f32:
2615 Opcode = NVPTX::STV_f32_v2_areg;
2616 break;
2617 case MVT::f64:
2618 Opcode = NVPTX::STV_f64_v2_areg;
2619 break;
2620 }
2621 break;
2622 case NVPTXISD::StoreV4:
2623 switch (EltVT.getSimpleVT().SimpleTy) {
2624 default:
2625 return nullptr;
2626 case MVT::i8:
2627 Opcode = NVPTX::STV_i8_v4_areg;
2628 break;
2629 case MVT::i16:
2630 Opcode = NVPTX::STV_i16_v4_areg;
2631 break;
2632 case MVT::i32:
2633 Opcode = NVPTX::STV_i32_v4_areg;
2634 break;
2635 case MVT::f32:
2636 Opcode = NVPTX::STV_f32_v4_areg;
2637 break;
2638 }
2639 break;
2640 }
2641 }
2642 StOps.push_back(N2);
2643 }
2644
2645 StOps.push_back(Chain);
2646
2647 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2648
2649 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2650 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2651 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2652
2653 return ST;
2654 }
2655
SelectLoadParam(SDNode * Node)2656 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2657 SDValue Chain = Node->getOperand(0);
2658 SDValue Offset = Node->getOperand(2);
2659 SDValue Flag = Node->getOperand(3);
2660 SDLoc DL(Node);
2661 MemSDNode *Mem = cast<MemSDNode>(Node);
2662
2663 unsigned VecSize;
2664 switch (Node->getOpcode()) {
2665 default:
2666 return nullptr;
2667 case NVPTXISD::LoadParam:
2668 VecSize = 1;
2669 break;
2670 case NVPTXISD::LoadParamV2:
2671 VecSize = 2;
2672 break;
2673 case NVPTXISD::LoadParamV4:
2674 VecSize = 4;
2675 break;
2676 }
2677
2678 EVT EltVT = Node->getValueType(0);
2679 EVT MemVT = Mem->getMemoryVT();
2680
2681 unsigned Opc = 0;
2682
2683 switch (VecSize) {
2684 default:
2685 return nullptr;
2686 case 1:
2687 switch (MemVT.getSimpleVT().SimpleTy) {
2688 default:
2689 return nullptr;
2690 case MVT::i1:
2691 Opc = NVPTX::LoadParamMemI8;
2692 break;
2693 case MVT::i8:
2694 Opc = NVPTX::LoadParamMemI8;
2695 break;
2696 case MVT::i16:
2697 Opc = NVPTX::LoadParamMemI16;
2698 break;
2699 case MVT::i32:
2700 Opc = NVPTX::LoadParamMemI32;
2701 break;
2702 case MVT::i64:
2703 Opc = NVPTX::LoadParamMemI64;
2704 break;
2705 case MVT::f32:
2706 Opc = NVPTX::LoadParamMemF32;
2707 break;
2708 case MVT::f64:
2709 Opc = NVPTX::LoadParamMemF64;
2710 break;
2711 }
2712 break;
2713 case 2:
2714 switch (MemVT.getSimpleVT().SimpleTy) {
2715 default:
2716 return nullptr;
2717 case MVT::i1:
2718 Opc = NVPTX::LoadParamMemV2I8;
2719 break;
2720 case MVT::i8:
2721 Opc = NVPTX::LoadParamMemV2I8;
2722 break;
2723 case MVT::i16:
2724 Opc = NVPTX::LoadParamMemV2I16;
2725 break;
2726 case MVT::i32:
2727 Opc = NVPTX::LoadParamMemV2I32;
2728 break;
2729 case MVT::i64:
2730 Opc = NVPTX::LoadParamMemV2I64;
2731 break;
2732 case MVT::f32:
2733 Opc = NVPTX::LoadParamMemV2F32;
2734 break;
2735 case MVT::f64:
2736 Opc = NVPTX::LoadParamMemV2F64;
2737 break;
2738 }
2739 break;
2740 case 4:
2741 switch (MemVT.getSimpleVT().SimpleTy) {
2742 default:
2743 return nullptr;
2744 case MVT::i1:
2745 Opc = NVPTX::LoadParamMemV4I8;
2746 break;
2747 case MVT::i8:
2748 Opc = NVPTX::LoadParamMemV4I8;
2749 break;
2750 case MVT::i16:
2751 Opc = NVPTX::LoadParamMemV4I16;
2752 break;
2753 case MVT::i32:
2754 Opc = NVPTX::LoadParamMemV4I32;
2755 break;
2756 case MVT::f32:
2757 Opc = NVPTX::LoadParamMemV4F32;
2758 break;
2759 }
2760 break;
2761 }
2762
2763 SDVTList VTs;
2764 if (VecSize == 1) {
2765 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2766 } else if (VecSize == 2) {
2767 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2768 } else {
2769 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2770 VTs = CurDAG->getVTList(EVTs);
2771 }
2772
2773 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2774
2775 SmallVector<SDValue, 2> Ops;
2776 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2777 Ops.push_back(Chain);
2778 Ops.push_back(Flag);
2779
2780 return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2781 }
2782
SelectStoreRetval(SDNode * N)2783 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2784 SDLoc DL(N);
2785 SDValue Chain = N->getOperand(0);
2786 SDValue Offset = N->getOperand(1);
2787 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2788 MemSDNode *Mem = cast<MemSDNode>(N);
2789
2790 // How many elements do we have?
2791 unsigned NumElts = 1;
2792 switch (N->getOpcode()) {
2793 default:
2794 return nullptr;
2795 case NVPTXISD::StoreRetval:
2796 NumElts = 1;
2797 break;
2798 case NVPTXISD::StoreRetvalV2:
2799 NumElts = 2;
2800 break;
2801 case NVPTXISD::StoreRetvalV4:
2802 NumElts = 4;
2803 break;
2804 }
2805
2806 // Build vector of operands
2807 SmallVector<SDValue, 6> Ops;
2808 for (unsigned i = 0; i < NumElts; ++i)
2809 Ops.push_back(N->getOperand(i + 2));
2810 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2811 Ops.push_back(Chain);
2812
2813 // Determine target opcode
2814 // If we have an i1, use an 8-bit store. The lowering code in
2815 // NVPTXISelLowering will have already emitted an upcast.
2816 unsigned Opcode = 0;
2817 switch (NumElts) {
2818 default:
2819 return nullptr;
2820 case 1:
2821 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2822 default:
2823 return nullptr;
2824 case MVT::i1:
2825 Opcode = NVPTX::StoreRetvalI8;
2826 break;
2827 case MVT::i8:
2828 Opcode = NVPTX::StoreRetvalI8;
2829 break;
2830 case MVT::i16:
2831 Opcode = NVPTX::StoreRetvalI16;
2832 break;
2833 case MVT::i32:
2834 Opcode = NVPTX::StoreRetvalI32;
2835 break;
2836 case MVT::i64:
2837 Opcode = NVPTX::StoreRetvalI64;
2838 break;
2839 case MVT::f32:
2840 Opcode = NVPTX::StoreRetvalF32;
2841 break;
2842 case MVT::f64:
2843 Opcode = NVPTX::StoreRetvalF64;
2844 break;
2845 }
2846 break;
2847 case 2:
2848 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2849 default:
2850 return nullptr;
2851 case MVT::i1:
2852 Opcode = NVPTX::StoreRetvalV2I8;
2853 break;
2854 case MVT::i8:
2855 Opcode = NVPTX::StoreRetvalV2I8;
2856 break;
2857 case MVT::i16:
2858 Opcode = NVPTX::StoreRetvalV2I16;
2859 break;
2860 case MVT::i32:
2861 Opcode = NVPTX::StoreRetvalV2I32;
2862 break;
2863 case MVT::i64:
2864 Opcode = NVPTX::StoreRetvalV2I64;
2865 break;
2866 case MVT::f32:
2867 Opcode = NVPTX::StoreRetvalV2F32;
2868 break;
2869 case MVT::f64:
2870 Opcode = NVPTX::StoreRetvalV2F64;
2871 break;
2872 }
2873 break;
2874 case 4:
2875 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2876 default:
2877 return nullptr;
2878 case MVT::i1:
2879 Opcode = NVPTX::StoreRetvalV4I8;
2880 break;
2881 case MVT::i8:
2882 Opcode = NVPTX::StoreRetvalV4I8;
2883 break;
2884 case MVT::i16:
2885 Opcode = NVPTX::StoreRetvalV4I16;
2886 break;
2887 case MVT::i32:
2888 Opcode = NVPTX::StoreRetvalV4I32;
2889 break;
2890 case MVT::f32:
2891 Opcode = NVPTX::StoreRetvalV4F32;
2892 break;
2893 }
2894 break;
2895 }
2896
2897 SDNode *Ret =
2898 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2899 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2900 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2901 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2902
2903 return Ret;
2904 }
2905
SelectStoreParam(SDNode * N)2906 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2907 SDLoc DL(N);
2908 SDValue Chain = N->getOperand(0);
2909 SDValue Param = N->getOperand(1);
2910 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2911 SDValue Offset = N->getOperand(2);
2912 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2913 MemSDNode *Mem = cast<MemSDNode>(N);
2914 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2915
2916 // How many elements do we have?
2917 unsigned NumElts = 1;
2918 switch (N->getOpcode()) {
2919 default:
2920 return nullptr;
2921 case NVPTXISD::StoreParamU32:
2922 case NVPTXISD::StoreParamS32:
2923 case NVPTXISD::StoreParam:
2924 NumElts = 1;
2925 break;
2926 case NVPTXISD::StoreParamV2:
2927 NumElts = 2;
2928 break;
2929 case NVPTXISD::StoreParamV4:
2930 NumElts = 4;
2931 break;
2932 }
2933
2934 // Build vector of operands
2935 SmallVector<SDValue, 8> Ops;
2936 for (unsigned i = 0; i < NumElts; ++i)
2937 Ops.push_back(N->getOperand(i + 3));
2938 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2939 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2940 Ops.push_back(Chain);
2941 Ops.push_back(Flag);
2942
2943 // Determine target opcode
2944 // If we have an i1, use an 8-bit store. The lowering code in
2945 // NVPTXISelLowering will have already emitted an upcast.
2946 unsigned Opcode = 0;
2947 switch (N->getOpcode()) {
2948 default:
2949 switch (NumElts) {
2950 default:
2951 return nullptr;
2952 case 1:
2953 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2954 default:
2955 return nullptr;
2956 case MVT::i1:
2957 Opcode = NVPTX::StoreParamI8;
2958 break;
2959 case MVT::i8:
2960 Opcode = NVPTX::StoreParamI8;
2961 break;
2962 case MVT::i16:
2963 Opcode = NVPTX::StoreParamI16;
2964 break;
2965 case MVT::i32:
2966 Opcode = NVPTX::StoreParamI32;
2967 break;
2968 case MVT::i64:
2969 Opcode = NVPTX::StoreParamI64;
2970 break;
2971 case MVT::f32:
2972 Opcode = NVPTX::StoreParamF32;
2973 break;
2974 case MVT::f64:
2975 Opcode = NVPTX::StoreParamF64;
2976 break;
2977 }
2978 break;
2979 case 2:
2980 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2981 default:
2982 return nullptr;
2983 case MVT::i1:
2984 Opcode = NVPTX::StoreParamV2I8;
2985 break;
2986 case MVT::i8:
2987 Opcode = NVPTX::StoreParamV2I8;
2988 break;
2989 case MVT::i16:
2990 Opcode = NVPTX::StoreParamV2I16;
2991 break;
2992 case MVT::i32:
2993 Opcode = NVPTX::StoreParamV2I32;
2994 break;
2995 case MVT::i64:
2996 Opcode = NVPTX::StoreParamV2I64;
2997 break;
2998 case MVT::f32:
2999 Opcode = NVPTX::StoreParamV2F32;
3000 break;
3001 case MVT::f64:
3002 Opcode = NVPTX::StoreParamV2F64;
3003 break;
3004 }
3005 break;
3006 case 4:
3007 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3008 default:
3009 return nullptr;
3010 case MVT::i1:
3011 Opcode = NVPTX::StoreParamV4I8;
3012 break;
3013 case MVT::i8:
3014 Opcode = NVPTX::StoreParamV4I8;
3015 break;
3016 case MVT::i16:
3017 Opcode = NVPTX::StoreParamV4I16;
3018 break;
3019 case MVT::i32:
3020 Opcode = NVPTX::StoreParamV4I32;
3021 break;
3022 case MVT::f32:
3023 Opcode = NVPTX::StoreParamV4F32;
3024 break;
3025 }
3026 break;
3027 }
3028 break;
3029 // Special case: if we have a sign-extend/zero-extend node, insert the
3030 // conversion instruction first, and use that as the value operand to
3031 // the selected StoreParam node.
3032 case NVPTXISD::StoreParamU32: {
3033 Opcode = NVPTX::StoreParamI32;
3034 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3035 MVT::i32);
3036 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3037 MVT::i32, Ops[0], CvtNone);
3038 Ops[0] = SDValue(Cvt, 0);
3039 break;
3040 }
3041 case NVPTXISD::StoreParamS32: {
3042 Opcode = NVPTX::StoreParamI32;
3043 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3044 MVT::i32);
3045 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3046 MVT::i32, Ops[0], CvtNone);
3047 Ops[0] = SDValue(Cvt, 0);
3048 break;
3049 }
3050 }
3051
3052 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3053 SDNode *Ret =
3054 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3055 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3056 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3057 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3058
3059 return Ret;
3060 }
3061
SelectTextureIntrinsic(SDNode * N)3062 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3063 SDValue Chain = N->getOperand(0);
3064 SDNode *Ret = nullptr;
3065 unsigned Opc = 0;
3066 SmallVector<SDValue, 8> Ops;
3067
3068 switch (N->getOpcode()) {
3069 default: return nullptr;
3070 case NVPTXISD::Tex1DFloatS32:
3071 Opc = NVPTX::TEX_1D_F32_S32;
3072 break;
3073 case NVPTXISD::Tex1DFloatFloat:
3074 Opc = NVPTX::TEX_1D_F32_F32;
3075 break;
3076 case NVPTXISD::Tex1DFloatFloatLevel:
3077 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3078 break;
3079 case NVPTXISD::Tex1DFloatFloatGrad:
3080 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3081 break;
3082 case NVPTXISD::Tex1DS32S32:
3083 Opc = NVPTX::TEX_1D_S32_S32;
3084 break;
3085 case NVPTXISD::Tex1DS32Float:
3086 Opc = NVPTX::TEX_1D_S32_F32;
3087 break;
3088 case NVPTXISD::Tex1DS32FloatLevel:
3089 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3090 break;
3091 case NVPTXISD::Tex1DS32FloatGrad:
3092 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3093 break;
3094 case NVPTXISD::Tex1DU32S32:
3095 Opc = NVPTX::TEX_1D_U32_S32;
3096 break;
3097 case NVPTXISD::Tex1DU32Float:
3098 Opc = NVPTX::TEX_1D_U32_F32;
3099 break;
3100 case NVPTXISD::Tex1DU32FloatLevel:
3101 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3102 break;
3103 case NVPTXISD::Tex1DU32FloatGrad:
3104 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3105 break;
3106 case NVPTXISD::Tex1DArrayFloatS32:
3107 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3108 break;
3109 case NVPTXISD::Tex1DArrayFloatFloat:
3110 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3111 break;
3112 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3113 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3114 break;
3115 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3116 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3117 break;
3118 case NVPTXISD::Tex1DArrayS32S32:
3119 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3120 break;
3121 case NVPTXISD::Tex1DArrayS32Float:
3122 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3123 break;
3124 case NVPTXISD::Tex1DArrayS32FloatLevel:
3125 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3126 break;
3127 case NVPTXISD::Tex1DArrayS32FloatGrad:
3128 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3129 break;
3130 case NVPTXISD::Tex1DArrayU32S32:
3131 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3132 break;
3133 case NVPTXISD::Tex1DArrayU32Float:
3134 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3135 break;
3136 case NVPTXISD::Tex1DArrayU32FloatLevel:
3137 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3138 break;
3139 case NVPTXISD::Tex1DArrayU32FloatGrad:
3140 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3141 break;
3142 case NVPTXISD::Tex2DFloatS32:
3143 Opc = NVPTX::TEX_2D_F32_S32;
3144 break;
3145 case NVPTXISD::Tex2DFloatFloat:
3146 Opc = NVPTX::TEX_2D_F32_F32;
3147 break;
3148 case NVPTXISD::Tex2DFloatFloatLevel:
3149 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3150 break;
3151 case NVPTXISD::Tex2DFloatFloatGrad:
3152 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3153 break;
3154 case NVPTXISD::Tex2DS32S32:
3155 Opc = NVPTX::TEX_2D_S32_S32;
3156 break;
3157 case NVPTXISD::Tex2DS32Float:
3158 Opc = NVPTX::TEX_2D_S32_F32;
3159 break;
3160 case NVPTXISD::Tex2DS32FloatLevel:
3161 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3162 break;
3163 case NVPTXISD::Tex2DS32FloatGrad:
3164 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3165 break;
3166 case NVPTXISD::Tex2DU32S32:
3167 Opc = NVPTX::TEX_2D_U32_S32;
3168 break;
3169 case NVPTXISD::Tex2DU32Float:
3170 Opc = NVPTX::TEX_2D_U32_F32;
3171 break;
3172 case NVPTXISD::Tex2DU32FloatLevel:
3173 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3174 break;
3175 case NVPTXISD::Tex2DU32FloatGrad:
3176 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3177 break;
3178 case NVPTXISD::Tex2DArrayFloatS32:
3179 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3180 break;
3181 case NVPTXISD::Tex2DArrayFloatFloat:
3182 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3183 break;
3184 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3185 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3186 break;
3187 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3188 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3189 break;
3190 case NVPTXISD::Tex2DArrayS32S32:
3191 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3192 break;
3193 case NVPTXISD::Tex2DArrayS32Float:
3194 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3195 break;
3196 case NVPTXISD::Tex2DArrayS32FloatLevel:
3197 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3198 break;
3199 case NVPTXISD::Tex2DArrayS32FloatGrad:
3200 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3201 break;
3202 case NVPTXISD::Tex2DArrayU32S32:
3203 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3204 break;
3205 case NVPTXISD::Tex2DArrayU32Float:
3206 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3207 break;
3208 case NVPTXISD::Tex2DArrayU32FloatLevel:
3209 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3210 break;
3211 case NVPTXISD::Tex2DArrayU32FloatGrad:
3212 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3213 break;
3214 case NVPTXISD::Tex3DFloatS32:
3215 Opc = NVPTX::TEX_3D_F32_S32;
3216 break;
3217 case NVPTXISD::Tex3DFloatFloat:
3218 Opc = NVPTX::TEX_3D_F32_F32;
3219 break;
3220 case NVPTXISD::Tex3DFloatFloatLevel:
3221 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3222 break;
3223 case NVPTXISD::Tex3DFloatFloatGrad:
3224 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3225 break;
3226 case NVPTXISD::Tex3DS32S32:
3227 Opc = NVPTX::TEX_3D_S32_S32;
3228 break;
3229 case NVPTXISD::Tex3DS32Float:
3230 Opc = NVPTX::TEX_3D_S32_F32;
3231 break;
3232 case NVPTXISD::Tex3DS32FloatLevel:
3233 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3234 break;
3235 case NVPTXISD::Tex3DS32FloatGrad:
3236 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3237 break;
3238 case NVPTXISD::Tex3DU32S32:
3239 Opc = NVPTX::TEX_3D_U32_S32;
3240 break;
3241 case NVPTXISD::Tex3DU32Float:
3242 Opc = NVPTX::TEX_3D_U32_F32;
3243 break;
3244 case NVPTXISD::Tex3DU32FloatLevel:
3245 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3246 break;
3247 case NVPTXISD::Tex3DU32FloatGrad:
3248 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3249 break;
3250 case NVPTXISD::TexCubeFloatFloat:
3251 Opc = NVPTX::TEX_CUBE_F32_F32;
3252 break;
3253 case NVPTXISD::TexCubeFloatFloatLevel:
3254 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3255 break;
3256 case NVPTXISD::TexCubeS32Float:
3257 Opc = NVPTX::TEX_CUBE_S32_F32;
3258 break;
3259 case NVPTXISD::TexCubeS32FloatLevel:
3260 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3261 break;
3262 case NVPTXISD::TexCubeU32Float:
3263 Opc = NVPTX::TEX_CUBE_U32_F32;
3264 break;
3265 case NVPTXISD::TexCubeU32FloatLevel:
3266 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3267 break;
3268 case NVPTXISD::TexCubeArrayFloatFloat:
3269 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3270 break;
3271 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3272 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3273 break;
3274 case NVPTXISD::TexCubeArrayS32Float:
3275 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3276 break;
3277 case NVPTXISD::TexCubeArrayS32FloatLevel:
3278 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3279 break;
3280 case NVPTXISD::TexCubeArrayU32Float:
3281 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3282 break;
3283 case NVPTXISD::TexCubeArrayU32FloatLevel:
3284 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3285 break;
3286 case NVPTXISD::Tld4R2DFloatFloat:
3287 Opc = NVPTX::TLD4_R_2D_F32_F32;
3288 break;
3289 case NVPTXISD::Tld4G2DFloatFloat:
3290 Opc = NVPTX::TLD4_G_2D_F32_F32;
3291 break;
3292 case NVPTXISD::Tld4B2DFloatFloat:
3293 Opc = NVPTX::TLD4_B_2D_F32_F32;
3294 break;
3295 case NVPTXISD::Tld4A2DFloatFloat:
3296 Opc = NVPTX::TLD4_A_2D_F32_F32;
3297 break;
3298 case NVPTXISD::Tld4R2DS64Float:
3299 Opc = NVPTX::TLD4_R_2D_S32_F32;
3300 break;
3301 case NVPTXISD::Tld4G2DS64Float:
3302 Opc = NVPTX::TLD4_G_2D_S32_F32;
3303 break;
3304 case NVPTXISD::Tld4B2DS64Float:
3305 Opc = NVPTX::TLD4_B_2D_S32_F32;
3306 break;
3307 case NVPTXISD::Tld4A2DS64Float:
3308 Opc = NVPTX::TLD4_A_2D_S32_F32;
3309 break;
3310 case NVPTXISD::Tld4R2DU64Float:
3311 Opc = NVPTX::TLD4_R_2D_U32_F32;
3312 break;
3313 case NVPTXISD::Tld4G2DU64Float:
3314 Opc = NVPTX::TLD4_G_2D_U32_F32;
3315 break;
3316 case NVPTXISD::Tld4B2DU64Float:
3317 Opc = NVPTX::TLD4_B_2D_U32_F32;
3318 break;
3319 case NVPTXISD::Tld4A2DU64Float:
3320 Opc = NVPTX::TLD4_A_2D_U32_F32;
3321 break;
3322 case NVPTXISD::TexUnified1DFloatS32:
3323 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3324 break;
3325 case NVPTXISD::TexUnified1DFloatFloat:
3326 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3327 break;
3328 case NVPTXISD::TexUnified1DFloatFloatLevel:
3329 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3330 break;
3331 case NVPTXISD::TexUnified1DFloatFloatGrad:
3332 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3333 break;
3334 case NVPTXISD::TexUnified1DS32S32:
3335 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3336 break;
3337 case NVPTXISD::TexUnified1DS32Float:
3338 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3339 break;
3340 case NVPTXISD::TexUnified1DS32FloatLevel:
3341 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3342 break;
3343 case NVPTXISD::TexUnified1DS32FloatGrad:
3344 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3345 break;
3346 case NVPTXISD::TexUnified1DU32S32:
3347 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3348 break;
3349 case NVPTXISD::TexUnified1DU32Float:
3350 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3351 break;
3352 case NVPTXISD::TexUnified1DU32FloatLevel:
3353 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3354 break;
3355 case NVPTXISD::TexUnified1DU32FloatGrad:
3356 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3357 break;
3358 case NVPTXISD::TexUnified1DArrayFloatS32:
3359 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3360 break;
3361 case NVPTXISD::TexUnified1DArrayFloatFloat:
3362 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3363 break;
3364 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3365 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3366 break;
3367 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3368 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3369 break;
3370 case NVPTXISD::TexUnified1DArrayS32S32:
3371 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3372 break;
3373 case NVPTXISD::TexUnified1DArrayS32Float:
3374 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3375 break;
3376 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3377 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3378 break;
3379 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3380 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3381 break;
3382 case NVPTXISD::TexUnified1DArrayU32S32:
3383 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3384 break;
3385 case NVPTXISD::TexUnified1DArrayU32Float:
3386 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3387 break;
3388 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3389 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3390 break;
3391 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3392 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3393 break;
3394 case NVPTXISD::TexUnified2DFloatS32:
3395 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3396 break;
3397 case NVPTXISD::TexUnified2DFloatFloat:
3398 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3399 break;
3400 case NVPTXISD::TexUnified2DFloatFloatLevel:
3401 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3402 break;
3403 case NVPTXISD::TexUnified2DFloatFloatGrad:
3404 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3405 break;
3406 case NVPTXISD::TexUnified2DS32S32:
3407 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3408 break;
3409 case NVPTXISD::TexUnified2DS32Float:
3410 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3411 break;
3412 case NVPTXISD::TexUnified2DS32FloatLevel:
3413 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3414 break;
3415 case NVPTXISD::TexUnified2DS32FloatGrad:
3416 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3417 break;
3418 case NVPTXISD::TexUnified2DU32S32:
3419 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3420 break;
3421 case NVPTXISD::TexUnified2DU32Float:
3422 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3423 break;
3424 case NVPTXISD::TexUnified2DU32FloatLevel:
3425 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3426 break;
3427 case NVPTXISD::TexUnified2DU32FloatGrad:
3428 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3429 break;
3430 case NVPTXISD::TexUnified2DArrayFloatS32:
3431 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3432 break;
3433 case NVPTXISD::TexUnified2DArrayFloatFloat:
3434 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3435 break;
3436 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3437 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3438 break;
3439 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3440 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3441 break;
3442 case NVPTXISD::TexUnified2DArrayS32S32:
3443 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3444 break;
3445 case NVPTXISD::TexUnified2DArrayS32Float:
3446 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3447 break;
3448 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3449 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3450 break;
3451 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3452 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3453 break;
3454 case NVPTXISD::TexUnified2DArrayU32S32:
3455 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3456 break;
3457 case NVPTXISD::TexUnified2DArrayU32Float:
3458 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3459 break;
3460 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3461 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3462 break;
3463 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3464 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3465 break;
3466 case NVPTXISD::TexUnified3DFloatS32:
3467 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3468 break;
3469 case NVPTXISD::TexUnified3DFloatFloat:
3470 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3471 break;
3472 case NVPTXISD::TexUnified3DFloatFloatLevel:
3473 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3474 break;
3475 case NVPTXISD::TexUnified3DFloatFloatGrad:
3476 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3477 break;
3478 case NVPTXISD::TexUnified3DS32S32:
3479 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3480 break;
3481 case NVPTXISD::TexUnified3DS32Float:
3482 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3483 break;
3484 case NVPTXISD::TexUnified3DS32FloatLevel:
3485 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3486 break;
3487 case NVPTXISD::TexUnified3DS32FloatGrad:
3488 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3489 break;
3490 case NVPTXISD::TexUnified3DU32S32:
3491 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3492 break;
3493 case NVPTXISD::TexUnified3DU32Float:
3494 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3495 break;
3496 case NVPTXISD::TexUnified3DU32FloatLevel:
3497 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3498 break;
3499 case NVPTXISD::TexUnified3DU32FloatGrad:
3500 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3501 break;
3502 case NVPTXISD::TexUnifiedCubeFloatFloat:
3503 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3504 break;
3505 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3506 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3507 break;
3508 case NVPTXISD::TexUnifiedCubeS32Float:
3509 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3510 break;
3511 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3512 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3513 break;
3514 case NVPTXISD::TexUnifiedCubeU32Float:
3515 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3516 break;
3517 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3518 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3519 break;
3520 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3521 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3522 break;
3523 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3524 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3525 break;
3526 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3527 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3528 break;
3529 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3530 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3531 break;
3532 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3533 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3534 break;
3535 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3536 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3537 break;
3538 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3539 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3540 break;
3541 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3542 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3543 break;
3544 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3545 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3546 break;
3547 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3548 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3549 break;
3550 case NVPTXISD::Tld4UnifiedR2DS64Float:
3551 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3552 break;
3553 case NVPTXISD::Tld4UnifiedG2DS64Float:
3554 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3555 break;
3556 case NVPTXISD::Tld4UnifiedB2DS64Float:
3557 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3558 break;
3559 case NVPTXISD::Tld4UnifiedA2DS64Float:
3560 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3561 break;
3562 case NVPTXISD::Tld4UnifiedR2DU64Float:
3563 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3564 break;
3565 case NVPTXISD::Tld4UnifiedG2DU64Float:
3566 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3567 break;
3568 case NVPTXISD::Tld4UnifiedB2DU64Float:
3569 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3570 break;
3571 case NVPTXISD::Tld4UnifiedA2DU64Float:
3572 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3573 break;
3574 }
3575
3576 // Copy over operands
3577 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3578 Ops.push_back(N->getOperand(i));
3579 }
3580
3581 Ops.push_back(Chain);
3582 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3583 return Ret;
3584 }
3585
SelectSurfaceIntrinsic(SDNode * N)3586 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3587 SDValue Chain = N->getOperand(0);
3588 SDValue TexHandle = N->getOperand(1);
3589 SDNode *Ret = nullptr;
3590 unsigned Opc = 0;
3591 SmallVector<SDValue, 8> Ops;
3592 switch (N->getOpcode()) {
3593 default: return nullptr;
3594 case NVPTXISD::Suld1DI8Clamp:
3595 Opc = NVPTX::SULD_1D_I8_CLAMP;
3596 Ops.push_back(TexHandle);
3597 Ops.push_back(N->getOperand(2));
3598 Ops.push_back(Chain);
3599 break;
3600 case NVPTXISD::Suld1DI16Clamp:
3601 Opc = NVPTX::SULD_1D_I16_CLAMP;
3602 Ops.push_back(TexHandle);
3603 Ops.push_back(N->getOperand(2));
3604 Ops.push_back(Chain);
3605 break;
3606 case NVPTXISD::Suld1DI32Clamp:
3607 Opc = NVPTX::SULD_1D_I32_CLAMP;
3608 Ops.push_back(TexHandle);
3609 Ops.push_back(N->getOperand(2));
3610 Ops.push_back(Chain);
3611 break;
3612 case NVPTXISD::Suld1DI64Clamp:
3613 Opc = NVPTX::SULD_1D_I64_CLAMP;
3614 Ops.push_back(TexHandle);
3615 Ops.push_back(N->getOperand(2));
3616 Ops.push_back(Chain);
3617 break;
3618 case NVPTXISD::Suld1DV2I8Clamp:
3619 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3620 Ops.push_back(TexHandle);
3621 Ops.push_back(N->getOperand(2));
3622 Ops.push_back(Chain);
3623 break;
3624 case NVPTXISD::Suld1DV2I16Clamp:
3625 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3626 Ops.push_back(TexHandle);
3627 Ops.push_back(N->getOperand(2));
3628 Ops.push_back(Chain);
3629 break;
3630 case NVPTXISD::Suld1DV2I32Clamp:
3631 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3632 Ops.push_back(TexHandle);
3633 Ops.push_back(N->getOperand(2));
3634 Ops.push_back(Chain);
3635 break;
3636 case NVPTXISD::Suld1DV2I64Clamp:
3637 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3638 Ops.push_back(TexHandle);
3639 Ops.push_back(N->getOperand(2));
3640 Ops.push_back(Chain);
3641 break;
3642 case NVPTXISD::Suld1DV4I8Clamp:
3643 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3644 Ops.push_back(TexHandle);
3645 Ops.push_back(N->getOperand(2));
3646 Ops.push_back(Chain);
3647 break;
3648 case NVPTXISD::Suld1DV4I16Clamp:
3649 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3650 Ops.push_back(TexHandle);
3651 Ops.push_back(N->getOperand(2));
3652 Ops.push_back(Chain);
3653 break;
3654 case NVPTXISD::Suld1DV4I32Clamp:
3655 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3656 Ops.push_back(TexHandle);
3657 Ops.push_back(N->getOperand(2));
3658 Ops.push_back(Chain);
3659 break;
3660 case NVPTXISD::Suld1DArrayI8Clamp:
3661 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3662 Ops.push_back(TexHandle);
3663 Ops.push_back(N->getOperand(2));
3664 Ops.push_back(N->getOperand(3));
3665 Ops.push_back(Chain);
3666 break;
3667 case NVPTXISD::Suld1DArrayI16Clamp:
3668 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3669 Ops.push_back(TexHandle);
3670 Ops.push_back(N->getOperand(2));
3671 Ops.push_back(N->getOperand(3));
3672 Ops.push_back(Chain);
3673 break;
3674 case NVPTXISD::Suld1DArrayI32Clamp:
3675 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3676 Ops.push_back(TexHandle);
3677 Ops.push_back(N->getOperand(2));
3678 Ops.push_back(N->getOperand(3));
3679 Ops.push_back(Chain);
3680 break;
3681 case NVPTXISD::Suld1DArrayI64Clamp:
3682 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3683 Ops.push_back(TexHandle);
3684 Ops.push_back(N->getOperand(2));
3685 Ops.push_back(N->getOperand(3));
3686 Ops.push_back(Chain);
3687 break;
3688 case NVPTXISD::Suld1DArrayV2I8Clamp:
3689 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3690 Ops.push_back(TexHandle);
3691 Ops.push_back(N->getOperand(2));
3692 Ops.push_back(N->getOperand(3));
3693 Ops.push_back(Chain);
3694 break;
3695 case NVPTXISD::Suld1DArrayV2I16Clamp:
3696 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3697 Ops.push_back(TexHandle);
3698 Ops.push_back(N->getOperand(2));
3699 Ops.push_back(N->getOperand(3));
3700 Ops.push_back(Chain);
3701 break;
3702 case NVPTXISD::Suld1DArrayV2I32Clamp:
3703 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3704 Ops.push_back(TexHandle);
3705 Ops.push_back(N->getOperand(2));
3706 Ops.push_back(N->getOperand(3));
3707 Ops.push_back(Chain);
3708 break;
3709 case NVPTXISD::Suld1DArrayV2I64Clamp:
3710 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3711 Ops.push_back(TexHandle);
3712 Ops.push_back(N->getOperand(2));
3713 Ops.push_back(N->getOperand(3));
3714 Ops.push_back(Chain);
3715 break;
3716 case NVPTXISD::Suld1DArrayV4I8Clamp:
3717 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3718 Ops.push_back(TexHandle);
3719 Ops.push_back(N->getOperand(2));
3720 Ops.push_back(N->getOperand(3));
3721 Ops.push_back(Chain);
3722 break;
3723 case NVPTXISD::Suld1DArrayV4I16Clamp:
3724 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3725 Ops.push_back(TexHandle);
3726 Ops.push_back(N->getOperand(2));
3727 Ops.push_back(N->getOperand(3));
3728 Ops.push_back(Chain);
3729 break;
3730 case NVPTXISD::Suld1DArrayV4I32Clamp:
3731 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3732 Ops.push_back(TexHandle);
3733 Ops.push_back(N->getOperand(2));
3734 Ops.push_back(N->getOperand(3));
3735 Ops.push_back(Chain);
3736 break;
3737 case NVPTXISD::Suld2DI8Clamp:
3738 Opc = NVPTX::SULD_2D_I8_CLAMP;
3739 Ops.push_back(TexHandle);
3740 Ops.push_back(N->getOperand(2));
3741 Ops.push_back(N->getOperand(3));
3742 Ops.push_back(Chain);
3743 break;
3744 case NVPTXISD::Suld2DI16Clamp:
3745 Opc = NVPTX::SULD_2D_I16_CLAMP;
3746 Ops.push_back(TexHandle);
3747 Ops.push_back(N->getOperand(2));
3748 Ops.push_back(N->getOperand(3));
3749 Ops.push_back(Chain);
3750 break;
3751 case NVPTXISD::Suld2DI32Clamp:
3752 Opc = NVPTX::SULD_2D_I32_CLAMP;
3753 Ops.push_back(TexHandle);
3754 Ops.push_back(N->getOperand(2));
3755 Ops.push_back(N->getOperand(3));
3756 Ops.push_back(Chain);
3757 break;
3758 case NVPTXISD::Suld2DI64Clamp:
3759 Opc = NVPTX::SULD_2D_I64_CLAMP;
3760 Ops.push_back(TexHandle);
3761 Ops.push_back(N->getOperand(2));
3762 Ops.push_back(N->getOperand(3));
3763 Ops.push_back(Chain);
3764 break;
3765 case NVPTXISD::Suld2DV2I8Clamp:
3766 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3767 Ops.push_back(TexHandle);
3768 Ops.push_back(N->getOperand(2));
3769 Ops.push_back(N->getOperand(3));
3770 Ops.push_back(Chain);
3771 break;
3772 case NVPTXISD::Suld2DV2I16Clamp:
3773 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3774 Ops.push_back(TexHandle);
3775 Ops.push_back(N->getOperand(2));
3776 Ops.push_back(N->getOperand(3));
3777 Ops.push_back(Chain);
3778 break;
3779 case NVPTXISD::Suld2DV2I32Clamp:
3780 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3781 Ops.push_back(TexHandle);
3782 Ops.push_back(N->getOperand(2));
3783 Ops.push_back(N->getOperand(3));
3784 Ops.push_back(Chain);
3785 break;
3786 case NVPTXISD::Suld2DV2I64Clamp:
3787 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3788 Ops.push_back(TexHandle);
3789 Ops.push_back(N->getOperand(2));
3790 Ops.push_back(N->getOperand(3));
3791 Ops.push_back(Chain);
3792 break;
3793 case NVPTXISD::Suld2DV4I8Clamp:
3794 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3795 Ops.push_back(TexHandle);
3796 Ops.push_back(N->getOperand(2));
3797 Ops.push_back(N->getOperand(3));
3798 Ops.push_back(Chain);
3799 break;
3800 case NVPTXISD::Suld2DV4I16Clamp:
3801 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3802 Ops.push_back(TexHandle);
3803 Ops.push_back(N->getOperand(2));
3804 Ops.push_back(N->getOperand(3));
3805 Ops.push_back(Chain);
3806 break;
3807 case NVPTXISD::Suld2DV4I32Clamp:
3808 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3809 Ops.push_back(TexHandle);
3810 Ops.push_back(N->getOperand(2));
3811 Ops.push_back(N->getOperand(3));
3812 Ops.push_back(Chain);
3813 break;
3814 case NVPTXISD::Suld2DArrayI8Clamp:
3815 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3816 Ops.push_back(TexHandle);
3817 Ops.push_back(N->getOperand(2));
3818 Ops.push_back(N->getOperand(3));
3819 Ops.push_back(N->getOperand(4));
3820 Ops.push_back(Chain);
3821 break;
3822 case NVPTXISD::Suld2DArrayI16Clamp:
3823 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3824 Ops.push_back(TexHandle);
3825 Ops.push_back(N->getOperand(2));
3826 Ops.push_back(N->getOperand(3));
3827 Ops.push_back(N->getOperand(4));
3828 Ops.push_back(Chain);
3829 break;
3830 case NVPTXISD::Suld2DArrayI32Clamp:
3831 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3832 Ops.push_back(TexHandle);
3833 Ops.push_back(N->getOperand(2));
3834 Ops.push_back(N->getOperand(3));
3835 Ops.push_back(N->getOperand(4));
3836 Ops.push_back(Chain);
3837 break;
3838 case NVPTXISD::Suld2DArrayI64Clamp:
3839 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3840 Ops.push_back(TexHandle);
3841 Ops.push_back(N->getOperand(2));
3842 Ops.push_back(N->getOperand(3));
3843 Ops.push_back(N->getOperand(4));
3844 Ops.push_back(Chain);
3845 break;
3846 case NVPTXISD::Suld2DArrayV2I8Clamp:
3847 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3848 Ops.push_back(TexHandle);
3849 Ops.push_back(N->getOperand(2));
3850 Ops.push_back(N->getOperand(3));
3851 Ops.push_back(N->getOperand(4));
3852 Ops.push_back(Chain);
3853 break;
3854 case NVPTXISD::Suld2DArrayV2I16Clamp:
3855 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3856 Ops.push_back(TexHandle);
3857 Ops.push_back(N->getOperand(2));
3858 Ops.push_back(N->getOperand(3));
3859 Ops.push_back(N->getOperand(4));
3860 Ops.push_back(Chain);
3861 break;
3862 case NVPTXISD::Suld2DArrayV2I32Clamp:
3863 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3864 Ops.push_back(TexHandle);
3865 Ops.push_back(N->getOperand(2));
3866 Ops.push_back(N->getOperand(3));
3867 Ops.push_back(N->getOperand(4));
3868 Ops.push_back(Chain);
3869 break;
3870 case NVPTXISD::Suld2DArrayV2I64Clamp:
3871 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3872 Ops.push_back(TexHandle);
3873 Ops.push_back(N->getOperand(2));
3874 Ops.push_back(N->getOperand(3));
3875 Ops.push_back(N->getOperand(4));
3876 Ops.push_back(Chain);
3877 break;
3878 case NVPTXISD::Suld2DArrayV4I8Clamp:
3879 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3880 Ops.push_back(TexHandle);
3881 Ops.push_back(N->getOperand(2));
3882 Ops.push_back(N->getOperand(3));
3883 Ops.push_back(N->getOperand(4));
3884 Ops.push_back(Chain);
3885 break;
3886 case NVPTXISD::Suld2DArrayV4I16Clamp:
3887 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3888 Ops.push_back(TexHandle);
3889 Ops.push_back(N->getOperand(2));
3890 Ops.push_back(N->getOperand(3));
3891 Ops.push_back(N->getOperand(4));
3892 Ops.push_back(Chain);
3893 break;
3894 case NVPTXISD::Suld2DArrayV4I32Clamp:
3895 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3896 Ops.push_back(TexHandle);
3897 Ops.push_back(N->getOperand(2));
3898 Ops.push_back(N->getOperand(3));
3899 Ops.push_back(N->getOperand(4));
3900 Ops.push_back(Chain);
3901 break;
3902 case NVPTXISD::Suld3DI8Clamp:
3903 Opc = NVPTX::SULD_3D_I8_CLAMP;
3904 Ops.push_back(TexHandle);
3905 Ops.push_back(N->getOperand(2));
3906 Ops.push_back(N->getOperand(3));
3907 Ops.push_back(N->getOperand(4));
3908 Ops.push_back(Chain);
3909 break;
3910 case NVPTXISD::Suld3DI16Clamp:
3911 Opc = NVPTX::SULD_3D_I16_CLAMP;
3912 Ops.push_back(TexHandle);
3913 Ops.push_back(N->getOperand(2));
3914 Ops.push_back(N->getOperand(3));
3915 Ops.push_back(N->getOperand(4));
3916 Ops.push_back(Chain);
3917 break;
3918 case NVPTXISD::Suld3DI32Clamp:
3919 Opc = NVPTX::SULD_3D_I32_CLAMP;
3920 Ops.push_back(TexHandle);
3921 Ops.push_back(N->getOperand(2));
3922 Ops.push_back(N->getOperand(3));
3923 Ops.push_back(N->getOperand(4));
3924 Ops.push_back(Chain);
3925 break;
3926 case NVPTXISD::Suld3DI64Clamp:
3927 Opc = NVPTX::SULD_3D_I64_CLAMP;
3928 Ops.push_back(TexHandle);
3929 Ops.push_back(N->getOperand(2));
3930 Ops.push_back(N->getOperand(3));
3931 Ops.push_back(N->getOperand(4));
3932 Ops.push_back(Chain);
3933 break;
3934 case NVPTXISD::Suld3DV2I8Clamp:
3935 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3936 Ops.push_back(TexHandle);
3937 Ops.push_back(N->getOperand(2));
3938 Ops.push_back(N->getOperand(3));
3939 Ops.push_back(N->getOperand(4));
3940 Ops.push_back(Chain);
3941 break;
3942 case NVPTXISD::Suld3DV2I16Clamp:
3943 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3944 Ops.push_back(TexHandle);
3945 Ops.push_back(N->getOperand(2));
3946 Ops.push_back(N->getOperand(3));
3947 Ops.push_back(N->getOperand(4));
3948 Ops.push_back(Chain);
3949 break;
3950 case NVPTXISD::Suld3DV2I32Clamp:
3951 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3952 Ops.push_back(TexHandle);
3953 Ops.push_back(N->getOperand(2));
3954 Ops.push_back(N->getOperand(3));
3955 Ops.push_back(N->getOperand(4));
3956 Ops.push_back(Chain);
3957 break;
3958 case NVPTXISD::Suld3DV2I64Clamp:
3959 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3960 Ops.push_back(TexHandle);
3961 Ops.push_back(N->getOperand(2));
3962 Ops.push_back(N->getOperand(3));
3963 Ops.push_back(N->getOperand(4));
3964 Ops.push_back(Chain);
3965 break;
3966 case NVPTXISD::Suld3DV4I8Clamp:
3967 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3968 Ops.push_back(TexHandle);
3969 Ops.push_back(N->getOperand(2));
3970 Ops.push_back(N->getOperand(3));
3971 Ops.push_back(N->getOperand(4));
3972 Ops.push_back(Chain);
3973 break;
3974 case NVPTXISD::Suld3DV4I16Clamp:
3975 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3976 Ops.push_back(TexHandle);
3977 Ops.push_back(N->getOperand(2));
3978 Ops.push_back(N->getOperand(3));
3979 Ops.push_back(N->getOperand(4));
3980 Ops.push_back(Chain);
3981 break;
3982 case NVPTXISD::Suld3DV4I32Clamp:
3983 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3984 Ops.push_back(TexHandle);
3985 Ops.push_back(N->getOperand(2));
3986 Ops.push_back(N->getOperand(3));
3987 Ops.push_back(N->getOperand(4));
3988 Ops.push_back(Chain);
3989 break;
3990 case NVPTXISD::Suld1DI8Trap:
3991 Opc = NVPTX::SULD_1D_I8_TRAP;
3992 Ops.push_back(TexHandle);
3993 Ops.push_back(N->getOperand(2));
3994 Ops.push_back(Chain);
3995 break;
3996 case NVPTXISD::Suld1DI16Trap:
3997 Opc = NVPTX::SULD_1D_I16_TRAP;
3998 Ops.push_back(TexHandle);
3999 Ops.push_back(N->getOperand(2));
4000 Ops.push_back(Chain);
4001 break;
4002 case NVPTXISD::Suld1DI32Trap:
4003 Opc = NVPTX::SULD_1D_I32_TRAP;
4004 Ops.push_back(TexHandle);
4005 Ops.push_back(N->getOperand(2));
4006 Ops.push_back(Chain);
4007 break;
4008 case NVPTXISD::Suld1DI64Trap:
4009 Opc = NVPTX::SULD_1D_I64_TRAP;
4010 Ops.push_back(TexHandle);
4011 Ops.push_back(N->getOperand(2));
4012 Ops.push_back(Chain);
4013 break;
4014 case NVPTXISD::Suld1DV2I8Trap:
4015 Opc = NVPTX::SULD_1D_V2I8_TRAP;
4016 Ops.push_back(TexHandle);
4017 Ops.push_back(N->getOperand(2));
4018 Ops.push_back(Chain);
4019 break;
4020 case NVPTXISD::Suld1DV2I16Trap:
4021 Opc = NVPTX::SULD_1D_V2I16_TRAP;
4022 Ops.push_back(TexHandle);
4023 Ops.push_back(N->getOperand(2));
4024 Ops.push_back(Chain);
4025 break;
4026 case NVPTXISD::Suld1DV2I32Trap:
4027 Opc = NVPTX::SULD_1D_V2I32_TRAP;
4028 Ops.push_back(TexHandle);
4029 Ops.push_back(N->getOperand(2));
4030 Ops.push_back(Chain);
4031 break;
4032 case NVPTXISD::Suld1DV2I64Trap:
4033 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4034 Ops.push_back(TexHandle);
4035 Ops.push_back(N->getOperand(2));
4036 Ops.push_back(Chain);
4037 break;
4038 case NVPTXISD::Suld1DV4I8Trap:
4039 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4040 Ops.push_back(TexHandle);
4041 Ops.push_back(N->getOperand(2));
4042 Ops.push_back(Chain);
4043 break;
4044 case NVPTXISD::Suld1DV4I16Trap:
4045 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4046 Ops.push_back(TexHandle);
4047 Ops.push_back(N->getOperand(2));
4048 Ops.push_back(Chain);
4049 break;
4050 case NVPTXISD::Suld1DV4I32Trap:
4051 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4052 Ops.push_back(TexHandle);
4053 Ops.push_back(N->getOperand(2));
4054 Ops.push_back(Chain);
4055 break;
4056 case NVPTXISD::Suld1DArrayI8Trap:
4057 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4058 Ops.push_back(TexHandle);
4059 Ops.push_back(N->getOperand(2));
4060 Ops.push_back(N->getOperand(3));
4061 Ops.push_back(Chain);
4062 break;
4063 case NVPTXISD::Suld1DArrayI16Trap:
4064 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4065 Ops.push_back(TexHandle);
4066 Ops.push_back(N->getOperand(2));
4067 Ops.push_back(N->getOperand(3));
4068 Ops.push_back(Chain);
4069 break;
4070 case NVPTXISD::Suld1DArrayI32Trap:
4071 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4072 Ops.push_back(TexHandle);
4073 Ops.push_back(N->getOperand(2));
4074 Ops.push_back(N->getOperand(3));
4075 Ops.push_back(Chain);
4076 break;
4077 case NVPTXISD::Suld1DArrayI64Trap:
4078 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4079 Ops.push_back(TexHandle);
4080 Ops.push_back(N->getOperand(2));
4081 Ops.push_back(N->getOperand(3));
4082 Ops.push_back(Chain);
4083 break;
4084 case NVPTXISD::Suld1DArrayV2I8Trap:
4085 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4086 Ops.push_back(TexHandle);
4087 Ops.push_back(N->getOperand(2));
4088 Ops.push_back(N->getOperand(3));
4089 Ops.push_back(Chain);
4090 break;
4091 case NVPTXISD::Suld1DArrayV2I16Trap:
4092 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4093 Ops.push_back(TexHandle);
4094 Ops.push_back(N->getOperand(2));
4095 Ops.push_back(N->getOperand(3));
4096 Ops.push_back(Chain);
4097 break;
4098 case NVPTXISD::Suld1DArrayV2I32Trap:
4099 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4100 Ops.push_back(TexHandle);
4101 Ops.push_back(N->getOperand(2));
4102 Ops.push_back(N->getOperand(3));
4103 Ops.push_back(Chain);
4104 break;
4105 case NVPTXISD::Suld1DArrayV2I64Trap:
4106 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4107 Ops.push_back(TexHandle);
4108 Ops.push_back(N->getOperand(2));
4109 Ops.push_back(N->getOperand(3));
4110 Ops.push_back(Chain);
4111 break;
4112 case NVPTXISD::Suld1DArrayV4I8Trap:
4113 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4114 Ops.push_back(TexHandle);
4115 Ops.push_back(N->getOperand(2));
4116 Ops.push_back(N->getOperand(3));
4117 Ops.push_back(Chain);
4118 break;
4119 case NVPTXISD::Suld1DArrayV4I16Trap:
4120 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4121 Ops.push_back(TexHandle);
4122 Ops.push_back(N->getOperand(2));
4123 Ops.push_back(N->getOperand(3));
4124 Ops.push_back(Chain);
4125 break;
4126 case NVPTXISD::Suld1DArrayV4I32Trap:
4127 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4128 Ops.push_back(TexHandle);
4129 Ops.push_back(N->getOperand(2));
4130 Ops.push_back(N->getOperand(3));
4131 Ops.push_back(Chain);
4132 break;
4133 case NVPTXISD::Suld2DI8Trap:
4134 Opc = NVPTX::SULD_2D_I8_TRAP;
4135 Ops.push_back(TexHandle);
4136 Ops.push_back(N->getOperand(2));
4137 Ops.push_back(N->getOperand(3));
4138 Ops.push_back(Chain);
4139 break;
4140 case NVPTXISD::Suld2DI16Trap:
4141 Opc = NVPTX::SULD_2D_I16_TRAP;
4142 Ops.push_back(TexHandle);
4143 Ops.push_back(N->getOperand(2));
4144 Ops.push_back(N->getOperand(3));
4145 Ops.push_back(Chain);
4146 break;
4147 case NVPTXISD::Suld2DI32Trap:
4148 Opc = NVPTX::SULD_2D_I32_TRAP;
4149 Ops.push_back(TexHandle);
4150 Ops.push_back(N->getOperand(2));
4151 Ops.push_back(N->getOperand(3));
4152 Ops.push_back(Chain);
4153 break;
4154 case NVPTXISD::Suld2DI64Trap:
4155 Opc = NVPTX::SULD_2D_I64_TRAP;
4156 Ops.push_back(TexHandle);
4157 Ops.push_back(N->getOperand(2));
4158 Ops.push_back(N->getOperand(3));
4159 Ops.push_back(Chain);
4160 break;
4161 case NVPTXISD::Suld2DV2I8Trap:
4162 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4163 Ops.push_back(TexHandle);
4164 Ops.push_back(N->getOperand(2));
4165 Ops.push_back(N->getOperand(3));
4166 Ops.push_back(Chain);
4167 break;
4168 case NVPTXISD::Suld2DV2I16Trap:
4169 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4170 Ops.push_back(TexHandle);
4171 Ops.push_back(N->getOperand(2));
4172 Ops.push_back(N->getOperand(3));
4173 Ops.push_back(Chain);
4174 break;
4175 case NVPTXISD::Suld2DV2I32Trap:
4176 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4177 Ops.push_back(TexHandle);
4178 Ops.push_back(N->getOperand(2));
4179 Ops.push_back(N->getOperand(3));
4180 Ops.push_back(Chain);
4181 break;
4182 case NVPTXISD::Suld2DV2I64Trap:
4183 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4184 Ops.push_back(TexHandle);
4185 Ops.push_back(N->getOperand(2));
4186 Ops.push_back(N->getOperand(3));
4187 Ops.push_back(Chain);
4188 break;
4189 case NVPTXISD::Suld2DV4I8Trap:
4190 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4191 Ops.push_back(TexHandle);
4192 Ops.push_back(N->getOperand(2));
4193 Ops.push_back(N->getOperand(3));
4194 Ops.push_back(Chain);
4195 break;
4196 case NVPTXISD::Suld2DV4I16Trap:
4197 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4198 Ops.push_back(TexHandle);
4199 Ops.push_back(N->getOperand(2));
4200 Ops.push_back(N->getOperand(3));
4201 Ops.push_back(Chain);
4202 break;
4203 case NVPTXISD::Suld2DV4I32Trap:
4204 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4205 Ops.push_back(TexHandle);
4206 Ops.push_back(N->getOperand(2));
4207 Ops.push_back(N->getOperand(3));
4208 Ops.push_back(Chain);
4209 break;
4210 case NVPTXISD::Suld2DArrayI8Trap:
4211 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4212 Ops.push_back(TexHandle);
4213 Ops.push_back(N->getOperand(2));
4214 Ops.push_back(N->getOperand(3));
4215 Ops.push_back(N->getOperand(4));
4216 Ops.push_back(Chain);
4217 break;
4218 case NVPTXISD::Suld2DArrayI16Trap:
4219 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4220 Ops.push_back(TexHandle);
4221 Ops.push_back(N->getOperand(2));
4222 Ops.push_back(N->getOperand(3));
4223 Ops.push_back(N->getOperand(4));
4224 Ops.push_back(Chain);
4225 break;
4226 case NVPTXISD::Suld2DArrayI32Trap:
4227 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4228 Ops.push_back(TexHandle);
4229 Ops.push_back(N->getOperand(2));
4230 Ops.push_back(N->getOperand(3));
4231 Ops.push_back(N->getOperand(4));
4232 Ops.push_back(Chain);
4233 break;
4234 case NVPTXISD::Suld2DArrayI64Trap:
4235 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4236 Ops.push_back(TexHandle);
4237 Ops.push_back(N->getOperand(2));
4238 Ops.push_back(N->getOperand(3));
4239 Ops.push_back(N->getOperand(4));
4240 Ops.push_back(Chain);
4241 break;
4242 case NVPTXISD::Suld2DArrayV2I8Trap:
4243 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4244 Ops.push_back(TexHandle);
4245 Ops.push_back(N->getOperand(2));
4246 Ops.push_back(N->getOperand(3));
4247 Ops.push_back(N->getOperand(4));
4248 Ops.push_back(Chain);
4249 break;
4250 case NVPTXISD::Suld2DArrayV2I16Trap:
4251 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4252 Ops.push_back(TexHandle);
4253 Ops.push_back(N->getOperand(2));
4254 Ops.push_back(N->getOperand(3));
4255 Ops.push_back(N->getOperand(4));
4256 Ops.push_back(Chain);
4257 break;
4258 case NVPTXISD::Suld2DArrayV2I32Trap:
4259 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4260 Ops.push_back(TexHandle);
4261 Ops.push_back(N->getOperand(2));
4262 Ops.push_back(N->getOperand(3));
4263 Ops.push_back(N->getOperand(4));
4264 Ops.push_back(Chain);
4265 break;
4266 case NVPTXISD::Suld2DArrayV2I64Trap:
4267 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4268 Ops.push_back(TexHandle);
4269 Ops.push_back(N->getOperand(2));
4270 Ops.push_back(N->getOperand(3));
4271 Ops.push_back(N->getOperand(4));
4272 Ops.push_back(Chain);
4273 break;
4274 case NVPTXISD::Suld2DArrayV4I8Trap:
4275 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4276 Ops.push_back(TexHandle);
4277 Ops.push_back(N->getOperand(2));
4278 Ops.push_back(N->getOperand(3));
4279 Ops.push_back(N->getOperand(4));
4280 Ops.push_back(Chain);
4281 break;
4282 case NVPTXISD::Suld2DArrayV4I16Trap:
4283 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4284 Ops.push_back(TexHandle);
4285 Ops.push_back(N->getOperand(2));
4286 Ops.push_back(N->getOperand(3));
4287 Ops.push_back(N->getOperand(4));
4288 Ops.push_back(Chain);
4289 break;
4290 case NVPTXISD::Suld2DArrayV4I32Trap:
4291 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4292 Ops.push_back(TexHandle);
4293 Ops.push_back(N->getOperand(2));
4294 Ops.push_back(N->getOperand(3));
4295 Ops.push_back(N->getOperand(4));
4296 Ops.push_back(Chain);
4297 break;
4298 case NVPTXISD::Suld3DI8Trap:
4299 Opc = NVPTX::SULD_3D_I8_TRAP;
4300 Ops.push_back(TexHandle);
4301 Ops.push_back(N->getOperand(2));
4302 Ops.push_back(N->getOperand(3));
4303 Ops.push_back(N->getOperand(4));
4304 Ops.push_back(Chain);
4305 break;
4306 case NVPTXISD::Suld3DI16Trap:
4307 Opc = NVPTX::SULD_3D_I16_TRAP;
4308 Ops.push_back(TexHandle);
4309 Ops.push_back(N->getOperand(2));
4310 Ops.push_back(N->getOperand(3));
4311 Ops.push_back(N->getOperand(4));
4312 Ops.push_back(Chain);
4313 break;
4314 case NVPTXISD::Suld3DI32Trap:
4315 Opc = NVPTX::SULD_3D_I32_TRAP;
4316 Ops.push_back(TexHandle);
4317 Ops.push_back(N->getOperand(2));
4318 Ops.push_back(N->getOperand(3));
4319 Ops.push_back(N->getOperand(4));
4320 Ops.push_back(Chain);
4321 break;
4322 case NVPTXISD::Suld3DI64Trap:
4323 Opc = NVPTX::SULD_3D_I64_TRAP;
4324 Ops.push_back(TexHandle);
4325 Ops.push_back(N->getOperand(2));
4326 Ops.push_back(N->getOperand(3));
4327 Ops.push_back(N->getOperand(4));
4328 Ops.push_back(Chain);
4329 break;
4330 case NVPTXISD::Suld3DV2I8Trap:
4331 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4332 Ops.push_back(TexHandle);
4333 Ops.push_back(N->getOperand(2));
4334 Ops.push_back(N->getOperand(3));
4335 Ops.push_back(N->getOperand(4));
4336 Ops.push_back(Chain);
4337 break;
4338 case NVPTXISD::Suld3DV2I16Trap:
4339 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4340 Ops.push_back(TexHandle);
4341 Ops.push_back(N->getOperand(2));
4342 Ops.push_back(N->getOperand(3));
4343 Ops.push_back(N->getOperand(4));
4344 Ops.push_back(Chain);
4345 break;
4346 case NVPTXISD::Suld3DV2I32Trap:
4347 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4348 Ops.push_back(TexHandle);
4349 Ops.push_back(N->getOperand(2));
4350 Ops.push_back(N->getOperand(3));
4351 Ops.push_back(N->getOperand(4));
4352 Ops.push_back(Chain);
4353 break;
4354 case NVPTXISD::Suld3DV2I64Trap:
4355 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4356 Ops.push_back(TexHandle);
4357 Ops.push_back(N->getOperand(2));
4358 Ops.push_back(N->getOperand(3));
4359 Ops.push_back(N->getOperand(4));
4360 Ops.push_back(Chain);
4361 break;
4362 case NVPTXISD::Suld3DV4I8Trap:
4363 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4364 Ops.push_back(TexHandle);
4365 Ops.push_back(N->getOperand(2));
4366 Ops.push_back(N->getOperand(3));
4367 Ops.push_back(N->getOperand(4));
4368 Ops.push_back(Chain);
4369 break;
4370 case NVPTXISD::Suld3DV4I16Trap:
4371 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4372 Ops.push_back(TexHandle);
4373 Ops.push_back(N->getOperand(2));
4374 Ops.push_back(N->getOperand(3));
4375 Ops.push_back(N->getOperand(4));
4376 Ops.push_back(Chain);
4377 break;
4378 case NVPTXISD::Suld3DV4I32Trap:
4379 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4380 Ops.push_back(TexHandle);
4381 Ops.push_back(N->getOperand(2));
4382 Ops.push_back(N->getOperand(3));
4383 Ops.push_back(N->getOperand(4));
4384 Ops.push_back(Chain);
4385 break;
4386 case NVPTXISD::Suld1DI8Zero:
4387 Opc = NVPTX::SULD_1D_I8_ZERO;
4388 Ops.push_back(TexHandle);
4389 Ops.push_back(N->getOperand(2));
4390 Ops.push_back(Chain);
4391 break;
4392 case NVPTXISD::Suld1DI16Zero:
4393 Opc = NVPTX::SULD_1D_I16_ZERO;
4394 Ops.push_back(TexHandle);
4395 Ops.push_back(N->getOperand(2));
4396 Ops.push_back(Chain);
4397 break;
4398 case NVPTXISD::Suld1DI32Zero:
4399 Opc = NVPTX::SULD_1D_I32_ZERO;
4400 Ops.push_back(TexHandle);
4401 Ops.push_back(N->getOperand(2));
4402 Ops.push_back(Chain);
4403 break;
4404 case NVPTXISD::Suld1DI64Zero:
4405 Opc = NVPTX::SULD_1D_I64_ZERO;
4406 Ops.push_back(TexHandle);
4407 Ops.push_back(N->getOperand(2));
4408 Ops.push_back(Chain);
4409 break;
4410 case NVPTXISD::Suld1DV2I8Zero:
4411 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4412 Ops.push_back(TexHandle);
4413 Ops.push_back(N->getOperand(2));
4414 Ops.push_back(Chain);
4415 break;
4416 case NVPTXISD::Suld1DV2I16Zero:
4417 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4418 Ops.push_back(TexHandle);
4419 Ops.push_back(N->getOperand(2));
4420 Ops.push_back(Chain);
4421 break;
4422 case NVPTXISD::Suld1DV2I32Zero:
4423 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4424 Ops.push_back(TexHandle);
4425 Ops.push_back(N->getOperand(2));
4426 Ops.push_back(Chain);
4427 break;
4428 case NVPTXISD::Suld1DV2I64Zero:
4429 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4430 Ops.push_back(TexHandle);
4431 Ops.push_back(N->getOperand(2));
4432 Ops.push_back(Chain);
4433 break;
4434 case NVPTXISD::Suld1DV4I8Zero:
4435 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4436 Ops.push_back(TexHandle);
4437 Ops.push_back(N->getOperand(2));
4438 Ops.push_back(Chain);
4439 break;
4440 case NVPTXISD::Suld1DV4I16Zero:
4441 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4442 Ops.push_back(TexHandle);
4443 Ops.push_back(N->getOperand(2));
4444 Ops.push_back(Chain);
4445 break;
4446 case NVPTXISD::Suld1DV4I32Zero:
4447 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4448 Ops.push_back(TexHandle);
4449 Ops.push_back(N->getOperand(2));
4450 Ops.push_back(Chain);
4451 break;
4452 case NVPTXISD::Suld1DArrayI8Zero:
4453 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4454 Ops.push_back(TexHandle);
4455 Ops.push_back(N->getOperand(2));
4456 Ops.push_back(N->getOperand(3));
4457 Ops.push_back(Chain);
4458 break;
4459 case NVPTXISD::Suld1DArrayI16Zero:
4460 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4461 Ops.push_back(TexHandle);
4462 Ops.push_back(N->getOperand(2));
4463 Ops.push_back(N->getOperand(3));
4464 Ops.push_back(Chain);
4465 break;
4466 case NVPTXISD::Suld1DArrayI32Zero:
4467 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4468 Ops.push_back(TexHandle);
4469 Ops.push_back(N->getOperand(2));
4470 Ops.push_back(N->getOperand(3));
4471 Ops.push_back(Chain);
4472 break;
4473 case NVPTXISD::Suld1DArrayI64Zero:
4474 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4475 Ops.push_back(TexHandle);
4476 Ops.push_back(N->getOperand(2));
4477 Ops.push_back(N->getOperand(3));
4478 Ops.push_back(Chain);
4479 break;
4480 case NVPTXISD::Suld1DArrayV2I8Zero:
4481 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4482 Ops.push_back(TexHandle);
4483 Ops.push_back(N->getOperand(2));
4484 Ops.push_back(N->getOperand(3));
4485 Ops.push_back(Chain);
4486 break;
4487 case NVPTXISD::Suld1DArrayV2I16Zero:
4488 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4489 Ops.push_back(TexHandle);
4490 Ops.push_back(N->getOperand(2));
4491 Ops.push_back(N->getOperand(3));
4492 Ops.push_back(Chain);
4493 break;
4494 case NVPTXISD::Suld1DArrayV2I32Zero:
4495 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4496 Ops.push_back(TexHandle);
4497 Ops.push_back(N->getOperand(2));
4498 Ops.push_back(N->getOperand(3));
4499 Ops.push_back(Chain);
4500 break;
4501 case NVPTXISD::Suld1DArrayV2I64Zero:
4502 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4503 Ops.push_back(TexHandle);
4504 Ops.push_back(N->getOperand(2));
4505 Ops.push_back(N->getOperand(3));
4506 Ops.push_back(Chain);
4507 break;
4508 case NVPTXISD::Suld1DArrayV4I8Zero:
4509 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4510 Ops.push_back(TexHandle);
4511 Ops.push_back(N->getOperand(2));
4512 Ops.push_back(N->getOperand(3));
4513 Ops.push_back(Chain);
4514 break;
4515 case NVPTXISD::Suld1DArrayV4I16Zero:
4516 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4517 Ops.push_back(TexHandle);
4518 Ops.push_back(N->getOperand(2));
4519 Ops.push_back(N->getOperand(3));
4520 Ops.push_back(Chain);
4521 break;
4522 case NVPTXISD::Suld1DArrayV4I32Zero:
4523 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4524 Ops.push_back(TexHandle);
4525 Ops.push_back(N->getOperand(2));
4526 Ops.push_back(N->getOperand(3));
4527 Ops.push_back(Chain);
4528 break;
4529 case NVPTXISD::Suld2DI8Zero:
4530 Opc = NVPTX::SULD_2D_I8_ZERO;
4531 Ops.push_back(TexHandle);
4532 Ops.push_back(N->getOperand(2));
4533 Ops.push_back(N->getOperand(3));
4534 Ops.push_back(Chain);
4535 break;
4536 case NVPTXISD::Suld2DI16Zero:
4537 Opc = NVPTX::SULD_2D_I16_ZERO;
4538 Ops.push_back(TexHandle);
4539 Ops.push_back(N->getOperand(2));
4540 Ops.push_back(N->getOperand(3));
4541 Ops.push_back(Chain);
4542 break;
4543 case NVPTXISD::Suld2DI32Zero:
4544 Opc = NVPTX::SULD_2D_I32_ZERO;
4545 Ops.push_back(TexHandle);
4546 Ops.push_back(N->getOperand(2));
4547 Ops.push_back(N->getOperand(3));
4548 Ops.push_back(Chain);
4549 break;
4550 case NVPTXISD::Suld2DI64Zero:
4551 Opc = NVPTX::SULD_2D_I64_ZERO;
4552 Ops.push_back(TexHandle);
4553 Ops.push_back(N->getOperand(2));
4554 Ops.push_back(N->getOperand(3));
4555 Ops.push_back(Chain);
4556 break;
4557 case NVPTXISD::Suld2DV2I8Zero:
4558 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4559 Ops.push_back(TexHandle);
4560 Ops.push_back(N->getOperand(2));
4561 Ops.push_back(N->getOperand(3));
4562 Ops.push_back(Chain);
4563 break;
4564 case NVPTXISD::Suld2DV2I16Zero:
4565 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4566 Ops.push_back(TexHandle);
4567 Ops.push_back(N->getOperand(2));
4568 Ops.push_back(N->getOperand(3));
4569 Ops.push_back(Chain);
4570 break;
4571 case NVPTXISD::Suld2DV2I32Zero:
4572 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4573 Ops.push_back(TexHandle);
4574 Ops.push_back(N->getOperand(2));
4575 Ops.push_back(N->getOperand(3));
4576 Ops.push_back(Chain);
4577 break;
4578 case NVPTXISD::Suld2DV2I64Zero:
4579 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4580 Ops.push_back(TexHandle);
4581 Ops.push_back(N->getOperand(2));
4582 Ops.push_back(N->getOperand(3));
4583 Ops.push_back(Chain);
4584 break;
4585 case NVPTXISD::Suld2DV4I8Zero:
4586 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4587 Ops.push_back(TexHandle);
4588 Ops.push_back(N->getOperand(2));
4589 Ops.push_back(N->getOperand(3));
4590 Ops.push_back(Chain);
4591 break;
4592 case NVPTXISD::Suld2DV4I16Zero:
4593 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4594 Ops.push_back(TexHandle);
4595 Ops.push_back(N->getOperand(2));
4596 Ops.push_back(N->getOperand(3));
4597 Ops.push_back(Chain);
4598 break;
4599 case NVPTXISD::Suld2DV4I32Zero:
4600 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4601 Ops.push_back(TexHandle);
4602 Ops.push_back(N->getOperand(2));
4603 Ops.push_back(N->getOperand(3));
4604 Ops.push_back(Chain);
4605 break;
4606 case NVPTXISD::Suld2DArrayI8Zero:
4607 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4608 Ops.push_back(TexHandle);
4609 Ops.push_back(N->getOperand(2));
4610 Ops.push_back(N->getOperand(3));
4611 Ops.push_back(N->getOperand(4));
4612 Ops.push_back(Chain);
4613 break;
4614 case NVPTXISD::Suld2DArrayI16Zero:
4615 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4616 Ops.push_back(TexHandle);
4617 Ops.push_back(N->getOperand(2));
4618 Ops.push_back(N->getOperand(3));
4619 Ops.push_back(N->getOperand(4));
4620 Ops.push_back(Chain);
4621 break;
4622 case NVPTXISD::Suld2DArrayI32Zero:
4623 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4624 Ops.push_back(TexHandle);
4625 Ops.push_back(N->getOperand(2));
4626 Ops.push_back(N->getOperand(3));
4627 Ops.push_back(N->getOperand(4));
4628 Ops.push_back(Chain);
4629 break;
4630 case NVPTXISD::Suld2DArrayI64Zero:
4631 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4632 Ops.push_back(TexHandle);
4633 Ops.push_back(N->getOperand(2));
4634 Ops.push_back(N->getOperand(3));
4635 Ops.push_back(N->getOperand(4));
4636 Ops.push_back(Chain);
4637 break;
4638 case NVPTXISD::Suld2DArrayV2I8Zero:
4639 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4640 Ops.push_back(TexHandle);
4641 Ops.push_back(N->getOperand(2));
4642 Ops.push_back(N->getOperand(3));
4643 Ops.push_back(N->getOperand(4));
4644 Ops.push_back(Chain);
4645 break;
4646 case NVPTXISD::Suld2DArrayV2I16Zero:
4647 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4648 Ops.push_back(TexHandle);
4649 Ops.push_back(N->getOperand(2));
4650 Ops.push_back(N->getOperand(3));
4651 Ops.push_back(N->getOperand(4));
4652 Ops.push_back(Chain);
4653 break;
4654 case NVPTXISD::Suld2DArrayV2I32Zero:
4655 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4656 Ops.push_back(TexHandle);
4657 Ops.push_back(N->getOperand(2));
4658 Ops.push_back(N->getOperand(3));
4659 Ops.push_back(N->getOperand(4));
4660 Ops.push_back(Chain);
4661 break;
4662 case NVPTXISD::Suld2DArrayV2I64Zero:
4663 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4664 Ops.push_back(TexHandle);
4665 Ops.push_back(N->getOperand(2));
4666 Ops.push_back(N->getOperand(3));
4667 Ops.push_back(N->getOperand(4));
4668 Ops.push_back(Chain);
4669 break;
4670 case NVPTXISD::Suld2DArrayV4I8Zero:
4671 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4672 Ops.push_back(TexHandle);
4673 Ops.push_back(N->getOperand(2));
4674 Ops.push_back(N->getOperand(3));
4675 Ops.push_back(N->getOperand(4));
4676 Ops.push_back(Chain);
4677 break;
4678 case NVPTXISD::Suld2DArrayV4I16Zero:
4679 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4680 Ops.push_back(TexHandle);
4681 Ops.push_back(N->getOperand(2));
4682 Ops.push_back(N->getOperand(3));
4683 Ops.push_back(N->getOperand(4));
4684 Ops.push_back(Chain);
4685 break;
4686 case NVPTXISD::Suld2DArrayV4I32Zero:
4687 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4688 Ops.push_back(TexHandle);
4689 Ops.push_back(N->getOperand(2));
4690 Ops.push_back(N->getOperand(3));
4691 Ops.push_back(N->getOperand(4));
4692 Ops.push_back(Chain);
4693 break;
4694 case NVPTXISD::Suld3DI8Zero:
4695 Opc = NVPTX::SULD_3D_I8_ZERO;
4696 Ops.push_back(TexHandle);
4697 Ops.push_back(N->getOperand(2));
4698 Ops.push_back(N->getOperand(3));
4699 Ops.push_back(N->getOperand(4));
4700 Ops.push_back(Chain);
4701 break;
4702 case NVPTXISD::Suld3DI16Zero:
4703 Opc = NVPTX::SULD_3D_I16_ZERO;
4704 Ops.push_back(TexHandle);
4705 Ops.push_back(N->getOperand(2));
4706 Ops.push_back(N->getOperand(3));
4707 Ops.push_back(N->getOperand(4));
4708 Ops.push_back(Chain);
4709 break;
4710 case NVPTXISD::Suld3DI32Zero:
4711 Opc = NVPTX::SULD_3D_I32_ZERO;
4712 Ops.push_back(TexHandle);
4713 Ops.push_back(N->getOperand(2));
4714 Ops.push_back(N->getOperand(3));
4715 Ops.push_back(N->getOperand(4));
4716 Ops.push_back(Chain);
4717 break;
4718 case NVPTXISD::Suld3DI64Zero:
4719 Opc = NVPTX::SULD_3D_I64_ZERO;
4720 Ops.push_back(TexHandle);
4721 Ops.push_back(N->getOperand(2));
4722 Ops.push_back(N->getOperand(3));
4723 Ops.push_back(N->getOperand(4));
4724 Ops.push_back(Chain);
4725 break;
4726 case NVPTXISD::Suld3DV2I8Zero:
4727 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4728 Ops.push_back(TexHandle);
4729 Ops.push_back(N->getOperand(2));
4730 Ops.push_back(N->getOperand(3));
4731 Ops.push_back(N->getOperand(4));
4732 Ops.push_back(Chain);
4733 break;
4734 case NVPTXISD::Suld3DV2I16Zero:
4735 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4736 Ops.push_back(TexHandle);
4737 Ops.push_back(N->getOperand(2));
4738 Ops.push_back(N->getOperand(3));
4739 Ops.push_back(N->getOperand(4));
4740 Ops.push_back(Chain);
4741 break;
4742 case NVPTXISD::Suld3DV2I32Zero:
4743 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4744 Ops.push_back(TexHandle);
4745 Ops.push_back(N->getOperand(2));
4746 Ops.push_back(N->getOperand(3));
4747 Ops.push_back(N->getOperand(4));
4748 Ops.push_back(Chain);
4749 break;
4750 case NVPTXISD::Suld3DV2I64Zero:
4751 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4752 Ops.push_back(TexHandle);
4753 Ops.push_back(N->getOperand(2));
4754 Ops.push_back(N->getOperand(3));
4755 Ops.push_back(N->getOperand(4));
4756 Ops.push_back(Chain);
4757 break;
4758 case NVPTXISD::Suld3DV4I8Zero:
4759 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4760 Ops.push_back(TexHandle);
4761 Ops.push_back(N->getOperand(2));
4762 Ops.push_back(N->getOperand(3));
4763 Ops.push_back(N->getOperand(4));
4764 Ops.push_back(Chain);
4765 break;
4766 case NVPTXISD::Suld3DV4I16Zero:
4767 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4768 Ops.push_back(TexHandle);
4769 Ops.push_back(N->getOperand(2));
4770 Ops.push_back(N->getOperand(3));
4771 Ops.push_back(N->getOperand(4));
4772 Ops.push_back(Chain);
4773 break;
4774 case NVPTXISD::Suld3DV4I32Zero:
4775 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4776 Ops.push_back(TexHandle);
4777 Ops.push_back(N->getOperand(2));
4778 Ops.push_back(N->getOperand(3));
4779 Ops.push_back(N->getOperand(4));
4780 Ops.push_back(Chain);
4781 break;
4782 }
4783 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4784 return Ret;
4785 }
4786
4787
4788 /// SelectBFE - Look for instruction sequences that can be made more efficient
4789 /// by using the 'bfe' (bit-field extract) PTX instruction
SelectBFE(SDNode * N)4790 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4791 SDLoc DL(N);
4792 SDValue LHS = N->getOperand(0);
4793 SDValue RHS = N->getOperand(1);
4794 SDValue Len;
4795 SDValue Start;
4796 SDValue Val;
4797 bool IsSigned = false;
4798
4799 if (N->getOpcode() == ISD::AND) {
4800 // Canonicalize the operands
4801 // We want 'and %val, %mask'
4802 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4803 std::swap(LHS, RHS);
4804 }
4805
4806 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4807 if (!Mask) {
4808 // We need a constant mask on the RHS of the AND
4809 return NULL;
4810 }
4811
4812 // Extract the mask bits
4813 uint64_t MaskVal = Mask->getZExtValue();
4814 if (!isMask_64(MaskVal)) {
4815 // We *could* handle shifted masks here, but doing so would require an
4816 // 'and' operation to fix up the low-order bits so we would trade
4817 // shr+and for bfe+and, which has the same throughput
4818 return NULL;
4819 }
4820
4821 // How many bits are in our mask?
4822 uint64_t NumBits = countTrailingOnes(MaskVal);
4823 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4824
4825 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4826 // We have a 'srl/and' pair, extract the effective start bit and length
4827 Val = LHS.getNode()->getOperand(0);
4828 Start = LHS.getNode()->getOperand(1);
4829 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4830 if (StartConst) {
4831 uint64_t StartVal = StartConst->getZExtValue();
4832 // How many "good" bits do we have left? "good" is defined here as bits
4833 // that exist in the original value, not shifted in.
4834 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4835 if (NumBits > GoodBits) {
4836 // Do not handle the case where bits have been shifted in. In theory
4837 // we could handle this, but the cost is likely higher than just
4838 // emitting the srl/and pair.
4839 return NULL;
4840 }
4841 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4842 } else {
4843 // Do not handle the case where the shift amount (can be zero if no srl
4844 // was found) is not constant. We could handle this case, but it would
4845 // require run-time logic that would be more expensive than just
4846 // emitting the srl/and pair.
4847 return NULL;
4848 }
4849 } else {
4850 // Do not handle the case where the LHS of the and is not a shift. While
4851 // it would be trivial to handle this case, it would just transform
4852 // 'and' -> 'bfe', but 'and' has higher-throughput.
4853 return NULL;
4854 }
4855 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4856 if (LHS->getOpcode() == ISD::AND) {
4857 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4858 if (!ShiftCnst) {
4859 // Shift amount must be constant
4860 return NULL;
4861 }
4862
4863 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4864
4865 SDValue AndLHS = LHS->getOperand(0);
4866 SDValue AndRHS = LHS->getOperand(1);
4867
4868 // Canonicalize the AND to have the mask on the RHS
4869 if (isa<ConstantSDNode>(AndLHS)) {
4870 std::swap(AndLHS, AndRHS);
4871 }
4872
4873 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4874 if (!MaskCnst) {
4875 // Mask must be constant
4876 return NULL;
4877 }
4878
4879 uint64_t MaskVal = MaskCnst->getZExtValue();
4880 uint64_t NumZeros;
4881 uint64_t NumBits;
4882 if (isMask_64(MaskVal)) {
4883 NumZeros = 0;
4884 // The number of bits in the result bitfield will be the number of
4885 // trailing ones (the AND) minus the number of bits we shift off
4886 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4887 } else if (isShiftedMask_64(MaskVal)) {
4888 NumZeros = countTrailingZeros(MaskVal);
4889 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4890 // The number of bits in the result bitfield will be the number of
4891 // trailing zeros plus the number of set bits in the mask minus the
4892 // number of bits we shift off
4893 NumBits = NumZeros + NumOnes - ShiftAmt;
4894 } else {
4895 // This is not a mask we can handle
4896 return NULL;
4897 }
4898
4899 if (ShiftAmt < NumZeros) {
4900 // Handling this case would require extra logic that would make this
4901 // transformation non-profitable
4902 return NULL;
4903 }
4904
4905 Val = AndLHS;
4906 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4907 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4908 } else if (LHS->getOpcode() == ISD::SHL) {
4909 // Here, we have a pattern like:
4910 //
4911 // (sra (shl val, NN), MM)
4912 // or
4913 // (srl (shl val, NN), MM)
4914 //
4915 // If MM >= NN, we can efficiently optimize this with bfe
4916 Val = LHS->getOperand(0);
4917
4918 SDValue ShlRHS = LHS->getOperand(1);
4919 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4920 if (!ShlCnst) {
4921 // Shift amount must be constant
4922 return NULL;
4923 }
4924 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4925
4926 SDValue ShrRHS = RHS;
4927 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4928 if (!ShrCnst) {
4929 // Shift amount must be constant
4930 return NULL;
4931 }
4932 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4933
4934 // To avoid extra codegen and be profitable, we need Outer >= Inner
4935 if (OuterShiftAmt < InnerShiftAmt) {
4936 return NULL;
4937 }
4938
4939 // If the outer shift is more than the type size, we have no bitfield to
4940 // extract (since we also check that the inner shift is <= the outer shift
4941 // then this also implies that the inner shift is < the type size)
4942 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
4943 return NULL;
4944 }
4945
4946 Start =
4947 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
4948 Len =
4949 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4950 OuterShiftAmt, DL, MVT::i32);
4951
4952 if (N->getOpcode() == ISD::SRA) {
4953 // If we have a arithmetic right shift, we need to use the signed bfe
4954 // variant
4955 IsSigned = true;
4956 }
4957 } else {
4958 // No can do...
4959 return NULL;
4960 }
4961 } else {
4962 // No can do...
4963 return NULL;
4964 }
4965
4966
4967 unsigned Opc;
4968 // For the BFE operations we form here from "and" and "srl", always use the
4969 // unsigned variants.
4970 if (Val.getValueType() == MVT::i32) {
4971 if (IsSigned) {
4972 Opc = NVPTX::BFE_S32rii;
4973 } else {
4974 Opc = NVPTX::BFE_U32rii;
4975 }
4976 } else if (Val.getValueType() == MVT::i64) {
4977 if (IsSigned) {
4978 Opc = NVPTX::BFE_S64rii;
4979 } else {
4980 Opc = NVPTX::BFE_U64rii;
4981 }
4982 } else {
4983 // We cannot handle this type
4984 return NULL;
4985 }
4986
4987 SDValue Ops[] = {
4988 Val, Start, Len
4989 };
4990
4991 return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
4992 }
4993
4994 // SelectDirectAddr - Match a direct address for DAG.
4995 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)4996 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4997 // Return true if TGA or ES.
4998 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4999 N.getOpcode() == ISD::TargetExternalSymbol) {
5000 Address = N;
5001 return true;
5002 }
5003 if (N.getOpcode() == NVPTXISD::Wrapper) {
5004 Address = N.getOperand(0);
5005 return true;
5006 }
5007 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5008 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5009 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5010 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5011 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5012 }
5013 return false;
5014 }
5015
5016 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5017 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5018 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5019 if (Addr.getOpcode() == ISD::ADD) {
5020 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5021 SDValue base = Addr.getOperand(0);
5022 if (SelectDirectAddr(base, Base)) {
5023 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5024 mvt);
5025 return true;
5026 }
5027 }
5028 }
5029 return false;
5030 }
5031
5032 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5033 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5034 SDValue &Base, SDValue &Offset) {
5035 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5036 }
5037
5038 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5039 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5040 SDValue &Base, SDValue &Offset) {
5041 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5042 }
5043
5044 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5045 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5046 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5047 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5048 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5049 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5050 return true;
5051 }
5052 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5053 Addr.getOpcode() == ISD::TargetGlobalAddress)
5054 return false; // direct calls.
5055
5056 if (Addr.getOpcode() == ISD::ADD) {
5057 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5058 return false;
5059 }
5060 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5061 if (FrameIndexSDNode *FIN =
5062 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5063 // Constant offset from frame ref.
5064 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5065 else
5066 Base = Addr.getOperand(0);
5067 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5068 mvt);
5069 return true;
5070 }
5071 }
5072 return false;
5073 }
5074
5075 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5076 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5077 SDValue &Base, SDValue &Offset) {
5078 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5079 }
5080
5081 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5082 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5083 SDValue &Base, SDValue &Offset) {
5084 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5085 }
5086
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const5087 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5088 unsigned int spN) const {
5089 const Value *Src = nullptr;
5090 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5091 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5092 return true;
5093 Src = mN->getMemOperand()->getValue();
5094 }
5095 if (!Src)
5096 return false;
5097 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5098 return (PT->getAddressSpace() == spN);
5099 return false;
5100 }
5101
5102 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5103 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)5104 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5105 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5106 SDValue Op0, Op1;
5107 switch (ConstraintID) {
5108 default:
5109 return true;
5110 case InlineAsm::Constraint_m: // memory
5111 if (SelectDirectAddr(Op, Op0)) {
5112 OutOps.push_back(Op0);
5113 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5114 return false;
5115 }
5116 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5117 OutOps.push_back(Op0);
5118 OutOps.push_back(Op1);
5119 return false;
5120 }
5121 break;
5122 }
5123 return true;
5124 }
5125