1
2 #include "nv50_ir.h"
3 #include "nv50_ir_target.h"
4 #include "nv50_ir_build_util.h"
5
6 #include "nv50_ir_from_sm4.h"
7
// FIXME: is pass-through implicit? Check ReadWriteMask.
9
10 namespace tgsi {
11
irSemantic(unsigned sn)12 static nv50_ir::SVSemantic irSemantic(unsigned sn)
13 {
14 switch (sn) {
15 case TGSI_SEMANTIC_POSITION: return nv50_ir::SV_POSITION;
16 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
17 case NV50_SEMANTIC_LAYER: return nv50_ir::SV_LAYER;
18 case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX;
19 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
20 case NV50_SEMANTIC_CLIPDISTANCE: return nv50_ir::SV_CLIP_DISTANCE;
21 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
22 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
23 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
24 case NV50_SEMANTIC_TESSFACTOR: return nv50_ir::SV_TESS_FACTOR;
25 case NV50_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
26 default:
27 return nv50_ir::SV_UNDEFINED;
28 }
29 }
30
31 } // namespace tgsi
32
33 namespace {
34
35 using namespace nv50_ir;
36
37 #define NV50_IR_MAX_RESOURCES 64
38
39 class Converter : public BuildUtil
40 {
41 public:
42 Converter(Program *, struct nv50_ir_prog_info *);
43 ~Converter();
44
45 private:
46 DataArray tData32;
47 DataArray tData64;
48 unsigned int nrRegVals;
49
50 DataArray *lData;
51 unsigned int nrArrays;
52 unsigned int arrayVol;
53
54 DataArray oData;
55
56 uint8_t interpMode[PIPE_MAX_SHADER_INPUTS];
57
58 // outputs for each phase
59 struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS];
60
61 int phase;
62 int subPhaseCnt[2];
63 int subPhase;
64 unsigned int phaseStart;
65 unsigned int phaseInstance;
66 unsigned int *phaseInstCnt[2];
67 bool unrollPhase;
68 bool phaseInstanceUsed;
69 int phaseEnded; // (phase + 1) if $phase ended
70
71 bool finalized;
72
73 Value *srcPtr[3][3]; // for indirect addressing, save pointer values
74 Value *dstPtr[3];
75 Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP)
76
77 Value *domainPt[3]; // pre-fetched TessCoord
78
79 unsigned int nDstOpnds;
80
81 Stack condBBs;
82 Stack joinBBs;
83 Stack loopBBs;
84 Stack breakBBs;
85 Stack entryBBs;
86 Stack leaveBBs;
87 Stack retIPs;
88
89 bool shadow[NV50_IR_MAX_RESOURCES];
90 TexTarget resourceType[NV50_IR_MAX_RESOURCES][2];
91
92 struct nv50_ir_prog_info& info;
93
94 Value *fragCoord[4];
95
96 public:
97 bool run();
98
99 private:
100 bool handleInstruction(unsigned int pos);
101 bool inspectInstruction(unsigned int pos);
102 bool handleDeclaration(const sm4_dcl& dcl);
103 bool inspectDeclaration(const sm4_dcl& dcl);
104 bool parseSignature();
105
106 bool haveNextPhase(unsigned int pos) const;
107
108 void allocateValues();
109 void exportOutputs();
110
111 void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]);
112 void handleLOAD(Value *dst0[4]);
113 void handleSAMPLE(operation, Value *dst0[4]);
114 void handleQUERY(Value *dst0[4], enum TexQuery query);
115 void handleDP(Value *dst0[4], int dim);
116
117 Symbol *iSym(int i, int c);
118 Symbol *oSym(int i, int c);
119
120 Value *src(int i, int c);
121 Value *src(const sm4_op&, int c, int i);
122 Value *dst(int i, int c);
123 Value *dst(const sm4_op&, int c, int i);
124 void saveDst(int i, int c, Value *value);
125 void saveDst(const sm4_op&, int c, Value *value, int i);
126 void saveFragDepth(operation op, Value *value);
127
128 Value *interpolate(const sm4_op&, int c, int i);
129
130 Value *getSrcPtr(int s, int dim, int shl);
131 Value *getDstPtr(int d, int dim, int shl);
132 Value *getVtxPtr(int s);
133
134 bool checkDstSrcAliasing() const;
135 void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
136 void finalizeShader();
137
138 operation cvtOpcode(enum sm4_opcode op) const;
139 unsigned int getDstOpndCount(enum sm4_opcode opcode) const;
140
141 DataType inferSrcType(enum sm4_opcode op) const;
142 DataType inferDstType(enum sm4_opcode op) const;
143
144 unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const;
145 CondCode cvtCondCode(enum sm4_opcode op) const;
146 RoundMode cvtRoundingMode(enum sm4_opcode op) const;
147 TexTarget cvtTexTarget(enum sm4_target,
148 enum sm4_opcode, operation *) const;
149 SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const;
150 uint8_t cvtInterpMode(enum sm4_interpolation) const;
151
152 unsigned tgsiSemantic(SVSemantic, int index);
153 void recordSV(unsigned sn, unsigned si, unsigned mask, bool input);
154
155 private:
156 sm4_insn *insn;
157 DataType dTy, sTy;
158
159 const struct sm4_program& sm4;
160 Program *prog;
161 };
162
163 #define PRIM_CASE(a, b) \
164 case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b;
165
166 unsigned
g3dPrim(const unsigned prim,unsigned * patchSize) const167 Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const
168 {
169 switch (prim) {
170 PRIM_CASE(UNDEFINED, POINTS);
171 PRIM_CASE(POINTLIST, POINTS);
172 PRIM_CASE(LINELIST, LINES);
173 PRIM_CASE(LINESTRIP, LINE_STRIP);
174 PRIM_CASE(TRIANGLELIST, TRIANGLES);
175 PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP);
176 PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY);
177 PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY);
178 PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY);
179 PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLES_ADJACENCY);
180 default:
181 if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST ||
182 prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST)
183 return PIPE_PRIM_POINTS;
184 if (patchSize)
185 *patchSize =
186 prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1;
187 return NV50_PRIM_PATCHES;
188 }
189 }
190
191 #define IPM_CASE(n, a, b) \
192 case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b
193
194 uint8_t
cvtInterpMode(enum sm4_interpolation mode) const195 Converter::cvtInterpMode(enum sm4_interpolation mode) const
196 {
197 switch (mode) {
198 IPM_CASE(CONSTANT, FLAT, FLAT);
199 IPM_CASE(LINEAR, PERSPECTIVE, PERSPECTIVE);
200 IPM_CASE(LINEAR_CENTROID, PERSPECTIVE, CENTROID);
201 IPM_CASE(LINEAR_NOPERSPECTIVE, LINEAR, LINEAR);
202 IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID, LINEAR, CENTROID);
203 IPM_CASE(LINEAR_SAMPLE, PERSPECTIVE, OFFSET);
204 IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE, LINEAR, OFFSET);
205 IPM_CASE(UNDEFINED, LINEAR, LINEAR);
206 default:
207 assert(!"invalid interpolation mode");
208 return 0;
209 }
210 }
211
212 static void
setVaryingInterpMode(struct nv50_ir_varying * var,uint8_t mode)213 setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode)
214 {
215 switch (mode & NV50_IR_INTERP_MODE_MASK) {
216 case NV50_IR_INTERP_LINEAR:
217 var->linear = 1;
218 break;
219 case NV50_IR_INTERP_FLAT:
220 var->flat = 1;
221 break;
222 default:
223 break;
224 }
225 if (mode & NV50_IR_INTERP_CENTROID)
226 var->centroid = 1;
227 }
228
229 RoundMode
cvtRoundingMode(enum sm4_opcode op) const230 Converter::cvtRoundingMode(enum sm4_opcode op) const
231 {
232 switch (op) {
233 case SM4_OPCODE_ROUND_NE: return ROUND_NI;
234 case SM4_OPCODE_ROUND_NI: return ROUND_MI;
235 case SM4_OPCODE_ROUND_PI: return ROUND_PI;
236 case SM4_OPCODE_ROUND_Z: return ROUND_ZI;
237 default:
238 return ROUND_N;
239 }
240 }
241
242 CondCode
cvtCondCode(enum sm4_opcode op) const243 Converter::cvtCondCode(enum sm4_opcode op) const
244 {
245 switch (op) {
246 case SM4_OPCODE_EQ:
247 case SM4_OPCODE_DEQ:
248 case SM4_OPCODE_IEQ: return CC_EQ;
249 case SM4_OPCODE_GE:
250 case SM4_OPCODE_DGE:
251 case SM4_OPCODE_IGE:
252 case SM4_OPCODE_UGE: return CC_GE;
253 case SM4_OPCODE_LT:
254 case SM4_OPCODE_DLT:
255 case SM4_OPCODE_ILT:
256 case SM4_OPCODE_ULT: return CC_LT;
257 case SM4_OPCODE_NE:
258 case SM4_OPCODE_INE:
259 case SM4_OPCODE_DNE: return CC_NEU;
260 default:
261 return CC_ALWAYS;
262 }
263 }
264
265 DataType
inferSrcType(enum sm4_opcode op) const266 Converter::inferSrcType(enum sm4_opcode op) const
267 {
268 switch (op) {
269 case SM4_OPCODE_IADD:
270 case SM4_OPCODE_IEQ:
271 case SM4_OPCODE_IGE:
272 case SM4_OPCODE_ILT:
273 case SM4_OPCODE_IMAD:
274 case SM4_OPCODE_IMAX:
275 case SM4_OPCODE_IMIN:
276 case SM4_OPCODE_IMUL:
277 case SM4_OPCODE_INE:
278 case SM4_OPCODE_INEG:
279 case SM4_OPCODE_ISHL:
280 case SM4_OPCODE_ISHR:
281 case SM4_OPCODE_ITOF:
282 case SM4_OPCODE_ATOMIC_IADD:
283 case SM4_OPCODE_ATOMIC_IMAX:
284 case SM4_OPCODE_ATOMIC_IMIN:
285 return TYPE_S32;
286 case SM4_OPCODE_AND:
287 case SM4_OPCODE_NOT:
288 case SM4_OPCODE_OR:
289 case SM4_OPCODE_UDIV:
290 case SM4_OPCODE_ULT:
291 case SM4_OPCODE_UGE:
292 case SM4_OPCODE_UMUL:
293 case SM4_OPCODE_UMAD:
294 case SM4_OPCODE_UMAX:
295 case SM4_OPCODE_UMIN:
296 case SM4_OPCODE_USHR:
297 case SM4_OPCODE_UTOF:
298 case SM4_OPCODE_XOR:
299 case SM4_OPCODE_UADDC:
300 case SM4_OPCODE_USUBB:
301 case SM4_OPCODE_ATOMIC_AND:
302 case SM4_OPCODE_ATOMIC_OR:
303 case SM4_OPCODE_ATOMIC_XOR:
304 case SM4_OPCODE_ATOMIC_UMAX:
305 case SM4_OPCODE_ATOMIC_UMIN:
306 return TYPE_U32;
307 case SM4_OPCODE_DADD:
308 case SM4_OPCODE_DMAX:
309 case SM4_OPCODE_DMIN:
310 case SM4_OPCODE_DMUL:
311 case SM4_OPCODE_DEQ:
312 case SM4_OPCODE_DGE:
313 case SM4_OPCODE_DLT:
314 case SM4_OPCODE_DNE:
315 case SM4_OPCODE_DMOV:
316 case SM4_OPCODE_DMOVC:
317 case SM4_OPCODE_DTOF:
318 return TYPE_F64;
319 case SM4_OPCODE_F16TOF32:
320 return TYPE_F16;
321 default:
322 return TYPE_F32;
323 }
324 }
325
326 DataType
inferDstType(enum sm4_opcode op) const327 Converter::inferDstType(enum sm4_opcode op) const
328 {
329 switch (op) {
330 case SM4_OPCODE_FTOI:
331 return TYPE_S32;
332 case SM4_OPCODE_FTOU:
333 case SM4_OPCODE_EQ:
334 case SM4_OPCODE_GE:
335 case SM4_OPCODE_LT:
336 case SM4_OPCODE_NE:
337 return TYPE_U32;
338 case SM4_OPCODE_FTOD:
339 return TYPE_F64;
340 case SM4_OPCODE_F32TOF16:
341 return TYPE_F16;
342 case SM4_OPCODE_ITOF:
343 case SM4_OPCODE_UTOF:
344 case SM4_OPCODE_DTOF:
345 return TYPE_F32;
346 default:
347 return inferSrcType(op);
348 }
349 }
350
351 operation
cvtOpcode(enum sm4_opcode op) const352 Converter::cvtOpcode(enum sm4_opcode op) const
353 {
354 switch (op) {
355 case SM4_OPCODE_ADD: return OP_ADD;
356 case SM4_OPCODE_AND: return OP_AND;
357 case SM4_OPCODE_BREAK: return OP_BREAK;
358 case SM4_OPCODE_BREAKC: return OP_BREAK;
359 case SM4_OPCODE_CALL: return OP_CALL;
360 case SM4_OPCODE_CALLC: return OP_CALL;
361 case SM4_OPCODE_CASE: return OP_NOP;
362 case SM4_OPCODE_CONTINUE: return OP_CONT;
363 case SM4_OPCODE_CONTINUEC: return OP_CONT;
364 case SM4_OPCODE_CUT: return OP_RESTART;
365 case SM4_OPCODE_DEFAULT: return OP_NOP;
366 case SM4_OPCODE_DERIV_RTX: return OP_DFDX;
367 case SM4_OPCODE_DERIV_RTY: return OP_DFDY;
368 case SM4_OPCODE_DISCARD: return OP_DISCARD;
369 case SM4_OPCODE_DIV: return OP_DIV;
370 case SM4_OPCODE_DP2: return OP_MAD;
371 case SM4_OPCODE_DP3: return OP_MAD;
372 case SM4_OPCODE_DP4: return OP_MAD;
373 case SM4_OPCODE_ELSE: return OP_BRA;
374 case SM4_OPCODE_EMIT: return OP_EMIT;
375 case SM4_OPCODE_EMITTHENCUT: return OP_EMIT;
376 case SM4_OPCODE_ENDIF: return OP_BRA;
377 case SM4_OPCODE_ENDLOOP: return OP_PREBREAK;
378 case SM4_OPCODE_ENDSWITCH: return OP_NOP;
379 case SM4_OPCODE_EQ: return OP_SET;
380 case SM4_OPCODE_EXP: return OP_EX2;
381 case SM4_OPCODE_FRC: return OP_CVT;
382 case SM4_OPCODE_FTOI: return OP_CVT;
383 case SM4_OPCODE_FTOU: return OP_CVT;
384 case SM4_OPCODE_GE: return OP_SET;
385 case SM4_OPCODE_IADD: return OP_ADD;
386 case SM4_OPCODE_IF: return OP_BRA;
387 case SM4_OPCODE_IEQ: return OP_SET;
388 case SM4_OPCODE_IGE: return OP_SET;
389 case SM4_OPCODE_ILT: return OP_SET;
390 case SM4_OPCODE_IMAD: return OP_MAD;
391 case SM4_OPCODE_IMAX: return OP_MAX;
392 case SM4_OPCODE_IMIN: return OP_MIN;
393 case SM4_OPCODE_IMUL: return OP_MUL;
394 case SM4_OPCODE_INE: return OP_SET;
395 case SM4_OPCODE_INEG: return OP_NEG;
396 case SM4_OPCODE_ISHL: return OP_SHL;
397 case SM4_OPCODE_ISHR: return OP_SHR;
398 case SM4_OPCODE_ITOF: return OP_CVT;
399 case SM4_OPCODE_LD: return OP_TXF;
400 case SM4_OPCODE_LD_MS: return OP_TXF;
401 case SM4_OPCODE_LOG: return OP_LG2;
402 case SM4_OPCODE_LOOP: return OP_PRECONT;
403 case SM4_OPCODE_LT: return OP_SET;
404 case SM4_OPCODE_MAD: return OP_MAD;
405 case SM4_OPCODE_MIN: return OP_MIN;
406 case SM4_OPCODE_MAX: return OP_MAX;
407 case SM4_OPCODE_MOV: return OP_MOV;
408 case SM4_OPCODE_MOVC: return OP_MOV;
409 case SM4_OPCODE_MUL: return OP_MUL;
410 case SM4_OPCODE_NE: return OP_SET;
411 case SM4_OPCODE_NOP: return OP_NOP;
412 case SM4_OPCODE_NOT: return OP_NOT;
413 case SM4_OPCODE_OR: return OP_OR;
414 case SM4_OPCODE_RESINFO: return OP_TXQ;
415 case SM4_OPCODE_RET: return OP_RET;
416 case SM4_OPCODE_RETC: return OP_RET;
417 case SM4_OPCODE_ROUND_NE: return OP_CVT;
418 case SM4_OPCODE_ROUND_NI: return OP_FLOOR;
419 case SM4_OPCODE_ROUND_PI: return OP_CEIL;
420 case SM4_OPCODE_ROUND_Z: return OP_TRUNC;
421 case SM4_OPCODE_RSQ: return OP_RSQ;
422 case SM4_OPCODE_SAMPLE: return OP_TEX;
423 case SM4_OPCODE_SAMPLE_C: return OP_TEX;
424 case SM4_OPCODE_SAMPLE_C_LZ: return OP_TEX;
425 case SM4_OPCODE_SAMPLE_L: return OP_TXL;
426 case SM4_OPCODE_SAMPLE_D: return OP_TXD;
427 case SM4_OPCODE_SAMPLE_B: return OP_TXB;
428 case SM4_OPCODE_SQRT: return OP_SQRT;
429 case SM4_OPCODE_SWITCH: return OP_NOP;
430 case SM4_OPCODE_SINCOS: return OP_PRESIN;
431 case SM4_OPCODE_UDIV: return OP_DIV;
432 case SM4_OPCODE_ULT: return OP_SET;
433 case SM4_OPCODE_UGE: return OP_SET;
434 case SM4_OPCODE_UMUL: return OP_MUL;
435 case SM4_OPCODE_UMAD: return OP_MAD;
436 case SM4_OPCODE_UMAX: return OP_MAX;
437 case SM4_OPCODE_UMIN: return OP_MIN;
438 case SM4_OPCODE_USHR: return OP_SHR;
439 case SM4_OPCODE_UTOF: return OP_CVT;
440 case SM4_OPCODE_XOR: return OP_XOR;
441
442 case SM4_OPCODE_GATHER4: return OP_TXG;
443 case SM4_OPCODE_SAMPLE_POS: return OP_PIXLD;
444 case SM4_OPCODE_SAMPLE_INFO: return OP_PIXLD;
445 case SM4_OPCODE_EMIT_STREAM: return OP_EMIT;
446 case SM4_OPCODE_CUT_STREAM: return OP_RESTART;
447 case SM4_OPCODE_EMITTHENCUT_STREAM: return OP_EMIT;
448 case SM4_OPCODE_INTERFACE_CALL: return OP_CALL;
449 case SM4_OPCODE_BUFINFO: return OP_TXQ;
450 case SM4_OPCODE_DERIV_RTX_COARSE: return OP_DFDX;
451 case SM4_OPCODE_DERIV_RTX_FINE: return OP_DFDX;
452 case SM4_OPCODE_DERIV_RTY_COARSE: return OP_DFDY;
453 case SM4_OPCODE_DERIV_RTY_FINE: return OP_DFDY;
454 case SM4_OPCODE_GATHER4_C: return OP_TXG;
455 case SM4_OPCODE_GATHER4_PO: return OP_TXG;
456 case SM4_OPCODE_GATHER4_PO_C: return OP_TXG;
457
458 case SM4_OPCODE_RCP: return OP_RCP;
459 case SM4_OPCODE_F32TOF16: return OP_CVT;
460 case SM4_OPCODE_F16TOF32: return OP_CVT;
461 case SM4_OPCODE_UADDC: return OP_ADD;
462 case SM4_OPCODE_USUBB: return OP_SUB;
463 case SM4_OPCODE_COUNTBITS: return OP_POPCNT;
464
465 case SM4_OPCODE_ATOMIC_AND: return OP_AND;
466 case SM4_OPCODE_ATOMIC_OR: return OP_OR;
467 case SM4_OPCODE_ATOMIC_XOR: return OP_XOR;
468 case SM4_OPCODE_ATOMIC_CMP_STORE: return OP_STORE;
469 case SM4_OPCODE_ATOMIC_IADD: return OP_ADD;
470 case SM4_OPCODE_ATOMIC_IMAX: return OP_MAX;
471 case SM4_OPCODE_ATOMIC_IMIN: return OP_MIN;
472 case SM4_OPCODE_ATOMIC_UMAX: return OP_MAX;
473 case SM4_OPCODE_ATOMIC_UMIN: return OP_MIN;
474
475 case SM4_OPCODE_SYNC: return OP_MEMBAR;
476 case SM4_OPCODE_DADD: return OP_ADD;
477 case SM4_OPCODE_DMAX: return OP_MAX;
478 case SM4_OPCODE_DMIN: return OP_MIN;
479 case SM4_OPCODE_DMUL: return OP_MUL;
480 case SM4_OPCODE_DEQ: return OP_SET;
481 case SM4_OPCODE_DGE: return OP_SET;
482 case SM4_OPCODE_DLT: return OP_SET;
483 case SM4_OPCODE_DNE: return OP_SET;
484 case SM4_OPCODE_DMOV: return OP_MOV;
485 case SM4_OPCODE_DMOVC: return OP_MOV;
486 case SM4_OPCODE_DTOF: return OP_CVT;
487 case SM4_OPCODE_FTOD: return OP_CVT;
488
489 default:
490 return OP_NOP;
491 }
492 }
493
494 unsigned int
getDstOpndCount(enum sm4_opcode opcode) const495 Converter::getDstOpndCount(enum sm4_opcode opcode) const
496 {
497 switch (opcode) {
498 case SM4_OPCODE_SINCOS:
499 case SM4_OPCODE_UDIV:
500 case SM4_OPCODE_IMUL:
501 case SM4_OPCODE_UMUL:
502 return 2;
503 case SM4_OPCODE_BREAK:
504 case SM4_OPCODE_BREAKC:
505 case SM4_OPCODE_CALL:
506 case SM4_OPCODE_CALLC:
507 case SM4_OPCODE_CONTINUE:
508 case SM4_OPCODE_CONTINUEC:
509 case SM4_OPCODE_DISCARD:
510 case SM4_OPCODE_EMIT:
511 case SM4_OPCODE_EMIT_STREAM:
512 case SM4_OPCODE_CUT:
513 case SM4_OPCODE_CUT_STREAM:
514 case SM4_OPCODE_EMITTHENCUT:
515 case SM4_OPCODE_EMITTHENCUT_STREAM:
516 case SM4_OPCODE_IF:
517 case SM4_OPCODE_ELSE:
518 case SM4_OPCODE_ENDIF:
519 case SM4_OPCODE_LOOP:
520 case SM4_OPCODE_ENDLOOP:
521 case SM4_OPCODE_RET:
522 case SM4_OPCODE_RETC:
523 case SM4_OPCODE_SYNC:
524 case SM4_OPCODE_SWITCH:
525 case SM4_OPCODE_CASE:
526 case SM4_OPCODE_HS_DECLS:
527 case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
528 case SM4_OPCODE_HS_FORK_PHASE:
529 case SM4_OPCODE_HS_JOIN_PHASE:
530 return 0;
531 default:
532 return 1;
533 }
534 }
535
536 #define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b;
537 #define TARG_CASE_2(a, b) case SM4_TARGET_##a: \
538 return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b
539
540 TexTarget
cvtTexTarget(enum sm4_target targ,enum sm4_opcode op,operation * opr) const541 Converter::cvtTexTarget(enum sm4_target targ,
542 enum sm4_opcode op, operation *opr) const
543 {
544 bool dc = (op == SM4_OPCODE_SAMPLE_C ||
545 op == SM4_OPCODE_SAMPLE_C_LZ ||
546 op == SM4_OPCODE_GATHER4_C ||
547 op == SM4_OPCODE_GATHER4_PO_C);
548
549 if (opr) {
550 switch (targ) {
551 case SM4_TARGET_RAW_BUFFER: *opr = OP_LOAD; break;
552 case SM4_TARGET_STRUCTURED_BUFFER: *opr = OP_SULD; break;
553 default:
554 *opr = OP_TEX;
555 break;
556 }
557 }
558
559 switch (targ) {
560 TARG_CASE_1(UNKNOWN, 2D);
561 TARG_CASE_2(TEXTURE1D, 1D);
562 TARG_CASE_2(TEXTURE2D, 2D);
563 TARG_CASE_1(TEXTURE2DMS, 2D_MS);
564 TARG_CASE_1(TEXTURE3D, 3D);
565 TARG_CASE_2(TEXTURECUBE, CUBE);
566 TARG_CASE_2(TEXTURE1DARRAY, 1D_ARRAY);
567 TARG_CASE_2(TEXTURE2DARRAY, 2D_ARRAY);
568 TARG_CASE_1(TEXTURE2DMSARRAY, 2D_MS_ARRAY);
569 TARG_CASE_2(TEXTURECUBEARRAY, CUBE_ARRAY);
570 TARG_CASE_1(BUFFER, BUFFER);
571 TARG_CASE_1(RAW_BUFFER, BUFFER);
572 TARG_CASE_1(STRUCTURED_BUFFER, BUFFER);
573 default:
574 assert(!"invalid SM4 texture target");
575 return dc ? TEX_TARGET_2D_SHADOW : TEX_TARGET_2D;
576 }
577 }
578
579 static inline uint32_t
getSVIndex(enum sm4_sv sv)580 getSVIndex(enum sm4_sv sv)
581 {
582 switch (sv) {
583 case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return 0;
584 case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return 1;
585 case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return 2;
586 case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return 3;
587
588 case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: return 4;
589 case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: return 5;
590
591 case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return 0;
592 case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return 1;
593 case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return 2;
594
595 case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: return 4;
596
597 case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: return 0;
598
599 case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: return 4;
600
601 default:
602 return 0;
603 }
604 }
605
606 SVSemantic
cvtSemantic(enum sm4_sv sv,uint8_t & idx) const607 Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const
608 {
609 idx = 0;
610
611 switch (sv) {
612 case SM4_SV_UNDEFINED: return SV_UNDEFINED;
613 case SM4_SV_POSITION: return SV_POSITION;
614 case SM4_SV_CLIP_DISTANCE: return SV_CLIP_DISTANCE;
615 case SM4_SV_CULL_DISTANCE: return SV_CLIP_DISTANCE; // XXX: distinction
616 case SM4_SV_RENDER_TARGET_ARRAY_INDEX: return SV_LAYER;
617 case SM4_SV_VIEWPORT_ARRAY_INDEX: return SV_VIEWPORT_INDEX;
618 case SM4_SV_VERTEX_ID: return SV_VERTEX_ID;
619 case SM4_SV_PRIMITIVE_ID: return SV_PRIMITIVE_ID;
620 case SM4_SV_INSTANCE_ID: return SV_INSTANCE_ID;
621 case SM4_SV_IS_FRONT_FACE: return SV_FACE;
622 case SM4_SV_SAMPLE_INDEX: return SV_SAMPLE_INDEX;
623
624 case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
625 case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
626 case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
627 case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
628 case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR:
629 case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR:
630 case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
631 case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
632 case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
633 case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR:
634 case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR:
635 case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR:
636 idx = getSVIndex(sv);
637 return SV_TESS_FACTOR;
638
639 default:
640 assert(!"invalid SM4 system value");
641 return SV_UNDEFINED;
642 }
643 }
644
645 unsigned
tgsiSemantic(SVSemantic sv,int index)646 Converter::tgsiSemantic(SVSemantic sv, int index)
647 {
648 switch (sv) {
649 case SV_POSITION: return TGSI_SEMANTIC_POSITION;
650 case SV_FACE: return TGSI_SEMANTIC_FACE;
651 case SV_LAYER: return NV50_SEMANTIC_LAYER;
652 case SV_VIEWPORT_INDEX: return NV50_SEMANTIC_VIEWPORTINDEX;
653 case SV_POINT_SIZE: return TGSI_SEMANTIC_PSIZE;
654 case SV_CLIP_DISTANCE: return NV50_SEMANTIC_CLIPDISTANCE;
655 case SV_VERTEX_ID: return TGSI_SEMANTIC_VERTEXID;
656 case SV_INSTANCE_ID: return TGSI_SEMANTIC_INSTANCEID;
657 case SV_PRIMITIVE_ID: return TGSI_SEMANTIC_PRIMID;
658 case SV_TESS_FACTOR: return NV50_SEMANTIC_TESSFACTOR;
659 case SV_TESS_COORD: return NV50_SEMANTIC_TESSCOORD;
660 case SV_INVOCATION_ID: return NV50_SEMANTIC_INVOCATIONID;
661 default:
662 return TGSI_SEMANTIC_GENERIC;
663 }
664 }
665
666 void
recordSV(unsigned sn,unsigned si,unsigned mask,bool input)667 Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input)
668 {
669 unsigned int i;
670 for (i = 0; i < info.numSysVals; ++i)
671 if (info.sv[i].sn == sn &&
672 info.sv[i].si == si)
673 return;
674 info.numSysVals = i + 1;
675 info.sv[i].sn = sn;
676 info.sv[i].si = si;
677 info.sv[i].mask = mask;
678 info.sv[i].input = input ? 1 : 0;
679 }
680
681 bool
parseSignature()682 Converter::parseSignature()
683 {
684 struct nv50_ir_varying *patch;
685 unsigned int i, r, n;
686
687 info.numInputs = 0;
688 info.numOutputs = 0;
689 info.numPatchConstants = 0;
690
691 for (n = 0, i = 0; i < sm4.num_params_in; ++i) {
692 r = sm4.params_in[i].Register;
693
694 info.in[r].mask |= sm4.params_in[i].ReadWriteMask;
695 // mask might be uninitialized ...
696 if (!sm4.params_in[i].ReadWriteMask)
697 info.in[r].mask = 0xf;
698 info.in[r].id = r;
699 if (info.in[r].regular) // already assigned semantic name/index
700 continue;
701 info.in[r].regular = 1;
702 info.in[r].patch = 0;
703
704 info.numInputs = MAX2(info.numInputs, r + 1);
705
706 switch (sm4.params_in[i].SystemValueType) {
707 case D3D_NAME_UNDEFINED:
708 info.in[r].sn = TGSI_SEMANTIC_GENERIC;
709 info.in[r].si = n++;
710 break;
711 case D3D_NAME_POSITION:
712 info.in[r].sn = TGSI_SEMANTIC_POSITION;
713 break;
714 case D3D_NAME_VERTEX_ID:
715 info.in[r].sn = TGSI_SEMANTIC_VERTEXID;
716 break;
717 case D3D_NAME_PRIMITIVE_ID:
718 info.in[r].sn = TGSI_SEMANTIC_PRIMID;
719 // no corresponding output
720 recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
721 break;
722 case D3D_NAME_INSTANCE_ID:
723 info.in[r].sn = TGSI_SEMANTIC_INSTANCEID;
724 break;
725 case D3D_NAME_IS_FRONT_FACE:
726 info.in[r].sn = TGSI_SEMANTIC_FACE;
727 // no corresponding output
728 recordSV(TGSI_SEMANTIC_FACE, 0, 1, true);
729 break;
730 default:
731 assert(!"invalid/unsupported input linkage semantic");
732 break;
733 }
734 }
735
736 for (n = 0, i = 0; i < sm4.num_params_out; ++i) {
737 r = sm4.params_out[i].Register;
738
739 info.out[r].mask |= ~sm4.params_out[i].ReadWriteMask;
740 info.out[r].id = r;
741 if (info.out[r].regular) // already assigned semantic name/index
742 continue;
743 info.out[r].regular = 1;
744 info.out[r].patch = 0;
745
746 info.numOutputs = MAX2(info.numOutputs, r + 1);
747
748 switch (sm4.params_out[i].SystemValueType) {
749 case D3D_NAME_UNDEFINED:
750 if (prog->getType() == Program::TYPE_FRAGMENT) {
751 info.out[r].sn = TGSI_SEMANTIC_COLOR;
752 info.out[r].si = info.prop.fp.numColourResults++;
753 } else {
754 info.out[r].sn = TGSI_SEMANTIC_GENERIC;
755 info.out[r].si = n++;
756 }
757 break;
758 case D3D_NAME_POSITION:
759 case D3D_NAME_DEPTH:
760 case D3D_NAME_DEPTH_GREATER_EQUAL:
761 case D3D_NAME_DEPTH_LESS_EQUAL:
762 info.out[r].sn = TGSI_SEMANTIC_POSITION;
763 info.io.fragDepth = r;
764 break;
765 case D3D_NAME_CULL_DISTANCE:
766 case D3D_NAME_CLIP_DISTANCE:
767 info.out[r].sn = NV50_SEMANTIC_CLIPDISTANCE;
768 info.out[r].si = sm4.params_out[i].SemanticIndex;
769 break;
770 case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
771 info.out[r].sn = NV50_SEMANTIC_LAYER;
772 break;
773 case D3D_NAME_VIEWPORT_ARRAY_INDEX:
774 info.out[r].sn = NV50_SEMANTIC_VIEWPORTINDEX;
775 break;
776 case D3D_NAME_PRIMITIVE_ID:
777 info.out[r].sn = TGSI_SEMANTIC_PRIMID;
778 break;
779 case D3D_NAME_TARGET:
780 info.out[r].sn = TGSI_SEMANTIC_COLOR;
781 info.out[r].si = sm4.params_out[i].SemanticIndex;
782 break;
783 case D3D_NAME_COVERAGE:
784 info.out[r].sn = NV50_SEMANTIC_SAMPLEMASK;
785 info.io.sampleMask = r;
786 break;
787 case D3D_NAME_SAMPLE_INDEX:
788 default:
789 assert(!"invalid/unsupported output linkage semantic");
790 break;
791 }
792 }
793
794 if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
795 patch = &info.in[info.numInputs];
796 else
797 patch = &info.out[info.numOutputs];
798
799 for (n = 0, i = 0; i < sm4.num_params_patch; ++i) {
800 r = sm4.params_patch[i].Register;
801
802 patch[r].mask |= sm4.params_patch[i].Mask;
803 patch[r].id = r;
804 if (patch[r].regular) // already visited
805 continue;
806 patch[r].regular = 1;
807 patch[r].patch = 1;
808
809 info.numPatchConstants = MAX2(info.numPatchConstants, r + 1);
810
811 switch (sm4.params_patch[i].SystemValueType) {
812 case D3D_NAME_UNDEFINED:
813 patch[r].sn = TGSI_SEMANTIC_GENERIC;
814 patch[r].si = n++;
815 break;
816 case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
817 case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
818 case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
819 patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
820 patch[r].si = sm4.params_patch[i].SemanticIndex;
821 break;
822 case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
823 case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
824 case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
825 patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
826 patch[r].si = sm4.params_patch[i].SemanticIndex + 4;
827 break;
828 default:
829 assert(!"invalid patch-constant linkage semantic");
830 break;
831 }
832 }
833 if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
834 info.numInputs += info.numPatchConstants;
835 else
836 info.numOutputs += info.numPatchConstants;
837
838 return true;
839 }
840
841 bool
inspectDeclaration(const sm4_dcl & dcl)842 Converter::inspectDeclaration(const sm4_dcl& dcl)
843 {
844 int idx = -1;
845 enum sm4_interpolation ipa_mode;
846
847 if (dcl.op.get() && dcl.op->is_index_simple(0))
848 idx = dcl.op->indices[0].disp;
849
850 switch (dcl.opcode) {
851 case SM4_OPCODE_DCL_SAMPLER:
852 assert(idx >= 0);
853 shadow[idx] = dcl.dcl_sampler.shadow;
854 break;
855 case SM4_OPCODE_DCL_RESOURCE:
856 {
857 enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target;
858
859 assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES);
860 resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL);
861 resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL);
862 }
863 break;
864 case SM4_OPCODE_DCL_CONSTANT_BUFFER:
865 // nothing to do
866 break;
867 case SM4_OPCODE_CUSTOMDATA:
868 info.immd.bufSize = dcl.num * 4;
869 info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize);
870 memcpy(info.immd.buf, dcl.data, info.immd.bufSize);
871 break;
872 case SM4_OPCODE_DCL_INDEX_RANGE:
873 // XXX: ?
874 break;
875 case SM4_OPCODE_DCL_INPUT_PS_SGV:
876 case SM4_OPCODE_DCL_INPUT_PS_SIV:
877 case SM4_OPCODE_DCL_INPUT_PS:
878 {
879 assert(idx >= 0 && idx < info.numInputs);
880 ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation;
881 interpMode[idx] = cvtInterpMode(ipa_mode);
882 setVaryingInterpMode(&info.in[idx], interpMode[idx]);
883 }
884 break;
885 case SM4_OPCODE_DCL_INPUT_SGV:
886 case SM4_OPCODE_DCL_INPUT_SIV:
887 case SM4_OPCODE_DCL_INPUT:
888 if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) {
889 idx = info.numInputs++;
890 info.in[idx].sn = NV50_SEMANTIC_TESSCOORD;
891 info.in[idx].mask = dcl.op->mask;
892 }
893 // rest handled in parseSignature
894 break;
895 case SM4_OPCODE_DCL_OUTPUT_SGV:
896 case SM4_OPCODE_DCL_OUTPUT_SIV:
897 switch (dcl.sv) {
898 case SM4_SV_POSITION:
899 assert(prog->getType() != Program::TYPE_FRAGMENT);
900 break;
901 case SM4_SV_CULL_DISTANCE: // XXX: order ?
902 info.io.cullDistanceMask |= 1 << info.io.clipDistanceMask;
903 // fall through
904 case SM4_SV_CLIP_DISTANCE:
905 info.io.clipDistanceMask++; // abuse as count
906 break;
907 default:
908 break;
909 }
910 switch (dcl.op->file) {
911 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
912 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
913 case SM4_FILE_OUTPUT_DEPTH:
914 if (info.io.fragDepth < 0xff)
915 break;
916 idx = info.io.fragDepth = info.numOutputs++;
917 info.out[idx].sn = TGSI_SEMANTIC_POSITION;
918 break;
919 case SM4_FILE_OUTPUT_COVERAGE_MASK:
920 if (info.io.sampleMask < 0xff)
921 break;
922 idx = info.io.sampleMask = info.numOutputs++;
923 info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK;
924 break;
925 default:
926 break;
927 }
928 break;
929 case SM4_OPCODE_DCL_OUTPUT:
930 // handled in parseSignature
931 break;
932 case SM4_OPCODE_DCL_TEMPS:
933 nrRegVals += dcl.num;
934 break;
935 case SM4_OPCODE_DCL_INDEXABLE_TEMP:
936 nrArrays++;
937 break;
938 case SM4_OPCODE_DCL_GLOBAL_FLAGS:
939 if (prog->getType() == Program::TYPE_FRAGMENT)
940 info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil;
941 break;
942
943 case SM4_OPCODE_DCL_FUNCTION_BODY:
944 break;
945 case SM4_OPCODE_DCL_FUNCTION_TABLE:
946 break;
947 case SM4_OPCODE_DCL_INTERFACE:
948 break;
949
950 // GP
951 case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
952 info.prop.gp.outputPrim = g3dPrim(
953 dcl.dcl_gs_output_primitive_topology.primitive_topology);
954 break;
955 case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
956 info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive);
957 break;
958 case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
959 info.prop.gp.maxVertices = dcl.num;
960 break;
961 case SM4_OPCODE_DCL_GS_INSTANCE_COUNT:
962 info.prop.gp.instanceCount = dcl.num;
963 break;
964 case SM4_OPCODE_DCL_STREAM:
965 break;
966
967 // TCP/TEP
968 case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
969 info.prop.tp.inputPatchSize =
970 dcl.dcl_input_control_point_count.control_points;
971 break;
972 case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
973 info.prop.tp.outputPatchSize =
974 dcl.dcl_output_control_point_count.control_points;
975 break;
976 case SM4_OPCODE_DCL_TESS_DOMAIN:
977 switch (dcl.dcl_tess_domain.domain) {
978 case D3D_TESSELLATOR_DOMAIN_ISOLINE:
979 info.prop.tp.domain = PIPE_PRIM_LINES;
980 break;
981 case D3D_TESSELLATOR_DOMAIN_TRI:
982 info.prop.tp.domain = PIPE_PRIM_TRIANGLES;
983 break;
984 case D3D_TESSELLATOR_DOMAIN_QUAD:
985 info.prop.tp.domain = PIPE_PRIM_QUADS;
986 break;
987 case D3D_TESSELLATOR_DOMAIN_UNDEFINED:
988 default:
989 info.prop.tp.domain = PIPE_PRIM_MAX;
990 break;
991 }
992 break;
993 case SM4_OPCODE_DCL_TESS_PARTITIONING:
994 switch (dcl.dcl_tess_partitioning.partitioning) {
995 case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
996 info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD;
997 break;
998 case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
999 info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN;
1000 break;
1001 case D3D_TESSELLATOR_PARTITIONING_POW2:
1002 info.prop.tp.partitioning = NV50_TESS_PART_POW2;
1003 break;
1004 case D3D_TESSELLATOR_PARTITIONING_INTEGER:
1005 case D3D_TESSELLATOR_PARTITIONING_UNDEFINED:
1006 default:
1007 info.prop.tp.partitioning = NV50_TESS_PART_INTEGER;
1008 break;
1009 }
1010 break;
1011 case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
1012 switch (dcl.dcl_tess_output_primitive.primitive) {
1013 case D3D_TESSELLATOR_OUTPUT_LINE:
1014 info.prop.tp.outputPrim = PIPE_PRIM_LINES;
1015 break;
1016 case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:
1017 info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
1018 info.prop.tp.winding = +1;
1019 break;
1020 case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
1021 info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
1022 info.prop.tp.winding = -1;
1023 break;
1024 case D3D_TESSELLATOR_OUTPUT_POINT:
1025 info.prop.tp.outputPrim = PIPE_PRIM_POINTS;
1026 break;
1027 case D3D_TESSELLATOR_OUTPUT_UNDEFINED:
1028 default:
1029 info.prop.tp.outputPrim = PIPE_PRIM_MAX;
1030 break;
1031 }
1032 break;
1033
1034 case SM4_OPCODE_HS_FORK_PHASE:
1035 ++subPhaseCnt[0];
1036 phase = 1;
1037 break;
1038 case SM4_OPCODE_HS_JOIN_PHASE:
1039 phase = 2;
1040 ++subPhaseCnt[1];
1041 break;
1042 case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1043 case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1044 case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR:
1045 break;
1046
1047 // weird stuff
1048 case SM4_OPCODE_DCL_THREAD_GROUP:
1049 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
1050 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
1051 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
1052 case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
1053 case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
1054 case SM4_OPCODE_DCL_RESOURCE_RAW:
1055 case SM4_OPCODE_DCL_RESOURCE_STRUCTURED:
1056 ERROR("unhandled declaration\n");
1057 abort();
1058 return false;
1059
1060 default:
1061 assert(!"invalid SM4 declaration");
1062 return false;
1063 }
1064 return true;
1065 }
1066
1067 void
allocateValues()1068 Converter::allocateValues()
1069 {
1070 lData = new DataArray[nrArrays];
1071
1072 for (unsigned int i = 0; i < nrArrays; ++i)
1073 lData[i].setParent(this);
1074
1075 tData32.setup(0, nrRegVals, 4, 4, FILE_GPR);
1076 tData64.setup(0, nrRegVals, 2, 8, FILE_GPR);
1077
1078 if (prog->getType() == Program::TYPE_FRAGMENT)
1079 oData.setup(0, info.numOutputs, 4, 4, FILE_GPR);
1080 }
1081
handleDeclaration(const sm4_dcl & dcl)1082 bool Converter::handleDeclaration(const sm4_dcl& dcl)
1083 {
1084 switch (dcl.opcode) {
1085 case SM4_OPCODE_DCL_INDEXABLE_TEMP:
1086 lData[nrArrays++].setup(arrayVol,
1087 dcl.indexable_temp.num, dcl.indexable_temp.comps,
1088 4, FILE_MEMORY_LOCAL);
1089 arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4;
1090 break;
1091 case SM4_OPCODE_HS_FORK_PHASE:
1092 if (subPhaseCnt[0])
1093 phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1];
1094 ++subPhaseCnt[0];
1095 break;
1096 case SM4_OPCODE_HS_JOIN_PHASE:
1097 if (subPhaseCnt[1])
1098 phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1];
1099 ++subPhaseCnt[1];
1100 break;
1101 case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1102 phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num;
1103 break;
1104 case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1105 phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num;
1106 break;
1107
1108 default:
1109 break; // already handled in inspection
1110 }
1111
1112 return true;
1113 }
1114
1115 Symbol *
iSym(int i,int c)1116 Converter::iSym(int i, int c)
1117 {
1118 if (info.in[i].regular) {
1119 return mkSymbol(FILE_SHADER_INPUT, 0, sTy, info.in[i].slot[c] * 4);
1120 } else {
1121 return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si);
1122 }
1123 }
1124
1125 Symbol *
oSym(int i,int c)1126 Converter::oSym(int i, int c)
1127 {
1128 if (info.out[i].regular) {
1129 return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4);
1130 } else {
1131 return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si);
1132 }
1133 }
1134
1135 Value *
getSrcPtr(int s,int dim,int shl)1136 Converter::getSrcPtr(int s, int dim, int shl)
1137 {
1138 if (srcPtr[s][dim])
1139 return srcPtr[s][dim];
1140
1141 sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get();
1142
1143 if (!op)
1144 return NULL;
1145
1146 Value *index = src(*op, 0, s);
1147
1148 srcPtr[s][dim] = index;
1149 if (shl)
1150 srcPtr[s][dim] = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
1151 return srcPtr[s][dim];
1152 }
1153
1154 Value *
getDstPtr(int d,int dim,int shl)1155 Converter::getDstPtr(int d, int dim, int shl)
1156 {
1157 assert(d == 0);
1158 if (dstPtr[dim])
1159 return dstPtr[dim];
1160
1161 sm4_op *op = insn->ops[d]->indices[dim].reg.get();
1162 if (!op)
1163 return NULL;
1164
1165 Value *index = src(*op, 0, d);
1166 if (shl)
1167 index = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
1168
1169 return (dstPtr[dim] = index);
1170 }
1171
1172 Value *
getVtxPtr(int s)1173 Converter::getVtxPtr(int s)
1174 {
1175 assert(s < 3);
1176 if (vtxBase[s])
1177 return vtxBase[s];
1178
1179 sm4_op *op = insn->ops[s + nDstOpnds].get();
1180 if (!op)
1181 return NULL;
1182 int idx = op->indices[0].disp;
1183
1184 vtxBase[s] = getSrcPtr(s, 0, 0);
1185 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), vtxBase[s]);
1186 return vtxBase[s];
1187 }
1188
1189 Value *
src(int i,int c)1190 Converter::src(int i, int c)
1191 {
1192 return src(*insn->ops[i + nDstOpnds], c, i);
1193 }
1194
1195 Value *
dst(int i,int c)1196 Converter::dst(int i, int c)
1197 {
1198 return dst(*insn->ops[i], c, i);
1199 }
1200
1201 void
saveDst(int i,int c,Value * value)1202 Converter::saveDst(int i, int c, Value *value)
1203 {
1204 if (insn->insn.sat)
1205 mkOp1(OP_SAT, dTy, value, value);
1206 return saveDst(*insn->ops[i], c, value, i);
1207 }
1208
1209 Value *
interpolate(const sm4_op & op,int c,int i)1210 Converter::interpolate(const sm4_op& op, int c, int i)
1211 {
1212 int idx = op.indices[0].disp;
1213 int swz = op.swizzle[c];
1214 operation opr =
1215 (info.in[idx].linear || info.in[idx].flat) ? OP_LINTERP : OP_PINTERP;
1216
1217 Value *ptr = getSrcPtr(i, 0, 4);
1218
1219 Instruction *insn = new_Instruction(func, opr, TYPE_F32);
1220
1221 insn->setDef(0, getScratch());
1222 insn->setSrc(0, iSym(idx, swz));
1223 if (opr == OP_PINTERP)
1224 insn->setSrc(1, fragCoord[3]);
1225 if (ptr)
1226 insn->setIndirect(0, 0, ptr);
1227
1228 insn->setInterpolate(interpMode[idx]);
1229
1230 bb->insertTail(insn);
1231 return insn->getDef(0);
1232 }
1233
1234 Value *
src(const sm4_op & op,int c,int s)1235 Converter::src(const sm4_op& op, int c, int s)
1236 {
1237 const int size = typeSizeof(sTy);
1238
1239 Instruction *ld;
1240 Value *res, *ptr, *vtx;
1241 int idx, dim, off;
1242 const int swz = op.swizzle[c];
1243
1244 switch (op.file) {
1245 case SM4_FILE_IMMEDIATE32:
1246 res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32);
1247 break;
1248 case SM4_FILE_IMMEDIATE64:
1249 assert(c < 2);
1250 res = loadImm(NULL, op.imm_values[swz].u64);
1251 break;
1252 case SM4_FILE_TEMP:
1253 assert(op.is_index_simple(0));
1254 idx = op.indices[0].disp;
1255 if (size == 8)
1256 res = tData64.load(idx, swz, NULL);
1257 else
1258 res = tData32.load(idx, swz, NULL);
1259 break;
1260 case SM4_FILE_INPUT:
1261 case SM4_FILE_INPUT_CONTROL_POINT:
1262 case SM4_FILE_INPUT_PATCH_CONSTANT:
1263 if (prog->getType() == Program::TYPE_FRAGMENT)
1264 return interpolate(op, c, s);
1265
1266 idx = 0;
1267 if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
1268 idx = info.numInputs - info.numPatchConstants;
1269
1270 if (op.num_indices == 2) {
1271 vtx = getVtxPtr(s);
1272 ptr = getSrcPtr(s, 1, 4);
1273 idx += op.indices[1].disp;
1274 res = getSSA();
1275 ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz));
1276 ld->setIndirect(0, 0, ptr);
1277 ld->setIndirect(0, 1, vtx);
1278 } else {
1279 idx += op.indices[0].disp;
1280 res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4));
1281 }
1282 if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
1283 res->defs->getInsn()->perPatch = 1;
1284 break;
1285 case SM4_FILE_CONSTANT_BUFFER:
1286 assert(op.num_indices == 2);
1287 assert(op.is_index_simple(0));
1288
1289 ptr = getSrcPtr(s, 1, 4);
1290 dim = op.indices[0].disp;
1291 off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
1292
1293 res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr);
1294 break;
1295 case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER:
1296 ptr = getSrcPtr(s, 0, 4);
1297 off = (op.indices[0].disp * 4 + swz) * 4;
1298 res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr);
1299 break;
1300 case SM4_FILE_INDEXABLE_TEMP:
1301 {
1302 assert(op.is_index_simple(0));
1303 int a = op.indices[0].disp;
1304 idx = op.indices[1].disp;
1305 res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4));
1306 }
1307 break;
1308 case SM4_FILE_INPUT_PRIMITIVEID:
1309 recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
1310 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
1311 break;
1312 case SM4_FILE_INPUT_GS_INSTANCE_ID:
1313 case SM4_FILE_OUTPUT_CONTROL_POINT_ID:
1314 recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true);
1315 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0));
1316 break;
1317 case SM4_FILE_CYCLE_COUNTER:
1318 res =
1319 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0));
1320 break;
1321 case SM4_FILE_INPUT_FORK_INSTANCE_ID:
1322 case SM4_FILE_INPUT_JOIN_INSTANCE_ID:
1323 {
1324 phaseInstanceUsed = true;
1325 if (unrollPhase)
1326 return loadImm(NULL, phaseInstance);
1327 const unsigned int cnt = phaseInstCnt[phase - 1][subPhase];
1328 res = getScratch();
1329 res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0));
1330 res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1));
1331 }
1332 break;
1333 case SM4_FILE_INPUT_DOMAIN_POINT:
1334 assert(swz < 3);
1335 res = domainPt[swz];
1336 break;
1337 case SM4_FILE_THREAD_GROUP_SHARED_MEMORY:
1338 off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
1339 ptr = getSrcPtr(s, 0, 4);
1340 res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr);
1341 break;
1342 case SM4_FILE_RESOURCE:
1343 case SM4_FILE_SAMPLER:
1344 case SM4_FILE_UNORDERED_ACCESS_VIEW:
1345 return NULL;
1346 case SM4_FILE_INPUT_THREAD_ID:
1347 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz));
1348 break;
1349 case SM4_FILE_INPUT_THREAD_GROUP_ID:
1350 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz));
1351 break;
1352 case SM4_FILE_FUNCTION_INPUT:
1353 case SM4_FILE_INPUT_THREAD_ID_IN_GROUP:
1354 assert(!"unhandled source file");
1355 return NULL;
1356 default:
1357 assert(!"invalid source file");
1358 return NULL;
1359 }
1360
1361 if (op.abs)
1362 res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res);
1363 if (op.neg)
1364 res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res);
1365 return res;
1366 }
1367
1368 Value *
dst(const sm4_op & op,int c,int i)1369 Converter::dst(const sm4_op &op, int c, int i)
1370 {
1371 switch (op.file) {
1372 case SM4_FILE_TEMP:
1373 return tData32.acquire(op.indices[0].disp, c);
1374 case SM4_FILE_INDEXABLE_TEMP:
1375 return getScratch();
1376 case SM4_FILE_OUTPUT:
1377 if (prog->getType() == Program::TYPE_FRAGMENT)
1378 return oData.acquire(op.indices[0].disp, c);
1379 return getScratch();
1380 case SM4_FILE_NULL:
1381 return NULL;
1382 case SM4_FILE_OUTPUT_DEPTH:
1383 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
1384 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
1385 case SM4_FILE_OUTPUT_COVERAGE_MASK:
1386 return getScratch();
1387 case SM4_FILE_IMMEDIATE32:
1388 case SM4_FILE_IMMEDIATE64:
1389 case SM4_FILE_CONSTANT_BUFFER:
1390 case SM4_FILE_RESOURCE:
1391 case SM4_FILE_SAMPLER:
1392 case SM4_FILE_UNORDERED_ACCESS_VIEW:
1393 assert(!"invalid destination file");
1394 return NULL;
1395 default:
1396 assert(!"invalid file");
1397 return NULL;
1398 }
1399 }
1400
1401 void
saveFragDepth(operation op,Value * value)1402 Converter::saveFragDepth(operation op, Value *value)
1403 {
1404 if (op == OP_MIN || op == OP_MAX) {
1405 Value *zIn;
1406 zIn = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2));
1407 value = mkOp2v(op, TYPE_F32, getSSA(), value, zIn);
1408 }
1409 oData.store(info.io.fragDepth, 2, NULL, value);
1410 }
1411
1412 void
saveDst(const sm4_op & op,int c,Value * value,int s)1413 Converter::saveDst(const sm4_op &op, int c, Value *value, int s)
1414 {
1415 Symbol *sym;
1416 Instruction *st;
1417 int a, idx;
1418
1419 switch (op.file) {
1420 case SM4_FILE_TEMP:
1421 idx = op.indices[0].disp;
1422 tData32.store(idx, c, NULL, value);
1423 break;
1424 case SM4_FILE_INDEXABLE_TEMP:
1425 a = op.indices[0].disp;
1426 idx = op.indices[1].disp;
1427 // FIXME: shift is wrong, depends in lData
1428 lData[a].store(idx, c, getDstPtr(s, 1, 4), value);
1429 break;
1430 case SM4_FILE_OUTPUT:
1431 assert(op.num_indices == 1);
1432 idx = op.indices[0].disp;
1433 if (prog->getType() == Program::TYPE_FRAGMENT) {
1434 oData.store(idx, c, NULL, value);
1435 } else {
1436 if (phase)
1437 idx += info.numOutputs - info.numPatchConstants;
1438 const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4;
1439 sym = oSym(idx, c);
1440 if (sym->reg.file == FILE_SHADER_OUTPUT)
1441 st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value);
1442 else
1443 st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value);
1444 st->perPatch = phase ? 1 : 0;
1445 }
1446 break;
1447 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
1448 saveFragDepth(OP_MAX, value);
1449 break;
1450 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
1451 saveFragDepth(OP_MIN, value);
1452 break;
1453 case SM4_FILE_OUTPUT_DEPTH:
1454 saveFragDepth(OP_NOP, value);
1455 break;
1456 case SM4_FILE_OUTPUT_COVERAGE_MASK:
1457 oData.store(info.io.sampleMask, 0, NULL, value);
1458 break;
1459 case SM4_FILE_IMMEDIATE32:
1460 case SM4_FILE_IMMEDIATE64:
1461 case SM4_FILE_INPUT:
1462 case SM4_FILE_CONSTANT_BUFFER:
1463 case SM4_FILE_RESOURCE:
1464 case SM4_FILE_SAMPLER:
1465 assert(!"invalid destination file");
1466 return;
1467 default:
1468 assert(!"invalid file");
1469 return;
1470 }
1471 }
1472
1473 void
emitTex(Value * dst0[4],TexInstruction * tex,const uint8_t swz[4])1474 Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4])
1475 {
1476 Value *res[4] = { NULL, NULL, NULL, NULL };
1477 unsigned int c, d;
1478
1479 for (c = 0; c < 4; ++c)
1480 if (dst0[c])
1481 tex->tex.mask |= 1 << swz[c];
1482 for (d = 0, c = 0; c < 4; ++c)
1483 if (tex->tex.mask & (1 << c))
1484 tex->setDef(d++, (res[c] = getScratch()));
1485
1486 bb->insertTail(tex);
1487
1488 if (insn->opcode == SM4_OPCODE_RESINFO) {
1489 if (tex->tex.target.getDim() == 1) {
1490 res[2] = loadImm(NULL, 0);
1491 if (!tex->tex.target.isArray())
1492 res[1] = res[2];
1493 } else
1494 if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) {
1495 res[2] = loadImm(NULL, 0);
1496 }
1497 for (c = 0; c < 4; ++c) {
1498 if (!dst0[c])
1499 continue;
1500 Value *src = res[swz[c]];
1501 assert(src);
1502 switch (insn->insn.resinfo_return_type) {
1503 case 0:
1504 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
1505 break;
1506 case 1:
1507 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
1508 if (swz[c] < tex->tex.target.getDim())
1509 mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]);
1510 break;
1511 default:
1512 mkMov(dst0[c], src);
1513 break;
1514 }
1515 }
1516 } else {
1517 for (c = 0; c < 4; ++c)
1518 if (dst0[c])
1519 mkMov(dst0[c], res[swz[c]]);
1520 }
1521 }
1522
1523 void
handleQUERY(Value * dst0[4],enum TexQuery query)1524 Converter::handleQUERY(Value *dst0[4], enum TexQuery query)
1525 {
1526 TexInstruction *texi = new_TexInstruction(func, OP_TXQ);
1527 texi->tex.query = query;
1528
1529 assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs
1530
1531 const int rOp = (query == TXQ_DIMS) ? 2 : 1;
1532 const int sOp = (query == TXQ_DIMS) ? 0 : 1;
1533
1534 const int tR = insn->ops[rOp]->indices[0].disp;
1535
1536 texi->setTexture(resourceType[tR][0], tR, 0);
1537
1538 texi->setSrc(0, src(sOp, 0)); // mip level or sample index
1539
1540 emitTex(dst0, texi, insn->ops[rOp]->swizzle);
1541 }
1542
1543 void
handleLOAD(Value * dst0[4])1544 Converter::handleLOAD(Value *dst0[4])
1545 {
1546 TexInstruction *texi = new_TexInstruction(func, OP_TXF);
1547 unsigned int c;
1548
1549 const int tR = insn->ops[2]->indices[0].disp;
1550
1551 texi->setTexture(resourceType[tR][0], tR, 0);
1552
1553 for (c = 0; c < texi->tex.target.getArgCount(); ++c)
1554 texi->setSrc(c, src(0, c));
1555
1556 if (texi->tex.target == TEX_TARGET_BUFFER) {
1557 texi->tex.levelZero = true;
1558 } else {
1559 texi->setSrc(c++, src(0, 3));
1560 for (c = 0; c < 3; ++c) {
1561 texi->tex.offset[0][c] = insn->sample_offset[c];
1562 if (texi->tex.offset[0][c])
1563 texi->tex.useOffsets = 1;
1564 }
1565 }
1566
1567 emitTex(dst0, texi, insn->ops[2]->swizzle);
1568 }
1569
1570 // order of nv50 ir sources: x y z/layer lod/bias dc
1571 void
handleSAMPLE(operation opr,Value * dst0[4])1572 Converter::handleSAMPLE(operation opr, Value *dst0[4])
1573 {
1574 TexInstruction *texi = new_TexInstruction(func, opr);
1575 unsigned int c, s;
1576 Value *arg[4], *src0[4];
1577 Value *val;
1578 Value *lod = NULL, *dc = NULL;
1579
1580 const int tR = insn->ops[2]->indices[0].disp;
1581 const int tS = insn->ops[3]->indices[0].disp;
1582
1583 TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 1 : 0];
1584
1585 for (c = 0; c < tgt.getArgCount(); ++c)
1586 arg[c] = src0[c] = src(0, c);
1587
1588 if (insn->opcode == SM4_OPCODE_SAMPLE_L ||
1589 insn->opcode == SM4_OPCODE_SAMPLE_B) {
1590 lod = src(3, 0);
1591 } else
1592 if (insn->opcode == SM4_OPCODE_SAMPLE_C ||
1593 insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) {
1594 dc = src(3, 0);
1595 if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ)
1596 texi->tex.levelZero = true;
1597 } else
1598 if (insn->opcode == SM4_OPCODE_SAMPLE_D) {
1599 for (c = 0; c < tgt.getDim(); ++c) {
1600 texi->dPdx[c] = src(3, c);
1601 texi->dPdy[c] = src(4, c);
1602 }
1603 }
1604
1605 if (tgt.isCube()) {
1606 for (c = 0; c < 3; ++c)
1607 src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
1608 val = getScratch();
1609 mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]);
1610 mkOp2(OP_MAX, TYPE_F32, val, src0[2], val);
1611 mkOp1(OP_RCP, TYPE_F32, val, val);
1612 for (c = 0; c < 3; ++c)
1613 src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
1614 }
1615
1616 for (s = 0; s < tgt.getArgCount(); ++s)
1617 texi->setSrc(s, src0[s]);
1618 if (lod)
1619 texi->setSrc(s++, lod);
1620 if (dc)
1621 texi->setSrc(s++, dc);
1622
1623 for (c = 0; c < 3; ++c) {
1624 texi->tex.offset[0][c] = insn->sample_offset[c];
1625 if (texi->tex.offset[0][c])
1626 texi->tex.useOffsets = 1;
1627 }
1628
1629 texi->setTexture(tgt, tR, tS);
1630
1631 emitTex(dst0, texi, insn->ops[2]->swizzle);
1632 }
1633
1634 void
handleDP(Value * dst0[4],int dim)1635 Converter::handleDP(Value *dst0[4], int dim)
1636 {
1637 Value *src0 = src(0, 0), *src1 = src(1, 0);
1638 Value *dotp = getScratch();
1639
1640 assert(dim > 0);
1641
1642 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
1643 for (int c = 1; c < dim; ++c)
1644 mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp);
1645
1646 for (int c = 0; c < 4; ++c)
1647 dst0[c] = dotp;
1648 }
1649
1650 void
insertConvergenceOps(BasicBlock * conv,BasicBlock * fork)1651 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
1652 {
1653 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
1654 join->fixed = 1;
1655 conv->insertHead(join);
1656
1657 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
1658 fork->insertBefore(fork->getExit(), fork->joinAt);
1659 }
1660
1661 void
finalizeShader()1662 Converter::finalizeShader()
1663 {
1664 if (finalized)
1665 return;
1666 BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
1667 entryBBs.pop();
1668
1669 finalized = true;
1670
1671 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
1672 setPosition(epilogue, true);
1673
1674 if (prog->getType() == Program::TYPE_FRAGMENT)
1675 exportOutputs();
1676
1677 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1678 }
1679
// Loop headers for iterating over the channels enabled in the write mask of
// the first destination operand of the instruction named insn in the
// current scope. The statement following the macro is executed once per
// enabled channel, with chan set to the channel index.

// 32-bit variant: checks channels 0 to 3
#define FOR_EACH_DST0_ENABLED_CHANNEL32(chan)  \
   for ((chan) = 0; (chan) < 4; ++(chan))      \
      if (insn->ops[0]->mask & (1 << (chan)))

// 64-bit variant: checks channels 0 and 1 only
#define FOR_EACH_DST0_ENABLED_CHANNEL64(chan)  \
   for ((chan) = 0; (chan) < 2; ++(chan))      \
      if (insn->ops[0]->mask & (1 << (chan)))
1687
1688 bool
checkDstSrcAliasing() const1689 Converter::checkDstSrcAliasing() const
1690 {
1691 for (unsigned int d = 0; d < nDstOpnds; ++d) {
1692 for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) {
1693 if (insn->ops[d]->file != insn->ops[s]->file)
1694 continue;
1695 int i = insn->ops[s]->num_indices - 1;
1696 if (i != insn->ops[d]->num_indices - 1)
1697 continue;
1698 if (insn->ops[d]->is_index_simple(i) &&
1699 insn->ops[s]->is_index_simple(i) &&
1700 insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp)
1701 return true;
1702 }
1703 }
1704 return false;
1705 }
1706
1707 bool
handleInstruction(unsigned int pos)1708 Converter::handleInstruction(unsigned int pos)
1709 {
1710 Value *dst0[4], *rDst0[4];
1711 Value *dst1[4], *rDst1[4];
1712 int c, nc;
1713
1714 insn = sm4.insns[pos];
1715 enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode);
1716
1717 operation op = cvtOpcode(opcode);
1718
1719 sTy = inferSrcType(opcode);
1720 dTy = inferDstType(opcode);
1721
1722 nc = dTy == TYPE_F64 ? 2 : 4;
1723
1724 nDstOpnds = getDstOpndCount(opcode);
1725
1726 bool useScratchDst = checkDstSrcAliasing();
1727
1728 INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst);
1729
1730 if (nDstOpnds >= 1) {
1731 for (c = 0; c < nc; ++c)
1732 rDst0[c] = dst0[c] =
1733 insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL;
1734 if (useScratchDst)
1735 for (c = 0; c < nc; ++c)
1736 dst0[c] = rDst0[c] ? getScratch() : NULL;
1737 }
1738
1739 if (nDstOpnds >= 2) {
1740 for (c = 0; c < nc; ++c)
1741 rDst1[c] = dst1[c] =
1742 insn->ops[1].get()->mask & (1 << c) ? dst(1, c) : NULL;
1743 if (useScratchDst)
1744 for (c = 0; c < nc; ++c)
1745 dst1[c] = rDst1[c] ? getScratch() : NULL;
1746 }
1747
1748 switch (insn->opcode) {
1749 case SM4_OPCODE_ADD:
1750 case SM4_OPCODE_AND:
1751 case SM4_OPCODE_DIV:
1752 case SM4_OPCODE_IADD:
1753 case SM4_OPCODE_IMAX:
1754 case SM4_OPCODE_IMIN:
1755 case SM4_OPCODE_MIN:
1756 case SM4_OPCODE_MAX:
1757 case SM4_OPCODE_MUL:
1758 case SM4_OPCODE_OR:
1759 case SM4_OPCODE_UMAX:
1760 case SM4_OPCODE_UMIN:
1761 case SM4_OPCODE_XOR:
1762 FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1763 Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1764 if (dTy == TYPE_F32)
1765 insn->ftz = 1;
1766 }
1767 break;
1768
1769 case SM4_OPCODE_ISHL:
1770 case SM4_OPCODE_ISHR:
1771 case SM4_OPCODE_USHR:
1772 FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1773 Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1774 insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP;
1775 }
1776 break;
1777
1778 case SM4_OPCODE_IMAD:
1779 case SM4_OPCODE_MAD:
1780 case SM4_OPCODE_UMAD:
1781 FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1782 mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c));
1783 }
1784 break;
1785
1786 case SM4_OPCODE_DADD:
1787 case SM4_OPCODE_DMAX:
1788 case SM4_OPCODE_DMIN:
1789 case SM4_OPCODE_DMUL:
1790 FOR_EACH_DST0_ENABLED_CHANNEL64(c) {
1791 mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1792 }
1793 break;
1794
1795 case SM4_OPCODE_UDIV:
1796 for (c = 0; c < 4; ++c) {
1797 Value *dvn, *dvs;
1798 if (dst0[c] || dst1[c]) {
1799 dvn = src(0, c);
1800 dvs = src(1, c);
1801 }
1802 if (dst0[c])
1803 mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs);
1804 if (dst1[c])
1805 mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs);
1806 }
1807 break;
1808
1809 case SM4_OPCODE_IMUL:
1810 case SM4_OPCODE_UMUL:
1811 for (c = 0; c < 4; ++c) {
1812 Value *a, *b;
1813 if (dst0[c] || dst1[c]) {
1814 a = src(0, c);
1815 b = src(1, c);
1816 }
1817 if (dst0[c])
1818 mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp =
1819 NV50_IR_SUBOP_MUL_HIGH;
1820 if (dst1[c])
1821 mkOp2(OP_MUL, dTy, dst1[c], a, b);
1822 }
1823 break;
1824
1825 case SM4_OPCODE_DP2:
1826 handleDP(dst0, 2);
1827 break;
1828 case SM4_OPCODE_DP3:
1829 handleDP(dst0, 3);
1830 break;
1831 case SM4_OPCODE_DP4:
1832 handleDP(dst0, 4);
1833 break;
1834
1835 case SM4_OPCODE_DERIV_RTX:
1836 case SM4_OPCODE_DERIV_RTX_COARSE:
1837 case SM4_OPCODE_DERIV_RTX_FINE:
1838 case SM4_OPCODE_DERIV_RTY:
1839 case SM4_OPCODE_DERIV_RTY_COARSE:
1840 case SM4_OPCODE_DERIV_RTY_FINE:
1841 case SM4_OPCODE_MOV:
1842 case SM4_OPCODE_INEG:
1843 case SM4_OPCODE_NOT:
1844 case SM4_OPCODE_SQRT:
1845 case SM4_OPCODE_COUNTBITS:
1846 case SM4_OPCODE_EXP:
1847 case SM4_OPCODE_LOG:
1848 case SM4_OPCODE_RCP:
1849 FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1850 mkOp1(op, dTy, dst0[c], src(0, c));
1851 }
1852 break;
1853
1854 case SM4_OPCODE_FRC:
1855 FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1856 Value *val = getScratch();
1857 Value *src0 = src(0, c);
1858 mkOp1(OP_FLOOR, TYPE_F32, val, src0);
1859 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val);
1860 }
1861 break;
1862
1863 case SM4_OPCODE_MOVC:
1864 FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1865 mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c),
1866 src(0, c));
1867 break;
1868
1869 case SM4_OPCODE_ROUND_NE:
1870 case SM4_OPCODE_ROUND_NI:
1871 case SM4_OPCODE_ROUND_PI:
1872 case SM4_OPCODE_ROUND_Z:
1873 FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1874 Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c));
1875 rnd->ftz = 1;
1876 rnd->rnd = cvtRoundingMode(opcode);
1877 }
1878 break;
1879
1880 case SM4_OPCODE_RSQ:
1881 FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1882 mkOp1(op, dTy, dst0[c], src(0, c));
1883 break;
1884
1885 case SM4_OPCODE_SINCOS:
1886 for (c = 0; c < 4; ++c) {
1887 if (!dst0[c] && !dst1[c])
1888 continue;
1889 Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c));
1890 if (dst0[c])
1891 mkOp1(OP_SIN, TYPE_F32, dst0[c], val);
1892 if (dst1[c])
1893 mkOp1(OP_COS, TYPE_F32, dst1[c], val);
1894 }
1895 break;
1896
1897 case SM4_OPCODE_EQ:
1898 case SM4_OPCODE_GE:
1899 case SM4_OPCODE_IEQ:
1900 case SM4_OPCODE_IGE:
1901 case SM4_OPCODE_ILT:
1902 case SM4_OPCODE_LT:
1903 case SM4_OPCODE_NE:
1904 case SM4_OPCODE_INE:
1905 case SM4_OPCODE_ULT:
1906 case SM4_OPCODE_UGE:
1907 case SM4_OPCODE_DEQ:
1908 case SM4_OPCODE_DGE:
1909 case SM4_OPCODE_DLT:
1910 case SM4_OPCODE_DNE:
1911 {
1912 CondCode cc = cvtCondCode(opcode);
1913 FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1914 CmpInstruction *set;
1915 set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), NULL);
1916 set->setType(dTy, sTy);
1917 if (sTy == TYPE_F32)
1918 set->ftz = 1;
1919 }
1920 }
1921 break;
1922
1923 case SM4_OPCODE_FTOI:
1924 case SM4_OPCODE_FTOU:
1925 FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1926 mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z;
1927 break;
1928 case SM4_OPCODE_ITOF:
1929 case SM4_OPCODE_UTOF:
1930 case SM4_OPCODE_F32TOF16:
1931 case SM4_OPCODE_F16TOF32:
1932 case SM4_OPCODE_DTOF:
1933 case SM4_OPCODE_FTOD:
1934 FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1935 mkCvt(op, dTy, dst0[c], sTy, src(0, c));
1936 break;
1937
1938 case SM4_OPCODE_CUT:
1939 case SM4_OPCODE_CUT_STREAM:
1940 mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1;
1941 break;
1942 case SM4_OPCODE_EMIT:
1943 case SM4_OPCODE_EMIT_STREAM:
1944 mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1;
1945 break;
1946 case SM4_OPCODE_EMITTHENCUT:
1947 case SM4_OPCODE_EMITTHENCUT_STREAM:
1948 {
1949 Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0));
1950 cut->fixed = 1;
1951 cut->subOp = NV50_IR_SUBOP_EMIT_RESTART;
1952 }
1953 break;
1954
1955 case SM4_OPCODE_DISCARD:
1956 info.prop.fp.usesDiscard = TRUE;
1957 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(
1958 insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
1959 break;
1960
1961 case SM4_OPCODE_CALL:
1962 case SM4_OPCODE_CALLC:
1963 assert(!"CALL/CALLC not implemented");
1964 break;
1965
1966 case SM4_OPCODE_RET:
1967 // XXX: the following doesn't work with subroutines / early ret
1968 if (!haveNextPhase(pos))
1969 finalizeShader();
1970 else
1971 phaseEnded = phase + 1;
1972 break;
1973
1974 case SM4_OPCODE_IF:
1975 {
1976 BasicBlock *ifClause = new BasicBlock(func);
1977
1978 bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE);
1979 condBBs.push(bb);
1980 joinBBs.push(bb);
1981
1982 mkFlow(OP_BRA, NULL, insn->insn.test_nz ? CC_NOT_P : CC_P, src(0, 0));
1983
1984 setPosition(ifClause, true);
1985 }
1986 break;
1987 case SM4_OPCODE_ELSE:
1988 {
1989 BasicBlock *elseClause = new BasicBlock(func);
1990 BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
1991
1992 forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE);
1993 condBBs.push(bb);
1994
1995 forkPoint->getExit()->asFlow()->target.bb = elseClause;
1996 if (!bb->isTerminated())
1997 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
1998
1999 setPosition(elseClause, true);
2000 }
2001 break;
2002 case SM4_OPCODE_ENDIF:
2003 {
2004 BasicBlock *convPoint = new BasicBlock(func);
2005 BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2006 BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
2007
2008 if (!bb->isTerminated()) {
2009 // we only want join if none of the clauses ended with CONT/BREAK/RET
2010 if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
2011 insertConvergenceOps(convPoint, forkPoint);
2012 mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL);
2013 bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
2014 }
2015
2016 if (lastBB->getExit()->op == OP_BRA) {
2017 lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
2018 lastBB->getExit()->asFlow()->target.bb = convPoint;
2019 }
2020 setPosition(convPoint, true);
2021 }
2022 break;
2023
2024 case SM4_OPCODE_SWITCH:
2025 case SM4_OPCODE_CASE:
2026 case SM4_OPCODE_ENDSWITCH:
2027 assert(!"SWITCH/CASE/ENDSWITCH not implemented");
2028 break;
2029
2030 case SM4_OPCODE_LOOP:
2031 {
2032 BasicBlock *loopHeader = new BasicBlock(func);
2033 BasicBlock *loopBreak = new BasicBlock(func);
2034
2035 loopBBs.push(loopHeader);
2036 breakBBs.push(loopBreak);
2037 if (loopBBs.getSize() > func->loopNestingBound)
2038 func->loopNestingBound++;
2039
2040 mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL);
2041
2042 bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE);
2043 setPosition(loopHeader, true);
2044 mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL);
2045 }
2046 break;
2047 case SM4_OPCODE_ENDLOOP:
2048 {
2049 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
2050
2051 if (!bb->isTerminated()) {
2052 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
2053 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
2054 }
2055 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
2056 }
2057 break;
2058 case SM4_OPCODE_BREAK:
2059 {
2060 if (bb->isTerminated())
2061 break;
2062 BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2063 mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL);
2064 bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
2065 }
2066 break;
2067 case SM4_OPCODE_BREAKC:
2068 {
2069 BasicBlock *nextBB = new BasicBlock(func);
2070 BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2071 CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P;
2072 mkFlow(OP_BREAK, breakBB, cc, src(0, 0));
2073 bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
2074 bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
2075 setPosition(nextBB, true);
2076 }
2077 break;
2078 case SM4_OPCODE_CONTINUE:
2079 {
2080 if (bb->isTerminated())
2081 break;
2082 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2083 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
2084 contBB->explicitCont = true;
2085 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2086 }
2087 break;
2088 case SM4_OPCODE_CONTINUEC:
2089 {
2090 BasicBlock *nextBB = new BasicBlock(func);
2091 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2092 mkFlow(OP_CONT, contBB, insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
2093 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2094 bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
2095 setPosition(nextBB, true);
2096 }
2097 break;
2098
2099 case SM4_OPCODE_SAMPLE:
2100 case SM4_OPCODE_SAMPLE_C:
2101 case SM4_OPCODE_SAMPLE_C_LZ:
2102 case SM4_OPCODE_SAMPLE_L:
2103 case SM4_OPCODE_SAMPLE_D:
2104 case SM4_OPCODE_SAMPLE_B:
2105 handleSAMPLE(op, dst0);
2106 break;
2107 case SM4_OPCODE_LD:
2108 case SM4_OPCODE_LD_MS:
2109 handleLOAD(dst0);
2110 break;
2111
2112 case SM4_OPCODE_GATHER4:
2113 assert(!"GATHER4 not implemented\n");
2114 break;
2115
2116 case SM4_OPCODE_RESINFO:
2117 handleQUERY(dst0, TXQ_DIMS);
2118 break;
2119 case SM4_OPCODE_SAMPLE_POS:
2120 handleQUERY(dst0, TXQ_SAMPLE_POSITION);
2121 break;
2122
2123 case SM4_OPCODE_NOP:
2124 mkOp(OP_NOP, TYPE_NONE, NULL);
2125 break;
2126
2127 case SM4_OPCODE_HS_DECLS:
2128 // XXX: any significance ?
2129 break;
2130 case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
2131 phase = 0;
2132 break;
2133 case SM4_OPCODE_HS_FORK_PHASE:
2134 if (phase != 1)
2135 subPhase = 0;
2136 phase = 1;
2137 phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
2138 phaseStart = pos;
2139 if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase])
2140 unrollPhase = true;
2141 break;
2142 case SM4_OPCODE_HS_JOIN_PHASE:
2143 if (phase != 2)
2144 subPhase = 0;
2145 phase = 2;
2146 phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
2147 phaseStart = pos;
2148 if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase])
2149 unrollPhase = true;
2150 break;
2151
2152 default:
2153 ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode);
2154 abort();
2155 return false;
2156 }
2157
2158 for (c = 0; c < nc; ++c) {
2159 if (nDstOpnds >= 1 && rDst0[c]) {
2160 if (dst0[c] != rDst0[c])
2161 mkMov(rDst0[c], dst0[c]);
2162 saveDst(0, c, rDst0[c]);
2163 }
2164 if (nDstOpnds >= 2 && rDst1[c]) {
2165 if (dst1[c] != rDst1[c])
2166 mkMov(rDst1[c], dst1[c]);
2167 saveDst(1, c, rDst1[c]);
2168 }
2169 }
2170
2171 memset(srcPtr, 0, sizeof(srcPtr));
2172 memset(dstPtr, 0, sizeof(dstPtr));
2173 memset(vtxBase, 0, sizeof(vtxBase));
2174 return true;
2175 }
2176
2177 void
exportOutputs()2178 Converter::exportOutputs()
2179 {
2180 for (int i = 0; i < info.numOutputs; ++i) {
2181 for (int c = 0; c < 4; ++c) {
2182 if (!oData.exists(i, c))
2183 continue;
2184 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
2185 info.out[i].slot[c] * 4);
2186 Value *val = oData.load(i, c, NULL);
2187 if (val)
2188 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
2189 }
2190 }
2191 }
2192
Converter(Program * p,struct nv50_ir_prog_info * s)2193 Converter::Converter(Program *p, struct nv50_ir_prog_info *s)
2194 : tData32(this),
2195 tData64(this),
2196 oData(this),
2197 info(*s),
2198 sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)),
2199 prog(p)
2200 {
2201 memset(srcPtr, 0, sizeof(srcPtr));
2202 memset(dstPtr, 0, sizeof(dstPtr));
2203 memset(vtxBase, 0, sizeof(vtxBase));
2204
2205 memset(interpMode, 0, sizeof(interpMode));
2206
2207 nrRegVals = nrArrays = arrayVol = 0;
2208
2209 for (phase = 3; phase > 0; --phase)
2210 for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
2211 out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT;
2212
2213 unrollPhase = false;
2214 phaseStart = 0;
2215 subPhaseCnt[0] = subPhaseCnt[1] = 0;
2216 }
2217
~Converter()2218 Converter::~Converter()
2219 {
2220 if (lData)
2221 delete[] lData;
2222
2223 if (subPhaseCnt[0])
2224 delete[] phaseInstCnt[0];
2225 if (subPhaseCnt[1])
2226 delete[] phaseInstCnt[1];
2227 }
2228
2229 bool
haveNextPhase(unsigned int pos) const2230 Converter::haveNextPhase(unsigned int pos) const
2231 {
2232 ++pos;
2233 return (pos < sm4.insns.size()) &&
2234 (sm4.insns[pos]->opcode == SM4_OPCODE_HS_FORK_PHASE ||
2235 sm4.insns[pos]->opcode == SM4_OPCODE_HS_JOIN_PHASE);
2236 }
2237
2238 bool
run()2239 Converter::run()
2240 {
2241 parseSignature();
2242
2243 for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
2244 inspectDeclaration(*sm4.dcls[pos]);
2245
2246 phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]];
2247 phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]];
2248 for (int i = 0; i < subPhaseCnt[0]; ++i)
2249 phaseInstCnt[0][i] = -1;
2250 for (int i = 0; i < subPhaseCnt[1]; ++i)
2251 phaseInstCnt[1][i] = -1;
2252 // re-increased in handleDeclaration:
2253 subPhaseCnt[0] = subPhaseCnt[1] = 0;
2254
2255 allocateValues();
2256 nrArrays = 0;
2257 for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
2258 handleDeclaration(*sm4.dcls[pos]);
2259
2260 info.io.genUserClip = -1; // no UCPs permitted with SM4 shaders
2261 info.io.clipDistanceMask = (1 << info.io.clipDistanceMask) - 1;
2262
2263 info.assignSlots(&info);
2264
2265 if (sm4.dcls.size() == 0 && sm4.insns.size() == 0)
2266 return true;
2267
2268 BasicBlock *entry = new BasicBlock(prog->main);
2269 BasicBlock *leave = new BasicBlock(prog->main);
2270
2271 prog->main->setEntry(entry);
2272 prog->main->setExit(leave);
2273
2274 setPosition(entry, true);
2275
2276 entryBBs.push(entry);
2277 leaveBBs.push(leave);
2278
2279 if (prog->getType() == Program::TYPE_FRAGMENT) {
2280 Symbol *sv = mkSysVal(SV_POSITION, 3);
2281 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
2282 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
2283 } else
2284 if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) {
2285 const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 3 : 2;
2286 int c;
2287 for (c = 0; c < n; ++c)
2288 domainPt[c] =
2289 mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c));
2290 if (c == 2)
2291 domainPt[2] = loadImm(NULL, 0.0f);
2292 }
2293
2294 finalized = false;
2295 phaseEnded = 0;
2296 phase = 0;
2297 subPhase = 0;
2298 for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) {
2299 handleInstruction(pos);
2300 if (likely(phase == 0) || (phaseEnded < 2))
2301 continue;
2302 phaseEnded = 0;
2303 if (!unrollPhase || !phaseInstanceUsed) {
2304 ++subPhase;
2305 continue;
2306 }
2307 phaseInstanceUsed = false;
2308 if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1))
2309 pos = phaseStart - 1;
2310 else
2311 ++subPhase;
2312 }
2313 finalizeShader();
2314
2315 return true;
2316 }
2317
2318 } // anonymous namespace
2319
2320 namespace nv50_ir {
2321
2322 bool
makeFromSM4(struct nv50_ir_prog_info * info)2323 Program::makeFromSM4(struct nv50_ir_prog_info *info)
2324 {
2325 Converter bld(this, info);
2326 return bld.run();
2327 }
2328
2329 } // namespace nv50_ir
2330