1 
2 #include "nv50_ir.h"
3 #include "nv50_ir_target.h"
4 #include "nv50_ir_build_util.h"
5 
6 #include "nv50_ir_from_sm4.h"
7 
// FIXME: is pass-through implicit? Check ReadWriteMask.
9 
10 namespace tgsi {
11 
irSemantic(unsigned sn)12 static nv50_ir::SVSemantic irSemantic(unsigned sn)
13 {
14    switch (sn) {
15    case TGSI_SEMANTIC_POSITION:      return nv50_ir::SV_POSITION;
16    case TGSI_SEMANTIC_FACE:          return nv50_ir::SV_FACE;
17    case NV50_SEMANTIC_LAYER:         return nv50_ir::SV_LAYER;
18    case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX;
19    case TGSI_SEMANTIC_PSIZE:         return nv50_ir::SV_POINT_SIZE;
20    case NV50_SEMANTIC_CLIPDISTANCE:  return nv50_ir::SV_CLIP_DISTANCE;
21    case TGSI_SEMANTIC_VERTEXID:      return nv50_ir::SV_VERTEX_ID;
22    case TGSI_SEMANTIC_INSTANCEID:    return nv50_ir::SV_INSTANCE_ID;
23    case TGSI_SEMANTIC_PRIMID:        return nv50_ir::SV_PRIMITIVE_ID;
24    case NV50_SEMANTIC_TESSFACTOR:    return nv50_ir::SV_TESS_FACTOR;
25    case NV50_SEMANTIC_TESSCOORD:     return nv50_ir::SV_TESS_COORD;
26    default:
27       return nv50_ir::SV_UNDEFINED;
28    }
29 }
30 
31 } // namespace tgsi
32 
33 namespace {
34 
35 using namespace nv50_ir;
36 
37 #define NV50_IR_MAX_RESOURCES 64
38 
39 class Converter : public BuildUtil
40 {
41 public:
42    Converter(Program *, struct nv50_ir_prog_info *);
43    ~Converter();
44 
45 private:
46    DataArray tData32;
47    DataArray tData64;
48    unsigned int nrRegVals;
49 
50    DataArray *lData;
51    unsigned int nrArrays;
52    unsigned int arrayVol;
53 
54    DataArray oData;
55 
56    uint8_t interpMode[PIPE_MAX_SHADER_INPUTS];
57 
58    // outputs for each phase
59    struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS];
60 
61    int phase;
62    int subPhaseCnt[2];
63    int subPhase;
64    unsigned int phaseStart;
65    unsigned int phaseInstance;
66    unsigned int *phaseInstCnt[2];
67    bool unrollPhase;
68    bool phaseInstanceUsed;
69    int phaseEnded; // (phase + 1) if $phase ended
70 
71    bool finalized;
72 
73    Value *srcPtr[3][3]; // for indirect addressing, save pointer values
74    Value *dstPtr[3];
75    Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP)
76 
77    Value *domainPt[3]; // pre-fetched TessCoord
78 
79    unsigned int nDstOpnds;
80 
81    Stack condBBs;
82    Stack joinBBs;
83    Stack loopBBs;
84    Stack breakBBs;
85    Stack entryBBs;
86    Stack leaveBBs;
87    Stack retIPs;
88 
89    bool shadow[NV50_IR_MAX_RESOURCES];
90    TexTarget resourceType[NV50_IR_MAX_RESOURCES][2];
91 
92    struct nv50_ir_prog_info& info;
93 
94    Value *fragCoord[4];
95 
96 public:
97    bool run();
98 
99 private:
100    bool handleInstruction(unsigned int pos);
101    bool inspectInstruction(unsigned int pos);
102    bool handleDeclaration(const sm4_dcl& dcl);
103    bool inspectDeclaration(const sm4_dcl& dcl);
104    bool parseSignature();
105 
106    bool haveNextPhase(unsigned int pos) const;
107 
108    void allocateValues();
109    void exportOutputs();
110 
111    void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]);
112    void handleLOAD(Value *dst0[4]);
113    void handleSAMPLE(operation, Value *dst0[4]);
114    void handleQUERY(Value *dst0[4], enum TexQuery query);
115    void handleDP(Value *dst0[4], int dim);
116 
117    Symbol *iSym(int i, int c);
118    Symbol *oSym(int i, int c);
119 
120    Value *src(int i, int c);
121    Value *src(const sm4_op&, int c, int i);
122    Value *dst(int i, int c);
123    Value *dst(const sm4_op&, int c, int i);
124    void saveDst(int i, int c, Value *value);
125    void saveDst(const sm4_op&, int c, Value *value, int i);
126    void saveFragDepth(operation op, Value *value);
127 
128    Value *interpolate(const sm4_op&, int c, int i);
129 
130    Value *getSrcPtr(int s, int dim, int shl);
131    Value *getDstPtr(int d, int dim, int shl);
132    Value *getVtxPtr(int s);
133 
134    bool checkDstSrcAliasing() const;
135    void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
136    void finalizeShader();
137 
138    operation cvtOpcode(enum sm4_opcode op) const;
139    unsigned int getDstOpndCount(enum sm4_opcode opcode) const;
140 
141    DataType inferSrcType(enum sm4_opcode op) const;
142    DataType inferDstType(enum sm4_opcode op) const;
143 
144    unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const;
145    CondCode cvtCondCode(enum sm4_opcode op) const;
146    RoundMode cvtRoundingMode(enum sm4_opcode op) const;
147    TexTarget cvtTexTarget(enum sm4_target,
148                            enum sm4_opcode, operation *) const;
149    SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const;
150    uint8_t cvtInterpMode(enum sm4_interpolation) const;
151 
152    unsigned tgsiSemantic(SVSemantic, int index);
153    void recordSV(unsigned sn, unsigned si, unsigned mask, bool input);
154 
155 private:
156    sm4_insn *insn;
157    DataType dTy, sTy;
158 
159    const struct sm4_program& sm4;
160    Program *prog;
161 };
162 
163 #define PRIM_CASE(a, b) \
164    case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b;
165 
166 unsigned
g3dPrim(const unsigned prim,unsigned * patchSize) const167 Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const
168 {
169    switch (prim) {
170    PRIM_CASE(UNDEFINED, POINTS);
171    PRIM_CASE(POINTLIST, POINTS);
172    PRIM_CASE(LINELIST, LINES);
173    PRIM_CASE(LINESTRIP, LINE_STRIP);
174    PRIM_CASE(TRIANGLELIST, TRIANGLES);
175    PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP);
176    PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY);
177    PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY);
178    PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY);
179    PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLES_ADJACENCY);
180    default:
181       if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST ||
182           prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST)
183          return PIPE_PRIM_POINTS;
184       if (patchSize)
185          *patchSize =
186             prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1;
187       return NV50_PRIM_PATCHES;
188    }
189 }
190 
191 #define IPM_CASE(n, a, b) \
192    case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b
193 
194 uint8_t
cvtInterpMode(enum sm4_interpolation mode) const195 Converter::cvtInterpMode(enum sm4_interpolation mode) const
196 {
197    switch (mode) {
198    IPM_CASE(CONSTANT,                      FLAT, FLAT);
199    IPM_CASE(LINEAR,                        PERSPECTIVE, PERSPECTIVE);
200    IPM_CASE(LINEAR_CENTROID,               PERSPECTIVE, CENTROID);
201    IPM_CASE(LINEAR_NOPERSPECTIVE,          LINEAR, LINEAR);
202    IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID, LINEAR, CENTROID);
203    IPM_CASE(LINEAR_SAMPLE,                 PERSPECTIVE, OFFSET);
204    IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE,   LINEAR, OFFSET);
205    IPM_CASE(UNDEFINED,                     LINEAR, LINEAR);
206    default:
207       assert(!"invalid interpolation mode");
208       return 0;
209    }
210 }
211 
212 static void
setVaryingInterpMode(struct nv50_ir_varying * var,uint8_t mode)213 setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode)
214 {
215    switch (mode & NV50_IR_INTERP_MODE_MASK) {
216    case NV50_IR_INTERP_LINEAR:
217       var->linear = 1;
218       break;
219    case NV50_IR_INTERP_FLAT:
220       var->flat = 1;
221       break;
222    default:
223       break;
224    }
225    if (mode & NV50_IR_INTERP_CENTROID)
226       var->centroid = 1;
227 }
228 
229 RoundMode
cvtRoundingMode(enum sm4_opcode op) const230 Converter::cvtRoundingMode(enum sm4_opcode op) const
231 {
232    switch (op) {
233    case SM4_OPCODE_ROUND_NE: return ROUND_NI;
234    case SM4_OPCODE_ROUND_NI: return ROUND_MI;
235    case SM4_OPCODE_ROUND_PI: return ROUND_PI;
236    case SM4_OPCODE_ROUND_Z:  return ROUND_ZI;
237    default:
238       return ROUND_N;
239    }
240 }
241 
242 CondCode
cvtCondCode(enum sm4_opcode op) const243 Converter::cvtCondCode(enum sm4_opcode op) const
244 {
245    switch (op) {
246    case SM4_OPCODE_EQ:
247    case SM4_OPCODE_DEQ:
248    case SM4_OPCODE_IEQ: return CC_EQ;
249    case SM4_OPCODE_GE:
250    case SM4_OPCODE_DGE:
251    case SM4_OPCODE_IGE:
252    case SM4_OPCODE_UGE: return CC_GE;
253    case SM4_OPCODE_LT:
254    case SM4_OPCODE_DLT:
255    case SM4_OPCODE_ILT:
256    case SM4_OPCODE_ULT: return CC_LT;
257    case SM4_OPCODE_NE:
258    case SM4_OPCODE_INE:
259    case SM4_OPCODE_DNE: return CC_NEU;
260    default:
261       return CC_ALWAYS;
262    }
263 }
264 
265 DataType
inferSrcType(enum sm4_opcode op) const266 Converter::inferSrcType(enum sm4_opcode op) const
267 {
268    switch (op) {
269    case SM4_OPCODE_IADD:
270    case SM4_OPCODE_IEQ:
271    case SM4_OPCODE_IGE:
272    case SM4_OPCODE_ILT:
273    case SM4_OPCODE_IMAD:
274    case SM4_OPCODE_IMAX:
275    case SM4_OPCODE_IMIN:
276    case SM4_OPCODE_IMUL:
277    case SM4_OPCODE_INE:
278    case SM4_OPCODE_INEG:
279    case SM4_OPCODE_ISHL:
280    case SM4_OPCODE_ISHR:
281    case SM4_OPCODE_ITOF:
282    case SM4_OPCODE_ATOMIC_IADD:
283    case SM4_OPCODE_ATOMIC_IMAX:
284    case SM4_OPCODE_ATOMIC_IMIN:
285       return TYPE_S32;
286    case SM4_OPCODE_AND:
287    case SM4_OPCODE_NOT:
288    case SM4_OPCODE_OR:
289    case SM4_OPCODE_UDIV:
290    case SM4_OPCODE_ULT:
291    case SM4_OPCODE_UGE:
292    case SM4_OPCODE_UMUL:
293    case SM4_OPCODE_UMAD:
294    case SM4_OPCODE_UMAX:
295    case SM4_OPCODE_UMIN:
296    case SM4_OPCODE_USHR:
297    case SM4_OPCODE_UTOF:
298    case SM4_OPCODE_XOR:
299    case SM4_OPCODE_UADDC:
300    case SM4_OPCODE_USUBB:
301    case SM4_OPCODE_ATOMIC_AND:
302    case SM4_OPCODE_ATOMIC_OR:
303    case SM4_OPCODE_ATOMIC_XOR:
304    case SM4_OPCODE_ATOMIC_UMAX:
305    case SM4_OPCODE_ATOMIC_UMIN:
306       return TYPE_U32;
307    case SM4_OPCODE_DADD:
308    case SM4_OPCODE_DMAX:
309    case SM4_OPCODE_DMIN:
310    case SM4_OPCODE_DMUL:
311    case SM4_OPCODE_DEQ:
312    case SM4_OPCODE_DGE:
313    case SM4_OPCODE_DLT:
314    case SM4_OPCODE_DNE:
315    case SM4_OPCODE_DMOV:
316    case SM4_OPCODE_DMOVC:
317    case SM4_OPCODE_DTOF:
318       return TYPE_F64;
319    case SM4_OPCODE_F16TOF32:
320       return TYPE_F16;
321    default:
322       return TYPE_F32;
323    }
324 }
325 
326 DataType
inferDstType(enum sm4_opcode op) const327 Converter::inferDstType(enum sm4_opcode op) const
328 {
329    switch (op) {
330    case SM4_OPCODE_FTOI:
331       return TYPE_S32;
332    case SM4_OPCODE_FTOU:
333    case SM4_OPCODE_EQ:
334    case SM4_OPCODE_GE:
335    case SM4_OPCODE_LT:
336    case SM4_OPCODE_NE:
337       return TYPE_U32;
338    case SM4_OPCODE_FTOD:
339       return TYPE_F64;
340    case SM4_OPCODE_F32TOF16:
341       return TYPE_F16;
342    case SM4_OPCODE_ITOF:
343    case SM4_OPCODE_UTOF:
344    case SM4_OPCODE_DTOF:
345       return TYPE_F32;
346    default:
347       return inferSrcType(op);
348    }
349 }
350 
351 operation
cvtOpcode(enum sm4_opcode op) const352 Converter::cvtOpcode(enum sm4_opcode op) const
353 {
354    switch (op) {
355    case SM4_OPCODE_ADD:         return OP_ADD;
356    case SM4_OPCODE_AND:         return OP_AND;
357    case SM4_OPCODE_BREAK:       return OP_BREAK;
358    case SM4_OPCODE_BREAKC:      return OP_BREAK;
359    case SM4_OPCODE_CALL:        return OP_CALL;
360    case SM4_OPCODE_CALLC:       return OP_CALL;
361    case SM4_OPCODE_CASE:        return OP_NOP;
362    case SM4_OPCODE_CONTINUE:    return OP_CONT;
363    case SM4_OPCODE_CONTINUEC:   return OP_CONT;
364    case SM4_OPCODE_CUT:         return OP_RESTART;
365    case SM4_OPCODE_DEFAULT:     return OP_NOP;
366    case SM4_OPCODE_DERIV_RTX:   return OP_DFDX;
367    case SM4_OPCODE_DERIV_RTY:   return OP_DFDY;
368    case SM4_OPCODE_DISCARD:     return OP_DISCARD;
369    case SM4_OPCODE_DIV:         return OP_DIV;
370    case SM4_OPCODE_DP2:         return OP_MAD;
371    case SM4_OPCODE_DP3:         return OP_MAD;
372    case SM4_OPCODE_DP4:         return OP_MAD;
373    case SM4_OPCODE_ELSE:        return OP_BRA;
374    case SM4_OPCODE_EMIT:        return OP_EMIT;
375    case SM4_OPCODE_EMITTHENCUT: return OP_EMIT;
376    case SM4_OPCODE_ENDIF:       return OP_BRA;
377    case SM4_OPCODE_ENDLOOP:     return OP_PREBREAK;
378    case SM4_OPCODE_ENDSWITCH:   return OP_NOP;
379    case SM4_OPCODE_EQ:          return OP_SET;
380    case SM4_OPCODE_EXP:         return OP_EX2;
381    case SM4_OPCODE_FRC:         return OP_CVT;
382    case SM4_OPCODE_FTOI:        return OP_CVT;
383    case SM4_OPCODE_FTOU:        return OP_CVT;
384    case SM4_OPCODE_GE:          return OP_SET;
385    case SM4_OPCODE_IADD:        return OP_ADD;
386    case SM4_OPCODE_IF:          return OP_BRA;
387    case SM4_OPCODE_IEQ:         return OP_SET;
388    case SM4_OPCODE_IGE:         return OP_SET;
389    case SM4_OPCODE_ILT:         return OP_SET;
390    case SM4_OPCODE_IMAD:        return OP_MAD;
391    case SM4_OPCODE_IMAX:        return OP_MAX;
392    case SM4_OPCODE_IMIN:        return OP_MIN;
393    case SM4_OPCODE_IMUL:        return OP_MUL;
394    case SM4_OPCODE_INE:         return OP_SET;
395    case SM4_OPCODE_INEG:        return OP_NEG;
396    case SM4_OPCODE_ISHL:        return OP_SHL;
397    case SM4_OPCODE_ISHR:        return OP_SHR;
398    case SM4_OPCODE_ITOF:        return OP_CVT;
399    case SM4_OPCODE_LD:          return OP_TXF;
400    case SM4_OPCODE_LD_MS:       return OP_TXF;
401    case SM4_OPCODE_LOG:         return OP_LG2;
402    case SM4_OPCODE_LOOP:        return OP_PRECONT;
403    case SM4_OPCODE_LT:          return OP_SET;
404    case SM4_OPCODE_MAD:         return OP_MAD;
405    case SM4_OPCODE_MIN:         return OP_MIN;
406    case SM4_OPCODE_MAX:         return OP_MAX;
407    case SM4_OPCODE_MOV:         return OP_MOV;
408    case SM4_OPCODE_MOVC:        return OP_MOV;
409    case SM4_OPCODE_MUL:         return OP_MUL;
410    case SM4_OPCODE_NE:          return OP_SET;
411    case SM4_OPCODE_NOP:         return OP_NOP;
412    case SM4_OPCODE_NOT:         return OP_NOT;
413    case SM4_OPCODE_OR:          return OP_OR;
414    case SM4_OPCODE_RESINFO:     return OP_TXQ;
415    case SM4_OPCODE_RET:         return OP_RET;
416    case SM4_OPCODE_RETC:        return OP_RET;
417    case SM4_OPCODE_ROUND_NE:    return OP_CVT;
418    case SM4_OPCODE_ROUND_NI:    return OP_FLOOR;
419    case SM4_OPCODE_ROUND_PI:    return OP_CEIL;
420    case SM4_OPCODE_ROUND_Z:     return OP_TRUNC;
421    case SM4_OPCODE_RSQ:         return OP_RSQ;
422    case SM4_OPCODE_SAMPLE:      return OP_TEX;
423    case SM4_OPCODE_SAMPLE_C:    return OP_TEX;
424    case SM4_OPCODE_SAMPLE_C_LZ: return OP_TEX;
425    case SM4_OPCODE_SAMPLE_L:    return OP_TXL;
426    case SM4_OPCODE_SAMPLE_D:    return OP_TXD;
427    case SM4_OPCODE_SAMPLE_B:    return OP_TXB;
428    case SM4_OPCODE_SQRT:        return OP_SQRT;
429    case SM4_OPCODE_SWITCH:      return OP_NOP;
430    case SM4_OPCODE_SINCOS:      return OP_PRESIN;
431    case SM4_OPCODE_UDIV:        return OP_DIV;
432    case SM4_OPCODE_ULT:         return OP_SET;
433    case SM4_OPCODE_UGE:         return OP_SET;
434    case SM4_OPCODE_UMUL:        return OP_MUL;
435    case SM4_OPCODE_UMAD:        return OP_MAD;
436    case SM4_OPCODE_UMAX:        return OP_MAX;
437    case SM4_OPCODE_UMIN:        return OP_MIN;
438    case SM4_OPCODE_USHR:        return OP_SHR;
439    case SM4_OPCODE_UTOF:        return OP_CVT;
440    case SM4_OPCODE_XOR:         return OP_XOR;
441 
442    case SM4_OPCODE_GATHER4:            return OP_TXG;
443    case SM4_OPCODE_SAMPLE_POS:         return OP_PIXLD;
444    case SM4_OPCODE_SAMPLE_INFO:        return OP_PIXLD;
445    case SM4_OPCODE_EMIT_STREAM:        return OP_EMIT;
446    case SM4_OPCODE_CUT_STREAM:         return OP_RESTART;
447    case SM4_OPCODE_EMITTHENCUT_STREAM: return OP_EMIT;
448    case SM4_OPCODE_INTERFACE_CALL:     return OP_CALL;
449    case SM4_OPCODE_BUFINFO:            return OP_TXQ;
450    case SM4_OPCODE_DERIV_RTX_COARSE:   return OP_DFDX;
451    case SM4_OPCODE_DERIV_RTX_FINE:     return OP_DFDX;
452    case SM4_OPCODE_DERIV_RTY_COARSE:   return OP_DFDY;
453    case SM4_OPCODE_DERIV_RTY_FINE:     return OP_DFDY;
454    case SM4_OPCODE_GATHER4_C:          return OP_TXG;
455    case SM4_OPCODE_GATHER4_PO:         return OP_TXG;
456    case SM4_OPCODE_GATHER4_PO_C:       return OP_TXG;
457 
458    case SM4_OPCODE_RCP:       return OP_RCP;
459    case SM4_OPCODE_F32TOF16:  return OP_CVT;
460    case SM4_OPCODE_F16TOF32:  return OP_CVT;
461    case SM4_OPCODE_UADDC:     return OP_ADD;
462    case SM4_OPCODE_USUBB:     return OP_SUB;
463    case SM4_OPCODE_COUNTBITS: return OP_POPCNT;
464 
465    case SM4_OPCODE_ATOMIC_AND:       return OP_AND;
466    case SM4_OPCODE_ATOMIC_OR:        return OP_OR;
467    case SM4_OPCODE_ATOMIC_XOR:       return OP_XOR;
468    case SM4_OPCODE_ATOMIC_CMP_STORE: return OP_STORE;
469    case SM4_OPCODE_ATOMIC_IADD:      return OP_ADD;
470    case SM4_OPCODE_ATOMIC_IMAX:      return OP_MAX;
471    case SM4_OPCODE_ATOMIC_IMIN:      return OP_MIN;
472    case SM4_OPCODE_ATOMIC_UMAX:      return OP_MAX;
473    case SM4_OPCODE_ATOMIC_UMIN:      return OP_MIN;
474 
475    case SM4_OPCODE_SYNC:  return OP_MEMBAR;
476    case SM4_OPCODE_DADD:  return OP_ADD;
477    case SM4_OPCODE_DMAX:  return OP_MAX;
478    case SM4_OPCODE_DMIN:  return OP_MIN;
479    case SM4_OPCODE_DMUL:  return OP_MUL;
480    case SM4_OPCODE_DEQ:   return OP_SET;
481    case SM4_OPCODE_DGE:   return OP_SET;
482    case SM4_OPCODE_DLT:   return OP_SET;
483    case SM4_OPCODE_DNE:   return OP_SET;
484    case SM4_OPCODE_DMOV:  return OP_MOV;
485    case SM4_OPCODE_DMOVC: return OP_MOV;
486    case SM4_OPCODE_DTOF:  return OP_CVT;
487    case SM4_OPCODE_FTOD:  return OP_CVT;
488 
489    default:
490       return OP_NOP;
491    }
492 }
493 
494 unsigned int
getDstOpndCount(enum sm4_opcode opcode) const495 Converter::getDstOpndCount(enum sm4_opcode opcode) const
496 {
497    switch (opcode) {
498    case SM4_OPCODE_SINCOS:
499    case SM4_OPCODE_UDIV:
500    case SM4_OPCODE_IMUL:
501    case SM4_OPCODE_UMUL:
502       return 2;
503    case SM4_OPCODE_BREAK:
504    case SM4_OPCODE_BREAKC:
505    case SM4_OPCODE_CALL:
506    case SM4_OPCODE_CALLC:
507    case SM4_OPCODE_CONTINUE:
508    case SM4_OPCODE_CONTINUEC:
509    case SM4_OPCODE_DISCARD:
510    case SM4_OPCODE_EMIT:
511    case SM4_OPCODE_EMIT_STREAM:
512    case SM4_OPCODE_CUT:
513    case SM4_OPCODE_CUT_STREAM:
514    case SM4_OPCODE_EMITTHENCUT:
515    case SM4_OPCODE_EMITTHENCUT_STREAM:
516    case SM4_OPCODE_IF:
517    case SM4_OPCODE_ELSE:
518    case SM4_OPCODE_ENDIF:
519    case SM4_OPCODE_LOOP:
520    case SM4_OPCODE_ENDLOOP:
521    case SM4_OPCODE_RET:
522    case SM4_OPCODE_RETC:
523    case SM4_OPCODE_SYNC:
524    case SM4_OPCODE_SWITCH:
525    case SM4_OPCODE_CASE:
526    case SM4_OPCODE_HS_DECLS:
527    case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
528    case SM4_OPCODE_HS_FORK_PHASE:
529    case SM4_OPCODE_HS_JOIN_PHASE:
530       return 0;
531    default:
532       return 1;
533    }
534 }
535 
536 #define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b;
537 #define TARG_CASE_2(a, b) case SM4_TARGET_##a: \
538    return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b
539 
540 TexTarget
cvtTexTarget(enum sm4_target targ,enum sm4_opcode op,operation * opr) const541 Converter::cvtTexTarget(enum sm4_target targ,
542                         enum sm4_opcode op, operation *opr) const
543 {
544    bool dc = (op == SM4_OPCODE_SAMPLE_C ||
545               op == SM4_OPCODE_SAMPLE_C_LZ ||
546               op == SM4_OPCODE_GATHER4_C ||
547               op == SM4_OPCODE_GATHER4_PO_C);
548 
549    if (opr) {
550       switch (targ) {
551       case SM4_TARGET_RAW_BUFFER:        *opr = OP_LOAD; break;
552       case SM4_TARGET_STRUCTURED_BUFFER: *opr = OP_SULD; break;
553       default:
554          *opr = OP_TEX;
555          break;
556       }
557    }
558 
559    switch (targ) {
560    TARG_CASE_1(UNKNOWN, 2D);
561    TARG_CASE_2(TEXTURE1D,         1D);
562    TARG_CASE_2(TEXTURE2D,         2D);
563    TARG_CASE_1(TEXTURE2DMS,       2D_MS);
564    TARG_CASE_1(TEXTURE3D,         3D);
565    TARG_CASE_2(TEXTURECUBE,       CUBE);
566    TARG_CASE_2(TEXTURE1DARRAY,    1D_ARRAY);
567    TARG_CASE_2(TEXTURE2DARRAY,    2D_ARRAY);
568    TARG_CASE_1(TEXTURE2DMSARRAY,  2D_MS_ARRAY);
569    TARG_CASE_2(TEXTURECUBEARRAY,  CUBE_ARRAY);
570    TARG_CASE_1(BUFFER,            BUFFER);
571    TARG_CASE_1(RAW_BUFFER,        BUFFER);
572    TARG_CASE_1(STRUCTURED_BUFFER, BUFFER);
573    default:
574       assert(!"invalid SM4 texture target");
575       return dc ? TEX_TARGET_2D_SHADOW : TEX_TARGET_2D;
576    }
577 }
578 
579 static inline uint32_t
getSVIndex(enum sm4_sv sv)580 getSVIndex(enum sm4_sv sv)
581 {
582    switch (sv) {
583    case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return 0;
584    case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return 1;
585    case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return 2;
586    case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return 3;
587 
588    case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: return 4;
589    case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: return 5;
590 
591    case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return 0;
592    case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return 1;
593    case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return 2;
594 
595    case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: return 4;
596 
597    case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: return 0;
598 
599    case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: return 4;
600 
601    default:
602       return 0;
603    }
604 }
605 
606 SVSemantic
cvtSemantic(enum sm4_sv sv,uint8_t & idx) const607 Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const
608 {
609    idx = 0;
610 
611    switch (sv) {
612    case SM4_SV_UNDEFINED:     return SV_UNDEFINED;
613    case SM4_SV_POSITION:      return SV_POSITION;
614    case SM4_SV_CLIP_DISTANCE: return SV_CLIP_DISTANCE;
615    case SM4_SV_CULL_DISTANCE: return SV_CLIP_DISTANCE; // XXX: distinction
616    case SM4_SV_RENDER_TARGET_ARRAY_INDEX: return SV_LAYER;
617    case SM4_SV_VIEWPORT_ARRAY_INDEX:  return SV_VIEWPORT_INDEX;
618    case SM4_SV_VERTEX_ID:     return SV_VERTEX_ID;
619    case SM4_SV_PRIMITIVE_ID:  return SV_PRIMITIVE_ID;
620    case SM4_SV_INSTANCE_ID:   return SV_INSTANCE_ID;
621    case SM4_SV_IS_FRONT_FACE: return SV_FACE;
622    case SM4_SV_SAMPLE_INDEX:  return SV_SAMPLE_INDEX;
623 
624    case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
625    case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
626    case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
627    case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
628    case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR:
629    case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR:
630    case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
631    case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
632    case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
633    case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR:
634    case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR:
635    case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR:
636       idx = getSVIndex(sv);
637       return SV_TESS_FACTOR;
638 
639    default:
640       assert(!"invalid SM4 system value");
641       return SV_UNDEFINED;
642    }
643 }
644 
645 unsigned
tgsiSemantic(SVSemantic sv,int index)646 Converter::tgsiSemantic(SVSemantic sv, int index)
647 {
648    switch (sv) {
649    case SV_POSITION:       return TGSI_SEMANTIC_POSITION;
650    case SV_FACE:           return TGSI_SEMANTIC_FACE;
651    case SV_LAYER:          return NV50_SEMANTIC_LAYER;
652    case SV_VIEWPORT_INDEX: return NV50_SEMANTIC_VIEWPORTINDEX;
653    case SV_POINT_SIZE:     return TGSI_SEMANTIC_PSIZE;
654    case SV_CLIP_DISTANCE:  return NV50_SEMANTIC_CLIPDISTANCE;
655    case SV_VERTEX_ID:      return TGSI_SEMANTIC_VERTEXID;
656    case SV_INSTANCE_ID:    return TGSI_SEMANTIC_INSTANCEID;
657    case SV_PRIMITIVE_ID:   return TGSI_SEMANTIC_PRIMID;
658    case SV_TESS_FACTOR:    return NV50_SEMANTIC_TESSFACTOR;
659    case SV_TESS_COORD:     return NV50_SEMANTIC_TESSCOORD;
660    case SV_INVOCATION_ID:  return NV50_SEMANTIC_INVOCATIONID;
661    default:
662       return TGSI_SEMANTIC_GENERIC;
663    }
664 }
665 
666 void
recordSV(unsigned sn,unsigned si,unsigned mask,bool input)667 Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input)
668 {
669    unsigned int i;
670    for (i = 0; i < info.numSysVals; ++i)
671       if (info.sv[i].sn == sn &&
672           info.sv[i].si == si)
673          return;
674    info.numSysVals = i + 1;
675    info.sv[i].sn = sn;
676    info.sv[i].si = si;
677    info.sv[i].mask = mask;
678    info.sv[i].input = input ? 1 : 0;
679 }
680 
681 bool
parseSignature()682 Converter::parseSignature()
683 {
684    struct nv50_ir_varying *patch;
685    unsigned int i, r, n;
686 
687    info.numInputs = 0;
688    info.numOutputs = 0;
689    info.numPatchConstants = 0;
690 
691    for (n = 0, i = 0; i < sm4.num_params_in; ++i) {
692       r = sm4.params_in[i].Register;
693 
694       info.in[r].mask |= sm4.params_in[i].ReadWriteMask;
695       // mask might be uninitialized ...
696       if (!sm4.params_in[i].ReadWriteMask)
697 	  info.in[r].mask = 0xf;
698       info.in[r].id = r;
699       if (info.in[r].regular) // already assigned semantic name/index
700          continue;
701       info.in[r].regular = 1;
702       info.in[r].patch = 0;
703 
704       info.numInputs = MAX2(info.numInputs, r + 1);
705 
706       switch (sm4.params_in[i].SystemValueType) {
707       case D3D_NAME_UNDEFINED:
708          info.in[r].sn = TGSI_SEMANTIC_GENERIC;
709          info.in[r].si = n++;
710          break;
711       case D3D_NAME_POSITION:
712          info.in[r].sn = TGSI_SEMANTIC_POSITION;
713          break;
714       case D3D_NAME_VERTEX_ID:
715          info.in[r].sn = TGSI_SEMANTIC_VERTEXID;
716          break;
717       case D3D_NAME_PRIMITIVE_ID:
718          info.in[r].sn = TGSI_SEMANTIC_PRIMID;
719          // no corresponding output
720          recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
721          break;
722       case D3D_NAME_INSTANCE_ID:
723          info.in[r].sn = TGSI_SEMANTIC_INSTANCEID;
724          break;
725       case D3D_NAME_IS_FRONT_FACE:
726          info.in[r].sn = TGSI_SEMANTIC_FACE;
727          // no corresponding output
728          recordSV(TGSI_SEMANTIC_FACE, 0, 1, true);
729          break;
730       default:
731          assert(!"invalid/unsupported input linkage semantic");
732          break;
733       }
734    }
735 
736    for (n = 0, i = 0; i < sm4.num_params_out; ++i) {
737       r = sm4.params_out[i].Register;
738 
739       info.out[r].mask |= ~sm4.params_out[i].ReadWriteMask;
740       info.out[r].id = r;
741       if (info.out[r].regular) // already assigned semantic name/index
742          continue;
743       info.out[r].regular = 1;
744       info.out[r].patch = 0;
745 
746       info.numOutputs = MAX2(info.numOutputs, r + 1);
747 
748       switch (sm4.params_out[i].SystemValueType) {
749       case D3D_NAME_UNDEFINED:
750          if (prog->getType() == Program::TYPE_FRAGMENT) {
751             info.out[r].sn = TGSI_SEMANTIC_COLOR;
752             info.out[r].si = info.prop.fp.numColourResults++;
753          } else {
754             info.out[r].sn = TGSI_SEMANTIC_GENERIC;
755             info.out[r].si = n++;
756          }
757          break;
758       case D3D_NAME_POSITION:
759       case D3D_NAME_DEPTH:
760       case D3D_NAME_DEPTH_GREATER_EQUAL:
761       case D3D_NAME_DEPTH_LESS_EQUAL:
762          info.out[r].sn = TGSI_SEMANTIC_POSITION;
763          info.io.fragDepth = r;
764          break;
765       case D3D_NAME_CULL_DISTANCE:
766       case D3D_NAME_CLIP_DISTANCE:
767          info.out[r].sn = NV50_SEMANTIC_CLIPDISTANCE;
768          info.out[r].si = sm4.params_out[i].SemanticIndex;
769          break;
770       case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
771          info.out[r].sn = NV50_SEMANTIC_LAYER;
772          break;
773       case D3D_NAME_VIEWPORT_ARRAY_INDEX:
774          info.out[r].sn = NV50_SEMANTIC_VIEWPORTINDEX;
775          break;
776       case D3D_NAME_PRIMITIVE_ID:
777          info.out[r].sn = TGSI_SEMANTIC_PRIMID;
778          break;
779       case D3D_NAME_TARGET:
780          info.out[r].sn = TGSI_SEMANTIC_COLOR;
781          info.out[r].si = sm4.params_out[i].SemanticIndex;
782          break;
783       case D3D_NAME_COVERAGE:
784          info.out[r].sn = NV50_SEMANTIC_SAMPLEMASK;
785          info.io.sampleMask = r;
786          break;
787       case D3D_NAME_SAMPLE_INDEX:
788       default:
789          assert(!"invalid/unsupported output linkage semantic");
790          break;
791       }
792    }
793 
794    if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
795       patch = &info.in[info.numInputs];
796    else
797       patch = &info.out[info.numOutputs];
798 
799    for (n = 0, i = 0; i < sm4.num_params_patch; ++i) {
800       r = sm4.params_patch[i].Register;
801 
802       patch[r].mask |= sm4.params_patch[i].Mask;
803       patch[r].id = r;
804       if (patch[r].regular) // already visited
805          continue;
806       patch[r].regular = 1;
807       patch[r].patch = 1;
808 
809       info.numPatchConstants = MAX2(info.numPatchConstants, r + 1);
810 
811       switch (sm4.params_patch[i].SystemValueType) {
812       case D3D_NAME_UNDEFINED:
813          patch[r].sn = TGSI_SEMANTIC_GENERIC;
814          patch[r].si = n++;
815          break;
816       case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
817       case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
818       case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
819          patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
820          patch[r].si = sm4.params_patch[i].SemanticIndex;
821          break;
822       case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
823       case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
824       case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
825          patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
826          patch[r].si = sm4.params_patch[i].SemanticIndex + 4;
827          break;
828       default:
829          assert(!"invalid patch-constant linkage semantic");
830          break;
831       }
832    }
833    if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
834       info.numInputs += info.numPatchConstants;
835    else
836       info.numOutputs += info.numPatchConstants;
837 
838    return true;
839 }
840 
841 bool
inspectDeclaration(const sm4_dcl & dcl)842 Converter::inspectDeclaration(const sm4_dcl& dcl)
843 {
844    int idx = -1;
845    enum sm4_interpolation ipa_mode;
846 
847    if (dcl.op.get() && dcl.op->is_index_simple(0))
848       idx = dcl.op->indices[0].disp;
849 
850    switch (dcl.opcode) {
851    case SM4_OPCODE_DCL_SAMPLER:
852       assert(idx >= 0);
853       shadow[idx] = dcl.dcl_sampler.shadow;
854       break;
855    case SM4_OPCODE_DCL_RESOURCE:
856    {
857       enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target;
858 
859       assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES);
860       resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL);
861       resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL);
862    }
863       break;
864    case SM4_OPCODE_DCL_CONSTANT_BUFFER:
865       // nothing to do
866       break;
867    case SM4_OPCODE_CUSTOMDATA:
868       info.immd.bufSize = dcl.num * 4;
869       info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize);
870       memcpy(info.immd.buf, dcl.data, info.immd.bufSize);
871       break;
872    case SM4_OPCODE_DCL_INDEX_RANGE:
873       // XXX: ?
874       break;
875    case SM4_OPCODE_DCL_INPUT_PS_SGV:
876    case SM4_OPCODE_DCL_INPUT_PS_SIV:
877    case SM4_OPCODE_DCL_INPUT_PS:
878    {
879       assert(idx >= 0 && idx < info.numInputs);
880       ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation;
881       interpMode[idx] = cvtInterpMode(ipa_mode);
882       setVaryingInterpMode(&info.in[idx], interpMode[idx]);
883    }
884       break;
885    case SM4_OPCODE_DCL_INPUT_SGV:
886    case SM4_OPCODE_DCL_INPUT_SIV:
887    case SM4_OPCODE_DCL_INPUT:
888       if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) {
889          idx = info.numInputs++;
890          info.in[idx].sn = NV50_SEMANTIC_TESSCOORD;
891          info.in[idx].mask = dcl.op->mask;
892       }
893       // rest handled in parseSignature
894       break;
895    case SM4_OPCODE_DCL_OUTPUT_SGV:
896    case SM4_OPCODE_DCL_OUTPUT_SIV:
897       switch (dcl.sv) {
898       case SM4_SV_POSITION:
899          assert(prog->getType() != Program::TYPE_FRAGMENT);
900          break;
901       case SM4_SV_CULL_DISTANCE: // XXX: order ?
902          info.io.cullDistanceMask |= 1 << info.io.clipDistanceMask;
903       // fall through
904       case SM4_SV_CLIP_DISTANCE:
905          info.io.clipDistanceMask++; // abuse as count
906          break;
907       default:
908          break;
909       }
910       switch (dcl.op->file) {
911       case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
912       case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
913       case SM4_FILE_OUTPUT_DEPTH:
914          if (info.io.fragDepth < 0xff)
915             break;
916          idx = info.io.fragDepth = info.numOutputs++;
917          info.out[idx].sn = TGSI_SEMANTIC_POSITION;
918          break;
919       case SM4_FILE_OUTPUT_COVERAGE_MASK:
920          if (info.io.sampleMask < 0xff)
921             break;
922          idx = info.io.sampleMask = info.numOutputs++;
923          info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK;
924          break;
925       default:
926          break;
927       }
928       break;
929    case SM4_OPCODE_DCL_OUTPUT:
930       // handled in parseSignature
931       break;
932    case SM4_OPCODE_DCL_TEMPS:
933       nrRegVals += dcl.num;
934       break;
935    case SM4_OPCODE_DCL_INDEXABLE_TEMP:
936       nrArrays++;
937       break;
938    case SM4_OPCODE_DCL_GLOBAL_FLAGS:
939       if (prog->getType() == Program::TYPE_FRAGMENT)
940          info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil;
941       break;
942 
943    case SM4_OPCODE_DCL_FUNCTION_BODY:
944       break;
945    case SM4_OPCODE_DCL_FUNCTION_TABLE:
946       break;
947    case SM4_OPCODE_DCL_INTERFACE:
948       break;
949 
950       // GP
951    case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
952       info.prop.gp.outputPrim = g3dPrim(
953          dcl.dcl_gs_output_primitive_topology.primitive_topology);
954       break;
955    case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
956       info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive);
957       break;
958    case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
959       info.prop.gp.maxVertices = dcl.num;
960       break;
961    case SM4_OPCODE_DCL_GS_INSTANCE_COUNT:
962       info.prop.gp.instanceCount = dcl.num;
963       break;
964    case SM4_OPCODE_DCL_STREAM:
965       break;
966 
967       // TCP/TEP
968    case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
969       info.prop.tp.inputPatchSize =
970          dcl.dcl_input_control_point_count.control_points;
971       break;
972    case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
973       info.prop.tp.outputPatchSize =
974          dcl.dcl_output_control_point_count.control_points;
975       break;
976    case SM4_OPCODE_DCL_TESS_DOMAIN:
977       switch (dcl.dcl_tess_domain.domain) {
978       case D3D_TESSELLATOR_DOMAIN_ISOLINE:
979          info.prop.tp.domain = PIPE_PRIM_LINES;
980          break;
981       case D3D_TESSELLATOR_DOMAIN_TRI:
982          info.prop.tp.domain = PIPE_PRIM_TRIANGLES;
983          break;
984       case D3D_TESSELLATOR_DOMAIN_QUAD:
985          info.prop.tp.domain = PIPE_PRIM_QUADS;
986          break;
987       case D3D_TESSELLATOR_DOMAIN_UNDEFINED:
988       default:
989          info.prop.tp.domain = PIPE_PRIM_MAX;
990          break;
991       }
992       break;
993    case SM4_OPCODE_DCL_TESS_PARTITIONING:
994       switch (dcl.dcl_tess_partitioning.partitioning) {
995       case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
996          info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD;
997          break;
998       case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
999          info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN;
1000          break;
1001       case D3D_TESSELLATOR_PARTITIONING_POW2:
1002          info.prop.tp.partitioning = NV50_TESS_PART_POW2;
1003          break;
1004       case D3D_TESSELLATOR_PARTITIONING_INTEGER:
1005       case D3D_TESSELLATOR_PARTITIONING_UNDEFINED:
1006       default:
1007          info.prop.tp.partitioning = NV50_TESS_PART_INTEGER;
1008          break;
1009       }
1010       break;
1011    case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
1012       switch (dcl.dcl_tess_output_primitive.primitive) {
1013       case D3D_TESSELLATOR_OUTPUT_LINE:
1014          info.prop.tp.outputPrim = PIPE_PRIM_LINES;
1015          break;
1016       case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:
1017          info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
1018          info.prop.tp.winding = +1;
1019          break;
1020       case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
1021          info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
1022          info.prop.tp.winding = -1;
1023          break;
1024       case D3D_TESSELLATOR_OUTPUT_POINT:
1025          info.prop.tp.outputPrim = PIPE_PRIM_POINTS;
1026          break;
1027       case D3D_TESSELLATOR_OUTPUT_UNDEFINED:
1028       default:
1029          info.prop.tp.outputPrim = PIPE_PRIM_MAX;
1030          break;
1031       }
1032       break;
1033 
1034    case SM4_OPCODE_HS_FORK_PHASE:
1035       ++subPhaseCnt[0];
1036       phase = 1;
1037       break;
1038    case SM4_OPCODE_HS_JOIN_PHASE:
1039       phase = 2;
1040       ++subPhaseCnt[1];
1041       break;
1042    case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1043    case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1044    case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR:
1045       break;
1046 
1047       // weird stuff
1048    case SM4_OPCODE_DCL_THREAD_GROUP:
1049    case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
1050    case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
1051    case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
1052    case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
1053    case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
1054    case SM4_OPCODE_DCL_RESOURCE_RAW:
1055    case SM4_OPCODE_DCL_RESOURCE_STRUCTURED:
1056       ERROR("unhandled declaration\n");
1057       abort();
1058       return false;
1059 
1060    default:
1061       assert(!"invalid SM4 declaration");
1062       return false;
1063    }
1064    return true;
1065 }
1066 
1067 void
allocateValues()1068 Converter::allocateValues()
1069 {
1070    lData = new DataArray[nrArrays];
1071 
1072    for (unsigned int i = 0; i < nrArrays; ++i)
1073       lData[i].setParent(this);
1074 
1075    tData32.setup(0, nrRegVals, 4, 4, FILE_GPR);
1076    tData64.setup(0, nrRegVals, 2, 8, FILE_GPR);
1077 
1078    if (prog->getType() == Program::TYPE_FRAGMENT)
1079       oData.setup(0, info.numOutputs, 4, 4, FILE_GPR);
1080 }
1081 
handleDeclaration(const sm4_dcl & dcl)1082 bool Converter::handleDeclaration(const sm4_dcl& dcl)
1083 {
1084    switch (dcl.opcode) {
1085    case SM4_OPCODE_DCL_INDEXABLE_TEMP:
1086       lData[nrArrays++].setup(arrayVol,
1087                               dcl.indexable_temp.num, dcl.indexable_temp.comps,
1088                               4, FILE_MEMORY_LOCAL);
1089       arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4;
1090       break;
1091    case SM4_OPCODE_HS_FORK_PHASE:
1092       if (subPhaseCnt[0])
1093          phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1];
1094       ++subPhaseCnt[0];
1095       break;
1096    case SM4_OPCODE_HS_JOIN_PHASE:
1097       if (subPhaseCnt[1])
1098          phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1];
1099       ++subPhaseCnt[1];
1100       break;
1101    case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1102       phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num;
1103       break;
1104    case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1105       phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num;
1106       break;
1107 
1108    default:
1109       break; // already handled in inspection
1110    }
1111 
1112    return true;
1113 }
1114 
1115 Symbol *
iSym(int i,int c)1116 Converter::iSym(int i, int c)
1117 {
1118    if (info.in[i].regular) {
1119       return mkSymbol(FILE_SHADER_INPUT, 0, sTy, info.in[i].slot[c] * 4);
1120    } else {
1121       return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si);
1122    }
1123 }
1124 
1125 Symbol *
oSym(int i,int c)1126 Converter::oSym(int i, int c)
1127 {
1128    if (info.out[i].regular) {
1129       return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4);
1130    } else {
1131       return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si);
1132    }
1133 }
1134 
1135 Value *
getSrcPtr(int s,int dim,int shl)1136 Converter::getSrcPtr(int s, int dim, int shl)
1137 {
1138    if (srcPtr[s][dim])
1139       return srcPtr[s][dim];
1140 
1141    sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get();
1142 
1143    if (!op)
1144       return NULL;
1145 
1146    Value *index = src(*op, 0, s);
1147 
1148    srcPtr[s][dim] = index;
1149    if (shl)
1150       srcPtr[s][dim] = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
1151    return srcPtr[s][dim];
1152 }
1153 
1154 Value *
getDstPtr(int d,int dim,int shl)1155 Converter::getDstPtr(int d, int dim, int shl)
1156 {
1157    assert(d == 0);
1158    if (dstPtr[dim])
1159       return dstPtr[dim];
1160 
1161    sm4_op *op = insn->ops[d]->indices[dim].reg.get();
1162    if (!op)
1163       return NULL;
1164 
1165    Value *index = src(*op, 0, d);
1166    if (shl)
1167       index = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
1168 
1169    return (dstPtr[dim] = index);
1170 }
1171 
1172 Value *
getVtxPtr(int s)1173 Converter::getVtxPtr(int s)
1174 {
1175    assert(s < 3);
1176    if (vtxBase[s])
1177       return vtxBase[s];
1178 
1179    sm4_op *op = insn->ops[s + nDstOpnds].get();
1180    if (!op)
1181       return NULL;
1182    int idx = op->indices[0].disp;
1183 
1184    vtxBase[s] = getSrcPtr(s, 0, 0);
1185    vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), vtxBase[s]);
1186    return vtxBase[s];
1187 }
1188 
1189 Value *
src(int i,int c)1190 Converter::src(int i, int c)
1191 {
1192    return src(*insn->ops[i + nDstOpnds], c, i);
1193 }
1194 
1195 Value *
dst(int i,int c)1196 Converter::dst(int i, int c)
1197 {
1198    return dst(*insn->ops[i], c, i);
1199 }
1200 
1201 void
saveDst(int i,int c,Value * value)1202 Converter::saveDst(int i, int c, Value *value)
1203 {
1204    if (insn->insn.sat)
1205       mkOp1(OP_SAT, dTy, value, value);
1206    return saveDst(*insn->ops[i], c, value, i);
1207 }
1208 
1209 Value *
interpolate(const sm4_op & op,int c,int i)1210 Converter::interpolate(const sm4_op& op, int c, int i)
1211 {
1212    int idx = op.indices[0].disp;
1213    int swz = op.swizzle[c];
1214    operation opr =
1215       (info.in[idx].linear || info.in[idx].flat) ? OP_LINTERP : OP_PINTERP;
1216 
1217    Value *ptr = getSrcPtr(i, 0, 4);
1218 
1219    Instruction *insn = new_Instruction(func, opr, TYPE_F32);
1220 
1221    insn->setDef(0, getScratch());
1222    insn->setSrc(0, iSym(idx, swz));
1223    if (opr == OP_PINTERP)
1224       insn->setSrc(1, fragCoord[3]);
1225    if (ptr)
1226       insn->setIndirect(0, 0, ptr);
1227 
1228    insn->setInterpolate(interpMode[idx]);
1229 
1230    bb->insertTail(insn);
1231    return insn->getDef(0);
1232 }
1233 
// Loads component c of the source operand op and returns the loaded value.
//
//  op - SM4 operand to read
//  c  - component; the operand's swizzle maps it to the component that is
//       actually read
//  s  - source slot of the operand, handed on to getSrcPtr, getVtxPtr and
//       interpolate
//
// Returns NULL for resource, sampler and UAV operands and, after asserting,
// for unhandled or invalid register files.
// The abs and neg operand modifiers are applied at the end of the function;
// the paths that return early (fragment program inputs, unrolled phase
// instance ids) do not go through them.
Value *
Converter::src(const sm4_op& op, int c, int s)
{
   const int size = typeSizeof(sTy);

   Instruction *ld;
   Value *res, *ptr, *vtx;
   int idx, dim, off;
   const int swz = op.swizzle[c];

   switch (op.file) {
   case SM4_FILE_IMMEDIATE32:
      res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32);
      break;
   case SM4_FILE_IMMEDIATE64:
      assert(c < 2);
      res = loadImm(NULL, op.imm_values[swz].u64);
      break;
   case SM4_FILE_TEMP:
      assert(op.is_index_simple(0));
      idx = op.indices[0].disp;
      // pick the 64 or 32 bit view of the temporaries by source type size
      if (size == 8)
         res = tData64.load(idx, swz, NULL);
      else
         res = tData32.load(idx, swz, NULL);
      break;
   case SM4_FILE_INPUT:
   case SM4_FILE_INPUT_CONTROL_POINT:
   case SM4_FILE_INPUT_PATCH_CONSTANT:
      // fragment program inputs are interpolated rather than loaded
      if (prog->getType() == Program::TYPE_FRAGMENT)
         return interpolate(op, c, s);

      // patch constants occupy the last numPatchConstants input slots
      idx = 0;
      if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
         idx = info.numInputs - info.numPatchConstants;

      if (op.num_indices == 2) {
         // two indices: the first one selects the vertex, the second one the
         // input
         vtx = getVtxPtr(s);
         ptr = getSrcPtr(s, 1, 4);
         idx += op.indices[1].disp;
         res = getSSA();
         ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz));
         ld->setIndirect(0, 0, ptr);
         ld->setIndirect(0, 1, vtx);
      } else {
         idx += op.indices[0].disp;
         res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4));
      }
      if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
         res->defs->getInsn()->perPatch = 1;
      break;
   case SM4_FILE_CONSTANT_BUFFER:
      assert(op.num_indices == 2);
      assert(op.is_index_simple(0));

      // index 0 selects the constant buffer, index 1 the 4-component vector
      // inside of it
      ptr = getSrcPtr(s, 1, 4);
      dim = op.indices[0].disp;
      off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);

      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr);
      break;
   case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER:
      // read through constant buffer 14
      ptr = getSrcPtr(s, 0, 4);
      off = (op.indices[0].disp * 4 + swz) * 4;
      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr);
      break;
   case SM4_FILE_INDEXABLE_TEMP:
   {
      assert(op.is_index_simple(0));
      // index 0 selects the array, index 1 the element
      int a = op.indices[0].disp;
      idx = op.indices[1].disp;
      res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4));
   }
      break;
   case SM4_FILE_INPUT_PRIMITIVEID:
      recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
      break;
   case SM4_FILE_INPUT_GS_INSTANCE_ID:
   case SM4_FILE_OUTPUT_CONTROL_POINT_ID:
      recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true);
      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0));
      break;
   case SM4_FILE_CYCLE_COUNTER:
      // component 0 reads SV_CLOCK index 0, any other component index 1
      res =
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0));
      break;
   case SM4_FILE_INPUT_FORK_INSTANCE_ID:
   case SM4_FILE_INPUT_JOIN_INSTANCE_ID:
   {
      phaseInstanceUsed = true;
      // in an unrolled phase the instance id is a known constant
      if (unrollPhase)
         return loadImm(NULL, phaseInstance);
      // otherwise use the invocation id, limited to the last instance of the
      // current sub-phase
      const unsigned int cnt = phaseInstCnt[phase - 1][subPhase];
      res = getScratch();
      res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0));
      res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1));
   }
      break;
   case SM4_FILE_INPUT_DOMAIN_POINT:
      assert(swz < 3);
      res = domainPt[swz];
      break;
   case SM4_FILE_THREAD_GROUP_SHARED_MEMORY:
      off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
      ptr = getSrcPtr(s, 0, 4);
      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr);
      break;
   case SM4_FILE_RESOURCE:
   case SM4_FILE_SAMPLER:
   case SM4_FILE_UNORDERED_ACCESS_VIEW:
      // these operands have no value to load
      return NULL;
   case SM4_FILE_INPUT_THREAD_ID:
      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz));
      break;
   case SM4_FILE_INPUT_THREAD_GROUP_ID:
      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz));
      break;
   case SM4_FILE_FUNCTION_INPUT:
   case SM4_FILE_INPUT_THREAD_ID_IN_GROUP:
      assert(!"unhandled source file");
      return NULL;
   default:
      assert(!"invalid source file");
      return NULL;
   }

   // operand modifiers: absolute value first, then negation
   if (op.abs)
      res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res);
   if (op.neg)
      res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res);
   return res;
}
1367 
1368 Value *
dst(const sm4_op & op,int c,int i)1369 Converter::dst(const sm4_op &op, int c, int i)
1370 {
1371    switch (op.file) {
1372    case SM4_FILE_TEMP:
1373       return tData32.acquire(op.indices[0].disp, c);
1374    case SM4_FILE_INDEXABLE_TEMP:
1375       return getScratch();
1376    case SM4_FILE_OUTPUT:
1377       if (prog->getType() == Program::TYPE_FRAGMENT)
1378          return oData.acquire(op.indices[0].disp, c);
1379       return getScratch();
1380    case SM4_FILE_NULL:
1381       return NULL;
1382    case SM4_FILE_OUTPUT_DEPTH:
1383    case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
1384    case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
1385    case SM4_FILE_OUTPUT_COVERAGE_MASK:
1386       return getScratch();
1387    case SM4_FILE_IMMEDIATE32:
1388    case SM4_FILE_IMMEDIATE64:
1389    case SM4_FILE_CONSTANT_BUFFER:
1390    case SM4_FILE_RESOURCE:
1391    case SM4_FILE_SAMPLER:
1392    case SM4_FILE_UNORDERED_ACCESS_VIEW:
1393       assert(!"invalid destination file");
1394       return NULL;
1395    default:
1396       assert(!"invalid file");
1397       return NULL;
1398    }
1399 }
1400 
1401 void
saveFragDepth(operation op,Value * value)1402 Converter::saveFragDepth(operation op, Value *value)
1403 {
1404    if (op == OP_MIN || op == OP_MAX) {
1405       Value *zIn;
1406       zIn = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2));
1407       value = mkOp2v(op, TYPE_F32, getSSA(), value, zIn);
1408    }
1409    oData.store(info.io.fragDepth, 2, NULL, value);
1410 }
1411 
1412 void
saveDst(const sm4_op & op,int c,Value * value,int s)1413 Converter::saveDst(const sm4_op &op, int c, Value *value, int s)
1414 {
1415    Symbol *sym;
1416    Instruction *st;
1417    int a, idx;
1418 
1419    switch (op.file) {
1420    case SM4_FILE_TEMP:
1421       idx = op.indices[0].disp;
1422       tData32.store(idx, c, NULL, value);
1423       break;
1424    case SM4_FILE_INDEXABLE_TEMP:
1425       a = op.indices[0].disp;
1426       idx = op.indices[1].disp;
1427       // FIXME: shift is wrong, depends in lData
1428       lData[a].store(idx, c, getDstPtr(s, 1, 4), value);
1429       break;
1430    case SM4_FILE_OUTPUT:
1431       assert(op.num_indices == 1);
1432       idx = op.indices[0].disp;
1433       if (prog->getType() == Program::TYPE_FRAGMENT) {
1434          oData.store(idx, c, NULL, value);
1435       } else {
1436          if (phase)
1437             idx += info.numOutputs - info.numPatchConstants;
1438          const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4;
1439          sym = oSym(idx, c);
1440          if (sym->reg.file == FILE_SHADER_OUTPUT)
1441             st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value);
1442          else
1443             st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value);
1444          st->perPatch = phase ? 1 : 0;
1445       }
1446       break;
1447    case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
1448       saveFragDepth(OP_MAX, value);
1449       break;
1450    case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
1451       saveFragDepth(OP_MIN, value);
1452       break;
1453    case SM4_FILE_OUTPUT_DEPTH:
1454       saveFragDepth(OP_NOP, value);
1455       break;
1456    case SM4_FILE_OUTPUT_COVERAGE_MASK:
1457       oData.store(info.io.sampleMask, 0, NULL, value);
1458       break;
1459    case SM4_FILE_IMMEDIATE32:
1460    case SM4_FILE_IMMEDIATE64:
1461    case SM4_FILE_INPUT:
1462    case SM4_FILE_CONSTANT_BUFFER:
1463    case SM4_FILE_RESOURCE:
1464    case SM4_FILE_SAMPLER:
1465       assert(!"invalid destination file");
1466       return;
1467    default:
1468       assert(!"invalid file");
1469       return;
1470    }
1471 }
1472 
1473 void
emitTex(Value * dst0[4],TexInstruction * tex,const uint8_t swz[4])1474 Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4])
1475 {
1476    Value *res[4] = { NULL, NULL, NULL, NULL };
1477    unsigned int c, d;
1478 
1479    for (c = 0; c < 4; ++c)
1480       if (dst0[c])
1481          tex->tex.mask |= 1 << swz[c];
1482    for (d = 0, c = 0; c < 4; ++c)
1483       if (tex->tex.mask & (1 << c))
1484          tex->setDef(d++, (res[c] = getScratch()));
1485 
1486    bb->insertTail(tex);
1487 
1488    if (insn->opcode == SM4_OPCODE_RESINFO) {
1489       if (tex->tex.target.getDim() == 1) {
1490 	 res[2] = loadImm(NULL, 0);
1491          if (!tex->tex.target.isArray())
1492             res[1] = res[2];
1493       } else
1494       if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) {
1495          res[2] = loadImm(NULL, 0);
1496       }
1497       for (c = 0; c < 4; ++c) {
1498          if (!dst0[c])
1499             continue;
1500          Value *src = res[swz[c]];
1501          assert(src);
1502          switch (insn->insn.resinfo_return_type) {
1503          case 0:
1504             mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
1505             break;
1506          case 1:
1507             mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
1508             if (swz[c] < tex->tex.target.getDim())
1509                mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]);
1510             break;
1511          default:
1512             mkMov(dst0[c], src);
1513             break;
1514          }
1515       }
1516    } else {
1517       for (c = 0; c < 4; ++c)
1518          if (dst0[c])
1519             mkMov(dst0[c], res[swz[c]]);
1520    }
1521 }
1522 
1523 void
handleQUERY(Value * dst0[4],enum TexQuery query)1524 Converter::handleQUERY(Value *dst0[4], enum TexQuery query)
1525 {
1526    TexInstruction *texi = new_TexInstruction(func, OP_TXQ);
1527    texi->tex.query = query;
1528 
1529    assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs
1530 
1531    const int rOp = (query == TXQ_DIMS) ? 2 : 1;
1532    const int sOp = (query == TXQ_DIMS) ? 0 : 1;
1533 
1534    const int tR = insn->ops[rOp]->indices[0].disp;
1535 
1536    texi->setTexture(resourceType[tR][0], tR, 0);
1537 
1538    texi->setSrc(0, src(sOp, 0)); // mip level or sample index
1539 
1540    emitTex(dst0, texi, insn->ops[rOp]->swizzle);
1541 }
1542 
1543 void
handleLOAD(Value * dst0[4])1544 Converter::handleLOAD(Value *dst0[4])
1545 {
1546    TexInstruction *texi = new_TexInstruction(func, OP_TXF);
1547    unsigned int c;
1548 
1549    const int tR = insn->ops[2]->indices[0].disp;
1550 
1551    texi->setTexture(resourceType[tR][0], tR, 0);
1552 
1553    for (c = 0; c < texi->tex.target.getArgCount(); ++c)
1554       texi->setSrc(c, src(0, c));
1555 
1556    if (texi->tex.target == TEX_TARGET_BUFFER) {
1557       texi->tex.levelZero = true;
1558    } else {
1559       texi->setSrc(c++, src(0, 3));
1560       for (c = 0; c < 3; ++c) {
1561          texi->tex.offset[0][c] = insn->sample_offset[c];
1562 	 if (texi->tex.offset[0][c])
1563             texi->tex.useOffsets = 1;
1564       }
1565    }
1566 
1567    emitTex(dst0, texi, insn->ops[2]->swizzle);
1568 }
1569 
1570 // order of nv50 ir sources: x y z/layer lod/bias dc
1571 void
handleSAMPLE(operation opr,Value * dst0[4])1572 Converter::handleSAMPLE(operation opr, Value *dst0[4])
1573 {
1574    TexInstruction *texi = new_TexInstruction(func, opr);
1575    unsigned int c, s;
1576    Value *arg[4], *src0[4];
1577    Value *val;
1578    Value *lod = NULL, *dc = NULL;
1579 
1580    const int tR = insn->ops[2]->indices[0].disp;
1581    const int tS = insn->ops[3]->indices[0].disp;
1582 
1583    TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 1 : 0];
1584 
1585    for (c = 0; c < tgt.getArgCount(); ++c)
1586       arg[c] = src0[c] = src(0, c);
1587 
1588    if (insn->opcode == SM4_OPCODE_SAMPLE_L ||
1589        insn->opcode == SM4_OPCODE_SAMPLE_B) {
1590       lod = src(3, 0);
1591    } else
1592    if (insn->opcode == SM4_OPCODE_SAMPLE_C ||
1593        insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) {
1594       dc = src(3, 0);
1595       if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ)
1596          texi->tex.levelZero = true;
1597    } else
1598    if (insn->opcode == SM4_OPCODE_SAMPLE_D) {
1599       for (c = 0; c < tgt.getDim(); ++c) {
1600          texi->dPdx[c] = src(3, c);
1601          texi->dPdy[c] = src(4, c);
1602       }
1603    }
1604 
1605    if (tgt.isCube()) {
1606       for (c = 0; c < 3; ++c)
1607          src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
1608       val = getScratch();
1609       mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]);
1610       mkOp2(OP_MAX, TYPE_F32, val, src0[2], val);
1611       mkOp1(OP_RCP, TYPE_F32, val, val);
1612       for (c = 0; c < 3; ++c)
1613          src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
1614    }
1615 
1616    for (s = 0; s < tgt.getArgCount(); ++s)
1617       texi->setSrc(s, src0[s]);
1618    if (lod)
1619       texi->setSrc(s++, lod);
1620    if (dc)
1621       texi->setSrc(s++, dc);
1622 
1623    for (c = 0; c < 3; ++c) {
1624       texi->tex.offset[0][c] = insn->sample_offset[c];
1625       if (texi->tex.offset[0][c])
1626          texi->tex.useOffsets = 1;
1627    }
1628 
1629    texi->setTexture(tgt, tR, tS);
1630 
1631    emitTex(dst0, texi, insn->ops[2]->swizzle);
1632 }
1633 
1634 void
handleDP(Value * dst0[4],int dim)1635 Converter::handleDP(Value *dst0[4], int dim)
1636 {
1637    Value *src0 = src(0, 0), *src1 = src(1, 0);
1638    Value *dotp = getScratch();
1639 
1640    assert(dim > 0);
1641 
1642    mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
1643    for (int c = 1; c < dim; ++c)
1644       mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp);
1645 
1646    for (int c = 0; c < 4; ++c)
1647       dst0[c] = dotp;
1648 }
1649 
1650 void
insertConvergenceOps(BasicBlock * conv,BasicBlock * fork)1651 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
1652 {
1653    FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
1654    join->fixed = 1;
1655    conv->insertHead(join);
1656 
1657    fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
1658    fork->insertBefore(fork->getExit(), fork->joinAt);
1659 }
1660 
1661 void
finalizeShader()1662 Converter::finalizeShader()
1663 {
1664    if (finalized)
1665       return;
1666    BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
1667    entryBBs.pop();
1668 
1669    finalized = true;
1670 
1671    bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
1672    setPosition(epilogue, true);
1673 
1674    if (prog->getType() == Program::TYPE_FRAGMENT)
1675       exportOutputs();
1676 
1677    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1678 }
1679 
// Runs the following statement once for each of the 4 channels that is set
// in the write mask of operand 0 of the current instruction (insn), with
// (chan) holding the channel index.
// NOTE: expands to a for/if pair without braces, so an 'else' directly after
// the controlled statement would bind to the hidden 'if'.
#define FOR_EACH_DST0_ENABLED_CHANNEL32(chan)         \
   for ((chan) = 0; (chan) < 4; ++(chan))             \
      if (insn->ops[0].get()->mask & (1 << (chan)))

// Same as FOR_EACH_DST0_ENABLED_CHANNEL32, but only visits channels 0 and 1.
#define FOR_EACH_DST0_ENABLED_CHANNEL64(chan)         \
   for ((chan) = 0; (chan) < 2; ++(chan))             \
      if (insn->ops[0].get()->mask & (1 << (chan)))
1687 
1688 bool
checkDstSrcAliasing() const1689 Converter::checkDstSrcAliasing() const
1690 {
1691    for (unsigned int d = 0; d < nDstOpnds; ++d) {
1692       for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) {
1693          if (insn->ops[d]->file != insn->ops[s]->file)
1694             continue;
1695          int i = insn->ops[s]->num_indices - 1;
1696          if (i != insn->ops[d]->num_indices - 1)
1697             continue;
1698          if (insn->ops[d]->is_index_simple(i) &&
1699              insn->ops[s]->is_index_simple(i) &&
1700              insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp)
1701             return true;
1702       }
1703    }
1704    return false;
1705 }
1706 
1707 bool
handleInstruction(unsigned int pos)1708 Converter::handleInstruction(unsigned int pos)
1709 {
1710    Value *dst0[4], *rDst0[4];
1711    Value *dst1[4], *rDst1[4];
1712    int c, nc;
1713 
1714    insn = sm4.insns[pos];
1715    enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode);
1716 
1717    operation op = cvtOpcode(opcode);
1718 
1719    sTy = inferSrcType(opcode);
1720    dTy = inferDstType(opcode);
1721 
1722    nc = dTy == TYPE_F64 ? 2 : 4;
1723 
1724    nDstOpnds = getDstOpndCount(opcode);
1725 
1726    bool useScratchDst = checkDstSrcAliasing();
1727 
1728    INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst);
1729 
1730    if (nDstOpnds >= 1) {
1731       for (c = 0; c < nc; ++c)
1732          rDst0[c] = dst0[c] =
1733             insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL;
1734       if (useScratchDst)
1735          for (c = 0; c < nc; ++c)
1736             dst0[c] = rDst0[c] ? getScratch() : NULL;
1737    }
1738 
1739    if (nDstOpnds >= 2) {
1740       for (c = 0; c < nc; ++c)
1741          rDst1[c] = dst1[c] =
1742             insn->ops[1].get()->mask & (1 << c) ? dst(1, c) : NULL;
1743       if (useScratchDst)
1744          for (c = 0; c < nc; ++c)
1745             dst1[c] = rDst1[c] ? getScratch() : NULL;
1746    }
1747 
1748    switch (insn->opcode) {
1749    case SM4_OPCODE_ADD:
1750    case SM4_OPCODE_AND:
1751    case SM4_OPCODE_DIV:
1752    case SM4_OPCODE_IADD:
1753    case SM4_OPCODE_IMAX:
1754    case SM4_OPCODE_IMIN:
1755    case SM4_OPCODE_MIN:
1756    case SM4_OPCODE_MAX:
1757    case SM4_OPCODE_MUL:
1758    case SM4_OPCODE_OR:
1759    case SM4_OPCODE_UMAX:
1760    case SM4_OPCODE_UMIN:
1761    case SM4_OPCODE_XOR:
1762       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1763          Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1764          if (dTy == TYPE_F32)
1765             insn->ftz = 1;
1766       }
1767       break;
1768 
1769    case SM4_OPCODE_ISHL:
1770    case SM4_OPCODE_ISHR:
1771    case SM4_OPCODE_USHR:
1772       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1773          Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1774          insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP;
1775       }
1776       break;
1777 
1778    case SM4_OPCODE_IMAD:
1779    case SM4_OPCODE_MAD:
1780    case SM4_OPCODE_UMAD:
1781       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1782          mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c));
1783       }
1784       break;
1785 
1786    case SM4_OPCODE_DADD:
1787    case SM4_OPCODE_DMAX:
1788    case SM4_OPCODE_DMIN:
1789    case SM4_OPCODE_DMUL:
1790       FOR_EACH_DST0_ENABLED_CHANNEL64(c) {
1791          mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1792       }
1793       break;
1794 
1795    case SM4_OPCODE_UDIV:
1796       for (c = 0; c < 4; ++c) {
1797          Value *dvn, *dvs;
1798          if (dst0[c] || dst1[c]) {
1799             dvn = src(0, c);
1800             dvs = src(1, c);
1801          }
1802          if (dst0[c])
1803             mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs);
1804          if (dst1[c])
1805             mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs);
1806       }
1807       break;
1808 
1809    case SM4_OPCODE_IMUL:
1810    case SM4_OPCODE_UMUL:
1811       for (c = 0; c < 4; ++c) {
1812          Value *a, *b;
1813          if (dst0[c] || dst1[c]) {
1814             a = src(0, c);
1815             b = src(1, c);
1816          }
1817          if (dst0[c])
1818             mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp =
1819                NV50_IR_SUBOP_MUL_HIGH;
1820          if (dst1[c])
1821             mkOp2(OP_MUL, dTy, dst1[c], a, b);
1822       }
1823       break;
1824 
1825    case SM4_OPCODE_DP2:
1826       handleDP(dst0, 2);
1827       break;
1828    case SM4_OPCODE_DP3:
1829       handleDP(dst0, 3);
1830       break;
1831    case SM4_OPCODE_DP4:
1832       handleDP(dst0, 4);
1833       break;
1834 
1835    case SM4_OPCODE_DERIV_RTX:
1836    case SM4_OPCODE_DERIV_RTX_COARSE:
1837    case SM4_OPCODE_DERIV_RTX_FINE:
1838    case SM4_OPCODE_DERIV_RTY:
1839    case SM4_OPCODE_DERIV_RTY_COARSE:
1840    case SM4_OPCODE_DERIV_RTY_FINE:
1841    case SM4_OPCODE_MOV:
1842    case SM4_OPCODE_INEG:
1843    case SM4_OPCODE_NOT:
1844    case SM4_OPCODE_SQRT:
1845    case SM4_OPCODE_COUNTBITS:
1846    case SM4_OPCODE_EXP:
1847    case SM4_OPCODE_LOG:
1848    case SM4_OPCODE_RCP:
1849       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1850          mkOp1(op, dTy, dst0[c], src(0, c));
1851       }
1852       break;
1853 
1854    case SM4_OPCODE_FRC:
1855       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1856          Value *val = getScratch();
1857          Value *src0 = src(0, c);
1858          mkOp1(OP_FLOOR, TYPE_F32, val, src0);
1859          mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val);
1860       }
1861       break;
1862 
1863    case SM4_OPCODE_MOVC:
1864       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1865          mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c),
1866                src(0, c));
1867       break;
1868 
1869    case SM4_OPCODE_ROUND_NE:
1870    case SM4_OPCODE_ROUND_NI:
1871    case SM4_OPCODE_ROUND_PI:
1872    case SM4_OPCODE_ROUND_Z:
1873       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1874          Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c));
1875          rnd->ftz = 1;
1876          rnd->rnd = cvtRoundingMode(opcode);
1877       }
1878       break;
1879 
1880    case SM4_OPCODE_RSQ:
1881       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1882          mkOp1(op, dTy, dst0[c], src(0, c));
1883       break;
1884 
1885    case SM4_OPCODE_SINCOS:
1886       for (c = 0; c < 4; ++c) {
1887          if (!dst0[c] && !dst1[c])
1888             continue;
1889          Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c));
1890          if (dst0[c])
1891             mkOp1(OP_SIN, TYPE_F32, dst0[c], val);
1892          if (dst1[c])
1893             mkOp1(OP_COS, TYPE_F32, dst1[c], val);
1894       }
1895       break;
1896 
1897    case SM4_OPCODE_EQ:
1898    case SM4_OPCODE_GE:
1899    case SM4_OPCODE_IEQ:
1900    case SM4_OPCODE_IGE:
1901    case SM4_OPCODE_ILT:
1902    case SM4_OPCODE_LT:
1903    case SM4_OPCODE_NE:
1904    case SM4_OPCODE_INE:
1905    case SM4_OPCODE_ULT:
1906    case SM4_OPCODE_UGE:
1907    case SM4_OPCODE_DEQ:
1908    case SM4_OPCODE_DGE:
1909    case SM4_OPCODE_DLT:
1910    case SM4_OPCODE_DNE:
1911    {
1912       CondCode cc = cvtCondCode(opcode);
1913       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1914          CmpInstruction *set;
1915          set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), NULL);
1916          set->setType(dTy, sTy);
1917          if (sTy == TYPE_F32)
1918             set->ftz = 1;
1919       }
1920    }
1921       break;
1922 
1923    case SM4_OPCODE_FTOI:
1924    case SM4_OPCODE_FTOU:
1925       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1926          mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z;
1927       break;
1928    case SM4_OPCODE_ITOF:
1929    case SM4_OPCODE_UTOF:
1930    case SM4_OPCODE_F32TOF16:
1931    case SM4_OPCODE_F16TOF32:
1932    case SM4_OPCODE_DTOF:
1933    case SM4_OPCODE_FTOD:
1934       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1935          mkCvt(op, dTy, dst0[c], sTy, src(0, c));
1936       break;
1937 
1938    case SM4_OPCODE_CUT:
1939    case SM4_OPCODE_CUT_STREAM:
1940       mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1;
1941       break;
1942    case SM4_OPCODE_EMIT:
1943    case SM4_OPCODE_EMIT_STREAM:
1944       mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1;
1945       break;
1946    case SM4_OPCODE_EMITTHENCUT:
1947    case SM4_OPCODE_EMITTHENCUT_STREAM:
1948    {
1949       Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL,  mkImm(0));
1950       cut->fixed = 1;
1951       cut->subOp = NV50_IR_SUBOP_EMIT_RESTART;
1952    }
1953       break;
1954 
1955    case SM4_OPCODE_DISCARD:
1956       info.prop.fp.usesDiscard = TRUE;
1957       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(
1958          insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
1959       break;
1960 
1961    case SM4_OPCODE_CALL:
1962    case SM4_OPCODE_CALLC:
1963       assert(!"CALL/CALLC not implemented");
1964       break;
1965 
1966    case SM4_OPCODE_RET:
1967       // XXX: the following doesn't work with subroutines / early ret
1968       if (!haveNextPhase(pos))
1969          finalizeShader();
1970       else
1971          phaseEnded = phase + 1;
1972       break;
1973 
1974    case SM4_OPCODE_IF:
1975    {
1976       BasicBlock *ifClause = new BasicBlock(func);
1977 
1978       bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE);
1979       condBBs.push(bb);
1980       joinBBs.push(bb);
1981 
1982       mkFlow(OP_BRA, NULL, insn->insn.test_nz ? CC_NOT_P : CC_P, src(0, 0));
1983 
1984       setPosition(ifClause, true);
1985    }
1986       break;
1987    case SM4_OPCODE_ELSE:
1988    {
1989       BasicBlock *elseClause = new BasicBlock(func);
1990       BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
1991 
1992       forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE);
1993       condBBs.push(bb);
1994 
1995       forkPoint->getExit()->asFlow()->target.bb = elseClause;
1996       if (!bb->isTerminated())
1997          mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
1998 
1999       setPosition(elseClause, true);
2000    }
2001       break;
2002    case SM4_OPCODE_ENDIF:
2003    {
2004       BasicBlock *convPoint = new BasicBlock(func);
2005       BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2006       BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
2007 
2008       if (!bb->isTerminated()) {
2009          // we only want join if none of the clauses ended with CONT/BREAK/RET
2010          if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
2011             insertConvergenceOps(convPoint, forkPoint);
2012          mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL);
2013          bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
2014       }
2015 
2016       if (lastBB->getExit()->op == OP_BRA) {
2017          lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
2018          lastBB->getExit()->asFlow()->target.bb = convPoint;
2019       }
2020       setPosition(convPoint, true);
2021    }
2022       break;
2023 
2024    case SM4_OPCODE_SWITCH:
2025    case SM4_OPCODE_CASE:
2026    case SM4_OPCODE_ENDSWITCH:
2027       assert(!"SWITCH/CASE/ENDSWITCH not implemented");
2028       break;
2029 
2030    case SM4_OPCODE_LOOP:
2031    {
2032       BasicBlock *loopHeader = new BasicBlock(func);
2033       BasicBlock *loopBreak = new BasicBlock(func);
2034 
2035       loopBBs.push(loopHeader);
2036       breakBBs.push(loopBreak);
2037       if (loopBBs.getSize() > func->loopNestingBound)
2038          func->loopNestingBound++;
2039 
2040       mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL);
2041 
2042       bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE);
2043       setPosition(loopHeader, true);
2044       mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL);
2045    }
2046       break;
2047    case SM4_OPCODE_ENDLOOP:
2048    {
2049       BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
2050 
2051       if (!bb->isTerminated()) {
2052          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
2053          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
2054       }
2055       setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
2056    }
2057       break;
2058    case SM4_OPCODE_BREAK:
2059    {
2060       if (bb->isTerminated())
2061          break;
2062       BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2063       mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL);
2064       bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
2065    }
2066       break;
2067    case SM4_OPCODE_BREAKC:
2068    {
2069       BasicBlock *nextBB = new BasicBlock(func);
2070       BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2071       CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P;
2072       mkFlow(OP_BREAK, breakBB, cc, src(0, 0));
2073       bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
2074       bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
2075       setPosition(nextBB, true);
2076    }
2077       break;
2078    case SM4_OPCODE_CONTINUE:
2079    {
2080       if (bb->isTerminated())
2081          break;
2082       BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2083       mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
2084       contBB->explicitCont = true;
2085       bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2086    }
2087       break;
2088    case SM4_OPCODE_CONTINUEC:
2089    {
2090       BasicBlock *nextBB = new BasicBlock(func);
2091       BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2092       mkFlow(OP_CONT, contBB, insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
2093       bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2094       bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
2095       setPosition(nextBB, true);
2096    }
2097       break;
2098 
2099    case SM4_OPCODE_SAMPLE:
2100    case SM4_OPCODE_SAMPLE_C:
2101    case SM4_OPCODE_SAMPLE_C_LZ:
2102    case SM4_OPCODE_SAMPLE_L:
2103    case SM4_OPCODE_SAMPLE_D:
2104    case SM4_OPCODE_SAMPLE_B:
2105       handleSAMPLE(op, dst0);
2106       break;
2107    case SM4_OPCODE_LD:
2108    case SM4_OPCODE_LD_MS:
2109       handleLOAD(dst0);
2110       break;
2111 
2112    case SM4_OPCODE_GATHER4:
2113       assert(!"GATHER4 not implemented\n");
2114       break;
2115 
2116    case SM4_OPCODE_RESINFO:
2117       handleQUERY(dst0, TXQ_DIMS);
2118       break;
2119    case SM4_OPCODE_SAMPLE_POS:
2120       handleQUERY(dst0, TXQ_SAMPLE_POSITION);
2121       break;
2122 
2123    case SM4_OPCODE_NOP:
2124       mkOp(OP_NOP, TYPE_NONE, NULL);
2125       break;
2126 
2127    case SM4_OPCODE_HS_DECLS:
2128       // XXX: any significance ?
2129       break;
2130    case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
2131       phase = 0;
2132       break;
2133    case SM4_OPCODE_HS_FORK_PHASE:
2134       if (phase != 1)
2135          subPhase = 0;
2136       phase = 1;
2137       phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
2138       phaseStart = pos;
2139       if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase])
2140          unrollPhase = true;
2141       break;
2142    case SM4_OPCODE_HS_JOIN_PHASE:
2143       if (phase != 2)
2144          subPhase = 0;
2145       phase = 2;
2146       phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
2147       phaseStart = pos;
2148       if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase])
2149          unrollPhase = true;
2150       break;
2151 
2152    default:
2153       ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode);
2154       abort();
2155       return false;
2156    }
2157 
2158    for (c = 0; c < nc; ++c) {
2159       if (nDstOpnds >= 1 && rDst0[c]) {
2160          if (dst0[c] != rDst0[c])
2161             mkMov(rDst0[c], dst0[c]);
2162          saveDst(0, c, rDst0[c]);
2163       }
2164       if (nDstOpnds >= 2 && rDst1[c]) {
2165          if (dst1[c] != rDst1[c])
2166             mkMov(rDst1[c], dst1[c]);
2167          saveDst(1, c, rDst1[c]);
2168       }
2169    }
2170 
2171    memset(srcPtr, 0, sizeof(srcPtr));
2172    memset(dstPtr, 0, sizeof(dstPtr));
2173    memset(vtxBase, 0, sizeof(vtxBase));
2174    return true;
2175 }
2176 
2177 void
exportOutputs()2178 Converter::exportOutputs()
2179 {
2180    for (int i = 0; i < info.numOutputs; ++i) {
2181       for (int c = 0; c < 4; ++c) {
2182          if (!oData.exists(i, c))
2183             continue;
2184          Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
2185                                 info.out[i].slot[c] * 4);
2186          Value *val = oData.load(i, c, NULL);
2187          if (val)
2188             mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
2189       }
2190    }
2191 }
2192 
Converter(Program * p,struct nv50_ir_prog_info * s)2193 Converter::Converter(Program *p, struct nv50_ir_prog_info *s)
2194    : tData32(this),
2195      tData64(this),
2196      oData(this),
2197      info(*s),
2198      sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)),
2199      prog(p)
2200 {
2201    memset(srcPtr, 0, sizeof(srcPtr));
2202    memset(dstPtr, 0, sizeof(dstPtr));
2203    memset(vtxBase, 0, sizeof(vtxBase));
2204 
2205    memset(interpMode, 0, sizeof(interpMode));
2206 
2207    nrRegVals = nrArrays = arrayVol = 0;
2208 
2209    for (phase = 3; phase > 0; --phase)
2210       for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
2211          out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT;
2212 
2213    unrollPhase = false;
2214    phaseStart = 0;
2215    subPhaseCnt[0] = subPhaseCnt[1] = 0;
2216 }
2217 
~Converter()2218 Converter::~Converter()
2219 {
2220    if (lData)
2221       delete[] lData;
2222 
2223    if (subPhaseCnt[0])
2224       delete[] phaseInstCnt[0];
2225    if (subPhaseCnt[1])
2226       delete[] phaseInstCnt[1];
2227 }
2228 
2229 bool
haveNextPhase(unsigned int pos) const2230 Converter::haveNextPhase(unsigned int pos) const
2231 {
2232    ++pos;
2233    return (pos < sm4.insns.size()) &&
2234       (sm4.insns[pos]->opcode == SM4_OPCODE_HS_FORK_PHASE ||
2235        sm4.insns[pos]->opcode == SM4_OPCODE_HS_JOIN_PHASE);
2236 }
2237 
2238 bool
run()2239 Converter::run()
2240 {
2241    parseSignature();
2242 
2243    for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
2244       inspectDeclaration(*sm4.dcls[pos]);
2245 
2246    phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]];
2247    phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]];
2248    for (int i = 0; i < subPhaseCnt[0]; ++i)
2249       phaseInstCnt[0][i] = -1;
2250    for (int i = 0; i < subPhaseCnt[1]; ++i)
2251       phaseInstCnt[1][i] = -1;
2252    // re-increased in handleDeclaration:
2253    subPhaseCnt[0] = subPhaseCnt[1] = 0;
2254 
2255    allocateValues();
2256    nrArrays = 0;
2257    for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
2258       handleDeclaration(*sm4.dcls[pos]);
2259 
2260    info.io.genUserClip = -1; // no UCPs permitted with SM4 shaders
2261    info.io.clipDistanceMask = (1 << info.io.clipDistanceMask) - 1;
2262 
2263    info.assignSlots(&info);
2264 
2265    if (sm4.dcls.size() == 0 && sm4.insns.size() == 0)
2266       return true;
2267 
2268    BasicBlock *entry = new BasicBlock(prog->main);
2269    BasicBlock *leave = new BasicBlock(prog->main);
2270 
2271    prog->main->setEntry(entry);
2272    prog->main->setExit(leave);
2273 
2274    setPosition(entry, true);
2275 
2276    entryBBs.push(entry);
2277    leaveBBs.push(leave);
2278 
2279    if (prog->getType() == Program::TYPE_FRAGMENT) {
2280       Symbol *sv = mkSysVal(SV_POSITION, 3);
2281       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
2282       mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
2283    } else
2284    if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) {
2285       const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 3 : 2;
2286       int c;
2287       for (c = 0; c < n; ++c)
2288          domainPt[c] =
2289             mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c));
2290       if (c == 2)
2291          domainPt[2] = loadImm(NULL, 0.0f);
2292    }
2293 
2294    finalized = false;
2295    phaseEnded = 0;
2296    phase = 0;
2297    subPhase = 0;
2298    for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) {
2299       handleInstruction(pos);
2300       if (likely(phase == 0) || (phaseEnded < 2))
2301          continue;
2302       phaseEnded = 0;
2303       if (!unrollPhase || !phaseInstanceUsed) {
2304          ++subPhase;
2305          continue;
2306       }
2307       phaseInstanceUsed = false;
2308       if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1))
2309          pos = phaseStart - 1;
2310       else
2311          ++subPhase;
2312    }
2313    finalizeShader();
2314 
2315    return true;
2316 }
2317 
2318 } // anonymous namespace
2319 
2320 namespace nv50_ir {
2321 
2322 bool
makeFromSM4(struct nv50_ir_prog_info * info)2323 Program::makeFromSM4(struct nv50_ir_prog_info *info)
2324 {
2325    Converter bld(this, info);
2326    return bld.run();
2327 }
2328 
2329 } // namespace nv50_ir
2330