1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
23 #include "nv50_ir.h"
24 #include "nv50_ir_target_nv50.h"
25 
26 namespace nv50_ir {
27 
// Encoding variants handed to CodeEmitterNV50::setSrcFileBits() as `enc`:
// emitForm_MAD passes LONG, emitForm_MUL passes SHORT, emitForm_IMM passes
// IMM and emitForm_ADD passes LONG_ALT.
#define NV50_OP_ENC_LONG     0
#define NV50_OP_ENC_SHORT    1
#define NV50_OP_ENC_IMM      2
#define NV50_OP_ENC_LONG_ALT 3
32 
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36    CodeEmitterNV50(const TargetNV50 *);
37 
38    virtual bool emitInstruction(Instruction *);
39 
40    virtual uint32_t getMinEncodingSize(const Instruction *) const;
41 
setProgramType(Program::Type pType)42    inline void setProgramType(Program::Type pType) { progType = pType; }
43 
44    virtual void prepareEmission(Function *);
45 
46 private:
47    Program::Type progType;
48 
49    const TargetNV50 *targ;
50 
51 private:
52    inline void defId(const ValueDef&, const int pos);
53    inline void srcId(const ValueRef&, const int pos);
54    inline void srcId(const ValueRef *, const int pos);
55 
56    inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57    inline void srcAddr8(const ValueRef&, const int pos);
58 
59    void emitFlagsRd(const Instruction *);
60    void emitFlagsWr(const Instruction *);
61 
62    void emitCondCode(CondCode cc, DataType ty, int pos);
63 
64    inline void setARegBits(unsigned int);
65 
66    void setAReg16(const Instruction *, int s);
67    void setImmediate(const Instruction *, int s);
68 
69    void setDst(const Value *);
70    void setDst(const Instruction *, int d);
71    void setSrcFileBits(const Instruction *, int enc);
72    void setSrc(const Instruction *, unsigned int s, int slot);
73 
74    void emitForm_MAD(const Instruction *);
75    void emitForm_ADD(const Instruction *);
76    void emitForm_MUL(const Instruction *);
77    void emitForm_IMM(const Instruction *);
78 
79    void emitLoadStoreSizeLG(DataType ty, int pos);
80    void emitLoadStoreSizeCS(DataType ty);
81 
82    void roundMode_MAD(const Instruction *);
83    void roundMode_CVT(RoundMode);
84 
85    void emitMNeg12(const Instruction *);
86 
87    void emitLOAD(const Instruction *);
88    void emitSTORE(const Instruction *);
89    void emitMOV(const Instruction *);
90    void emitNOP();
91    void emitINTERP(const Instruction *);
92    void emitPFETCH(const Instruction *);
93    void emitOUT(const Instruction *);
94 
95    void emitUADD(const Instruction *);
96    void emitAADD(const Instruction *);
97    void emitFADD(const Instruction *);
98    void emitIMUL(const Instruction *);
99    void emitFMUL(const Instruction *);
100    void emitFMAD(const Instruction *);
101    void emitIMAD(const Instruction *);
102    void emitISAD(const Instruction *);
103 
104    void emitMINMAX(const Instruction *);
105 
106    void emitPreOp(const Instruction *);
107    void emitSFnOp(const Instruction *, uint8_t subOp);
108 
109    void emitShift(const Instruction *);
110    void emitARL(const Instruction *, unsigned int shl);
111    void emitLogicOp(const Instruction *);
112    void emitNOT(const Instruction *);
113 
114    void emitCVT(const Instruction *);
115    void emitSET(const Instruction *);
116 
117    void emitTEX(const TexInstruction *);
118    void emitTXQ(const TexInstruction *);
119 
120    void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
121 
122    void emitFlow(const Instruction *, uint8_t flowOp);
123    void emitPRERETEmu(const FlowInstruction *);
124 };
125 
// Shorthands for the register data (id, offset, ...) of the value that a
// source (SDATA) or definition (DDATA) reference resolves to through rep().
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
128 
srcId(const ValueRef & src,const int pos)129 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
130 {
131    assert(src.get());
132    code[pos / 32] |= SDATA(src).id << (pos % 32);
133 }
134 
srcId(const ValueRef * src,const int pos)135 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
136 {
137    assert(src->get());
138    code[pos / 32] |= SDATA(*src).id << (pos % 32);
139 }
140 
srcAddr16(const ValueRef & src,bool adj,const int pos)141 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
142 {
143    assert(src.get());
144 
145    int32_t offset = SDATA(src).offset;
146 
147    assert(!adj || src.get()->reg.size <= 4);
148    if (adj)
149       offset /= src.get()->reg.size;
150 
151    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
152 
153    if (offset < 0)
154       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
155 
156    code[pos / 32] |= offset << (pos % 32);
157 }
158 
srcAddr8(const ValueRef & src,const int pos)159 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
160 {
161    assert(src.get());
162 
163    uint32_t offset = SDATA(src).offset;
164 
165    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
166 
167    code[pos / 32] |= (offset >> 2) << (pos % 32);
168 }
169 
defId(const ValueDef & def,const int pos)170 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
171 {
172    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
173 
174    code[pos / 32] |= DDATA(def).id << (pos % 32);
175 }
176 
177 void
roundMode_MAD(const Instruction * insn)178 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
179 {
180    switch (insn->rnd) {
181    case ROUND_M: code[1] |= 1 << 22; break;
182    case ROUND_P: code[1] |= 2 << 22; break;
183    case ROUND_Z: code[1] |= 3 << 22; break;
184    default:
185       assert(insn->rnd == ROUND_N);
186       break;
187    }
188 }
189 
190 void
emitMNeg12(const Instruction * i)191 CodeEmitterNV50::emitMNeg12(const Instruction *i)
192 {
193    code[1] |= i->src(0).mod.neg() << 26;
194    code[1] |= i->src(1).mod.neg() << 27;
195 }
196 
emitCondCode(CondCode cc,DataType ty,int pos)197 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
198 {
199    uint8_t enc;
200 
201    assert(pos >= 32 || pos <= 27);
202 
203    switch (cc) {
204    case CC_LT:  enc = 0x1; break;
205    case CC_LTU: enc = 0x9; break;
206    case CC_EQ:  enc = 0x2; break;
207    case CC_EQU: enc = 0xa; break;
208    case CC_LE:  enc = 0x3; break;
209    case CC_LEU: enc = 0xb; break;
210    case CC_GT:  enc = 0x4; break;
211    case CC_GTU: enc = 0xc; break;
212    case CC_NE:  enc = 0x5; break;
213    case CC_NEU: enc = 0xd; break;
214    case CC_GE:  enc = 0x6; break;
215    case CC_GEU: enc = 0xe; break;
216    case CC_TR:  enc = 0xf; break;
217    case CC_FL:  enc = 0x0; break;
218 
219    case CC_O:  enc = 0x10; break;
220    case CC_C:  enc = 0x11; break;
221    case CC_A:  enc = 0x12; break;
222    case CC_S:  enc = 0x13; break;
223    case CC_NS: enc = 0x1c; break;
224    case CC_NA: enc = 0x1d; break;
225    case CC_NC: enc = 0x1e; break;
226    case CC_NO: enc = 0x1f; break;
227 
228    default:
229       enc = 0;
230       assert(!"invalid condition code");
231       break;
232    }
233    if (ty != TYPE_NONE && !isFloatType(ty))
234       enc &= ~0x8; // unordered only exists for float types
235 
236    code[pos / 32] |= enc << (pos % 32);
237 }
238 
239 void
emitFlagsRd(const Instruction * i)240 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
241 {
242    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
243 
244    assert(!(code[1] & 0x00003f80));
245 
246    if (s >= 0) {
247       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
248       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
249       srcId(i->src(s), 32 + 12);
250    } else {
251       code[1] |= 0x0780;
252    }
253 }
254 
255 void
emitFlagsWr(const Instruction * i)256 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
257 {
258    assert(!(code[1] & 0x70));
259 
260    int flagsDef = i->flagsDef;
261 
262    // find flags definition and check that it is the last def
263    if (flagsDef < 0) {
264       for (int d = 0; i->defExists(d); ++d)
265          if (i->def(d).getFile() == FILE_FLAGS)
266             flagsDef = d;
267       if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
268          WARN("Instruction::flagsDef was not set properly\n");
269    }
270    if (flagsDef == 0 && i->defExists(1))
271       WARN("flags def should not be the primary definition\n");
272 
273    if (flagsDef >= 0)
274       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
275 
276 }
277 
278 void
setARegBits(unsigned int u)279 CodeEmitterNV50::setARegBits(unsigned int u)
280 {
281    code[0] |= (u & 3) << 26;
282    code[1] |= (u & 4);
283 }
284 
285 void
setAReg16(const Instruction * i,int s)286 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
287 {
288    if (i->srcExists(s)) {
289       s = i->src(s).indirect[0];
290       if (s >= 0)
291          setARegBits(SDATA(i->src(s)).id + 1);
292    }
293 }
294 
295 void
setImmediate(const Instruction * i,int s)296 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
297 {
298    const ImmediateValue *imm = i->src(s).get()->asImm();
299    assert(imm);
300 
301    uint32_t u = imm->reg.data.u32;
302 
303    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
304       u = ~u;
305 
306    code[1] |= 3;
307    code[0] |= (u & 0x3f) << 16;
308    code[1] |= (u >> 6) << 2;
309 }
310 
311 void
setDst(const Value * dst)312 CodeEmitterNV50::setDst(const Value *dst)
313 {
314    const Storage *reg = &dst->join->reg;
315 
316    assert(reg->file != FILE_ADDRESS);
317 
318    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
319       code[0] |= (127 << 2) | 1;
320       code[1] |= 8;
321    } else {
322       int id;
323       if (reg->file == FILE_SHADER_OUTPUT) {
324          code[1] |= 8;
325          id = reg->data.offset / 4;
326       } else {
327          id = reg->data.id;
328       }
329       code[0] |= id << 2;
330    }
331 }
332 
333 void
setDst(const Instruction * i,int d)334 CodeEmitterNV50::setDst(const Instruction *i, int d)
335 {
336    if (i->defExists(d)) {
337       setDst(i->getDef(d));
338    } else
339    if (!d) {
340       code[0] |= 0x01fc; // bit bucket
341       code[1] |= 0x0008;
342    }
343 }
344 
345 // 3 * 2 bits:
346 // 0: r
347 // 1: a/s
348 // 2: c
349 // 3: i
350 void
setSrcFileBits(const Instruction * i,int enc)351 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
352 {
353    uint8_t mode = 0;
354 
355    for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
356       switch (i->src(s).getFile()) {
357       case FILE_GPR:
358          break;
359       case FILE_MEMORY_SHARED:
360       case FILE_SHADER_INPUT:
361          mode |= 1 << (s * 2);
362          break;
363       case FILE_MEMORY_CONST:
364          mode |= 2 << (s * 2);
365          break;
366       case FILE_IMMEDIATE:
367          mode |= 3 << (s * 2);
368          break;
369       default:
370 	      ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
371          assert(0);
372          break;
373       }
374    }
375    switch (mode) {
376    case 0x00: // rrr
377       break;
378    case 0x01: // arr/grr
379       if (progType == Program::TYPE_GEOMETRY) {
380          code[0] |= 0x01800000;
381          if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
382             code[1] |= 0x00200000;
383       } else {
384          if (enc == NV50_OP_ENC_SHORT)
385             code[0] |= 0x01000000;
386          else
387             code[1] |= 0x00200000;
388       }
389       break;
390    case 0x03: // irr
391       assert(i->op == OP_MOV);
392       return;
393    case 0x0c: // rir
394       break;
395    case 0x0d: // gir
396       code[0] |= 0x01000000;
397       assert(progType == Program::TYPE_GEOMETRY ||
398              progType == Program::TYPE_COMPUTE);
399       break;
400    case 0x08: // rcr
401       code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
402       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
403       break;
404    case 0x09: // acr/gcr
405       if (progType == Program::TYPE_GEOMETRY) {
406          code[0] |= 0x01800000;
407       } else {
408          code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
409          code[1] |= 0x00200000;
410       }
411       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
412       break;
413    case 0x20: // rrc
414       code[0] |= 0x01000000;
415       code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
416       break;
417    case 0x21: // arc
418       code[0] |= 0x01000000;
419       code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
420       assert(progType != Program::TYPE_GEOMETRY);
421       break;
422    default:
423       ERROR("not encodable: %x\n", mode);
424       assert(0);
425       break;
426    }
427    if (progType != Program::TYPE_COMPUTE)
428       return;
429 
430    if ((mode & 3) == 1) {
431       const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
432 
433       switch (i->getSrc(0)->reg.type) {
434       case TYPE_U8:
435          break;
436       case TYPE_U16:
437          code[0] |= 1 << pos;
438          break;
439       case TYPE_S16:
440          code[0] |= 2 << pos;
441          break;
442       default:
443          code[0] |= 3 << pos;
444          assert(i->getSrc(0)->reg.size == 4);
445          break;
446       }
447    }
448 }
449 
450 void
setSrc(const Instruction * i,unsigned int s,int slot)451 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
452 {
453    if (Target::operationSrcNr[i->op] <= s)
454       return;
455    const Storage *reg = &i->src(s).rep()->reg;
456 
457    unsigned int id = (reg->file == FILE_GPR) ?
458       reg->data.id :
459       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
460 
461    switch (slot) {
462    case 0: code[0] |= id << 9; break;
463    case 1: code[0] |= id << 16; break;
464    case 2: code[1] |= id << 14; break;
465    default:
466       assert(0);
467       break;
468    }
469 }
470 
471 // the default form:
472 //  - long instruction
473 //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
474 //  - address & flags
475 void
emitForm_MAD(const Instruction * i)476 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
477 {
478    assert(i->encSize == 8);
479    code[0] |= 1;
480 
481    emitFlagsRd(i);
482    emitFlagsWr(i);
483 
484    setDst(i, 0);
485 
486    setSrcFileBits(i, NV50_OP_ENC_LONG);
487    setSrc(i, 0, 0);
488    setSrc(i, 1, 1);
489    setSrc(i, 2, 2);
490 
491    setAReg16(i, 1);
492 }
493 
494 // like default form, but 2nd source in slot 2, and no 3rd source
495 void
emitForm_ADD(const Instruction * i)496 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
497 {
498    assert(i->encSize == 8);
499    code[0] |= 1;
500 
501    emitFlagsRd(i);
502    emitFlagsWr(i);
503 
504    setDst(i, 0);
505 
506    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
507    setSrc(i, 0, 0);
508    setSrc(i, 1, 2);
509 
510    setAReg16(i, 1);
511 }
512 
513 // default short form (rr, ar, rc, gr)
514 void
emitForm_MUL(const Instruction * i)515 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
516 {
517    assert(i->encSize == 4 && !(code[0] & 1));
518    assert(i->defExists(0));
519    assert(!i->getPredicate());
520 
521    setDst(i, 0);
522 
523    setSrcFileBits(i, NV50_OP_ENC_SHORT);
524    setSrc(i, 0, 0);
525    setSrc(i, 1, 1);
526 }
527 
528 // usual immediate form
529 // - 1 to 3 sources where last is immediate (rir, gir)
530 // - no address or predicate possible
531 void
emitForm_IMM(const Instruction * i)532 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
533 {
534    assert(i->encSize == 8);
535    code[0] |= 1;
536 
537    assert(i->defExists(0) && i->srcExists(0));
538 
539    setDst(i, 0);
540 
541    setSrcFileBits(i, NV50_OP_ENC_IMM);
542    if (Target::operationSrcNr[i->op] > 1) {
543       setSrc(i, 0, 0);
544       setImmediate(i, 1);
545       setSrc(i, 2, 1);
546    } else {
547       setImmediate(i, 0);
548    }
549 }
550 
551 void
emitLoadStoreSizeLG(DataType ty,int pos)552 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
553 {
554    uint8_t enc;
555 
556    switch (ty) {
557    case TYPE_F32: // fall through
558    case TYPE_S32: // fall through
559    case TYPE_U32:  enc = 0x6; break;
560    case TYPE_B128: enc = 0x5; break;
561    case TYPE_F64: // fall through
562    case TYPE_S64: // fall through
563    case TYPE_U64:  enc = 0x4; break;
564    case TYPE_S16:  enc = 0x3; break;
565    case TYPE_U16:  enc = 0x2; break;
566    case TYPE_S8:   enc = 0x1; break;
567    case TYPE_U8:   enc = 0x0; break;
568    default:
569       enc = 0;
570       assert(!"invalid load/store type");
571       break;
572    }
573    code[pos / 32] |= enc << (pos % 32);
574 }
575 
576 void
emitLoadStoreSizeCS(DataType ty)577 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
578 {
579    switch (ty) {
580    case TYPE_U8: break;
581    case TYPE_U16: code[1] |= 0x4000; break;
582    case TYPE_S16: code[1] |= 0x8000; break;
583    case TYPE_F32:
584    case TYPE_S32:
585    case TYPE_U32: code[1] |= 0xc000; break;
586    default:
587       assert(0);
588       break;
589    }
590 }
591 
592 void
emitLOAD(const Instruction * i)593 CodeEmitterNV50::emitLOAD(const Instruction *i)
594 {
595    DataFile sf = i->src(0).getFile();
596    int32_t offset = i->getSrc(0)->reg.data.offset;
597 
598    switch (sf) {
599    case FILE_SHADER_INPUT:
600       // use 'mov' where we can
601       code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
602       code[1] = 0x00200000 | (i->lanes << 14);
603       if (typeSizeof(i->dType) == 4)
604          code[1] |= 0x04000000;
605       break;
606    case FILE_MEMORY_SHARED:
607       if (targ->getChipset() >= 0x84) {
608          assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
609          code[0] = 0x10000001;
610          code[1] = 0x40000000;
611 
612          if (typeSizeof(i->dType) == 4)
613             code[1] |= 0x04000000;
614 
615          emitLoadStoreSizeCS(i->sType);
616       } else {
617          assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
618          code[0] = 0x10000001;
619          code[1] = 0x00200000 | (i->lanes << 14);
620          emitLoadStoreSizeCS(i->sType);
621       }
622       break;
623    case FILE_MEMORY_CONST:
624       code[0] = 0x10000001;
625       code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
626       if (typeSizeof(i->dType) == 4)
627          code[1] |= 0x04000000;
628       emitLoadStoreSizeCS(i->sType);
629       break;
630    case FILE_MEMORY_LOCAL:
631       code[0] = 0xd0000001;
632       code[1] = 0x40000000;
633       break;
634    case FILE_MEMORY_GLOBAL:
635       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
636       code[1] = 0x80000000;
637       break;
638    default:
639       assert(!"invalid load source file");
640       break;
641    }
642    if (sf == FILE_MEMORY_LOCAL ||
643        sf == FILE_MEMORY_GLOBAL)
644       emitLoadStoreSizeLG(i->sType, 21 + 32);
645 
646    setDst(i, 0);
647 
648    emitFlagsRd(i);
649    emitFlagsWr(i);
650 
651    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
652       srcId(*i->src(0).getIndirect(0), 9);
653    } else {
654       setAReg16(i, 0);
655       srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
656    }
657 }
658 
659 void
emitSTORE(const Instruction * i)660 CodeEmitterNV50::emitSTORE(const Instruction *i)
661 {
662    DataFile f = i->getSrc(0)->reg.file;
663    int32_t offset = i->getSrc(0)->reg.data.offset;
664 
665    switch (f) {
666    case FILE_SHADER_OUTPUT:
667       code[0] = 0x00000001 | ((offset >> 2) << 9);
668       code[1] = 0x80c00000;
669       srcId(i->src(1), 32 + 14);
670       break;
671    case FILE_MEMORY_GLOBAL:
672       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
673       code[1] = 0xa0000000;
674       emitLoadStoreSizeLG(i->dType, 21 + 32);
675       srcId(i->src(1), 2);
676       break;
677    case FILE_MEMORY_LOCAL:
678       code[0] = 0xd0000001;
679       code[1] = 0x60000000;
680       emitLoadStoreSizeLG(i->dType, 21 + 32);
681       srcId(i->src(1), 2);
682       break;
683    case FILE_MEMORY_SHARED:
684       code[0] = 0x00000001;
685       code[1] = 0xe0000000;
686       switch (typeSizeof(i->dType)) {
687       case 1:
688          code[0] |= offset << 9;
689          code[1] |= 0x00400000;
690          break;
691       case 2:
692          code[0] |= (offset >> 1) << 9;
693          break;
694       case 4:
695          code[0] |= (offset >> 2) << 9;
696          code[1] |= 0x04200000;
697          break;
698       default:
699          assert(0);
700          break;
701       }
702       srcId(i->src(1), 32 + 14);
703       break;
704    default:
705       assert(!"invalid store destination file");
706       break;
707    }
708 
709    if (f == FILE_MEMORY_GLOBAL)
710       srcId(*i->src(0).getIndirect(0), 9);
711    else
712       setAReg16(i, 0);
713 
714    if (f == FILE_MEMORY_LOCAL)
715       srcAddr16(i->src(0), false, 9);
716 
717    emitFlagsRd(i);
718 }
719 
720 void
emitMOV(const Instruction * i)721 CodeEmitterNV50::emitMOV(const Instruction *i)
722 {
723    DataFile sf = i->getSrc(0)->reg.file;
724    DataFile df = i->getDef(0)->reg.file;
725 
726    assert(sf == FILE_GPR || df == FILE_GPR);
727 
728    if (sf == FILE_FLAGS) {
729       code[0] = 0x00000001;
730       code[1] = 0x20000000;
731       defId(i->def(0), 2);
732       srcId(i->src(0), 12);
733       emitFlagsRd(i);
734    } else
735    if (sf == FILE_ADDRESS) {
736       code[0] = 0x00000001;
737       code[1] = 0x40000000;
738       defId(i->def(0), 2);
739       setARegBits(SDATA(i->src(0)).id + 1);
740       emitFlagsRd(i);
741    } else
742    if (df == FILE_FLAGS) {
743       code[0] = 0x00000001;
744       code[1] = 0xa0000000;
745       defId(i->def(0), 4);
746       srcId(i->src(0), 9);
747       emitFlagsRd(i);
748    } else
749    if (sf == FILE_IMMEDIATE) {
750       code[0] = 0x10008001;
751       code[1] = 0x00000003;
752       emitForm_IMM(i);
753    } else {
754       if (i->encSize == 4) {
755          code[0] = 0x10008000;
756       } else {
757          code[0] = 0x10000001;
758          code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
759          code[1] |= (i->lanes << 14);
760          emitFlagsRd(i);
761       }
762       defId(i->def(0), 2);
763       srcId(i->src(0), 9);
764    }
765    if (df == FILE_SHADER_OUTPUT) {
766       assert(i->encSize == 8);
767       code[1] |= 0x8;
768    }
769 }
770 
771 void
emitNOP()772 CodeEmitterNV50::emitNOP()
773 {
774    code[0] = 0xf0000001;
775    code[1] = 0xe0000000;
776 }
777 
778 void
emitQUADOP(const Instruction * i,uint8_t lane,uint8_t quOp)779 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
780 {
781    code[0] = 0xc0000000 | (lane << 16);
782    code[1] = 0x80000000;
783 
784    code[0] |= (quOp & 0x03) << 20;
785    code[1] |= (quOp & 0xfc) << 20;
786 
787    emitForm_ADD(i);
788 
789    if (!i->srcExists(1))
790       srcId(i->src(0), 32 + 14);
791 }
792 
793 void
emitPFETCH(const Instruction * i)794 CodeEmitterNV50::emitPFETCH(const Instruction *i)
795 {
796    code[0] = 0x11800001;
797    code[1] = 0x04200000 | (0xf << 14);
798 
799    defId(i->def(0), 2);
800    srcAddr8(i->src(0), 9);
801    setAReg16(i, 0);
802 }
803 
804 void
emitINTERP(const Instruction * i)805 CodeEmitterNV50::emitINTERP(const Instruction *i)
806 {
807    code[0] = 0x80000000;
808 
809    defId(i->def(0), 2);
810    srcAddr8(i->src(0), 16);
811 
812    if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
813       code[0] |= 1 << 8;
814    } else {
815       if (i->op == OP_PINTERP) {
816          code[0] |= 1 << 25;
817          srcId(i->src(1), 9);
818       }
819       if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
820          code[0] |= 1 << 24;
821    }
822 
823    if (i->encSize == 8) {
824       code[1] =
825          (code[0] & (3 << 24)) >> (24 - 16) |
826          (code[0] & (1 <<  8)) << (18 -  8);
827       code[0] &= ~0x03000100;
828       code[0] |= 1;
829       emitFlagsRd(i);
830    }
831 }
832 
833 void
emitMINMAX(const Instruction * i)834 CodeEmitterNV50::emitMINMAX(const Instruction *i)
835 {
836    if (i->dType == TYPE_F64) {
837       code[0] = 0xe0000000;
838       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
839    } else {
840       code[0] = 0x30000000;
841       code[1] = 0x80000000;
842       if (i->op == OP_MIN)
843          code[1] |= 0x20000000;
844 
845       switch (i->dType) {
846       case TYPE_F32: code[0] |= 0x80000000; break;
847       case TYPE_S32: code[1] |= 0x8c000000; break;
848       case TYPE_U32: code[1] |= 0x84000000; break;
849       case TYPE_S16: code[1] |= 0x80000000; break;
850       case TYPE_U16: break;
851       default:
852          assert(0);
853          break;
854       }
855       code[1] |= i->src(0).mod.abs() << 20;
856       code[1] |= i->src(1).mod.abs() << 19;
857    }
858    emitForm_MAD(i);
859 }
860 
861 void
emitFMAD(const Instruction * i)862 CodeEmitterNV50::emitFMAD(const Instruction *i)
863 {
864    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
865    const int neg_add = i->src(2).mod.neg();
866 
867    code[0] = 0xe0000000;
868 
869    if (i->encSize == 4) {
870       emitForm_MUL(i);
871       assert(!neg_mul && !neg_add);
872    } else {
873       code[1]  = neg_mul << 26;
874       code[1] |= neg_add << 27;
875       if (i->saturate)
876          code[1] |= 1 << 29;
877       emitForm_MAD(i);
878    }
879 }
880 
881 void
emitFADD(const Instruction * i)882 CodeEmitterNV50::emitFADD(const Instruction *i)
883 {
884    const int neg0 = i->src(0).mod.neg();
885    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
886 
887    code[0] = 0xb0000000;
888 
889    assert(!(i->src(0).mod | i->src(1).mod).abs());
890 
891    if (i->src(1).getFile() == FILE_IMMEDIATE) {
892       code[1] = 0;
893       emitForm_IMM(i);
894       code[0] |= neg0 << 15;
895       code[0] |= neg1 << 22;
896       if (i->saturate)
897          code[0] |= 1 << 8;
898    } else
899    if (i->encSize == 8) {
900       code[1] = 0;
901       emitForm_ADD(i);
902       code[1] |= neg0 << 26;
903       code[1] |= neg1 << 27;
904       if (i->saturate)
905          code[1] |= 1 << 29;
906    } else {
907       emitForm_MUL(i);
908       code[0] |= neg0 << 15;
909       code[0] |= neg1 << 22;
910       if (i->saturate)
911          code[0] |= 1 << 8;
912    }
913 }
914 
915 void
emitUADD(const Instruction * i)916 CodeEmitterNV50::emitUADD(const Instruction *i)
917 {
918    const int neg0 = i->src(0).mod.neg();
919    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
920 
921    code[0] = 0x20008000;
922 
923    if (i->src(1).getFile() == FILE_IMMEDIATE) {
924       code[1] = 0;
925       emitForm_IMM(i);
926    } else
927    if (i->encSize == 8) {
928       code[0] = 0x20000000;
929       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
930       emitForm_ADD(i);
931    } else {
932       emitForm_MUL(i);
933    }
934    assert(!(neg0 && neg1));
935    code[0] |= neg0 << 28;
936    code[0] |= neg1 << 22;
937 
938    if (i->flagsSrc >= 0) {
939       // addc == sub | subr
940       assert(!(code[0] & 0x10400000) && !i->getPredicate());
941       code[0] |= 0x10400000;
942       srcId(i->src(i->flagsSrc), 32 + 12);
943    }
944 }
945 
946 void
emitAADD(const Instruction * i)947 CodeEmitterNV50::emitAADD(const Instruction *i)
948 {
949    const int s = (i->op == OP_MOV) ? 0 : 1;
950 
951    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
952    code[1] = 0x20000000;
953 
954    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
955 
956    emitFlagsRd(i);
957 
958    if (s && i->srcExists(0))
959       setARegBits(SDATA(i->src(0)).id + 1);
960 }
961 
962 void
emitIMUL(const Instruction * i)963 CodeEmitterNV50::emitIMUL(const Instruction *i)
964 {
965    code[0] = 0x40000000;
966 
967    if (i->encSize == 8) {
968       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
969       emitForm_MAD(i);
970    } else {
971       if (i->sType == TYPE_S16)
972          code[0] |= 0x8100;
973       emitForm_MUL(i);
974    }
975 }
976 
977 void
emitFMUL(const Instruction * i)978 CodeEmitterNV50::emitFMUL(const Instruction *i)
979 {
980    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
981 
982    code[0] = 0xc0000000;
983 
984    if (i->src(1).getFile() == FILE_IMMEDIATE) {
985       code[1] = 0;
986       emitForm_IMM(i);
987       if (neg)
988          code[0] |= 0x8000;
989    } else
990    if (i->encSize == 8) {
991       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
992       if (neg)
993          code[1] |= 0x08000000;
994       emitForm_MAD(i);
995    } else {
996       emitForm_MUL(i);
997       if (neg)
998          code[0] |= 0x8000;
999    }
1000 }
1001 
1002 void
emitIMAD(const Instruction * i)1003 CodeEmitterNV50::emitIMAD(const Instruction *i)
1004 {
1005    code[0] = 0x60000000;
1006    if (isSignedType(i->sType))
1007       code[1] = i->saturate ? 0x40000000 : 0x20000000;
1008    else
1009       code[1] = 0x00000000;
1010 
1011    int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1012    int neg2 = i->src(2).mod.neg();
1013 
1014    assert(!(neg1 & neg2));
1015    code[1] |= neg1 << 27;
1016    code[1] |= neg2 << 26;
1017 
1018    emitForm_MAD(i);
1019 
1020    if (i->flagsSrc >= 0) {
1021       // add with carry from $cX
1022       assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1023       code[1] |= 0xc << 24;
1024       srcId(i->src(i->flagsSrc), 32 + 12);
1025    }
1026 }
1027 
1028 void
emitISAD(const Instruction * i)1029 CodeEmitterNV50::emitISAD(const Instruction *i)
1030 {
1031    if (i->encSize == 8) {
1032       code[0] = 0x50000000;
1033       switch (i->sType) {
1034       case TYPE_U32: code[1] = 0x04000000; break;
1035       case TYPE_S32: code[1] = 0x0c000000; break;
1036       case TYPE_U16: code[1] = 0x00000000; break;
1037       case TYPE_S16: code[1] = 0x08000000; break;
1038       default:
1039          assert(0);
1040          break;
1041       }
1042       emitForm_MAD(i);
1043    } else {
1044       switch (i->sType) {
1045       case TYPE_U32: code[0] = 0x50008000; break;
1046       case TYPE_S32: code[0] = 0x50008100; break;
1047       case TYPE_U16: code[0] = 0x50000000; break;
1048       case TYPE_S16: code[0] = 0x50000100; break;
1049       default:
1050          assert(0);
1051          break;
1052       }
1053       emitForm_MUL(i);
1054    }
1055 }
1056 
1057 void
emitSET(const Instruction * i)1058 CodeEmitterNV50::emitSET(const Instruction *i)
1059 {
1060    code[0] = 0x30000000;
1061    code[1] = 0x60000000;
1062 
1063    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1064 
1065    switch (i->sType) {
1066    case TYPE_F32: code[0] |= 0x80000000; break;
1067    case TYPE_S32: code[1] |= 0x0c000000; break;
1068    case TYPE_U32: code[1] |= 0x04000000; break;
1069    case TYPE_S16: code[1] |= 0x08000000; break;
1070    case TYPE_U16: break;
1071    default:
1072       assert(0);
1073       break;
1074    }
1075    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1076    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1077    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1078    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1079 
1080    emitForm_MAD(i);
1081 }
1082 
1083 void
roundMode_CVT(RoundMode rnd)1084 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1085 {
1086    switch (rnd) {
1087    case ROUND_NI: code[1] |= 0x08000000; break;
1088    case ROUND_M:  code[1] |= 0x00020000; break;
1089    case ROUND_MI: code[1] |= 0x08020000; break;
1090    case ROUND_P:  code[1] |= 0x00040000; break;
1091    case ROUND_PI: code[1] |= 0x08040000; break;
1092    case ROUND_Z:  code[1] |= 0x00060000; break;
1093    case ROUND_ZI: code[1] |= 0x08060000; break;
1094    default:
1095       assert(rnd == ROUND_N);
1096       break;
1097    }
1098 }
1099 
1100 void
emitCVT(const Instruction * i)1101 CodeEmitterNV50::emitCVT(const Instruction *i)
1102 {
1103    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1104    RoundMode rnd;
1105 
1106    switch (i->op) {
1107    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1108    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1109    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1110    default:
1111       rnd = i->rnd;
1112       break;
1113    }
1114 
1115    code[0] = 0xa0000000;
1116 
1117    switch (i->dType) {
1118    case TYPE_F64:
1119       switch (i->sType) {
1120       case TYPE_F64: code[1] = 0xc4404000; break;
1121       case TYPE_S64: code[1] = 0x44414000; break;
1122       case TYPE_U64: code[1] = 0x44404000; break;
1123       case TYPE_F32: code[1] = 0xc4400000; break;
1124       case TYPE_S32: code[1] = 0x44410000; break;
1125       case TYPE_U32: code[1] = 0x44400000; break;
1126       default:
1127          assert(0);
1128          break;
1129       }
1130       break;
1131    case TYPE_S64:
1132       switch (i->sType) {
1133       case TYPE_F64: code[1] = 0x8c404000; break;
1134       case TYPE_F32: code[1] = 0x8c400000; break;
1135       default:
1136          assert(0);
1137          break;
1138       }
1139       break;
1140    case TYPE_U64:
1141       switch (i->sType) {
1142       case TYPE_F64: code[1] = 0x84404000; break;
1143       case TYPE_F32: code[1] = 0x84400000; break;
1144       default:
1145          assert(0);
1146          break;
1147       }
1148       break;
1149    case TYPE_F32:
1150       switch (i->sType) {
1151       case TYPE_F64: code[1] = 0xc0404000; break;
1152       case TYPE_S64: code[1] = 0x40414000; break;
1153       case TYPE_U64: code[1] = 0x40404000; break;
1154       case TYPE_F32: code[1] = 0xc4004000; break;
1155       case TYPE_S32: code[1] = 0x44014000; break;
1156       case TYPE_U32: code[1] = 0x44004000; break;
1157       case TYPE_F16: code[1] = 0xc4000000; break;
1158       default:
1159          assert(0);
1160          break;
1161       }
1162       break;
1163    case TYPE_S32:
1164       switch (i->sType) {
1165       case TYPE_F64: code[1] = 0x88404000; break;
1166       case TYPE_F32: code[1] = 0x8c004000; break;
1167       case TYPE_S32: code[1] = 0x0c014000; break;
1168       case TYPE_U32: code[1] = 0x0c004000; break;
1169       case TYPE_F16: code[1] = 0x8c000000; break;
1170       case TYPE_S16: code[1] = 0x0c010000; break;
1171       case TYPE_U16: code[1] = 0x0c000000; break;
1172       case TYPE_S8:  code[1] = 0x0c018000; break;
1173       case TYPE_U8:  code[1] = 0x0c008000; break;
1174       default:
1175          assert(0);
1176          break;
1177       }
1178       break;
1179    case TYPE_U32:
1180       switch (i->sType) {
1181       case TYPE_F64: code[1] = 0x80404000; break;
1182       case TYPE_F32: code[1] = 0x84004000; break;
1183       case TYPE_S32: code[1] = 0x04014000; break;
1184       case TYPE_U32: code[1] = 0x04004000; break;
1185       case TYPE_F16: code[1] = 0x84000000; break;
1186       case TYPE_S16: code[1] = 0x04010000; break;
1187       case TYPE_U16: code[1] = 0x04000000; break;
1188       case TYPE_S8:  code[1] = 0x04018000; break;
1189       case TYPE_U8:  code[1] = 0x04008000; break;
1190       default:
1191          assert(0);
1192          break;
1193       }
1194       break;
1195    case TYPE_S16:
1196    case TYPE_U16:
1197    case TYPE_S8:
1198    case TYPE_U8:
1199    default:
1200       assert(0);
1201       break;
1202    }
1203    if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1204       code[1] |= 0x00004000;
1205 
1206    roundMode_CVT(rnd);
1207 
1208    switch (i->op) {
1209    case OP_ABS: code[1] |= 1 << 20; break;
1210    case OP_SAT: code[1] |= 1 << 19; break;
1211    case OP_NEG: code[1] |= 1 << 29; break;
1212    default:
1213       break;
1214    }
1215    code[1] ^= i->src(0).mod.neg() << 29;
1216    code[1] |= i->src(0).mod.abs() << 20;
1217    if (i->saturate)
1218       code[1] |= 1 << 19;
1219 
1220    assert(i->op != OP_ABS || !i->src(0).mod.neg());
1221 
1222    emitForm_MAD(i);
1223 }
1224 
1225 void
emitPreOp(const Instruction * i)1226 CodeEmitterNV50::emitPreOp(const Instruction *i)
1227 {
1228    code[0] = 0xb0000000;
1229    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1230 
1231    code[1] |= i->src(0).mod.abs() << 20;
1232    code[1] |= i->src(0).mod.neg() << 26;
1233 
1234    emitForm_MAD(i);
1235 }
1236 
1237 void
emitSFnOp(const Instruction * i,uint8_t subOp)1238 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1239 {
1240    code[0] = 0x90000000;
1241 
1242    if (i->encSize == 4) {
1243       assert(i->op == OP_RCP);
1244       code[0] |= i->src(0).mod.abs() << 15;
1245       code[0] |= i->src(0).mod.neg() << 22;
1246       emitForm_MUL(i);
1247    } else {
1248       code[1] = subOp << 29;
1249       code[1] |= i->src(0).mod.abs() << 20;
1250       code[1] |= i->src(0).mod.neg() << 26;
1251       emitForm_MAD(i);
1252    }
1253 }
1254 
1255 void
emitNOT(const Instruction * i)1256 CodeEmitterNV50::emitNOT(const Instruction *i)
1257 {
1258    code[0] = 0xd0000000;
1259    code[1] = 0x0002c000;
1260 
1261    switch (i->sType) {
1262    case TYPE_U32:
1263    case TYPE_S32:
1264       code[1] |= 0x04000000;
1265       break;
1266    default:
1267       break;
1268    }
1269    emitForm_MAD(i);
1270    setSrc(i, 0, 1);
1271 }
1272 
1273 void
emitLogicOp(const Instruction * i)1274 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1275 {
1276    code[0] = 0xd0000000;
1277    code[1] = 0;
1278 
1279    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1280       switch (i->op) {
1281       case OP_OR:  code[0] |= 0x0100; break;
1282       case OP_XOR: code[0] |= 0x8000; break;
1283       default:
1284          assert(i->op == OP_AND);
1285          break;
1286       }
1287       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1288          code[0] |= 1 << 22;
1289 
1290       emitForm_IMM(i);
1291    } else {
1292       switch (i->op) {
1293       case OP_AND: code[1] = 0x04000000; break;
1294       case OP_OR:  code[1] = 0x04004000; break;
1295       case OP_XOR: code[1] = 0x04008000; break;
1296       default:
1297          assert(0);
1298          break;
1299       }
1300       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1301          code[1] |= 1 << 16;
1302       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1303          code[1] |= 1 << 17;
1304 
1305       emitForm_MAD(i);
1306    }
1307 }
1308 
1309 void
emitARL(const Instruction * i,unsigned int shl)1310 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1311 {
1312    code[0] = 0x00000001 | (shl << 16);
1313    code[1] = 0xc0000000;
1314 
1315    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1316 
1317    setSrcFileBits(i, NV50_OP_ENC_IMM);
1318    setSrc(i, 0, 0);
1319    emitFlagsRd(i);
1320 }
1321 
1322 void
emitShift(const Instruction * i)1323 CodeEmitterNV50::emitShift(const Instruction *i)
1324 {
1325    if (i->def(0).getFile() == FILE_ADDRESS) {
1326       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1327       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1328    } else {
1329       code[0] = 0x30000001;
1330       code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1331       if (i->op == OP_SHR && isSignedType(i->sType))
1332           code[1] |= 1 << 27;
1333 
1334       if (i->src(1).getFile() == FILE_IMMEDIATE) {
1335          code[1] |= 1 << 20;
1336          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1337          defId(i->def(0), 2);
1338          srcId(i->src(0), 9);
1339          emitFlagsRd(i);
1340       } else {
1341          emitForm_MAD(i);
1342       }
1343    }
1344 }
1345 
1346 void
emitOUT(const Instruction * i)1347 CodeEmitterNV50::emitOUT(const Instruction *i)
1348 {
1349    code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
1350    code[1] = 0xc0000001;
1351 
1352    emitFlagsRd(i);
1353 }
1354 
1355 void
emitTEX(const TexInstruction * i)1356 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1357 {
1358    code[0] = 0xf0000001;
1359    code[1] = 0x00000000;
1360 
1361    switch (i->op) {
1362    case OP_TXB:
1363       code[1] = 0x20000000;
1364       break;
1365    case OP_TXL:
1366       code[1] = 0x40000000;
1367       break;
1368    case OP_TXF:
1369       code[0] |= 0x01000000;
1370       break;
1371    case OP_TXG:
1372       code[0] = 0x01000000;
1373       code[1] = 0x80000000;
1374       break;
1375    default:
1376       assert(i->op == OP_TEX);
1377       break;
1378    }
1379 
1380    code[0] |= i->tex.r << 9;
1381    code[0] |= i->tex.s << 17;
1382 
1383    int argc = i->tex.target.getArgCount();
1384 
1385    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1386       argc += 1;
1387    if (i->tex.target.isShadow())
1388       argc += 1;
1389    assert(argc <= 4);
1390 
1391    code[0] |= (argc - 1) << 22;
1392 
1393    if (i->tex.target.isCube()) {
1394       code[0] |= 0x08000000;
1395    } else
1396    if (i->tex.useOffsets) {
1397       code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
1398       code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
1399       code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
1400    }
1401 
1402    code[0] |= (i->tex.mask & 0x3) << 25;
1403    code[1] |= (i->tex.mask & 0xc) << 12;
1404 
1405    if (i->tex.liveOnly)
1406       code[1] |= 4;
1407 
1408    defId(i->def(0), 2);
1409 
1410    emitFlagsRd(i);
1411 }
1412 
1413 void
emitTXQ(const TexInstruction * i)1414 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1415 {
1416    assert(i->tex.query == TXQ_DIMS);
1417 
1418    code[0] = 0xf0000001;
1419    code[1] = 0x60000000;
1420 
1421    code[0] |= i->tex.r << 9;
1422    code[0] |= i->tex.s << 17;
1423 
1424    code[0] |= (i->tex.mask & 0x3) << 25;
1425    code[1] |= (i->tex.mask & 0xc) << 12;
1426 
1427    defId(i->def(0), 2);
1428 
1429    emitFlagsRd(i);
1430 }
1431 
1432 void
emitPRERETEmu(const FlowInstruction * i)1433 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1434 {
1435    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1436 
1437    code[0] = 0x10000003; // bra
1438    code[1] = 0x00000780; // always
1439 
1440    switch (i->subOp) {
1441    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1442       break;
1443    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1444       pos += 8;
1445       break;
1446    default:
1447       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1448       code[0] = 0x20000003; // call
1449       code[1] = 0x00000000; // no predicate
1450       break;
1451    }
1452    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1453    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1454 }
1455 
1456 void
emitFlow(const Instruction * i,uint8_t flowOp)1457 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1458 {
1459    const FlowInstruction *f = i->asFlow();
1460    bool hasPred = false;
1461    bool hasTarg = false;
1462 
1463    code[0] = 0x00000003 | (flowOp << 28);
1464    code[1] = 0x00000000;
1465 
1466    switch (i->op) {
1467    case OP_BRA:
1468       hasPred = true;
1469       hasTarg = true;
1470       break;
1471    case OP_BREAK:
1472    case OP_BRKPT:
1473    case OP_DISCARD:
1474    case OP_RET:
1475       hasPred = true;
1476       break;
1477    case OP_CALL:
1478    case OP_PREBREAK:
1479    case OP_JOINAT:
1480       hasTarg = true;
1481       break;
1482    case OP_PRERET:
1483       hasTarg = true;
1484       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1485          emitPRERETEmu(f);
1486          return;
1487       }
1488       break;
1489    default:
1490       break;
1491    }
1492 
1493    if (hasPred)
1494       emitFlagsRd(i);
1495 
1496    if (hasTarg && f) {
1497       uint32_t pos;
1498 
1499       if (f->op == OP_CALL) {
1500          if (f->builtin) {
1501             pos = targ->getBuiltinOffset(f->target.builtin);
1502          } else {
1503             pos = f->target.fn->binPos;
1504          }
1505       } else {
1506          pos = f->target.bb->binPos;
1507       }
1508 
1509       code[0] |= ((pos >>  2) & 0xffff) << 11;
1510       code[1] |= ((pos >> 18) & 0x003f) << 14;
1511 
1512       RelocEntry::Type relocTy;
1513 
1514       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1515 
1516       addReloc(relocTy, 0, pos, 0x07fff800, 9);
1517       addReloc(relocTy, 1, pos, 0x000fc000, -4);
1518    }
1519 }
1520 
1521 bool
emitInstruction(Instruction * insn)1522 CodeEmitterNV50::emitInstruction(Instruction *insn)
1523 {
1524    if (!insn->encSize) {
1525       ERROR("skipping unencodable instruction: "); insn->print();
1526       return false;
1527    } else
1528    if (codeSize + insn->encSize > codeSizeLimit) {
1529       ERROR("code emitter output buffer too small\n");
1530       return false;
1531    }
1532 
1533    if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1534       INFO("EMIT: "); insn->print();
1535    }
1536 
1537    switch (insn->op) {
1538    case OP_MOV:
1539       emitMOV(insn);
1540       break;
1541    case OP_EXIT:
1542    case OP_NOP:
1543    case OP_JOIN:
1544       emitNOP();
1545       break;
1546    case OP_VFETCH:
1547    case OP_LOAD:
1548       emitLOAD(insn);
1549       break;
1550    case OP_EXPORT:
1551    case OP_STORE:
1552       emitSTORE(insn);
1553       break;
1554    case OP_PFETCH:
1555       emitPFETCH(insn);
1556       break;
1557    case OP_LINTERP:
1558    case OP_PINTERP:
1559       emitINTERP(insn);
1560       break;
1561    case OP_ADD:
1562    case OP_SUB:
1563       if (isFloatType(insn->dType))
1564          emitFADD(insn);
1565       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1566          emitAADD(insn);
1567       else
1568          emitUADD(insn);
1569       break;
1570    case OP_MUL:
1571       if (isFloatType(insn->dType))
1572          emitFMUL(insn);
1573       else
1574          emitIMUL(insn);
1575       break;
1576    case OP_MAD:
1577    case OP_FMA:
1578       if (isFloatType(insn->dType))
1579          emitFMAD(insn);
1580       else
1581          emitIMAD(insn);
1582       break;
1583    case OP_SAD:
1584       emitISAD(insn);
1585       break;
1586    case OP_NOT:
1587       emitNOT(insn);
1588       break;
1589    case OP_AND:
1590    case OP_OR:
1591    case OP_XOR:
1592       emitLogicOp(insn);
1593       break;
1594    case OP_SHL:
1595    case OP_SHR:
1596       emitShift(insn);
1597       break;
1598    case OP_SET:
1599       emitSET(insn);
1600       break;
1601    case OP_MIN:
1602    case OP_MAX:
1603       emitMINMAX(insn);
1604       break;
1605    case OP_CEIL:
1606    case OP_FLOOR:
1607    case OP_TRUNC:
1608    case OP_ABS:
1609    case OP_NEG:
1610    case OP_SAT:
1611       emitCVT(insn);
1612       break;
1613    case OP_CVT:
1614       if (insn->def(0).getFile() == FILE_ADDRESS)
1615          emitARL(insn, 0);
1616       else
1617       if (insn->def(0).getFile() == FILE_FLAGS ||
1618           insn->src(0).getFile() == FILE_FLAGS ||
1619           insn->src(0).getFile() == FILE_ADDRESS)
1620          emitMOV(insn);
1621       else
1622          emitCVT(insn);
1623       break;
1624    case OP_RCP:
1625       emitSFnOp(insn, 0);
1626       break;
1627    case OP_RSQ:
1628       emitSFnOp(insn, 2);
1629       break;
1630    case OP_LG2:
1631       emitSFnOp(insn, 3);
1632       break;
1633    case OP_SIN:
1634       emitSFnOp(insn, 4);
1635       break;
1636    case OP_COS:
1637       emitSFnOp(insn, 5);
1638       break;
1639    case OP_EX2:
1640       emitSFnOp(insn, 6);
1641       break;
1642    case OP_PRESIN:
1643    case OP_PREEX2:
1644       emitPreOp(insn);
1645       break;
1646    case OP_TEX:
1647    case OP_TXB:
1648    case OP_TXL:
1649    case OP_TXF:
1650       emitTEX(insn->asTex());
1651       break;
1652    case OP_TXQ:
1653       emitTXQ(insn->asTex());
1654       break;
1655    case OP_EMIT:
1656    case OP_RESTART:
1657       emitOUT(insn);
1658       break;
1659    case OP_DISCARD:
1660       emitFlow(insn, 0x0);
1661       break;
1662    case OP_BRA:
1663       emitFlow(insn, 0x1);
1664       break;
1665    case OP_CALL:
1666       emitFlow(insn, 0x2);
1667       break;
1668    case OP_RET:
1669       emitFlow(insn, 0x3);
1670       break;
1671    case OP_PREBREAK:
1672       emitFlow(insn, 0x4);
1673       break;
1674    case OP_BREAK:
1675       emitFlow(insn, 0x5);
1676       break;
1677    case OP_QUADON:
1678       emitFlow(insn, 0x6);
1679       break;
1680    case OP_QUADPOP:
1681       emitFlow(insn, 0x7);
1682       break;
1683    case OP_JOINAT:
1684       emitFlow(insn, 0xa);
1685       break;
1686    case OP_PRERET:
1687       emitFlow(insn, 0xd);
1688       break;
1689    case OP_QUADOP:
1690       emitQUADOP(insn, insn->lanes, insn->subOp);
1691       break;
1692    case OP_DFDX:
1693       emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1694       break;
1695    case OP_DFDY:
1696       emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
1697       break;
1698    case OP_PHI:
1699    case OP_UNION:
1700    case OP_CONSTRAINT:
1701       ERROR("operation should have been eliminated\n");
1702       return false;
1703    case OP_EXP:
1704    case OP_LOG:
1705    case OP_SQRT:
1706    case OP_POW:
1707    case OP_SELP:
1708    case OP_SLCT:
1709    case OP_TXD:
1710    case OP_PRECONT:
1711    case OP_CONT:
1712    case OP_POPCNT:
1713    case OP_INSBF:
1714    case OP_EXTBF:
1715       ERROR("operation should have been lowered\n");
1716       return false;
1717    default:
1718       ERROR("unknown op: %u\n", insn->op);
1719       return false;
1720    }
1721    if (insn->join || insn->op == OP_JOIN)
1722       code[1] |= 0x2;
1723    else
1724    if (insn->exit || insn->op == OP_EXIT)
1725       code[1] |= 0x1;
1726 
1727    assert((insn->encSize == 8) == (code[0] & 1));
1728 
1729    code += insn->encSize / 4;
1730    codeSize += insn->encSize;
1731    return true;
1732 }
1733 
1734 uint32_t
getMinEncodingSize(const Instruction * i) const1735 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
1736 {
1737    const Target::OpInfo &info = targ->getOpInfo(i);
1738 
1739    if (info.minEncSize > 4)
1740       return 8;
1741 
1742    // check constraints on dst and src operands
1743    for (int d = 0; i->defExists(d); ++d) {
1744       if (i->def(d).rep()->reg.data.id > 63 ||
1745           i->def(d).rep()->reg.file != FILE_GPR)
1746          return 8;
1747    }
1748 
1749    for (int s = 0; i->srcExists(s); ++s) {
1750       DataFile sf = i->src(s).getFile();
1751       if (sf != FILE_GPR)
1752          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
1753             return 8;
1754       if (i->src(s).rep()->reg.data.id > 63)
1755          return 8;
1756    }
1757 
1758    // check modifiers & rounding
1759    if (i->join || i->lanes != 0xf || i->exit)
1760       return 8;
1761    if (i->op == OP_MUL && i->rnd != ROUND_N)
1762       return 8;
1763 
1764    if (i->asTex())
1765       return 8; // TODO: short tex encoding
1766 
1767    // check constraints on short MAD
1768    if (info.srcNr >= 2 && i->srcExists(2)) {
1769       if (i->saturate || i->src(2).mod)
1770          return 8;
1771       if ((i->src(0).mod ^ i->src(1).mod) ||
1772           (i->src(0).mod | i->src(1).mod).abs())
1773          return 8;
1774       if (!i->defExists(0) ||
1775           i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
1776          return 8;
1777    }
1778 
1779    return info.minEncSize;
1780 }
1781 
1782 // Change the encoding size of an instruction after BBs have been scheduled.
1783 static void
makeInstructionLong(Instruction * insn)1784 makeInstructionLong(Instruction *insn)
1785 {
1786    if (insn->encSize == 8)
1787       return;
1788    Function *fn = insn->bb->getFunction();
1789    int n = 0;
1790    int adj = 4;
1791 
1792    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
1793 
1794    if (n & 1) {
1795       adj = 8;
1796       insn->next->encSize = 8;
1797    } else
1798    if (insn->prev && insn->prev->encSize == 4) {
1799       adj = 8;
1800       insn->prev->encSize = 8;
1801    }
1802    insn->encSize = 8;
1803 
1804    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
1805       fn->bbArray[i]->binPos += 4;
1806    }
1807    fn->binSize += adj;
1808    insn->bb->binSize += adj;
1809 }
1810 
1811 static bool
trySetExitModifier(Instruction * insn)1812 trySetExitModifier(Instruction *insn)
1813 {
1814    if (insn->op == OP_DISCARD ||
1815        insn->op == OP_QUADON ||
1816        insn->op == OP_QUADPOP)
1817       return false;
1818    for (int s = 0; insn->srcExists(s); ++s)
1819       if (insn->src(s).getFile() == FILE_IMMEDIATE)
1820          return false;
1821    if (insn->asFlow()) {
1822       if (insn->op == OP_CALL) // side effects !
1823          return false;
1824       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
1825          return false;
1826       insn->op = OP_EXIT;
1827    }
1828    insn->exit = 1;
1829    makeInstructionLong(insn);
1830    return true;
1831 }
1832 
1833 static void
replaceExitWithModifier(Function * func)1834 replaceExitWithModifier(Function *func)
1835 {
1836    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
1837 
1838    if (!epilogue->getExit() ||
1839        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
1840       return;
1841 
1842    if (epilogue->getEntry()->op != OP_EXIT) {
1843       Instruction *insn = epilogue->getExit()->prev;
1844       if (!insn || !trySetExitModifier(insn))
1845          return;
1846       insn->exit = 1;
1847    } else {
1848       for (Graph::EdgeIterator ei = func->cfgExit->incident();
1849            !ei.end(); ei.next()) {
1850          BasicBlock *bb = BasicBlock::get(ei.getNode());
1851          Instruction *i = bb->getExit();
1852 
1853          if (!i || !trySetExitModifier(i))
1854             return;
1855       }
1856    }
1857    epilogue->binSize -= 8;
1858    func->binSize -= 8;
1859    delete_Instruction(func->getProgram(), epilogue->getExit());
1860 }
1861 
1862 void
prepareEmission(Function * func)1863 CodeEmitterNV50::prepareEmission(Function *func)
1864 {
1865    CodeEmitter::prepareEmission(func);
1866 
1867    replaceExitWithModifier(func);
1868 }
1869 
CodeEmitterNV50(const TargetNV50 * target)1870 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : CodeEmitter(target)
1871 {
1872    targ = target; // specialized
1873    code = NULL;
1874    codeSize = codeSizeLimit = 0;
1875    relocInfo = NULL;
1876 }
1877 
1878 CodeEmitter *
getCodeEmitter(Program::Type type)1879 TargetNV50::getCodeEmitter(Program::Type type)
1880 {
1881    CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
1882    emit->setProgramType(type);
1883    return emit;
1884 }
1885 
1886 } // namespace nv50_ir
1887