1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target_nv50.h"
25
26 namespace nv50_ir {
27
// Encoding variants handed to CodeEmitterNV50::setSrcFileBits(); they select
// which bit layout is used when recording the source register files.
#define NV50_OP_ENC_LONG     0
#define NV50_OP_ENC_SHORT    1
#define NV50_OP_ENC_IMM      2
#define NV50_OP_ENC_LONG_ALT 3
32
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36 CodeEmitterNV50(const TargetNV50 *);
37
38 virtual bool emitInstruction(Instruction *);
39
40 virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
setProgramType(Program::Type pType)42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 virtual void prepareEmission(Function *);
45
46 private:
47 Program::Type progType;
48
49 const TargetNV50 *targ;
50
51 private:
52 inline void defId(const ValueDef&, const int pos);
53 inline void srcId(const ValueRef&, const int pos);
54 inline void srcId(const ValueRef *, const int pos);
55
56 inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57 inline void srcAddr8(const ValueRef&, const int pos);
58
59 void emitFlagsRd(const Instruction *);
60 void emitFlagsWr(const Instruction *);
61
62 void emitCondCode(CondCode cc, DataType ty, int pos);
63
64 inline void setARegBits(unsigned int);
65
66 void setAReg16(const Instruction *, int s);
67 void setImmediate(const Instruction *, int s);
68
69 void setDst(const Value *);
70 void setDst(const Instruction *, int d);
71 void setSrcFileBits(const Instruction *, int enc);
72 void setSrc(const Instruction *, unsigned int s, int slot);
73
74 void emitForm_MAD(const Instruction *);
75 void emitForm_ADD(const Instruction *);
76 void emitForm_MUL(const Instruction *);
77 void emitForm_IMM(const Instruction *);
78
79 void emitLoadStoreSizeLG(DataType ty, int pos);
80 void emitLoadStoreSizeCS(DataType ty);
81
82 void roundMode_MAD(const Instruction *);
83 void roundMode_CVT(RoundMode);
84
85 void emitMNeg12(const Instruction *);
86
87 void emitLOAD(const Instruction *);
88 void emitSTORE(const Instruction *);
89 void emitMOV(const Instruction *);
90 void emitNOP();
91 void emitINTERP(const Instruction *);
92 void emitPFETCH(const Instruction *);
93 void emitOUT(const Instruction *);
94
95 void emitUADD(const Instruction *);
96 void emitAADD(const Instruction *);
97 void emitFADD(const Instruction *);
98 void emitIMUL(const Instruction *);
99 void emitFMUL(const Instruction *);
100 void emitFMAD(const Instruction *);
101 void emitIMAD(const Instruction *);
102 void emitISAD(const Instruction *);
103
104 void emitMINMAX(const Instruction *);
105
106 void emitPreOp(const Instruction *);
107 void emitSFnOp(const Instruction *, uint8_t subOp);
108
109 void emitShift(const Instruction *);
110 void emitARL(const Instruction *, unsigned int shl);
111 void emitLogicOp(const Instruction *);
112 void emitNOT(const Instruction *);
113
114 void emitCVT(const Instruction *);
115 void emitSET(const Instruction *);
116
117 void emitTEX(const TexInstruction *);
118 void emitTXQ(const TexInstruction *);
119
120 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
121
122 void emitFlow(const Instruction *, uint8_t flowOp);
123 void emitPRERETEmu(const FlowInstruction *);
124 };
125
// Register data of the representative value behind a source reference (SDATA)
// or a definition (DDATA). Both macros expand to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
128
srcId(const ValueRef & src,const int pos)129 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
130 {
131 assert(src.get());
132 code[pos / 32] |= SDATA(src).id << (pos % 32);
133 }
134
srcId(const ValueRef * src,const int pos)135 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
136 {
137 assert(src->get());
138 code[pos / 32] |= SDATA(*src).id << (pos % 32);
139 }
140
srcAddr16(const ValueRef & src,bool adj,const int pos)141 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
142 {
143 assert(src.get());
144
145 int32_t offset = SDATA(src).offset;
146
147 assert(!adj || src.get()->reg.size <= 4);
148 if (adj)
149 offset /= src.get()->reg.size;
150
151 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
152
153 if (offset < 0)
154 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
155
156 code[pos / 32] |= offset << (pos % 32);
157 }
158
srcAddr8(const ValueRef & src,const int pos)159 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
160 {
161 assert(src.get());
162
163 uint32_t offset = SDATA(src).offset;
164
165 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
166
167 code[pos / 32] |= (offset >> 2) << (pos % 32);
168 }
169
defId(const ValueDef & def,const int pos)170 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
171 {
172 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
173
174 code[pos / 32] |= DDATA(def).id << (pos % 32);
175 }
176
177 void
roundMode_MAD(const Instruction * insn)178 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
179 {
180 switch (insn->rnd) {
181 case ROUND_M: code[1] |= 1 << 22; break;
182 case ROUND_P: code[1] |= 2 << 22; break;
183 case ROUND_Z: code[1] |= 3 << 22; break;
184 default:
185 assert(insn->rnd == ROUND_N);
186 break;
187 }
188 }
189
190 void
emitMNeg12(const Instruction * i)191 CodeEmitterNV50::emitMNeg12(const Instruction *i)
192 {
193 code[1] |= i->src(0).mod.neg() << 26;
194 code[1] |= i->src(1).mod.neg() << 27;
195 }
196
emitCondCode(CondCode cc,DataType ty,int pos)197 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
198 {
199 uint8_t enc;
200
201 assert(pos >= 32 || pos <= 27);
202
203 switch (cc) {
204 case CC_LT: enc = 0x1; break;
205 case CC_LTU: enc = 0x9; break;
206 case CC_EQ: enc = 0x2; break;
207 case CC_EQU: enc = 0xa; break;
208 case CC_LE: enc = 0x3; break;
209 case CC_LEU: enc = 0xb; break;
210 case CC_GT: enc = 0x4; break;
211 case CC_GTU: enc = 0xc; break;
212 case CC_NE: enc = 0x5; break;
213 case CC_NEU: enc = 0xd; break;
214 case CC_GE: enc = 0x6; break;
215 case CC_GEU: enc = 0xe; break;
216 case CC_TR: enc = 0xf; break;
217 case CC_FL: enc = 0x0; break;
218
219 case CC_O: enc = 0x10; break;
220 case CC_C: enc = 0x11; break;
221 case CC_A: enc = 0x12; break;
222 case CC_S: enc = 0x13; break;
223 case CC_NS: enc = 0x1c; break;
224 case CC_NA: enc = 0x1d; break;
225 case CC_NC: enc = 0x1e; break;
226 case CC_NO: enc = 0x1f; break;
227
228 default:
229 enc = 0;
230 assert(!"invalid condition code");
231 break;
232 }
233 if (ty != TYPE_NONE && !isFloatType(ty))
234 enc &= ~0x8; // unordered only exists for float types
235
236 code[pos / 32] |= enc << (pos % 32);
237 }
238
239 void
emitFlagsRd(const Instruction * i)240 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
241 {
242 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
243
244 assert(!(code[1] & 0x00003f80));
245
246 if (s >= 0) {
247 assert(i->getSrc(s)->reg.file == FILE_FLAGS);
248 emitCondCode(i->cc, TYPE_NONE, 32 + 7);
249 srcId(i->src(s), 32 + 12);
250 } else {
251 code[1] |= 0x0780;
252 }
253 }
254
255 void
emitFlagsWr(const Instruction * i)256 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
257 {
258 assert(!(code[1] & 0x70));
259
260 int flagsDef = i->flagsDef;
261
262 // find flags definition and check that it is the last def
263 if (flagsDef < 0) {
264 for (int d = 0; i->defExists(d); ++d)
265 if (i->def(d).getFile() == FILE_FLAGS)
266 flagsDef = d;
267 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
268 WARN("Instruction::flagsDef was not set properly\n");
269 }
270 if (flagsDef == 0 && i->defExists(1))
271 WARN("flags def should not be the primary definition\n");
272
273 if (flagsDef >= 0)
274 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
275
276 }
277
278 void
setARegBits(unsigned int u)279 CodeEmitterNV50::setARegBits(unsigned int u)
280 {
281 code[0] |= (u & 3) << 26;
282 code[1] |= (u & 4);
283 }
284
285 void
setAReg16(const Instruction * i,int s)286 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
287 {
288 if (i->srcExists(s)) {
289 s = i->src(s).indirect[0];
290 if (s >= 0)
291 setARegBits(SDATA(i->src(s)).id + 1);
292 }
293 }
294
295 void
setImmediate(const Instruction * i,int s)296 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
297 {
298 const ImmediateValue *imm = i->src(s).get()->asImm();
299 assert(imm);
300
301 uint32_t u = imm->reg.data.u32;
302
303 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
304 u = ~u;
305
306 code[1] |= 3;
307 code[0] |= (u & 0x3f) << 16;
308 code[1] |= (u >> 6) << 2;
309 }
310
311 void
setDst(const Value * dst)312 CodeEmitterNV50::setDst(const Value *dst)
313 {
314 const Storage *reg = &dst->join->reg;
315
316 assert(reg->file != FILE_ADDRESS);
317
318 if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
319 code[0] |= (127 << 2) | 1;
320 code[1] |= 8;
321 } else {
322 int id;
323 if (reg->file == FILE_SHADER_OUTPUT) {
324 code[1] |= 8;
325 id = reg->data.offset / 4;
326 } else {
327 id = reg->data.id;
328 }
329 code[0] |= id << 2;
330 }
331 }
332
333 void
setDst(const Instruction * i,int d)334 CodeEmitterNV50::setDst(const Instruction *i, int d)
335 {
336 if (i->defExists(d)) {
337 setDst(i->getDef(d));
338 } else
339 if (!d) {
340 code[0] |= 0x01fc; // bit bucket
341 code[1] |= 0x0008;
342 }
343 }
344
345 // 3 * 2 bits:
346 // 0: r
347 // 1: a/s
348 // 2: c
349 // 3: i
350 void
setSrcFileBits(const Instruction * i,int enc)351 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
352 {
353 uint8_t mode = 0;
354
355 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
356 switch (i->src(s).getFile()) {
357 case FILE_GPR:
358 break;
359 case FILE_MEMORY_SHARED:
360 case FILE_SHADER_INPUT:
361 mode |= 1 << (s * 2);
362 break;
363 case FILE_MEMORY_CONST:
364 mode |= 2 << (s * 2);
365 break;
366 case FILE_IMMEDIATE:
367 mode |= 3 << (s * 2);
368 break;
369 default:
370 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
371 assert(0);
372 break;
373 }
374 }
375 switch (mode) {
376 case 0x00: // rrr
377 break;
378 case 0x01: // arr/grr
379 if (progType == Program::TYPE_GEOMETRY) {
380 code[0] |= 0x01800000;
381 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
382 code[1] |= 0x00200000;
383 } else {
384 if (enc == NV50_OP_ENC_SHORT)
385 code[0] |= 0x01000000;
386 else
387 code[1] |= 0x00200000;
388 }
389 break;
390 case 0x03: // irr
391 assert(i->op == OP_MOV);
392 return;
393 case 0x0c: // rir
394 break;
395 case 0x0d: // gir
396 code[0] |= 0x01000000;
397 assert(progType == Program::TYPE_GEOMETRY ||
398 progType == Program::TYPE_COMPUTE);
399 break;
400 case 0x08: // rcr
401 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
402 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
403 break;
404 case 0x09: // acr/gcr
405 if (progType == Program::TYPE_GEOMETRY) {
406 code[0] |= 0x01800000;
407 } else {
408 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
409 code[1] |= 0x00200000;
410 }
411 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
412 break;
413 case 0x20: // rrc
414 code[0] |= 0x01000000;
415 code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
416 break;
417 case 0x21: // arc
418 code[0] |= 0x01000000;
419 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
420 assert(progType != Program::TYPE_GEOMETRY);
421 break;
422 default:
423 ERROR("not encodable: %x\n", mode);
424 assert(0);
425 break;
426 }
427 if (progType != Program::TYPE_COMPUTE)
428 return;
429
430 if ((mode & 3) == 1) {
431 const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
432
433 switch (i->getSrc(0)->reg.type) {
434 case TYPE_U8:
435 break;
436 case TYPE_U16:
437 code[0] |= 1 << pos;
438 break;
439 case TYPE_S16:
440 code[0] |= 2 << pos;
441 break;
442 default:
443 code[0] |= 3 << pos;
444 assert(i->getSrc(0)->reg.size == 4);
445 break;
446 }
447 }
448 }
449
450 void
setSrc(const Instruction * i,unsigned int s,int slot)451 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
452 {
453 if (Target::operationSrcNr[i->op] <= s)
454 return;
455 const Storage *reg = &i->src(s).rep()->reg;
456
457 unsigned int id = (reg->file == FILE_GPR) ?
458 reg->data.id :
459 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
460
461 switch (slot) {
462 case 0: code[0] |= id << 9; break;
463 case 1: code[0] |= id << 16; break;
464 case 2: code[1] |= id << 14; break;
465 default:
466 assert(0);
467 break;
468 }
469 }
470
471 // the default form:
472 // - long instruction
473 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
474 // - address & flags
475 void
emitForm_MAD(const Instruction * i)476 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
477 {
478 assert(i->encSize == 8);
479 code[0] |= 1;
480
481 emitFlagsRd(i);
482 emitFlagsWr(i);
483
484 setDst(i, 0);
485
486 setSrcFileBits(i, NV50_OP_ENC_LONG);
487 setSrc(i, 0, 0);
488 setSrc(i, 1, 1);
489 setSrc(i, 2, 2);
490
491 setAReg16(i, 1);
492 }
493
494 // like default form, but 2nd source in slot 2, and no 3rd source
495 void
emitForm_ADD(const Instruction * i)496 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
497 {
498 assert(i->encSize == 8);
499 code[0] |= 1;
500
501 emitFlagsRd(i);
502 emitFlagsWr(i);
503
504 setDst(i, 0);
505
506 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
507 setSrc(i, 0, 0);
508 setSrc(i, 1, 2);
509
510 setAReg16(i, 1);
511 }
512
513 // default short form (rr, ar, rc, gr)
514 void
emitForm_MUL(const Instruction * i)515 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
516 {
517 assert(i->encSize == 4 && !(code[0] & 1));
518 assert(i->defExists(0));
519 assert(!i->getPredicate());
520
521 setDst(i, 0);
522
523 setSrcFileBits(i, NV50_OP_ENC_SHORT);
524 setSrc(i, 0, 0);
525 setSrc(i, 1, 1);
526 }
527
528 // usual immediate form
529 // - 1 to 3 sources where last is immediate (rir, gir)
530 // - no address or predicate possible
531 void
emitForm_IMM(const Instruction * i)532 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
533 {
534 assert(i->encSize == 8);
535 code[0] |= 1;
536
537 assert(i->defExists(0) && i->srcExists(0));
538
539 setDst(i, 0);
540
541 setSrcFileBits(i, NV50_OP_ENC_IMM);
542 if (Target::operationSrcNr[i->op] > 1) {
543 setSrc(i, 0, 0);
544 setImmediate(i, 1);
545 setSrc(i, 2, 1);
546 } else {
547 setImmediate(i, 0);
548 }
549 }
550
551 void
emitLoadStoreSizeLG(DataType ty,int pos)552 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
553 {
554 uint8_t enc;
555
556 switch (ty) {
557 case TYPE_F32: // fall through
558 case TYPE_S32: // fall through
559 case TYPE_U32: enc = 0x6; break;
560 case TYPE_B128: enc = 0x5; break;
561 case TYPE_F64: // fall through
562 case TYPE_S64: // fall through
563 case TYPE_U64: enc = 0x4; break;
564 case TYPE_S16: enc = 0x3; break;
565 case TYPE_U16: enc = 0x2; break;
566 case TYPE_S8: enc = 0x1; break;
567 case TYPE_U8: enc = 0x0; break;
568 default:
569 enc = 0;
570 assert(!"invalid load/store type");
571 break;
572 }
573 code[pos / 32] |= enc << (pos % 32);
574 }
575
576 void
emitLoadStoreSizeCS(DataType ty)577 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
578 {
579 switch (ty) {
580 case TYPE_U8: break;
581 case TYPE_U16: code[1] |= 0x4000; break;
582 case TYPE_S16: code[1] |= 0x8000; break;
583 case TYPE_F32:
584 case TYPE_S32:
585 case TYPE_U32: code[1] |= 0xc000; break;
586 default:
587 assert(0);
588 break;
589 }
590 }
591
592 void
emitLOAD(const Instruction * i)593 CodeEmitterNV50::emitLOAD(const Instruction *i)
594 {
595 DataFile sf = i->src(0).getFile();
596 int32_t offset = i->getSrc(0)->reg.data.offset;
597
598 switch (sf) {
599 case FILE_SHADER_INPUT:
600 // use 'mov' where we can
601 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
602 code[1] = 0x00200000 | (i->lanes << 14);
603 if (typeSizeof(i->dType) == 4)
604 code[1] |= 0x04000000;
605 break;
606 case FILE_MEMORY_SHARED:
607 if (targ->getChipset() >= 0x84) {
608 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
609 code[0] = 0x10000001;
610 code[1] = 0x40000000;
611
612 if (typeSizeof(i->dType) == 4)
613 code[1] |= 0x04000000;
614
615 emitLoadStoreSizeCS(i->sType);
616 } else {
617 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
618 code[0] = 0x10000001;
619 code[1] = 0x00200000 | (i->lanes << 14);
620 emitLoadStoreSizeCS(i->sType);
621 }
622 break;
623 case FILE_MEMORY_CONST:
624 code[0] = 0x10000001;
625 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
626 if (typeSizeof(i->dType) == 4)
627 code[1] |= 0x04000000;
628 emitLoadStoreSizeCS(i->sType);
629 break;
630 case FILE_MEMORY_LOCAL:
631 code[0] = 0xd0000001;
632 code[1] = 0x40000000;
633 break;
634 case FILE_MEMORY_GLOBAL:
635 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
636 code[1] = 0x80000000;
637 break;
638 default:
639 assert(!"invalid load source file");
640 break;
641 }
642 if (sf == FILE_MEMORY_LOCAL ||
643 sf == FILE_MEMORY_GLOBAL)
644 emitLoadStoreSizeLG(i->sType, 21 + 32);
645
646 setDst(i, 0);
647
648 emitFlagsRd(i);
649 emitFlagsWr(i);
650
651 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
652 srcId(*i->src(0).getIndirect(0), 9);
653 } else {
654 setAReg16(i, 0);
655 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
656 }
657 }
658
659 void
emitSTORE(const Instruction * i)660 CodeEmitterNV50::emitSTORE(const Instruction *i)
661 {
662 DataFile f = i->getSrc(0)->reg.file;
663 int32_t offset = i->getSrc(0)->reg.data.offset;
664
665 switch (f) {
666 case FILE_SHADER_OUTPUT:
667 code[0] = 0x00000001 | ((offset >> 2) << 9);
668 code[1] = 0x80c00000;
669 srcId(i->src(1), 32 + 14);
670 break;
671 case FILE_MEMORY_GLOBAL:
672 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
673 code[1] = 0xa0000000;
674 emitLoadStoreSizeLG(i->dType, 21 + 32);
675 srcId(i->src(1), 2);
676 break;
677 case FILE_MEMORY_LOCAL:
678 code[0] = 0xd0000001;
679 code[1] = 0x60000000;
680 emitLoadStoreSizeLG(i->dType, 21 + 32);
681 srcId(i->src(1), 2);
682 break;
683 case FILE_MEMORY_SHARED:
684 code[0] = 0x00000001;
685 code[1] = 0xe0000000;
686 switch (typeSizeof(i->dType)) {
687 case 1:
688 code[0] |= offset << 9;
689 code[1] |= 0x00400000;
690 break;
691 case 2:
692 code[0] |= (offset >> 1) << 9;
693 break;
694 case 4:
695 code[0] |= (offset >> 2) << 9;
696 code[1] |= 0x04200000;
697 break;
698 default:
699 assert(0);
700 break;
701 }
702 srcId(i->src(1), 32 + 14);
703 break;
704 default:
705 assert(!"invalid store destination file");
706 break;
707 }
708
709 if (f == FILE_MEMORY_GLOBAL)
710 srcId(*i->src(0).getIndirect(0), 9);
711 else
712 setAReg16(i, 0);
713
714 if (f == FILE_MEMORY_LOCAL)
715 srcAddr16(i->src(0), false, 9);
716
717 emitFlagsRd(i);
718 }
719
720 void
emitMOV(const Instruction * i)721 CodeEmitterNV50::emitMOV(const Instruction *i)
722 {
723 DataFile sf = i->getSrc(0)->reg.file;
724 DataFile df = i->getDef(0)->reg.file;
725
726 assert(sf == FILE_GPR || df == FILE_GPR);
727
728 if (sf == FILE_FLAGS) {
729 code[0] = 0x00000001;
730 code[1] = 0x20000000;
731 defId(i->def(0), 2);
732 srcId(i->src(0), 12);
733 emitFlagsRd(i);
734 } else
735 if (sf == FILE_ADDRESS) {
736 code[0] = 0x00000001;
737 code[1] = 0x40000000;
738 defId(i->def(0), 2);
739 setARegBits(SDATA(i->src(0)).id + 1);
740 emitFlagsRd(i);
741 } else
742 if (df == FILE_FLAGS) {
743 code[0] = 0x00000001;
744 code[1] = 0xa0000000;
745 defId(i->def(0), 4);
746 srcId(i->src(0), 9);
747 emitFlagsRd(i);
748 } else
749 if (sf == FILE_IMMEDIATE) {
750 code[0] = 0x10008001;
751 code[1] = 0x00000003;
752 emitForm_IMM(i);
753 } else {
754 if (i->encSize == 4) {
755 code[0] = 0x10008000;
756 } else {
757 code[0] = 0x10000001;
758 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
759 code[1] |= (i->lanes << 14);
760 emitFlagsRd(i);
761 }
762 defId(i->def(0), 2);
763 srcId(i->src(0), 9);
764 }
765 if (df == FILE_SHADER_OUTPUT) {
766 assert(i->encSize == 8);
767 code[1] |= 0x8;
768 }
769 }
770
771 void
emitNOP()772 CodeEmitterNV50::emitNOP()
773 {
774 code[0] = 0xf0000001;
775 code[1] = 0xe0000000;
776 }
777
778 void
emitQUADOP(const Instruction * i,uint8_t lane,uint8_t quOp)779 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
780 {
781 code[0] = 0xc0000000 | (lane << 16);
782 code[1] = 0x80000000;
783
784 code[0] |= (quOp & 0x03) << 20;
785 code[1] |= (quOp & 0xfc) << 20;
786
787 emitForm_ADD(i);
788
789 if (!i->srcExists(1))
790 srcId(i->src(0), 32 + 14);
791 }
792
793 void
emitPFETCH(const Instruction * i)794 CodeEmitterNV50::emitPFETCH(const Instruction *i)
795 {
796 code[0] = 0x11800001;
797 code[1] = 0x04200000 | (0xf << 14);
798
799 defId(i->def(0), 2);
800 srcAddr8(i->src(0), 9);
801 setAReg16(i, 0);
802 }
803
804 void
emitINTERP(const Instruction * i)805 CodeEmitterNV50::emitINTERP(const Instruction *i)
806 {
807 code[0] = 0x80000000;
808
809 defId(i->def(0), 2);
810 srcAddr8(i->src(0), 16);
811
812 if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
813 code[0] |= 1 << 8;
814 } else {
815 if (i->op == OP_PINTERP) {
816 code[0] |= 1 << 25;
817 srcId(i->src(1), 9);
818 }
819 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
820 code[0] |= 1 << 24;
821 }
822
823 if (i->encSize == 8) {
824 code[1] =
825 (code[0] & (3 << 24)) >> (24 - 16) |
826 (code[0] & (1 << 8)) << (18 - 8);
827 code[0] &= ~0x03000100;
828 code[0] |= 1;
829 emitFlagsRd(i);
830 }
831 }
832
833 void
emitMINMAX(const Instruction * i)834 CodeEmitterNV50::emitMINMAX(const Instruction *i)
835 {
836 if (i->dType == TYPE_F64) {
837 code[0] = 0xe0000000;
838 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
839 } else {
840 code[0] = 0x30000000;
841 code[1] = 0x80000000;
842 if (i->op == OP_MIN)
843 code[1] |= 0x20000000;
844
845 switch (i->dType) {
846 case TYPE_F32: code[0] |= 0x80000000; break;
847 case TYPE_S32: code[1] |= 0x8c000000; break;
848 case TYPE_U32: code[1] |= 0x84000000; break;
849 case TYPE_S16: code[1] |= 0x80000000; break;
850 case TYPE_U16: break;
851 default:
852 assert(0);
853 break;
854 }
855 code[1] |= i->src(0).mod.abs() << 20;
856 code[1] |= i->src(1).mod.abs() << 19;
857 }
858 emitForm_MAD(i);
859 }
860
861 void
emitFMAD(const Instruction * i)862 CodeEmitterNV50::emitFMAD(const Instruction *i)
863 {
864 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
865 const int neg_add = i->src(2).mod.neg();
866
867 code[0] = 0xe0000000;
868
869 if (i->encSize == 4) {
870 emitForm_MUL(i);
871 assert(!neg_mul && !neg_add);
872 } else {
873 code[1] = neg_mul << 26;
874 code[1] |= neg_add << 27;
875 if (i->saturate)
876 code[1] |= 1 << 29;
877 emitForm_MAD(i);
878 }
879 }
880
881 void
emitFADD(const Instruction * i)882 CodeEmitterNV50::emitFADD(const Instruction *i)
883 {
884 const int neg0 = i->src(0).mod.neg();
885 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
886
887 code[0] = 0xb0000000;
888
889 assert(!(i->src(0).mod | i->src(1).mod).abs());
890
891 if (i->src(1).getFile() == FILE_IMMEDIATE) {
892 code[1] = 0;
893 emitForm_IMM(i);
894 code[0] |= neg0 << 15;
895 code[0] |= neg1 << 22;
896 if (i->saturate)
897 code[0] |= 1 << 8;
898 } else
899 if (i->encSize == 8) {
900 code[1] = 0;
901 emitForm_ADD(i);
902 code[1] |= neg0 << 26;
903 code[1] |= neg1 << 27;
904 if (i->saturate)
905 code[1] |= 1 << 29;
906 } else {
907 emitForm_MUL(i);
908 code[0] |= neg0 << 15;
909 code[0] |= neg1 << 22;
910 if (i->saturate)
911 code[0] |= 1 << 8;
912 }
913 }
914
915 void
emitUADD(const Instruction * i)916 CodeEmitterNV50::emitUADD(const Instruction *i)
917 {
918 const int neg0 = i->src(0).mod.neg();
919 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
920
921 code[0] = 0x20008000;
922
923 if (i->src(1).getFile() == FILE_IMMEDIATE) {
924 code[1] = 0;
925 emitForm_IMM(i);
926 } else
927 if (i->encSize == 8) {
928 code[0] = 0x20000000;
929 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
930 emitForm_ADD(i);
931 } else {
932 emitForm_MUL(i);
933 }
934 assert(!(neg0 && neg1));
935 code[0] |= neg0 << 28;
936 code[0] |= neg1 << 22;
937
938 if (i->flagsSrc >= 0) {
939 // addc == sub | subr
940 assert(!(code[0] & 0x10400000) && !i->getPredicate());
941 code[0] |= 0x10400000;
942 srcId(i->src(i->flagsSrc), 32 + 12);
943 }
944 }
945
946 void
emitAADD(const Instruction * i)947 CodeEmitterNV50::emitAADD(const Instruction *i)
948 {
949 const int s = (i->op == OP_MOV) ? 0 : 1;
950
951 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
952 code[1] = 0x20000000;
953
954 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
955
956 emitFlagsRd(i);
957
958 if (s && i->srcExists(0))
959 setARegBits(SDATA(i->src(0)).id + 1);
960 }
961
962 void
emitIMUL(const Instruction * i)963 CodeEmitterNV50::emitIMUL(const Instruction *i)
964 {
965 code[0] = 0x40000000;
966
967 if (i->encSize == 8) {
968 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
969 emitForm_MAD(i);
970 } else {
971 if (i->sType == TYPE_S16)
972 code[0] |= 0x8100;
973 emitForm_MUL(i);
974 }
975 }
976
977 void
emitFMUL(const Instruction * i)978 CodeEmitterNV50::emitFMUL(const Instruction *i)
979 {
980 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
981
982 code[0] = 0xc0000000;
983
984 if (i->src(1).getFile() == FILE_IMMEDIATE) {
985 code[1] = 0;
986 emitForm_IMM(i);
987 if (neg)
988 code[0] |= 0x8000;
989 } else
990 if (i->encSize == 8) {
991 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
992 if (neg)
993 code[1] |= 0x08000000;
994 emitForm_MAD(i);
995 } else {
996 emitForm_MUL(i);
997 if (neg)
998 code[0] |= 0x8000;
999 }
1000 }
1001
1002 void
emitIMAD(const Instruction * i)1003 CodeEmitterNV50::emitIMAD(const Instruction *i)
1004 {
1005 code[0] = 0x60000000;
1006 if (isSignedType(i->sType))
1007 code[1] = i->saturate ? 0x40000000 : 0x20000000;
1008 else
1009 code[1] = 0x00000000;
1010
1011 int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1012 int neg2 = i->src(2).mod.neg();
1013
1014 assert(!(neg1 & neg2));
1015 code[1] |= neg1 << 27;
1016 code[1] |= neg2 << 26;
1017
1018 emitForm_MAD(i);
1019
1020 if (i->flagsSrc >= 0) {
1021 // add with carry from $cX
1022 assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1023 code[1] |= 0xc << 24;
1024 srcId(i->src(i->flagsSrc), 32 + 12);
1025 }
1026 }
1027
1028 void
emitISAD(const Instruction * i)1029 CodeEmitterNV50::emitISAD(const Instruction *i)
1030 {
1031 if (i->encSize == 8) {
1032 code[0] = 0x50000000;
1033 switch (i->sType) {
1034 case TYPE_U32: code[1] = 0x04000000; break;
1035 case TYPE_S32: code[1] = 0x0c000000; break;
1036 case TYPE_U16: code[1] = 0x00000000; break;
1037 case TYPE_S16: code[1] = 0x08000000; break;
1038 default:
1039 assert(0);
1040 break;
1041 }
1042 emitForm_MAD(i);
1043 } else {
1044 switch (i->sType) {
1045 case TYPE_U32: code[0] = 0x50008000; break;
1046 case TYPE_S32: code[0] = 0x50008100; break;
1047 case TYPE_U16: code[0] = 0x50000000; break;
1048 case TYPE_S16: code[0] = 0x50000100; break;
1049 default:
1050 assert(0);
1051 break;
1052 }
1053 emitForm_MUL(i);
1054 }
1055 }
1056
1057 void
emitSET(const Instruction * i)1058 CodeEmitterNV50::emitSET(const Instruction *i)
1059 {
1060 code[0] = 0x30000000;
1061 code[1] = 0x60000000;
1062
1063 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1064
1065 switch (i->sType) {
1066 case TYPE_F32: code[0] |= 0x80000000; break;
1067 case TYPE_S32: code[1] |= 0x0c000000; break;
1068 case TYPE_U32: code[1] |= 0x04000000; break;
1069 case TYPE_S16: code[1] |= 0x08000000; break;
1070 case TYPE_U16: break;
1071 default:
1072 assert(0);
1073 break;
1074 }
1075 if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1076 if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1077 if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1078 if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1079
1080 emitForm_MAD(i);
1081 }
1082
1083 void
roundMode_CVT(RoundMode rnd)1084 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1085 {
1086 switch (rnd) {
1087 case ROUND_NI: code[1] |= 0x08000000; break;
1088 case ROUND_M: code[1] |= 0x00020000; break;
1089 case ROUND_MI: code[1] |= 0x08020000; break;
1090 case ROUND_P: code[1] |= 0x00040000; break;
1091 case ROUND_PI: code[1] |= 0x08040000; break;
1092 case ROUND_Z: code[1] |= 0x00060000; break;
1093 case ROUND_ZI: code[1] |= 0x08060000; break;
1094 default:
1095 assert(rnd == ROUND_N);
1096 break;
1097 }
1098 }
1099
1100 void
emitCVT(const Instruction * i)1101 CodeEmitterNV50::emitCVT(const Instruction *i)
1102 {
1103 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1104 RoundMode rnd;
1105
1106 switch (i->op) {
1107 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1108 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1109 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1110 default:
1111 rnd = i->rnd;
1112 break;
1113 }
1114
1115 code[0] = 0xa0000000;
1116
1117 switch (i->dType) {
1118 case TYPE_F64:
1119 switch (i->sType) {
1120 case TYPE_F64: code[1] = 0xc4404000; break;
1121 case TYPE_S64: code[1] = 0x44414000; break;
1122 case TYPE_U64: code[1] = 0x44404000; break;
1123 case TYPE_F32: code[1] = 0xc4400000; break;
1124 case TYPE_S32: code[1] = 0x44410000; break;
1125 case TYPE_U32: code[1] = 0x44400000; break;
1126 default:
1127 assert(0);
1128 break;
1129 }
1130 break;
1131 case TYPE_S64:
1132 switch (i->sType) {
1133 case TYPE_F64: code[1] = 0x8c404000; break;
1134 case TYPE_F32: code[1] = 0x8c400000; break;
1135 default:
1136 assert(0);
1137 break;
1138 }
1139 break;
1140 case TYPE_U64:
1141 switch (i->sType) {
1142 case TYPE_F64: code[1] = 0x84404000; break;
1143 case TYPE_F32: code[1] = 0x84400000; break;
1144 default:
1145 assert(0);
1146 break;
1147 }
1148 break;
1149 case TYPE_F32:
1150 switch (i->sType) {
1151 case TYPE_F64: code[1] = 0xc0404000; break;
1152 case TYPE_S64: code[1] = 0x40414000; break;
1153 case TYPE_U64: code[1] = 0x40404000; break;
1154 case TYPE_F32: code[1] = 0xc4004000; break;
1155 case TYPE_S32: code[1] = 0x44014000; break;
1156 case TYPE_U32: code[1] = 0x44004000; break;
1157 case TYPE_F16: code[1] = 0xc4000000; break;
1158 default:
1159 assert(0);
1160 break;
1161 }
1162 break;
1163 case TYPE_S32:
1164 switch (i->sType) {
1165 case TYPE_F64: code[1] = 0x88404000; break;
1166 case TYPE_F32: code[1] = 0x8c004000; break;
1167 case TYPE_S32: code[1] = 0x0c014000; break;
1168 case TYPE_U32: code[1] = 0x0c004000; break;
1169 case TYPE_F16: code[1] = 0x8c000000; break;
1170 case TYPE_S16: code[1] = 0x0c010000; break;
1171 case TYPE_U16: code[1] = 0x0c000000; break;
1172 case TYPE_S8: code[1] = 0x0c018000; break;
1173 case TYPE_U8: code[1] = 0x0c008000; break;
1174 default:
1175 assert(0);
1176 break;
1177 }
1178 break;
1179 case TYPE_U32:
1180 switch (i->sType) {
1181 case TYPE_F64: code[1] = 0x80404000; break;
1182 case TYPE_F32: code[1] = 0x84004000; break;
1183 case TYPE_S32: code[1] = 0x04014000; break;
1184 case TYPE_U32: code[1] = 0x04004000; break;
1185 case TYPE_F16: code[1] = 0x84000000; break;
1186 case TYPE_S16: code[1] = 0x04010000; break;
1187 case TYPE_U16: code[1] = 0x04000000; break;
1188 case TYPE_S8: code[1] = 0x04018000; break;
1189 case TYPE_U8: code[1] = 0x04008000; break;
1190 default:
1191 assert(0);
1192 break;
1193 }
1194 break;
1195 case TYPE_S16:
1196 case TYPE_U16:
1197 case TYPE_S8:
1198 case TYPE_U8:
1199 default:
1200 assert(0);
1201 break;
1202 }
1203 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1204 code[1] |= 0x00004000;
1205
1206 roundMode_CVT(rnd);
1207
1208 switch (i->op) {
1209 case OP_ABS: code[1] |= 1 << 20; break;
1210 case OP_SAT: code[1] |= 1 << 19; break;
1211 case OP_NEG: code[1] |= 1 << 29; break;
1212 default:
1213 break;
1214 }
1215 code[1] ^= i->src(0).mod.neg() << 29;
1216 code[1] |= i->src(0).mod.abs() << 20;
1217 if (i->saturate)
1218 code[1] |= 1 << 19;
1219
1220 assert(i->op != OP_ABS || !i->src(0).mod.neg());
1221
1222 emitForm_MAD(i);
1223 }
1224
1225 void
emitPreOp(const Instruction * i)1226 CodeEmitterNV50::emitPreOp(const Instruction *i)
1227 {
1228 code[0] = 0xb0000000;
1229 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1230
1231 code[1] |= i->src(0).mod.abs() << 20;
1232 code[1] |= i->src(0).mod.neg() << 26;
1233
1234 emitForm_MAD(i);
1235 }
1236
1237 void
emitSFnOp(const Instruction * i,uint8_t subOp)1238 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1239 {
1240 code[0] = 0x90000000;
1241
1242 if (i->encSize == 4) {
1243 assert(i->op == OP_RCP);
1244 code[0] |= i->src(0).mod.abs() << 15;
1245 code[0] |= i->src(0).mod.neg() << 22;
1246 emitForm_MUL(i);
1247 } else {
1248 code[1] = subOp << 29;
1249 code[1] |= i->src(0).mod.abs() << 20;
1250 code[1] |= i->src(0).mod.neg() << 26;
1251 emitForm_MAD(i);
1252 }
1253 }
1254
1255 void
emitNOT(const Instruction * i)1256 CodeEmitterNV50::emitNOT(const Instruction *i)
1257 {
1258 code[0] = 0xd0000000;
1259 code[1] = 0x0002c000;
1260
1261 switch (i->sType) {
1262 case TYPE_U32:
1263 case TYPE_S32:
1264 code[1] |= 0x04000000;
1265 break;
1266 default:
1267 break;
1268 }
1269 emitForm_MAD(i);
1270 setSrc(i, 0, 1);
1271 }
1272
1273 void
emitLogicOp(const Instruction * i)1274 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1275 {
1276 code[0] = 0xd0000000;
1277 code[1] = 0;
1278
1279 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1280 switch (i->op) {
1281 case OP_OR: code[0] |= 0x0100; break;
1282 case OP_XOR: code[0] |= 0x8000; break;
1283 default:
1284 assert(i->op == OP_AND);
1285 break;
1286 }
1287 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1288 code[0] |= 1 << 22;
1289
1290 emitForm_IMM(i);
1291 } else {
1292 switch (i->op) {
1293 case OP_AND: code[1] = 0x04000000; break;
1294 case OP_OR: code[1] = 0x04004000; break;
1295 case OP_XOR: code[1] = 0x04008000; break;
1296 default:
1297 assert(0);
1298 break;
1299 }
1300 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1301 code[1] |= 1 << 16;
1302 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1303 code[1] |= 1 << 17;
1304
1305 emitForm_MAD(i);
1306 }
1307 }
1308
1309 void
emitARL(const Instruction * i,unsigned int shl)1310 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1311 {
1312 code[0] = 0x00000001 | (shl << 16);
1313 code[1] = 0xc0000000;
1314
1315 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1316
1317 setSrcFileBits(i, NV50_OP_ENC_IMM);
1318 setSrc(i, 0, 0);
1319 emitFlagsRd(i);
1320 }
1321
1322 void
emitShift(const Instruction * i)1323 CodeEmitterNV50::emitShift(const Instruction *i)
1324 {
1325 if (i->def(0).getFile() == FILE_ADDRESS) {
1326 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1327 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1328 } else {
1329 code[0] = 0x30000001;
1330 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1331 if (i->op == OP_SHR && isSignedType(i->sType))
1332 code[1] |= 1 << 27;
1333
1334 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1335 code[1] |= 1 << 20;
1336 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1337 defId(i->def(0), 2);
1338 srcId(i->src(0), 9);
1339 emitFlagsRd(i);
1340 } else {
1341 emitForm_MAD(i);
1342 }
1343 }
1344 }
1345
1346 void
emitOUT(const Instruction * i)1347 CodeEmitterNV50::emitOUT(const Instruction *i)
1348 {
1349 code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
1350 code[1] = 0xc0000001;
1351
1352 emitFlagsRd(i);
1353 }
1354
1355 void
emitTEX(const TexInstruction * i)1356 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1357 {
1358 code[0] = 0xf0000001;
1359 code[1] = 0x00000000;
1360
1361 switch (i->op) {
1362 case OP_TXB:
1363 code[1] = 0x20000000;
1364 break;
1365 case OP_TXL:
1366 code[1] = 0x40000000;
1367 break;
1368 case OP_TXF:
1369 code[0] |= 0x01000000;
1370 break;
1371 case OP_TXG:
1372 code[0] = 0x01000000;
1373 code[1] = 0x80000000;
1374 break;
1375 default:
1376 assert(i->op == OP_TEX);
1377 break;
1378 }
1379
1380 code[0] |= i->tex.r << 9;
1381 code[0] |= i->tex.s << 17;
1382
1383 int argc = i->tex.target.getArgCount();
1384
1385 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1386 argc += 1;
1387 if (i->tex.target.isShadow())
1388 argc += 1;
1389 assert(argc <= 4);
1390
1391 code[0] |= (argc - 1) << 22;
1392
1393 if (i->tex.target.isCube()) {
1394 code[0] |= 0x08000000;
1395 } else
1396 if (i->tex.useOffsets) {
1397 code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
1398 code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
1399 code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
1400 }
1401
1402 code[0] |= (i->tex.mask & 0x3) << 25;
1403 code[1] |= (i->tex.mask & 0xc) << 12;
1404
1405 if (i->tex.liveOnly)
1406 code[1] |= 4;
1407
1408 defId(i->def(0), 2);
1409
1410 emitFlagsRd(i);
1411 }
1412
1413 void
emitTXQ(const TexInstruction * i)1414 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1415 {
1416 assert(i->tex.query == TXQ_DIMS);
1417
1418 code[0] = 0xf0000001;
1419 code[1] = 0x60000000;
1420
1421 code[0] |= i->tex.r << 9;
1422 code[0] |= i->tex.s << 17;
1423
1424 code[0] |= (i->tex.mask & 0x3) << 25;
1425 code[1] |= (i->tex.mask & 0xc) << 12;
1426
1427 defId(i->def(0), 2);
1428
1429 emitFlagsRd(i);
1430 }
1431
1432 void
emitPRERETEmu(const FlowInstruction * i)1433 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1434 {
1435 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1436
1437 code[0] = 0x10000003; // bra
1438 code[1] = 0x00000780; // always
1439
1440 switch (i->subOp) {
1441 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1442 break;
1443 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1444 pos += 8;
1445 break;
1446 default:
1447 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1448 code[0] = 0x20000003; // call
1449 code[1] = 0x00000000; // no predicate
1450 break;
1451 }
1452 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1453 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1454 }
1455
1456 void
emitFlow(const Instruction * i,uint8_t flowOp)1457 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1458 {
1459 const FlowInstruction *f = i->asFlow();
1460 bool hasPred = false;
1461 bool hasTarg = false;
1462
1463 code[0] = 0x00000003 | (flowOp << 28);
1464 code[1] = 0x00000000;
1465
1466 switch (i->op) {
1467 case OP_BRA:
1468 hasPred = true;
1469 hasTarg = true;
1470 break;
1471 case OP_BREAK:
1472 case OP_BRKPT:
1473 case OP_DISCARD:
1474 case OP_RET:
1475 hasPred = true;
1476 break;
1477 case OP_CALL:
1478 case OP_PREBREAK:
1479 case OP_JOINAT:
1480 hasTarg = true;
1481 break;
1482 case OP_PRERET:
1483 hasTarg = true;
1484 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1485 emitPRERETEmu(f);
1486 return;
1487 }
1488 break;
1489 default:
1490 break;
1491 }
1492
1493 if (hasPred)
1494 emitFlagsRd(i);
1495
1496 if (hasTarg && f) {
1497 uint32_t pos;
1498
1499 if (f->op == OP_CALL) {
1500 if (f->builtin) {
1501 pos = targ->getBuiltinOffset(f->target.builtin);
1502 } else {
1503 pos = f->target.fn->binPos;
1504 }
1505 } else {
1506 pos = f->target.bb->binPos;
1507 }
1508
1509 code[0] |= ((pos >> 2) & 0xffff) << 11;
1510 code[1] |= ((pos >> 18) & 0x003f) << 14;
1511
1512 RelocEntry::Type relocTy;
1513
1514 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1515
1516 addReloc(relocTy, 0, pos, 0x07fff800, 9);
1517 addReloc(relocTy, 1, pos, 0x000fc000, -4);
1518 }
1519 }
1520
1521 bool
emitInstruction(Instruction * insn)1522 CodeEmitterNV50::emitInstruction(Instruction *insn)
1523 {
1524 if (!insn->encSize) {
1525 ERROR("skipping unencodable instruction: "); insn->print();
1526 return false;
1527 } else
1528 if (codeSize + insn->encSize > codeSizeLimit) {
1529 ERROR("code emitter output buffer too small\n");
1530 return false;
1531 }
1532
1533 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1534 INFO("EMIT: "); insn->print();
1535 }
1536
1537 switch (insn->op) {
1538 case OP_MOV:
1539 emitMOV(insn);
1540 break;
1541 case OP_EXIT:
1542 case OP_NOP:
1543 case OP_JOIN:
1544 emitNOP();
1545 break;
1546 case OP_VFETCH:
1547 case OP_LOAD:
1548 emitLOAD(insn);
1549 break;
1550 case OP_EXPORT:
1551 case OP_STORE:
1552 emitSTORE(insn);
1553 break;
1554 case OP_PFETCH:
1555 emitPFETCH(insn);
1556 break;
1557 case OP_LINTERP:
1558 case OP_PINTERP:
1559 emitINTERP(insn);
1560 break;
1561 case OP_ADD:
1562 case OP_SUB:
1563 if (isFloatType(insn->dType))
1564 emitFADD(insn);
1565 else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1566 emitAADD(insn);
1567 else
1568 emitUADD(insn);
1569 break;
1570 case OP_MUL:
1571 if (isFloatType(insn->dType))
1572 emitFMUL(insn);
1573 else
1574 emitIMUL(insn);
1575 break;
1576 case OP_MAD:
1577 case OP_FMA:
1578 if (isFloatType(insn->dType))
1579 emitFMAD(insn);
1580 else
1581 emitIMAD(insn);
1582 break;
1583 case OP_SAD:
1584 emitISAD(insn);
1585 break;
1586 case OP_NOT:
1587 emitNOT(insn);
1588 break;
1589 case OP_AND:
1590 case OP_OR:
1591 case OP_XOR:
1592 emitLogicOp(insn);
1593 break;
1594 case OP_SHL:
1595 case OP_SHR:
1596 emitShift(insn);
1597 break;
1598 case OP_SET:
1599 emitSET(insn);
1600 break;
1601 case OP_MIN:
1602 case OP_MAX:
1603 emitMINMAX(insn);
1604 break;
1605 case OP_CEIL:
1606 case OP_FLOOR:
1607 case OP_TRUNC:
1608 case OP_ABS:
1609 case OP_NEG:
1610 case OP_SAT:
1611 emitCVT(insn);
1612 break;
1613 case OP_CVT:
1614 if (insn->def(0).getFile() == FILE_ADDRESS)
1615 emitARL(insn, 0);
1616 else
1617 if (insn->def(0).getFile() == FILE_FLAGS ||
1618 insn->src(0).getFile() == FILE_FLAGS ||
1619 insn->src(0).getFile() == FILE_ADDRESS)
1620 emitMOV(insn);
1621 else
1622 emitCVT(insn);
1623 break;
1624 case OP_RCP:
1625 emitSFnOp(insn, 0);
1626 break;
1627 case OP_RSQ:
1628 emitSFnOp(insn, 2);
1629 break;
1630 case OP_LG2:
1631 emitSFnOp(insn, 3);
1632 break;
1633 case OP_SIN:
1634 emitSFnOp(insn, 4);
1635 break;
1636 case OP_COS:
1637 emitSFnOp(insn, 5);
1638 break;
1639 case OP_EX2:
1640 emitSFnOp(insn, 6);
1641 break;
1642 case OP_PRESIN:
1643 case OP_PREEX2:
1644 emitPreOp(insn);
1645 break;
1646 case OP_TEX:
1647 case OP_TXB:
1648 case OP_TXL:
1649 case OP_TXF:
1650 emitTEX(insn->asTex());
1651 break;
1652 case OP_TXQ:
1653 emitTXQ(insn->asTex());
1654 break;
1655 case OP_EMIT:
1656 case OP_RESTART:
1657 emitOUT(insn);
1658 break;
1659 case OP_DISCARD:
1660 emitFlow(insn, 0x0);
1661 break;
1662 case OP_BRA:
1663 emitFlow(insn, 0x1);
1664 break;
1665 case OP_CALL:
1666 emitFlow(insn, 0x2);
1667 break;
1668 case OP_RET:
1669 emitFlow(insn, 0x3);
1670 break;
1671 case OP_PREBREAK:
1672 emitFlow(insn, 0x4);
1673 break;
1674 case OP_BREAK:
1675 emitFlow(insn, 0x5);
1676 break;
1677 case OP_QUADON:
1678 emitFlow(insn, 0x6);
1679 break;
1680 case OP_QUADPOP:
1681 emitFlow(insn, 0x7);
1682 break;
1683 case OP_JOINAT:
1684 emitFlow(insn, 0xa);
1685 break;
1686 case OP_PRERET:
1687 emitFlow(insn, 0xd);
1688 break;
1689 case OP_QUADOP:
1690 emitQUADOP(insn, insn->lanes, insn->subOp);
1691 break;
1692 case OP_DFDX:
1693 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1694 break;
1695 case OP_DFDY:
1696 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
1697 break;
1698 case OP_PHI:
1699 case OP_UNION:
1700 case OP_CONSTRAINT:
1701 ERROR("operation should have been eliminated\n");
1702 return false;
1703 case OP_EXP:
1704 case OP_LOG:
1705 case OP_SQRT:
1706 case OP_POW:
1707 case OP_SELP:
1708 case OP_SLCT:
1709 case OP_TXD:
1710 case OP_PRECONT:
1711 case OP_CONT:
1712 case OP_POPCNT:
1713 case OP_INSBF:
1714 case OP_EXTBF:
1715 ERROR("operation should have been lowered\n");
1716 return false;
1717 default:
1718 ERROR("unknown op: %u\n", insn->op);
1719 return false;
1720 }
1721 if (insn->join || insn->op == OP_JOIN)
1722 code[1] |= 0x2;
1723 else
1724 if (insn->exit || insn->op == OP_EXIT)
1725 code[1] |= 0x1;
1726
1727 assert((insn->encSize == 8) == (code[0] & 1));
1728
1729 code += insn->encSize / 4;
1730 codeSize += insn->encSize;
1731 return true;
1732 }
1733
1734 uint32_t
getMinEncodingSize(const Instruction * i) const1735 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
1736 {
1737 const Target::OpInfo &info = targ->getOpInfo(i);
1738
1739 if (info.minEncSize > 4)
1740 return 8;
1741
1742 // check constraints on dst and src operands
1743 for (int d = 0; i->defExists(d); ++d) {
1744 if (i->def(d).rep()->reg.data.id > 63 ||
1745 i->def(d).rep()->reg.file != FILE_GPR)
1746 return 8;
1747 }
1748
1749 for (int s = 0; i->srcExists(s); ++s) {
1750 DataFile sf = i->src(s).getFile();
1751 if (sf != FILE_GPR)
1752 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
1753 return 8;
1754 if (i->src(s).rep()->reg.data.id > 63)
1755 return 8;
1756 }
1757
1758 // check modifiers & rounding
1759 if (i->join || i->lanes != 0xf || i->exit)
1760 return 8;
1761 if (i->op == OP_MUL && i->rnd != ROUND_N)
1762 return 8;
1763
1764 if (i->asTex())
1765 return 8; // TODO: short tex encoding
1766
1767 // check constraints on short MAD
1768 if (info.srcNr >= 2 && i->srcExists(2)) {
1769 if (i->saturate || i->src(2).mod)
1770 return 8;
1771 if ((i->src(0).mod ^ i->src(1).mod) ||
1772 (i->src(0).mod | i->src(1).mod).abs())
1773 return 8;
1774 if (!i->defExists(0) ||
1775 i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
1776 return 8;
1777 }
1778
1779 return info.minEncSize;
1780 }
1781
1782 // Change the encoding size of an instruction after BBs have been scheduled.
1783 static void
makeInstructionLong(Instruction * insn)1784 makeInstructionLong(Instruction *insn)
1785 {
1786 if (insn->encSize == 8)
1787 return;
1788 Function *fn = insn->bb->getFunction();
1789 int n = 0;
1790 int adj = 4;
1791
1792 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
1793
1794 if (n & 1) {
1795 adj = 8;
1796 insn->next->encSize = 8;
1797 } else
1798 if (insn->prev && insn->prev->encSize == 4) {
1799 adj = 8;
1800 insn->prev->encSize = 8;
1801 }
1802 insn->encSize = 8;
1803
1804 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
1805 fn->bbArray[i]->binPos += 4;
1806 }
1807 fn->binSize += adj;
1808 insn->bb->binSize += adj;
1809 }
1810
1811 static bool
trySetExitModifier(Instruction * insn)1812 trySetExitModifier(Instruction *insn)
1813 {
1814 if (insn->op == OP_DISCARD ||
1815 insn->op == OP_QUADON ||
1816 insn->op == OP_QUADPOP)
1817 return false;
1818 for (int s = 0; insn->srcExists(s); ++s)
1819 if (insn->src(s).getFile() == FILE_IMMEDIATE)
1820 return false;
1821 if (insn->asFlow()) {
1822 if (insn->op == OP_CALL) // side effects !
1823 return false;
1824 if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
1825 return false;
1826 insn->op = OP_EXIT;
1827 }
1828 insn->exit = 1;
1829 makeInstructionLong(insn);
1830 return true;
1831 }
1832
1833 static void
replaceExitWithModifier(Function * func)1834 replaceExitWithModifier(Function *func)
1835 {
1836 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
1837
1838 if (!epilogue->getExit() ||
1839 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
1840 return;
1841
1842 if (epilogue->getEntry()->op != OP_EXIT) {
1843 Instruction *insn = epilogue->getExit()->prev;
1844 if (!insn || !trySetExitModifier(insn))
1845 return;
1846 insn->exit = 1;
1847 } else {
1848 for (Graph::EdgeIterator ei = func->cfgExit->incident();
1849 !ei.end(); ei.next()) {
1850 BasicBlock *bb = BasicBlock::get(ei.getNode());
1851 Instruction *i = bb->getExit();
1852
1853 if (!i || !trySetExitModifier(i))
1854 return;
1855 }
1856 }
1857 epilogue->binSize -= 8;
1858 func->binSize -= 8;
1859 delete_Instruction(func->getProgram(), epilogue->getExit());
1860 }
1861
1862 void
prepareEmission(Function * func)1863 CodeEmitterNV50::prepareEmission(Function *func)
1864 {
1865 CodeEmitter::prepareEmission(func);
1866
1867 replaceExitWithModifier(func);
1868 }
1869
CodeEmitterNV50(const TargetNV50 * target)1870 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : CodeEmitter(target)
1871 {
1872 targ = target; // specialized
1873 code = NULL;
1874 codeSize = codeSizeLimit = 0;
1875 relocInfo = NULL;
1876 }
1877
1878 CodeEmitter *
getCodeEmitter(Program::Type type)1879 TargetNV50::getCodeEmitter(Program::Type type)
1880 {
1881 CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
1882 emit->setProgramType(type);
1883 return emit;
1884 }
1885
1886 } // namespace nv50_ir
1887