1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 #include "codegen/nv50_ir_driver.h"
26 
27 extern "C" {
28 #include "nouveau_debug.h"
29 #include "nv50/nv50_program.h"
30 }
31 
32 namespace nv50_ir {
33 
Modifier(operation op)34 Modifier::Modifier(operation op)
35 {
36    switch (op) {
37    case OP_NEG: bits = NV50_IR_MOD_NEG; break;
38    case OP_ABS: bits = NV50_IR_MOD_ABS; break;
39    case OP_SAT: bits = NV50_IR_MOD_SAT; break;
40    case OP_NOT: bits = NV50_IR_MOD_NOT; break;
41    default:
42       bits = 0;
43       break;
44    }
45 }
46 
// Combine this modifier with @m and return the result.
//
// NOT and NEG toggle (two of them cancel out), ABS and SAT accumulate.
// If this modifier contains ABS, the NEG bit of @m is discarded before the
// toggling bits are combined.
Modifier Modifier::operator*(const Modifier m) const
{
   unsigned int other = m.bits;
   if (bits & NV50_IR_MOD_ABS)
      other &= ~NV50_IR_MOD_NEG;

   const unsigned int toggling =
      (bits ^ other) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
   const unsigned int sticky =
      (bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);

   return Modifier(toggling | sticky);
}
60 
ValueRef(Value * v)61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
62 {
63    indirect[0] = -1;
64    indirect[1] = -1;
65    usedAsPtr = false;
66    set(v);
67 }
68 
// Copy constructor: refers to the same value as @ref with the same
// modifier, indirect indices, owning instruction and usedAsPtr flag.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;

   // copies value, modifier and indirect indices; adds us to the use list
   set(ref);
}
74 
~ValueRef()75 ValueRef::~ValueRef()
76 {
77    this->set(NULL);
78 }
79 
getImmediate(ImmediateValue & imm) const80 bool ValueRef::getImmediate(ImmediateValue &imm) const
81 {
82    const ValueRef *src = this;
83    Modifier m;
84    DataType type = src->insn->sType;
85 
86    while (src) {
87       if (src->mod) {
88          if (src->insn->sType != type)
89             break;
90          m *= src->mod;
91       }
92       if (src->getFile() == FILE_IMMEDIATE) {
93          imm = *(src->value->asImm());
94          // The immediate's type isn't required to match its use, it's
95          // more of a hint; applying a modifier makes use of that hint.
96          imm.reg.type = type;
97          m.applyTo(imm);
98          return true;
99       }
100 
101       Instruction *insn = src->value->getUniqueInsn();
102 
103       if (insn && insn->op == OP_MOV) {
104          src = &insn->src(0);
105          if (src->mod)
106             WARN("OP_MOV with modifier encountered !\n");
107       } else {
108          src = NULL;
109       }
110    }
111    return false;
112 }
113 
ValueDef(Value * v)114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
115 {
116    set(v);
117 }
118 
// Copy constructor: defines the same value as @def. The owning instruction
// is not copied; the new definition starts out unattached (insn == NULL).
ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
{
   Value *const defined = def.get();
   set(defined);
}
123 
~ValueDef()124 ValueDef::~ValueDef()
125 {
126    this->set(NULL);
127 }
128 
129 void
set(const ValueRef & ref)130 ValueRef::set(const ValueRef &ref)
131 {
132    this->set(ref.get());
133    mod = ref.mod;
134    indirect[0] = ref.indirect[0];
135    indirect[1] = ref.indirect[1];
136 }
137 
138 void
set(Value * refVal)139 ValueRef::set(Value *refVal)
140 {
141    if (value == refVal)
142       return;
143    if (value)
144       value->uses.erase(this);
145    if (refVal)
146       refVal->uses.insert(this);
147 
148    value = refVal;
149 }
150 
151 void
set(Value * defVal)152 ValueDef::set(Value *defVal)
153 {
154    if (value == defVal)
155       return;
156    if (value)
157       value->defs.remove(this);
158    if (defVal)
159       defVal->defs.push_back(this);
160 
161    value = defVal;
162 }
163 
164 // Check if we can replace this definition's value by the value in @rep,
165 // including the source modifiers, i.e. make sure that all uses support
166 // @rep.mod.
167 bool
mayReplace(const ValueRef & rep)168 ValueDef::mayReplace(const ValueRef &rep)
169 {
170    if (!rep.mod)
171       return true;
172 
173    if (!insn || !insn->bb) // Unbound instruction ?
174       return false;
175 
176    const Target *target = insn->bb->getProgram()->getTarget();
177 
178    for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
179         ++it) {
180       Instruction *insn = (*it)->getInsn();
181       int s = -1;
182 
183       for (int i = 0; insn->srcExists(i); ++i) {
184          if (insn->src(i).get() == value) {
185             // If there are multiple references to us we'd have to check if the
186             // combination of mods is still supported, but just bail for now.
187             if (&insn->src(i) != (*it))
188                return false;
189             s = i;
190          }
191       }
192       assert(s >= 0); // integrity of uses list
193 
194       if (!target->isModSupported(insn, s, rep.mod))
195          return false;
196    }
197    return true;
198 }
199 
200 void
replace(const ValueRef & repVal,bool doSet)201 ValueDef::replace(const ValueRef &repVal, bool doSet)
202 {
203    assert(mayReplace(repVal));
204 
205    if (value == repVal.get())
206       return;
207 
208    while (!value->uses.empty()) {
209       ValueRef *ref = *value->uses.begin();
210       ref->set(repVal.get());
211       ref->mod *= repVal.mod;
212    }
213 
214    if (doSet)
215       set(repVal.get());
216 }
217 
Value()218 Value::Value()
219 {
220   join = this;
221   memset(&reg, 0, sizeof(reg));
222   reg.size = 4;
223 }
224 
LValue(Function * fn,DataFile file)225 LValue::LValue(Function *fn, DataFile file)
226 {
227    reg.file = file;
228    reg.size = (file != FILE_PREDICATE) ? 4 : 1;
229    reg.data.id = -1;
230 
231    compMask = 0;
232    compound = 0;
233    ssa = 0;
234    fixedReg = 0;
235    noSpill = 0;
236 
237    fn->add(this, this->id);
238 }
239 
LValue(Function * fn,LValue * lval)240 LValue::LValue(Function *fn, LValue *lval)
241 {
242    assert(lval);
243 
244    reg.file = lval->reg.file;
245    reg.size = lval->reg.size;
246    reg.data.id = -1;
247 
248    compMask = 0;
249    compound = 0;
250    ssa = 0;
251    fixedReg = 0;
252    noSpill = 0;
253 
254    fn->add(this, this->id);
255 }
256 
257 LValue *
clone(ClonePolicy<Function> & pol) const258 LValue::clone(ClonePolicy<Function>& pol) const
259 {
260    LValue *that = new_LValue(pol.context(), reg.file);
261 
262    pol.set<Value>(this, that);
263 
264    that->reg.size = this->reg.size;
265    that->reg.type = this->reg.type;
266    that->reg.data = this->reg.data;
267 
268    return that;
269 }
270 
271 bool
isUniform() const272 LValue::isUniform() const
273 {
274    if (defs.size() > 1)
275       return false;
276    Instruction *insn = getInsn();
277    if (!insn)
278       return false;
279    // let's not try too hard here for now ...
280    return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
281 }
282 
Symbol(Program * prog,DataFile f,ubyte fidx)283 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
284 {
285    baseSym = NULL;
286 
287    reg.file = f;
288    reg.fileIndex = fidx;
289    reg.data.offset = 0;
290 
291    prog->add(this, this->id);
292 }
293 
294 Symbol *
clone(ClonePolicy<Function> & pol) const295 Symbol::clone(ClonePolicy<Function>& pol) const
296 {
297    Program *prog = pol.context()->getProgram();
298 
299    Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
300 
301    pol.set<Value>(this, that);
302 
303    that->reg.size = this->reg.size;
304    that->reg.type = this->reg.type;
305    that->reg.data = this->reg.data;
306 
307    that->baseSym = this->baseSym;
308 
309    return that;
310 }
311 
312 bool
isUniform() const313 Symbol::isUniform() const
314 {
315    return
316       reg.file != FILE_SYSTEM_VALUE &&
317       reg.file != FILE_MEMORY_LOCAL &&
318       reg.file != FILE_SHADER_INPUT;
319 }
320 
ImmediateValue(Program * prog,uint32_t uval)321 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
322 {
323    memset(&reg, 0, sizeof(reg));
324 
325    reg.file = FILE_IMMEDIATE;
326    reg.size = 4;
327    reg.type = TYPE_U32;
328 
329    reg.data.u32 = uval;
330 
331    prog->add(this, this->id);
332 }
333 
ImmediateValue(Program * prog,float fval)334 ImmediateValue::ImmediateValue(Program *prog, float fval)
335 {
336    memset(&reg, 0, sizeof(reg));
337 
338    reg.file = FILE_IMMEDIATE;
339    reg.size = 4;
340    reg.type = TYPE_F32;
341 
342    reg.data.f32 = fval;
343 
344    prog->add(this, this->id);
345 }
346 
ImmediateValue(Program * prog,double dval)347 ImmediateValue::ImmediateValue(Program *prog, double dval)
348 {
349    memset(&reg, 0, sizeof(reg));
350 
351    reg.file = FILE_IMMEDIATE;
352    reg.size = 8;
353    reg.type = TYPE_F64;
354 
355    reg.data.f64 = dval;
356 
357    prog->add(this, this->id);
358 }
359 
// Create a retyped copy of @proto: the storage info is copied verbatim,
// then type and size are overridden with @ty and typeSizeof(@ty).
// Unlike the other constructors this one does not add the value to a
// program.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   reg = proto->reg;

   reg.size = typeSizeof(ty);
   reg.type = ty;
}
367 
368 ImmediateValue *
clone(ClonePolicy<Function> & pol) const369 ImmediateValue::clone(ClonePolicy<Function>& pol) const
370 {
371    Program *prog = pol.context()->getProgram();
372    ImmediateValue *that = new_ImmediateValue(prog, 0u);
373 
374    pol.set<Value>(this, that);
375 
376    that->reg.size = this->reg.size;
377    that->reg.type = this->reg.type;
378    that->reg.data = this->reg.data;
379 
380    return that;
381 }
382 
383 bool
isInteger(const int i) const384 ImmediateValue::isInteger(const int i) const
385 {
386    switch (reg.type) {
387    case TYPE_S8:
388       return reg.data.s8 == i;
389    case TYPE_U8:
390       return reg.data.u8 == i;
391    case TYPE_S16:
392       return reg.data.s16 == i;
393    case TYPE_U16:
394       return reg.data.u16 == i;
395    case TYPE_S32:
396    case TYPE_U32:
397       return reg.data.s32 == i; // as if ...
398    case TYPE_S64:
399    case TYPE_U64:
400       return reg.data.s64 == i; // as if ...
401    case TYPE_F32:
402       return reg.data.f32 == static_cast<float>(i);
403    case TYPE_F64:
404       return reg.data.f64 == static_cast<double>(i);
405    default:
406       return false;
407    }
408 }
409 
410 bool
isNegative() const411 ImmediateValue::isNegative() const
412 {
413    switch (reg.type) {
414    case TYPE_S8:  return reg.data.s8 < 0;
415    case TYPE_S16: return reg.data.s16 < 0;
416    case TYPE_S32:
417    case TYPE_U32: return reg.data.s32 < 0;
418    case TYPE_F32: return reg.data.u32 & (1 << 31);
419    case TYPE_F64: return reg.data.u64 & (1ULL << 63);
420    default:
421       return false;
422    }
423 }
424 
425 bool
isPow2() const426 ImmediateValue::isPow2() const
427 {
428    if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
429       return util_is_power_of_two_or_zero64(reg.data.u64);
430    else
431       return util_is_power_of_two_or_zero(reg.data.u32);
432 }
433 
434 void
applyLog2()435 ImmediateValue::applyLog2()
436 {
437    switch (reg.type) {
438    case TYPE_S8:
439    case TYPE_S16:
440    case TYPE_S32:
441       assert(!this->isNegative());
442       // fall through
443    case TYPE_U8:
444    case TYPE_U16:
445    case TYPE_U32:
446       reg.data.u32 = util_logbase2(reg.data.u32);
447       break;
448    case TYPE_S64:
449       assert(!this->isNegative());
450       // fall through
451    case TYPE_U64:
452       reg.data.u64 = util_logbase2_64(reg.data.u64);
453       break;
454    case TYPE_F32:
455       reg.data.f32 = log2f(reg.data.f32);
456       break;
457    case TYPE_F64:
458       reg.data.f64 = log2(reg.data.f64);
459       break;
460    default:
461       assert(0);
462       break;
463    }
464 }
465 
466 bool
compare(CondCode cc,float fval) const467 ImmediateValue::compare(CondCode cc, float fval) const
468 {
469    if (reg.type != TYPE_F32)
470       ERROR("immediate value is not of type f32");
471 
472    switch (static_cast<CondCode>(cc & 7)) {
473    case CC_TR: return true;
474    case CC_FL: return false;
475    case CC_LT: return reg.data.f32 <  fval;
476    case CC_LE: return reg.data.f32 <= fval;
477    case CC_GT: return reg.data.f32 >  fval;
478    case CC_GE: return reg.data.f32 >= fval;
479    case CC_EQ: return reg.data.f32 == fval;
480    case CC_NE: return reg.data.f32 != fval;
481    default:
482       assert(0);
483       return false;
484    }
485 }
486 
487 ImmediateValue&
operator =(const ImmediateValue & that)488 ImmediateValue::operator=(const ImmediateValue &that)
489 {
490    this->reg = that.reg;
491    return (*this);
492 }
493 
494 bool
interfers(const Value * that) const495 Value::interfers(const Value *that) const
496 {
497    uint32_t idA, idB;
498 
499    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
500       return false;
501    if (this->asImm())
502       return false;
503 
504    if (this->asSym()) {
505       idA = this->join->reg.data.offset;
506       idB = that->join->reg.data.offset;
507    } else {
508       idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
509       idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
510    }
511 
512    if (idA < idB)
513       return (idA + this->reg.size > idB);
514    else
515    if (idA > idB)
516       return (idB + that->reg.size > idA);
517    else
518       return (idA == idB);
519 }
520 
521 bool
equals(const Value * that,bool strict) const522 Value::equals(const Value *that, bool strict) const
523 {
524    if (strict)
525       return this == that;
526 
527    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
528       return false;
529    if (that->reg.size != this->reg.size)
530       return false;
531 
532    if (that->reg.data.id != this->reg.data.id)
533       return false;
534 
535    return true;
536 }
537 
538 bool
equals(const Value * that,bool strict) const539 ImmediateValue::equals(const Value *that, bool strict) const
540 {
541    const ImmediateValue *imm = that->asImm();
542    if (!imm)
543       return false;
544    return reg.data.u64 == imm->reg.data.u64;
545 }
546 
547 bool
equals(const Value * that,bool strict) const548 Symbol::equals(const Value *that, bool strict) const
549 {
550    if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
551       return false;
552    assert(that->asSym());
553 
554    if (this->baseSym != that->asSym()->baseSym)
555       return false;
556 
557    if (reg.file == FILE_SYSTEM_VALUE)
558       return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
559               this->reg.data.sv.index == that->reg.data.sv.index);
560    return this->reg.data.offset == that->reg.data.offset;
561 }
562 
init()563 void Instruction::init()
564 {
565    next = prev = 0;
566 
567    cc = CC_ALWAYS;
568    rnd = ROUND_N;
569    cache = CACHE_CA;
570    subOp = 0;
571 
572    saturate = 0;
573    join = 0;
574    exit = 0;
575    terminator = 0;
576    ftz = 0;
577    dnz = 0;
578    perPatch = 0;
579    fixed = 0;
580    encSize = 0;
581    ipa = 0;
582    mask = 0;
583    precise = 0;
584 
585    lanes = 0xf;
586 
587    postFactor = 0;
588 
589    predSrc = -1;
590    flagsDef = -1;
591    flagsSrc = -1;
592 }
593 
Instruction()594 Instruction::Instruction()
595 {
596    init();
597 
598    op = OP_NOP;
599    dType = sType = TYPE_F32;
600 
601    id = -1;
602    bb = 0;
603 }
604 
Instruction(Function * fn,operation opr,DataType ty)605 Instruction::Instruction(Function *fn, operation opr, DataType ty)
606 {
607    init();
608 
609    op = opr;
610    dType = sType = ty;
611 
612    fn->add(this, id);
613 }
614 
~Instruction()615 Instruction::~Instruction()
616 {
617    if (bb) {
618       Function *fn = bb->getFunction();
619       bb->remove(this);
620       fn->allInsns.remove(id);
621    }
622 
623    for (int s = 0; srcExists(s); ++s)
624       setSrc(s, NULL);
625    // must unlink defs too since the list pointers will get deallocated
626    for (int d = 0; defExists(d); ++d)
627       setDef(d, NULL);
628 }
629 
630 void
setDef(int i,Value * val)631 Instruction::setDef(int i, Value *val)
632 {
633    int size = defs.size();
634    if (i >= size) {
635       defs.resize(i + 1);
636       while (size <= i)
637          defs[size++].setInsn(this);
638    }
639    defs[i].set(val);
640 }
641 
642 void
setSrc(int s,Value * val)643 Instruction::setSrc(int s, Value *val)
644 {
645    int size = srcs.size();
646    if (s >= size) {
647       srcs.resize(s + 1);
648       while (size <= s)
649          srcs[size++].setInsn(this);
650    }
651    srcs[s].set(val);
652 }
653 
654 void
setSrc(int s,const ValueRef & ref)655 Instruction::setSrc(int s, const ValueRef& ref)
656 {
657    setSrc(s, ref.get());
658    srcs[s].mod = ref.mod;
659 }
660 
661 void
swapSources(int a,int b)662 Instruction::swapSources(int a, int b)
663 {
664    Value *value = srcs[a].get();
665    Modifier m = srcs[a].mod;
666 
667    setSrc(a, srcs[b]);
668 
669    srcs[b].set(value);
670    srcs[b].mod = m;
671 }
672 
// Helper for Instruction::moveSources(): fix up a source index after the
// sources starting at @s have been shifted by @delta.
// An index at or beyond @s moves along with its source; an index that
// falls into the erased range [@s + @delta, @s) (only for @delta < 0) is
// invalidated by setting it to -1. Any other index is left alone.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }
   if (delta < 0 && index >= s + delta)
      index = -1;
}
681 
682 // Moves sources [@s,last_source] by @delta.
683 // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
684 void
moveSources(const int s,const int delta)685 Instruction::moveSources(const int s, const int delta)
686 {
687    if (delta == 0)
688       return;
689    assert(s + delta >= 0);
690 
691    int k;
692 
693    for (k = 0; srcExists(k); ++k) {
694       for (int i = 0; i < 2; ++i)
695          moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
696    }
697    moveSourcesAdjustIndex(predSrc, s, delta);
698    moveSourcesAdjustIndex(flagsSrc, s, delta);
699    if (asTex()) {
700       TexInstruction *tex = asTex();
701       moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
702       moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
703    }
704 
705    if (delta > 0) {
706       --k;
707       for (int p = k + delta; k >= s; --k, --p)
708          setSrc(p, src(k));
709    } else {
710       int p;
711       for (p = s; p < k; ++p)
712          setSrc(p + delta, src(p));
713       for (; (p + delta) < k; ++p)
714          setSrc(p + delta, NULL);
715    }
716 }
717 
718 void
takeExtraSources(int s,Value * values[3])719 Instruction::takeExtraSources(int s, Value *values[3])
720 {
721    values[0] = getIndirect(s, 0);
722    if (values[0])
723       setIndirect(s, 0, NULL);
724 
725    values[1] = getIndirect(s, 1);
726    if (values[1])
727       setIndirect(s, 1, NULL);
728 
729    values[2] = getPredicate();
730    if (values[2])
731       setPredicate(cc, NULL);
732 }
733 
734 void
putExtraSources(int s,Value * values[3])735 Instruction::putExtraSources(int s, Value *values[3])
736 {
737    if (values[0])
738       setIndirect(s, 0, values[0]);
739    if (values[1])
740       setIndirect(s, 1, values[1]);
741    if (values[2])
742       setPredicate(cc, values[2]);
743 }
744 
745 Instruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const746 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
747 {
748    if (!i)
749       i = new_Instruction(pol.context(), op, dType);
750 #ifndef NDEBUG // non-conformant assert, so this is required
751    assert(typeid(*i) == typeid(*this));
752 #endif
753 
754    pol.set<Instruction>(this, i);
755 
756    i->sType = sType;
757 
758    i->rnd = rnd;
759    i->cache = cache;
760    i->subOp = subOp;
761 
762    i->saturate = saturate;
763    i->join = join;
764    i->exit = exit;
765    i->mask = mask;
766    i->ftz = ftz;
767    i->dnz = dnz;
768    i->ipa = ipa;
769    i->lanes = lanes;
770    i->perPatch = perPatch;
771 
772    i->postFactor = postFactor;
773 
774    for (int d = 0; defExists(d); ++d)
775       i->setDef(d, pol.get(getDef(d)));
776 
777    for (int s = 0; srcExists(s); ++s) {
778       i->setSrc(s, pol.get(getSrc(s)));
779       i->src(s).mod = src(s).mod;
780    }
781 
782    i->cc = cc;
783    i->predSrc = predSrc;
784    i->flagsDef = flagsDef;
785    i->flagsSrc = flagsSrc;
786 
787    return i;
788 }
789 
790 unsigned int
defCount(unsigned int mask,bool singleFile) const791 Instruction::defCount(unsigned int mask, bool singleFile) const
792 {
793    unsigned int i, n;
794 
795    if (singleFile) {
796       unsigned int d = ffs(mask);
797       if (!d)
798          return 0;
799       for (i = d--; defExists(i); ++i)
800          if (getDef(i)->reg.file != getDef(d)->reg.file)
801             mask &= ~(1 << i);
802    }
803 
804    for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
805       n += mask & 1;
806    return n;
807 }
808 
809 unsigned int
srcCount(unsigned int mask,bool singleFile) const810 Instruction::srcCount(unsigned int mask, bool singleFile) const
811 {
812    unsigned int i, n;
813 
814    if (singleFile) {
815       unsigned int s = ffs(mask);
816       if (!s)
817          return 0;
818       for (i = s--; srcExists(i); ++i)
819          if (getSrc(i)->reg.file != getSrc(s)->reg.file)
820             mask &= ~(1 << i);
821    }
822 
823    for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
824       n += mask & 1;
825    return n;
826 }
827 
828 bool
setIndirect(int s,int dim,Value * value)829 Instruction::setIndirect(int s, int dim, Value *value)
830 {
831    assert(this->srcExists(s));
832 
833    int p = srcs[s].indirect[dim];
834    if (p < 0) {
835       if (!value)
836          return true;
837       p = srcs.size();
838       while (p > 0 && !srcExists(p - 1))
839          --p;
840    }
841    setSrc(p, value);
842    srcs[p].usedAsPtr = (value != 0);
843    srcs[s].indirect[dim] = value ? p : -1;
844    return true;
845 }
846 
847 bool
setPredicate(CondCode ccode,Value * value)848 Instruction::setPredicate(CondCode ccode, Value *value)
849 {
850    cc = ccode;
851 
852    if (!value) {
853       if (predSrc >= 0) {
854          srcs[predSrc].set(NULL);
855          predSrc = -1;
856       }
857       return true;
858    }
859 
860    if (predSrc < 0) {
861       predSrc = srcs.size();
862       while (predSrc > 0 && !srcExists(predSrc - 1))
863          --predSrc;
864    }
865 
866    setSrc(predSrc, value);
867    return true;
868 }
869 
870 bool
writesPredicate() const871 Instruction::writesPredicate() const
872 {
873    for (int d = 0; defExists(d); ++d)
874       if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
875          return true;
876    return false;
877 }
878 
879 bool
canCommuteDefSrc(const Instruction * i) const880 Instruction::canCommuteDefSrc(const Instruction *i) const
881 {
882    for (int d = 0; defExists(d); ++d)
883       for (int s = 0; i->srcExists(s); ++s)
884          if (getDef(d)->interfers(i->getSrc(s)))
885             return false;
886    return true;
887 }
888 
889 bool
canCommuteDefDef(const Instruction * i) const890 Instruction::canCommuteDefDef(const Instruction *i) const
891 {
892    for (int d = 0; defExists(d); ++d)
893       for (int c = 0; i->defExists(c); ++c)
894          if (getDef(d)->interfers(i->getDef(c)))
895             return false;
896    return true;
897 }
898 
899 bool
isCommutationLegal(const Instruction * i) const900 Instruction::isCommutationLegal(const Instruction *i) const
901 {
902    return canCommuteDefDef(i) &&
903       canCommuteDefSrc(i) &&
904       i->canCommuteDefSrc(this);
905 }
906 
TexInstruction(Function * fn,operation op)907 TexInstruction::TexInstruction(Function *fn, operation op)
908    : Instruction(fn, op, TYPE_F32), tex()
909 {
910    tex.rIndirectSrc = -1;
911    tex.sIndirectSrc = -1;
912 
913    if (op == OP_TXF)
914       sType = TYPE_U32;
915 }
916 
~TexInstruction()917 TexInstruction::~TexInstruction()
918 {
919    for (int c = 0; c < 3; ++c) {
920       dPdx[c].set(NULL);
921       dPdy[c].set(NULL);
922    }
923    for (int n = 0; n < 4; ++n)
924       for (int c = 0; c < 3; ++c)
925          offset[n][c].set(NULL);
926 }
927 
928 TexInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const929 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
930 {
931    TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
932                           new_TexInstruction(pol.context(), op));
933 
934    Instruction::clone(pol, tex);
935 
936    tex->tex = this->tex;
937 
938    if (op == OP_TXD) {
939       for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
940          tex->dPdx[c].set(dPdx[c]);
941          tex->dPdy[c].set(dPdy[c]);
942       }
943    }
944 
945    for (int n = 0; n < tex->tex.useOffsets; ++n)
946       for (int c = 0; c < 3; ++c)
947          tex->offset[n][c].set(offset[n][c]);
948 
949    return tex;
950 }
951 
952 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
953 {
954    { "1D",                1, 1, false, false, false },
955    { "2D",                2, 2, false, false, false },
956    { "2D_MS",             2, 3, false, false, false },
957    { "3D",                3, 3, false, false, false },
958    { "CUBE",              2, 3, false, true,  false },
959    { "1D_SHADOW",         1, 1, false, false, true  },
960    { "2D_SHADOW",         2, 2, false, false, true  },
961    { "CUBE_SHADOW",       2, 3, false, true,  true  },
962    { "1D_ARRAY",          1, 2, true,  false, false },
963    { "2D_ARRAY",          2, 3, true,  false, false },
964    { "2D_MS_ARRAY",       2, 4, true,  false, false },
965    { "CUBE_ARRAY",        2, 4, true,  true,  false },
966    { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
967    { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
968    { "RECT",              2, 2, false, false, false },
969    { "RECT_SHADOW",       2, 2, false, false, true  },
970    { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
971    { "BUFFER",            1, 1, false, false, false },
972 };
973 
974 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
975 {
976    { "NONE",         0, {  0,  0,  0,  0 },  UINT },
977 
978    { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
979    { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
980    { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
981    { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
982    { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
983    { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
984    { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },
985 
986    { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
987    { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
988    { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
989    { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
990    { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
991    { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
992    { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
993    { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
994    { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
995    { "R8UI",         1, {  8,  0,  0,  0 },  UINT },
996 
997    { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
998    { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
999    { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
1000    { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
1001    { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
1002    { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
1003    { "R32I",         1, { 32,  0,  0,  0 },  SINT },
1004    { "R16I",         1, { 16,  0,  0,  0 },  SINT },
1005    { "R8I",          1, {  8,  0,  0,  0 },  SINT },
1006 
1007    { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
1008    { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
1009    { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
1010    { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
1011    { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
1012    { "R16",          1, { 16,  0,  0,  0 }, UNORM },
1013    { "R8",           1, {  8,  0,  0,  0 }, UNORM },
1014 
1015    { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
1016    { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
1017    { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
1018    { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
1019    { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
1020    { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },
1021 
1022    { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
1023 };
1024 
1025 const struct TexInstruction::ImgFormatDesc *
translateImgFormat(enum pipe_format format)1026 TexInstruction::translateImgFormat(enum pipe_format format)
1027 {
1028 
1029 #define FMT_CASE(a, b) \
1030   case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]
1031 
1032    switch (format) {
1033    FMT_CASE(NONE, NONE);
1034 
1035    FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
1036    FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
1037    FMT_CASE(R32G32_FLOAT, RG32F);
1038    FMT_CASE(R16G16_FLOAT, RG16F);
1039    FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
1040    FMT_CASE(R32_FLOAT, R32F);
1041    FMT_CASE(R16_FLOAT, R16F);
1042 
1043    FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
1044    FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
1045    FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
1046    FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
1047    FMT_CASE(R32G32_UINT, RG32UI);
1048    FMT_CASE(R16G16_UINT, RG16UI);
1049    FMT_CASE(R8G8_UINT, RG8UI);
1050    FMT_CASE(R32_UINT, R32UI);
1051    FMT_CASE(R16_UINT, R16UI);
1052    FMT_CASE(R8_UINT, R8UI);
1053 
1054    FMT_CASE(R32G32B32A32_SINT, RGBA32I);
1055    FMT_CASE(R16G16B16A16_SINT, RGBA16I);
1056    FMT_CASE(R8G8B8A8_SINT, RGBA8I);
1057    FMT_CASE(R32G32_SINT, RG32I);
1058    FMT_CASE(R16G16_SINT, RG16I);
1059    FMT_CASE(R8G8_SINT, RG8I);
1060    FMT_CASE(R32_SINT, R32I);
1061    FMT_CASE(R16_SINT, R16I);
1062    FMT_CASE(R8_SINT, R8I);
1063 
1064    FMT_CASE(R16G16B16A16_UNORM, RGBA16);
1065    FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
1066    FMT_CASE(R8G8B8A8_UNORM, RGBA8);
1067    FMT_CASE(R16G16_UNORM, RG16);
1068    FMT_CASE(R8G8_UNORM, RG8);
1069    FMT_CASE(R16_UNORM, R16);
1070    FMT_CASE(R8_UNORM, R8);
1071 
1072    FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
1073    FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
1074    FMT_CASE(R16G16_SNORM, RG16_SNORM);
1075    FMT_CASE(R8G8_SNORM, RG8_SNORM);
1076    FMT_CASE(R16_SNORM, R16_SNORM);
1077    FMT_CASE(R8_SNORM, R8_SNORM);
1078 
1079    FMT_CASE(B8G8R8A8_UNORM, BGRA8);
1080 
1081    default:
1082       assert(!"Unexpected format");
1083       return &formatTable[nv50_ir::FMT_NONE];
1084    }
1085 }
1086 
1087 void
setIndirectR(Value * v)1088 TexInstruction::setIndirectR(Value *v)
1089 {
1090    int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1091    if (p >= 0) {
1092       tex.rIndirectSrc = p;
1093       setSrc(p, v);
1094       srcs[p].usedAsPtr = !!v;
1095    }
1096 }
1097 
1098 void
setIndirectS(Value * v)1099 TexInstruction::setIndirectS(Value *v)
1100 {
1101    int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1102    if (p >= 0) {
1103       tex.sIndirectSrc = p;
1104       setSrc(p, v);
1105       srcs[p].usedAsPtr = !!v;
1106    }
1107 }
1108 
CmpInstruction(Function * fn,operation op)1109 CmpInstruction::CmpInstruction(Function *fn, operation op)
1110    : Instruction(fn, op, TYPE_F32)
1111 {
1112    setCond = CC_ALWAYS;
1113 }
1114 
1115 CmpInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1116 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1117 {
1118    CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1119                           new_CmpInstruction(pol.context(), op));
1120    cmp->dType = dType;
1121    Instruction::clone(pol, cmp);
1122    cmp->setCond = setCond;
1123    return cmp;
1124 }
1125 
FlowInstruction(Function * fn,operation op,void * targ)1126 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1127    : Instruction(fn, op, TYPE_NONE)
1128 {
1129    if (op == OP_CALL)
1130       target.fn = reinterpret_cast<Function *>(targ);
1131    else
1132       target.bb = reinterpret_cast<BasicBlock *>(targ);
1133 
1134    if (op == OP_BRA ||
1135        op == OP_CONT || op == OP_BREAK ||
1136        op == OP_RET || op == OP_EXIT)
1137       terminator = 1;
1138    else
1139    if (op == OP_JOIN)
1140       terminator = targ ? 1 : 0;
1141 
1142    allWarp = absolute = limit = builtin = indirect = 0;
1143 }
1144 
1145 FlowInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1146 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1147 {
1148    FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1149                             new_FlowInstruction(pol.context(), op, NULL));
1150 
1151    Instruction::clone(pol, flow);
1152    flow->allWarp = allWarp;
1153    flow->absolute = absolute;
1154    flow->limit = limit;
1155    flow->builtin = builtin;
1156 
1157    if (builtin)
1158       flow->target.builtin = target.builtin;
1159    else
1160    if (op == OP_CALL)
1161       flow->target.fn = target.fn;
1162    else
1163    if (target.bb)
1164       flow->target.bb = pol.get<BasicBlock>(target.bb);
1165 
1166    return flow;
1167 }
1168 
Program(Type type,Target * arch)1169 Program::Program(Type type, Target *arch)
1170    : progType(type),
1171      target(arch),
1172      mem_Instruction(sizeof(Instruction), 6),
1173      mem_CmpInstruction(sizeof(CmpInstruction), 4),
1174      mem_TexInstruction(sizeof(TexInstruction), 4),
1175      mem_FlowInstruction(sizeof(FlowInstruction), 4),
1176      mem_LValue(sizeof(LValue), 8),
1177      mem_Symbol(sizeof(Symbol), 7),
1178      mem_ImmediateValue(sizeof(ImmediateValue), 7)
1179 {
1180    code = NULL;
1181    binSize = 0;
1182 
1183    maxGPR = -1;
1184    fp64 = false;
1185    persampleInvocation = false;
1186 
1187    main = new Function(this, "MAIN", ~0);
1188    calls.insert(&main->call);
1189 
1190    dbgFlags = 0;
1191    optLevel = 0;
1192 
1193    targetPriv = NULL;
1194 }
1195 
~Program()1196 Program::~Program()
1197 {
1198    for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1199       delete reinterpret_cast<Function *>(it.get());
1200 
1201    for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1202       releaseValue(reinterpret_cast<Value *>(it.get()));
1203 }
1204 
releaseInstruction(Instruction * insn)1205 void Program::releaseInstruction(Instruction *insn)
1206 {
1207    // TODO: make this not suck so much
1208 
1209    insn->~Instruction();
1210 
1211    if (insn->asCmp())
1212       mem_CmpInstruction.release(insn);
1213    else
1214    if (insn->asTex())
1215       mem_TexInstruction.release(insn);
1216    else
1217    if (insn->asFlow())
1218       mem_FlowInstruction.release(insn);
1219    else
1220       mem_Instruction.release(insn);
1221 }
1222 
releaseValue(Value * value)1223 void Program::releaseValue(Value *value)
1224 {
1225    value->~Value();
1226 
1227    if (value->asLValue())
1228       mem_LValue.release(value);
1229    else
1230    if (value->asImm())
1231       mem_ImmediateValue.release(value);
1232    else
1233    if (value->asSym())
1234       mem_Symbol.release(value);
1235 }
1236 
1237 
1238 } // namespace nv50_ir
1239 
1240 extern "C" {
1241 
1242 static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1243 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1244                        struct nv50_ir_prog_info_out *info_out)
1245 {
1246    info_out->target = info->target;
1247    info_out->type = info->type;
1248    if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1249       info_out->prop.tp.domain = PIPE_PRIM_MAX;
1250       info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1251    }
1252    if (info->type == PIPE_SHADER_GEOMETRY) {
1253       info_out->prop.gp.instanceCount = 1;
1254       info_out->prop.gp.maxVertices = 1;
1255    }
1256    if (info->type == PIPE_SHADER_COMPUTE) {
1257       info->prop.cp.numThreads[0] =
1258       info->prop.cp.numThreads[1] =
1259       info->prop.cp.numThreads[2] = 1;
1260    }
1261    info_out->bin.smemSize = info->bin.smemSize;
1262    info_out->io.genUserClip = info->io.genUserClip;
1263    info_out->io.instanceId = 0xff;
1264    info_out->io.vertexId = 0xff;
1265    info_out->io.edgeFlagIn = 0xff;
1266    info_out->io.edgeFlagOut = 0xff;
1267    info_out->io.fragDepth = 0xff;
1268    info_out->io.sampleMask = 0xff;
1269 }
1270 
// Compile the shader described by @info and store the results in @info_out.
//
// The program is built from the source representation named by
// info->bin.sourceRep (NIR or TGSI), converted to SSA, optimized, register
// allocated and finally emitted as a binary, with the target's legalize
// passes run between the stages.
//
// Returns 0 on success, or a negative value on failure:
//   -1  unsupported shader type, target creation failed, Program allocation
//       check failed, or unknown source representation
//   -2  building the program from NIR/TGSI failed
//   -4  register allocation failed
//   -5  binary emission failed
//
// Once the Program exists, maxGPR, code, codeSize and tlsSpace in
// info_out->bin are written from it on every path, successful or not.
int
nv50_ir_generate_code(struct nv50_ir_prog_info *info,
                      struct nv50_ir_prog_info_out *info_out)
{
   int ret = 0;

   nv50_ir::Program::Type type;

   nv50_ir_init_prog_info(info, info_out);

   // Maps a PIPE_SHADER_* value onto the matching nv50_ir::Program::TYPE_*.
#define PROG_TYPE_CASE(a, b)                                      \
   case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break

   switch (info->type) {
   PROG_TYPE_CASE(VERTEX, VERTEX);
   PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
   PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
   PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
   PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
   PROG_TYPE_CASE(COMPUTE, COMPUTE);
   default:
      INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
      return -1;
   }
   INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);

   nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
   if (!targ)
      return -1;

   nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
   if (!prog) {
      nv50_ir::Target::destroy(targ);
      return -1;
   }
   // Give the program access to the driver's input and output structures and
   // copy the debug/optimization settings over.
   prog->driver = info;
   prog->driver_out = info_out;
   prog->dbgFlags = info->dbgFlags;
   prog->optLevel = info->optLevel;

   // Front end: build the IR from the shader's source representation.
   switch (info->bin.sourceRep) {
   case PIPE_SHADER_IR_NIR:
      ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
      break;
   case PIPE_SHADER_IR_TGSI:
      ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
      break;
   default:
      ret = -1;
      break;
   }
   if (ret < 0)
      goto out;
   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
      prog->print();

   targ->parseDriverInfo(info, info_out);
   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);

   prog->convertToSSA();

   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
      prog->print();

   prog->optimizeSSA(info->optLevel);
   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);

   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
      prog->print();

   if (!prog->registerAllocation()) {
      ret = -4;
      goto out;
   }
   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);

   prog->optimizePostRA(info->optLevel);

   if (!prog->emitBinary(info_out)) {
      ret = -5;
      goto out;
   }

out:
   // Reached on success and on every failure after the Program was created.
   INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);

   // Copy the results out before the Program is destroyed.
   info_out->bin.maxGPR = prog->maxGPR;
   info_out->bin.code = prog->code;
   info_out->bin.codeSize = prog->binSize;
   info_out->bin.tlsSpace = prog->tlsSize;

   delete prog;
   nv50_ir::Target::destroy(targ);

   return ret;
}
1367 
1368 } // extern "C"
1369