1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_target.h"
25 
26 namespace nv50_ir {
27 
28 const uint8_t Target::operationSrcNr[OP_LAST + 1] =
29 {
30    0, 0,                   // NOP, PHI
31    0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
32    1, 1, 2,                // MOV, LOAD, STORE
33    2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34    1, 1, 1,                // ABS, NEG, NOT
35    2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
36    2, 2, 1,                // MAX, MIN, SAT
37    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
38    3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
39    1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
40    1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
41    0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
42    0, 0, 0,                // PRERET,CONT,BREAK
43    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
44    1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
45    1, 1,                   // EMIT, RESTART
46    1, 1, 1,                // TEX, TXB, TXL,
47    1, 1, 1, 1, 1,          // TXF, TXQ, TXD, TXG, TEXCSAA
48    1, 2,                   // SULD, SUST
49    1, 1,                   // DFDX, DFDY
50    1, 2, 2, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
51    2, 3, 2, 0,             // POPCNT, INSBF, EXTBF, TEXBAR
52    0
53 };
54 
55 const OpClass Target::operationClass[OP_LAST + 1] =
56 {
57    // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
58    OPCLASS_OTHER,
59    OPCLASS_PSEUDO,
60    OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
61    // MOV; LOAD; STORE
62    OPCLASS_MOVE,
63    OPCLASS_LOAD,
64    OPCLASS_STORE,
65    // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
66    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
67    OPCLASS_ARITH, OPCLASS_ARITH,
68    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
69    // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
70    OPCLASS_CONVERT, OPCLASS_CONVERT,
71    OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
72    OPCLASS_SHIFT, OPCLASS_SHIFT,
73    // MAX, MIN
74    OPCLASS_COMPARE, OPCLASS_COMPARE,
75    // SAT, CEIL, FLOOR, TRUNC; CVT
76    OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
77    OPCLASS_CONVERT,
78    // SET(AND,OR,XOR); SELP, SLCT
79    OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
80    OPCLASS_COMPARE, OPCLASS_COMPARE,
81    // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
82    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
83    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
84    OPCLASS_SFU, OPCLASS_SFU,
85    // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
86    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
87    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
88    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
89    // DISCARD, EXIT
90    OPCLASS_FLOW, OPCLASS_FLOW,
91    // MEMBAR
92    OPCLASS_OTHER,
93    // VFETCH, PFETCH, EXPORT
94    OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
95    // LINTERP, PINTERP
96    OPCLASS_SFU, OPCLASS_SFU,
97    // EMIT, RESTART
98    OPCLASS_OTHER, OPCLASS_OTHER,
99    // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA
100    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
101    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
102    // SULD, SUST
103    OPCLASS_SURFACE, OPCLASS_SURFACE,
104    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
105    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
106    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
107    // POPCNT, INSBF, EXTBF
108    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
109    // TEXBAR
110    OPCLASS_OTHER,
111    OPCLASS_PSEUDO // LAST
112 };
113 
114 
115 extern Target *getTargetNVC0(unsigned int chipset);
116 extern Target *getTargetNV50(unsigned int chipset);
117 
create(unsigned int chipset)118 Target *Target::create(unsigned int chipset)
119 {
120    switch (chipset & 0xf0) {
121    case 0xc0:
122    case 0xd0:
123    case 0xe0:
124       return getTargetNVC0(chipset);
125    case 0x50:
126    case 0x80:
127    case 0x90:
128    case 0xa0:
129       return getTargetNV50(chipset);
130    default:
131       ERROR("unsupported target: NV%x\n", chipset);
132       return 0;
133    }
134 }
135 
destroy(Target * targ)136 void Target::destroy(Target *targ)
137 {
138    delete targ;
139 }
140 
CodeEmitter(const Target * target)141 CodeEmitter::CodeEmitter(const Target *target) : targ(target)
142 {
143 }
144 
145 void
setCodeLocation(void * ptr,uint32_t size)146 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
147 {
148    code = reinterpret_cast<uint32_t *>(ptr);
149    codeSize = 0;
150    codeSizeLimit = size;
151 }
152 
153 void
printBinary() const154 CodeEmitter::printBinary() const
155 {
156    uint32_t *bin = code - codeSize / 4;
157    INFO("program binary (%u bytes)", codeSize);
158    for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
159       if ((pos % 8) == 0)
160          INFO("\n");
161       INFO("%08x ", bin[pos]);
162    }
163    INFO("\n");
164 }
165 
/**
 * Number of 56-byte units needed to hold @size bytes, rounded up.
 *
 * The caller adds 8 bytes per returned unit to its positions.
 * NOTE(review): presumably one unit is a bundle of seven 8-byte instructions
 * that share one scheduling word - confirm against the NVE4 emitter.
 */
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bundleBytes = 56;

   return (size + (bundleBytes - 1)) / bundleBytes;
}
170 
171 void
prepareEmission(Program * prog)172 CodeEmitter::prepareEmission(Program *prog)
173 {
174    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
175         !fi.end(); fi.next()) {
176       Function *func = reinterpret_cast<Function *>(fi.get());
177       func->binPos = prog->binSize;
178       prepareEmission(func);
179 
180       // adjust sizes & positions for schedulding info:
181       if (prog->getTarget()->hasSWSched) {
182          BasicBlock *bb = NULL;
183          for (int i = 0; i < func->bbCount; ++i) {
184             bb = func->bbArray[i];
185             const uint32_t oldPos = bb->binPos;
186             const uint32_t oldEnd = bb->binPos + bb->binSize;
187             uint32_t adjPos = oldPos + sizeToBundlesNVE4(oldPos) * 8;
188             uint32_t adjEnd = oldEnd + sizeToBundlesNVE4(oldEnd) * 8;
189             bb->binPos = adjPos;
190             bb->binSize = adjEnd - adjPos;
191          }
192          if (bb)
193             func->binSize = bb->binPos + bb->binSize;
194       }
195 
196       prog->binSize += func->binSize;
197    }
198 }
199 
200 void
prepareEmission(Function * func)201 CodeEmitter::prepareEmission(Function *func)
202 {
203    func->bbCount = 0;
204    func->bbArray = new BasicBlock * [func->cfg.getSize()];
205 
206    BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
207 
208    for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
209       prepareEmission(BasicBlock::get(*it));
210 }
211 
212 void
prepareEmission(BasicBlock * bb)213 CodeEmitter::prepareEmission(BasicBlock *bb)
214 {
215    Instruction *i, *next;
216    Function *func = bb->getFunction();
217    int j;
218    unsigned int nShort;
219 
220    for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
221 
222    for (; j >= 0; --j) {
223       BasicBlock *in = func->bbArray[j];
224       Instruction *exit = in->getExit();
225 
226       if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
227          in->binSize -= 8;
228          func->binSize -= 8;
229 
230          for (++j; j < func->bbCount; ++j)
231             func->bbArray[j]->binPos -= 8;
232 
233          in->remove(exit);
234       }
235       bb->binPos = in->binPos + in->binSize;
236       if (in->binSize) // no more no-op branches to bb
237          break;
238    }
239    func->bbArray[func->bbCount++] = bb;
240 
241    if (!bb->getExit())
242       return;
243 
244    // determine encoding size, try to group short instructions
245    nShort = 0;
246    for (i = bb->getEntry(); i; i = next) {
247       next = i->next;
248 
249       i->encSize = getMinEncodingSize(i);
250       if (next && i->encSize < 8)
251          ++nShort;
252       else
253       if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
254          if (i->isCommutationLegal(i->next)) {
255             bb->permuteAdjacent(i, next);
256             next->encSize = 4;
257             next = i;
258             i = i->prev;
259             ++nShort;
260          } else
261          if (i->isCommutationLegal(i->prev) && next->next) {
262             bb->permuteAdjacent(i->prev, i);
263             next->encSize = 4;
264             next = next->next;
265             bb->binSize += 4;
266             ++nShort;
267          } else {
268             i->encSize = 8;
269             i->prev->encSize = 8;
270             bb->binSize += 4;
271             nShort = 0;
272          }
273       } else {
274          i->encSize = 8;
275          if (nShort & 1) {
276             i->prev->encSize = 8;
277             bb->binSize += 4;
278          }
279          nShort = 0;
280       }
281       bb->binSize += i->encSize;
282    }
283 
284    if (bb->getExit()->encSize == 4) {
285       assert(nShort);
286       bb->getExit()->encSize = 8;
287       bb->binSize += 4;
288 
289       if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
290          bb->binSize += 8;
291          bb->getExit()->prev->encSize = 8;
292       }
293    }
294    assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
295 
296    func->binSize += bb->binSize;
297 }
298 
299 void
emitSymbolTable(struct nv50_ir_prog_info * info)300 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
301 {
302    unsigned int n = 0, nMax = allFuncs.getSize();
303 
304    info->bin.syms =
305       (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
306 
307    for (ArrayList::Iterator fi = allFuncs.iterator();
308         !fi.end();
309         fi.next(), ++n) {
310       Function *f = (Function *)fi.get();
311       assert(n < nMax);
312 
313       info->bin.syms[n].label = f->getLabel();
314       info->bin.syms[n].offset = f->binPos;
315    }
316 
317    info->bin.numSyms = n;
318 }
319 
320 bool
emitBinary(struct nv50_ir_prog_info * info)321 Program::emitBinary(struct nv50_ir_prog_info *info)
322 {
323    CodeEmitter *emit = target->getCodeEmitter(progType);
324 
325    emit->prepareEmission(this);
326 
327    if (dbgFlags & NV50_IR_DEBUG_BASIC)
328       this->print();
329 
330    if (!binSize) {
331       code = NULL;
332       return false;
333    }
334    code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
335    if (!code)
336       return false;
337    emit->setCodeLocation(code, binSize);
338 
339    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
340       Function *fn = reinterpret_cast<Function *>(fi.get());
341 
342       assert(emit->getCodeSize() == fn->binPos);
343 
344       for (int b = 0; b < fn->bbCount; ++b)
345          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
346             emit->emitInstruction(i);
347    }
348    info->bin.relocData = emit->getRelocInfo();
349 
350    emitSymbolTable(info);
351 
352    // the nvc0 driver will print the binary iself together with the header
353    if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
354       emit->printBinary();
355 
356    delete emit;
357    return true;
358 }
359 
360 #define RELOC_ALLOC_INCREMENT 8
361 
362 bool
addReloc(RelocEntry::Type ty,int w,uint32_t data,uint32_t m,int s)363 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
364                       int s)
365 {
366    unsigned int n = relocInfo ? relocInfo->count : 0;
367 
368    if (!(n % RELOC_ALLOC_INCREMENT)) {
369       size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
370       relocInfo = reinterpret_cast<RelocInfo *>(
371          REALLOC(relocInfo, n ? size : 0,
372                  size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
373       if (!relocInfo)
374          return false;
375       if (n == 0)
376          memset(relocInfo, 0, sizeof(RelocInfo));
377    }
378    ++relocInfo->count;
379 
380    relocInfo->entry[n].data = data;
381    relocInfo->entry[n].mask = m;
382    relocInfo->entry[n].offset = codeSize + w * 4;
383    relocInfo->entry[n].bitPos = s;
384    relocInfo->entry[n].type = ty;
385 
386    return true;
387 }
388 
389 void
apply(uint32_t * binary,const RelocInfo * info) const390 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
391 {
392    uint32_t value = 0;
393 
394    switch (type) {
395    case TYPE_CODE: value = info->codePos; break;
396    case TYPE_BUILTIN: value = info->libPos; break;
397    case TYPE_DATA: value = info->dataPos; break;
398    default:
399       assert(0);
400       break;
401    }
402    value += data;
403    value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
404 
405    binary[offset / 4] &= ~mask;
406    binary[offset / 4] |= value & mask;
407 }
408 
409 } // namespace nv50_ir
410 
411 
412 #include "nv50/codegen/nv50_ir_driver.h"
413 
414 extern "C" {
415 
416 void
nv50_ir_relocate_code(void * relocData,uint32_t * code,uint32_t codePos,uint32_t libPos,uint32_t dataPos)417 nv50_ir_relocate_code(void *relocData, uint32_t *code,
418                       uint32_t codePos,
419                       uint32_t libPos,
420                       uint32_t dataPos)
421 {
422    nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
423 
424    info->codePos = codePos;
425    info->libPos = libPos;
426    info->dataPos = dataPos;
427 
428    for (unsigned int i = 0; i < info->count; ++i)
429       info->entry[i].apply(code, info);
430 }
431 
432 void
nv50_ir_get_target_library(uint32_t chipset,const uint32_t ** code,uint32_t * size)433 nv50_ir_get_target_library(uint32_t chipset,
434                            const uint32_t **code, uint32_t *size)
435 {
436    nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
437    targ->getBuiltinCode(code, size);
438    nv50_ir::Target::destroy(targ);
439 }
440 
441 }
442