1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
33 #include "sfn_instruction_lds.h"
34 
35 #include "../r600_shader.h"
36 #include "../r600_sq.h"
37 
38 namespace r600 {
39 
40 using std::vector;
41 
42 struct AssemblyFromShaderLegacyImpl {
43 
44    AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
45    bool emit(const Instruction::Pointer i);
reset_addr_registerr600::AssemblyFromShaderLegacyImpl46    void reset_addr_register() {m_last_addr.reset();}
47 
48 private:
49    bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op);
50    bool emit_export(const ExportInstruction & exi);
51    bool emit_streamout(const StreamOutIntruction& instr);
52    bool emit_memringwrite(const MemRingOutIntruction& instr);
53    bool emit_tex(const TexInstruction & tex_instr);
54    bool emit_vtx(const FetchInstruction& fetch_instr);
55    bool emit_if_start(const IfInstruction & if_instr);
56    bool emit_else(const ElseInstruction & else_instr);
57    bool emit_endif(const IfElseEndInstruction & endif_instr);
58    bool emit_emit_vertex(const EmitVertex &instr);
59 
60    bool emit_loop_begin(const LoopBeginInstruction& instr);
61    bool emit_loop_end(const LoopEndInstruction& instr);
62    bool emit_loop_break(const LoopBreakInstruction& instr);
63    bool emit_loop_continue(const LoopContInstruction& instr);
64    bool emit_wait_ack(const WaitAck& instr);
65    bool emit_wr_scratch(const WriteScratchInstruction& instr);
66    bool emit_gds(const GDSInstr& instr);
67    bool emit_rat(const RatInstruction& instr);
68    bool emit_ldswrite(const LDSWriteInstruction& instr);
69    bool emit_ldsread(const LDSReadInstruction& instr);
70    bool emit_ldsatomic(const LDSAtomicInstruction& instr);
71    bool emit_tf_write(const GDSStoreTessFactor& instr);
72 
73    bool emit_load_addr(PValue addr);
74    bool emit_fs_pixel_export(const ExportInstruction & exi);
75    bool emit_vs_pos_export(const ExportInstruction & exi);
76    bool emit_vs_param_export(const ExportInstruction & exi);
77    bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
78    bool copy_src(r600_bytecode_alu_src& src, const Value& s);
79 
80    EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);
81 
82    ConditionalJumpTracker m_jump_tracker;
83    CallStack m_callstack;
84 
85 public:
86    r600_bytecode *m_bc;
87    r600_shader *m_shader;
88    r600_shader_key *m_key;
89    r600_bytecode_output m_output;
90    unsigned m_max_color_exports;
91    bool has_pos_output;
92    bool has_param_output;
93    PValue m_last_addr;
94    int m_loop_nesting;
95    int m_nliterals_in_group;
96    std::set<int> vtx_fetch_results;
97 };
98 
99 
AssemblyFromShaderLegacy(struct r600_shader * sh,r600_shader_key * key)100 AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
101                                                    r600_shader_key *key)
102 {
103    impl = new AssemblyFromShaderLegacyImpl(sh, key);
104 }
105 
~AssemblyFromShaderLegacy()106 AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
107 {
108    delete impl;
109 }
110 
do_lower(const std::vector<InstructionBlock> & ir)111 bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
112 {
113    if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
114        impl->m_shader->ninput > 0)
115          r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
116 
117 
118    std::vector<Instruction::Pointer> exports;
119 
120    for (const auto& block : ir) {
121       for (const auto& i : block) {
122          if (!impl->emit(i))
123          return false;
124       if (i->type() != Instruction::alu)
125          impl->reset_addr_register();
126       }
127    }
128    /*
129    for (const auto& i : exports) {
130       if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
131           return false;
132    }*/
133 
134 
135    const struct cf_op_info *last = nullptr;
136    if (impl->m_bc->cf_last)
137       last = r600_isa_cf(impl->m_bc->cf_last->op);
138 
139    /* alu clause instructions don't have EOP bit, so add NOP */
140    if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
141        || impl->m_bc->cf_last->op == CF_OP_POP)
142       r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
143 
144     /* A fetch shader only can't be EOP (results in hang), but we can replace it
145      * by a NOP */
146    else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
147       impl->m_bc->cf_last->op = CF_OP_NOP;
148 
149    if (impl->m_shader->bc.chip_class != CAYMAN)
150       impl->m_bc->cf_last->end_of_program = 1;
151    else
152       cm_bytecode_add_cf_end(impl->m_bc);
153 
154    return true;
155 }
156 
emit(const Instruction::Pointer i)157 bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
158 {
159    if (i->type() != Instruction::vtx)
160        vtx_fetch_results.clear();
161 
162    sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
163    switch (i->type()) {
164    case Instruction::alu:
165       return emit_alu(static_cast<const AluInstruction&>(*i), cf_alu_undefined);
166    case Instruction::exprt:
167       return emit_export(static_cast<const ExportInstruction&>(*i));
168    case Instruction::tex:
169       return emit_tex(static_cast<const TexInstruction&>(*i));
170    case Instruction::vtx:
171       return emit_vtx(static_cast<const FetchInstruction&>(*i));
172    case Instruction::cond_if:
173       return emit_if_start(static_cast<const IfInstruction&>(*i));
174    case Instruction::cond_else:
175       return emit_else(static_cast<const ElseInstruction&>(*i));
176    case Instruction::cond_endif:
177       return emit_endif(static_cast<const IfElseEndInstruction&>(*i));
178    case Instruction::loop_begin:
179       return emit_loop_begin(static_cast<const LoopBeginInstruction&>(*i));
180    case Instruction::loop_end:
181       return emit_loop_end(static_cast<const LoopEndInstruction&>(*i));
182    case Instruction::loop_break:
183       return emit_loop_break(static_cast<const LoopBreakInstruction&>(*i));
184    case Instruction::loop_continue:
185       return emit_loop_continue(static_cast<const LoopContInstruction&>(*i));
186    case Instruction::streamout:
187       return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
188    case Instruction::ring:
189       return emit_memringwrite(static_cast<const MemRingOutIntruction&>(*i));
190    case Instruction::emit_vtx:
191       return emit_emit_vertex(static_cast<const EmitVertex&>(*i));
192    case Instruction::wait_ack:
193       return emit_wait_ack(static_cast<const WaitAck&>(*i));
194    case Instruction::mem_wr_scratch:
195       return emit_wr_scratch(static_cast<const WriteScratchInstruction&>(*i));
196    case Instruction::gds:
197       return emit_gds(static_cast<const GDSInstr&>(*i));
198    case Instruction::rat:
199       return emit_rat(static_cast<const RatInstruction&>(*i));
200    case Instruction::lds_write:
201       return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
202    case Instruction::lds_read:
203       return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
204    case Instruction::lds_atomic:
205       return emit_ldsatomic(static_cast<const LDSAtomicInstruction&>(*i));
206    case Instruction::tf_write:
207       return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
208    default:
209       return false;
210    }
211 }
212 
AssemblyFromShaderLegacyImpl(r600_shader * sh,r600_shader_key * key)213 AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
214                                                            r600_shader_key *key):
215    m_callstack(sh->bc),
216    m_bc(&sh->bc),
217    m_shader(sh),
218    m_key(key),
219    has_pos_output(false),
220    has_param_output(false),
221    m_loop_nesting(0),
222    m_nliterals_in_group(0)
223 {
224    m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
225 }
226 
227 extern const std::map<EAluOp, int> opcode_map;
228 
emit_load_addr(PValue addr)229 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
230 {
231    m_bc->ar_reg = addr->sel();
232    m_bc->ar_chan = addr->chan();
233    m_bc->ar_loaded = 0;
234    m_last_addr = addr;
235 
236    sfn_log << SfnLog::assembly << "   Prepare " << *addr << " to address register\n";
237 
238    return true;
239 }
240 
emit_alu(const AluInstruction & ai,ECFAluOpCode cf_op)241 bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op)
242 {
243 
244    struct r600_bytecode_alu alu;
245    memset(&alu, 0, sizeof(alu));
246    PValue addr_in_use;
247 
248    if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
249       std::cerr << "Opcode not handled for " << ai <<"\n";
250       return false;
251    }
252 
253    unsigned old_nliterals_in_group = m_nliterals_in_group;
254    for (unsigned i = 0; i < ai.n_sources(); ++i) {
255       auto& s = ai.src(i);
256       if (s.type() == Value::literal)
257          ++m_nliterals_in_group;
258    }
259 
260    /* This instruction group would exeed the limit of literals, so
261     * force a new instruction group by adding a NOP as last
262     * instruction. This will no loner be needed with a real
263     * scheduler */
264    if (m_nliterals_in_group > 4) {
265       sfn_log << SfnLog::assembly << "  Have " << m_nliterals_in_group << " inject a last op (nop)\n";
266       alu.op = ALU_OP0_NOP;
267       alu.last = 1;
268       alu.dst.chan = 3;
269       int retval = r600_bytecode_add_alu(m_bc, &alu);
270       if (retval)
271          return false;
272       memset(&alu, 0, sizeof(alu));
273       m_nliterals_in_group -= old_nliterals_in_group;
274    }
275 
276    alu.op = opcode_map.at(ai.opcode());
277 
278    /* Missing test whether ai actually has a dest */
279    auto dst = ai.dest();
280 
281    if (dst) {
282       if (!copy_dst(alu.dst, *dst))
283          return false;
284 
285       alu.dst.write = ai.flag(alu_write);
286       alu.dst.clamp = ai.flag(alu_dst_clamp);
287 
288       if (dst->type() == Value::gpr_array_value) {
289          auto& v = static_cast<const GPRArrayValue&>(*dst);
290          PValue addr = v.indirect();
291          if (addr) {
292             if (!m_last_addr || *addr != *m_last_addr) {
293                emit_load_addr(addr);
294                addr_in_use = addr;
295             }
296             alu.dst.rel = addr ? 1 : 0;;
297          }
298       }
299    }
300 
301    alu.is_op3 = ai.n_sources() == 3;
302 
303    for (unsigned i = 0; i < ai.n_sources(); ++i) {
304       auto& s = ai.src(i);
305 
306       if (!copy_src(alu.src[i], s))
307          return false;
308       alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
309 
310       if (s.type() == Value::gpr_array_value) {
311          auto& v = static_cast<const GPRArrayValue&>(s);
312          PValue addr = v.indirect();
313          if (addr) {
314             assert(!addr_in_use || (*addr_in_use == *addr));
315             if (!m_last_addr || *addr != *m_last_addr) {
316                emit_load_addr(addr);
317                addr_in_use = addr;
318             }
319             alu.src[i].rel = addr ? 1 : 0;
320          }
321       }
322       if (!alu.is_op3)
323          alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
324    }
325 
326    if (ai.bank_swizzle() != alu_vec_unknown)
327       alu.bank_swizzle_force = ai.bank_swizzle();
328 
329    alu.last = ai.flag(alu_last_instr);
330    alu.update_pred = ai.flag(alu_update_pred);
331    alu.execute_mask = ai.flag(alu_update_exec);
332 
333    /* If the destination register is equal to the last loaded address register
334     * then clear the latter one, because the values will no longer be identical */
335    if (m_last_addr)
336       sfn_log << SfnLog::assembly << "  Current address register is " << *m_last_addr << "\n";
337 
338    if (dst)
339       sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";
340 
341    if (dst && m_last_addr)
342       if (*dst == *m_last_addr) {
343          sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << "\n";
344          m_last_addr.reset();
345       }
346 
347    if (cf_op == cf_alu_undefined)
348       cf_op = ai.cf_type();
349 
350    unsigned type = 0;
351    switch (cf_op) {
352    case cf_alu: type = CF_OP_ALU; break;
353    case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
354    case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
355    case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
356    case cf_alu_break: type = CF_OP_ALU_BREAK; break;
357    case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
358    case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
359    case cf_alu_extended: type = CF_OP_ALU_EXT; break;
360    default:
361       assert(0 && "cf_alu_undefined should have been replaced");
362    }
363 
364    if (alu.last)
365       m_nliterals_in_group = 0;
366 
367    bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
368 
369    if (ai.opcode() == op1_mova_int)
370       m_bc->ar_loaded = 0;
371 
372    if (ai.opcode() == op1_set_cf_idx0)
373       m_bc->index_loaded[0] = 1;
374 
375    if (ai.opcode() == op1_set_cf_idx1)
376       m_bc->index_loaded[1] = 1;
377 
378 
379    m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
380                           ai.opcode() == op2_killne_int ||
381                           ai.opcode() == op1_set_cf_idx0 ||
382                           ai.opcode() == op1_set_cf_idx1);
383    return retval;
384 }
385 
emit_vs_pos_export(const ExportInstruction & exi)386 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
387 {
388    r600_bytecode_output output;
389    memset(&output, 0, sizeof(output));
390    assert(exi.gpr().type() == Value::gpr_vector);
391    const auto& gpr = exi.gpr();
392    output.gpr = gpr.sel();
393    output.elem_size = 3;
394    output.swizzle_x = gpr.chan_i(0);
395    output.swizzle_y = gpr.chan_i(1);
396    output.swizzle_z = gpr.chan_i(2);
397    output.swizzle_w = gpr.chan_i(3);
398    output.burst_count = 1;
399    output.array_base = 60 + exi.location();
400    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
401    output.type = exi.export_type();
402 
403 
404    if (r600_bytecode_add_output(m_bc, &output)) {
405       R600_ERR("Error adding pixel export at location %d\n", exi.location());
406       return false;
407    }
408 
409    return true;
410 }
411 
412 
emit_vs_param_export(const ExportInstruction & exi)413 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
414 {
415    r600_bytecode_output output;
416    assert(exi.gpr().type() == Value::gpr_vector);
417    const auto& gpr = exi.gpr();
418 
419    memset(&output, 0, sizeof(output));
420    output.gpr = gpr.sel();
421    output.elem_size = 3;
422    output.swizzle_x = gpr.chan_i(0);
423    output.swizzle_y = gpr.chan_i(1);
424    output.swizzle_z = gpr.chan_i(2);
425    output.swizzle_w = gpr.chan_i(3);
426    output.burst_count = 1;
427    output.array_base = exi.location();
428    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
429    output.type = exi.export_type();
430 
431 
432    if (r600_bytecode_add_output(m_bc, &output)) {
433       R600_ERR("Error adding pixel export at location %d\n", exi.location());
434       return false;
435    }
436 
437    return true;
438 }
439 
440 
emit_fs_pixel_export(const ExportInstruction & exi)441 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
442 {
443    if (exi.location() >= m_max_color_exports && exi.location()  < 60) {
444       R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
445                exi.location(), m_max_color_exports);
446       return true;
447    }
448 
449    assert(exi.gpr().type() == Value::gpr_vector);
450    const auto& gpr = exi.gpr();
451 
452    r600_bytecode_output output;
453    memset(&output, 0, sizeof(output));
454 
455    output.gpr = gpr.sel();
456    output.elem_size = 3;
457    output.swizzle_x = gpr.chan_i(0);
458    output.swizzle_y = gpr.chan_i(1);
459    output.swizzle_z = gpr.chan_i(2);
460    output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ;
461    output.burst_count = 1;
462    output.array_base = exi.location();
463    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
464    output.type = exi.export_type();
465 
466 
467    if (r600_bytecode_add_output(m_bc, &output)) {
468       R600_ERR("Error adding pixel export at location %d\n", exi.location());
469       return false;
470    }
471 
472    return true;
473 }
474 
475 
emit_export(const ExportInstruction & exi)476 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi)
477 {
478    switch (exi.export_type()) {
479    case ExportInstruction::et_pixel:
480       return emit_fs_pixel_export(exi);
481    case ExportInstruction::et_pos:
482       return emit_vs_pos_export(exi);
483    case ExportInstruction::et_param:
484       return emit_vs_param_export(exi);
485    default:
486       R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
487       return false;
488    }
489 }
490 
emit_if_start(const IfInstruction & if_instr)491 bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
492 {
493 	bool needs_workaround = false;
494    int elems = m_callstack.push(FC_PUSH_VPM);
495 
496    if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1)
497       needs_workaround = true;
498    if (m_bc->family != CHIP_HEMLOCK &&
499        m_bc->family != CHIP_CYPRESS &&
500        m_bc->family != CHIP_JUNIPER) {
501       unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
502 		unsigned dmod2 = (elems) % m_bc->stack.entry_size;
503 
504       if (elems && (!dmod1 || !dmod2))
505 			needs_workaround = true;
506 	}
507 
508    auto& pred = if_instr.pred();
509    auto op = cf_alu_push_before;
510 
511    if (needs_workaround) {
512 		r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
513                 m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
514 		op = cf_alu;
515 	}
516    emit_alu(pred, op);
517 
518    r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
519 
520    m_jump_tracker.push(m_bc->cf_last, jt_if);
521    return true;
522 }
523 
emit_else(UNUSED const ElseInstruction & else_instr)524 bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr)
525 {
526    r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
527    m_bc->cf_last->pop_count = 1;
528    return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
529 }
530 
emit_endif(UNUSED const IfElseEndInstruction & endif_instr)531 bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr)
532 {
533    m_callstack.pop(FC_PUSH_VPM);
534 
535    unsigned force_pop = m_bc->force_add_cf;
536    if (!force_pop) {
537       int alu_pop = 3;
538       if (m_bc->cf_last) {
539          if (m_bc->cf_last->op == CF_OP_ALU)
540             alu_pop = 0;
541          else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
542             alu_pop = 1;
543       }
544       alu_pop += 1;
545       if (alu_pop == 1) {
546          m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
547          m_bc->force_add_cf = 1;
548       } else if (alu_pop == 2) {
549          m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
550          m_bc->force_add_cf = 1;
551       } else {
552          force_pop = 1;
553       }
554    }
555 
556    if (force_pop) {
557       r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
558       m_bc->cf_last->pop_count = 1;
559       m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
560    }
561 
562    return m_jump_tracker.pop(m_bc->cf_last, jt_if);
563 }
564 
emit_loop_begin(UNUSED const LoopBeginInstruction & instr)565 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr)
566 {
567    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
568    m_jump_tracker.push(m_bc->cf_last, jt_loop);
569    m_callstack.push(FC_LOOP);
570    ++m_loop_nesting;
571    return true;
572 }
573 
emit_loop_end(UNUSED const LoopEndInstruction & instr)574 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr)
575 {
576    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
577    m_callstack.pop(FC_LOOP);
578    assert(m_loop_nesting);
579    --m_loop_nesting;
580    return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
581 }
582 
emit_loop_break(UNUSED const LoopBreakInstruction & instr)583 bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr)
584 {
585    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
586    return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
587 }
588 
emit_loop_continue(UNUSED const LoopContInstruction & instr)589 bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr)
590 {
591    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
592    return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
593 }
594 
emit_streamout(const StreamOutIntruction & so_instr)595 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr)
596 {
597    struct r600_bytecode_output output;
598    memset(&output, 0, sizeof(struct r600_bytecode_output));
599 
600    output.gpr = so_instr.gpr().sel();
601    output.elem_size = so_instr.element_size();
602    output.array_base = so_instr.array_base();
603    output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
604    output.burst_count = so_instr.burst_count();
605    output.array_size = so_instr.array_size();
606    output.comp_mask = so_instr.comp_mask();
607    output.op = so_instr.op();
608 
609    assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
610 
611 
612    if (r600_bytecode_add_output(m_bc, &output))  {
613       R600_ERR("shader_from_nir: Error creating stream output instruction\n");
614       return false;
615    }
616    return true;
617 }
618 
619 
emit_memringwrite(const MemRingOutIntruction & instr)620 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction& instr)
621 {
622    struct r600_bytecode_output output;
623    memset(&output, 0, sizeof(struct r600_bytecode_output));
624 
625    output.gpr = instr.gpr().sel();
626    output.type = instr.type();
627    output.elem_size = 3;
628    output.comp_mask = 0xf;
629    output.burst_count = 1;
630    output.op = instr.op();
631    if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
632       output.index_gpr = instr.index_reg();
633       output.array_size = 0xfff;
634    }
635    output.array_base = instr.array_base();
636 
637    if (r600_bytecode_add_output(m_bc, &output)) {
638       R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
639       return false;
640    }
641    return true;
642 }
643 
644 
emit_tex(const TexInstruction & tex_instr)645 bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr)
646 {
647    auto addr = tex_instr.sampler_offset();
648    if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
649                 ||  m_bc->index_reg[1] != addr->sel()
650                 ||  m_bc->index_reg_chan[1] != addr->chan())) {
651       struct r600_bytecode_alu alu;
652       memset(&alu, 0, sizeof(alu));
653       alu.op = opcode_map.at(op1_mova_int);
654       alu.dst.chan = 0;
655       alu.src[0].sel = addr->sel();
656       alu.src[0].chan = addr->chan();
657       alu.last = 1;
658       int r = r600_bytecode_add_alu(m_bc, &alu);
659       if (r)
660          return false;
661 
662       m_bc->ar_loaded = 0;
663 
664       alu.op = opcode_map.at(op1_set_cf_idx1);
665       alu.dst.chan = 0;
666       alu.src[0].sel = 0;
667       alu.src[0].chan = 0;
668       alu.last = 1;
669 
670       r = r600_bytecode_add_alu(m_bc, &alu);
671       if (r)
672          return false;
673 
674       m_bc->index_reg[1] = addr->sel();
675       m_bc->index_reg_chan[1] = addr->chan();
676       m_bc->index_loaded[1] = true;
677    }
678 
679    r600_bytecode_tex tex;
680    memset(&tex, 0, sizeof(struct r600_bytecode_tex));
681    tex.op = tex_instr.opcode();
682    tex.sampler_id = tex_instr.sampler_id();
683    tex.sampler_index_mode = 0;
684    tex.resource_id = tex_instr.resource_id();;
685    tex.resource_index_mode = 0;
686    tex.src_gpr = tex_instr.src().sel();
687    tex.dst_gpr = tex_instr.dst().sel();
688    tex.dst_sel_x = tex_instr.dest_swizzle(0);
689    tex.dst_sel_y = tex_instr.dest_swizzle(1);
690    tex.dst_sel_z = tex_instr.dest_swizzle(2);
691    tex.dst_sel_w = tex_instr.dest_swizzle(3);
692    tex.src_sel_x = tex_instr.src().chan_i(0);
693    tex.src_sel_y = tex_instr.src().chan_i(1);
694    tex.src_sel_z = tex_instr.src().chan_i(2);
695    tex.src_sel_w = tex_instr.src().chan_i(3);
696    tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
697    tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
698    tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
699    tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
700    tex.offset_x = tex_instr.get_offset(0);
701    tex.offset_y = tex_instr.get_offset(1);
702    tex.offset_z = tex_instr.get_offset(2);
703    tex.resource_index_mode = (!!addr) ? 2 : 0;
704    tex.sampler_index_mode = tex.resource_index_mode;
705 
706    if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
707        tex_instr.opcode() == TexInstruction::get_gradient_v)
708       tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
709    else
710       tex.inst_mod = tex_instr.inst_mode();
711    if (r600_bytecode_add_tex(m_bc, &tex)) {
712       R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
713       return false;
714    }
715    return true;
716 }
717 
emit_vtx(const FetchInstruction & fetch_instr)718 bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
719 {
720    int buffer_offset = 0;
721    auto addr = fetch_instr.buffer_offset();
722    auto index_mode = fetch_instr.buffer_index_mode();
723 
724    if (addr) {
725       if (addr->type() == Value::literal) {
726          const auto& boffs = static_cast<const LiteralValue&>(*addr);
727          buffer_offset = boffs.value();
728       } else {
729          index_mode = emit_index_reg(*addr, 0);
730       }
731    }
732 
733    if (fetch_instr.has_prelude()) {
734       for(auto &i : fetch_instr.prelude()) {
735          if (!emit(i))
736             return false;
737       }
738    }
739 
740    if (vtx_fetch_results.find(fetch_instr.src().sel()) !=
741        vtx_fetch_results.end()) {
742       m_bc->force_add_cf = 1;
743       vtx_fetch_results.clear();
744    }
745    vtx_fetch_results.insert(fetch_instr.dst().sel());
746 
747    struct r600_bytecode_vtx vtx;
748    memset(&vtx, 0, sizeof(vtx));
749    vtx.op = fetch_instr.vc_opcode();
750    vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
751    vtx.fetch_type = fetch_instr.fetch_type();
752    vtx.src_gpr = fetch_instr.src().sel();
753    vtx.src_sel_x = fetch_instr.src().chan();
754    vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
755    vtx.dst_gpr = fetch_instr.dst().sel();
756    vtx.dst_sel_x = fetch_instr.swz(0);		/* SEL_X */
757    vtx.dst_sel_y = fetch_instr.swz(1);		/* SEL_Y */
758    vtx.dst_sel_z = fetch_instr.swz(2);		/* SEL_Z */
759    vtx.dst_sel_w = fetch_instr.swz(3);		/* SEL_W */
760    vtx.use_const_fields = fetch_instr.use_const_fields();
761    vtx.data_format = fetch_instr.data_format();
762    vtx.num_format_all = fetch_instr.num_format();		/* NUM_FORMAT_SCALED */
763    vtx.format_comp_all = fetch_instr.is_signed();	/* FORMAT_COMP_SIGNED */
764    vtx.endian = fetch_instr.endian_swap();
765    vtx.buffer_index_mode = index_mode;
766    vtx.offset = fetch_instr.offset();
767    vtx.indexed = fetch_instr.indexed();
768    vtx.uncached = fetch_instr.uncached();
769    vtx.elem_size = fetch_instr.elm_size();
770    vtx.array_base = fetch_instr.array_base();
771    vtx.array_size = fetch_instr.array_size();
772    vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();
773 
774    if (fetch_instr.use_tc()) {
775       if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
776          R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
777          return false;
778       }
779 
780    } else {
781       if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
782          R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
783          return false;
784       }
785    }
786 
787    m_bc->cf_last->vpm = fetch_instr.use_vpm();
788    m_bc->cf_last->barrier = 1;
789 
790    return true;
791 }
792 
emit_emit_vertex(const EmitVertex & instr)793 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex &instr)
794 {
795    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
796    if (!r)
797       m_bc->cf_last->count = instr.stream();
798    assert(m_bc->cf_last->count < 4);
799 
800    return r == 0;
801 }
802 
emit_wait_ack(const WaitAck & instr)803 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck& instr)
804 {
805    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
806    if (!r)
807       m_bc->cf_last->cf_addr = instr.n_ack();
808 
809    return r == 0;
810 }
811 
emit_wr_scratch(const WriteScratchInstruction & instr)812 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr)
813 {
814    struct r600_bytecode_output cf;
815 
816    memset(&cf, 0, sizeof(struct r600_bytecode_output));
817 
818    cf.op = CF_OP_MEM_SCRATCH;
819    cf.elem_size = 3;
820    cf.gpr = instr.gpr().sel();
821    cf.mark = 1;
822    cf.comp_mask = instr.write_mask();
823    cf.swizzle_x = 0;
824    cf.swizzle_y = 1;
825    cf.swizzle_z = 2;
826    cf.swizzle_w = 3;
827    cf.burst_count = 1;
828 
829    if (instr.indirect()) {
830       cf.type = 3;
831       cf.index_gpr = instr.address();
832 
833       /* The docu seems to be wrong here: In indirect addressing the
834        * address_base seems to be the array_size */
835       cf.array_size = instr.array_size();
836    } else {
837       cf.type = 2;
838       cf.array_base = instr.location();
839    }
840    /* This should be 0, but the address calculation is apparently wrong */
841 
842 
843    if (r600_bytecode_add_output(m_bc, &cf)){
844       R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
845       return false;
846    }
847 
848    return true;
849 }
850 
/* Declaration of the ESDOp -> bytecode opcode table used by emit_gds();
 * the table itself is defined further down in this file. */
extern const std::map<ESDOp, int> ds_opcode_map;
852 
emit_gds(const GDSInstr & instr)853 bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
854 {
855    struct r600_bytecode_gds gds;
856 
857    int uav_idx = -1;
858    auto addr = instr.uav_id();
859    if (addr->type() != Value::literal) {
860       emit_index_reg(*addr, 1);
861    } else {
862       const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
863       uav_idx = addr_reg.value();
864    }
865 
866    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
867 
868    gds.op = ds_opcode_map.at(instr.op());
869    gds.dst_gpr = instr.dest_sel();
870    gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
871    gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
872    gds.src_gpr = instr.src_sel();
873 
874    gds.src_sel_x = instr.src_swizzle(0);
875    gds.src_sel_y = instr.src_swizzle(1);
876    gds.src_sel_z = instr.src_swizzle(2);
877 
878    gds.dst_sel_x = instr.dest_swizzle(0);
879    gds.dst_sel_y = 7;
880    gds.dst_sel_z = 7;
881    gds.dst_sel_w = 7;
882    gds.src_gpr2 = 0;
883    gds.alloc_consume = 1; // Not Cayman
884 
885    int r = r600_bytecode_add_gds(m_bc, &gds);
886    if (r)
887       return false;
888    m_bc->cf_last->vpm = 1;
889    m_bc->cf_last->barrier = 1;
890    return true;
891 }
892 
emit_tf_write(const GDSStoreTessFactor & instr)893 bool AssemblyFromShaderLegacyImpl::emit_tf_write(const GDSStoreTessFactor& instr)
894 {
895    struct r600_bytecode_gds gds;
896 
897    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
898    gds.src_gpr = instr.sel();
899    gds.src_sel_x = instr.chan(0);
900    gds.src_sel_y = instr.chan(1);
901    gds.src_sel_z = 4;
902    gds.dst_sel_x = 7;
903    gds.dst_sel_y = 7;
904    gds.dst_sel_z = 7;
905    gds.dst_sel_w = 7;
906    gds.op = FETCH_OP_TF_WRITE;
907 
908    if (r600_bytecode_add_gds(m_bc, &gds) != 0)
909          return false;
910 
911    if (instr.chan(2) != 7) {
912       memset(&gds, 0, sizeof(struct r600_bytecode_gds));
913       gds.src_gpr = instr.sel();
914       gds.src_sel_x = instr.chan(2);
915       gds.src_sel_y = instr.chan(3);
916       gds.src_sel_z = 4;
917       gds.dst_sel_x = 7;
918       gds.dst_sel_y = 7;
919       gds.dst_sel_z = 7;
920       gds.dst_sel_w = 7;
921       gds.op = FETCH_OP_TF_WRITE;
922 
923       if (r600_bytecode_add_gds(m_bc, &gds))
924          return false;
925    }
926    return true;
927 }
928 
emit_ldswrite(const LDSWriteInstruction & instr)929 bool AssemblyFromShaderLegacyImpl::emit_ldswrite(const LDSWriteInstruction& instr)
930 {
931    r600_bytecode_alu alu;
932    memset(&alu, 0, sizeof(r600_bytecode_alu));
933 
934    alu.last = true;
935    alu.is_lds_idx_op = true;
936    copy_src(alu.src[0], instr.address());
937    copy_src(alu.src[1], instr.value0());
938 
939    if (instr.num_components() == 1) {
940       alu.op = LDS_OP2_LDS_WRITE;
941    } else {
942       alu.op = LDS_OP3_LDS_WRITE_REL;
943       alu.lds_idx = 1;
944       copy_src(alu.src[2], instr.value1());
945    }
946 
947    return r600_bytecode_add_alu(m_bc, &alu) == 0;
948 }
949 
emit_ldsread(const LDSReadInstruction & instr)950 bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction& instr)
951 {
952    int r;
953    unsigned nread = 0;
954    unsigned nfetch = 0;
955    unsigned n_values = instr.num_values();
956 
957    r600_bytecode_alu alu_fetch;
958    r600_bytecode_alu alu_read;
959 
960    /* We must add a new ALU clause if the fetch and read op would be split otherwise
961     * r600_asm limites at 120 slots = 240 dwords */
962    if (m_bc->cf_last->ndw > 240 - 4 * n_values)
963       m_bc->force_add_cf = 1;
964 
965    while (nread < n_values) {
966       if (nfetch < n_values) {
967          memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
968          alu_fetch.is_lds_idx_op = true;
969          alu_fetch.op = LDS_OP1_LDS_READ_RET;
970 
971          copy_src(alu_fetch.src[0], instr.address(nfetch));
972          alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
973          alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
974          alu_fetch.last = 1;
975          r = r600_bytecode_add_alu(m_bc, &alu_fetch);
976          m_bc->cf_last->nlds_read++;
977          if (r)
978             return false;
979       }
980 
981       if (nfetch >= n_values) {
982          memset(&alu_read, 0, sizeof(r600_bytecode_alu));
983          copy_dst(alu_read.dst, instr.dest(nread));
984          alu_read.op = ALU_OP1_MOV;
985          alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
986          alu_read.last = 1;
987          alu_read.dst.write = 1;
988          r = r600_bytecode_add_alu(m_bc, &alu_read);
989          m_bc->cf_last->nqueue_read++;
990          if (r)
991             return false;
992          ++nread;
993       }
994       ++nfetch;
995    }
996    assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);
997 
998    return true;
999 }
1000 
emit_ldsatomic(const LDSAtomicInstruction & instr)1001 bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction& instr)
1002 {
1003    if (m_bc->cf_last->ndw > 240 - 4)
1004       m_bc->force_add_cf = 1;
1005 
1006    r600_bytecode_alu alu_fetch;
1007    r600_bytecode_alu alu_read;
1008 
1009    memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
1010    alu_fetch.is_lds_idx_op = true;
1011    alu_fetch.op = instr.op();
1012 
1013    copy_src(alu_fetch.src[0], instr.address());
1014    copy_src(alu_fetch.src[1], instr.src0());
1015 
1016    if (instr.src1())
1017       copy_src(alu_fetch.src[2], *instr.src1());
1018    alu_fetch.last = 1;
1019    int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
1020    if (r)
1021       return false;
1022 
1023    memset(&alu_read, 0, sizeof(r600_bytecode_alu));
1024    copy_dst(alu_read.dst, instr.dest());
1025    alu_read.op = ALU_OP1_MOV;
1026    alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
1027    alu_read.last = 1;
1028    alu_read.dst.write = 1;
1029    r = r600_bytecode_add_alu(m_bc, &alu_read);
1030    if (r)
1031       return false;
1032    return true;
1033 }
1034 
emit_rat(const RatInstruction & instr)1035 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
1036 {
1037    struct r600_bytecode_gds gds;
1038 
1039    int rat_idx = instr.rat_id();
1040    EBufferIndexMode rat_index_mode = bim_none;
1041    auto addr = instr.rat_id_offset();
1042 
1043    if (addr) {
1044       if (addr->type() != Value::literal) {
1045          rat_index_mode = emit_index_reg(*addr, 1);
1046       } else {
1047          const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
1048          rat_idx += addr_reg.value();
1049       }
1050    }
1051    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
1052 
1053    r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
1054    auto cf = m_bc->cf_last;
1055    cf->rat.id = rat_idx + m_shader->rat_base;
1056    cf->rat.inst = instr.rat_op();
1057    cf->rat.index_mode = rat_index_mode;
1058    cf->output.type = instr.need_ack() ? 3 : 1;
1059    cf->output.gpr = instr.data_gpr();
1060    cf->output.index_gpr = instr.index_gpr();
1061    cf->output.comp_mask = instr.comp_mask();
1062    cf->output.burst_count = instr.burst_count();
1063    assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
1064    if (cf->rat.inst != RatInstruction::STORE_TYPED) {
1065       assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
1066              instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
1067       assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
1068              instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
1069    }
1070 
1071    cf->vpm = 1;
1072    cf->barrier = 1;
1073    cf->mark = instr.need_ack();
1074    cf->output.elem_size = instr.elm_size();
1075    return true;
1076 }
1077 
1078 EBufferIndexMode
emit_index_reg(const Value & addr,unsigned idx)1079 AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
1080 {
1081    assert(idx < 2);
1082 
1083    EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
1084 
1085    if (!m_bc->index_loaded[idx] || m_loop_nesting ||
1086        m_bc->index_reg[idx] != addr.sel()
1087        ||  m_bc->index_reg_chan[idx] != addr.chan()) {
1088       struct r600_bytecode_alu alu;
1089 
1090       // Make sure MOVA is not last instr in clause
1091       if ((m_bc->cf_last->ndw>>1) >= 110)
1092               m_bc->force_add_cf = 1;
1093 
1094       memset(&alu, 0, sizeof(alu));
1095       alu.op = opcode_map.at(op1_mova_int);
1096       alu.dst.chan = 0;
1097       alu.src[0].sel = addr.sel();
1098       alu.src[0].chan = addr.chan();
1099       alu.last = 1;
1100       sfn_log << SfnLog::assembly << "   mova_int, ";
1101       int r = r600_bytecode_add_alu(m_bc, &alu);
1102       if (r)
1103          return bim_invalid;
1104 
1105       m_bc->ar_loaded = 0;
1106 
1107       alu.op = opcode_map.at(idxop);
1108       alu.dst.chan = 0;
1109       alu.src[0].sel = 0;
1110       alu.src[0].chan = 0;
1111       alu.last = 1;
1112       sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
1113       r = r600_bytecode_add_alu(m_bc, &alu);
1114       if (r)
1115          return bim_invalid;
1116 
1117       m_bc->index_reg[idx] = addr.sel();
1118       m_bc->index_reg_chan[idx] = addr.chan();
1119       m_bc->index_loaded[idx] = true;
1120       sfn_log << SfnLog::assembly << "\n";
1121    }
1122    return idx == 0 ? bim_zero : bim_one;
1123 }
1124 
copy_dst(r600_bytecode_alu_dst & dst,const Value & d)1125 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
1126                                             const Value& d)
1127 {
1128    assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
1129 
1130    if (d.sel() > 124) {
1131       R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
1132       return false;
1133    }
1134 
1135    dst.sel = d.sel();
1136    dst.chan = d.chan();
1137 
1138    if (m_bc->index_reg[1] == dst.sel &&
1139        m_bc->index_reg_chan[1] == dst.chan)
1140       m_bc->index_loaded[1] = false;
1141 
1142    if (m_bc->index_reg[0] == dst.sel &&
1143        m_bc->index_reg_chan[0] == dst.chan)
1144       m_bc->index_loaded[0] = false;
1145 
1146    return true;
1147 }
1148 
copy_src(r600_bytecode_alu_src & src,const Value & s)1149 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
1150 {
1151 
1152    if (s.type() == Value::gpr && s.sel() > 124) {
1153       R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
1154       return false;
1155    }
1156 
1157    if (s.type() == Value::lds_direct)  {
1158       R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
1159       return false;
1160    }
1161 
1162    if (s.type() == Value::kconst && s.sel() < 512)  {
1163       R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
1164       return false;
1165    }
1166 
1167    if (s.type() == Value::literal) {
1168       auto& v = static_cast<const LiteralValue&>(s);
1169       if (v.value() == 0) {
1170          src.sel = ALU_SRC_0;
1171          src.chan = 0;
1172          --m_nliterals_in_group;
1173          return true;
1174       }
1175       if (v.value() == 1) {
1176          src.sel = ALU_SRC_1_INT;
1177          src.chan = 0;
1178          --m_nliterals_in_group;
1179          return true;
1180       }
1181       if (v.value_float() == 1.0f) {
1182          src.sel = ALU_SRC_1;
1183          src.chan = 0;
1184          --m_nliterals_in_group;
1185          return true;
1186       }
1187       if (v.value_float() == 0.5f) {
1188          src.sel = ALU_SRC_0_5;
1189          src.chan = 0;
1190          --m_nliterals_in_group;
1191          return true;
1192       }
1193       if (v.value() == 0xffffffff) {
1194          src.sel = ALU_SRC_M_1_INT;
1195          src.chan = 0;
1196          --m_nliterals_in_group;
1197          return true;
1198       }
1199       src.value = v.value();
1200    }
1201 
1202    src.sel = s.sel();
1203    src.chan = s.chan();
1204    if (s.type() == Value::kconst) {
1205       const UniformValue& cv = static_cast<const UniformValue&>(s);
1206       src.kc_bank = cv.kcache_bank();
1207       auto addr = cv.addr();
1208       if (addr) {
1209          src.kc_rel = 1;
1210          emit_index_reg(*addr, 0);
1211          auto type = m_bc->cf_last->op;
1212          if (r600_bytecode_add_cf(m_bc)) {
1213                  return false;
1214          }
1215          m_bc->cf_last->op = type;
1216       }
1217    }
1218 
1219    return true;
1220 }
1221 
/* Translation table from the IR ALU opcodes (EAluOp) to the ALU_OP*
 * opcodes used when filling r600_bytecode_alu::op.
 *
 * Lookups in this file are done with at(), so asking for an opcode that is
 * not listed here throws std::out_of_range. Commented-out entries are IR
 * opcodes without a mapping in this table. */
const std::map<EAluOp, int> opcode_map = {

   {op2_add, ALU_OP2_ADD},
   {op2_mul, ALU_OP2_MUL},
   {op2_mul_ieee, ALU_OP2_MUL_IEEE},
   {op2_max, ALU_OP2_MAX},
   {op2_min, ALU_OP2_MIN},
   {op2_max_dx10, ALU_OP2_MAX_DX10},
   {op2_min_dx10, ALU_OP2_MIN_DX10},
   {op2_sete, ALU_OP2_SETE},
   {op2_setgt, ALU_OP2_SETGT},
   {op2_setge, ALU_OP2_SETGE},
   {op2_setne, ALU_OP2_SETNE},
   {op2_sete_dx10, ALU_OP2_SETE_DX10},
   {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
   {op2_setge_dx10, ALU_OP2_SETGE_DX10},
   {op2_setne_dx10, ALU_OP2_SETNE_DX10},
   {op1_fract, ALU_OP1_FRACT},
   {op1_trunc, ALU_OP1_TRUNC},
   {op1_ceil, ALU_OP1_CEIL},
   {op1_rndne, ALU_OP1_RNDNE},
   {op1_floor, ALU_OP1_FLOOR},
   {op2_ashr_int, ALU_OP2_ASHR_INT},
   {op2_lshr_int, ALU_OP2_LSHR_INT},
   {op2_lshl_int, ALU_OP2_LSHL_INT},
   {op1_mov, ALU_OP1_MOV},
   {op0_nop, ALU_OP0_NOP},
   {op2_mul_64, ALU_OP2_MUL_64},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
   {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
   {op2_pred_sete, ALU_OP2_PRED_SETE},
   {op2_pred_setgt, ALU_OP2_PRED_SETGT},
   {op2_pred_setge, ALU_OP2_PRED_SETGE},
   {op2_pred_setne, ALU_OP2_PRED_SETNE},
   //{op2_pred_set_inv, ALU_OP2_PRED_SET},
   //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
   //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
   {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
   {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
   {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
   {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
   {op2_kille, ALU_OP2_KILLE},
   {op2_killgt, ALU_OP2_KILLGT},
   {op2_killge, ALU_OP2_KILLGE},
   {op2_killne, ALU_OP2_KILLNE},
   {op2_and_int, ALU_OP2_AND_INT},
   {op2_or_int, ALU_OP2_OR_INT},
   {op2_xor_int, ALU_OP2_XOR_INT},
   {op1_not_int, ALU_OP1_NOT_INT},
   {op2_add_int, ALU_OP2_ADD_INT},
   {op2_sub_int, ALU_OP2_SUB_INT},
   {op2_max_int, ALU_OP2_MAX_INT},
   {op2_min_int, ALU_OP2_MIN_INT},
   {op2_max_uint, ALU_OP2_MAX_UINT},
   {op2_min_uint, ALU_OP2_MIN_UINT},
   {op2_sete_int, ALU_OP2_SETE_INT},
   {op2_setgt_int, ALU_OP2_SETGT_INT},
   {op2_setge_int, ALU_OP2_SETGE_INT},
   {op2_setne_int, ALU_OP2_SETNE_INT},
   {op2_setgt_uint, ALU_OP2_SETGT_UINT},
   {op2_setge_uint, ALU_OP2_SETGE_UINT},
   {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
   {op2_killge_uint, ALU_OP2_KILLGE_UINT},
   //p2_prede_int, ALU_OP2_PREDE_INT},
   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
   {op2_kille_int, ALU_OP2_KILLE_INT},
   {op2_killgt_int, ALU_OP2_KILLGT_INT},
   {op2_killge_int, ALU_OP2_KILLGE_INT},
   {op2_killne_int, ALU_OP2_KILLNE_INT},
   {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
   {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
   {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
   {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
   {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
   {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
   {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
   {op1_bfrev_int, ALU_OP1_BFREV_INT},
   {op2_addc_uint, ALU_OP2_ADDC_UINT},
   {op2_subb_uint, ALU_OP2_SUBB_UINT},
   {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
   {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
   {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
   {op2_set_mode, ALU_OP2_SET_MODE},
   /* Used by emit_index_reg() to load the CF index registers */
   {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
   {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
   {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
   {op1_exp_ieee, ALU_OP1_EXP_IEEE},
   {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
   {op1_log_ieee, ALU_OP1_LOG_IEEE},
   {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
   {op1_recip_ff, ALU_OP1_RECIP_FF},
   {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
   {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
   {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
   {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
   {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
   {op1_sin, ALU_OP1_SIN},
   {op1_cos, ALU_OP1_COS},
   {op2_mullo_int, ALU_OP2_MULLO_INT},
   {op2_mulhi_int, ALU_OP2_MULHI_INT},
   {op2_mullo_uint, ALU_OP2_MULLO_UINT},
   {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
   {op1_recip_int, ALU_OP1_RECIP_INT},
   {op1_recip_uint, ALU_OP1_RECIP_UINT},
   {op1_recip_64, ALU_OP2_RECIP_64},
   {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
   {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
   {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
   {op1_sqrt_64, ALU_OP2_SQRT_64},
   {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
   {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
   {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
   {op2_bfm_int, ALU_OP2_BFM_INT},
   {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
   {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
   {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
   {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
   {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
   {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
   {op1_bcnt_int, ALU_OP1_BCNT_INT},
   {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
   {op1_ffbl_int, ALU_OP1_FFBL_INT},
   {op1_ffbh_int, ALU_OP1_FFBH_INT},
   {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
   {op2_dot_ieee, ALU_OP2_DOT_IEEE},
   {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
   {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
   {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
   {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
   {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
   {op2_mul_uint24, ALU_OP2_MUL_UINT24},
   {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
   {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
   {op2_sete_64, ALU_OP2_SETE_64},
   {op2_setne_64, ALU_OP2_SETNE_64},
   {op2_setgt_64, ALU_OP2_SETGT_64},
   {op2_setge_64, ALU_OP2_SETGE_64},
   {op2_min_64, ALU_OP2_MIN_64},
   {op2_max_64, ALU_OP2_MAX_64},
   {op2_dot4, ALU_OP2_DOT4},
   {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
   {op2_cube, ALU_OP2_CUBE},
   {op1_max4, ALU_OP1_MAX4},
   {op1_frexp_64, ALU_OP1_FREXP_64},
   {op1_ldexp_64, ALU_OP2_LDEXP_64},
   {op1_fract_64, ALU_OP1_FRACT_64},
   {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
   {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
   {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
   {op2_add_64, ALU_OP2_ADD_64},
   /* Used by emit_index_reg() to load the address register */
   {op1_mova_int, ALU_OP1_MOVA_INT},
   /* NOTE(review): op1v_flt64_to_flt32 is already listed above with the
    * same value. A std::map holds one entry per key, so this line has no
    * effect. */
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
   {op2_dot, ALU_OP2_DOT},
   //p2_mul_prev, ALU_OP2_MUL_PREV},
   //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
   //p2_add_prev, ALU_OP2_ADD_PREV},
   {op2_muladd_prev, ALU_OP2_MULADD_PREV},
   {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
   {op2_interp_xy, ALU_OP2_INTERP_XY},
   {op2_interp_zw, ALU_OP2_INTERP_ZW},
   {op2_interp_x, ALU_OP2_INTERP_X},
   {op2_interp_z, ALU_OP2_INTERP_Z},
   {op0_store_flags, ALU_OP1_STORE_FLAGS},
   {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
   {op0_lds_1a, ALU_OP2_LDS_1A},
   {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
   {op0_lds_2a, ALU_OP2_LDS_2A},
   {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
   {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
   {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
      // {op 3 all left shift 6
   /* Three-operand opcodes */
   {op3_bfe_uint, ALU_OP3_BFE_UINT},
   {op3_bfe_int, ALU_OP3_BFE_INT},
   {op3_bfi_int, ALU_OP3_BFI_INT},
   {op3_fma, ALU_OP3_FMA},
   {op3_cndne_64, ALU_OP3_CNDNE_64},
   {op3_fma_64, ALU_OP3_FMA_64},
   {op3_lerp_uint, ALU_OP3_LERP_UINT},
   {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
   {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
   {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
   {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
   {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
   {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
   {op3_muladd, ALU_OP3_MULADD},
   {op3_muladd_m2, ALU_OP3_MULADD_M2},
   {op3_muladd_m4, ALU_OP3_MULADD_M4},
   {op3_muladd_d2, ALU_OP3_MULADD_D2},
   {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
   {op3_cnde, ALU_OP3_CNDE},
   {op3_cndgt, ALU_OP3_CNDGT},
   {op3_cndge, ALU_OP3_CNDGE},
   {op3_cnde_int, ALU_OP3_CNDE_INT},
   {op3_cndgt_int, ALU_OP3_CNDGT_INT},
   {op3_cndge_int, ALU_OP3_CNDGE_INT},
   {op3_mul_lit, ALU_OP3_MUL_LIT},
};
1425 
/* Translation table from the IR data-share opcodes (ESDOp) to the
 * FETCH_OP_GDS_* opcodes stored in r600_bytecode_gds::op by emit_gds().
 *
 * The table is queried with at(), so an opcode that is not listed throws
 * std::out_of_range. DS_OP_INVALID is mapped to 0. */
const std::map<ESDOp, int> ds_opcode_map = {
   /* Operations without return value */
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   /* The *_RET variants of the operations */
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   {DS_OP_INVALID, 0},
};
1476 
1477 }
1478