1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31 
32 namespace r600 {
33 
FragmentShaderFromNir(const nir_shader & nir,r600_shader & sh,r600_pipe_shader_selector & sel,const r600_shader_key & key,enum chip_class chip_class)34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35                                              r600_shader& sh,
36                                              r600_pipe_shader_selector &sel,
37                                              const r600_shader_key &key,
38                                              enum chip_class chip_class):
39    ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
40    m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41    m_max_counted_color_exports(0),
42    m_two_sided_color(key.ps.color_two_side),
43    m_last_pixel_export(nullptr),
44    m_nir(nir),
45    m_reserved_registers(0),
46    m_frag_pos_index(0),
47    m_need_back_color(false),
48    m_front_face_loaded(false),
49    m_depth_exports(0),
50    m_enable_centroid_interpolators(false),
51    m_enable_sample_interpolators(false),
52    m_apply_sample_mask(key.ps.apply_sample_id_mask),
53    m_dual_source_blend(key.ps.dual_source_blend)
54 {
55    for (auto&  i: m_interpolator) {
56       i.enabled = false;
57       i.ij_index= 0;
58    }
59 
60    sh_info().rat_base = key.ps.nr_cbufs;
61    sh_info().atomic_base = key.ps.first_atomic_counter;
62 }
63 
do_process_inputs(nir_variable * input)64 bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
65 {
66    sfn_log << SfnLog::io << "Parse input variable "
67            << input->name << " location:" <<  input->data.location
68            << " driver-loc:" << input->data.driver_location
69            << " interpolation:" << input->data.interpolation
70            << "\n";
71 
72    if (input->data.location == VARYING_SLOT_FACE) {
73       m_sv_values.set(es_face);
74       return true;
75    }
76 
77    unsigned name, sid;
78    auto semantic = r600_get_varying_semantic(input->data.location);
79    name = semantic.first;
80    sid = semantic.second;
81 
82    tgsi_semantic sname = static_cast<tgsi_semantic>(name);
83 
84    switch (sname) {
85    case TGSI_SEMANTIC_POSITION: {
86       m_sv_values.set(es_pos);
87       return true;
88    }
89    case TGSI_SEMANTIC_COLOR: {
90       m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
91       m_need_back_color = m_two_sided_color;
92       return true;
93    }
94    case TGSI_SEMANTIC_PRIMID:
95       sh_info().gs_prim_id_input = true;
96       sh_info().ps_prim_id_input = m_shaderio.inputs().size();
97       /* fallthrough */
98    case TGSI_SEMANTIC_FOG:
99    case TGSI_SEMANTIC_GENERIC:
100    case TGSI_SEMANTIC_TEXCOORD:
101    case TGSI_SEMANTIC_LAYER:
102    case TGSI_SEMANTIC_PCOORD:
103    case TGSI_SEMANTIC_VIEWPORT_INDEX:
104    case TGSI_SEMANTIC_CLIPDIST: {
105       if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
106          m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
107       return true;
108    }
109    default:
110       return false;
111    }
112 }
113 
scan_sysvalue_access(nir_instr * instr)114 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
115 {
116    switch (instr->type) {
117    case nir_instr_type_intrinsic: {
118       nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
119       switch (ii->intrinsic) {
120       case nir_intrinsic_load_front_face:
121          m_sv_values.set(es_face);
122          break;
123       case nir_intrinsic_load_sample_mask_in:
124          m_sv_values.set(es_sample_mask_in);
125          break;
126       case nir_intrinsic_load_sample_pos:
127          m_sv_values.set(es_sample_pos);
128          /* fallthrough */
129       case nir_intrinsic_load_sample_id:
130          m_sv_values.set(es_sample_id);
131          break;
132       case nir_intrinsic_interp_deref_at_centroid:
133          /* This is not a sysvalue, should go elsewhere */
134          m_enable_centroid_interpolators = true;
135          break;
136       case nir_intrinsic_interp_deref_at_sample:
137          m_enable_sample_interpolators = true;
138          break;
139       case nir_intrinsic_load_helper_invocation:
140          m_sv_values.set(es_helper_invocation);
141          break;
142       default:
143          ;
144       }
145    }
146    default:
147       ;
148    }
149    return true;
150 }
151 
do_allocate_reserved_registers()152 bool FragmentShaderFromNir::do_allocate_reserved_registers()
153 {
154    assert(!m_reserved_registers);
155 
156    int face_reg_index = -1;
157    int sample_id_index = -1;
158    // enabled interpolators based on inputs
159    for (auto& i: m_shaderio.inputs()) {
160       int ij = i->ij_index();
161       if (ij >= 0) {
162          m_interpolator[ij].enabled = true;
163       }
164    }
165 
166    /* Lazy, enable both possible interpolators,
167     * TODO: check which ones are really needed */
168    if (m_enable_centroid_interpolators) {
169       m_interpolator[2].enabled = true; /* perspective */
170       m_interpolator[5].enabled = true; /* linear */
171    }
172 
173    if (m_enable_sample_interpolators)
174       m_interpolator[1].enabled = true; /* perspective */
175 
176    // sort the varying inputs
177    m_shaderio.sort_varying_inputs();
178 
179    // handle interpolators
180    int num_baryc = 0;
181    for (int i = 0; i < 6; ++i) {
182       if (m_interpolator[i].enabled) {
183          sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
184 
185          m_interpolator[i].ij_index = num_baryc;
186 
187          unsigned sel = num_baryc / 2;
188          unsigned chan = 2 * (num_baryc % 2);
189 
190          auto ip_i = new GPRValue(sel, chan + 1);
191          ip_i->set_as_input();
192          m_interpolator[i].i.reset(ip_i);
193          inject_register(sel, chan + 1, m_interpolator[i].i, false);
194 
195          auto ip_j = new GPRValue(sel, chan);
196          ip_j->set_as_input();
197          m_interpolator[i].j.reset(ip_j);
198          inject_register(sel, chan, m_interpolator[i].j, false);
199 
200          ++num_baryc;
201       }
202    }
203    m_reserved_registers += (num_baryc + 1) >> 1;
204 
205    if (m_sv_values.test(es_pos)) {
206       m_frag_pos_index = m_reserved_registers++;
207       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
208    }
209 
210    // handle system values
211    if (m_sv_values.test(es_face) || m_need_back_color) {
212       face_reg_index = m_reserved_registers++;
213       m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0);
214       m_front_face_reg->set_as_input();
215       sfn_log << SfnLog::io << "Set front_face register to " <<  *m_front_face_reg << "\n";
216       inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false);
217 
218       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
219       load_front_face();
220    }
221 
222    if (m_sv_values.test(es_sample_mask_in)) {
223       if (face_reg_index < 0)
224          face_reg_index = m_reserved_registers++;
225 
226       m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2);
227       m_sample_mask_reg->set_as_input();
228       sfn_log << SfnLog::io << "Set sample mask in register to " <<  *m_sample_mask_reg << "\n";
229       sh_info().nsys_inputs = 1;
230       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
231    }
232 
233    if (m_sv_values.test(es_sample_id) ||
234        m_sv_values.test(es_sample_mask_in)) {
235       if (sample_id_index < 0)
236          sample_id_index = m_reserved_registers++;
237 
238       m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
239       m_sample_id_reg->set_as_input();
240       sfn_log << SfnLog::io << "Set sample id register to " <<  *m_sample_id_reg << "\n";
241       sh_info().nsys_inputs++;
242       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
243    }
244 
245    // The back color handling is not emmited in the code, so we have
246    // to add the inputs here and later we also need to inject the code to set
247    // the right color
248    if (m_need_back_color) {
249       size_t ninputs = m_shaderio.inputs().size();
250       for (size_t k = 0; k < ninputs; ++k) {
251          ShaderInput& i = m_shaderio.input(k);
252 
253          if (i.name() != TGSI_SEMANTIC_COLOR)
254             continue;
255 
256          ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
257 
258          size_t next_pos = m_shaderio.size();
259          auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
260          m_shaderio.add_input(bcol);
261          col.set_back_color(next_pos);
262       }
263       m_shaderio.set_two_sided();
264    }
265 
266    m_shaderio.update_lds_pos();
267 
268    set_reserved_registers(m_reserved_registers);
269 
270    return true;
271 }
272 
emit_shader_start()273 void FragmentShaderFromNir::emit_shader_start()
274 {
275    if (m_sv_values.test(es_face))
276       load_front_face();
277 
278    if (m_sv_values.test(es_pos)) {
279       for (int i = 0; i < 4; ++i) {
280          auto v = new GPRValue(m_frag_pos_index, i);
281          v->set_as_input();
282          auto reg = PValue(v);
283          if (i == 3)
284             emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
285          m_frag_pos[i] = reg;
286       }
287    }
288 
289    if (m_sv_values.test(es_helper_invocation)) {
290       m_helper_invocation = get_temp_register();
291       auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
292       emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
293       GPRVector dst({m_helper_invocation, dummy, dummy, dummy});
294 
295       auto vtx = new FetchInstruction(dst, m_helper_invocation,
296                                       R600_BUFFER_INFO_CONST_BUFFER, bim_none);
297       vtx->set_flag(vtx_vpm);
298       vtx->set_flag(vtx_use_tc);
299       vtx->set_dest_swizzle({4,7,7,7});
300       emit_instruction(vtx);
301    }
302 }
303 
do_emit_store_deref(const nir_variable * out_var,nir_intrinsic_instr * instr)304 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
305 {
306    if (out_var->data.location == FRAG_RESULT_COLOR)
307       return emit_export_pixel(out_var, instr, m_dual_source_blend ? 1 : m_max_color_exports);
308 
309    if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
310         out_var->data.location <= FRAG_RESULT_DATA7) ||
311        out_var->data.location == FRAG_RESULT_DEPTH ||
312        out_var->data.location == FRAG_RESULT_STENCIL ||
313        out_var->data.location == FRAG_RESULT_SAMPLE_MASK)
314       return emit_export_pixel(out_var, instr, 1);
315 
316    sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
317               out_var->data.location << "(" << out_var->data.driver_location << ")\n";
318    return false;
319 }
320 
do_process_outputs(nir_variable * output)321 bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
322 {
323    sfn_log << SfnLog::io << "Parse output variable "
324            << output->name << "  @" << output->data.location
325            << "@dl:" << output->data.driver_location
326            << " dual source idx: " << output->data.index
327            << "\n";
328 
329    ++sh_info().noutput;
330    r600_shader_io& io = sh_info().output[output->data.driver_location];
331    tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
332                                     &io.name, &io.sid);
333 
334    /* Check whether this code has become obsolete by the IO vectorization */
335    unsigned num_components = 4;
336    unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
337    if (vector_elements)
338            num_components = vector_elements;
339    unsigned component = output->data.location_frac;
340 
341    for (unsigned j = component; j < num_components + component; j++)
342       io.write_mask |= 1 << j;
343 
344    int loc = output->data.location;
345    if (loc == FRAG_RESULT_COLOR &&
346        (m_nir.info.outputs_written & (1ull << loc)) &&
347        !m_dual_source_blend) {
348            sh_info().fs_write_all = true;
349    }
350 
351    if (output->data.location == FRAG_RESULT_COLOR ||
352        (output->data.location >= FRAG_RESULT_DATA0 &&
353         output->data.location <= FRAG_RESULT_DATA7))  {
354       ++m_max_counted_color_exports;
355 
356       if (m_max_counted_color_exports > 1)
357          sh_info().fs_write_all = false;
358       return true;
359    }
360    if (output->data.location == FRAG_RESULT_DEPTH ||
361        output->data.location == FRAG_RESULT_STENCIL ||
362        output->data.location == FRAG_RESULT_SAMPLE_MASK) {
363       io.write_mask = 15;
364       return true;
365    }
366 
367    return false;
368 }
369 
emit_load_sample_mask_in(nir_intrinsic_instr * instr)370 bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
371 {
372    auto dest = from_nir(instr->dest, 0);
373    assert(m_sample_id_reg);
374    assert(m_sample_mask_reg);
375 
376    emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
377    emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
378    return true;
379 }
380 
emit_intrinsic_instruction_override(nir_intrinsic_instr * instr)381 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
382 {
383    switch (instr->intrinsic) {
384    case nir_intrinsic_load_sample_mask_in:
385       if (m_apply_sample_mask) {
386          return emit_load_sample_mask_in(instr);
387       } else
388          return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
389    case nir_intrinsic_load_sample_id:
390       return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
391    case nir_intrinsic_load_front_face:
392       return load_preloaded_value(instr->dest, 0, m_front_face_reg);
393    case nir_intrinsic_interp_deref_at_sample:
394       return emit_interp_deref_at_sample(instr);
395    case nir_intrinsic_interp_deref_at_offset:
396       return emit_interp_deref_at_offset(instr);
397    case nir_intrinsic_interp_deref_at_centroid:
398       return emit_interp_deref_at_centroid(instr);
399    case nir_intrinsic_load_sample_pos:
400       return emit_load_sample_pos(instr);
401    case nir_intrinsic_load_helper_invocation:
402       return load_preloaded_value(instr->dest, 0, m_helper_invocation);
403    default:
404       return false;
405    }
406 }
407 
load_front_face()408 void FragmentShaderFromNir::load_front_face()
409 {
410    assert(m_front_face_reg);
411    if (m_front_face_loaded)
412       return;
413 
414    auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
415                                 Value::zero, {alu_write, alu_last_instr});
416    m_front_face_loaded = true;
417    emit_instruction(ir);
418 }
419 
emit_load_sample_pos(nir_intrinsic_instr * instr)420 bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
421 {
422    GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
423    auto fetch = new FetchInstruction(vc_fetch,
424                                      no_index_offset,
425                                      fmt_32_32_32_32_float,
426                                      vtx_nf_scaled,
427                                      vtx_es_none,
428                                      m_sample_id_reg,
429                                      dest,
430                                      0,
431                                      false,
432                                      0xf,
433                                      R600_BUFFER_INFO_CONST_BUFFER,
434                                      0,
435                                      bim_none,
436                                      false,
437                                      false,
438                                      0,
439                                      0,
440                                      0,
441                                      PValue(),
442                                      {0,1,2,3});
443    fetch->set_flag(vtx_srf_mode);
444    emit_instruction(fetch);
445    return true;
446 }
447 
emit_interp_deref_at_sample(nir_intrinsic_instr * instr)448 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
449 {
450    GPRVector slope = get_temp_vec4();
451 
452    auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
453                                      from_nir_with_fetch_constant(instr->src[1], 0),
454                                      0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
455    fetch->set_flag(vtx_srf_mode);
456    emit_instruction(fetch);
457 
458    GPRVector grad = get_temp_vec4();
459    auto var = get_deref_location(instr->src[0]);
460    assert(var);
461 
462    auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
463 
464    auto interpolator = m_interpolator[1];
465    assert(interpolator.enabled);
466    PValue dummy(new GPRValue(interpolator.i->sel(), 0));
467 
468    GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
469 
470    auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
471    tex->set_flag(TexInstruction::grad_fine);
472    tex->set_flag(TexInstruction::x_unnormalized);
473    tex->set_flag(TexInstruction::y_unnormalized);
474    tex->set_flag(TexInstruction::z_unnormalized);
475    tex->set_flag(TexInstruction::w_unnormalized);
476    tex->set_dest_swizzle({0,1,7,7});
477    emit_instruction(tex);
478 
479    tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
480    tex->set_flag(TexInstruction::x_unnormalized);
481    tex->set_flag(TexInstruction::y_unnormalized);
482    tex->set_flag(TexInstruction::z_unnormalized);
483    tex->set_flag(TexInstruction::w_unnormalized);
484    tex->set_flag(TexInstruction::grad_fine);
485    tex->set_dest_swizzle({7,7,0,1});
486    emit_instruction(tex);
487 
488    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
489    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
490 
491    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
492    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
493 
494    Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
495 
496    auto dst = vec_from_nir(instr->dest, 4);
497    int num_components = instr->dest.is_ssa ?
498                            instr->dest.ssa.num_components:
499                            instr->dest.reg.reg->num_components;
500 
501    load_interpolated(dst, io, ip, num_components, var->data.location_frac);
502 
503    return true;
504 }
505 
emit_interp_deref_at_offset(nir_intrinsic_instr * instr)506 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
507 {
508    int temp = allocate_temp_register();
509 
510    GPRVector help(temp, {0,1,2,3});
511 
512    auto var = get_deref_location(instr->src[0]);
513    assert(var);
514 
515    auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
516    auto interpolator = m_interpolator[io.ij_index()];
517    PValue dummy(new GPRValue(interpolator.i->sel(), 0));
518 
519    GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
520 
521    auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
522    getgradh->set_dest_swizzle({0,1,7,7});
523    getgradh->set_flag(TexInstruction::x_unnormalized);
524    getgradh->set_flag(TexInstruction::y_unnormalized);
525    getgradh->set_flag(TexInstruction::z_unnormalized);
526    getgradh->set_flag(TexInstruction::w_unnormalized);
527    getgradh->set_flag(TexInstruction::grad_fine);
528    emit_instruction(getgradh);
529 
530    auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
531    getgradv->set_dest_swizzle({7,7,0,1});
532    getgradv->set_flag(TexInstruction::x_unnormalized);
533    getgradv->set_flag(TexInstruction::y_unnormalized);
534    getgradv->set_flag(TexInstruction::z_unnormalized);
535    getgradv->set_flag(TexInstruction::w_unnormalized);
536    getgradv->set_flag(TexInstruction::grad_fine);
537    emit_instruction(getgradv);
538 
539    PValue ofs_x = from_nir(instr->src[1], 0);
540    PValue ofs_y = from_nir(instr->src[1], 1);
541    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
542    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
543    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
544    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
545 
546    Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
547 
548    auto dst = vec_from_nir(instr->dest, 4);
549    load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
550                      var->data.location_frac);
551 
552    return true;
553 }
554 
emit_interp_deref_at_centroid(nir_intrinsic_instr * instr)555 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
556 {
557    auto var = get_deref_location(instr->src[0]);
558    assert(var);
559 
560    auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
561    io.set_uses_interpolate_at_centroid();
562 
563    int ij_index = io.ij_index() >= 3 ? 5 : 2;
564    assert (m_interpolator[ij_index].enabled);
565    auto ip = m_interpolator[ij_index];
566 
567    int num_components = nir_dest_num_components(instr->dest);
568 
569    auto dst = vec_from_nir(instr->dest, 4);
570    load_interpolated(dst, io, ip, num_components, var->data.location_frac);
571    return true;
572 }
573 
574 
do_emit_load_deref(const nir_variable * in_var,nir_intrinsic_instr * instr)575 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
576 {
577    if (in_var->data.location == VARYING_SLOT_POS) {
578       assert(instr->dest.is_ssa);
579 
580       for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
581          inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
582       }
583       return true;
584    }
585 
586    if (in_var->data.location == VARYING_SLOT_FACE)
587       return load_preloaded_value(instr->dest, 0, m_front_face_reg);
588 
589    // todo: replace io with ShaderInputVarying
590    auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
591    unsigned num_components  = 4;
592 
593 
594    if (instr->dest.is_ssa) {
595       num_components = instr->dest.ssa.num_components;
596    } else {
597       num_components = instr->dest.reg.reg->num_components;
598    }
599 
600    auto dst = vec_from_nir(instr->dest, 4);
601 
602    sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
603            << "].gpr=" << dst.sel()
604            << " interp=" << io.ij_index()
605            << "\n";
606 
607    io.set_gpr(dst.sel());
608 
609    auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
610 
611    load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
612 
613    /* These results are expected starting in slot x..*/
614    if (in_var->data.location_frac > 0) {
615       int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
616                                    instr->dest.reg.reg->num_components;
617       AluInstruction *ir = nullptr;
618       for (int i = 0; i < n; ++i) {
619          ir = new AluInstruction(op1_mov, dst[i],
620                                  dst[i + in_var->data.location_frac], {alu_write});
621          emit_instruction(ir);
622       }
623       if (ir)
624          ir->set_flag(alu_last_instr);
625    }
626 
627 
628    if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
629 
630       auto & color_input  = static_cast<ShaderInputColor&> (io);
631       auto& bgio = m_shaderio.input(color_input.back_color_input_index());
632 
633       bgio.set_gpr(allocate_temp_register());
634 
635       GPRVector bgcol(bgio.gpr(), {0,1,2,3});
636       load_interpolated(bgcol, bgio, ip, num_components, 0);
637 
638       load_front_face();
639 
640       AluInstruction *ir = nullptr;
641       for (unsigned i = 0; i < 4 ; ++i) {
642          ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
643          emit_instruction(ir);
644       }
645       if (ir)
646          ir->set_flag(alu_last_instr);
647    }
648 
649    return true;
650 }
651 
load_interpolated(GPRVector & dest,ShaderInput & io,const Interpolator & ip,int num_components,int start_comp)652 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
653                                               ShaderInput& io, const Interpolator &ip,
654                                               int num_components, int start_comp)
655 {
656    // replace io with ShaderInputVarying
657    if (io.interpolate() > 0) {
658 
659       sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
660 
661       if (num_components == 1) {
662          switch (start_comp) {
663          case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
664          case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
665          case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
666          case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
667          default:
668             assert(0);
669          }
670       }
671 
672       if (num_components == 2) {
673          switch (start_comp) {
674          case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
675          case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
676          case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
677                   load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
678          default:
679             assert(0);
680          }
681       }
682 
683       if (num_components == 3 && start_comp == 0)
684          return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
685                load_interpolated_one_comp(dest, io, ip, op2_interp_z);
686 
687       int full_write_mask = ((1 << num_components) - 1) << start_comp;
688 
689       bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
690       success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
691       return success;
692 
693    } else {
694       AluInstruction *ir = nullptr;
695       for (unsigned i = 0; i < 4 ; ++i) {
696          ir = new AluInstruction(op1_interp_load_p0, dest[i],
697                                  PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
698                                  EmitInstruction::write);
699          emit_instruction(ir);
700       }
701       ir->set_flag(alu_last_instr);
702    }
703    return true;
704 }
705 
load_interpolated_one_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op)706 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
707                                                        ShaderInput& io, const Interpolator& ip, EAluOp op)
708 {
709    for (unsigned i = 0; i < 2 ; ++i) {
710       int chan = i;
711       if (op == op2_interp_z)
712          chan += 2;
713 
714 
715       auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
716                                    PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
717                                    i == 0  ? EmitInstruction::write : EmitInstruction::last);
718       dest.pin_to_channel(chan);
719 
720       ir->set_bank_swizzle(alu_vec_210);
721       emit_instruction(ir);
722    }
723    return true;
724 }
725 
load_interpolated_two_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,int writemask)726 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
727                                                        const Interpolator& ip, EAluOp op, int writemask)
728 {
729    AluInstruction *ir = nullptr;
730    for (unsigned i = 0; i < 4 ; ++i) {
731       ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
732                               (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
733       dest.pin_to_channel(i);
734       ir->set_bank_swizzle(alu_vec_210);
735       emit_instruction(ir);
736    }
737    ir->set_flag(alu_last_instr);
738    return true;
739 }
740 
load_interpolated_two_comp_for_one(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,UNUSED int start,int comp)741 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
742                                                                ShaderInput& io, const Interpolator& ip,
743                                                                EAluOp op, UNUSED int start, int comp)
744 {
745    AluInstruction *ir = nullptr;
746    for (int i = 0; i <  4 ; ++i) {
747       ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
748                                    PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
749                                    i == comp ? EmitInstruction::write : EmitInstruction::empty);
750       ir->set_bank_swizzle(alu_vec_210);
751       dest.pin_to_channel(i);
752       emit_instruction(ir);
753    }
754    ir->set_flag(alu_last_instr);
755    return true;
756 }
757 
758 
emit_export_pixel(const nir_variable * out_var,nir_intrinsic_instr * instr,int outputs)759 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
760 {
761    std::array<uint32_t,4> swizzle;
762    unsigned writemask = nir_intrinsic_write_mask(instr);
763    switch (out_var->data.location) {
764    case FRAG_RESULT_DEPTH:
765       writemask = 1;
766       swizzle = {0,7,7,7};
767       break;
768    case FRAG_RESULT_STENCIL:
769       writemask = 2;
770       swizzle = {7,0,7,7};
771       break;
772    case FRAG_RESULT_SAMPLE_MASK:
773       writemask = 4;
774       swizzle = {7,7,0,7};
775       break;
776    default:
777       for (int i = 0; i < 4; ++i) {
778          swizzle[i] = (i < instr->num_components) ? i : 7;
779       }
780    }
781 
782    auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
783 
784    set_output(out_var->data.driver_location, value.sel());
785 
786    if (out_var->data.location == FRAG_RESULT_COLOR ||
787        (out_var->data.location >= FRAG_RESULT_DATA0 &&
788         out_var->data.location <= FRAG_RESULT_DATA7)) {
789       for (int k = 0 ; k < outputs; ++k) {
790 
791          unsigned location = (m_dual_source_blend ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports;
792 
793          sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n";
794 
795          if (location >= m_max_color_exports) {
796             sfn_log << SfnLog::io << "Pixel output loc:" << location
797                     << " dl:" << out_var->data.location
798                     << " skipped  because  we have only "   << m_max_color_exports << " CBs\n";
799             continue;
800          }
801 
802          m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
803 
804          if (sh_info().ps_export_highest < location)
805             sh_info().ps_export_highest = location;
806 
807          sh_info().nr_ps_color_exports++;
808 
809          unsigned mask = (0xfu << (location * 4));
810          sh_info().ps_color_export_mask |= mask;
811 
812          emit_export_instruction(m_last_pixel_export);
813       };
814    } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
815               out_var->data.location == FRAG_RESULT_STENCIL ||
816               out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
817       m_depth_exports++;
818       emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
819    } else {
820       return false;
821    }
822    return true;
823 }
824 
do_finalize()825 void FragmentShaderFromNir::do_finalize()
826 {
827    // update shader io info and set LDS etc.
828    sh_info().ninput = m_shaderio.inputs().size();
829 
830    sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
831    for (size_t i = 0; i < sh_info().ninput; ++i) {
832       int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
833                     m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
834       m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
835    }
836 
837    sh_info().two_side = m_shaderio.two_sided();
838    sh_info().nlds = m_shaderio.nlds();
839 
840    sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
841 
842    if (sh_info().fs_write_all) {
843       sh_info().nr_ps_max_color_exports = m_max_color_exports;
844    }
845 
846    if (!m_last_pixel_export) {
847       GPRVector v(0, {7,7,7,7});
848       m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
849       sh_info().nr_ps_color_exports++;
850       sh_info().ps_color_export_mask = 0xf;
851       emit_export_instruction(m_last_pixel_export);
852    }
853 
854    m_last_pixel_export->set_last();
855 
856    if (sh_info().fs_write_all)
857       sh_info().nr_ps_max_color_exports = 8;
858 }
859 
860 }
861