1 #include "sfn_vertexstageexport.h"
2 
3 #include "sfn_shaderio.h"
4 
5 namespace r600 {
6 
7 using std::priority_queue;
8 
VertexStageExportBase(VertexStage & proc)9 VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
10    m_proc(proc),
11    m_cur_clip_pos(1),
12    m_cur_param(0)
13 {
14 
15 }
16 
~VertexStageExportBase()17 VertexStageExportBase::~VertexStageExportBase()
18 {
19 
20 }
21 
VertexStageExportForFS(VertexStage & proc,const pipe_stream_output_info * so_info,r600_pipe_shader * pipe_shader,const r600_shader_key & key)22 VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
23                                                const pipe_stream_output_info *so_info,
24                                                r600_pipe_shader *pipe_shader, const r600_shader_key &key):
25    VertexStageExportBase(proc),
26    m_last_param_export(nullptr),
27    m_last_pos_export(nullptr),
28    m_num_clip_dist(0),
29    m_enabled_stream_buffers_mask(0),
30    m_so_info(so_info),
31    m_pipe_shader(pipe_shader),
32    m_key(key)
33 {
34 }
35 
do_process_outputs(nir_variable * output)36 bool VertexStageExportBase::do_process_outputs(nir_variable *output)
37 {
38    if (output->data.location == VARYING_SLOT_COL0 ||
39        output->data.location == VARYING_SLOT_COL1 ||
40        (output->data.location >= VARYING_SLOT_VAR0 &&
41        output->data.location <= VARYING_SLOT_VAR31) ||
42        (output->data.location >= VARYING_SLOT_TEX0 &&
43         output->data.location <= VARYING_SLOT_TEX7) ||
44        output->data.location == VARYING_SLOT_BFC0 ||
45        output->data.location == VARYING_SLOT_BFC1 ||
46        output->data.location == VARYING_SLOT_CLIP_VERTEX ||
47        output->data.location == VARYING_SLOT_CLIP_DIST0 ||
48        output->data.location == VARYING_SLOT_CLIP_DIST1 ||
49        output->data.location == VARYING_SLOT_POS ||
50        output->data.location == VARYING_SLOT_PSIZ ||
51        output->data.location == VARYING_SLOT_FOGC ||
52        output->data.location == VARYING_SLOT_LAYER ||
53        output->data.location == VARYING_SLOT_EDGE ||
54        output->data.location == VARYING_SLOT_VIEWPORT
55        ) {
56 
57       r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
58       auto semantic = r600_get_varying_semantic(output->data.location);
59       io.name = semantic.first;
60       io.sid = semantic.second;
61 
62       m_proc.evaluate_spi_sid(io);
63       io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
64                       << output->data.location_frac;
65       ++m_proc.sh_info().noutput;
66 
67       if (output->data.location == VARYING_SLOT_PSIZ ||
68           output->data.location == VARYING_SLOT_EDGE ||
69           output->data.location == VARYING_SLOT_LAYER) // VIEWPORT?
70             m_cur_clip_pos = 2;
71 
72       if (output->data.location != VARYING_SLOT_POS &&
73           output->data.location != VARYING_SLOT_EDGE &&
74           output->data.location != VARYING_SLOT_PSIZ &&
75           output->data.location != VARYING_SLOT_CLIP_VERTEX)
76          m_param_map[output->data.location] = m_cur_param++;
77 
78       return true;
79    }
80    return false;
81 }
82 
83 
store_deref(const nir_variable * out_var,nir_intrinsic_instr * instr)84 bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
85 {
86 
87    switch (out_var->data.location) {
88    case VARYING_SLOT_PSIZ:
89       m_proc.sh_info().vs_out_point_size = 1;
90       m_proc.sh_info().vs_out_misc_write = 1;
91       /* fallthrough */
92    case VARYING_SLOT_POS:
93       return emit_varying_pos(out_var, instr);
94    case VARYING_SLOT_EDGE: {
95       std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
96       return emit_varying_pos(out_var, instr, &swizzle_override);
97    }
98    case VARYING_SLOT_VIEWPORT: {
99       std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0};
100       return emit_varying_pos(out_var, instr, &swizzle_override) &&
101             emit_varying_param(out_var, instr);
102    }
103    case VARYING_SLOT_CLIP_VERTEX:
104       return emit_clip_vertices(out_var, instr);
105    case VARYING_SLOT_CLIP_DIST0:
106    case VARYING_SLOT_CLIP_DIST1:
107       m_num_clip_dist += 4;
108       return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
109    case VARYING_SLOT_LAYER: {
110       m_proc.sh_info().vs_out_misc_write = 1;
111       m_proc.sh_info().vs_out_layer = 1;
112       std::array<uint32_t, 4> swz = {7,7,0,7};
113       return emit_varying_pos(out_var, instr, &swz) &&
114             emit_varying_param(out_var, instr);
115    }
116    case VARYING_SLOT_VIEW_INDEX:
117       return emit_varying_pos(out_var, instr) &&
118             emit_varying_param(out_var, instr);
119 
120    default:
121          return emit_varying_param(out_var, instr);
122    }
123 
124    fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
125            out_var->data.location);
126    return false;
127 }
128 
emit_varying_pos(const nir_variable * out_var,nir_intrinsic_instr * instr,std::array<uint32_t,4> * swizzle_override)129 bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
130                                               std::array<uint32_t, 4> *swizzle_override)
131 {
132    std::array<uint32_t,4> swizzle;
133    uint32_t write_mask = 0;
134 
135    if (swizzle_override) {
136       swizzle = *swizzle_override;
137       for (int i = 0; i < 4; ++i) {
138          if (swizzle[i] < 6)
139             write_mask |= 1 << i;
140       }
141    } else {
142       write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
143       for (int i = 0; i < 4; ++i)
144          swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
145    }
146 
147    m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
148 
149    GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
150    m_proc.set_output(out_var->data.driver_location, value.sel());
151 
152    int export_slot = 0;
153 
154    switch (out_var->data.location) {
155    case VARYING_SLOT_EDGE: {
156       m_proc.sh_info().vs_out_misc_write = 1;
157       m_proc.sh_info().vs_out_edgeflag = 1;
158       m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
159       m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr});
160       m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
161    }
162       /* fallthrough */
163    case VARYING_SLOT_PSIZ:
164    case VARYING_SLOT_LAYER:
165       export_slot = 1;
166       break;
167    case VARYING_SLOT_VIEWPORT:
168       m_proc.sh_info().vs_out_misc_write = 1;
169       m_proc.sh_info().vs_out_viewport = 1;
170       export_slot = 1;
171       break;
172    case VARYING_SLOT_POS:
173       break;
174    case VARYING_SLOT_CLIP_DIST0:
175    case VARYING_SLOT_CLIP_DIST1:
176       export_slot = m_cur_clip_pos++;
177       break;
178    default:
179       sfn_log << SfnLog::err << __func__ << "Unsupported location "
180               << out_var->data.location << "\n";
181       return false;
182    }
183 
184    m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos);
185    m_proc.emit_export_instruction(m_last_pos_export);
186    m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
187    return true;
188 }
189 
emit_varying_param(const nir_variable * out_var,nir_intrinsic_instr * instr)190 bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
191 {
192    assert(out_var->data.driver_location < m_proc.sh_info().noutput);
193    sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
194 
195    int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
196    std::array<uint32_t,4> swizzle;
197    for (int i = 0; i < 4; ++i)
198       swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
199 
200    m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
201 
202    GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle, true);
203    m_proc.sh_info().output[out_var->data.driver_location].gpr = value.sel();
204 
205    /* This should use the registers!! */
206    m_proc.set_output(out_var->data.driver_location, value.sel());
207 
208    auto param_loc = m_param_map.find(out_var->data.location);
209    assert(param_loc != m_param_map.end());
210 
211    m_last_param_export = new ExportInstruction(param_loc->second, value, ExportInstruction::et_param);
212    m_proc.emit_export_instruction(m_last_param_export);
213    m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
214    return true;
215 }
216 
emit_clip_vertices(const nir_variable * out_var,nir_intrinsic_instr * instr)217 bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
218 {
219    m_proc.sh_info().cc_dist_mask = 0xff;
220    m_proc.sh_info().clip_dist_write = 0xff;
221 
222    m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3});
223    m_proc.add_param_output_reg(out_var->data.driver_location, &m_clip_vertex);
224 
225    for (int i = 0; i < 4; ++i)
226       m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
227 
228    GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
229 
230    for (int i = 0; i < 8; i++) {
231       int oreg = i >> 2;
232       int ochan = i & 3;
233       AluInstruction *ir = nullptr;
234       for (int j = 0; j < 4; j++) {
235          ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), m_clip_vertex.reg_i(j),
236                                  PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
237                                  (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
238          m_proc.emit_instruction(ir);
239       }
240       ir->set_flag(alu_last_instr);
241    }
242 
243    m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
244    m_proc.emit_export_instruction(m_last_pos_export);
245 
246    m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
247    m_proc.emit_export_instruction(m_last_pos_export);
248 
249    return true;
250 }
251 
finalize_exports()252 void VertexStageExportForFS::finalize_exports()
253 {
254    if (m_key.vs.as_gs_a) {
255       PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
256       GPRVector primid({m_proc.primitive_id(), o,o,o});
257       m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
258       m_proc.emit_export_instruction(m_last_param_export);
259       int i;
260       i = m_proc.sh_info().noutput++;
261       auto& io = m_proc.sh_info().output[i];
262       io.name = TGSI_SEMANTIC_PRIMID;
263       io.sid = 0;
264       io.gpr = 0;
265       io.interpolate = TGSI_INTERPOLATE_CONSTANT;
266       io.write_mask = 0x1;
267       io.spi_sid = m_key.vs.prim_id_out;
268       m_proc.sh_info().vs_as_gs_a = 1;
269    }
270 
271    if (m_so_info && m_so_info->num_outputs)
272       emit_stream(-1);
273 
274    m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
275 
276    if (!m_last_param_export) {
277       GPRVector value(0,{7,7,7,7});
278       m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
279       m_proc.emit_export_instruction(m_last_param_export);
280    }
281    m_last_param_export->set_last();
282 
283    if (!m_last_pos_export) {
284       GPRVector value(0,{7,7,7,7});
285       m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
286       m_proc.emit_export_instruction(m_last_pos_export);
287    }
288    m_last_pos_export->set_last();
289 }
290 
emit_stream(int stream)291 bool VertexStageExportForFS::emit_stream(int stream)
292 {
293    assert(m_so_info);
294    if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
295            R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
296            return false;
297    }
298    for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
299            if (m_so_info->output[i].output_buffer >= 4) {
300                    R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
301                             m_so_info->output[i].output_buffer);
302                    return false;
303            }
304    }
305    const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
306    unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
307    std::vector<GPRVector> tmp(m_so_info->num_outputs);
308 
309    /* Initialize locations where the outputs are stored. */
310    for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
311       if (stream != -1 && stream != m_so_info->output[i].stream)
312          continue;
313 
314       sfn_log << SfnLog::instr << "Emit stream " << i
315               << " with register index " << m_so_info->output[i].register_index << "  so_gpr:";
316 
317 
318       so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
319 
320       if (!so_gpr[i]) {
321          sfn_log << SfnLog::err << "\nERR: register index "
322                  << m_so_info->output[i].register_index
323                  << " doesn't correspond to an output register\n";
324          return false;
325       }
326       start_comp[i] = m_so_info->output[i].start_component;
327       /* Lower outputs with dst_offset < start_component.
328        *
329        * We can only output 4D vectors with a write mask, e.g. we can
330        * only output the W component at offset 3, etc. If we want
331        * to store Y, Z, or W at buffer offset 0, we need to use MOV
332        * to move it to X and output X. */
333       if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
334          int tmp_index = m_proc.allocate_temp_register();
335          int sc = m_so_info->output[i].start_component;
336          AluInstruction *alu = nullptr;
337          for (int j = 0; j < m_so_info->output[i].num_components; j++) {
338             PValue dst(new GPRValue(tmp_index, j));
339             alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
340             tmp[i].set_reg_i(j, dst);
341             m_proc.emit_instruction(alu);
342          }
343          if (alu)
344             alu->set_flag(alu_last_instr);
345 
346          /* Fill the vector with masked values */
347          PValue dst_blank(new GPRValue(tmp_index, 7));
348          for (int j = m_so_info->output[i].num_components; j < 4; j++)
349             tmp[i].set_reg_i(j, dst_blank);
350 
351          start_comp[i] = 0;
352          so_gpr[i] = &tmp[i];
353       }
354       sfn_log << SfnLog::instr <<  *so_gpr[i] << "\n";
355    }
356 
357    /* Write outputs to buffers. */
358    for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
359       sfn_log << SfnLog::instr << "Write output buffer " << i
360               << " with register index " << m_so_info->output[i].register_index << "\n";
361 
362       StreamOutIntruction *out_stream =
363             new StreamOutIntruction(*so_gpr[i],
364                                     m_so_info->output[i].num_components,
365                                     m_so_info->output[i].dst_offset - start_comp[i],
366                                     ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
367                                     m_so_info->output[i].output_buffer,
368                                     m_so_info->output[i].stream);
369       m_proc.emit_export_instruction(out_stream);
370       m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
371    }
372    return true;
373 }
374 
375 
VertexStageExportForGS(VertexStage & proc,const r600_shader * gs_shader)376 VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
377                                                const r600_shader *gs_shader):
378    VertexStageExportBase(proc),
379    m_num_clip_dist(0),
380    m_gs_shader(gs_shader)
381 {
382 
383 }
384 
store_deref(const nir_variable * out_var,nir_intrinsic_instr * instr)385 bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
386 {
387 
388    int ring_offset = -1;
389    const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
390 
391    sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
392            << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
393    for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
394       auto& in_io = m_gs_shader->input[k];
395       sfn_log << SfnLog::io << "  against  " <<  k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
396 
397       if (in_io.name == out_io.name &&
398           in_io.sid == out_io.sid) {
399          ring_offset = in_io.ring_offset;
400          break;
401       }
402    }
403 
404    if (out_var->data.location == VARYING_SLOT_VIEWPORT) {
405       m_proc.sh_info().vs_out_viewport = 1;
406       m_proc.sh_info().vs_out_misc_write = 1;
407       return true;
408    }
409 
410    if (ring_offset == -1) {
411       sfn_log << SfnLog::err << "VS defines output at "
412               << out_var->data.driver_location << "name=" << out_io.name
413               << " sid=" << out_io.sid << " that is not consumed as GS input\n";
414       return true;
415    }
416 
417    uint32_t write_mask =  (1 << instr->num_components) - 1;
418 
419    GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
420          swizzle_from_comps(instr->num_components), true);
421 
422    auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value,
423                                       ring_offset >> 2, 4, PValue());
424    m_proc.emit_export_instruction(ir);
425 
426    m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
427    if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
428        out_var->data.location == VARYING_SLOT_CLIP_DIST1)
429       m_num_clip_dist += 4;
430 
431    return true;
432 }
433 
finalize_exports()434 void VertexStageExportForGS::finalize_exports()
435 {
436 
437 }
438 
VertexStageExportForES(VertexStage & proc)439 VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
440    VertexStageExportBase(proc)
441 {
442 }
443 
store_deref(const nir_variable * out_var,nir_intrinsic_instr * instr)444 bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
445 {
446    return true;
447 }
448 
finalize_exports()449 void VertexStageExportForES::finalize_exports()
450 {
451 
452 }
453 
454 }
455