1 #include "sfn_emitssboinstruction.h"
2 
3 #include "sfn_instruction_fetch.h"
4 #include "sfn_instruction_gds.h"
5 #include "sfn_instruction_misc.h"
6 #include "sfn_instruction_tex.h"
7 #include "../r600_pipe.h"
8 #include "../r600_asm.h"
9 
10 namespace r600 {
11 
12 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
13 
EmitSSBOInstruction(ShaderFromNirProcessor & processor)14 EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
15    EmitInstruction(processor),
16    m_require_rat_return_address(false),
17    m_ssbo_image_offset(0)
18 {
19 }
20 
set_ssbo_offset(int offset)21 void EmitSSBOInstruction::set_ssbo_offset(int offset)
22 {
23    m_ssbo_image_offset = offset;
24 }
25 
26 
set_require_rat_return_address()27 void EmitSSBOInstruction::set_require_rat_return_address()
28 {
29    m_require_rat_return_address = true;
30 }
31 
32 bool
load_rat_return_address()33 EmitSSBOInstruction::load_rat_return_address()
34 {
35    if (m_require_rat_return_address) {
36       m_rat_return_address = get_temp_vec4();
37       emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
38       emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
39       emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
40                                           literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
41       emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
42                                           m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
43       {alu_write, alu_last_instr}));
44       m_require_rat_return_address = false;
45    }
46    return true;
47 }
48 
49 
do_emit(nir_instr * instr)50 bool EmitSSBOInstruction::do_emit(nir_instr* instr)
51 {
52    const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
53    switch (intr->intrinsic) {
54    case nir_intrinsic_atomic_counter_add:
55    case nir_intrinsic_atomic_counter_and:
56    case nir_intrinsic_atomic_counter_exchange:
57    case nir_intrinsic_atomic_counter_max:
58    case nir_intrinsic_atomic_counter_min:
59    case nir_intrinsic_atomic_counter_or:
60    case nir_intrinsic_atomic_counter_xor:
61    case nir_intrinsic_atomic_counter_comp_swap:
62       return emit_atomic(intr);
63    case nir_intrinsic_atomic_counter_read:
64    case nir_intrinsic_atomic_counter_post_dec:
65       return emit_unary_atomic(intr);
66    case nir_intrinsic_atomic_counter_inc:
67       return emit_atomic_inc(intr);
68    case nir_intrinsic_atomic_counter_pre_dec:
69       return emit_atomic_pre_dec(intr);
70    case nir_intrinsic_load_ssbo:
71        return emit_load_ssbo(intr);
72    case nir_intrinsic_store_ssbo:
73       return emit_store_ssbo(intr);
74    case nir_intrinsic_ssbo_atomic_add:
75    case nir_intrinsic_ssbo_atomic_comp_swap:
76    case nir_intrinsic_ssbo_atomic_or:
77    case nir_intrinsic_ssbo_atomic_xor:
78    case nir_intrinsic_ssbo_atomic_imax:
79    case nir_intrinsic_ssbo_atomic_imin:
80    case nir_intrinsic_ssbo_atomic_umax:
81    case nir_intrinsic_ssbo_atomic_umin:
82    case nir_intrinsic_ssbo_atomic_and:
83    case nir_intrinsic_ssbo_atomic_exchange:
84       return emit_ssbo_atomic_op(intr);
85    case nir_intrinsic_image_store:
86       return emit_image_store(intr);
87    case nir_intrinsic_image_load:
88    case nir_intrinsic_image_atomic_add:
89    case nir_intrinsic_image_atomic_and:
90    case nir_intrinsic_image_atomic_or:
91    case nir_intrinsic_image_atomic_xor:
92    case nir_intrinsic_image_atomic_exchange:
93    case nir_intrinsic_image_atomic_comp_swap:
94    case nir_intrinsic_image_atomic_umin:
95    case nir_intrinsic_image_atomic_umax:
96    case nir_intrinsic_image_atomic_imin:
97    case nir_intrinsic_image_atomic_imax:
98       return emit_image_load(intr);
99    case nir_intrinsic_image_size:
100       return emit_image_size(intr);
101    case nir_intrinsic_get_ssbo_size:
102       return emit_buffer_size(intr);
103    case nir_intrinsic_memory_barrier:
104    case nir_intrinsic_memory_barrier_image:
105    case nir_intrinsic_memory_barrier_buffer:
106    case nir_intrinsic_group_memory_barrier:
107       return make_stores_ack_and_waitack();
108    default:
109       return false;
110    }
111 }
112 
emit_atomic(const nir_intrinsic_instr * instr)113 bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
114 {
115    ESDOp op = get_opcode(instr->intrinsic);
116 
117    if (DS_OP_INVALID == op)
118       return false;
119 
120    GPRVector dest = make_dest(instr);
121 
122    int base = remap_atomic_base(nir_intrinsic_base(instr));
123 
124    PValue uav_id = from_nir(instr->src[0], 0);
125 
126    PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
127 
128    GDSInstr *ir = nullptr;
129    if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap)  {
130       PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
131       ir = new GDSInstr(op, dest, value, value2, uav_id, base);
132    } else {
133       ir = new GDSInstr(op, dest, value, uav_id, base);
134    }
135 
136    emit_instruction(ir);
137    return true;
138 }
139 
emit_unary_atomic(const nir_intrinsic_instr * instr)140 bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
141 {
142    ESDOp op = get_opcode(instr->intrinsic);
143 
144    if (DS_OP_INVALID == op)
145       return false;
146 
147    GPRVector dest = make_dest(instr);
148 
149    PValue uav_id = from_nir(instr->src[0], 0);
150 
151    auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
152 
153    emit_instruction(ir);
154    return true;
155 }
156 
get_opcode(const nir_intrinsic_op opcode)157 ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode)
158 {
159    switch (opcode) {
160    case nir_intrinsic_atomic_counter_add:
161       return DS_OP_ADD_RET;
162    case nir_intrinsic_atomic_counter_and:
163       return DS_OP_AND_RET;
164    case nir_intrinsic_atomic_counter_exchange:
165       return DS_OP_XCHG_RET;
166    case nir_intrinsic_atomic_counter_inc:
167       return DS_OP_INC_RET;
168    case nir_intrinsic_atomic_counter_max:
169       return DS_OP_MAX_UINT_RET;
170    case nir_intrinsic_atomic_counter_min:
171       return DS_OP_MIN_UINT_RET;
172    case nir_intrinsic_atomic_counter_or:
173       return DS_OP_OR_RET;
174    case nir_intrinsic_atomic_counter_read:
175       return DS_OP_READ_RET;
176    case nir_intrinsic_atomic_counter_xor:
177       return DS_OP_XOR_RET;
178    case nir_intrinsic_atomic_counter_post_dec:
179       return DS_OP_DEC_RET;
180    case nir_intrinsic_atomic_counter_comp_swap:
181       return DS_OP_CMP_XCHG_RET;
182    case nir_intrinsic_atomic_counter_pre_dec:
183    default:
184       return DS_OP_INVALID;
185    }
186 }
187 
188 RatInstruction::ERatOp
get_rat_opcode(const nir_intrinsic_op opcode,pipe_format format) const189 EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
190 {
191    switch (opcode) {
192    case nir_intrinsic_ssbo_atomic_add:
193    case nir_intrinsic_image_atomic_add:
194       return RatInstruction::ADD_RTN;
195    case nir_intrinsic_ssbo_atomic_and:
196    case nir_intrinsic_image_atomic_and:
197       return RatInstruction::AND_RTN;
198    case nir_intrinsic_ssbo_atomic_exchange:
199    case nir_intrinsic_image_atomic_exchange:
200       return RatInstruction::XCHG_RTN;
201    case nir_intrinsic_ssbo_atomic_or:
202    case nir_intrinsic_image_atomic_or:
203       return RatInstruction::OR_RTN;
204    case nir_intrinsic_ssbo_atomic_imin:
205    case nir_intrinsic_image_atomic_imin:
206       return RatInstruction::MIN_INT_RTN;
207    case nir_intrinsic_ssbo_atomic_imax:
208    case nir_intrinsic_image_atomic_imax:
209       return RatInstruction::MAX_INT_RTN;
210    case nir_intrinsic_ssbo_atomic_umin:
211    case nir_intrinsic_image_atomic_umin:
212       return RatInstruction::MIN_UINT_RTN;
213    case nir_intrinsic_ssbo_atomic_umax:
214    case nir_intrinsic_image_atomic_umax:
215       return RatInstruction::MAX_UINT_RTN;
216    case nir_intrinsic_ssbo_atomic_xor:
217    case nir_intrinsic_image_atomic_xor:
218       return RatInstruction::XOR_RTN;
219    case nir_intrinsic_ssbo_atomic_comp_swap:
220    case nir_intrinsic_image_atomic_comp_swap:
221       if (util_format_is_float(format))
222          return RatInstruction::CMPXCHG_FLT_RTN;
223       else
224          return RatInstruction::CMPXCHG_INT_RTN;
225    case nir_intrinsic_image_load:
226       return RatInstruction::NOP_RTN;
227    default:
228       unreachable("Unsupported RAT instruction");
229    }
230 }
231 
232 
emit_atomic_add(const nir_intrinsic_instr * instr)233 bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
234 {
235    GPRVector dest = make_dest(instr);
236 
237    PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
238 
239    PValue uav_id = from_nir(instr->src[0], 0);
240 
241    auto ir = new GDSInstr(DS_OP_ADD_RET, dest, value, uav_id,
242                           remap_atomic_base(nir_intrinsic_base(instr)));
243 
244    emit_instruction(ir);
245    return true;
246 }
247 
load_atomic_inc_limits()248 bool EmitSSBOInstruction::load_atomic_inc_limits()
249 {
250    m_atomic_update = get_temp_register();
251    m_atomic_update->set_keep_alive();
252    emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
253    {alu_write, alu_last_instr}));
254    return true;
255 }
256 
emit_atomic_inc(const nir_intrinsic_instr * instr)257 bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
258 {
259    PValue uav_id = from_nir(instr->src[0], 0);
260    GPRVector dest = make_dest(instr);
261    auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id,
262                           remap_atomic_base(nir_intrinsic_base(instr)));
263    emit_instruction(ir);
264    return true;
265 }
266 
emit_atomic_pre_dec(const nir_intrinsic_instr * instr)267 bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
268 {
269    GPRVector dest = make_dest(instr);
270 
271    PValue uav_id = from_nir(instr->src[0], 0);
272 
273    auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
274                           remap_atomic_base(nir_intrinsic_base(instr)));
275    emit_instruction(ir);
276 
277    emit_instruction(new AluInstruction(op2_sub_int,  dest.x(), dest.x(), literal(1), last_write));
278 
279    return true;
280 }
281 
emit_load_ssbo(const nir_intrinsic_instr * instr)282 bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
283 {
284    GPRVector dest = make_dest(instr);
285 
286    /** src0 not used, should be some offset */
287    auto addr = from_nir(instr->src[1], 0);
288    PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
289 
290    /** Should be lowered in nir */
291    emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
292                     {alu_write, alu_last_instr}));
293 
294    const EVTXDataFormat formats[4] = {
295       fmt_32,
296       fmt_32_32,
297       fmt_32_32_32,
298       fmt_32_32_32_32
299    };
300 
301    const std::array<int,4> dest_swt[4] = {
302       {0,7,7,7},
303       {0,1,7,7},
304       {0,1,2,7},
305       {0,1,2,3}
306    };
307 
308    /* TODO fix resource index */
309    auto ir = new FetchInstruction(dest, addr_temp,
310                                   R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
311                                   , from_nir(instr->src[0], 0),
312                                   formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
313    ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
314    ir->set_flag(vtx_use_tc);
315 
316    emit_instruction(ir);
317    return true;
318 }
319 
emit_store_ssbo(const nir_intrinsic_instr * instr)320 bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
321 {
322 
323    GPRVector::Swizzle swz = {7,7,7,7};
324    for (unsigned i = 0; i <  nir_src_num_components(instr->src[0]); ++i)
325       swz[i] = i;
326 
327    auto orig_addr = from_nir(instr->src[2], 0);
328 
329    int temp1 = allocate_temp_register();
330    GPRVector addr_vec(temp1, {0,1,2,7});
331 
332    auto temp2 = get_temp_vec4();
333 
334    auto rat_id = from_nir(instr->src[1], 0);
335 
336    emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
337                                        PValue(new LiteralValue(2)), write));
338    emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
339    emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
340 
341 
342 //#define WRITE_AS_VECTOR
343 #ifdef WRITE_AS_VECTOR
344    std::unique_ptr<GPRVector> value(vec_from_nir_with_fetch_constant(instr->src[0],
345                                     (1 << instr->src[0].ssa->num_components) - 1, swz));
346 
347    /* TODO fix resource index */
348    int nelements = instr->src[0].ssa->num_components - 1;
349    if (nelements == 2)
350       nelements = 3;
351    auto ir = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
352                                 *value, addr_vec, 0, rat_id, 11,
353                                 (1 << instr->src[0].ssa->num_components) - 1,
354                                 0, false);
355    emit_instruction(ir);
356 #else
357 
358    auto values = vec_from_nir_with_fetch_constant(instr->src[0],
359          (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
360 
361    auto cf_op = cf_mem_rat;
362    //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
363    auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
364                                    values, addr_vec, m_ssbo_image_offset, rat_id, 1,
365                                    1, 0, false);
366    emit_instruction(store);
367    m_store_ops.push_back(store);
368 
369    for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
370       emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), write));
371       emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
372                                           {addr_vec.reg_i(0), Value::one_i}, last_write));
373       store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
374                                  temp2, addr_vec, 0, rat_id, 1,
375                                  1, 0, false);
376       emit_instruction(store);
377       if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
378          m_store_ops.push_back(store);
379    }
380 #endif
381    return true;
382 }
383 
384 bool
emit_image_store(const nir_intrinsic_instr * intrin)385 EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
386 {
387    int imageid = 0;
388    PValue image_offset;
389 
390    if (nir_src_is_const(intrin->src[0]))
391       imageid = nir_src_as_int(intrin->src[0]);
392    else
393       image_offset = from_nir(intrin->src[0], 0);
394 
395    auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
396    auto undef = from_nir(intrin->src[2], 0);
397    auto value = vec_from_nir_with_fetch_constant(intrin->src[3],  0xf, {0,1,2,3});
398    auto unknown  = from_nir(intrin->src[4], 0);
399 
400    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
401        nir_intrinsic_image_array(intrin)) {
402       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
403       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
404    }
405 
406    auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
407    auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
408                                    image_offset, 1, 0xf, 0, false);
409 
410    //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
411       m_store_ops.push_back(store);
412 
413    emit_instruction(store);
414    return true;
415 }
416 
417 bool
emit_ssbo_atomic_op(const nir_intrinsic_instr * intrin)418 EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
419 {
420    int imageid = 0;
421    PValue image_offset;
422 
423    if (nir_src_is_const(intrin->src[0]))
424       imageid = nir_src_as_int(intrin->src[0]);
425    else
426       image_offset = from_nir(intrin->src[0], 0);
427 
428    auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
429 
430 
431    auto coord_orig =  from_nir(intrin->src[1], 0, 0);
432    auto coord = get_temp_register(0);
433 
434    emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
435 
436    if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
437       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
438                                           from_nir(intrin->src[3], 0), {alu_write}));
439       // TODO: cayman wants channel 2 here
440       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
441                                           from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
442    } else {
443       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
444                                           from_nir(intrin->src[2], 0), {alu_write}));
445       emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
446    }
447 
448 
449    GPRVector out_vec({coord, coord, coord, coord});
450 
451    auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
452                                    image_offset, 1, 0xf, 0, true);
453    emit_instruction(atomic);
454    emit_instruction(new WaitAck(0));
455 
456    GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
457    auto fetch = new FetchInstruction(vc_fetch,
458                                      no_index_offset,
459                                      fmt_32,
460                                      vtx_nf_int,
461                                      vtx_es_none,
462                                      m_rat_return_address.reg_i(1),
463                                      dest,
464                                      0,
465                                      false,
466                                      0xf,
467                                      R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
468                                      0,
469                                      bim_none,
470                                      false,
471                                      false,
472                                      0,
473                                      0,
474                                      0,
475                                      image_offset,
476                                      {0,7,7,7});
477    fetch->set_flag(vtx_srf_mode);
478    fetch->set_flag(vtx_use_tc);
479    emit_instruction(fetch);
480    return true;
481 
482 }
483 
484 bool
emit_image_load(const nir_intrinsic_instr * intrin)485 EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
486 {
487    int imageid = 0;
488    PValue image_offset;
489 
490    if (nir_src_is_const(intrin->src[0]))
491       imageid = nir_src_as_int(intrin->src[0]);
492    else
493       image_offset = from_nir(intrin->src[0], 0);
494 
495    auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin));
496 
497    GPRVector::Swizzle swz = {0,1,2,3};
498    auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
499 
500    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
501        nir_intrinsic_image_array(intrin)) {
502       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
503       emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
504    }
505 
506    if (intrin->intrinsic != nir_intrinsic_image_load) {
507       if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
508          emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
509                                              from_nir(intrin->src[4], 0), {alu_write}));
510          emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
511                                              from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
512       } else {
513          emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
514                                              from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
515       }
516    }
517    auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
518 
519    auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
520                                    image_offset, 1, 0xf, 0, true);
521    emit_instruction(store);
522    return fetch_return_value(intrin);
523 }
524 
fetch_return_value(const nir_intrinsic_instr * intrin)525 bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
526 {
527    emit_instruction(new WaitAck(0));
528 
529    pipe_format format = nir_intrinsic_format(intrin);
530    unsigned fmt = fmt_32;
531    unsigned num_format = 0;
532    unsigned format_comp = 0;
533    unsigned endian = 0;
534 
535    int imageid = 0;
536    PValue image_offset;
537 
538    if (nir_src_is_const(intrin->src[0]))
539       imageid = nir_src_as_int(intrin->src[0]);
540    else
541       image_offset = from_nir(intrin->src[0], 0);
542 
543    r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
544 
545    GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
546 
547    auto fetch = new FetchInstruction(vc_fetch,
548                                      no_index_offset,
549                                      (EVTXDataFormat)fmt,
550                                      (EVFetchNumFormat)num_format,
551                                      (EVFetchEndianSwap)endian,
552                                      m_rat_return_address.reg_i(1),
553                                      dest,
554                                      0,
555                                      false,
556                                      0x3,
557                                      R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
558                                      0,
559                                      bim_none,
560                                      false,
561                                      false,
562                                      0,
563                                      0,
564                                      0,
565                                      image_offset, {0,1,2,3});
566    fetch->set_flag(vtx_srf_mode);
567    fetch->set_flag(vtx_use_tc);
568    if (format_comp)
569       fetch->set_flag(vtx_format_comp_signed);
570 
571    emit_instruction(fetch);
572    return true;
573 }
574 
emit_image_size(const nir_intrinsic_instr * intrin)575 bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
576 {
577    GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
578    GPRVector src{0,{4,4,4,4}};
579 
580    assert(nir_src_as_uint(intrin->src[1]) == 0);
581 
582    auto const_offset = nir_src_as_const_value(intrin->src[0]);
583    auto dyn_offset = PValue();
584    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
585    if (const_offset)
586       res_id += const_offset[0].u32;
587    else
588       dyn_offset = from_nir(intrin->src[0], 0);
589 
590    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
591       emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
592                        res_id,
593                        bim_none));
594       return true;
595    } else {
596       emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
597                                              0/* ?? */,
598                                              res_id, dyn_offset));
599       if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
600           nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
601          /* Need to load the layers from a const buffer */
602 
603          unsigned lookup_resid = const_offset[0].u32;
604          emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
605                                              PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
606                                                                      R600_BUFFER_INFO_CONST_BUFFER)),
607          EmitInstruction::last_write));
608       }
609    }
610    return true;
611 }
612 
emit_buffer_size(const nir_intrinsic_instr * intr)613 bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
614 {
615    std::array<PValue,4> dst_elms;
616 
617 
618    for (uint16_t i = 0; i < 4; ++i) {
619       dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
620    }
621 
622    GPRVector dst(dst_elms);
623    GPRVector src(0,{4,4,4,4});
624 
625    auto const_offset = nir_src_as_const_value(intr->src[0]);
626    auto dyn_offset = PValue();
627    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
628    if (const_offset)
629       res_id += const_offset[0].u32;
630    else
631       assert(0 && "dynamic buffer offset not supported in buffer_size");
632 
633    emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
634                     res_id, bim_none));
635 
636    return true;
637 }
638 
make_stores_ack_and_waitack()639 bool EmitSSBOInstruction::make_stores_ack_and_waitack()
640 {
641    for (auto&& store: m_store_ops)
642       store->set_ack();
643 
644    if (!m_store_ops.empty())
645       emit_instruction(new WaitAck(0));
646 
647    m_store_ops.clear();
648 
649    return true;
650 }
651 
make_dest(const nir_intrinsic_instr * ir)652 GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
653 {
654    GPRVector::Values v;
655    int i;
656    for (i = 0; i < 4; ++i)
657       v[i] = from_nir(ir->dest, i);
658    return GPRVector(v);
659 }
660 
661 }
662