1 #include "sfn_emitssboinstruction.h"
2
3 #include "sfn_instruction_fetch.h"
4 #include "sfn_instruction_gds.h"
5 #include "sfn_instruction_misc.h"
6 #include "sfn_instruction_tex.h"
7 #include "../r600_pipe.h"
8 #include "../r600_asm.h"
9
10 namespace r600 {
11
/* Base selector used when reading per-image info from the buffer-info
 * constant buffer (see emit_image_size).
 * NOTE(review): the division by 16 presumably converts a byte offset into a
 * vec4 register index - confirm against the R600_BUFFER_INFO_OFFSET definition. */
#define R600_SHADER_BUFFER_INFO_SEL (512 + (R600_BUFFER_INFO_OFFSET / 16))
13
EmitSSBOInstruction(ShaderFromNirProcessor & processor)14 EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
15 EmitInstruction(processor),
16 m_require_rat_return_address(false),
17 m_ssbo_image_offset(0)
18 {
19 }
20
set_ssbo_offset(int offset)21 void EmitSSBOInstruction::set_ssbo_offset(int offset)
22 {
23 m_ssbo_image_offset = offset;
24 }
25
26
set_require_rat_return_address()27 void EmitSSBOInstruction::set_require_rat_return_address()
28 {
29 m_require_rat_return_address = true;
30 }
31
32 bool
load_rat_return_address()33 EmitSSBOInstruction::load_rat_return_address()
34 {
35 if (m_require_rat_return_address) {
36 m_rat_return_address = get_temp_vec4();
37 emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
38 emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
39 emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
40 literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
41 emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
42 m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
43 {alu_write, alu_last_instr}));
44 m_require_rat_return_address = false;
45 }
46 return true;
47 }
48
49
do_emit(nir_instr * instr)50 bool EmitSSBOInstruction::do_emit(nir_instr* instr)
51 {
52 const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
53 switch (intr->intrinsic) {
54 case nir_intrinsic_atomic_counter_add:
55 case nir_intrinsic_atomic_counter_and:
56 case nir_intrinsic_atomic_counter_exchange:
57 case nir_intrinsic_atomic_counter_max:
58 case nir_intrinsic_atomic_counter_min:
59 case nir_intrinsic_atomic_counter_or:
60 case nir_intrinsic_atomic_counter_xor:
61 case nir_intrinsic_atomic_counter_comp_swap:
62 return emit_atomic(intr);
63 case nir_intrinsic_atomic_counter_read:
64 case nir_intrinsic_atomic_counter_post_dec:
65 return emit_unary_atomic(intr);
66 case nir_intrinsic_atomic_counter_inc:
67 return emit_atomic_inc(intr);
68 case nir_intrinsic_atomic_counter_pre_dec:
69 return emit_atomic_pre_dec(intr);
70 case nir_intrinsic_load_ssbo:
71 return emit_load_ssbo(intr);
72 case nir_intrinsic_store_ssbo:
73 return emit_store_ssbo(intr);
74 case nir_intrinsic_ssbo_atomic_add:
75 case nir_intrinsic_ssbo_atomic_comp_swap:
76 case nir_intrinsic_ssbo_atomic_or:
77 case nir_intrinsic_ssbo_atomic_xor:
78 case nir_intrinsic_ssbo_atomic_imax:
79 case nir_intrinsic_ssbo_atomic_imin:
80 case nir_intrinsic_ssbo_atomic_umax:
81 case nir_intrinsic_ssbo_atomic_umin:
82 case nir_intrinsic_ssbo_atomic_and:
83 case nir_intrinsic_ssbo_atomic_exchange:
84 return emit_ssbo_atomic_op(intr);
85 case nir_intrinsic_image_store:
86 return emit_image_store(intr);
87 case nir_intrinsic_image_load:
88 case nir_intrinsic_image_atomic_add:
89 case nir_intrinsic_image_atomic_and:
90 case nir_intrinsic_image_atomic_or:
91 case nir_intrinsic_image_atomic_xor:
92 case nir_intrinsic_image_atomic_exchange:
93 case nir_intrinsic_image_atomic_comp_swap:
94 case nir_intrinsic_image_atomic_umin:
95 case nir_intrinsic_image_atomic_umax:
96 case nir_intrinsic_image_atomic_imin:
97 case nir_intrinsic_image_atomic_imax:
98 return emit_image_load(intr);
99 case nir_intrinsic_image_size:
100 return emit_image_size(intr);
101 case nir_intrinsic_get_ssbo_size:
102 return emit_buffer_size(intr);
103 case nir_intrinsic_memory_barrier:
104 case nir_intrinsic_memory_barrier_image:
105 case nir_intrinsic_memory_barrier_buffer:
106 case nir_intrinsic_group_memory_barrier:
107 return make_stores_ack_and_waitack();
108 default:
109 return false;
110 }
111 }
112
emit_atomic(const nir_intrinsic_instr * instr)113 bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
114 {
115 ESDOp op = get_opcode(instr->intrinsic);
116
117 if (DS_OP_INVALID == op)
118 return false;
119
120 GPRVector dest = make_dest(instr);
121
122 int base = remap_atomic_base(nir_intrinsic_base(instr));
123
124 PValue uav_id = from_nir(instr->src[0], 0);
125
126 PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
127
128 GDSInstr *ir = nullptr;
129 if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) {
130 PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
131 ir = new GDSInstr(op, dest, value, value2, uav_id, base);
132 } else {
133 ir = new GDSInstr(op, dest, value, uav_id, base);
134 }
135
136 emit_instruction(ir);
137 return true;
138 }
139
emit_unary_atomic(const nir_intrinsic_instr * instr)140 bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
141 {
142 ESDOp op = get_opcode(instr->intrinsic);
143
144 if (DS_OP_INVALID == op)
145 return false;
146
147 GPRVector dest = make_dest(instr);
148
149 PValue uav_id = from_nir(instr->src[0], 0);
150
151 auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
152
153 emit_instruction(ir);
154 return true;
155 }
156
get_opcode(const nir_intrinsic_op opcode)157 ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode)
158 {
159 switch (opcode) {
160 case nir_intrinsic_atomic_counter_add:
161 return DS_OP_ADD_RET;
162 case nir_intrinsic_atomic_counter_and:
163 return DS_OP_AND_RET;
164 case nir_intrinsic_atomic_counter_exchange:
165 return DS_OP_XCHG_RET;
166 case nir_intrinsic_atomic_counter_inc:
167 return DS_OP_INC_RET;
168 case nir_intrinsic_atomic_counter_max:
169 return DS_OP_MAX_UINT_RET;
170 case nir_intrinsic_atomic_counter_min:
171 return DS_OP_MIN_UINT_RET;
172 case nir_intrinsic_atomic_counter_or:
173 return DS_OP_OR_RET;
174 case nir_intrinsic_atomic_counter_read:
175 return DS_OP_READ_RET;
176 case nir_intrinsic_atomic_counter_xor:
177 return DS_OP_XOR_RET;
178 case nir_intrinsic_atomic_counter_post_dec:
179 return DS_OP_DEC_RET;
180 case nir_intrinsic_atomic_counter_comp_swap:
181 return DS_OP_CMP_XCHG_RET;
182 case nir_intrinsic_atomic_counter_pre_dec:
183 default:
184 return DS_OP_INVALID;
185 }
186 }
187
188 RatInstruction::ERatOp
get_rat_opcode(const nir_intrinsic_op opcode,pipe_format format) const189 EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
190 {
191 switch (opcode) {
192 case nir_intrinsic_ssbo_atomic_add:
193 case nir_intrinsic_image_atomic_add:
194 return RatInstruction::ADD_RTN;
195 case nir_intrinsic_ssbo_atomic_and:
196 case nir_intrinsic_image_atomic_and:
197 return RatInstruction::AND_RTN;
198 case nir_intrinsic_ssbo_atomic_exchange:
199 case nir_intrinsic_image_atomic_exchange:
200 return RatInstruction::XCHG_RTN;
201 case nir_intrinsic_ssbo_atomic_or:
202 case nir_intrinsic_image_atomic_or:
203 return RatInstruction::OR_RTN;
204 case nir_intrinsic_ssbo_atomic_imin:
205 case nir_intrinsic_image_atomic_imin:
206 return RatInstruction::MIN_INT_RTN;
207 case nir_intrinsic_ssbo_atomic_imax:
208 case nir_intrinsic_image_atomic_imax:
209 return RatInstruction::MAX_INT_RTN;
210 case nir_intrinsic_ssbo_atomic_umin:
211 case nir_intrinsic_image_atomic_umin:
212 return RatInstruction::MIN_UINT_RTN;
213 case nir_intrinsic_ssbo_atomic_umax:
214 case nir_intrinsic_image_atomic_umax:
215 return RatInstruction::MAX_UINT_RTN;
216 case nir_intrinsic_ssbo_atomic_xor:
217 case nir_intrinsic_image_atomic_xor:
218 return RatInstruction::XOR_RTN;
219 case nir_intrinsic_ssbo_atomic_comp_swap:
220 case nir_intrinsic_image_atomic_comp_swap:
221 if (util_format_is_float(format))
222 return RatInstruction::CMPXCHG_FLT_RTN;
223 else
224 return RatInstruction::CMPXCHG_INT_RTN;
225 case nir_intrinsic_image_load:
226 return RatInstruction::NOP_RTN;
227 default:
228 unreachable("Unsupported RAT instruction");
229 }
230 }
231
232
emit_atomic_add(const nir_intrinsic_instr * instr)233 bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
234 {
235 GPRVector dest = make_dest(instr);
236
237 PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
238
239 PValue uav_id = from_nir(instr->src[0], 0);
240
241 auto ir = new GDSInstr(DS_OP_ADD_RET, dest, value, uav_id,
242 remap_atomic_base(nir_intrinsic_base(instr)));
243
244 emit_instruction(ir);
245 return true;
246 }
247
load_atomic_inc_limits()248 bool EmitSSBOInstruction::load_atomic_inc_limits()
249 {
250 m_atomic_update = get_temp_register();
251 m_atomic_update->set_keep_alive();
252 emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
253 {alu_write, alu_last_instr}));
254 return true;
255 }
256
emit_atomic_inc(const nir_intrinsic_instr * instr)257 bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
258 {
259 PValue uav_id = from_nir(instr->src[0], 0);
260 GPRVector dest = make_dest(instr);
261 auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id,
262 remap_atomic_base(nir_intrinsic_base(instr)));
263 emit_instruction(ir);
264 return true;
265 }
266
emit_atomic_pre_dec(const nir_intrinsic_instr * instr)267 bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
268 {
269 GPRVector dest = make_dest(instr);
270
271 PValue uav_id = from_nir(instr->src[0], 0);
272
273 auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
274 remap_atomic_base(nir_intrinsic_base(instr)));
275 emit_instruction(ir);
276
277 emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write));
278
279 return true;
280 }
281
emit_load_ssbo(const nir_intrinsic_instr * instr)282 bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
283 {
284 GPRVector dest = make_dest(instr);
285
286 /** src0 not used, should be some offset */
287 auto addr = from_nir(instr->src[1], 0);
288 PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
289
290 /** Should be lowered in nir */
291 emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
292 {alu_write, alu_last_instr}));
293
294 const EVTXDataFormat formats[4] = {
295 fmt_32,
296 fmt_32_32,
297 fmt_32_32_32,
298 fmt_32_32_32_32
299 };
300
301 const std::array<int,4> dest_swt[4] = {
302 {0,7,7,7},
303 {0,1,7,7},
304 {0,1,2,7},
305 {0,1,2,3}
306 };
307
308 /* TODO fix resource index */
309 auto ir = new FetchInstruction(dest, addr_temp,
310 R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
311 , from_nir(instr->src[0], 0),
312 formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
313 ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
314 ir->set_flag(vtx_use_tc);
315
316 emit_instruction(ir);
317 return true;
318 }
319
emit_store_ssbo(const nir_intrinsic_instr * instr)320 bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
321 {
322
323 GPRVector::Swizzle swz = {7,7,7,7};
324 for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i)
325 swz[i] = i;
326
327 auto orig_addr = from_nir(instr->src[2], 0);
328
329 int temp1 = allocate_temp_register();
330 GPRVector addr_vec(temp1, {0,1,2,7});
331
332 auto temp2 = get_temp_vec4();
333
334 auto rat_id = from_nir(instr->src[1], 0);
335
336 emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
337 PValue(new LiteralValue(2)), write));
338 emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
339 emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
340
341
342 //#define WRITE_AS_VECTOR
343 #ifdef WRITE_AS_VECTOR
344 std::unique_ptr<GPRVector> value(vec_from_nir_with_fetch_constant(instr->src[0],
345 (1 << instr->src[0].ssa->num_components) - 1, swz));
346
347 /* TODO fix resource index */
348 int nelements = instr->src[0].ssa->num_components - 1;
349 if (nelements == 2)
350 nelements = 3;
351 auto ir = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
352 *value, addr_vec, 0, rat_id, 11,
353 (1 << instr->src[0].ssa->num_components) - 1,
354 0, false);
355 emit_instruction(ir);
356 #else
357
358 auto values = vec_from_nir_with_fetch_constant(instr->src[0],
359 (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
360
361 auto cf_op = cf_mem_rat;
362 //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
363 auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
364 values, addr_vec, m_ssbo_image_offset, rat_id, 1,
365 1, 0, false);
366 emit_instruction(store);
367 m_store_ops.push_back(store);
368
369 for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
370 emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), write));
371 emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
372 {addr_vec.reg_i(0), Value::one_i}, last_write));
373 store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
374 temp2, addr_vec, 0, rat_id, 1,
375 1, 0, false);
376 emit_instruction(store);
377 if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
378 m_store_ops.push_back(store);
379 }
380 #endif
381 return true;
382 }
383
384 bool
emit_image_store(const nir_intrinsic_instr * intrin)385 EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
386 {
387 int imageid = 0;
388 PValue image_offset;
389
390 if (nir_src_is_const(intrin->src[0]))
391 imageid = nir_src_as_int(intrin->src[0]);
392 else
393 image_offset = from_nir(intrin->src[0], 0);
394
395 auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
396 auto undef = from_nir(intrin->src[2], 0);
397 auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3});
398 auto unknown = from_nir(intrin->src[4], 0);
399
400 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
401 nir_intrinsic_image_array(intrin)) {
402 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
403 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
404 }
405
406 auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
407 auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
408 image_offset, 1, 0xf, 0, false);
409
410 //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
411 m_store_ops.push_back(store);
412
413 emit_instruction(store);
414 return true;
415 }
416
417 bool
emit_ssbo_atomic_op(const nir_intrinsic_instr * intrin)418 EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
419 {
420 int imageid = 0;
421 PValue image_offset;
422
423 if (nir_src_is_const(intrin->src[0]))
424 imageid = nir_src_as_int(intrin->src[0]);
425 else
426 image_offset = from_nir(intrin->src[0], 0);
427
428 auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
429
430
431 auto coord_orig = from_nir(intrin->src[1], 0, 0);
432 auto coord = get_temp_register(0);
433
434 emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
435
436 if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
437 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
438 from_nir(intrin->src[3], 0), {alu_write}));
439 // TODO: cayman wants channel 2 here
440 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
441 from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
442 } else {
443 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
444 from_nir(intrin->src[2], 0), {alu_write}));
445 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
446 }
447
448
449 GPRVector out_vec({coord, coord, coord, coord});
450
451 auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
452 image_offset, 1, 0xf, 0, true);
453 emit_instruction(atomic);
454 emit_instruction(new WaitAck(0));
455
456 GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
457 auto fetch = new FetchInstruction(vc_fetch,
458 no_index_offset,
459 fmt_32,
460 vtx_nf_int,
461 vtx_es_none,
462 m_rat_return_address.reg_i(1),
463 dest,
464 0,
465 false,
466 0xf,
467 R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
468 0,
469 bim_none,
470 false,
471 false,
472 0,
473 0,
474 0,
475 image_offset,
476 {0,7,7,7});
477 fetch->set_flag(vtx_srf_mode);
478 fetch->set_flag(vtx_use_tc);
479 emit_instruction(fetch);
480 return true;
481
482 }
483
484 bool
emit_image_load(const nir_intrinsic_instr * intrin)485 EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
486 {
487 int imageid = 0;
488 PValue image_offset;
489
490 if (nir_src_is_const(intrin->src[0]))
491 imageid = nir_src_as_int(intrin->src[0]);
492 else
493 image_offset = from_nir(intrin->src[0], 0);
494
495 auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin));
496
497 GPRVector::Swizzle swz = {0,1,2,3};
498 auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
499
500 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
501 nir_intrinsic_image_array(intrin)) {
502 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
503 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
504 }
505
506 if (intrin->intrinsic != nir_intrinsic_image_load) {
507 if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
508 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
509 from_nir(intrin->src[4], 0), {alu_write}));
510 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
511 from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
512 } else {
513 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
514 from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
515 }
516 }
517 auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
518
519 auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
520 image_offset, 1, 0xf, 0, true);
521 emit_instruction(store);
522 return fetch_return_value(intrin);
523 }
524
fetch_return_value(const nir_intrinsic_instr * intrin)525 bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
526 {
527 emit_instruction(new WaitAck(0));
528
529 pipe_format format = nir_intrinsic_format(intrin);
530 unsigned fmt = fmt_32;
531 unsigned num_format = 0;
532 unsigned format_comp = 0;
533 unsigned endian = 0;
534
535 int imageid = 0;
536 PValue image_offset;
537
538 if (nir_src_is_const(intrin->src[0]))
539 imageid = nir_src_as_int(intrin->src[0]);
540 else
541 image_offset = from_nir(intrin->src[0], 0);
542
543 r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
544
545 GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
546
547 auto fetch = new FetchInstruction(vc_fetch,
548 no_index_offset,
549 (EVTXDataFormat)fmt,
550 (EVFetchNumFormat)num_format,
551 (EVFetchEndianSwap)endian,
552 m_rat_return_address.reg_i(1),
553 dest,
554 0,
555 false,
556 0x3,
557 R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
558 0,
559 bim_none,
560 false,
561 false,
562 0,
563 0,
564 0,
565 image_offset, {0,1,2,3});
566 fetch->set_flag(vtx_srf_mode);
567 fetch->set_flag(vtx_use_tc);
568 if (format_comp)
569 fetch->set_flag(vtx_format_comp_signed);
570
571 emit_instruction(fetch);
572 return true;
573 }
574
emit_image_size(const nir_intrinsic_instr * intrin)575 bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
576 {
577 GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
578 GPRVector src{0,{4,4,4,4}};
579
580 assert(nir_src_as_uint(intrin->src[1]) == 0);
581
582 auto const_offset = nir_src_as_const_value(intrin->src[0]);
583 auto dyn_offset = PValue();
584 int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
585 if (const_offset)
586 res_id += const_offset[0].u32;
587 else
588 dyn_offset = from_nir(intrin->src[0], 0);
589
590 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
591 emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
592 res_id,
593 bim_none));
594 return true;
595 } else {
596 emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
597 0/* ?? */,
598 res_id, dyn_offset));
599 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
600 nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
601 /* Need to load the layers from a const buffer */
602
603 unsigned lookup_resid = const_offset[0].u32;
604 emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
605 PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
606 R600_BUFFER_INFO_CONST_BUFFER)),
607 EmitInstruction::last_write));
608 }
609 }
610 return true;
611 }
612
emit_buffer_size(const nir_intrinsic_instr * intr)613 bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
614 {
615 std::array<PValue,4> dst_elms;
616
617
618 for (uint16_t i = 0; i < 4; ++i) {
619 dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
620 }
621
622 GPRVector dst(dst_elms);
623 GPRVector src(0,{4,4,4,4});
624
625 auto const_offset = nir_src_as_const_value(intr->src[0]);
626 auto dyn_offset = PValue();
627 int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
628 if (const_offset)
629 res_id += const_offset[0].u32;
630 else
631 assert(0 && "dynamic buffer offset not supported in buffer_size");
632
633 emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
634 res_id, bim_none));
635
636 return true;
637 }
638
make_stores_ack_and_waitack()639 bool EmitSSBOInstruction::make_stores_ack_and_waitack()
640 {
641 for (auto&& store: m_store_ops)
642 store->set_ack();
643
644 if (!m_store_ops.empty())
645 emit_instruction(new WaitAck(0));
646
647 m_store_ops.clear();
648
649 return true;
650 }
651
make_dest(const nir_intrinsic_instr * ir)652 GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
653 {
654 GPRVector::Values v;
655 int i;
656 for (i = 0; i < 4; ++i)
657 v[i] = from_nir(ir->dest, i);
658 return GPRVector(v);
659 }
660
661 }
662