1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #include "sb_bc.h"
28 #include "sb_shader.h"
29 #include "sb_pass.h"
30 #include "eg_sq.h" // V_SQ_CF_INDEX_0/1
31 
32 namespace r600_sb {
33 
// Characters used when printing component selectors; the selector value
// read from the bytecode is used directly as the index into this string.
static const char* chans = "xyzw01?_";

// Bank swizzle names printed for ALU instructions whose slot is not
// SLOT_TRANS, indexed by the instruction's bank_swizzle value.
static const char* vec_bs[] = {
	"VEC_012",
	"VEC_021",
	"VEC_120",
	"VEC_102",
	"VEC_201",
	"VEC_210"
};

// Bank swizzle names printed for ALU instructions in SLOT_TRANS, indexed by
// the instruction's bank_swizzle value.
static const char* scl_bs[] = {
	"SCL_210",
	"SCL_122",
	"SCL_212",
	"SCL_221"
};
43 
44 
visit(cf_node & n,bool enter)45 bool bc_dump::visit(cf_node& n, bool enter) {
46 	if (enter) {
47 
48 		id = n.bc.id << 1;
49 
50 		if ((n.bc.op_ptr->flags & CF_ALU) && n.bc.is_alu_extended()) {
51 			dump_dw(id, 2);
52 			id += 2;
53 			sblog << "\n";
54 		}
55 
56 		dump_dw(id, 2);
57 		dump(n);
58 
59 		if (n.bc.op_ptr->flags & CF_CLAUSE) {
60 			id = n.bc.addr << 1;
61 			new_group = 1;
62 		}
63 	}
64 	return true;
65 }
66 
visit(alu_node & n,bool enter)67 bool bc_dump::visit(alu_node& n, bool enter) {
68 	if (enter) {
69 		sblog << " ";
70 		dump_dw(id, 2);
71 
72 		if (new_group) {
73 			sblog.print_w(++group_index, 5);
74 			sblog << " ";
75 		} else
76 			sblog << "      ";
77 
78 		dump(n);
79 		id += 2;
80 
81 		new_group = n.bc.last;
82 	} else {
83 		if (n.bc.last) {
84 			alu_group_node *g =
85 					static_cast<alu_group_node*>(n.get_alu_group_node());
86 			assert(g);
87 			for (unsigned k = 0; k < g->literals.size(); ++k) {
88 				sblog << " ";
89 				dump_dw(id, 1);
90 				id += 1;
91 				sblog << "\n";
92 			}
93 
94 			id = (id + 1) & ~1u;
95 		}
96 	}
97 
98 	return false;
99 }
100 
visit(fetch_node & n,bool enter)101 bool bc_dump::visit(fetch_node& n, bool enter) {
102 	if (enter) {
103 		sblog << " ";
104 		dump_dw(id, 3);
105 		dump(n);
106 		id += 4;
107 	}
108 	return false;
109 }
110 
fill_to(sb_ostringstream & s,int pos)111 static void fill_to(sb_ostringstream &s, int pos) {
112 	int l = s.str().length();
113 	if (l < pos)
114 		s << std::string(pos-l, ' ');
115 }
116 
dump(cf_node & n)117 void bc_dump::dump(cf_node& n) {
118 	sb_ostringstream s;
119 	s << n.bc.op_ptr->name;
120 
121 	if (n.bc.op_ptr->flags & CF_EXP) {
122 		static const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
123 
124 		fill_to(s, 18);
125 		s << " " << exp_type[n.bc.type] << " ";
126 
127 		if (n.bc.burst_count) {
128 			sb_ostringstream s2;
129 			s2 << n.bc.array_base << "-" << n.bc.array_base + n.bc.burst_count;
130 			s.print_wl(s2.str(), 5);
131 			s << " R" << n.bc.rw_gpr << "-" <<
132 					n.bc.rw_gpr + n.bc.burst_count << ".";
133 		} else {
134 			s.print_wl(n.bc.array_base, 5);
135 			s << " R" << n.bc.rw_gpr << ".";
136 		}
137 
138 		for (int k = 0; k < 4; ++k)
139 			s << chans[n.bc.sel[k]];
140 
141 	} else if (n.bc.op_ptr->flags & CF_MEM) {
142 		static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
143 				"WRITE_IND_ACK"};
144 		fill_to(s, 18);
145 		s << " " << exp_type[n.bc.type] << " ";
146 		s.print_wl(n.bc.array_base, 5);
147 		s << " R" << n.bc.rw_gpr << ".";
148 		for (int k = 0; k < 4; ++k)
149 			s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_');
150 
151 		if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) {
152 			s << ", @R" << n.bc.index_gpr << ".xyz";
153 		}
154 		if ((n.bc.op_ptr->flags & CF_MEM) && (n.bc.type & 1)) {
155 			s << ", @R" << n.bc.index_gpr << ".x";
156 		}
157 
158 		s << "  ES:" << n.bc.elem_size;
159 
160       s << " OP:" << n.bc.rat_inst;
161 
162 		if (n.bc.mark)
163 			s << " MARK";
164 
165 	} else {
166 
167 		if (n.bc.op_ptr->flags & CF_CLAUSE) {
168 			s << " " << n.bc.count+1;
169 		}
170 
171 		s << " @" << (n.bc.addr << 1);
172 
173 		if (n.bc.op_ptr->flags & CF_ALU) {
174 			static const char *index_mode[] = {"", " CF_INDEX_0", " CF_INDEX_1"};
175 
176 			for (int k = 0; k < 4; ++k) {
177 				bc_kcache &kc = n.bc.kc[k];
178 				if (kc.mode) {
179 					s << " KC" << k << "[CB" << kc.bank << ":" <<
180 							(kc.addr << 4) << "-" <<
181 							(((kc.addr + kc.mode) << 4) - 1) << index_mode[kc.index_mode] << "]";
182 				}
183 			}
184 		}
185 
186 		if (n.bc.cond)
187 			s << " CND:" << n.bc.cond;
188 
189 		if (n.bc.pop_count)
190 			s << " POP:" << n.bc.pop_count;
191 
192 		if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
193 			s << " STREAM" << n.bc.count;
194 	}
195 
196 	if (!n.bc.barrier)
197 		s << "  NO_BARRIER";
198 
199 	if (n.bc.valid_pixel_mode)
200 		s << "  VPM";
201 
202 	if (n.bc.whole_quad_mode)
203 		s << "  WQM";
204 
205 	if (n.bc.end_of_program)
206 		s << "  EOP";
207 
208 	sblog << s.str() << "\n";
209 }
210 
211 
print_sel(sb_ostream & s,int sel,int rel,int index_mode,int need_brackets)212 static void print_sel(sb_ostream &s, int sel, int rel, int index_mode,
213                       int need_brackets) {
214 	if (rel && index_mode >= 5 && sel < 128)
215 		s << "G";
216 	if (rel || need_brackets) {
217 		s << "[";
218 	}
219 	s << sel;
220 	if (rel) {
221 		if (index_mode == 0 || index_mode == 6)
222 			s << "+AR";
223 		else if (index_mode == 4)
224 			s << "+AL";
225 	}
226 	if (rel || need_brackets) {
227 		s << "]";
228 	}
229 }
230 
print_dst(sb_ostream & s,bc_alu & alu)231 static void print_dst(sb_ostream &s, bc_alu &alu)
232 {
233 	unsigned sel = alu.dst_gpr;
234 	char reg_char = 'R';
235 	if (sel >= 128 - 4) { // clause temporary gpr
236 		sel -= 128 - 4;
237 		reg_char = 'T';
238 	}
239 
240 	if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) {
241 		s << reg_char;
242 		print_sel(s, sel, alu.dst_rel, alu.index_mode, 0);
243 	} else {
244 		s << "__";
245 	}
246 	s << ".";
247 	s << chans[alu.dst_chan];
248 }
249 
print_src(sb_ostream & s,bc_alu & alu,unsigned idx)250 static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx)
251 {
252 	bc_alu_src *src = &alu.src[idx];
253 	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
254 
255 	if (src->neg)
256 		s <<"-";
257 	if (src->abs)
258 		s <<"|";
259 
260 	if (sel < 128 - 4) {
261 		s << "R";
262 	} else if (sel < 128) {
263 		s << "T";
264 		sel -= 128 - 4;
265 	} else if (sel < 160) {
266 		s << "KC0";
267 		need_brackets = 1;
268 		sel -= 128;
269 	} else if (sel < 192) {
270 		s << "KC1";
271 		need_brackets = 1;
272 		sel -= 160;
273 	} else if (sel >= 448) {
274 		s << "Param";
275 		sel -= 448;
276 	} else if (sel >= 288) {
277 		s << "KC3";
278 		need_brackets = 1;
279 		sel -= 288;
280 	} else if (sel >= 256) {
281 		s << "KC2";
282 		need_brackets = 1;
283 		sel -= 256;
284 	} else {
285 		need_sel = 0;
286 		need_chan = 0;
287 		switch (sel) {
288 		case ALU_SRC_LDS_OQ_A:
289 			s << "LDS_OQ_A";
290 			need_chan = 1;
291 			break;
292 		case ALU_SRC_LDS_OQ_B:
293 			s << "LDS_OQ_B";
294 			need_chan = 1;
295 			break;
296 		case ALU_SRC_LDS_OQ_A_POP:
297 			s << "LDS_OQ_A_POP";
298 			need_chan = 1;
299 			break;
300 		case ALU_SRC_LDS_OQ_B_POP:
301 			s << "LDS_OQ_B_POP";
302 			need_chan = 1;
303 			break;
304 		case ALU_SRC_LDS_DIRECT_A:
305 			s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]";
306 			break;
307 		case ALU_SRC_LDS_DIRECT_B:
308 			s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]";
309 			break;
310 		case ALU_SRC_PS:
311 			s << "PS";
312 			break;
313 		case ALU_SRC_PV:
314 			s << "PV";
315 			need_chan = 1;
316 			break;
317 		case ALU_SRC_LITERAL:
318 			s << "[0x";
319 			s.print_zw_hex(src->value.u, 8);
320 			s << " " << src->value.f << "]";
321 			need_chan = 1;
322 			break;
323 		case ALU_SRC_0_5:
324 			s << "0.5";
325 			break;
326 		case ALU_SRC_M_1_INT:
327 			s << "-1";
328 			break;
329 		case ALU_SRC_1_INT:
330 			s << "1";
331 			break;
332 		case ALU_SRC_1:
333 			s << "1.0";
334 			break;
335 		case ALU_SRC_0:
336 			s << "0";
337 			break;
338 		case ALU_SRC_TIME_LO:
339 			s << "TIME_LO";
340 			break;
341 		case ALU_SRC_TIME_HI:
342 			s << "TIME_HI";
343 			break;
344 		case ALU_SRC_MASK_LO:
345 			s << "MASK_LO";
346 			break;
347 		case ALU_SRC_MASK_HI:
348 			s << "MASK_HI";
349 			break;
350 		case ALU_SRC_HW_WAVE_ID:
351 			s << "HW_WAVE_ID";
352 			break;
353 		case ALU_SRC_SIMD_ID:
354 			s << "SIMD_ID";
355 			break;
356 		case ALU_SRC_SE_ID:
357 			s << "SE_ID";
358 			break;
359 		default:
360 			s << "??IMM_" <<  sel;
361 			break;
362 		}
363 	}
364 
365 	if (need_sel)
366 		print_sel(s, sel, src->rel, alu.index_mode, need_brackets);
367 
368 	if (need_chan) {
369 		s << "." << chans[src->chan];
370 	}
371 
372 	if (src->abs)
373 		s << "|";
374 }
dump(alu_node & n)375 void bc_dump::dump(alu_node& n) {
376 	sb_ostringstream s;
377 	static const char *omod_str[] = {"","*2","*4","/2"};
378 	static const char *slots = "xyzwt";
379 
380 	s << (n.bc.update_exec_mask ? "M" : " ");
381 	s << (n.bc.update_pred ? "P" : " ");
382 	s << " ";
383 	s << (n.bc.pred_sel>=2 ? (n.bc.pred_sel == 2 ? "0" : "1") : " ");
384 	s << " ";
385 
386 	s << slots[n.bc.slot] << ": ";
387 
388 	s << n.bc.op_ptr->name << omod_str[n.bc.omod] << (n.bc.clamp ? "_sat" : "");
389 	fill_to(s, 26);
390 	s << " ";
391 
392 	print_dst(s, n.bc);
393 	for (int k = 0; k < n.bc.op_ptr->src_count; ++k) {
394 		s << (k ? ", " : ",  ");
395 		print_src(s, n.bc, k);
396 	}
397 
398 	if (n.bc.bank_swizzle) {
399 		fill_to(s, 55);
400 		if (n.bc.slot == SLOT_TRANS)
401 			s << "  " << scl_bs[n.bc.bank_swizzle];
402 		else
403 			s << "  " << vec_bs[n.bc.bank_swizzle];
404 	}
405 
406 	if (ctx.is_cayman()) {
407 		if (n.bc.op == ALU_OP1_MOVA_INT) {
408 			static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
409 				" Unknown MOVA_INT dest" };
410 			s << mova_str[std::min(n.bc.dst_gpr, 4u)];  // CM_V_SQ_MOVA_DST_AR_*
411 		}
412 	}
413 
414 	if (n.bc.lds_idx_offset) {
415 		s << " IDX_OFFSET:" << n.bc.lds_idx_offset;
416 	}
417 
418 	sblog << s.str() << "\n";
419 }
420 
init()421 int bc_dump::init() {
422 	sb_ostringstream s;
423 	s << "===== SHADER #" << sh.id;
424 
425 	if (sh.optimized)
426 		s << " OPT";
427 
428 	s << " ";
429 
430 	std::string target = std::string(" ") +
431 			sh.get_full_target_name() + " =====";
432 
433 	while (s.str().length() + target.length() < 80)
434 		s << "=";
435 
436 	s << target;
437 
438 	sblog << "\n" << s.str() << "\n";
439 
440 	s.clear();
441 
442 	if (bc_data) {
443 		s << "===== " << ndw << " dw ===== " << sh.ngpr
444 				<< " gprs ===== " << sh.nstack << " stack ";
445 	}
446 
447 	while (s.str().length() < 80)
448 		s << "=";
449 
450 	sblog << s.str() << "\n";
451 
452 	return 0;
453 }
454 
done()455 int bc_dump::done() {
456 	sb_ostringstream s;
457 	s << "===== SHADER_END ";
458 
459 	while (s.str().length() < 80)
460 		s << "=";
461 
462 	sblog << s.str() << "\n\n";
463 
464 	return 0;
465 }
466 
bc_dump(shader & s,bytecode * bc)467 bc_dump::bc_dump(shader& s, bytecode* bc)  :
468 	vpass(s), bc_data(), ndw(), id(),
469 	new_group(), group_index() {
470 
471 	if (bc) {
472 		bc_data = bc->data();
473 		ndw = bc->ndw();
474 	}
475 }
476 
dump(fetch_node & n)477 void bc_dump::dump(fetch_node& n) {
478 	sb_ostringstream s;
479 	static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
480 	unsigned gds = n.bc.op_ptr->flags & FF_GDS;
481 	bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
482 		n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
483 	bool show_dst = !gds || (gds && gds_has_ret);
484 
485 	s << n.bc.op_ptr->name;
486 	fill_to(s, 20);
487 
488 	if (show_dst) {
489 		s << "R";
490 		print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
491 		s << ".";
492 		for (int k = 0; k < 4; ++k)
493 			s << chans[n.bc.dst_sel[k]];
494 		s << ", ";
495 	}
496 
497 	s << "R";
498 	print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0);
499 	s << ".";
500 
501 	unsigned vtx = n.bc.op_ptr->flags & FF_VTX;
502 	unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4;
503 
504 	for (unsigned k = 0; k < num_src_comp; ++k)
505 		s << chans[n.bc.src_sel[k]];
506 
507 	if (vtx && n.bc.offset[0]) {
508 		s << " + " << n.bc.offset[0] << "b ";
509 	}
510 
511 	if (!gds)
512 		s << ",   RID:" << n.bc.resource_id;
513 
514 	if (gds) {
515 		s << " UAV:" << n.bc.uav_id;
516 		if (n.bc.uav_index_mode)
517 			s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
518 		if (n.bc.bcast_first_req)
519 			s << " BFQ";
520 		if (n.bc.alloc_consume)
521 			s << " AC";
522 	} else if (vtx) {
523 		s << "  " << fetch_type[n.bc.fetch_type];
524 		if (!ctx.is_cayman() && n.bc.mega_fetch_count)
525 			s << " MFC:" << n.bc.mega_fetch_count;
526 		if (n.bc.fetch_whole_quad)
527 			s << " FWQ";
528 		if (ctx.is_egcm() && n.bc.resource_index_mode)
529 			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
530 		if (ctx.is_egcm() && n.bc.sampler_index_mode)
531 			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
532 
533 		s << " UCF:" << n.bc.use_const_fields
534 				<< " FMT(DTA:" << n.bc.data_format
535 				<< " NUM:" << n.bc.num_format_all
536 				<< " COMP:" << n.bc.format_comp_all
537 				<< " MODE:" << n.bc.srf_mode_all << ")";
538 	} else {
539 		s << ", SID:" << n.bc.sampler_id;
540 		if (n.bc.lod_bias)
541 			s << " LB:" << n.bc.lod_bias;
542 		s << " CT:";
543 		for (unsigned k = 0; k < 4; ++k)
544 			s << (n.bc.coord_type[k] ? "N" : "U");
545 		for (unsigned k = 0; k < 3; ++k)
546 			if (n.bc.offset[k])
547 				s << " O" << chans[k] << ":" << n.bc.offset[k];
548 		if (ctx.is_egcm() && n.bc.resource_index_mode)
549 			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
550 		if (ctx.is_egcm() && n.bc.sampler_index_mode)
551 			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
552 	}
553 
554 	if (n.bc.op_ptr->flags & FF_MEM) {
555 		s << ", ELEM_SIZE:" << n.bc.elem_size;
556 		if (n.bc.uncached)
557 			s << ", UNCACHED";
558 		if (n.bc.indexed)
559 			s << ", INDEXED";
560 		if (n.bc.burst_count)
561 			s << ", BURST_COUNT:" << n.bc.burst_count;
562 		s << ", ARRAY_BASE:" << n.bc.array_base;
563 		s << ", ARRAY_SIZE:" << n.bc.array_size;
564 	}
565 
566 	sblog << s.str() << "\n";
567 }
568 
dump_dw(unsigned dw_id,unsigned count)569 void bc_dump::dump_dw(unsigned dw_id, unsigned count) {
570 	if (!bc_data)
571 		return;
572 
573 	assert(dw_id + count <= ndw);
574 
575 	sblog.print_zw(dw_id, 4);
576 	sblog << "  ";
577 	while (count--) {
578 		sblog.print_zw_hex(bc_data[dw_id++], 8);
579 		sblog << " ";
580 	}
581 }
582 
583 } // namespace r600_sb
584