1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #include "sb_bc.h"
28 #include "sb_shader.h"
29 #include "sb_pass.h"
30 #include "eg_sq.h" // V_SQ_CF_INDEX_0/1
31 
32 namespace r600_sb {
33 
// Character printed for each channel / swizzle selector value; the dump
// routines in this file index it with dst/src channel and select fields.
static const char* chans = "xyzw01?_";

// Bank swizzle mnemonics for instructions outside the trans slot,
// indexed by bc_alu::bank_swizzle.
static const char* vec_bs[] = {
	"VEC_012",
	"VEC_021",
	"VEC_120",
	"VEC_102",
	"VEC_201",
	"VEC_210"
};

// Bank swizzle mnemonics for instructions in the trans slot,
// indexed by bc_alu::bank_swizzle.
static const char* scl_bs[] = {
	"SCL_210",
	"SCL_122",
	"SCL_212",
	"SCL_221"
};
43 
44 
visit(cf_node & n,bool enter)45 bool bc_dump::visit(cf_node& n, bool enter) {
46 	if (enter) {
47 
48 		id = n.bc.id << 1;
49 
50 		if ((n.bc.op_ptr->flags & CF_ALU) && n.bc.is_alu_extended()) {
51 			dump_dw(id, 2);
52 			id += 2;
53 			sblog << "\n";
54 		}
55 
56 		dump_dw(id, 2);
57 		dump(n);
58 
59 		if (n.bc.op_ptr->flags & CF_CLAUSE) {
60 			id = n.bc.addr << 1;
61 			new_group = 1;
62 		}
63 	}
64 	return true;
65 }
66 
visit(alu_node & n,bool enter)67 bool bc_dump::visit(alu_node& n, bool enter) {
68 	if (enter) {
69 		sblog << " ";
70 		dump_dw(id, 2);
71 
72 		if (new_group) {
73 			sblog.print_w(++group_index, 5);
74 			sblog << " ";
75 		} else
76 			sblog << "      ";
77 
78 		dump(n);
79 		id += 2;
80 
81 		new_group = n.bc.last;
82 	} else {
83 		if (n.bc.last) {
84 			alu_group_node *g =
85 					static_cast<alu_group_node*>(n.get_alu_group_node());
86 			assert(g);
87 			for (unsigned k = 0; k < g->literals.size(); ++k) {
88 				sblog << " ";
89 				dump_dw(id, 1);
90 				id += 1;
91 				sblog << "\n";
92 			}
93 
94 			id = (id + 1) & ~1u;
95 		}
96 	}
97 
98 	return false;
99 }
100 
visit(fetch_node & n,bool enter)101 bool bc_dump::visit(fetch_node& n, bool enter) {
102 	if (enter) {
103 		sblog << " ";
104 		dump_dw(id, 3);
105 		dump(n);
106 		id += 4;
107 	}
108 	return false;
109 }
110 
fill_to(sb_ostringstream & s,int pos)111 static void fill_to(sb_ostringstream &s, int pos) {
112 	int l = s.str().length();
113 	if (l < pos)
114 		s << std::string(pos-l, ' ');
115 }
116 
dump(cf_node & n)117 void bc_dump::dump(cf_node& n) {
118 	sb_ostringstream s;
119 	s << n.bc.op_ptr->name;
120 
121 	if (n.bc.op_ptr->flags & CF_EXP) {
122 		static const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
123 
124 		fill_to(s, 18);
125 		s << " " << exp_type[n.bc.type] << " ";
126 
127 		if (n.bc.burst_count) {
128 			sb_ostringstream s2;
129 			s2 << n.bc.array_base << "-" << n.bc.array_base + n.bc.burst_count;
130 			s.print_wl(s2.str(), 5);
131 			s << " R" << n.bc.rw_gpr << "-" <<
132 					n.bc.rw_gpr + n.bc.burst_count << ".";
133 		} else {
134 			s.print_wl(n.bc.array_base, 5);
135 			s << " R" << n.bc.rw_gpr << ".";
136 		}
137 
138 		for (int k = 0; k < 4; ++k)
139 			s << chans[n.bc.sel[k]];
140 
141 	} else if (n.bc.op_ptr->flags & CF_MEM) {
142 		static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
143 				"WRITE_IND_ACK"};
144 		fill_to(s, 18);
145 		s << " " << exp_type[n.bc.type] << " ";
146 		s.print_wl(n.bc.array_base, 5);
147 		s << " R" << n.bc.rw_gpr << ".";
148 		for (int k = 0; k < 4; ++k)
149 			s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_');
150 
151 		if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) {
152 			s << ", @R" << n.bc.index_gpr << ".xyz";
153 		}
154 		if ((n.bc.op_ptr->flags & CF_MEM) && (n.bc.type & 1)) {
155 			s << ", @R" << n.bc.index_gpr << ".x";
156 		}
157 
158 		s << "  ES:" << n.bc.elem_size;
159 
160 	} else {
161 
162 		if (n.bc.op_ptr->flags & CF_CLAUSE) {
163 			s << " " << n.bc.count+1;
164 		}
165 
166 		s << " @" << (n.bc.addr << 1);
167 
168 		if (n.bc.op_ptr->flags & CF_ALU) {
169 			static const char *index_mode[] = {"", " CF_INDEX_0", " CF_INDEX_1"};
170 
171 			for (int k = 0; k < 4; ++k) {
172 				bc_kcache &kc = n.bc.kc[k];
173 				if (kc.mode) {
174 					s << " KC" << k << "[CB" << kc.bank << ":" <<
175 							(kc.addr << 4) << "-" <<
176 							(((kc.addr + kc.mode) << 4) - 1) << index_mode[kc.index_mode] << "]";
177 				}
178 			}
179 		}
180 
181 		if (n.bc.cond)
182 			s << " CND:" << n.bc.cond;
183 
184 		if (n.bc.pop_count)
185 			s << " POP:" << n.bc.pop_count;
186 
187 		if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
188 			s << " STREAM" << n.bc.count;
189 	}
190 
191 	if (!n.bc.barrier)
192 		s << "  NO_BARRIER";
193 
194 	if (n.bc.valid_pixel_mode)
195 		s << "  VPM";
196 
197 	if (n.bc.whole_quad_mode)
198 		s << "  WQM";
199 
200 	if (n.bc.end_of_program)
201 		s << "  EOP";
202 
203 	sblog << s.str() << "\n";
204 }
205 
206 
print_sel(sb_ostream & s,int sel,int rel,int index_mode,int need_brackets)207 static void print_sel(sb_ostream &s, int sel, int rel, int index_mode,
208                       int need_brackets) {
209 	if (rel && index_mode >= 5 && sel < 128)
210 		s << "G";
211 	if (rel || need_brackets) {
212 		s << "[";
213 	}
214 	s << sel;
215 	if (rel) {
216 		if (index_mode == 0 || index_mode == 6)
217 			s << "+AR";
218 		else if (index_mode == 4)
219 			s << "+AL";
220 	}
221 	if (rel || need_brackets) {
222 		s << "]";
223 	}
224 }
225 
print_dst(sb_ostream & s,bc_alu & alu)226 static void print_dst(sb_ostream &s, bc_alu &alu)
227 {
228 	unsigned sel = alu.dst_gpr;
229 	char reg_char = 'R';
230 	if (sel >= 128 - 4) { // clause temporary gpr
231 		sel -= 128 - 4;
232 		reg_char = 'T';
233 	}
234 
235 	if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) {
236 		s << reg_char;
237 		print_sel(s, sel, alu.dst_rel, alu.index_mode, 0);
238 	} else {
239 		s << "__";
240 	}
241 	s << ".";
242 	s << chans[alu.dst_chan];
243 }
244 
print_src(sb_ostream & s,bc_alu & alu,unsigned idx)245 static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx)
246 {
247 	bc_alu_src *src = &alu.src[idx];
248 	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
249 
250 	if (src->neg)
251 		s <<"-";
252 	if (src->abs)
253 		s <<"|";
254 
255 	if (sel < 128 - 4) {
256 		s << "R";
257 	} else if (sel < 128) {
258 		s << "T";
259 		sel -= 128 - 4;
260 	} else if (sel < 160) {
261 		s << "KC0";
262 		need_brackets = 1;
263 		sel -= 128;
264 	} else if (sel < 192) {
265 		s << "KC1";
266 		need_brackets = 1;
267 		sel -= 160;
268 	} else if (sel >= 448) {
269 		s << "Param";
270 		sel -= 448;
271 	} else if (sel >= 288) {
272 		s << "KC3";
273 		need_brackets = 1;
274 		sel -= 288;
275 	} else if (sel >= 256) {
276 		s << "KC2";
277 		need_brackets = 1;
278 		sel -= 256;
279 	} else {
280 		need_sel = 0;
281 		need_chan = 0;
282 		switch (sel) {
283 		case ALU_SRC_LDS_OQ_A:
284 			s << "LDS_OQ_A";
285 			need_chan = 1;
286 			break;
287 		case ALU_SRC_LDS_OQ_B:
288 			s << "LDS_OQ_B";
289 			need_chan = 1;
290 			break;
291 		case ALU_SRC_LDS_OQ_A_POP:
292 			s << "LDS_OQ_A_POP";
293 			need_chan = 1;
294 			break;
295 		case ALU_SRC_LDS_OQ_B_POP:
296 			s << "LDS_OQ_B_POP";
297 			need_chan = 1;
298 			break;
299 		case ALU_SRC_LDS_DIRECT_A:
300 			s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]";
301 			break;
302 		case ALU_SRC_LDS_DIRECT_B:
303 			s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]";
304 			break;
305 		case ALU_SRC_PS:
306 			s << "PS";
307 			break;
308 		case ALU_SRC_PV:
309 			s << "PV";
310 			need_chan = 1;
311 			break;
312 		case ALU_SRC_LITERAL:
313 			s << "[0x";
314 			s.print_zw_hex(src->value.u, 8);
315 			s << " " << src->value.f << "]";
316 			need_chan = 1;
317 			break;
318 		case ALU_SRC_0_5:
319 			s << "0.5";
320 			break;
321 		case ALU_SRC_M_1_INT:
322 			s << "-1";
323 			break;
324 		case ALU_SRC_1_INT:
325 			s << "1";
326 			break;
327 		case ALU_SRC_1:
328 			s << "1.0";
329 			break;
330 		case ALU_SRC_0:
331 			s << "0";
332 			break;
333 		default:
334 			s << "??IMM_" <<  sel;
335 			break;
336 		}
337 	}
338 
339 	if (need_sel)
340 		print_sel(s, sel, src->rel, alu.index_mode, need_brackets);
341 
342 	if (need_chan) {
343 		s << "." << chans[src->chan];
344 	}
345 
346 	if (src->abs)
347 		s << "|";
348 }
dump(alu_node & n)349 void bc_dump::dump(alu_node& n) {
350 	sb_ostringstream s;
351 	static const char *omod_str[] = {"","*2","*4","/2"};
352 	static const char *slots = "xyzwt";
353 
354 	s << (n.bc.update_exec_mask ? "M" : " ");
355 	s << (n.bc.update_pred ? "P" : " ");
356 	s << " ";
357 	s << (n.bc.pred_sel>=2 ? (n.bc.pred_sel == 2 ? "0" : "1") : " ");
358 	s << " ";
359 
360 	s << slots[n.bc.slot] << ": ";
361 
362 	s << n.bc.op_ptr->name << omod_str[n.bc.omod] << (n.bc.clamp ? "_sat" : "");
363 	fill_to(s, 26);
364 	s << " ";
365 
366 	print_dst(s, n.bc);
367 	for (int k = 0; k < n.bc.op_ptr->src_count; ++k) {
368 		s << (k ? ", " : ",  ");
369 		print_src(s, n.bc, k);
370 	}
371 
372 	if (n.bc.bank_swizzle) {
373 		fill_to(s, 55);
374 		if (n.bc.slot == SLOT_TRANS)
375 			s << "  " << scl_bs[n.bc.bank_swizzle];
376 		else
377 			s << "  " << vec_bs[n.bc.bank_swizzle];
378 	}
379 
380 	if (ctx.is_cayman()) {
381 		if (n.bc.op == ALU_OP1_MOVA_INT) {
382 			static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
383 				" Unknown MOVA_INT dest" };
384 			s << mova_str[std::min(n.bc.dst_gpr, 4u)];  // CM_V_SQ_MOVA_DST_AR_*
385 		}
386 	}
387 
388 	if (n.bc.lds_idx_offset) {
389 		s << " IDX_OFFSET:" << n.bc.lds_idx_offset;
390 	}
391 
392 	sblog << s.str() << "\n";
393 }
394 
init()395 int bc_dump::init() {
396 	sb_ostringstream s;
397 	s << "===== SHADER #" << sh.id;
398 
399 	if (sh.optimized)
400 		s << " OPT";
401 
402 	s << " ";
403 
404 	std::string target = std::string(" ") +
405 			sh.get_full_target_name() + " =====";
406 
407 	while (s.str().length() + target.length() < 80)
408 		s << "=";
409 
410 	s << target;
411 
412 	sblog << "\n" << s.str() << "\n";
413 
414 	s.clear();
415 
416 	if (bc_data) {
417 		s << "===== " << ndw << " dw ===== " << sh.ngpr
418 				<< " gprs ===== " << sh.nstack << " stack ";
419 	}
420 
421 	while (s.str().length() < 80)
422 		s << "=";
423 
424 	sblog << s.str() << "\n";
425 
426 	return 0;
427 }
428 
done()429 int bc_dump::done() {
430 	sb_ostringstream s;
431 	s << "===== SHADER_END ";
432 
433 	while (s.str().length() < 80)
434 		s << "=";
435 
436 	sblog << s.str() << "\n\n";
437 
438 	return 0;
439 }
440 
bc_dump(shader & s,bytecode * bc)441 bc_dump::bc_dump(shader& s, bytecode* bc)  :
442 	vpass(s), bc_data(), ndw(), id(),
443 	new_group(), group_index() {
444 
445 	if (bc) {
446 		bc_data = bc->data();
447 		ndw = bc->ndw();
448 	}
449 }
450 
dump(fetch_node & n)451 void bc_dump::dump(fetch_node& n) {
452 	sb_ostringstream s;
453 	static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
454 	unsigned gds = n.bc.op_ptr->flags & FF_GDS;
455 	bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
456 		n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
457 	bool show_dst = !gds || (gds && gds_has_ret);
458 
459 	s << n.bc.op_ptr->name;
460 	fill_to(s, 20);
461 
462 	if (show_dst) {
463 		s << "R";
464 		print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
465 		s << ".";
466 		for (int k = 0; k < 4; ++k)
467 			s << chans[n.bc.dst_sel[k]];
468 		s << ", ";
469 	}
470 
471 	s << "R";
472 	print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0);
473 	s << ".";
474 
475 	unsigned vtx = n.bc.op_ptr->flags & FF_VTX;
476 	unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4;
477 
478 	for (unsigned k = 0; k < num_src_comp; ++k)
479 		s << chans[n.bc.src_sel[k]];
480 
481 	if (vtx && n.bc.offset[0]) {
482 		s << " + " << n.bc.offset[0] << "b ";
483 	}
484 
485 	if (!gds)
486 		s << ",   RID:" << n.bc.resource_id;
487 
488 	if (gds) {
489 		s << " UAV:" << n.bc.uav_id;
490 		if (n.bc.uav_index_mode)
491 			s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
492 		if (n.bc.bcast_first_req)
493 			s << " BFQ";
494 		if (n.bc.alloc_consume)
495 			s << " AC";
496 	} else if (vtx) {
497 		s << "  " << fetch_type[n.bc.fetch_type];
498 		if (!ctx.is_cayman() && n.bc.mega_fetch_count)
499 			s << " MFC:" << n.bc.mega_fetch_count;
500 		if (n.bc.fetch_whole_quad)
501 			s << " FWQ";
502 		if (ctx.is_egcm() && n.bc.resource_index_mode)
503 			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
504 		if (ctx.is_egcm() && n.bc.sampler_index_mode)
505 			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
506 
507 		s << " UCF:" << n.bc.use_const_fields
508 				<< " FMT(DTA:" << n.bc.data_format
509 				<< " NUM:" << n.bc.num_format_all
510 				<< " COMP:" << n.bc.format_comp_all
511 				<< " MODE:" << n.bc.srf_mode_all << ")";
512 	} else {
513 		s << ", SID:" << n.bc.sampler_id;
514 		if (n.bc.lod_bias)
515 			s << " LB:" << n.bc.lod_bias;
516 		s << " CT:";
517 		for (unsigned k = 0; k < 4; ++k)
518 			s << (n.bc.coord_type[k] ? "N" : "U");
519 		for (unsigned k = 0; k < 3; ++k)
520 			if (n.bc.offset[k])
521 				s << " O" << chans[k] << ":" << n.bc.offset[k];
522 		if (ctx.is_egcm() && n.bc.resource_index_mode)
523 			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
524 		if (ctx.is_egcm() && n.bc.sampler_index_mode)
525 			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
526 	}
527 
528 	sblog << s.str() << "\n";
529 }
530 
dump_dw(unsigned dw_id,unsigned count)531 void bc_dump::dump_dw(unsigned dw_id, unsigned count) {
532 	if (!bc_data)
533 		return;
534 
535 	assert(dw_id + count <= ndw);
536 
537 	sblog.print_zw(dw_id, 4);
538 	sblog << "  ";
539 	while (count--) {
540 		sblog.print_zw_hex(bc_data[dw_id++], 8);
541 		sblog << " ";
542 	}
543 }
544 
545 } // namespace r600_sb
546