1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
// Set FBC_DEBUG to a non-zero value to compile the finalizer's debug dumps
// into the build.
#define FBC_DEBUG 0

// FBC_DUMP(q) expands to nothing when FBC_DEBUG is zero; otherwise it
// expands to the statements q wrapped in a do { ... } while (0) block.
#if !FBC_DEBUG
#define FBC_DUMP(q)
#else
#define FBC_DUMP(q) do { q } while (0)
#endif
34 
35 #include "sb_bc.h"
36 #include "sb_shader.h"
37 #include "sb_pass.h"
38 
39 namespace r600_sb {
40 
insert_rv6xx_load_ar_workaround(alu_group_node * b4)41 void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
42 
43 	alu_group_node *g = sh.create_alu_group();
44 	alu_node *a = sh.create_alu();
45 
46 	a->bc.set_op(ALU_OP0_NOP);
47 	a->bc.last = 1;
48 
49 	g->push_back(a);
50 	b4->insert_before(g);
51 }
52 
run()53 int bc_finalizer::run() {
54 
55 	run_on(sh.root);
56 
57 	regions_vec &rv = sh.get_regions();
58 	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
59 			++I) {
60 		region_node *r = *I;
61 
62 		assert(r);
63 
64 		bool loop = r->is_loop();
65 
66 		if (loop)
67 			finalize_loop(r);
68 		else
69 			finalize_if(r);
70 
71 		r->expand();
72 	}
73 
74 	cf_peephole();
75 
76 	// workaround for some problems on r6xx/7xx
77 	// add ALU NOP to each vertex shader
78 	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
79 		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
80 
81 		alu_group_node *g = sh.create_alu_group();
82 
83 		alu_node *a = sh.create_alu();
84 		a->bc.set_op(ALU_OP0_NOP);
85 		a->bc.last = 1;
86 
87 		g->push_back(a);
88 		c->push_back(g);
89 
90 		sh.root->push_back(c);
91 
92 		c = sh.create_cf(CF_OP_NOP);
93 		sh.root->push_back(c);
94 
95 		last_cf = c;
96 	}
97 
98 	if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
99 		last_cf = sh.create_cf(CF_OP_NOP);
100 		sh.root->push_back(last_cf);
101 	}
102 
103 	if (ctx.is_cayman()) {
104 		if (!last_cf) {
105 			cf_node *c = sh.create_cf(CF_OP_CF_END);
106 			sh.root->push_back(c);
107 		} else
108 			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
109 	} else
110 		last_cf->bc.end_of_program = 1;
111 
112 	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
113 		cf_node *le = last_export[t];
114 		if (le)
115 			le->bc.set_op(CF_OP_EXPORT_DONE);
116 	}
117 
118 	sh.ngpr = ngpr;
119 	sh.nstack = nstack;
120 	return 0;
121 }
122 
finalize_loop(region_node * r)123 void bc_finalizer::finalize_loop(region_node* r) {
124 
125 	update_nstack(r);
126 
127 	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
128 	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
129 
130 	// Update last_cf, but don't overwrite it if it's outside the current loop nest since
131 	// it may point to a cf that is later in program order.
132 	// The single parent level check is sufficient since finalize_loop() is processed in
133 	// reverse order from innermost to outermost loop nest level.
134 	if (!last_cf || last_cf->get_parent_region() == r) {
135 		last_cf = loop_end;
136 	}
137 
138 	loop_start->jump_after(loop_end);
139 	loop_end->jump_after(loop_start);
140 
141 	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
142 			I != E; ++I) {
143 		depart_node *dep = *I;
144 		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
145 		loop_break->jump(loop_end);
146 		dep->push_back(loop_break);
147 		dep->expand();
148 	}
149 
150 	// FIXME produces unnecessary LOOP_CONTINUE
151 	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
152 			I != E; ++I) {
153 		repeat_node *rep = *I;
154 		if (!(rep->parent == r && rep->prev == NULL)) {
155 			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
156 			loop_cont->jump(loop_end);
157 			rep->push_back(loop_cont);
158 		}
159 		rep->expand();
160 	}
161 
162 	r->push_front(loop_start);
163 	r->push_back(loop_end);
164 }
165 
finalize_if(region_node * r)166 void bc_finalizer::finalize_if(region_node* r) {
167 
168 	update_nstack(r);
169 
170 	// expecting the following control flow structure here:
171 	//   - region
172 	//     {
173 	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
174 	//         {
175 	//           - if
176 	//             {
177 	//               - depart/repeat 2 (possibly for outer region)
178 	//                 {
179 	//                   - some optional code
180 	//                 }
181 	//             }
182 	//           - optional <else> code> ...
183 	//         }
184 	//     }
185 
186 	container_node *repdep1 = static_cast<container_node*>(r->first);
187 	assert(repdep1->is_depart() || repdep1->is_repeat());
188 
189 	if_node *n_if = static_cast<if_node*>(repdep1->first);
190 
191 	if (n_if) {
192 
193 
194 		assert(n_if->is_if());
195 
196 		container_node *repdep2 = static_cast<container_node*>(n_if->first);
197 		assert(repdep2->is_depart() || repdep2->is_repeat());
198 
199 		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
200 		cf_node *if_pop = sh.create_cf(CF_OP_POP);
201 
202 		if (!last_cf || last_cf->get_parent_region() == r) {
203 			last_cf = if_pop;
204 		}
205 		if_pop->bc.pop_count = 1;
206 		if_pop->jump_after(if_pop);
207 
208 		r->push_front(if_jump);
209 		r->push_back(if_pop);
210 
211 		/* the depart/repeat 1 is actually part of the "else" code.
212 		 * if it's a depart for an outer loop region it will want to
213 		 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
214 		 * to emit the else clause.
215 		 */
216 		bool has_else = n_if->next;
217 
218 		if (repdep1->is_depart()) {
219 			depart_node *dep1 = static_cast<depart_node*>(repdep1);
220 			if (dep1->target != r && dep1->target->is_loop())
221 				has_else = true;
222 		}
223 
224 		if (repdep1->is_repeat()) {
225 			repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
226 			if (rep1->target != r && rep1->target->is_loop())
227 				has_else = true;
228 		}
229 
230 		if (has_else) {
231 			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
232 			n_if->insert_after(nelse);
233 			if_jump->jump(nelse);
234 			nelse->jump_after(if_pop);
235 			nelse->bc.pop_count = 1;
236 
237 		} else {
238 			if_jump->jump_after(if_pop);
239 			if_jump->bc.pop_count = 1;
240 		}
241 
242 		n_if->expand();
243 	}
244 
245 	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
246 			I != E; ++I) {
247 		(*I)->expand();
248 	}
249 	r->departs.clear();
250 	assert(r->repeats.empty());
251 }
252 
run_on(container_node * c)253 void bc_finalizer::run_on(container_node* c) {
254 	node *prev_node = NULL;
255 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
256 		node *n = *I;
257 
258 		if (n->is_alu_group()) {
259 			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
260 		} else {
261 			if (n->is_alu_clause()) {
262 				cf_node *c = static_cast<cf_node*>(n);
263 
264 				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
265 					if (ctx.stack_workaround_8xx) {
266 						region_node *r = c->get_parent_region();
267 						if (r) {
268 							unsigned ifs, loops;
269 							unsigned elems = get_stack_depth(r, loops, ifs);
270 							unsigned dmod1 = elems % ctx.stack_entry_size;
271 							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
272 
273 							if (elems && (!dmod1 || !dmod2))
274 								c->flags |= NF_ALU_STACK_WORKAROUND;
275 						}
276 					} else if (ctx.stack_workaround_9xx) {
277 						region_node *r = c->get_parent_region();
278 						if (r) {
279 							unsigned ifs, loops;
280 							get_stack_depth(r, loops, ifs);
281 							if (loops >= 2)
282 								c->flags |= NF_ALU_STACK_WORKAROUND;
283 						}
284 					}
285 				}
286 				last_cf = c;
287 			} else if (n->is_fetch_inst()) {
288 				finalize_fetch(static_cast<fetch_node*>(n));
289 			} else if (n->is_cf_inst()) {
290 				finalize_cf(static_cast<cf_node*>(n));
291 			}
292 			if (n->is_container())
293 				run_on(static_cast<container_node*>(n));
294 		}
295 		prev_node = n;
296 	}
297 }
298 
finalize_alu_group(alu_group_node * g,node * prev_node)299 void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
300 
301 	alu_node *last = NULL;
302 	alu_group_node *prev_g = NULL;
303 	bool add_nop = false;
304 	if (prev_node && prev_node->is_alu_group()) {
305 		prev_g = static_cast<alu_group_node*>(prev_node);
306 	}
307 
308 	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
309 		alu_node *n = static_cast<alu_node*>(*I);
310 		unsigned slot = n->bc.slot;
311 		value *d = n->dst.empty() ? NULL : n->dst[0];
312 
313 		if (d && d->is_special_reg()) {
314 			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access());
315 			d = NULL;
316 		}
317 
318 		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);
319 
320 		if (d) {
321 			assert(fdst.chan() == slot || slot == SLOT_TRANS);
322 		}
323 
324 		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
325 			n->bc.dst_gpr = fdst.sel();
326 		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
327 
328 
329 		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
330 			n->bc.dst_rel = 1;
331 			update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
332 		} else {
333 			n->bc.dst_rel = 0;
334 		}
335 
336 		n->bc.write_mask = d != NULL;
337 		n->bc.last = 0;
338 
339 		if (n->bc.op_ptr->flags & AF_PRED) {
340 			n->bc.update_pred = (n->dst[1] != NULL);
341 			n->bc.update_exec_mask = (n->dst[2] != NULL);
342 		}
343 
344 		// FIXME handle predication here
345 		n->bc.pred_sel = PRED_SEL_OFF;
346 
347 		update_ngpr(n->bc.dst_gpr);
348 
349 		add_nop |= finalize_alu_src(g, n, prev_g);
350 
351 		last = n;
352 	}
353 
354 	if (add_nop) {
355 		if (sh.get_ctx().r6xx_gpr_index_workaround) {
356 			insert_rv6xx_load_ar_workaround(g);
357 		}
358 	}
359 	last->bc.last = 1;
360 }
361 
finalize_alu_src(alu_group_node * g,alu_node * a,alu_group_node * prev)362 bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
363 	vvec &sv = a->src;
364 	bool add_nop = false;
365 	FBC_DUMP(
366 		sblog << "finalize_alu_src: ";
367 		dump::dump_op(a);
368 		sblog << "\n";
369 	);
370 
371 	unsigned si = 0;
372 
373 	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
374 		value *v = *I;
375 		assert(v);
376 
377 		bc_alu_src &src = a->bc.src[si];
378 		sel_chan sc;
379 		src.rel = 0;
380 
381 		sel_chan gpr;
382 
383 		switch (v->kind) {
384 		case VLK_REL_REG:
385 			sc = v->get_final_gpr();
386 			src.sel = sc.sel();
387 			src.chan = sc.chan();
388 			if (!v->rel->is_const()) {
389 				src.rel = 1;
390 				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
391 				if (prev && !add_nop) {
392 					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
393 						alu_node *pn = static_cast<alu_node*>(*pI);
394 						if (pn->bc.dst_gpr == src.sel) {
395 							add_nop = true;
396 							break;
397 						}
398 					}
399 				}
400 			} else
401 				src.rel = 0;
402 
403 			break;
404 		case VLK_REG:
405 			gpr = v->get_final_gpr();
406 			src.sel = gpr.sel();
407 			src.chan = gpr.chan();
408 			update_ngpr(src.sel);
409 			break;
410 		case VLK_TEMP:
411 			src.sel = v->gpr.sel();
412 			src.chan = v->gpr.chan();
413 			update_ngpr(src.sel);
414 			break;
415 		case VLK_UNDEF:
416 		case VLK_CONST: {
417 			literal lv = v->literal_value;
418 			src.chan = 0;
419 
420 			if (lv == literal(0))
421 				src.sel = ALU_SRC_0;
422 			else if (lv == literal(0.5f))
423 				src.sel = ALU_SRC_0_5;
424 			else if (lv == literal(1.0f))
425 				src.sel = ALU_SRC_1;
426 			else if (lv == literal(1))
427 				src.sel = ALU_SRC_1_INT;
428 			else if (lv == literal(-1))
429 				src.sel = ALU_SRC_M_1_INT;
430 			else {
431 				src.sel = ALU_SRC_LITERAL;
432 				src.chan = g->literal_chan(lv);
433 				src.value = lv;
434 			}
435 			break;
436 		}
437 		case VLK_KCACHE: {
438 			cf_node *clause = static_cast<cf_node*>(g->parent);
439 			assert(clause->is_alu_clause());
440 			sel_chan k = translate_kcache(clause, v);
441 
442 			assert(k && "kcache translation failed");
443 
444 			src.sel = k.sel();
445 			src.chan = k.chan();
446 			break;
447 		}
448 		case VLK_SPECIAL_REG:
449 			if (v->select.sel() == SV_LDS_OQA) {
450 				src.sel = ALU_SRC_LDS_OQ_A_POP;
451 				src.chan = 0;
452 			} else if (v->select.sel() == SV_LDS_OQB) {
453 				src.sel = ALU_SRC_LDS_OQ_B_POP;
454 				src.chan = 0;
455 			} else {
456 				src.sel = ALU_SRC_0;
457 				src.chan = 0;
458 			}
459 			break;
460 		case VLK_PARAM:
461 		case VLK_SPECIAL_CONST:
462 			src.sel = v->select.sel();
463 			src.chan = v->select.chan();
464 			break;
465 		default:
466 			assert(!"unknown value kind");
467 			break;
468 		}
469 		if (prev && !add_nop) {
470 			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
471 				alu_node *pn = static_cast<alu_node*>(*pI);
472 				if (pn->bc.dst_rel) {
473 					if (pn->bc.dst_gpr == src.sel) {
474 						add_nop = true;
475 						break;
476 					}
477 				}
478 			}
479 		}
480 	}
481 
482 	while (si < 3) {
483 		a->bc.src[si++].sel = 0;
484 	}
485 	return add_nop;
486 }
487 
copy_fetch_src(fetch_node & dst,fetch_node & src,unsigned arg_start)488 void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
489 {
490 	int reg = -1;
491 
492 	for (unsigned chan = 0; chan < 4; ++chan) {
493 
494 		dst.bc.dst_sel[chan] = SEL_MASK;
495 
496 		unsigned sel = SEL_MASK;
497 
498 		value *v = src.src[arg_start + chan];
499 
500 		if (!v || v->is_undef()) {
501 			sel = SEL_MASK;
502 		} else if (v->is_const()) {
503 			literal l = v->literal_value;
504 			if (l == literal(0))
505 				sel = SEL_0;
506 			else if (l == literal(1.0f))
507 				sel = SEL_1;
508 			else {
509 				sblog << "invalid fetch constant operand  " << chan << " ";
510 				dump::dump_op(&src);
511 				sblog << "\n";
512 				abort();
513 			}
514 
515 		} else if (v->is_any_gpr()) {
516 			unsigned vreg = v->gpr.sel();
517 			unsigned vchan = v->gpr.chan();
518 
519 			if (reg == -1)
520 				reg = vreg;
521 			else if ((unsigned)reg != vreg) {
522 				sblog << "invalid fetch source operand  " << chan << " ";
523 				dump::dump_op(&src);
524 				sblog << "\n";
525 				abort();
526 			}
527 
528 			sel = vchan;
529 
530 		} else {
531 			sblog << "invalid fetch source operand  " << chan << " ";
532 			dump::dump_op(&src);
533 			sblog << "\n";
534 			abort();
535 		}
536 
537 		dst.bc.src_sel[chan] = sel;
538 	}
539 
540 	if (reg >= 0)
541 		update_ngpr(reg);
542 
543 	dst.bc.src_gpr = reg >= 0 ? reg : 0;
544 }
545 
emit_set_grad(fetch_node * f)546 void bc_finalizer::emit_set_grad(fetch_node* f) {
547 
548 	assert(f->src.size() == 12 || f->src.size() == 13);
549 	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
550 
551 	unsigned arg_start = 0;
552 
553 	for (unsigned op = 0; op < 2; ++op) {
554 		fetch_node *n = sh.create_fetch();
555 		n->bc.set_op(ops[op]);
556 
557 		arg_start += 4;
558 
559 		copy_fetch_src(*n, *f, arg_start);
560 
561 		f->insert_before(n);
562 	}
563 
564 }
565 
emit_set_texture_offsets(fetch_node & f)566 void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
567 	assert(f.src.size() == 8);
568 
569 	fetch_node *n = sh.create_fetch();
570 
571 	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
572 
573 	copy_fetch_src(*n, f, 4);
574 
575 	f.insert_before(n);
576 }
577 
finalize_fetch(fetch_node * f)578 void bc_finalizer::finalize_fetch(fetch_node* f) {
579 
580 	int reg = -1;
581 
582 	// src
583 
584 	unsigned src_count = 4;
585 
586 	unsigned flags = f->bc.op_ptr->flags;
587 
588 	if (flags & FF_VTX) {
589 		src_count = 1;
590 	} else if (flags & FF_GDS) {
591 		src_count = 2;
592 	} else if (flags & FF_USEGRAD) {
593 		emit_set_grad(f);
594 	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
595 		emit_set_texture_offsets(*f);
596 	}
597 
598 	for (unsigned chan = 0; chan < src_count; ++chan) {
599 
600 		unsigned sel = f->bc.src_sel[chan];
601 
602 		if (sel > SEL_W)
603 			continue;
604 
605 		value *v = f->src[chan];
606 
607 		if (v->is_undef()) {
608 			sel = SEL_MASK;
609 		} else if (v->is_const()) {
610 			literal l = v->literal_value;
611 			if (l == literal(0))
612 				sel = SEL_0;
613 			else if (l == literal(1.0f))
614 				sel = SEL_1;
615 			else {
616 				sblog << "invalid fetch constant operand  " << chan << " ";
617 				dump::dump_op(f);
618 				sblog << "\n";
619 				abort();
620 			}
621 
622 		} else if (v->is_any_gpr()) {
623 			unsigned vreg = v->gpr.sel();
624 			unsigned vchan = v->gpr.chan();
625 
626 			if (reg == -1)
627 				reg = vreg;
628 			else if ((unsigned)reg != vreg) {
629 				sblog << "invalid fetch source operand  " << chan << " ";
630 				dump::dump_op(f);
631 				sblog << "\n";
632 				abort();
633 			}
634 
635 			sel = vchan;
636 
637 		} else {
638 			sblog << "invalid fetch source operand  " << chan << " ";
639 			dump::dump_op(f);
640 			sblog << "\n";
641 			abort();
642 		}
643 
644 		f->bc.src_sel[chan] = sel;
645 	}
646 
647 	if (reg >= 0)
648 		update_ngpr(reg);
649 
650 	f->bc.src_gpr = reg >= 0 ? reg : 0;
651 
652 	// dst
653 
654 	reg = -1;
655 
656 	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};
657 
658 	for (unsigned chan = 0; chan < 4; ++chan) {
659 
660 		unsigned sel = f->bc.dst_sel[chan];
661 
662 		if (sel == SEL_MASK)
663 			continue;
664 
665 		value *v = f->dst[chan];
666 		if (!v)
667 			continue;
668 
669 		if (v->is_any_gpr()) {
670 			unsigned vreg = v->gpr.sel();
671 			unsigned vchan = v->gpr.chan();
672 
673 			if (reg == -1)
674 				reg = vreg;
675 			else if ((unsigned)reg != vreg) {
676 				sblog << "invalid fetch dst operand  " << chan << " ";
677 				dump::dump_op(f);
678 				sblog << "\n";
679 				abort();
680 			}
681 
682 			dst_swz[vchan] = sel;
683 
684 		} else {
685 			sblog << "invalid fetch dst operand  " << chan << " ";
686 			dump::dump_op(f);
687 			sblog << "\n";
688 			abort();
689 		}
690 
691 	}
692 
693 	for (unsigned i = 0; i < 4; ++i)
694 		f->bc.dst_sel[i] = dst_swz[i];
695 
696 	if ((flags & FF_GDS) && reg == -1) {
697 		f->bc.dst_sel[0] = SEL_MASK;
698 		f->bc.dst_gpr = 0;
699 		return ;
700 	}
701 	assert(reg >= 0);
702 
703 	if (reg >= 0)
704 		update_ngpr(reg);
705 
706 	f->bc.dst_gpr = reg >= 0 ? reg : 0;
707 }
708 
finalize_cf(cf_node * c)709 void bc_finalizer::finalize_cf(cf_node* c) {
710 
711 	unsigned flags = c->bc.op_ptr->flags;
712 
713 	c->bc.end_of_program = 0;
714 	last_cf = c;
715 
716 	if (flags & CF_EXP) {
717 		c->bc.set_op(CF_OP_EXPORT);
718 		last_export[c->bc.type] = c;
719 
720 		int reg = -1;
721 
722 		for (unsigned chan = 0; chan < 4; ++chan) {
723 
724 			unsigned sel = c->bc.sel[chan];
725 
726 			if (sel > SEL_W)
727 				continue;
728 
729 			value *v = c->src[chan];
730 
731 			if (v->is_undef()) {
732 				sel = SEL_MASK;
733 			} else if (v->is_const()) {
734 				literal l = v->literal_value;
735 				if (l == literal(0))
736 					sel = SEL_0;
737 				else if (l == literal(1.0f))
738 					sel = SEL_1;
739 				else {
740 					sblog << "invalid export constant operand  " << chan << " ";
741 					dump::dump_op(c);
742 					sblog << "\n";
743 					abort();
744 				}
745 
746 			} else if (v->is_any_gpr()) {
747 				unsigned vreg = v->gpr.sel();
748 				unsigned vchan = v->gpr.chan();
749 
750 				if (reg == -1)
751 					reg = vreg;
752 				else if ((unsigned)reg != vreg) {
753 					sblog << "invalid export source operand  " << chan << " ";
754 					dump::dump_op(c);
755 					sblog << "\n";
756 					abort();
757 				}
758 
759 				sel = vchan;
760 
761 			} else {
762 				sblog << "invalid export source operand  " << chan << " ";
763 				dump::dump_op(c);
764 				sblog << "\n";
765 				abort();
766 			}
767 
768 			c->bc.sel[chan] = sel;
769 		}
770 
771 		if (reg >= 0)
772 			update_ngpr(reg);
773 
774 		c->bc.rw_gpr = reg >= 0 ? reg : 0;
775 
776 	} else if (flags & CF_MEM) {
777 
778 		int reg = -1;
779 		unsigned mask = 0;
780 
781 		for (unsigned chan = 0; chan < 4; ++chan) {
782 			value *v = c->src[chan];
783 			if (!v || v->is_undef())
784 				continue;
785 
786 			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
787 				sblog << "invalid source operand  " << chan << " ";
788 				dump::dump_op(c);
789 				sblog << "\n";
790 				abort();
791 			}
792 			unsigned vreg = v->gpr.sel();
793 			if (reg == -1)
794 				reg = vreg;
795 			else if ((unsigned)reg != vreg) {
796 				sblog << "invalid source operand  " << chan << " ";
797 				dump::dump_op(c);
798 				sblog << "\n";
799 				abort();
800 			}
801 
802 			mask |= (1 << chan);
803 		}
804 
805 		if (reg >= 0)
806 			update_ngpr(reg);
807 
808 		c->bc.rw_gpr = reg >= 0 ? reg : 0;
809 		c->bc.comp_mask = mask;
810 
811 		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {
812 
813 			reg = -1;
814 
815 			for (unsigned chan = 0; chan < 4; ++chan) {
816 				value *v = c->src[4 + chan];
817 				if (!v || v->is_undef())
818 					continue;
819 
820 				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
821 					sblog << "invalid source operand  " << chan << " ";
822 					dump::dump_op(c);
823 					sblog << "\n";
824 					abort();
825 				}
826 				unsigned vreg = v->gpr.sel();
827 				if (reg == -1)
828 					reg = vreg;
829 				else if ((unsigned)reg != vreg) {
830 					sblog << "invalid source operand  " << chan << " ";
831 					dump::dump_op(c);
832 					sblog << "\n";
833 					abort();
834 				}
835 			}
836 
837 			assert(reg >= 0);
838 
839 			if (reg >= 0)
840 				update_ngpr(reg);
841 
842 			c->bc.index_gpr = reg >= 0 ? reg : 0;
843 		}
844 	} else if (flags & CF_CALL) {
845 		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
846 	}
847 }
848 
translate_kcache(cf_node * alu,value * v)849 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
850 	unsigned sel = v->select.kcache_sel();
851 	unsigned bank = v->select.kcache_bank();
852 	unsigned chan = v->select.chan();
853 	static const unsigned kc_base[] = {128, 160, 256, 288};
854 
855 	sel &= 4095;
856 
857 	unsigned line = sel >> 4;
858 
859 	for (unsigned k = 0; k < 4; ++k) {
860 		bc_kcache &kc = alu->bc.kc[k];
861 
862 		if (kc.mode == KC_LOCK_NONE)
863 			break;
864 
865 		if (kc.bank == bank && (kc.addr == line ||
866 				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {
867 
868 			sel = kc_base[k] + (sel - (kc.addr << 4));
869 
870 			return sel_chan(sel, chan);
871 		}
872 	}
873 
874 	assert(!"kcache translation error");
875 	return 0;
876 }
877 
update_ngpr(unsigned gpr)878 void bc_finalizer::update_ngpr(unsigned gpr) {
879 	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
880 		ngpr = gpr + 1;
881 }
882 
get_stack_depth(node * n,unsigned & loops,unsigned & ifs,unsigned add)883 unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
884                                            unsigned &ifs, unsigned add) {
885 	unsigned stack_elements = add;
886 	bool has_non_wqm_push = (add != 0);
887 	region_node *r = n->is_region() ?
888 			static_cast<region_node*>(n) : n->get_parent_region();
889 
890 	loops = 0;
891 	ifs = 0;
892 
893 	while (r) {
894 		if (r->is_loop()) {
895 			++loops;
896 		} else {
897 			++ifs;
898 			has_non_wqm_push = true;
899 		}
900 		r = r->get_parent_region();
901 	}
902 	stack_elements += (loops * ctx.stack_entry_size) + ifs;
903 
904 	// reserve additional elements in some cases
905 	switch (ctx.hw_class) {
906 	case HW_CLASS_R600:
907 	case HW_CLASS_R700:
908 		// If any non-WQM push is invoked, 2 elements should be reserved.
909 		if (has_non_wqm_push)
910 			stack_elements += 2;
911 		break;
912 	case HW_CLASS_CAYMAN:
913 		// If any stack operation is invoked, 2 elements should be reserved
914 		if (stack_elements)
915 			stack_elements += 2;
916 		break;
917 	case HW_CLASS_EVERGREEN:
918 		// According to the docs we need to reserve 1 element for each of the
919 		// following cases:
920 		//   1) non-WQM push is used with WQM/LOOP frames on stack
921 		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
922 		// NOTE:
923 		// It was found that the conditions above are not sufficient, there are
924 		// other cases where we also need to reserve stack space, that's why
925 		// we always reserve 1 stack element if we have non-WQM push on stack.
926 		// Condition 2 is ignored for now because we don't use this instruction.
927 		if (has_non_wqm_push)
928 			++stack_elements;
929 		break;
930 	case HW_CLASS_UNKNOWN:
931 		assert(0);
932 	}
933 	return stack_elements;
934 }
935 
update_nstack(region_node * r,unsigned add)936 void bc_finalizer::update_nstack(region_node* r, unsigned add) {
937 	unsigned loops = 0;
938 	unsigned ifs = 0;
939 	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
940 
941 	// XXX all chips expect this value to be computed using 4 as entry size,
942 	// not the real entry size
943 	unsigned stack_entries = (elems + 3) >> 2;
944 
945 	if (nstack < stack_entries)
946 		nstack = stack_entries;
947 }
948 
cf_peephole()949 void bc_finalizer::cf_peephole() {
950 	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
951 		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
952 				I = N) {
953 			N = I; ++N;
954 			cf_node *c = static_cast<cf_node*>(*I);
955 
956 			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
957 					(c->flags & NF_ALU_STACK_WORKAROUND)) {
958 				cf_node *push = sh.create_cf(CF_OP_PUSH);
959 				c->insert_before(push);
960 				push->jump(c);
961 				c->bc.set_op(CF_OP_ALU);
962 			}
963 		}
964 	}
965 
966 	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
967 			I = N) {
968 		N = I; ++N;
969 
970 		cf_node *c = static_cast<cf_node*>(*I);
971 
972 		if (c->jump_after_target) {
973 			if (c->jump_target->next == NULL) {
974 				c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
975 				if (last_cf == c->jump_target)
976 					last_cf = static_cast<cf_node*>(c->jump_target->next);
977 			}
978 			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
979 			c->jump_after_target = false;
980 		}
981 
982 		if (c->is_cf_op(CF_OP_POP)) {
983 			node *p = c->prev;
984 			if (p->is_alu_clause()) {
985 				cf_node *a = static_cast<cf_node*>(p);
986 
987 				if (a->bc.op == CF_OP_ALU) {
988 					a->bc.set_op(CF_OP_ALU_POP_AFTER);
989 					c->remove();
990 				}
991 			}
992 		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
993 			// if JUMP is immediately followed by its jump target,
994 			// then JUMP is useless and we can eliminate it
995 			c->remove();
996 		}
997 	}
998 }
999 
1000 } // namespace r600_sb
1001