1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
// Set to 1 to compile in the verbose tracing wrapped by GCM_DUMP().
#define GCM_DEBUG 0

#if GCM_DEBUG
// Expands to exactly one statement; the call site supplies the trailing ';'.
// (No ';' after while(0), so the macro is safe inside unbraced if/else.)
#define GCM_DUMP(a) do { a } while(0)
#else
#define GCM_DUMP(a)
#endif
34 
35 #include <map>
36 
37 #include "sb_bc.h"
38 #include "sb_shader.h"
39 #include "sb_pass.h"
40 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE
41 
42 namespace r600_sb {
43 
run()44 int gcm::run() {
45 
46 	GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); );
47 
48 	collect_instructions(sh.root, true);
49 
50 	init_def_count(uses, pending);
51 
52 	for (node_iterator N, I = pending.begin(), E = pending.end();
53 			I != E; I = N) {
54 		N = I;
55 		++N;
56 		node *o = *I;
57 
58 		GCM_DUMP(
59 			sblog << "pending : ";
60 			dump::dump_op(o);
61 			sblog << "\n";
62 		);
63 
64 		if (td_is_ready(o)) {
65 
66 			GCM_DUMP(
67 				sblog << "  ready: ";
68 				dump::dump_op(o);
69 				sblog << "\n";
70 			);
71 			pending.remove_node(o);
72 			ready.push_back(o);
73 		} else {
74 		}
75 	}
76 
77 	sched_early(sh.root);
78 
79 	if (!pending.empty()) {
80 		sblog << "##### gcm_sched_early_pass: unscheduled ops:\n";
81 		dump::dump_op(pending.front());
82 	}
83 
84 	assert(pending.empty());
85 
86 	GCM_DUMP( sh.dump_ir(); );
87 
88 	GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; );
89 
90 	collect_instructions(sh.root, false);
91 
92 	init_use_count(uses, pending);
93 
94 	sched_late(sh.root);
95 	if (!pending.empty()) {
96 		sblog << "##### gcm_sched_late_pass: unscheduled ops:\n";
97 		dump::dump_op(pending.front());
98 	}
99 
100 	assert(ucs_level == 0);
101 	assert(pending.empty());
102 
103 	return 0;
104 }
105 
106 
collect_instructions(container_node * c,bool early_pass)107 void gcm::collect_instructions(container_node *c, bool early_pass) {
108 	if (c->is_bb()) {
109 
110 		if (early_pass) {
111 			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
112 				node *n = *I;
113 				if (n->flags & NF_DONT_MOVE) {
114 					op_info &o = op_map[n];
115 					o.top_bb = o.bottom_bb = static_cast<bb_node*>(c);
116 				}
117 			}
118 		}
119 
120 		pending.append_from(c);
121 		return;
122 	}
123 
124 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
125 		if (I->is_container()) {
126 			collect_instructions(static_cast<container_node*>(*I), early_pass);
127 		}
128 	}
129 }
130 
sched_early(container_node * n)131 void gcm::sched_early(container_node *n) {
132 
133 	region_node *r =
134 			(n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL;
135 
136 	if (r && r->loop_phi) {
137 		sched_early(r->loop_phi);
138 	}
139 
140 	for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) {
141 		if (I->type == NT_OP) {
142 			node *op = *I;
143 			if (op->subtype == NST_PHI) {
144 				td_release_uses(op->dst);
145 			}
146 		} else if (I->is_container()) {
147 			if (I->subtype == NST_BB) {
148 				bb_node* bb = static_cast<bb_node*>(*I);
149 				td_sched_bb(bb);
150 			} else {
151 				sched_early(static_cast<container_node*>(*I));
152 			}
153 		}
154 	}
155 
156 	if (r && r->phi) {
157 		sched_early(r->phi);
158 	}
159 }
160 
td_schedule(bb_node * bb,node * n)161 void gcm::td_schedule(bb_node *bb, node *n) {
162 	GCM_DUMP(
163 		sblog << "scheduling : ";
164 		dump::dump_op(n);
165 		sblog << "\n";
166 	);
167 	td_release_uses(n->dst);
168 
169 	bb->push_back(n);
170 
171 	op_map[n].top_bb = bb;
172 
173 }
174 
td_sched_bb(bb_node * bb)175 void gcm::td_sched_bb(bb_node* bb) {
176 	GCM_DUMP(
177 	sblog << "td scheduling BB_" << bb->id << "\n";
178 	);
179 
180 	while (!ready.empty()) {
181 		for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E;
182 				I = N) {
183 			N = I; ++N;
184 			td_schedule(bb, *I);
185 			ready.erase(I);
186 		}
187 	}
188 }
189 
td_is_ready(node * n)190 bool gcm::td_is_ready(node* n) {
191 	return uses[n] == 0;
192 }
193 
td_release_val(value * v)194 void gcm::td_release_val(value *v) {
195 
196 	GCM_DUMP(
197 		sblog << "td checking uses: ";
198 		dump::dump_val(v);
199 		sblog << "\n";
200 	);
201 
202 	for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
203 		node *op = *I;
204 		if (op->parent != &pending) {
205 			continue;
206 		}
207 
208 		GCM_DUMP(
209 			sblog << "td    used in ";
210 			dump::dump_op(op);
211 			sblog << "\n";
212 		);
213 
214 		assert(uses[op] > 0);
215 		if (--uses[op] == 0) {
216 			GCM_DUMP(
217 				sblog << "td        released : ";
218 				dump::dump_op(op);
219 				sblog << "\n";
220 			);
221 
222 			pending.remove_node(op);
223 			ready.push_back(op);
224 		}
225 	}
226 
227 }
228 
td_release_uses(vvec & v)229 void gcm::td_release_uses(vvec& v) {
230 	for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) {
231 		value *v = *I;
232 		if (!v)
233 			continue;
234 
235 		if (v->is_rel())
236 			td_release_uses(v->mdef);
237 		else
238 			td_release_val(v);
239 	}
240 }
241 
sched_late(container_node * n)242 void gcm::sched_late(container_node *n) {
243 
244 	bool stack_pushed = false;
245 
246 	if (n->is_depart()) {
247 		depart_node *d = static_cast<depart_node*>(n);
248 		push_uc_stack();
249 		stack_pushed = true;
250 		bu_release_phi_defs(d->target->phi, d->dep_id);
251 	} else if (n->is_repeat()) {
252 		repeat_node *r = static_cast<repeat_node*>(n);
253 		assert(r->target->loop_phi);
254 		push_uc_stack();
255 		stack_pushed = true;
256 		bu_release_phi_defs(r->target->loop_phi, r->rep_id);
257 	}
258 
259 	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
260 		if (I->is_container()) {
261 			if (I->subtype == NST_BB) {
262 				bb_node* bb = static_cast<bb_node*>(*I);
263 				bu_sched_bb(bb);
264 			} else {
265 				sched_late(static_cast<container_node*>(*I));
266 			}
267 		}
268 	}
269 
270 	if (n->type == NT_IF) {
271 		if_node *f = static_cast<if_node*>(n);
272 		if (f->cond)
273 			pending_defs.push_back(f->cond);
274 	} else if (n->type == NT_REGION) {
275 		region_node *r = static_cast<region_node*>(n);
276 		if (r->loop_phi)
277 			bu_release_phi_defs(r->loop_phi, 0);
278 	}
279 
280 	if (stack_pushed)
281 		pop_uc_stack();
282 
283 }
284 
bu_sched_bb(bb_node * bb)285 void gcm::bu_sched_bb(bb_node* bb) {
286 	GCM_DUMP(
287 	sblog << "bu scheduling BB_" << bb->id << "\n";
288 	);
289 
290 	bu_bb = bb;
291 
292 	if (!pending_nodes.empty()) {
293 		GCM_DUMP(
294 				sblog << "pending nodes:\n";
295 		);
296 
297 		// TODO consider sorting the exports by array_base,
298 		// possibly it can improve performance
299 
300 		for (node_list::iterator I = pending_nodes.begin(),
301 				E = pending_nodes.end(); I != E; ++I) {
302 			bu_release_op(*I);
303 		}
304 		pending_nodes.clear();
305 		GCM_DUMP(
306 			sblog << "pending nodes processed...\n";
307 		);
308 	}
309 
310 
311 	if (!pending_defs.empty()) {
312 		for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end();
313 				I != E; ++I) {
314 			bu_release_val(*I);
315 		}
316 		pending_defs.clear();
317 	}
318 
319 	for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end();
320 			I != E;	I = N) {
321 		N = I;
322 		++N;
323 		node *n = *I;
324 		if (op_map[n].bottom_bb == bb) {
325 			add_ready(*I);
326 			ready_above.erase(I);
327 		}
328 	}
329 
330 	unsigned cnt_ready[SQ_NUM];
331 
332 	container_node *clause = NULL;
333 	unsigned last_inst_type = ~0;
334 	unsigned last_count = 0;
335 
336 	bool s = true;
337 	while (s) {
338 		node *n;
339 
340 		s = false;
341 
342 		unsigned ready_mask = 0;
343 
344 		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
345 			if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty())
346 				ready_mask |= (1 << sq);
347 		}
348 
349 		if (!ready_mask) {
350 			for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
351 				if (!bu_ready_early[sq].empty()) {
352 					node *n = bu_ready_early[sq].front();
353 					bu_ready_early[sq].pop_front();
354 					bu_ready[sq].push_back(n);
355 					break;
356 				}
357 			}
358 		}
359 
360 		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
361 
362 			if (sq == SQ_CF && pending_exec_mask_update) {
363 				pending_exec_mask_update = false;
364 				sq = SQ_ALU;
365 				--sq;
366 				continue;
367 			}
368 
369 			if (sq != SQ_ALU && outstanding_lds_oq)
370 				continue;
371 
372 			if (!bu_ready_next[sq].empty())
373 				bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]);
374 
375 			cnt_ready[sq] = bu_ready[sq].size();
376 
377 			if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
378 					cnt_ready[sq] < ctx.max_fetch/2	&&
379 					!bu_ready_next[SQ_ALU].empty()) {
380 				sq = SQ_ALU;
381 				--sq;
382 				continue;
383 			}
384 
385 			while (!bu_ready[sq].empty()) {
386 
387 				if (last_inst_type != sq) {
388 					clause = NULL;
389 					last_count = 0;
390 					last_inst_type = sq;
391 				}
392 
393 				// simple heuristic to limit register pressure,
394 				if (sq == SQ_ALU && live_count > rp_threshold && !outstanding_lds_oq &&
395 						(!bu_ready[SQ_TEX].empty() ||
396 						 !bu_ready[SQ_VTX].empty() ||
397 						 !bu_ready_next[SQ_TEX].empty() ||
398 						 !bu_ready_next[SQ_VTX].empty())) {
399 					GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; );
400 					break;
401 				}
402 
403 				n = bu_ready[sq].front();
404 
405 				// real count (e.g. SAMPLE_G will be expanded to 3 instructions,
406 				// 2 SET_GRAD_ + 1 SAMPLE_G
407 				unsigned ncnt = 1;
408 				if (n->is_fetch_inst() && n->src.size() == 12) {
409 					ncnt = 3;
410 				}
411 
412 				bool sampler_indexing = false;
413 				if (n->is_fetch_inst() &&
414 					static_cast<fetch_node *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
415 				{
416 					sampler_indexing = true; // Give sampler indexed ops get their own clause
417 					ncnt = sh.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
418 				}
419 
420 				if ((sq == SQ_TEX || sq == SQ_VTX) &&
421 						((last_count >= ctx.max_fetch/2 &&
422 						check_alu_ready_count(24)) ||
423 								last_count + ncnt > ctx.max_fetch))
424 					break;
425 				else if (sq == SQ_CF && last_count > 4 &&
426 						check_alu_ready_count(24))
427 					break;
428 
429 
430 				if (sq == SQ_ALU && n->consumes_lds_oq() &&
431 				    (bu_ready[SQ_TEX].size() || bu_ready[SQ_VTX].size() || bu_ready[SQ_GDS].size())) {
432 					GCM_DUMP( sblog << "switching scheduling due to lds op\n"; );
433 					break;
434 				}
435 				bu_ready[sq].pop_front();
436 
437 				if (sq != SQ_CF) {
438 					if (!clause || sampler_indexing) {
439 						node_subtype nst;
440 						switch (sq) {
441 						case SQ_ALU:
442 							nst = NST_ALU_CLAUSE;
443 							break;
444 						case SQ_TEX:
445 							nst = NST_TEX_CLAUSE;
446 							break;
447 						case SQ_GDS:
448 							nst = NST_GDS_CLAUSE;
449 							break;
450 						default:
451 							nst = NST_VTX_CLAUSE;
452 							break;
453 						}
454 						clause = sh.create_clause(nst);
455 						bb->push_front(clause);
456 					}
457 				} else {
458 					clause = bb;
459 				}
460 
461 				bu_schedule(clause, n);
462 				s = true;
463 				last_count += ncnt;
464 			}
465 		}
466 	}
467 
468 	bu_bb = NULL;
469 
470 	GCM_DUMP(
471 		sblog << "bu finished scheduling BB_" << bb->id << "\n";
472 	);
473 }
474 
bu_release_defs(vvec & v,bool src)475 void gcm::bu_release_defs(vvec& v, bool src) {
476 	for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) {
477 		value *v = *I;
478 		if (!v || v->is_readonly())
479 			continue;
480 
481 		if (v->is_rel()) {
482 			if (!v->rel->is_readonly())
483 				bu_release_val(v->rel);
484 			bu_release_defs(v->muse, true);
485 		} else if (src)
486 			bu_release_val(v);
487 		else {
488 			if (live.remove_val(v)) {
489 				--live_count;
490 			}
491 		}
492 	}
493 }
494 
push_uc_stack()495 void gcm::push_uc_stack() {
496 	GCM_DUMP(
497 		sblog << "pushing use count stack prev_level " << ucs_level
498 			<< "   new level " << (ucs_level + 1) << "\n";
499 	);
500 	++ucs_level;
501 	if (ucs_level == nuc_stk.size()) {
502 		nuc_stk.resize(ucs_level + 1);
503 	}
504 	else {
505 		nuc_stk[ucs_level].clear();
506 	}
507 }
508 
bu_is_ready(node * n)509 bool gcm::bu_is_ready(node* n) {
510 	nuc_map &cm = nuc_stk[ucs_level];
511 	nuc_map::iterator F = cm.find(n);
512 	unsigned uc = (F == cm.end() ? 0 : F->second);
513 	return uc == uses[n];
514 }
515 
bu_schedule(container_node * c,node * n)516 void gcm::bu_schedule(container_node* c, node* n) {
517 	GCM_DUMP(
518 		sblog << "bu scheduling : ";
519 		dump::dump_op(n);
520 		sblog << "\n";
521 	);
522 
523 	assert(op_map[n].bottom_bb == bu_bb);
524 
525 	if (n->produces_lds_oq())
526 		outstanding_lds_oq--;
527 	if (n->consumes_lds_oq())
528 		outstanding_lds_oq++;
529 	bu_release_defs(n->src, true);
530 	bu_release_defs(n->dst, false);
531 
532 	c->push_front(n);
533 }
534 
dump_uc_stack()535 void gcm::dump_uc_stack() {
536 	sblog << "##### uc_stk start ####\n";
537 	for (unsigned l = 0; l <= ucs_level; ++l) {
538 		nuc_map &m = nuc_stk[l];
539 
540 		sblog << "nuc_stk[" << l << "] :   @" << &m << "\n";
541 
542 		for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) {
543 			sblog << "    uc " << I->second << " for ";
544 			dump::dump_op(I->first);
545 			sblog << "\n";
546 		}
547 	}
548 	sblog << "##### uc_stk end ####\n";
549 }
550 
pop_uc_stack()551 void gcm::pop_uc_stack() {
552 	nuc_map &pm = nuc_stk[ucs_level];
553 	--ucs_level;
554 	nuc_map &cm = nuc_stk[ucs_level];
555 
556 	GCM_DUMP(
557 		sblog << "merging use stack from level " << (ucs_level+1)
558 			<< " to " << ucs_level << "\n";
559 	);
560 
561 	for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) {
562 		node *n = I->first;
563 
564 		GCM_DUMP(
565 			sblog << "      " << cm[n] << " += " << I->second << "  for ";
566 			dump::dump_op(n);
567 			sblog << "\n";
568 		);
569 
570 		unsigned uc = cm[n] += I->second;
571 
572 		if (n->parent == &pending && uc == uses[n]) {
573 			cm.erase(n);
574 			pending_nodes.push_back(n);
575 			GCM_DUMP(
576 				sblog << "pushed pending_node due to stack pop ";
577 				dump::dump_op(n);
578 				sblog << "\n";
579 			);
580 		}
581 	}
582 }
583 
bu_find_best_bb(node * n,op_info & oi)584 void gcm::bu_find_best_bb(node *n, op_info &oi) {
585 
586 	GCM_DUMP(
587 		sblog << "  find best bb : ";
588 		dump::dump_op(n);
589 		sblog << "\n";
590 	);
591 
592 	if (oi.bottom_bb)
593 		return;
594 
595 	// don't hoist generated copies
596 	if (n->flags & NF_DONT_HOIST) {
597 		oi.bottom_bb = bu_bb;
598 		return;
599 	}
600 
601 	bb_node* best_bb = bu_bb;
602 	bb_node* top_bb = oi.top_bb;
603 	assert(oi.top_bb && !oi.bottom_bb);
604 
605 	node *c = best_bb;
606 
607 	// FIXME top_bb may be located inside the loop so we'll never enter it
608 	// in the loop below, and the instruction will be incorrectly placed at the
609 	// beginning of the shader.
610 	// For now just check if top_bb's loop_level is higher than of
611 	// current bb and abort the search for better bb in such case,
612 	// but this problem may require more complete (and more expensive) fix
613 	if (top_bb->loop_level <= best_bb->loop_level) {
614 		while (c && c != top_bb) {
615 
616 			if (c->prev) {
617 				c = c->prev;
618 			} else {
619 				c = c->parent;
620 				if (!c)
621 					break;
622 				continue;
623 			}
624 
625 			if (c->subtype == NST_BB) {
626 				bb_node *bb = static_cast<bb_node*>(c);
627 				if (bb->loop_level < best_bb->loop_level)
628 					best_bb = bb;
629 			}
630 		}
631 	}
632 
633 	oi.bottom_bb = best_bb;
634 }
635 
add_ready(node * n)636 void gcm::add_ready(node *n) {
637 	sched_queue_id sq = sh.get_queue_id(n);
638 	if (n->flags & NF_SCHEDULE_EARLY)
639 		bu_ready_early[sq].push_back(n);
640 	else if (sq == SQ_ALU && n->is_copy_mov())
641 		bu_ready[sq].push_front(n);
642 	else if (n->is_alu_inst()) {
643 		alu_node *a = static_cast<alu_node*>(n);
644 		if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) {
645 			// PRED_SET instruction that updates exec mask
646 			pending_exec_mask_update = true;
647 		}
648 		bu_ready_next[sq].push_back(n);
649 	} else
650 		bu_ready_next[sq].push_back(n);
651 }
652 
bu_release_op(node * n)653 void gcm::bu_release_op(node * n) {
654 	op_info &oi = op_map[n];
655 
656 	GCM_DUMP(
657 	sblog << "  bu release op  ";
658 	dump::dump_op(n);
659 	);
660 
661 	nuc_stk[ucs_level].erase(n);
662 	pending.remove_node(n);
663 
664 	bu_find_best_bb(n, oi);
665 
666 	if (oi.bottom_bb == bu_bb) {
667 		GCM_DUMP( sblog << "   ready\n";);
668 		add_ready(n);
669 	} else {
670 		GCM_DUMP( sblog << "   ready_above\n";);
671 		ready_above.push_back(n);
672 	}
673 }
674 
bu_release_phi_defs(container_node * p,unsigned op)675 void gcm::bu_release_phi_defs(container_node* p, unsigned op)
676 {
677 	for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) {
678 		node *o = *I;
679 		value *v = o->src[op];
680 		if (v && !v->is_readonly())
681 			pending_defs.push_back(o->src[op]);
682 
683 	}
684 }
685 
get_uc_vec(vvec & vv)686 unsigned gcm::get_uc_vec(vvec &vv) {
687 	unsigned c = 0;
688 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
689 		value *v = *I;
690 		if (!v)
691 			continue;
692 
693 		if (v->is_rel())
694 			c += get_uc_vec(v->mdef);
695 		else
696 			c += v->use_count();
697 	}
698 	return c;
699 }
700 
init_use_count(nuc_map & m,container_node & s)701 void gcm::init_use_count(nuc_map& m, container_node &s) {
702 	m.clear();
703 	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
704 		node *n = *I;
705 		unsigned uc = get_uc_vec(n->dst);
706 		GCM_DUMP(
707 			sblog << "uc " << uc << "  ";
708 			dump::dump_op(n);
709 			sblog << "\n";
710 		);
711 		if (!uc) {
712 			pending_nodes.push_back(n);
713 			GCM_DUMP(
714 				sblog << "pushed pending_node in init ";
715 				dump::dump_op(n);
716 				sblog << "\n";
717 			);
718 
719 		} else
720 			m[n] = uc;
721 	}
722 }
723 
bu_release_val(value * v)724 void gcm::bu_release_val(value* v) {
725 	node *n = v->any_def();
726 
727 	if (n && n->parent == &pending) {
728 		nuc_map &m = nuc_stk[ucs_level];
729 		unsigned uc = ++m[n];
730 		unsigned uc2 = uses[n];
731 
732 		if (live.add_val(v)) {
733 			++live_count;
734 			GCM_DUMP ( sblog << "live_count: " << live_count << "\n"; );
735 		}
736 
737 		GCM_DUMP(
738 			sblog << "release val ";
739 			dump::dump_val(v);
740 			sblog << "  for node ";
741 			dump::dump_op(n);
742 			sblog << "    new uc=" << uc << ", total " << uc2 << "\n";
743 		);
744 
745 		if (uc == uc2)
746 			bu_release_op(n);
747 	}
748 
749 }
750 
init_def_count(nuc_map & m,container_node & s)751 void gcm::init_def_count(nuc_map& m, container_node& s) {
752 	m.clear();
753 	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
754 		node *n = *I;
755 		unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false);
756 		m[n] = dc;
757 
758 		GCM_DUMP(
759 			sblog << "dc " << dc << "  ";
760 			dump::dump_op(n);
761 			sblog << "\n";
762 		);
763 	}
764 }
765 
get_dc_vec(vvec & vv,bool src)766 unsigned gcm::get_dc_vec(vvec& vv, bool src) {
767 	unsigned c = 0;
768 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
769 		value *v = *I;
770 		if (!v || v->is_readonly())
771 			continue;
772 
773 		if (v->is_rel()) {
774 			c += v->rel->def != NULL;
775 			c += get_dc_vec(v->muse, true);
776 		}
777 		else if (src) {
778 			c += v->def != NULL;
779 			c += v->adef != NULL;
780 		}
781 	}
782 	return c;
783 }
784 
real_alu_count(sched_queue & q,unsigned max)785 unsigned gcm::real_alu_count(sched_queue& q, unsigned max) {
786 	sq_iterator I(q.begin()), E(q.end());
787 	unsigned c = 0;
788 
789 	while (I != E && c < max) {
790 		node *n = *I;
791 		if (n->is_alu_inst()) {
792 			if (!n->is_copy_mov() || !n->src[0]->is_any_gpr())
793 				++c;
794 		} else if (n->is_alu_packed()) {
795 			c += static_cast<container_node*>(n)->count();
796 		}
797 		++I;
798 	}
799 
800 	return c;
801 }
802 
check_alu_ready_count(unsigned threshold)803 bool gcm::check_alu_ready_count(unsigned threshold) {
804 	unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold);
805 	if (r >= threshold)
806 		return true;
807 	r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r);
808 	return r >= threshold;
809 }
810 
811 } // namespace r600_sb
812