1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #define RA_DEBUG 0
28 
29 #if RA_DEBUG
30 #define RA_DUMP(q) do { q } while (0)
31 #else
32 #define RA_DUMP(q)
33 #endif
34 
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37 
38 namespace r600_sb {
39 
run()40 int ra_coalesce::run() {
41 	return sh.coal.run();
42 }
43 
add_edge(value * a,value * b,unsigned cost)44 void coalescer::add_edge(value* a, value* b, unsigned cost) {
45 	assert(a->is_sgpr() && b->is_sgpr());
46 	edges.insert(new ra_edge(a,b, cost));
47 }
48 
create_chunk(value * v)49 void coalescer::create_chunk(value *v) {
50 
51 	assert(v->is_sgpr());
52 
53 	ra_chunk *c = new ra_chunk();
54 
55 	c->values.push_back(v);
56 
57 	if (v->is_chan_pinned())
58 		c->flags |= RCF_PIN_CHAN;
59 	if (v->is_reg_pinned()) {
60 		c->flags |= RCF_PIN_REG;
61 	}
62 
63 	c->pin = v->pin_gpr;
64 
65 	RA_DUMP(
66 		sblog << "create_chunk: ";
67 		dump_chunk(c);
68 	);
69 
70 	all_chunks.push_back(c);
71 	v->chunk = c;
72 
73 }
74 
unify_chunks(ra_edge * e)75 void coalescer::unify_chunks(ra_edge *e) {
76 	ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
77 
78 	RA_DUMP(
79 		sblog << "unify_chunks: ";
80 		dump_chunk(c1);
81 		dump_chunk(c2);
82 	);
83 
84 	if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
85 		c1->flags |= RCF_PIN_CHAN;
86 		c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
87 	}
88 
89 	if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
90 		c1->flags |= RCF_PIN_REG;
91 		c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
92 	}
93 
94 	c1->values.reserve(c1->values.size() + c2->values.size());
95 
96 	for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
97 			++I) {
98 		(*I)->chunk = c1;
99 		c1->values.push_back(*I);
100 	}
101 
102 	chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
103 	assert(F != all_chunks.end());
104 
105 	all_chunks.erase(F);
106 
107 	c1->cost += c2->cost + e->cost;
108 	delete c2;
109 }
110 
chunks_interference(ra_chunk * c1,ra_chunk * c2)111 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
112 	unsigned pin_flags = (c1->flags & c2->flags) &
113 			(RCF_PIN_CHAN | RCF_PIN_REG);
114 
115 	if ((pin_flags & RCF_PIN_CHAN) &&
116 			c1->pin.chan() != c2->pin.chan())
117 		return true;
118 
119 	if ((pin_flags & RCF_PIN_REG) &&
120 			c1->pin.sel() != c2->pin.sel())
121 		return true;
122 
123 	for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
124 			++I) {
125 		value *v1 = *I;
126 
127 		for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
128 				++I) {
129 			value *v2 = *I;
130 
131 			if (!v1->v_equal(v2) && v1->interferences.contains(v2))
132 				return true;
133 		}
134 	}
135 	return false;
136 }
137 
build_chunks()138 void coalescer::build_chunks() {
139 
140 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
141 			I != E; ++I) {
142 
143 		ra_edge *e = *I;
144 
145 		if (!e->a->chunk)
146 			create_chunk(e->a);
147 
148 		if (!e->b->chunk)
149 			create_chunk(e->b);
150 
151 		ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
152 
153 		if (c1 == c2) {
154 			c1->cost += e->cost;
155 		} else if (!chunks_interference(c1, c2))
156 			unify_chunks(e);
157 	}
158 }
159 
create_constraint(constraint_kind kind)160 ra_constraint* coalescer::create_constraint(constraint_kind kind) {
161 	ra_constraint *c = new ra_constraint(kind);
162 	all_constraints.push_back(c);
163 	return c;
164 }
165 
dump_edges()166 void coalescer::dump_edges() {
167 	sblog << "######## affinity edges\n";
168 
169 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
170 			I != E; ++I) {
171 		ra_edge* e = *I;
172 		sblog << "  ra_edge ";
173 		dump::dump_val(e->a);
174 		sblog << " <-> ";
175 		dump::dump_val(e->b);
176 		sblog << "   cost = " << e->cost << "\n";
177 	}
178 }
179 
dump_chunks()180 void coalescer::dump_chunks() {
181 	sblog << "######## chunks\n";
182 
183 	for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
184 			I != E; ++I) {
185 		ra_chunk* c = *I;
186 		dump_chunk(c);
187 	}
188 }
189 
190 
dump_constraint_queue()191 void coalescer::dump_constraint_queue() {
192 	sblog << "######## constraints\n";
193 
194 	for (constraint_queue::iterator I = constraints.begin(),
195 			E = constraints.end(); I != E; ++I) {
196 		ra_constraint* c = *I;
197 		dump_constraint(c);
198 	}
199 }
200 
dump_chunk(ra_chunk * c)201 void coalescer::dump_chunk(ra_chunk* c) {
202 	sblog << "  ra_chunk cost = " << c->cost << "  :  ";
203 	dump::dump_vec(c->values);
204 
205 	if (c->flags & RCF_PIN_REG)
206 		sblog << "   REG = " << c->pin.sel();
207 
208 	if (c->flags & RCF_PIN_CHAN)
209 		sblog << "   CHAN = " << c->pin.chan();
210 
211 	sblog << (c->flags & RCF_GLOBAL ? "  GLOBAL" : "");
212 
213 	sblog << "\n";
214 }
215 
dump_constraint(ra_constraint * c)216 void coalescer::dump_constraint(ra_constraint* c) {
217 	sblog << "  ra_constraint: ";
218 	switch (c->kind) {
219 		case CK_PACKED_BS: sblog << "PACKED_BS"; break;
220 		case CK_PHI: sblog << "PHI"; break;
221 		case CK_SAME_REG: sblog << "SAME_REG"; break;
222 		default: sblog << "UNKNOWN_KIND"; assert(0); break;
223 	}
224 
225 	sblog << "  cost = " << c->cost << "  : ";
226 	dump::dump_vec(c->values);
227 
228 	sblog << "\n";
229 }
230 
get_chunk_interferences(ra_chunk * c,val_set & s)231 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
232 
233 	for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
234 			++I) {
235 		value *v = *I;
236 		s.add_set(v->interferences);
237 	}
238 	s.remove_vec(c->values);
239 }
240 
build_chunk_queue()241 void coalescer::build_chunk_queue() {
242 	for (chunk_vec::iterator I = all_chunks.begin(),
243 			E = all_chunks.end(); I != E; ++I) {
244 		ra_chunk *c = *I;
245 
246 		if (!c->is_fixed())
247 			chunks.insert(c);
248 	}
249 }
250 
build_constraint_queue()251 void coalescer::build_constraint_queue() {
252 	for (constraint_vec::iterator I = all_constraints.begin(),
253 			E = all_constraints.end(); I != E; ++I) {
254 		ra_constraint *c = *I;
255 		unsigned cost = 0;
256 
257 		if (c->values.empty() || !c->values.front()->is_sgpr())
258 			continue;
259 
260 		if (c->kind != CK_SAME_REG)
261 			continue;
262 
263 		for (vvec::iterator I = c->values.begin(), E = c->values.end();
264 				I != E; ++I) {
265 			value *v = *I;
266 			if (!v->chunk)
267 				create_chunk(v);
268 			else
269 				cost += v->chunk->cost;
270 		}
271 		c->cost = cost;
272 		constraints.insert(c);
273 	}
274 }
275 
color_chunks()276 void coalescer::color_chunks() {
277 
278 	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
279 			I != E; ++I) {
280 		ra_chunk *c = *I;
281 		if (c->is_fixed() || c->values.size() == 1)
282 			continue;
283 
284 		sb_bitset rb;
285 		val_set interf;
286 
287 		get_chunk_interferences(c, interf);
288 
289 		RA_DUMP(
290 			sblog << "color_chunks: ";
291 			dump_chunk(c);
292 			sblog << "\n interferences: ";
293 			dump::dump_set(sh,interf);
294 			sblog << "\n";
295 		);
296 
297 		init_reg_bitset(rb, interf);
298 
299 		unsigned pass = c->is_reg_pinned() ? 0 : 1;
300 
301 		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
302 		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
303 
304 		unsigned color = 0;
305 
306 		while (pass < 2) {
307 
308 			unsigned rs, re;
309 
310 			if (pass == 0) {
311 				rs = c->pin.sel();
312 				re = rs + 1;
313 			} else {
314 				rs = 0;
315 				re = sh.num_nontemp_gpr();
316 			}
317 
318 			for (unsigned reg = rs; reg < re; ++reg) {
319 				for (unsigned chan = cs; chan < ce; ++chan) {
320 					unsigned bit = sel_chan(reg, chan);
321 					if (bit >= rb.size() || !rb.get(bit)) {
322 						color = bit;
323 						break;
324 					}
325 				}
326 				if (color)
327 					break;
328 			}
329 
330 			if (color)
331 				break;
332 
333 			++pass;
334 		}
335 
336 		assert(color);
337 		color_chunk(c, color);
338 	}
339 }
340 
init_reg_bitset(sb_bitset & bs,val_set & vs)341 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
342 
343 	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
344 		value *v = *I;
345 
346 		if (!v->is_any_gpr())
347 			continue;
348 
349 		unsigned gpr = v->get_final_gpr();
350 		if (!gpr)
351 			continue;
352 
353 		if (gpr) {
354 			if (gpr >= bs.size())
355 				bs.resize(gpr + 64);
356 			bs.set(gpr, 1);
357 		}
358 	}
359 }
360 
color_chunk(ra_chunk * c,sel_chan color)361 void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
362 
363 	vvec vv = c->values;
364 
365 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
366 			++I) {
367 		value *v = *I;
368 
369 		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
370 			detach_value(v);
371 			continue;
372 		}
373 
374 		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
375 			detach_value(v);
376 			continue;
377 		}
378 
379 		v->gpr = color;
380 
381 		if (v->constraint && v->constraint->kind == CK_PHI)
382 			v->fix();
383 
384 
385 		RA_DUMP(
386 			sblog << " assigned " << color << " to ";
387 			dump::dump_val(v);
388 			sblog << "\n";
389 		);
390 	}
391 
392 	c->pin = color;
393 
394 	if (c->is_reg_pinned()) {
395 		c->fix();
396 	}
397 }
398 
~coalescer()399 coalescer::~coalescer() {
400 
401 	// FIXME use pool allocator ??
402 
403 	for (constraint_vec::iterator I = all_constraints.begin(),
404 			E = all_constraints.end(); I != E; ++I) {
405 		delete (*I);
406 	}
407 
408 	for (chunk_vec::iterator I = all_chunks.begin(),
409 			E = all_chunks.end(); I != E; ++I) {
410 		delete (*I);
411 	}
412 
413 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
414 			I != E; ++I) {
415 		delete (*I);
416 	}
417 }
418 
run()419 int coalescer::run() {
420 	int r;
421 
422 	RA_DUMP( dump_edges(); );
423 
424 	build_chunks();
425 	RA_DUMP( dump_chunks(); );
426 
427 	build_constraint_queue();
428 	RA_DUMP( dump_constraint_queue(); );
429 
430 	if ((r = color_constraints()))
431 		return r;
432 
433 	build_chunk_queue();
434 	color_chunks();
435 
436 	return 0;
437 }
438 
color_phi_constraint(ra_constraint * c)439 void coalescer::color_phi_constraint(ra_constraint* c) {
440 }
441 
detach_value(value * v)442 ra_chunk* coalescer::detach_value(value *v) {
443 
444 	vvec::iterator F = std::find(v->chunk->values.begin(),
445 	                             v->chunk->values.end(), v);
446 
447 	assert(F != v->chunk->values.end());
448 	v->chunk->values.erase(F);
449 	create_chunk(v);
450 
451 	if (v->is_reg_pinned()) {
452 		v->chunk->fix();
453 	}
454 
455 	RA_DUMP(
456 		sblog << "           detached : ";
457 		dump_chunk(v->chunk);
458 	);
459 
460 	return v->chunk;
461 
462 }
463 
color_reg_constraint(ra_constraint * c)464 int coalescer::color_reg_constraint(ra_constraint *c) {
465 	unsigned k, cnt = c->values.size();
466 	vvec & cv = c->values;
467 
468 	ra_chunk *ch[4];
469 	unsigned swz[4] = {0, 1, 2, 3};
470 	val_set interf[4];
471 	sb_bitset rb[4];
472 
473 	bool reg_pinned = false;
474 	unsigned pin_reg = ~0;
475 
476 	unsigned chan_mask = 0;
477 
478 	k = 0;
479 	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
480 		value *v = *I;
481 
482 		if (!v->chunk)
483 			create_chunk(v);
484 
485 		ch[k] = v->chunk;
486 
487 		if (v->chunk->is_chan_pinned()) {
488 			unsigned chan = 1 << v->chunk->pin.chan();
489 
490 			if (chan & chan_mask) { // channel already in use
491 				ch[k] = detach_value(v);
492 				assert(!ch[k]->is_chan_pinned());
493 			} else {
494 				chan_mask |= chan;
495 			}
496 		}
497 
498 		if (v->chunk->is_reg_pinned()) {
499 			if (!reg_pinned) {
500 				reg_pinned = true;
501 				pin_reg = v->chunk->pin.sel();
502 			}
503 		}
504 
505 		get_chunk_interferences(ch[k], interf[k]);
506 		init_reg_bitset(rb[k], interf[k]);
507 	}
508 
509 	unsigned start_reg, end_reg;
510 
511 	start_reg = 0;
512 	end_reg = sh.num_nontemp_gpr();
513 
514 	unsigned min_reg = end_reg;
515 	unsigned min_swz[4];
516 	unsigned i, pass = reg_pinned ? 0 : 1;
517 
518 	bool done = false;
519 
520 	while (pass < 2) {
521 
522 		unsigned rs, re;
523 
524 		if (pass == 0) {
525 			re = pin_reg + 1;
526 			rs = pin_reg;
527 		} else {
528 			re = end_reg;
529 			rs = start_reg;
530 		}
531 
532 		min_reg = re;
533 
534 		// cycle on swizzle combinations
535 		do {
536 			for (i = 0; i < cnt; ++i) {
537 				if (ch[i]->flags & RCF_PIN_CHAN)
538 					if (ch[i]->pin.chan() != swz[i])
539 						break;
540 			}
541 			if (i != cnt)
542 				continue;
543 
544 			// looking for minimal reg number such that the constrained chunks
545 			// may be colored with the current swizzle combination
546 			for (unsigned reg = rs; reg < min_reg; ++reg) {
547 				for (i = 0; i < cnt; ++i) {
548 					unsigned bit = sel_chan(reg, swz[i]);
549 					if (bit < rb[i].size() && rb[i].get(bit))
550 						break;
551 				}
552 				if (i == cnt) {
553 					done = true;
554 					min_reg = reg;
555 					std::copy(swz, swz + 4, min_swz);
556 					break;
557 				}
558 			}
559 
560 			if (pass == 0 && done)
561 				break;
562 
563 		} while (std::next_permutation(swz, swz + 4));
564 
565 		if (!done && pass) {
566 			sblog << "sb: ra_coalesce - out of registers\n";
567 			return -1;
568 		}
569 
570 		if (pass == 0 && done)
571 			break;
572 
573 		++pass;
574 	};
575 
576 	assert(done);
577 
578 	RA_DUMP(
579 	sblog << "min reg = " << min_reg << "   min_swz = "
580 			<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
581 	);
582 
583 	for (i = 0; i < cnt; ++i) {
584 		sel_chan color(min_reg, min_swz[i]);
585 		ra_chunk *cc = ch[i];
586 
587 		if (cc->is_fixed()) {
588 			if (cc->pin != color)
589 				cc = detach_value(cv[i]);
590 			else
591 				continue;
592 		}
593 
594 		color_chunk(cc, color);
595 		cc->fix();
596 		cc->set_prealloc();
597 	}
598 
599 	return 0;
600 }
601 
color_constraints()602 int coalescer::color_constraints() {
603 	int r;
604 
605 	for (constraint_queue::iterator I = constraints.begin(),
606 			E = constraints.end(); I != E; ++I) {
607 
608 		ra_constraint *c = *I;
609 
610 		RA_DUMP(
611 			sblog << "color_constraints: ";
612 			dump_constraint(c);
613 		);
614 
615 		if (c->kind == CK_SAME_REG) {
616 			if ((r = color_reg_constraint(c)))
617 				return r;
618 		} else if (c->kind == CK_PHI)
619 			color_phi_constraint(c);
620 	}
621 	return 0;
622 }
623 
624 } // namespace r600_sb
625