1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #define VT_DEBUG 0
28 
29 #if VT_DEBUG
30 #define VT_DUMP(q) do { q } while (0)
31 #else
32 #define VT_DUMP(q)
33 #endif
34 
35 #include <cstring>
36 
37 #include "sb_shader.h"
38 #include "sb_pass.h"
39 
40 namespace r600_sb {
41 
42 static const char * chans = "xyzw01?_";
43 
operator <<(sb_ostream & o,value & v)44 sb_ostream& operator << (sb_ostream &o, value &v) {
45 
46 	bool dead = v.flags & VLF_DEAD;
47 
48 	if (dead)
49 		o << "{";
50 
51 	switch (v.kind) {
52 	case VLK_SPECIAL_REG: {
53 		switch (v.select.sel()) {
54 			case SV_AR_INDEX: o << "AR"; break;
55 			case SV_ALU_PRED: o << "PR"; break;
56 			case SV_EXEC_MASK: o << "EM"; break;
57 			case SV_VALID_MASK: o << "VM"; break;
58 			case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
59 			case SV_LDS_RW: o << "LDS_RW"; break;
60 			case SV_LDS_OQA: o << "LDS_OQA"; break;
61 			case SV_LDS_OQB: o << "LDS_OQB"; break;
62 			case SV_SCRATCH: o << "SCRATCH"; break;
63 			default: o << "???specialreg"; break;
64 		}
65 		break;
66 	}
67 
68 	case VLK_REG:
69 		o << "R" << v.select.sel() << "."
70 			<< chans[v.select.chan()];
71 
72 		break;
73 	case VLK_KCACHE: {
74 		o << "C" << v.select.sel() << "." << chans[v.select.chan()];
75 	}
76 		break;
77 	case VLK_CONST:
78 		o << v.literal_value.f << "|";
79 		o.print_zw_hex(v.literal_value.u, 8);
80 		break;
81 	case VLK_PARAM:
82 		o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET)
83 			<< chans[v.select.chan()];
84 		break;
85 	case VLK_TEMP:
86 		o << "t" << v.select.sel() - shader::temp_regid_offset;
87 		break;
88 	case VLK_REL_REG:
89 
90 		o << "A" << v.select;
91 		o << "[";
92 		o << *v.rel;
93 		o << "]";
94 
95 		o << "_" << v.uid;
96 
97 		break;
98 	case VLK_UNDEF:
99 		o << "undef";
100 		break;
101 	default:
102 		o << v.kind << "?????";
103 		break;
104 	}
105 
106 	if (v.version)
107 		o << "." << v.version;
108 
109 	if (dead)
110 		o << "}";
111 
112 	if (v.is_global())
113 		o << "||";
114 	if (v.is_fixed())
115 		o << "F";
116 	if (v.is_prealloc())
117 		o << "P";
118 
119 	sel_chan g;
120 
121 	if (v.is_rel()) {
122 		g = v.array->gpr;
123 	} else {
124 		g = v.gpr;
125 	}
126 
127 	if (g) {
128 		o << "@R" << g.sel() << "." << chans[g.chan()];
129 	}
130 
131 	return o;
132 }
133 
add_value(value * v)134 void value_table::add_value(value* v) {
135 
136 	if (v->gvn_source) {
137 		return;
138 	}
139 
140 	VT_DUMP(
141 		sblog << "gvn add_value ";
142 		dump::dump_val(v);
143 	);
144 
145 	value_hash hash = v->hash();
146 	vt_item & vti = hashtable[hash & size_mask];
147 	vti.push_back(v);
148 	++cnt;
149 
150 	if (v->def && ex.try_fold(v)) {
151 		VT_DUMP(
152 			sblog << " folded: ";
153 			dump::dump_val(v->gvn_source);
154 			sblog << "\n";
155 		);
156 		return;
157 	}
158 
159 	int n = 0;
160 	for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) {
161 		value *c = *I;
162 
163 		if (c == v)
164 			break;
165 
166 		if (expr_equal(c, v)) {
167 			v->gvn_source = c->gvn_source;
168 
169 			VT_DUMP(
170 				sblog << " found : equal to ";
171 				dump::dump_val(v->gvn_source);
172 				sblog << "\n";
173 			);
174 			return;
175 		}
176 	}
177 
178 	v->gvn_source = v;
179 	VT_DUMP(
180 		sblog << " added new\n";
181 	);
182 }
183 
hash()184 value_hash value::hash() {
185 	if (ghash)
186 		return ghash;
187 	if (is_rel())
188 		ghash = rel_hash();
189 	else if (def)
190 		ghash = def->hash();
191 	else
192 		ghash = ((uintptr_t)this) | 1;
193 
194 	return ghash;
195 }
196 
rel_hash()197 value_hash value::rel_hash() {
198 	value_hash h = rel ? rel->hash() : 0;
199 	h |= select << 10;
200 	h |= array->hash();
201 	return h;
202 }
203 
expr_equal(value * l,value * r)204 bool value_table::expr_equal(value* l, value* r) {
205 	return ex.equal(l, r);
206 }
207 
get_values(vvec & v)208 void value_table::get_values(vvec& v) {
209 	v.resize(cnt);
210 
211 	vvec::iterator T = v.begin();
212 
213 	for(vt_table::iterator I = hashtable.begin(), E = hashtable.end();
214 			I != E; ++I) {
215 		T = std::copy(I->begin(), I->end(), T);
216 	}
217 }
218 
add_use(node * n)219 void value::add_use(node* n) {
220 	if (0) {
221 	sblog << "add_use ";
222 	dump::dump_val(this);
223 	sblog << "   =>  ";
224 	dump::dump_op(n);
225 	}
226 	uses.push_back(n);
227 }
228 
229 struct use_node_comp {
use_node_compr600_sb::use_node_comp230 	explicit use_node_comp(const node *n) : n(n) {}
operator ()r600_sb::use_node_comp231 	bool operator() (const node *o) {
232 		return o->hash() == n->hash();
233 	}
234 
235 	private:
236 		const node *n;
237 };
238 
remove_use(const node * n)239 void value::remove_use(const node *n) {
240 	uselist::iterator it =
241 		std::find_if(uses.begin(), uses.end(), use_node_comp(n));
242 
243 	if (it != uses.end())
244 	{
245 		// We only ever had a pointer, so don't delete it here
246 		uses.erase(it);
247 	}
248 }
249 
use_count()250 unsigned value::use_count() {
251 	return uses.size();
252 }
253 
is_global()254 bool value::is_global() {
255 	if (chunk)
256 		return chunk->is_global();
257 	return flags & VLF_GLOBAL;
258 }
259 
set_global()260 void value::set_global() {
261 	assert(is_sgpr());
262 	flags |= VLF_GLOBAL;
263 	if (chunk)
264 		chunk->set_global();
265 }
266 
set_prealloc()267 void value::set_prealloc() {
268 	assert(is_sgpr());
269 	flags |= VLF_PREALLOC;
270 	if (chunk)
271 		chunk->set_prealloc();
272 }
273 
is_fixed()274 bool value::is_fixed() {
275 	if (array && array->gpr)
276 		return true;
277 	if (chunk && chunk->is_fixed())
278 		return true;
279 	return flags & VLF_FIXED;
280 }
281 
fix()282 void value::fix() {
283 	if (chunk)
284 		chunk->fix();
285 	flags |= VLF_FIXED;
286 }
287 
is_prealloc()288 bool value::is_prealloc() {
289 	if (chunk)
290 		return chunk->is_prealloc();
291 	return flags & VLF_PREALLOC;
292 }
293 
delete_uses()294 void value::delete_uses() {
295 	// We only ever had pointers, so don't delete them here
296 	uses.erase(uses.begin(), uses.end());
297 }
298 
no_reladdr_conflict_with(value * src)299 bool value::no_reladdr_conflict_with(value *src)
300 {
301 	/*  if the register is not relative, it can't create an relative access conflict */
302 	if (!src->is_rel())
303 		return true;
304 
305 	/* If the destination is AR then we accept the copy propagation, because the
306 	 * scheduler actually re-creates the address loading operation and will
307 	 * signal interference if there is an address register load and it will fail
308 	 * because of this.
309 	 */
310 	if (gvalue()->is_AR())
311 		return true;
312 
313 	/* For all nodes that use this value test whether the operation uses another
314 	 * relative access with a different address value. If found, signal conflict.
315 	 */
316 	for (uselist::const_iterator N = uses.begin(); N != uses.end(); ++N) {
317 		for (vvec::const_iterator V = (*N)->src.begin(); V != (*N)->src.end(); ++V) {
318 			if (*V) {
319 				value *v = (*V)->gvalue();
320 				if (v != src && v->is_rel() && v->rel != src->rel)
321 					return false;
322 			}
323 		}
324 		for (vvec::const_iterator V = (*N)->dst.begin(); V != (*N)->dst.end(); ++V) {
325 			if (*V) {
326 				value *v = (*V)->gvalue();
327 				if (v && v != src && v->is_rel() && (v->rel != src->rel))
328 					return false;
329 			}
330 		}
331 	}
332 	return true;
333 }
334 
update_values()335 void ra_constraint::update_values() {
336 	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
337 		assert(!(*I)->constraint);
338 		(*I)->constraint = this;
339 	}
340 }
341 
allocate(unsigned sz)342 void* sb_pool::allocate(unsigned sz) {
343 	sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1);
344 	assert (sz < (block_size >> 6) && "too big allocation size for sb_pool");
345 
346 	unsigned offset = total_size % block_size;
347 	unsigned capacity = block_size * blocks.size();
348 
349 	if (total_size + sz > capacity) {
350 		total_size = capacity;
351 		void * nb = malloc(block_size);
352 		blocks.push_back(nb);
353 		offset = 0;
354 	}
355 
356 	total_size += sz;
357 	return ((char*)blocks.back() + offset);
358 }
359 
free_all()360 void sb_pool::free_all() {
361 	for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E;
362 			++I) {
363 		free(*I);
364 	}
365 }
366 
create(value_kind k,sel_chan regid,unsigned ver)367 value* sb_value_pool::create(value_kind k, sel_chan regid,
368                              unsigned ver) {
369 	void* np = allocate(aligned_elt_size);
370 	value *v = new (np) value(size(), k, regid, ver);
371 	return v;
372 }
373 
delete_all()374 void sb_value_pool::delete_all() {
375 	unsigned bcnt = blocks.size();
376 	unsigned toffset = 0;
377 	for (unsigned b = 0; b < bcnt; ++b) {
378 		char *bstart = (char*)blocks[b];
379 		for (unsigned offset = 0; offset < block_size;
380 				offset += aligned_elt_size) {
381 			((value*)(bstart + offset))->~value();
382 			toffset += aligned_elt_size;
383 			if (toffset >= total_size)
384 				return;
385 		}
386 	}
387 }
388 
get(unsigned id)389 bool sb_bitset::get(unsigned id) {
390 	assert(id < bit_size);
391 	unsigned w = id / bt_bits;
392 	unsigned b = id % bt_bits;
393 	return (data[w] >> b) & 1;
394 }
395 
set(unsigned id,bool bit)396 void sb_bitset::set(unsigned id, bool bit) {
397 	assert(id < bit_size);
398 	unsigned w = id / bt_bits;
399 	unsigned b = id % bt_bits;
400 	if (w >= data.size())
401 		data.resize(w + 1);
402 
403 	if (bit)
404 		data[w] |= (1 << b);
405 	else
406 		data[w] &= ~(1 << b);
407 }
408 
set_chk(unsigned id,bool bit)409 inline bool sb_bitset::set_chk(unsigned id, bool bit) {
410 	assert(id < bit_size);
411 	unsigned w = id / bt_bits;
412 	unsigned b = id % bt_bits;
413 	basetype d = data[w];
414 	basetype dn = (d & ~(1 << b)) | (bit << b);
415 	bool r = (d != dn);
416 	data[w] = r ? dn : data[w];
417 	return r;
418 }
419 
clear()420 void sb_bitset::clear() {
421 	std::fill(data.begin(), data.end(), 0);
422 }
423 
resize(unsigned size)424 void sb_bitset::resize(unsigned size) {
425 	unsigned cur_data_size = data.size();
426 	unsigned new_data_size = (size + bt_bits - 1) / bt_bits;
427 
428 
429 	if (new_data_size != cur_data_size)
430 		data.resize(new_data_size);
431 
432 	// make sure that new bits in the existing word are cleared
433 	if (cur_data_size && size > bit_size && bit_size % bt_bits) {
434 		basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits);
435 		data[cur_data_size - 1] &= ~clear_mask;
436 	}
437 
438 	bit_size = size;
439 }
440 
find_bit(unsigned start)441 unsigned sb_bitset::find_bit(unsigned start) {
442 	assert(start < bit_size);
443 	unsigned w = start / bt_bits;
444 	unsigned b = start % bt_bits;
445 	unsigned sz = data.size();
446 
447 	while (w < sz) {
448 		basetype d = data[w] >> b;
449 		if (d != 0) {
450 			unsigned pos = __builtin_ctz(d) + b + w * bt_bits;
451 			return pos;
452 		}
453 
454 		b = 0;
455 		++w;
456 	}
457 
458 	return bit_size;
459 }
460 
iterator(shader & sh,sb_value_set * s,unsigned nb)461 sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb)
462 	: vp(sh.get_value_pool()), s(s), nb(nb) {}
463 
add_set_checked(sb_value_set & s2)464 bool sb_value_set::add_set_checked(sb_value_set& s2) {
465 	if (bs.size() < s2.bs.size())
466 		bs.resize(s2.bs.size());
467 	sb_bitset nbs = bs | s2.bs;
468 	if (bs != nbs) {
469 		bs.swap(nbs);
470 		return true;
471 	}
472 	return false;
473 }
474 
remove_set(sb_value_set & s2)475 void r600_sb::sb_value_set::remove_set(sb_value_set& s2) {
476 	bs.mask(s2.bs);
477 }
478 
add_val(value * v)479 bool sb_value_set::add_val(value* v) {
480 	assert(v);
481 	if (bs.size() < v->uid)
482 		bs.resize(v->uid + 32);
483 
484 	return bs.set_chk(v->uid - 1, 1);
485 }
486 
remove_vec(vvec & vv)487 bool sb_value_set::remove_vec(vvec& vv) {
488 	bool modified = false;
489 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
490 		if (*I)
491 			modified |= remove_val(*I);
492 	}
493 	return modified;
494 }
495 
clear()496 void sb_value_set::clear() {
497 	bs.clear();
498 }
499 
remove_val(value * v)500 bool sb_value_set::remove_val(value* v) {
501 	assert(v);
502 	if (bs.size() < v->uid)
503 		return false;
504 	return bs.set_chk(v->uid - 1, 0);
505 }
506 
add_vec(vvec & vv)507 bool r600_sb::sb_value_set::add_vec(vvec& vv) {
508 	bool modified = false;
509 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
510 		value *v = *I;
511 		if (v)
512 			modified |= add_val(v);
513 	}
514 	return modified;
515 }
516 
contains(value * v)517 bool r600_sb::sb_value_set::contains(value* v) {
518 	unsigned b = v->uid - 1;
519 	if (b < bs.size())
520 		return bs.get(b);
521 	else
522 		return false;
523 }
524 
empty()525 bool sb_value_set::empty() {
526 	return bs.size() == 0 || bs.find_bit(0) == bs.size();
527 }
528 
swap(sb_bitset & bs2)529 void sb_bitset::swap(sb_bitset& bs2) {
530 	std::swap(data, bs2.data);
531 	std::swap(bit_size, bs2.bit_size);
532 }
533 
operator ==(const sb_bitset & bs2)534 bool sb_bitset::operator ==(const sb_bitset& bs2) {
535 	if (bit_size != bs2.bit_size)
536 		return false;
537 
538 	for (unsigned i = 0, c = data.size(); i < c; ++i) {
539 		if (data[i] != bs2.data[i])
540 			return false;
541 	}
542 	return true;
543 }
544 
operator &=(const sb_bitset & bs2)545 sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) {
546 	if (bit_size > bs2.bit_size) {
547 		resize(bs2.bit_size);
548 	}
549 
550 	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
551 			++i) {
552 		data[i] &= bs2.data[i];
553 	}
554 	return *this;
555 }
556 
mask(const sb_bitset & bs2)557 sb_bitset& sb_bitset::mask(const sb_bitset& bs2) {
558 	if (bit_size < bs2.bit_size) {
559 		resize(bs2.bit_size);
560 	}
561 
562 	for (unsigned i = 0, c = data.size(); i < c;
563 			++i) {
564 		data[i] &= ~bs2.data[i];
565 	}
566 	return *this;
567 }
568 
check()569 bool ra_constraint::check() {
570 	assert(kind == CK_SAME_REG);
571 
572 	unsigned reg = 0;
573 
574 	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
575 		value *v = *I;
576 		if (!v)
577 			continue;
578 
579 		if (!v->gpr)
580 			return false;
581 
582 		if (reg == 0)
583 			reg = v->gpr.sel() + 1;
584 		else if (reg != v->gpr.sel() + 1)
585 			return false;
586 
587 		if (v->is_chan_pinned()) {
588 			if (v->pin_gpr.chan() != v->gpr.chan())
589 				return false;
590 		}
591 	}
592 	return true;
593 }
594 
is_dead()595 bool gpr_array::is_dead() {
596 	return false;
597 }
598 
599 } // namespace r600_sb
600