1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
// Compile-time switch for the value-table debug output in this file.
// Change to a nonzero value to enable the statements wrapped in VT_DUMP().
#define VT_DEBUG 0

// VT_DUMP(q) expands to `do { q } while (0)` when VT_DEBUG is nonzero and to
// nothing otherwise, so the wrapped statements disappear from normal builds.
#if VT_DEBUG
#define VT_DUMP(q) do { q } while (0)
#else
#define VT_DUMP(q)
#endif
34 
35 #include <cstring>
36 
37 #include "sb_shader.h"
38 #include "sb_pass.h"
39 
40 namespace r600_sb {
41 
// Lookup table of one-character channel names, indexed by a channel number
// when values are printed (see the stream operator below).
// NOTE(review): the first four entries look like the x/y/z/w components; the
// meaning of the remaining characters is not visible here - confirm.
static const char * chans = "xyzw01?_";
43 
operator <<(sb_ostream & o,value & v)44 sb_ostream& operator << (sb_ostream &o, value &v) {
45 
46 	bool dead = v.flags & VLF_DEAD;
47 
48 	if (dead)
49 		o << "{";
50 
51 	switch (v.kind) {
52 	case VLK_SPECIAL_REG: {
53 		switch (v.select.sel()) {
54 			case SV_AR_INDEX: o << "AR"; break;
55 			case SV_ALU_PRED: o << "PR"; break;
56 			case SV_EXEC_MASK: o << "EM"; break;
57 			case SV_VALID_MASK: o << "VM"; break;
58 			case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
59 			case SV_LDS_RW: o << "LDS_RW"; break;
60 			case SV_LDS_OQA: o << "LDS_OQA"; break;
61 			case SV_LDS_OQB: o << "LDS_OQB"; break;
62 			default: o << "???specialreg"; break;
63 		}
64 		break;
65 	}
66 
67 	case VLK_REG:
68 		o << "R" << v.select.sel() << "."
69 			<< chans[v.select.chan()];
70 
71 		break;
72 	case VLK_KCACHE: {
73 		o << "C" << v.select.sel() << "." << chans[v.select.chan()];
74 	}
75 		break;
76 	case VLK_CONST:
77 		o << v.literal_value.f << "|";
78 		o.print_zw_hex(v.literal_value.u, 8);
79 		break;
80 	case VLK_PARAM:
81 		o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET)
82 			<< chans[v.select.chan()];
83 		break;
84 	case VLK_TEMP:
85 		o << "t" << v.select.sel() - shader::temp_regid_offset;
86 		break;
87 	case VLK_REL_REG:
88 
89 		o << "A" << v.select;
90 		o << "[";
91 		o << *v.rel;
92 		o << "]";
93 
94 		o << "_" << v.uid;
95 
96 		break;
97 	case VLK_UNDEF:
98 		o << "undef";
99 		break;
100 	default:
101 		o << v.kind << "?????";
102 		break;
103 	}
104 
105 	if (v.version)
106 		o << "." << v.version;
107 
108 	if (dead)
109 		o << "}";
110 
111 	if (v.is_global())
112 		o << "||";
113 	if (v.is_fixed())
114 		o << "F";
115 	if (v.is_prealloc())
116 		o << "P";
117 
118 	sel_chan g;
119 
120 	if (v.is_rel()) {
121 		g = v.array->gpr;
122 	} else {
123 		g = v.gpr;
124 	}
125 
126 	if (g) {
127 		o << "@R" << g.sel() << "." << chans[g.chan()];
128 	}
129 
130 	return o;
131 }
132 
add_value(value * v)133 void value_table::add_value(value* v) {
134 
135 	if (v->gvn_source) {
136 		return;
137 	}
138 
139 	VT_DUMP(
140 		sblog << "gvn add_value ";
141 		dump::dump_val(v);
142 	);
143 
144 	value_hash hash = v->hash();
145 	vt_item & vti = hashtable[hash & size_mask];
146 	vti.push_back(v);
147 	++cnt;
148 
149 	if (v->def && ex.try_fold(v)) {
150 		VT_DUMP(
151 			sblog << " folded: ";
152 			dump::dump_val(v->gvn_source);
153 			sblog << "\n";
154 		);
155 		return;
156 	}
157 
158 	int n = 0;
159 	for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) {
160 		value *c = *I;
161 
162 		if (c == v)
163 			break;
164 
165 		if (expr_equal(c, v)) {
166 			v->gvn_source = c->gvn_source;
167 
168 			VT_DUMP(
169 				sblog << " found : equal to ";
170 				dump::dump_val(v->gvn_source);
171 				sblog << "\n";
172 			);
173 			return;
174 		}
175 	}
176 
177 	v->gvn_source = v;
178 	VT_DUMP(
179 		sblog << " added new\n";
180 	);
181 }
182 
hash()183 value_hash value::hash() {
184 	if (ghash)
185 		return ghash;
186 	if (is_rel())
187 		ghash = rel_hash();
188 	else if (def)
189 		ghash = def->hash();
190 	else
191 		ghash = ((uintptr_t)this) | 1;
192 
193 	return ghash;
194 }
195 
rel_hash()196 value_hash value::rel_hash() {
197 	value_hash h = rel ? rel->hash() : 0;
198 	h |= select << 10;
199 	h |= array->hash();
200 	return h;
201 }
202 
expr_equal(value * l,value * r)203 bool value_table::expr_equal(value* l, value* r) {
204 	return ex.equal(l, r);
205 }
206 
get_values(vvec & v)207 void value_table::get_values(vvec& v) {
208 	v.resize(cnt);
209 
210 	vvec::iterator T = v.begin();
211 
212 	for(vt_table::iterator I = hashtable.begin(), E = hashtable.end();
213 			I != E; ++I) {
214 		T = std::copy(I->begin(), I->end(), T);
215 	}
216 }
217 
add_use(node * n)218 void value::add_use(node* n) {
219 	if (0) {
220 	sblog << "add_use ";
221 	dump::dump_val(this);
222 	sblog << "   =>  ";
223 	dump::dump_op(n);
224 	}
225 	uses.push_back(n);
226 }
227 
228 struct use_node_comp {
use_node_compr600_sb::use_node_comp229 	explicit use_node_comp(const node *n) : n(n) {}
operator ()r600_sb::use_node_comp230 	bool operator() (const node *o) {
231 		return o->hash() == n->hash();
232 	}
233 
234 	private:
235 		const node *n;
236 };
237 
remove_use(const node * n)238 void value::remove_use(const node *n) {
239 	uselist::iterator it =
240 		std::find_if(uses.begin(), uses.end(), use_node_comp(n));
241 
242 	if (it != uses.end())
243 	{
244 		// We only ever had a pointer, so don't delete it here
245 		uses.erase(it);
246 	}
247 }
248 
use_count()249 unsigned value::use_count() {
250 	return uses.size();
251 }
252 
is_global()253 bool value::is_global() {
254 	if (chunk)
255 		return chunk->is_global();
256 	return flags & VLF_GLOBAL;
257 }
258 
set_global()259 void value::set_global() {
260 	assert(is_sgpr());
261 	flags |= VLF_GLOBAL;
262 	if (chunk)
263 		chunk->set_global();
264 }
265 
set_prealloc()266 void value::set_prealloc() {
267 	assert(is_sgpr());
268 	flags |= VLF_PREALLOC;
269 	if (chunk)
270 		chunk->set_prealloc();
271 }
272 
is_fixed()273 bool value::is_fixed() {
274 	if (array && array->gpr)
275 		return true;
276 	if (chunk && chunk->is_fixed())
277 		return true;
278 	return flags & VLF_FIXED;
279 }
280 
fix()281 void value::fix() {
282 	if (chunk)
283 		chunk->fix();
284 	flags |= VLF_FIXED;
285 }
286 
is_prealloc()287 bool value::is_prealloc() {
288 	if (chunk)
289 		return chunk->is_prealloc();
290 	return flags & VLF_PREALLOC;
291 }
292 
delete_uses()293 void value::delete_uses() {
294 	// We only ever had pointers, so don't delete them here
295 	uses.erase(uses.begin(), uses.end());
296 }
297 
update_values()298 void ra_constraint::update_values() {
299 	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
300 		assert(!(*I)->constraint);
301 		(*I)->constraint = this;
302 	}
303 }
304 
allocate(unsigned sz)305 void* sb_pool::allocate(unsigned sz) {
306 	sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1);
307 	assert (sz < (block_size >> 6) && "too big allocation size for sb_pool");
308 
309 	unsigned offset = total_size % block_size;
310 	unsigned capacity = block_size * blocks.size();
311 
312 	if (total_size + sz > capacity) {
313 		total_size = capacity;
314 		void * nb = malloc(block_size);
315 		blocks.push_back(nb);
316 		offset = 0;
317 	}
318 
319 	total_size += sz;
320 	return ((char*)blocks.back() + offset);
321 }
322 
free_all()323 void sb_pool::free_all() {
324 	for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E;
325 			++I) {
326 		free(*I);
327 	}
328 }
329 
create(value_kind k,sel_chan regid,unsigned ver)330 value* sb_value_pool::create(value_kind k, sel_chan regid,
331                              unsigned ver) {
332 	void* np = allocate(aligned_elt_size);
333 	value *v = new (np) value(size(), k, regid, ver);
334 	return v;
335 }
336 
delete_all()337 void sb_value_pool::delete_all() {
338 	unsigned bcnt = blocks.size();
339 	unsigned toffset = 0;
340 	for (unsigned b = 0; b < bcnt; ++b) {
341 		char *bstart = (char*)blocks[b];
342 		for (unsigned offset = 0; offset < block_size;
343 				offset += aligned_elt_size) {
344 			((value*)(bstart + offset))->~value();
345 			toffset += aligned_elt_size;
346 			if (toffset >= total_size)
347 				return;
348 		}
349 	}
350 }
351 
get(unsigned id)352 bool sb_bitset::get(unsigned id) {
353 	assert(id < bit_size);
354 	unsigned w = id / bt_bits;
355 	unsigned b = id % bt_bits;
356 	return (data[w] >> b) & 1;
357 }
358 
set(unsigned id,bool bit)359 void sb_bitset::set(unsigned id, bool bit) {
360 	assert(id < bit_size);
361 	unsigned w = id / bt_bits;
362 	unsigned b = id % bt_bits;
363 	if (w >= data.size())
364 		data.resize(w + 1);
365 
366 	if (bit)
367 		data[w] |= (1 << b);
368 	else
369 		data[w] &= ~(1 << b);
370 }
371 
set_chk(unsigned id,bool bit)372 inline bool sb_bitset::set_chk(unsigned id, bool bit) {
373 	assert(id < bit_size);
374 	unsigned w = id / bt_bits;
375 	unsigned b = id % bt_bits;
376 	basetype d = data[w];
377 	basetype dn = (d & ~(1 << b)) | (bit << b);
378 	bool r = (d != dn);
379 	data[w] = r ? dn : data[w];
380 	return r;
381 }
382 
clear()383 void sb_bitset::clear() {
384 	std::fill(data.begin(), data.end(), 0);
385 }
386 
resize(unsigned size)387 void sb_bitset::resize(unsigned size) {
388 	unsigned cur_data_size = data.size();
389 	unsigned new_data_size = (size + bt_bits - 1) / bt_bits;
390 
391 
392 	if (new_data_size != cur_data_size)
393 		data.resize(new_data_size);
394 
395 	// make sure that new bits in the existing word are cleared
396 	if (cur_data_size && size > bit_size && bit_size % bt_bits) {
397 		basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits);
398 		data[cur_data_size - 1] &= ~clear_mask;
399 	}
400 
401 	bit_size = size;
402 }
403 
find_bit(unsigned start)404 unsigned sb_bitset::find_bit(unsigned start) {
405 	assert(start < bit_size);
406 	unsigned w = start / bt_bits;
407 	unsigned b = start % bt_bits;
408 	unsigned sz = data.size();
409 
410 	while (w < sz) {
411 		basetype d = data[w] >> b;
412 		if (d != 0) {
413 			unsigned pos = __builtin_ctz(d) + b + w * bt_bits;
414 			return pos;
415 		}
416 
417 		b = 0;
418 		++w;
419 	}
420 
421 	return bit_size;
422 }
423 
iterator(shader & sh,sb_value_set * s,unsigned nb)424 sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb)
425 	: vp(sh.get_value_pool()), s(s), nb(nb) {}
426 
add_set_checked(sb_value_set & s2)427 bool sb_value_set::add_set_checked(sb_value_set& s2) {
428 	if (bs.size() < s2.bs.size())
429 		bs.resize(s2.bs.size());
430 	sb_bitset nbs = bs | s2.bs;
431 	if (bs != nbs) {
432 		bs.swap(nbs);
433 		return true;
434 	}
435 	return false;
436 }
437 
remove_set(sb_value_set & s2)438 void r600_sb::sb_value_set::remove_set(sb_value_set& s2) {
439 	bs.mask(s2.bs);
440 }
441 
add_val(value * v)442 bool sb_value_set::add_val(value* v) {
443 	assert(v);
444 	if (bs.size() < v->uid)
445 		bs.resize(v->uid + 32);
446 
447 	return bs.set_chk(v->uid - 1, 1);
448 }
449 
remove_vec(vvec & vv)450 bool sb_value_set::remove_vec(vvec& vv) {
451 	bool modified = false;
452 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
453 		if (*I)
454 			modified |= remove_val(*I);
455 	}
456 	return modified;
457 }
458 
clear()459 void sb_value_set::clear() {
460 	bs.clear();
461 }
462 
remove_val(value * v)463 bool sb_value_set::remove_val(value* v) {
464 	assert(v);
465 	if (bs.size() < v->uid)
466 		return false;
467 	return bs.set_chk(v->uid - 1, 0);
468 }
469 
add_vec(vvec & vv)470 bool r600_sb::sb_value_set::add_vec(vvec& vv) {
471 	bool modified = false;
472 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
473 		value *v = *I;
474 		if (v)
475 			modified |= add_val(v);
476 	}
477 	return modified;
478 }
479 
contains(value * v)480 bool r600_sb::sb_value_set::contains(value* v) {
481 	unsigned b = v->uid - 1;
482 	if (b < bs.size())
483 		return bs.get(b);
484 	else
485 		return false;
486 }
487 
empty()488 bool sb_value_set::empty() {
489 	return bs.size() == 0 || bs.find_bit(0) == bs.size();
490 }
491 
swap(sb_bitset & bs2)492 void sb_bitset::swap(sb_bitset& bs2) {
493 	std::swap(data, bs2.data);
494 	std::swap(bit_size, bs2.bit_size);
495 }
496 
operator ==(const sb_bitset & bs2)497 bool sb_bitset::operator ==(const sb_bitset& bs2) {
498 	if (bit_size != bs2.bit_size)
499 		return false;
500 
501 	for (unsigned i = 0, c = data.size(); i < c; ++i) {
502 		if (data[i] != bs2.data[i])
503 			return false;
504 	}
505 	return true;
506 }
507 
/**
 * Intersects this bitset with bs2 in place and returns *this.
 *
 * If this bitset is larger than bs2 it is first shrunk to the size of bs2;
 * the words both bitsets have in common are then AND-ed together.
 */
sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) {
	if (bit_size > bs2.bit_size) {
		resize(bs2.bit_size);
	}

	unsigned common = std::min(data.size(), bs2.data.size());
	unsigned i = 0;

	while (i < common) {
		data[i] &= bs2.data[i];
		++i;
	}

	return *this;
}
519 
/**
 * Clears in this bitset every bit that is set in bs2 and returns *this.
 *
 * If this bitset is smaller than bs2 it is first grown to the size of bs2.
 * Only the words that exist in both bitsets are processed: when this bitset
 * is the larger one, bs2 has no bits in the remaining words, so there is
 * nothing to clear there and bs2's storage must not be read past its end.
 */
sb_bitset& sb_bitset::mask(const sb_bitset& bs2) {
	if (bit_size < bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= ~bs2.data[i];
	}
	return *this;
}
531 
check()532 bool ra_constraint::check() {
533 	assert(kind == CK_SAME_REG);
534 
535 	unsigned reg = 0;
536 
537 	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
538 		value *v = *I;
539 		if (!v)
540 			continue;
541 
542 		if (!v->gpr)
543 			return false;
544 
545 		if (reg == 0)
546 			reg = v->gpr.sel() + 1;
547 		else if (reg != v->gpr.sel() + 1)
548 			return false;
549 
550 		if (v->is_chan_pinned()) {
551 			if (v->pin_gpr.chan() != v->gpr.chan())
552 				return false;
553 		}
554 	}
555 	return true;
556 }
557 
is_dead()558 bool gpr_array::is_dead() {
559 	return false;
560 }
561 
562 } // namespace r600_sb
563