1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef R600_SB_IR_H_
28 #define R600_SB_IR_H_
29 
#include <stdint.h>

#include <algorithm>
#include <cassert>
#include <cstring>
#include <functional>
#include <iterator>
#include <list>
#include <set>
#include <utility>
#include <vector>

#include "sb_bc.h"
37 
38 namespace r600_sb {
39 
// Selector numbers of the special registers. value::is_AR() and the
// related predicates compare a value's select against
// sel_chan(<one of these>, 0). Numbering starts at 128 and is sequential.
enum special_regs {
	SV_ALU_PRED      = 128,
	SV_EXEC_MASK     = 129,
	SV_AR_INDEX      = 130,
	SV_VALID_MASK    = 131,
	SV_GEOMETRY_EMIT = 132,
	SV_LDS_RW        = 133,
	SV_LDS_OQA       = 134,
	SV_LDS_OQB       = 135,
	SV_SCRATCH       = 136
};
51 
52 class node;
53 class value;
54 class shader;
55 
56 struct sel_chan
57 {
58 	unsigned id;
59 
idsel_chan60 	sel_chan(unsigned id = 0) : id(id) {}
sel_chansel_chan61 	sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {}
62 
selsel_chan63 	unsigned sel() const { return sel(id); }
chansel_chan64 	unsigned chan() const {return chan(id); }
65 	operator unsigned() const {return id;}
66 
selsel_chan67 	static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
chansel_chan68 	static unsigned chan(unsigned idx) { return (idx-1) & 3; }
69 
sel_chansel_chan70 	sel_chan(unsigned bank, unsigned index,
71 			 unsigned chan, alu_kcache_index_mode index_mode)
72 		: id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
kcache_index_modesel_chan73 	unsigned kcache_index_mode() const { return sel() >> 28; }
kcache_selsel_chan74 	unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
kcache_banksel_chan75 	unsigned kcache_bank() const { return kcache_sel() >> 12; }
76 };
77 
78 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
79 	static const char * ch = "xyzw";
80 	o << r.sel() << "." << ch[r.chan()];
81 	return o;
82 }
83 
84 typedef std::vector<value*>  vvec;
85 
// Bookkeeping for a pool built from a list of blocks. The allocation and
// release logic (allocate / free_all) is implemented out of line; the
// destructor calls free_all().
class sb_pool {
protected:
	static const unsigned SB_POOL_ALIGN = 8;
	static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1u << 16);

	typedef std::vector<void*> block_vector;

	unsigned block_size;   // size handed to the constructor
	block_vector blocks;   // block pointers owned by the pool
	unsigned total_size;   // starts at 0; maintained out of line

public:
	sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE)
		: block_size(block_size),
		  blocks(),
		  total_size(0) {}

	virtual ~sb_pool() {
		free_all();
	}

	void* allocate(unsigned sz);

protected:
	void free_all();
};
108 
// Ordered set stored as a sorted std::vector.
//
// Elements are kept sorted according to Comp; two elements are treated as
// the same when neither compares less than the other. Lookups use binary
// search, insert/erase shift the tail of the vector.
template <typename V, typename Comp = std::less<V> >
class sb_set {
	typedef std::vector<V> data_vector;
	data_vector vec;
public:

	typedef typename data_vector::iterator iterator;
	typedef typename data_vector::const_iterator const_iterator;

	sb_set() : vec() {}
	~sb_set() {}

	iterator begin() { return vec.begin(); }
	iterator end() { return vec.end(); }
	const_iterator begin() const { return vec.begin(); }
	const_iterator end() const { return vec.end(); }

	// Replaces the contents with the union of this set and s.
	void add_set(const sb_set& s) {
		data_vector merged;
		merged.reserve(vec.size() + s.vec.size());
		// set_union emits its output in order, so appending is enough.
		std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(),
		               std::back_inserter(merged), Comp());
		vec.swap(merged);
	}

	// First position whose element does not compare less than v.
	iterator lower_bound(const V& v) {
		return std::lower_bound(vec.begin(), vec.end(), v, Comp());
	}

	// Inserts v unless an equivalent element exists. Returns the position
	// of the (new or existing) element and whether an insertion happened.
	std::pair<iterator, bool> insert(const V& v) {
		iterator P = lower_bound(v);
		if (P != vec.end() && is_equal(*P, v))
			return std::make_pair(P, false);
		return std::make_pair(vec.insert(P, v), true);
	}

	// Removes the element equivalent to v; returns the number removed
	// (0 or 1).
	unsigned erase(const V&  v) {
		iterator P = lower_bound(v);
		if (P == vec.end() || !is_equal(*P, v))
			return 0;
		vec.erase(P);
		return 1;
	}

	void clear() { vec.clear(); }

	bool empty() const { return vec.empty(); }

	// Equivalence derived from the ordering: neither element is less.
	bool is_equal(const V& v1, const V& v2) const {
		Comp less;
		return !less(v1, v2) && !less(v2, v1);
	}

	// Position of the element equivalent to v, or end() if there is none.
	iterator find(const V& v) {
		iterator P = lower_bound(v);
		return (P != vec.end() && is_equal(*P, v)) ? P : vec.end();
	}

	unsigned size() const { return vec.size(); }
	void erase(iterator I) { vec.erase(I); }
};
169 
170 template <typename K, typename V, typename KComp = std::less<K> >
171 class sb_map {
172 	typedef std::pair<K, V> datatype;
173 
174 	struct Comp {
operatorComp175 		bool operator()(const datatype &v1, const datatype &v2) {
176 			return KComp()(v1.first, v2.first);
177 		}
178 	};
179 
180 	typedef sb_set<datatype, Comp> dataset;
181 
182 	dataset set;
183 
184 public:
185 
sb_map()186 	sb_map() : set() {}
187 
188 	typedef typename dataset::iterator iterator;
189 
begin()190 	iterator begin() { return set.begin(); }
end()191 	iterator end() { return set.end(); }
192 
clear()193 	void clear() { set.clear(); }
194 
195 	V& operator[](const K& key) {
196 		datatype P = std::make_pair(key, V());
197 		iterator F = set.find(P);
198 		if (F == set.end()) {
199 			return (*(set.insert(P).first)).second;
200 		} else {
201 			return (*F).second;
202 		}
203 	}
204 
insert(const datatype & d)205 	std::pair<iterator, bool> insert(const datatype& d) {
206 		return set.insert(d);
207 	}
208 
find(const K & key)209 	iterator find(const K& key) {
210 		return set.find(std::make_pair(key, V()));
211 	}
212 
erase(const K & key)213 	unsigned erase(const K& key) {
214 		return set.erase(std::make_pair(key, V()));
215 	}
216 
erase(iterator I)217 	void erase(iterator I) {
218 		set.erase(I);
219 	}
220 };
221 
// Resizable bitset stored as a vector of 32-bit words. Only the default
// constructor, size(), operator!=, operator|= and operator| are defined
// here; every other member is implemented out of line.
class sb_bitset {
	typedef uint32_t basetype;
	// number of bits in one storage word
	static const unsigned bt_bits = sizeof(basetype) * 8;
	std::vector<basetype> data;
	unsigned bit_size;

public:

	sb_bitset() : data(), bit_size(0) {}

	bool get(unsigned id);
	void set(unsigned id, bool bit = true);
	bool set_chk(unsigned id, bool bit = true);

	void clear();
	void resize(unsigned size);

	unsigned size() { return bit_size; }

	unsigned find_bit(unsigned start = 0);

	void swap(sb_bitset & bs2);

	bool operator==(const sb_bitset &bs2);
	bool operator!=(const sb_bitset &bs2) { return !operator==(bs2); }

	// In-place union. This set is grown first when bs2 holds more bits,
	// then the words common to both are OR-ed together.
	sb_bitset& operator|=(const sb_bitset &bs2) {
		if (bs2.bit_size > bit_size)
			resize(bs2.bit_size);

		const unsigned words = std::min(data.size(), bs2.data.size());
		for (unsigned w = 0; w != words; ++w)
			data[w] |= bs2.data[w];

		return *this;
	}

	sb_bitset& operator&=(const sb_bitset &bs2);
	sb_bitset& mask(const sb_bitset &bs2);

	// Union of two sets, returned as a new object.
	friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) {
		sb_bitset merged(b1);
		merged |= b2;
		return merged;
	}
};
269 
// Kind tag stored in value::kind.
enum value_kind {
	// register kinds
	VLK_REG           = 0,
	VLK_REL_REG       = 1,
	VLK_SPECIAL_REG   = 2,
	VLK_TEMP          = 3,

	// constant kinds
	VLK_CONST         = 4,
	VLK_KCACHE        = 5,
	VLK_PARAM         = 6,
	VLK_SPECIAL_CONST = 7,

	VLK_UNDEF         = 8
};
283 
284 
285 
286 class sb_value_pool : protected sb_pool {
287 	unsigned aligned_elt_size;
288 
289 public:
290 	sb_value_pool(unsigned elt_size, unsigned block_elts = 256)
291 		: sb_pool(block_elts * (aligned_elt_size = ((elt_size +
292 				SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {}
293 
~sb_value_pool()294 	virtual ~sb_value_pool() { delete_all(); }
295 
296 	value* create(value_kind k, sel_chan regid, unsigned ver);
297 
298 	value* operator[](unsigned id) {
299 		unsigned offset = id * aligned_elt_size;
300 		unsigned block_id;
301 		if (offset < block_size) {
302 			block_id = 0;
303 		} else {
304 			block_id = offset / block_size;
305 			offset = offset % block_size;
306 		}
307 		return (value*)((char*)blocks[block_id] + offset);
308 	}
309 
size()310 	unsigned size() { return total_size / aligned_elt_size; }
311 
312 protected:
313 	void delete_all();
314 };
315 
316 
317 
318 
319 
320 class sb_value_set {
321 
322 	sb_bitset bs;
323 
324 public:
sb_value_set()325 	sb_value_set() : bs() {}
326 
327 	class iterator {
328 		sb_value_pool &vp;
329 		sb_value_set *s;
330 		unsigned nb;
331 	public:
332 		iterator(shader &sh, sb_value_set *s, unsigned nb = 0);
333 
334 
335 		iterator& operator++() {
336 			if (nb + 1 < s->bs.size())
337 				nb = s->bs.find_bit(nb + 1);
338 			else
339 				nb = s->bs.size();
340 			return *this;
341 		}
342 		bool operator !=(const iterator &i) {
343 			return s != i.s || nb != i.nb;
344 		}
345 		bool operator ==(const iterator &i) { return !(*this != i); }
346 		value* operator *() {
347 			 return vp[nb];
348 		}
349 
350 
351 	};
352 
begin(shader & sh)353 	iterator begin(shader &sh) {
354 		return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0);
355 	}
end(shader & sh)356 	iterator end(shader &sh) { return iterator(sh, this, bs.size()); }
357 
358 	bool add_set_checked(sb_value_set & s2);
359 
add_set(sb_value_set & s2)360 	void add_set(sb_value_set & s2)  {
361 		if (bs.size() < s2.bs.size())
362 			bs.resize(s2.bs.size());
363 		bs |= s2.bs;
364 	}
365 
366 	void remove_set(sb_value_set & s2);
367 
368 	bool add_vec(vvec &vv);
369 
370 	bool add_val(value *v);
371 	bool contains(value *v);
372 
373 	bool remove_val(value *v);
374 
375 	bool remove_vec(vvec &vv);
376 
377 	void clear();
378 
379 	bool empty();
380 };
381 
382 typedef sb_value_set val_set;
383 
384 struct gpr_array {
385 	sel_chan base_gpr; // original gpr
386 	sel_chan gpr; // assigned by regalloc
387 	unsigned array_size;
388 
gpr_arraygpr_array389 	gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr),
390 			array_size(array_size) {}
391 
hashgpr_array392 	unsigned hash() { return (base_gpr << 10) * array_size; }
393 
394 	val_set interferences;
395 	vvec refs;
396 
397 	bool is_dead();
398 
399 };
400 
401 typedef std::vector<gpr_array*> regarray_vec;
402 
// Bit flags stored in value::flags.
enum value_flags {
	VLF_UNDEF    = 0x001,
	VLF_READONLY = 0x002,
	VLF_DEAD     = 0x004,

	VLF_PIN_REG  = 0x008,
	VLF_PIN_CHAN = 0x010,

	// opposite to alu clause local value - goes through alu clause boundary
	// (can't use temp gpr, can't recolor in the alu scheduler, etc)
	VLF_GLOBAL   = 0x020,
	VLF_FIXED    = 0x040,
	VLF_PVPS     = 0x080,

	VLF_PREALLOC = 0x100
};

// Bitwise operators for value_flags. Each one works on the unsigned
// representation and casts the result back to the enum type.
inline value_flags operator |(value_flags l, value_flags r) {
	const unsigned bits = (unsigned)l | (unsigned)r;
	return (value_flags)bits;
}
inline value_flags operator &(value_flags l, value_flags r) {
	const unsigned bits = (unsigned)l & (unsigned)r;
	return (value_flags)bits;
}
inline value_flags operator ~(value_flags l) {
	const unsigned bits = ~(unsigned)l;
	return (value_flags)bits;
}
inline value_flags& operator |=(value_flags &l, value_flags r) {
	return l = l | r;
}
inline value_flags& operator &=(value_flags &l, value_flags r) {
	return l = l & r;
}
437 
438 sb_ostream& operator << (sb_ostream &o, value &v);
439 
440 typedef uint32_t value_hash;
441 
442 typedef std::list< node * > uselist;
443 
// Kinds of register allocation constraint.
enum constraint_kind {
	CK_SAME_REG  = 0,
	CK_PACKED_BS = 1,
	CK_PHI       = 2
};
449 
450 class shader;
451 class sb_value_pool;
452 struct ra_chunk;
453 class ra_constraint;
454 
455 class value {
456 protected:
457 	value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
kind(k)458 		: kind(k), flags(),
459 			rel(), array(),
460 			version(ver), select(select), pin_gpr(select), gpr(),
461 			gvn_source(), ghash(),
462 			def(), adef(), uses(), constraint(), chunk(),
463 			literal_value(), uid(sh_id) {}
464 
~value()465 	~value() { delete_uses(); }
466 
467 	friend class sb_value_pool;
468 public:
469 	value_kind kind;
470 	value_flags flags;
471 
472 	vvec mdef;
473 	vvec muse;
474 	value *rel;
475 	gpr_array *array;
476 
477 	unsigned version;
478 
479 	sel_chan select;
480 	sel_chan pin_gpr;
481 	sel_chan gpr;
482 
483 	value *gvn_source;
484 	value_hash ghash;
485 
486 	node *def, *adef;
487 	uselist uses;
488 
489 	ra_constraint *constraint;
490 	ra_chunk *chunk;
491 
492 	literal literal_value;
493 
is_const()494 	bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; }
495 
is_AR()496 	bool is_AR() {
497 		return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
498 	}
is_geometry_emit()499 	bool is_geometry_emit() {
500 		return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
501 	}
is_lds_access()502 	bool is_lds_access() {
503 		return is_special_reg() && select == sel_chan(SV_LDS_RW, 0);
504 	}
is_lds_oq()505 	bool is_lds_oq() {
506 		return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0));
507 	}
508 
any_def()509 	node* any_def() {
510 		assert(!(def && adef));
511 		return def ? def : adef;
512 	}
513 
gvalue()514 	value* gvalue() {
515 		value *v = this;
516 		while (v->gvn_source && v != v->gvn_source)
517 			// FIXME we really shouldn't have such chains
518 			v = v->gvn_source;
519 		return v;
520 	}
is_scratch()521 	bool is_scratch() {
522 		return is_special_reg() && select == sel_chan(SV_SCRATCH, 0);
523 	}
524 
is_float_0_or_1()525 	bool is_float_0_or_1() {
526 		value *v = gvalue();
527 		return v->is_const() && (v->literal_value == literal(0)
528 						|| v->literal_value == literal(1.0f));
529 	}
530 
is_undef()531 	bool is_undef() { return gvalue()->kind == VLK_UNDEF; }
532 
is_any_gpr()533 	bool is_any_gpr() {
534 		return (kind == VLK_REG || kind == VLK_TEMP);
535 	}
536 
is_agpr()537 	bool is_agpr() {
538 		return array && is_any_gpr();
539 	}
540 
541 	// scalar gpr, as opposed to element of gpr array
is_sgpr()542 	bool is_sgpr() {
543 		return !array && is_any_gpr();
544 	}
545 
is_special_reg()546 	bool is_special_reg() {	return kind == VLK_SPECIAL_REG;	}
is_any_reg()547 	bool is_any_reg() { return is_any_gpr() || is_special_reg(); }
is_kcache()548 	bool is_kcache() { return kind == VLK_KCACHE; }
is_rel()549 	bool is_rel() {	return kind == VLK_REL_REG; }
is_readonly()550 	bool is_readonly() { return flags & VLF_READONLY; }
551 
is_chan_pinned()552 	bool is_chan_pinned() { return flags & VLF_PIN_CHAN; }
is_reg_pinned()553 	bool is_reg_pinned() { return flags & VLF_PIN_REG; }
554 
555 	bool is_global();
556 	void set_global();
557 	void set_prealloc();
558 
559 	bool is_prealloc();
560 
561 	bool is_fixed();
562 	void fix();
563 
is_dead()564 	bool is_dead() { return flags & VLF_DEAD; }
565 
get_const_value()566 	literal & get_const_value() {
567 		value *v = gvalue();
568 		assert(v->is_const());
569 		return v->literal_value;
570 	}
571 
572 	// true if needs to be encoded as literal in alu
is_literal()573 	bool is_literal() {
574 		return is_const()
575 				&& literal_value != literal(0)
576 				&& literal_value != literal(1)
577 				&& literal_value != literal(-1)
578 				&& literal_value != literal(0.5)
579 				&& literal_value != literal(1.0);
580 	}
581 
582 	void add_use(node *n);
583 	void remove_use(const node *n);
584 
585 	value_hash hash();
586 	value_hash rel_hash();
587 
assign_source(value * v)588 	void assign_source(value *v) {
589 		assert(!gvn_source || gvn_source == this);
590 		gvn_source = v->gvalue();
591 	}
592 
v_equal(value * v)593 	bool v_equal(value *v) { return gvalue() == v->gvalue(); }
594 
595 	unsigned use_count();
596 	void delete_uses();
597 
get_final_gpr()598 	sel_chan get_final_gpr() {
599 		if (array && array->gpr) {
600 			int reg_offset = select.sel() - array->base_gpr.sel();
601 			if (rel && rel->is_const())
602 				reg_offset += rel->get_const_value().i;
603 			return array->gpr + (reg_offset << 2);
604 		} else {
605 			return gpr;
606 		}
607 	}
608 
get_final_chan()609 	unsigned get_final_chan() {
610 		if (array) {
611 			assert(array->gpr);
612 			return array->gpr.chan();
613 		} else {
614 			assert(gpr);
615 			return gpr.chan();
616 		}
617 	}
618 
619 	/* Check whether copy-propagation of src into this would create an access
620 	 * conflict with relative addressing, i.e. an operation that tries to access
621 	 * array elements with different address register values.
622 	 */
623 	bool no_reladdr_conflict_with(value *src);
624 
625 	val_set interferences;
626 	unsigned uid;
627 };
628 
629 class expr_handler;
630 
631 class value_table {
632 	typedef std::vector<value*> vt_item;
633 	typedef std::vector<vt_item> vt_table;
634 
635 	expr_handler &ex;
636 
637 	unsigned size_bits;
638 	unsigned size;
639 	unsigned size_mask;
640 
641 	vt_table hashtable;
642 
643 	unsigned cnt;
644 
645 public:
646 
647 	value_table(expr_handler &ex, unsigned size_bits = 10)
ex(ex)648 		: ex(ex), size_bits(size_bits), size(1u << size_bits),
649 		  size_mask(size - 1), hashtable(size), cnt() {}
650 
~value_table()651 	~value_table() {}
652 
653 	void add_value(value* v);
654 
655 	bool expr_equal(value* l, value* r);
656 
count()657 	unsigned count() { return cnt; }
658 
659 	void get_values(vvec & v);
660 };
661 
662 class sb_context;
663 
// Primary type tag stored in node::type.
enum node_type {
	NT_UNKNOWN = 0,
	NT_LIST    = 1,
	NT_OP      = 2,
	NT_REGION  = 3,
	NT_REPEAT  = 4,
	NT_DEPART  = 5,
	NT_IF      = 6
};
673 
// Secondary type tag stored in node::subtype.
enum node_subtype {
	NST_UNKNOWN            = 0,
	NST_LIST               = 1,
	NST_ALU_GROUP          = 2,
	NST_ALU_CLAUSE         = 3,
	NST_ALU_INST           = 4,
	NST_ALU_PACKED_INST    = 5,
	NST_CF_INST            = 6,
	NST_FETCH_INST         = 7,
	NST_TEX_CLAUSE         = 8,
	NST_VTX_CLAUSE         = 9,
	NST_GDS_CLAUSE         = 10,

	NST_BB                 = 11,

	NST_PHI                = 12,
	NST_PSI                = 13,
	NST_COPY               = 14,

	NST_LOOP_PHI_CONTAINER = 15,
	NST_LOOP_CONTINUE      = 16,
	NST_LOOP_BREAK         = 17
};
697 
// Bit flags stored in node::flags.
enum node_flags {
	NF_EMPTY                = 0x000,
	NF_DEAD                 = 0x001,
	NF_REG_CONSTRAINT       = 0x002,
	NF_CHAN_CONSTRAINT      = 0x004,
	NF_ALU_4SLOT            = 0x008,
	NF_CONTAINER            = 0x010,

	NF_COPY_MOV             = 0x020,

	NF_DONT_KILL            = 0x040,
	NF_DONT_HOIST           = 0x080,
	NF_DONT_MOVE            = 0x100,

	// for KILLxx - we want to schedule them as early as possible
	NF_SCHEDULE_EARLY       = 0x200,

	// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
	NF_ALU_STACK_WORKAROUND = 0x400
};

// Bitwise operators for node_flags. Each one works on the unsigned
// representation and casts the result back to the enum type.
inline node_flags operator |(node_flags l, node_flags r) {
	const unsigned bits = (unsigned)l | (unsigned)r;
	return (node_flags)bits;
}
inline node_flags& operator |=(node_flags &l, node_flags r) {
	return l = l | r;
}

inline node_flags& operator &=(node_flags &l, node_flags r) {
	const unsigned bits = (unsigned)l & (unsigned)r;
	return l = (node_flags)bits;
}

inline node_flags operator ~(node_flags r) {
	const unsigned bits = ~(unsigned)r;
	return (node_flags)bits;
}
735 
// Counters filled in by container_node::collect_stats(). All counters
// start at zero and uses_ar starts false.
struct node_stats {
	unsigned alu_count;
	unsigned alu_kill_count;
	unsigned alu_copy_mov_count;
	unsigned cf_count;
	unsigned fetch_count;
	unsigned region_count;
	unsigned loop_count;
	unsigned phi_count;
	unsigned loop_phi_count;
	unsigned depart_count;
	unsigned repeat_count;
	unsigned if_count;
	bool uses_ar;

	node_stats()
		: alu_count(0),
		  alu_kill_count(0),
		  alu_copy_mov_count(0),
		  cf_count(0),
		  fetch_count(0),
		  region_count(0),
		  loop_count(0),
		  phi_count(0),
		  loop_phi_count(0),
		  depart_count(0),
		  repeat_count(0),
		  if_count(0),
		  uses_ar(false) {}

	void dump();
};
758 
759 class shader;
760 
761 class vpass;
762 
763 class container_node;
764 class region_node;
765 
766 class node {
767 
768 protected:
769 	node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY)
prev()770 	: prev(), next(), parent(),
771 	  type(nt), subtype(nst), flags(flags),
772 	  pred(), dst(), src() {}
773 
~node()774 	virtual ~node() {};
775 
776 public:
777 	node *prev, *next;
778 	container_node *parent;
779 
780 	node_type type;
781 	node_subtype subtype;
782 	node_flags flags;
783 
784 	value *pred;
785 
786 	vvec dst;
787 	vvec src;
788 
is_valid()789 	virtual bool is_valid() { return true; }
790 	virtual bool accept(vpass &p, bool enter);
791 
792 	void insert_before(node *n);
793 	void insert_after(node *n);
794 	void replace_with(node *n);
795 	void remove();
796 
797 	virtual value_hash hash() const;
798 	value_hash hash_src() const;
799 
800 	virtual bool fold_dispatch(expr_handler *ex);
801 
is_container()802 	bool is_container() { return flags & NF_CONTAINER; }
803 
is_alu_packed()804 	bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; }
is_alu_inst()805 	bool is_alu_inst() { return subtype == NST_ALU_INST; }
is_alu_group()806 	bool is_alu_group() { return subtype == NST_ALU_GROUP; }
is_alu_clause()807 	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
808 
is_fetch_clause()809 	bool is_fetch_clause() {
810 		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE;
811 	}
812 
is_copy()813 	bool is_copy() { return subtype == NST_COPY; }
is_copy_mov()814 	bool is_copy_mov() { return flags & NF_COPY_MOV; }
is_any_alu()815 	bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); }
816 
is_fetch_inst()817 	bool is_fetch_inst() { return subtype == NST_FETCH_INST; }
is_cf_inst()818 	bool is_cf_inst() { return subtype == NST_CF_INST; }
819 
is_region()820 	bool is_region() { return type == NT_REGION; }
is_depart()821 	bool is_depart() { return type == NT_DEPART; }
is_repeat()822 	bool is_repeat() { return type == NT_REPEAT; }
is_if()823 	bool is_if() { return type == NT_IF; }
is_bb()824 	bool is_bb() { return subtype == NST_BB; }
825 
is_phi()826 	bool is_phi() { return subtype == NST_PHI; }
827 
is_dead()828 	bool is_dead() { return flags & NF_DEAD; }
829 
830 	bool is_cf_op(unsigned op);
831 	bool is_alu_op(unsigned op);
832 	bool is_fetch_op(unsigned op);
833 
834 	unsigned cf_op_flags();
835 	unsigned alu_op_flags();
836 	unsigned alu_op_slot_flags();
837 	unsigned fetch_op_flags();
838 
839 	bool is_mova();
840 	bool is_pred_set();
841 
vec_uses_ar(vvec & vv)842 	bool vec_uses_ar(vvec &vv) {
843 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
844 			value *v = *I;
845 			if (v && v->rel && !v->rel->is_const())
846 				return true;
847 		}
848 		return false;
849 	}
850 
uses_ar()851 	bool uses_ar() {
852 		return vec_uses_ar(dst) || vec_uses_ar(src);
853 	}
854 
vec_uses_lds_oq(vvec & vv)855 	bool vec_uses_lds_oq(vvec &vv) {
856 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
857 			value *v = *I;
858 			if (v && v->is_lds_oq())
859 				return true;
860 		}
861 		return false;
862 	}
863 
consumes_lds_oq()864 	bool consumes_lds_oq() {
865 		return vec_uses_lds_oq(src);
866 	}
867 
produces_lds_oq()868 	bool produces_lds_oq() {
869 		return vec_uses_lds_oq(dst);
870 	}
871 
872 	region_node* get_parent_region();
873 
874 	friend class shader;
875 };
876 
877 class container_node : public node {
878 public:
879 
880 	container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST,
881 	               node_flags flags = NF_EMPTY)
882 	: node(nt, nst, flags | NF_CONTAINER), first(), last(),
883 	  live_after(), live_before() {}
884 
885 	// child items list
886 	node *first, *last;
887 
888 	val_set live_after;
889 	val_set live_before;
890 
891 	class iterator {
892 		node *p;
893 	public:
p(pp)894 		iterator(node *pp = NULL) : p(pp) {}
895 		iterator & operator ++() { p = p->next; return *this;}
896 		iterator & operator --() { p = p->prev; return *this;}
897 		node* operator *() { return p; }
898 		node* operator ->() { return p; }
advance(int n)899 		const iterator advance(int n) {
900 			if (!n) return *this;
901 			iterator I(p);
902 			if (n > 0) while (n--) ++I;
903 			else while (n++) --I;
904 			return I;
905 		}
906 		const iterator operator +(int n) { return advance(n); }
907 		const iterator operator -(int n) { return advance(-n); }
908 		bool operator !=(const iterator &i) { return p != i.p; }
909 		bool operator ==(const iterator &i) { return p == i.p; }
910 	};
911 
912 	class riterator {
913 		iterator i;
914 	public:
i(p)915 		riterator(node *p = NULL) : i(p) {}
916 		riterator & operator ++() { --i; return *this;}
917 		riterator & operator --() { ++i; return *this;}
918 		node* operator *() { return *i; }
919 		node* operator ->() { return *i; }
920 		bool operator !=(const riterator &r) { return i != r.i; }
921 		bool operator ==(const riterator &r) { return i == r.i; }
922 	};
923 
begin()924 	iterator begin() { return first; }
end()925 	iterator end() { return NULL; }
rbegin()926 	riterator rbegin() { return last; }
rend()927 	riterator rend() { return NULL; }
928 
empty()929 	bool empty() { assert(first != NULL || first == last); return !first; }
930 	unsigned count();
931 
932 	// used with node containers that represent shceduling queues
933 	// ignores copies and takes into account alu_packed_node items
934 	unsigned real_alu_count();
935 
936 	void push_back(node *n);
937 	void push_front(node *n);
938 
939 	void insert_node_before(node *s, node *n);
940 	void insert_node_after(node *s, node *n);
941 
942 	void append_from(container_node *c);
943 
944 	// remove range [b..e) from some container and assign to this container
945 	void move(iterator b, iterator e);
946 
947 	void expand();
948 	void expand(container_node *n);
949 	void remove_node(node *n);
950 
951 	node *cut(iterator b, iterator e);
952 
clear()953 	void clear() { first = last = NULL; }
954 
is_valid()955 	virtual bool is_valid() { return true; }
956 	virtual bool accept(vpass &p, bool enter);
957 	virtual bool fold_dispatch(expr_handler *ex);
958 
front()959 	node* front() { return first; }
back()960 	node* back() { return last; }
961 
962 	void collect_stats(node_stats &s);
963 
964 	friend class shader;
965 
966 
967 };
968 
969 typedef container_node::iterator node_iterator;
970 typedef container_node::riterator node_riterator;
971 
972 class alu_group_node : public container_node {
973 protected:
alu_group_node()974 	alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {}
975 public:
976 
977 	std::vector<literal> literals;
978 
is_valid()979 	virtual bool is_valid() { return subtype == NST_ALU_GROUP; }
980 	virtual bool accept(vpass &p, bool enter);
981 
982 
literal_chan(literal l)983 	unsigned literal_chan(literal l) {
984 		std::vector<literal>::iterator F =
985 				std::find(literals.begin(), literals.end(), l);
986 		assert(F != literals.end());
987 		return F - literals.begin();
988 	}
989 
990 	friend class shader;
991 };
992 
993 class cf_node : public container_node {
994 protected:
cf_node()995 	cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(),
996 		jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); };
997 public:
998 	bc_cf bc;
999 
1000 	cf_node *jump_target;
1001 	bool jump_after_target;
1002 
is_valid()1003 	virtual bool is_valid() { return subtype == NST_CF_INST; }
1004 	virtual bool accept(vpass &p, bool enter);
1005 	virtual bool fold_dispatch(expr_handler *ex);
1006 
jump(cf_node * c)1007 	void jump(cf_node *c) { jump_target = c; jump_after_target = false; }
jump_after(cf_node * c)1008 	void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; }
1009 
1010 	friend class shader;
1011 };
1012 
1013 class alu_node : public node {
1014 protected:
alu_node()1015 	alu_node() : node(NT_OP, NST_ALU_INST) {  }
1016 public:
1017 	bc_alu bc;
1018 
is_valid()1019 	virtual bool is_valid() { return subtype == NST_ALU_INST; }
1020 	virtual bool accept(vpass &p, bool enter);
1021 	virtual bool fold_dispatch(expr_handler *ex);
1022 
forced_bank_swizzle()1023 	unsigned forced_bank_swizzle() {
1024 		return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ?
1025 				VEC_210 : 0;
1026 	}
1027 
1028 	// return param index + 1 if instruction references interpolation param,
1029 	// otherwise 0
1030 	unsigned interp_param();
1031 
1032 	alu_group_node *get_alu_group_node();
1033 
1034 	friend class shader;
1035 };
1036 
1037 // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later)
1038 class alu_packed_node : public container_node {
1039 protected:
alu_packed_node()1040 	alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {}
1041 public:
1042 
op_ptr()1043 	const alu_op_info* op_ptr() {
1044 		return static_cast<alu_node*>(first)->bc.op_ptr;
1045 	}
op()1046 	unsigned op() { return static_cast<alu_node*>(first)->bc.op; }
1047 	void init_args(bool repl);
1048 
is_valid()1049 	virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; }
1050 	virtual bool accept(vpass &p, bool enter);
1051 	virtual bool fold_dispatch(expr_handler *ex);
1052 
1053 	unsigned get_slot_mask();
1054 	void update_packed_items(sb_context &ctx);
1055 
1056 	friend class shader;
1057 };
1058 
1059 class fetch_node : public node {
1060 protected:
fetch_node()1061 	fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); };
1062 public:
1063 	bc_fetch bc;
1064 
is_valid()1065 	virtual bool is_valid() { return subtype == NST_FETCH_INST; }
1066 	virtual bool accept(vpass &p, bool enter);
1067 	virtual bool fold_dispatch(expr_handler *ex);
1068 
uses_grad()1069 	bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; }
1070 
1071 	friend class shader;
1072 };
1073 
1074 class region_node;
1075 
1076 class repeat_node : public container_node {
1077 protected:
repeat_node(region_node * target,unsigned id)1078 	repeat_node(region_node *target, unsigned id)
1079 	: container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {}
1080 public:
1081 	region_node *target;
1082 	unsigned rep_id;
1083 
1084 	virtual bool accept(vpass &p, bool enter);
1085 
1086 	friend class shader;
1087 };
1088 
1089 class depart_node : public container_node {
1090 protected:
depart_node(region_node * target,unsigned id)1091 	depart_node(region_node *target, unsigned id)
1092 	: container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {}
1093 public:
1094 	region_node *target;
1095 	unsigned dep_id;
1096 
1097 	virtual bool accept(vpass &p, bool enter);
1098 
1099 	friend class shader;
1100 };
1101 
1102 class if_node : public container_node {
1103 protected:
if_node()1104 	if_node() : container_node(NT_IF, NST_LIST), cond() {};
1105 public:
1106 	value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx
1107 
1108 	virtual bool accept(vpass &p, bool enter);
1109 
1110 	friend class shader;
1111 };
1112 
1113 typedef std::vector<depart_node*> depart_vec;
1114 typedef std::vector<repeat_node*> repeat_vec;
1115 
1116 class region_node : public container_node {
1117 protected:
region_node(unsigned id)1118 	region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id),
1119 			loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop()
1120 			{}
1121 public:
1122 	unsigned region_id;
1123 
1124 	container_node *loop_phi;
1125 	container_node *phi;
1126 
1127 	val_set vars_defined;
1128 
1129 	depart_vec departs;
1130 	repeat_vec repeats;
1131 
1132 	// true if region was created for loop in the parser, sometimes repeat_node
1133 	// may be optimized away so we need to remember this information
1134 	bool src_loop;
1135 
1136 	virtual bool accept(vpass &p, bool enter);
1137 
dep_count()1138 	unsigned dep_count() { return departs.size(); }
rep_count()1139 	unsigned rep_count() { return repeats.size() + 1; }
1140 
is_loop()1141 	bool is_loop() { return src_loop || !repeats.empty(); }
1142 
get_entry_code_location()1143 	container_node* get_entry_code_location() {
1144 		node *p = first;
1145 		while (p && (p->is_depart() || p->is_repeat()))
1146 			p = static_cast<container_node*>(p)->first;
1147 
1148 		container_node *c = static_cast<container_node*>(p);
1149 		if (c->is_bb())
1150 			return c;
1151 		else
1152 			return c->parent;
1153 	}
1154 
1155 	void expand_depart(depart_node *d);
1156 	void expand_repeat(repeat_node *r);
1157 
1158 	friend class shader;
1159 };
1160 
1161 class bb_node : public container_node {
1162 protected:
bb_node(unsigned id,unsigned loop_level)1163 	bb_node(unsigned id, unsigned loop_level)
1164 		: container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {}
1165 public:
1166 	unsigned id;
1167 	unsigned loop_level;
1168 
1169 	virtual bool accept(vpass &p, bool enter);
1170 
1171 	friend class shader;
1172 };
1173 
1174 
1175 typedef std::vector<region_node*> regions_vec;
1176 typedef std::vector<bb_node*> bbs_vec;
1177 typedef std::list<node*> sched_queue;
1178 typedef sched_queue::iterator sq_iterator;
1179 typedef std::vector<node*> node_vec;
1180 typedef std::list<node*> node_list;
1181 typedef std::set<node*> node_set;
1182 
1183 
1184 
1185 } // namespace r600_sb
1186 
1187 #endif /* R600_SB_IR_H_ */
1188