1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef SB_BC_H_
28 #define SB_BC_H_
29 
#include <stdint.h>
#include "r600_isa.h"

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>
#include <stack>
37 
38 struct r600_bytecode;
39 struct r600_shader;
40 
41 namespace r600_sb {
42 
43 class hw_encoding_format;
44 class node;
45 class alu_node;
46 class cf_node;
47 class fetch_node;
48 class alu_group_node;
49 class region_node;
50 class shader;
51 class value;
52 
/**
 * Minimal abstract text output stream.
 *
 * Subclasses implement write(); every operator << and print_* helper formats
 * its argument into a local buffer and forwards the resulting NUL-terminated
 * text to write().
 */
class sb_ostream {
public:
	sb_ostream() {}

	// Polymorphic base: keep destruction through a base pointer well-defined.
	virtual ~sb_ostream() {}

	// Sink for already formatted, NUL-terminated text.
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	// print pointer value using "%p"
	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	// print single character
	sb_ostream& operator <<(char c) {
		const char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The print_* helpers pass the width through the '*' specifier instead of
	// building a format string at run time, so no width value can overflow a
	// temporary format buffer. Output is limited to 255 characters.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		int l = s.length();
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// %x expects an unsigned argument
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};
139 
140 class sb_ostringstream : public sb_ostream {
141 	std::string data;
142 public:
sb_ostringstream()143 	sb_ostringstream() : data() {}
144 
write(const char * s)145 	virtual void write(const char *s) {
146 		data += s;
147 	}
148 
clear()149 	void clear() { data.clear(); }
150 
c_str()151 	const char* c_str() { return data.c_str(); }
str()152 	std::string& str() { return data; }
153 };
154 
155 class sb_log : public sb_ostream {
156 	FILE *o;
157 public:
sb_log()158 	sb_log() : o(stderr) {}
159 
write(const char * s)160 	virtual void write(const char *s) {
161 		fputs(s, o);
162 	}
163 };
164 
165 extern sb_log sblog;
166 
// Shader target identifiers. TARGET_NUM follows the last real entry and
// therefore equals the number of targets.
enum shader_target
{
	TARGET_UNKNOWN = 0,
	TARGET_VS      = 1,
	TARGET_ES      = 2,
	TARGET_PS      = 3,
	TARGET_GS      = 4,
	TARGET_GS_COPY = 5,
	TARGET_COMPUTE = 6,
	TARGET_FETCH   = 7,
	TARGET_HS      = 8,
	TARGET_LS      = 9,

	TARGET_NUM
};
182 
// One bit per hardware class plus the combinations used by the encoding
// format definitions; sb_context::hw_class_bit() maps sb_hw_class to the
// single-bit values.
enum sb_hw_class_bits
{
	HB_R6 = 0x1,
	HB_R7 = 0x2,
	HB_EG = 0x4,
	HB_CM = 0x8,

	HB_R6R7   = (HB_R6 | HB_R7),
	HB_EGCM   = (HB_EG | HB_CM),
	HB_R6R7EG = (HB_R6R7 | HB_EG),
	HB_R7EGCM = (HB_R7 | HB_EGCM),

	HB_ALL = (HB_R6R7 | HB_EGCM)
};
197 
// Chip identifiers known to the backend (numeric values spelled out).
enum sb_hw_chip
{
	HW_CHIP_UNKNOWN = 0,
	HW_CHIP_R600    = 1,
	HW_CHIP_RV610   = 2,
	HW_CHIP_RV630   = 3,
	HW_CHIP_RV670   = 4,
	HW_CHIP_RV620   = 5,
	HW_CHIP_RV635   = 6,
	HW_CHIP_RS780   = 7,
	HW_CHIP_RS880   = 8,
	HW_CHIP_RV770   = 9,
	HW_CHIP_RV730   = 10,
	HW_CHIP_RV710   = 11,
	HW_CHIP_RV740   = 12,
	HW_CHIP_CEDAR   = 13,
	HW_CHIP_REDWOOD = 14,
	HW_CHIP_JUNIPER = 15,
	HW_CHIP_CYPRESS = 16,
	HW_CHIP_HEMLOCK = 17,
	HW_CHIP_PALM    = 18,
	HW_CHIP_SUMO    = 19,
	HW_CHIP_SUMO2   = 20,
	HW_CHIP_BARTS   = 21,
	HW_CHIP_TURKS   = 22,
	HW_CHIP_CAICOS  = 23,
	HW_CHIP_CAYMAN  = 24,
	HW_CHIP_ARUBA   = 25
};
227 
// Hardware classes. The numeric order is significant:
// sb_context::is_egcm() tests hw_class >= HW_CLASS_EVERGREEN.
enum sb_hw_class
{
	HW_CLASS_UNKNOWN   = 0,
	HW_CLASS_R600      = 1,
	HW_CLASS_R700      = 2,
	HW_CLASS_EVERGREEN = 3,
	HW_CLASS_CAYMAN    = 4
};
236 
// ALU slot indices; values run consecutively from 0.
enum alu_slots {
	SLOT_X,		// 0
	SLOT_Y,		// 1
	SLOT_Z,		// 2
	SLOT_W,		// 3
	SLOT_TRANS	// 4
};
244 
// Miscellaneous size limits used by the bytecode structures.
enum misc_consts {
	MAX_CHAN         = 4,
	MAX_ALU_LITERALS = 4,
	MAX_GPR          = 128,
	MAX_ALU_SLOTS    = 128
};
252 
// Special ALU source select values, written in hex (the same notation that
// sb_context::is_lds_oq() uses for the LDS_OQ range). The numbering has
// gaps: 0xE1, 0xE2 and 0xEF are not assigned a name here.
enum alu_src_sel {

	ALU_SRC_LDS_OQ_A            = 0xDB,
	ALU_SRC_LDS_OQ_B            = 0xDC,
	ALU_SRC_LDS_OQ_A_POP        = 0xDD,
	ALU_SRC_LDS_OQ_B_POP        = 0xDE,
	ALU_SRC_LDS_DIRECT_A        = 0xDF,
	ALU_SRC_LDS_DIRECT_B        = 0xE0,
	ALU_SRC_TIME_HI             = 0xE3,
	ALU_SRC_TIME_LO             = 0xE4,
	ALU_SRC_MASK_HI             = 0xE5,
	ALU_SRC_MASK_LO             = 0xE6,
	ALU_SRC_HW_WAVE_ID          = 0xE7,
	ALU_SRC_SIMD_ID             = 0xE8,
	ALU_SRC_SE_ID               = 0xE9,
	ALU_SRC_HW_THREADGRP_ID     = 0xEA,
	ALU_SRC_WAVE_ID_IN_GRP      = 0xEB,
	ALU_SRC_NUM_THREADGRP_WAVES = 0xEC,
	ALU_SRC_HW_ALU_ODD          = 0xED,
	ALU_SRC_LOOP_IDX            = 0xEE,
	ALU_SRC_PARAM_BASE_ADDR     = 0xF0,
	ALU_SRC_NEW_PRIM_MASK       = 0xF1,
	ALU_SRC_PRIM_MASK_HI        = 0xF2,
	ALU_SRC_PRIM_MASK_LO        = 0xF3,
	ALU_SRC_1_DBL_L             = 0xF4,
	ALU_SRC_1_DBL_M             = 0xF5,
	ALU_SRC_0_5_DBL_L           = 0xF6,
	ALU_SRC_0_5_DBL_M           = 0xF7,
	ALU_SRC_0                   = 0xF8,
	ALU_SRC_1                   = 0xF9,
	ALU_SRC_1_INT               = 0xFA,
	ALU_SRC_M_1_INT             = 0xFB,
	ALU_SRC_0_5                 = 0xFC,
	ALU_SRC_LITERAL             = 0xFD,
	ALU_SRC_PV                  = 0xFE,
	ALU_SRC_PS                  = 0xFF,

	ALU_SRC_PARAM_OFFSET        = 0x1C0
};
292 
// Predicate select values; value 1 is reserved and has no enumerator.
enum alu_predicate_select
{
	PRED_SEL_OFF = 0,
	/* 1: RESERVED */
	PRED_SEL_0   = 2,
	PRED_SEL_1
};
300 
301 
// Output modifier values; consecutive from 0.
enum alu_omod {
	OMOD_OFF,	// 0
	OMOD_M2,	// 1
	OMOD_M4,	// 2
	OMOD_D2		// 3
};
308 
// ALU index mode values; consecutive from 0. The *_R600 names carry the
// suffix from the original definition.
enum alu_index_mode {
	INDEX_AR_X,		// 0
	INDEX_AR_Y_R600,	// 1
	INDEX_AR_Z_R600,	// 2
	INDEX_AR_W_R600,	// 3

	INDEX_LOOP,		// 4
	INDEX_GLOBAL,		// 5
	INDEX_GLOBAL_AR_X	// 6
};
319 
// MOVA destination selectors (Cayman, per the enum name), values spelled out.
enum alu_cayman_mova_dst {
	CM_MOVADST_AR_X = 0,
	CM_MOVADST_PC   = 1,
	CM_MOVADST_IDX0 = 2,
	CM_MOVADST_IDX1 = 3,
	CM_MOVADST_CG0  = 4,	// clause-global byte 0
	CM_MOVADST_CG1  = 5,
	CM_MOVADST_CG2  = 6,
	CM_MOVADST_CG3  = 7
};
330 
// Exec mask operation codes (Cayman, per the enum name), values spelled out.
enum alu_cayman_exec_mask_op {
	CM_EMO_DEACTIVATE = 0,
	CM_EMO_BREAK      = 1,
	CM_EMO_CONTINUE   = 2,
	CM_EMO_KILL       = 3
};
337 
338 
// Export types; EXP_TYPE_COUNT follows the last real entry and so equals
// the number of export types.
enum cf_exp_type {
	EXP_PIXEL = 0,
	EXP_POS   = 1,
	EXP_PARAM = 2,

	EXP_TYPE_COUNT
};
346 
// Memory write types, values spelled out.
enum cf_mem_type {
	MEM_WRITE         = 0,
	MEM_WRITE_IND     = 1,
	MEM_WRITE_ACK     = 2,
	MEM_WRITE_IND_ACK = 3
};
353 
354 
// Values stored in bc_kcache::mode (bc_cf::is_alu_extended() compares that
// field against KC_LOCK_NONE).
enum alu_kcache_mode {
	KC_LOCK_NONE = 0,
	KC_LOCK_1    = 1,
	KC_LOCK_2    = 2,
	KC_LOCK_LOOP = 3
};
361 
// Values stored in bc_kcache::index_mode (bc_cf::is_alu_extended() compares
// that field against KC_INDEX_NONE).
enum alu_kcache_index_mode {
	KC_INDEX_NONE    = 0,
	KC_INDEX_0       = 1,
	KC_INDEX_1       = 2,
	KC_INDEX_INVALID = 3
};
368 
// Channel select values: 0..5 are consecutive, 6 is reserved, 7 is SEL_MASK.
enum chan_select {
	SEL_X = 0,
	SEL_Y,		// 1
	SEL_Z,		// 2
	SEL_W,		// 3
	SEL_0,		// 4
	SEL_1,		// 5
	/* 6: RESERVED */
	SEL_MASK = 7
};
379 
// Bank swizzle values. Two independent numberings share this enum: the
// VEC_* group and the SCL_* group both start at 0, and each group ends with
// a *_NUM entry equal to the group's size.
enum bank_swizzle {
	VEC_012 = 0,
	VEC_021,	// 1
	VEC_120,	// 2
	VEC_102,	// 3
	VEC_201,	// 4
	VEC_210,	// 5

	VEC_NUM,	// 6

	SCL_210 = 0,
	SCL_122,	// 1
	SCL_212,	// 2
	SCL_221,	// 3

	SCL_NUM		// 4
};
398 
// Scheduling queue identifiers; SQ_NUM follows the last real entry and so
// equals the number of queues.
enum sched_queue_id {
	SQ_CF  = 0,
	SQ_ALU = 1,
	SQ_TEX = 2,
	SQ_VTX = 3,
	SQ_GDS = 4,

	SQ_NUM
};
408 
/**
 * 32-bit literal value viewable as signed integer, unsigned integer or float
 * (all three union members share the same storage).
 *
 * The constructors are intentionally implicit so that plain int / unsigned /
 * float / double values convert to a literal.
 */
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	literal(int32_t i = 0) : i(i) {}
	literal(uint32_t u) : u(u) {}
	literal(float f) : f(f) {}
	// double is narrowed to float before being stored
	literal(double f) : f(static_cast<float>(f)) {}

	// raw 32-bit pattern
	operator uint32_t() const { return u; }

	// Comparisons are const-qualified so they also work on const literals
	// (otherwise a const operand can only be compared through the implicit
	// uint32_t conversion).
	bool operator ==(literal l) const { return u == l.u; }	// bit pattern
	bool operator ==(int v_int) const { return i == v_int; }
	bool operator ==(unsigned v_uns) const { return u == v_uns; }
};
425 
// One kcache entry of a CF instruction; bc_cf holds four of them in kc[].
struct bc_kcache {
	unsigned mode;		// compared against alu_kcache_mode values
	unsigned bank;
	unsigned addr;
	unsigned index_mode;	// compared against alu_kcache_index_mode values
};
432 
433 // TODO optimize bc structures
434 
435 struct bc_cf {
436 
437 	bc_kcache kc[4];
438 
439 	unsigned id;
440 
441 
442 	const cf_op_info * op_ptr;
443 	unsigned op;
444 
445 	unsigned addr:32;
446 
447 	unsigned alt_const:1;
448 	unsigned uses_waterfall:1;
449 
450 	unsigned barrier:1;
451 	unsigned count:7;
452 	unsigned pop_count:3;
453 	unsigned call_count:6;
454 	unsigned whole_quad_mode:1;
455 	unsigned valid_pixel_mode:1;
456 
457 	unsigned jumptable_sel:3;
458 	unsigned cf_const:5;
459 	unsigned cond:2;
460 	unsigned end_of_program:1;
461 
462 	unsigned array_base:13;
463 	unsigned elem_size:2;
464 	unsigned index_gpr:7;
465 	unsigned rw_gpr:7;
466 	unsigned rw_rel:1;
467 	unsigned type:2;
468 
469 	unsigned burst_count:4;
470 	unsigned mark:1;
471 	unsigned sel[4];
472 
473 	unsigned array_size:12;
474 	unsigned comp_mask:4;
475 
476 	unsigned rat_id:4;
477 	unsigned rat_inst:6;
478 	unsigned rat_index_mode:2;
479 
set_opbc_cf480 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
481 
is_alu_extendedbc_cf482 	bool is_alu_extended() {
483 		assert(op_ptr->flags & CF_ALU);
484 		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
485 			kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
486 			kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
487 	}
488 
489 };
490 
491 struct bc_alu_src {
492 	unsigned sel:9;
493 	unsigned chan:2;
494 	unsigned neg:1;
495 	unsigned abs:1;
496 	unsigned rel:1;
497 	literal value;
498 };
499 
500 struct bc_alu {
501 	const alu_op_info * op_ptr;
502 	unsigned op;
503 
504 	bc_alu_src src[3];
505 
506 	unsigned dst_gpr:7;
507 	unsigned dst_chan:2;
508 	unsigned dst_rel:1;
509 	unsigned clamp:1;
510 	unsigned omod:2;
511 	unsigned bank_swizzle:3;
512 
513 	unsigned index_mode:3;
514 	unsigned last:1;
515 	unsigned pred_sel:2;
516 
517 	unsigned fog_merge:1;
518 	unsigned write_mask:1;
519 	unsigned update_exec_mask:1;
520 	unsigned update_pred:1;
521 
522 	unsigned slot:3;
523 
524 	unsigned lds_idx_offset:6;
525 
526 	alu_op_flags slot_flags;
527 
set_opbc_alu528 	void set_op(unsigned op) {
529 		this->op = op;
530 		op_ptr = r600_isa_alu(op);
531 	}
532 };
533 
534 struct bc_fetch {
535 	const fetch_op_info * op_ptr;
536 	unsigned op;
537 
538 	unsigned bc_frac_mode:1;
539 	unsigned fetch_whole_quad:1;
540 	unsigned resource_id:8;
541 
542 	unsigned src_gpr:7;
543 	unsigned src_rel:1;
544 	unsigned src_rel_global:1; /* for GDS ops */
545 	unsigned src_sel[4];
546 
547 	unsigned dst_gpr:7;
548 	unsigned dst_rel:1;
549 	unsigned dst_rel_global:1; /* for GDS ops */
550 	unsigned dst_sel[4];
551 
552 	unsigned alt_const:1;
553 
554 	unsigned inst_mod:2;
555 	unsigned resource_index_mode:2;
556 	unsigned sampler_index_mode:2;
557 
558 	unsigned coord_type[4];
559 	unsigned lod_bias:7;
560 
561 	unsigned offset[3];
562 
563 	unsigned sampler_id:5;
564 
565 
566 	unsigned fetch_type:2;
567 	unsigned mega_fetch_count:6;
568 	unsigned coalesced_read:1;
569 	unsigned structured_read:2;
570 	unsigned lds_req:1;
571 
572 	unsigned data_format:6;
573 	unsigned format_comp_all:1;
574 	unsigned num_format_all:2;
575 	unsigned semantic_id:8;
576 	unsigned srf_mode_all:1;
577 	unsigned use_const_fields:1;
578 
579 	unsigned const_buf_no_stride:1;
580 	unsigned endian_swap:2;
581 	unsigned mega_fetch:1;
582 
583 	unsigned src2_gpr:7; /* for GDS */
584 	unsigned alloc_consume:1;
585 	unsigned uav_id:4;
586 	unsigned uav_index_mode:2;
587 	unsigned bcast_first_req:1;
588 
set_opbc_fetch589 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
590 };
591 
592 struct shader_stats {
593 	unsigned	ndw;
594 	unsigned	ngpr;
595 	unsigned	nstack;
596 
597 	unsigned	cf; // clause instructions not included
598 	unsigned	alu;
599 	unsigned	alu_clauses;
600 	unsigned	fetch_clauses;
601 	unsigned	fetch;
602 	unsigned	alu_groups;
603 
604 	unsigned	shaders;		// number of shaders (for accumulated stats)
605 
shader_statsshader_stats606 	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
607 			fetch_clauses(), fetch(), alu_groups(), shaders() {}
608 
609 	void collect(node *n);
610 	void accumulate(shader_stats &s);
611 	void dump();
612 	void dump_diff(shader_stats &s);
613 };
614 
615 class sb_context {
616 
617 public:
618 
619 	shader_stats src_stats, opt_stats;
620 
621 	r600_isa *isa;
622 
623 	sb_hw_chip hw_chip;
624 	sb_hw_class hw_class;
625 
626 	unsigned alu_temp_gprs;
627 	unsigned max_fetch;
628 	bool has_trans;
629 	unsigned vtx_src_num;
630 	unsigned num_slots;
631 	bool uses_mova_gpr;
632 
633 	bool r6xx_gpr_index_workaround;
634 
635 	bool stack_workaround_8xx;
636 	bool stack_workaround_9xx;
637 
638 	unsigned wavefront_size;
639 	unsigned stack_entry_size;
640 
641 	static unsigned dump_pass;
642 	static unsigned dump_stat;
643 
644 	static unsigned dry_run;
645 	static unsigned no_fallback;
646 	static unsigned safe_math;
647 
648 	static unsigned dskip_start;
649 	static unsigned dskip_end;
650 	static unsigned dskip_mode;
651 
sb_context()652 	sb_context() : src_stats(), opt_stats(), isa(0),
653 			hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
654 
655 	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
656 
is_r600()657 	bool is_r600() {return hw_class == HW_CLASS_R600;}
is_r700()658 	bool is_r700() {return hw_class == HW_CLASS_R700;}
is_evergreen()659 	bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
is_cayman()660 	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
is_egcm()661 	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
662 
needs_8xx_stack_workaround()663 	bool needs_8xx_stack_workaround() {
664 		if (!is_evergreen())
665 			return false;
666 
667 		switch (hw_chip) {
668 		case HW_CHIP_HEMLOCK:
669 		case HW_CHIP_CYPRESS:
670 		case HW_CHIP_JUNIPER:
671 			return false;
672 		default:
673 			return true;
674 		}
675 	}
676 
needs_9xx_stack_workaround()677 	bool needs_9xx_stack_workaround() {
678 		return is_cayman();
679 	}
680 
hw_class_bit()681 	sb_hw_class_bits hw_class_bit() {
682 		switch (hw_class) {
683 		case HW_CLASS_R600:return HB_R6;
684 		case HW_CLASS_R700:return HB_R7;
685 		case HW_CLASS_EVERGREEN:return HB_EG;
686 		case HW_CLASS_CAYMAN:return HB_CM;
687 		default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
688 
689 		}
690 	}
691 
cf_opcode(unsigned op)692 	unsigned cf_opcode(unsigned op) {
693 		return r600_isa_cf_opcode(isa->hw_class, op);
694 	}
695 
alu_opcode(unsigned op)696 	unsigned alu_opcode(unsigned op) {
697 		return r600_isa_alu_opcode(isa->hw_class, op);
698 	}
699 
alu_slots(unsigned op)700 	unsigned alu_slots(unsigned op) {
701 		return r600_isa_alu_slots(isa->hw_class, op);
702 	}
703 
alu_slots(const alu_op_info * op_ptr)704 	unsigned alu_slots(const alu_op_info * op_ptr) {
705 		return op_ptr->slots[isa->hw_class];
706 	}
707 
alu_slots_mask(const alu_op_info * op_ptr)708 	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
709 		unsigned mask = 0;
710 		unsigned slot_flags = alu_slots(op_ptr);
711 		if (slot_flags & AF_V)
712 			mask = 0x0F;
713 		if (!is_cayman() && (slot_flags & AF_S))
714 			mask |= 0x10;
715 		/* Force LDS_IDX ops into SLOT_X */
716 		if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11))
717 			mask = 0x01;
718 		return mask;
719 	}
720 
fetch_opcode(unsigned op)721 	unsigned fetch_opcode(unsigned op) {
722 		return r600_isa_fetch_opcode(isa->hw_class, op);
723 	}
724 
is_kcache_sel(unsigned sel)725 	bool is_kcache_sel(unsigned sel) {
726 		return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
727 	}
728 
is_lds_oq(unsigned sel)729 	bool is_lds_oq(unsigned sel) {
730 		return (sel >= 0xdb && sel <= 0xde);
731 	}
732 
733 	const char * get_hw_class_name();
734 	const char * get_hw_chip_name();
735 
736 };
737 
// Run the statement(s) passed as `code` only when the corresponding static
// sb_context flag (dump_stat / dump_pass) is non-zero.
#define SB_DUMP_STAT(code) do { if (sb_context::dump_stat) { code } } while (0)
#define SB_DUMP_PASS(code) do { if (sb_context::dump_pass) { code } } while (0)
740 
741 class bc_decoder {
742 
743 	sb_context &ctx;
744 
745 	uint32_t* dw;
746 	unsigned ndw;
747 
748 public:
749 
bc_decoder(sb_context & sctx,uint32_t * data,unsigned size)750 	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
751 		: ctx(sctx), dw(data), ndw(size) {}
752 
753 	int decode_cf(unsigned &i, bc_cf &bc);
754 	int decode_alu(unsigned &i, bc_alu &bc);
755 	int decode_fetch(unsigned &i, bc_fetch &bc);
756 
757 private:
758 	int decode_cf_alu(unsigned &i, bc_cf &bc);
759 	int decode_cf_exp(unsigned &i, bc_cf &bc);
760 	int decode_cf_mem(unsigned &i, bc_cf &bc);
761 
762 	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
763 	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
764 };
765 
766 // bytecode format definition
767 
768 class hw_encoding_format {
769 	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
770 	hw_encoding_format();
771 protected:
772 	uint32_t value;
773 public:
hw_encoding_format(sb_hw_class_bits hw)774 	hw_encoding_format(sb_hw_class_bits hw)
775 		: hw_target(hw), value(0) {}
hw_encoding_format(uint32_t v,sb_hw_class_bits hw)776 	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
777 		: hw_target(hw), value(v) {}
get_value(sb_hw_class_bits hw)778 	uint32_t get_value(sb_hw_class_bits hw) const {
779 		assert((hw & hw_target) == hw);
780 		return value;
781 	}
782 };
783 
// BC_FORMAT_BEGIN_HW(fmt, hwset) opens the definition of a class named
// <fmt>_<hwset> derived from hw_encoding_format. It provides the `thistype`
// typedef used by BC_FIELD and two constructors (zero value / given value),
// both passing HB_<hwset> as the hardware set. The class body is left open
// and must be closed with BC_FORMAT_END.
// BC_FORMAT_BEGIN(fmt) is the same with hwset = ALL (class <fmt>_ALL).
// NOTE(review): BC_FORMAT_BEGIN_HW is not among the macros #undef'd after
// the format definitions are included - confirm whether that is intended.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
class fmt##_##hwset : public hw_encoding_format {\
	typedef fmt##_##hwset thistype; \
public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

#define BC_FORMAT_END(fmt) };
794 
795 // bytecode format field definition
796 
// BC_FIELD generates two members for the bit range [first_bit, last_bit]:
//  - name(v): masks v to the field width, shifts it into place and ORs it
//    into `value`, then returns *this so calls can be chained. Existing bits
//    of the field are not cleared first, so setting a field twice ORs the
//    two values together.
//  - get_name(): extracts the field from `value`.
// The mask is computed in 64 bits (1ull), so a full 32-bit wide field works.
// The fmt and shortname arguments are not used by the expansion.
// BC_RSRVD (reserved bit range) expands to nothing.
#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
		value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
		return *this; \
	} \
	unsigned get_##name() const { \
		return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
	}

#define BC_RSRVD(fmt, last_bit, first_bit)
807 
808 // CLAMP macro defined elsewhere interferes with bytecode field name
809 #undef CLAMP
810 #include "sb_bc_fmt_def.inc"
811 
812 #undef BC_FORMAT_BEGIN
813 #undef BC_FORMAT_END
814 #undef BC_FIELD
815 #undef BC_RSRVD
816 
817 class bc_parser {
818 	sb_context & ctx;
819 
820 	bc_decoder *dec;
821 
822 	r600_bytecode *bc;
823 	r600_shader *pshader;
824 
825 	uint32_t *dw;
826 	unsigned bc_ndw;
827 
828 	unsigned max_cf;
829 
830 	shader *sh;
831 
832 	int error;
833 
834 	alu_node *slots[2][5];
835 	unsigned cgroup;
836 
837 	typedef std::vector<cf_node*> id_cf_map;
838 	id_cf_map cf_map;
839 
840 	typedef std::stack<region_node*> region_stack;
841 	region_stack loop_stack;
842 
843 	bool gpr_reladdr;
844 
845 	// Note: currently relies on input emitting SET_CF in same basic block as uses
846 	value *cf_index_value[2];
847 	alu_node *mova;
848 public:
849 
bc_parser(sb_context & sctx,r600_bytecode * bc,r600_shader * pshader)850 	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
851 		ctx(sctx), dec(), bc(bc), pshader(pshader),
852 		dw(), bc_ndw(), max_cf(),
853 		sh(), error(), slots(), cgroup(),
854 		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
855 
856 	int decode();
857 	int prepare();
858 
get_shader()859 	shader* get_shader() { assert(!error); return sh; }
860 
861 private:
862 
863 	int decode_shader();
864 
865 	int parse_decls();
866 
867 	int decode_cf(unsigned &i, bool &eop);
868 
869 	int decode_alu_clause(cf_node *cf);
870 	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
871 
872 	int decode_fetch_clause(cf_node *cf);
873 
874 	int prepare_ir();
875 	int prepare_alu_clause(cf_node *cf);
876 	int prepare_alu_group(cf_node* cf, alu_group_node *g);
877 	int prepare_fetch_clause(cf_node *cf);
878 
879 	int prepare_loop(cf_node *c);
880 	int prepare_if(cf_node *c);
881 
882 	void save_set_cf_index(value *val, unsigned idx);
883 	value *get_cf_index_value(unsigned idx);
884 	void save_mova(alu_node *mova);
885 	alu_node *get_mova();
886 };
887 
888 
889 
890 
891 class bytecode {
892 	typedef std::vector<uint32_t> bc_vector;
893 	sb_hw_class_bits hw_class_bit;
894 
895 	bc_vector bc;
896 
897 	unsigned pos;
898 
899 public:
900 
901 	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
hw_class_bit(hw)902 		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
903 
ndw()904 	unsigned ndw() { return bc.size(); }
905 
write_data(uint32_t * dst)906 	void write_data(uint32_t* dst) {
907 		std::copy(bc.begin(), bc.end(), dst);
908 	}
909 
align(unsigned a)910 	void align(unsigned a) {
911 		unsigned size = bc.size();
912 		size = (size + a - 1) & ~(a-1);
913 		bc.resize(size);
914 	}
915 
set_size(unsigned sz)916 	void set_size(unsigned sz) {
917 		assert(sz >= bc.size());
918 		bc.resize(sz);
919 	}
920 
seek(unsigned p)921 	void seek(unsigned p) {
922 		if (p != pos) {
923 			if (p > bc.size()) {
924 				bc.resize(p);
925 			}
926 			pos = p;
927 		}
928 	}
929 
get_pos()930 	unsigned get_pos() { return pos; }
data()931 	uint32_t *data() { return &bc[0]; }
932 
933 	bytecode & operator <<(uint32_t v) {
934 		if (pos == ndw()) {
935 			bc.push_back(v);
936 		} else
937 			bc.at(pos) = v;
938 		++pos;
939 		return *this;
940 	}
941 
942 	bytecode & operator <<(const hw_encoding_format &e) {
943 		*this << e.get_value(hw_class_bit);
944 		return *this;
945 	}
946 
947 	bytecode & operator <<(const bytecode &b) {
948 		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
949 		return *this;
950 	}
951 
at(unsigned dw_id)952 	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
953 };
954 
955 
956 class bc_builder {
957 	shader &sh;
958 	sb_context &ctx;
959 	bytecode bb;
960 	int error;
961 
962 public:
963 
964 	bc_builder(shader &s);
965 	int build();
get_bytecode()966 	bytecode& get_bytecode() { assert(!error); return bb; }
967 
968 private:
969 
970 	int build_cf(cf_node *n);
971 
972 	int build_cf_alu(cf_node *n);
973 	int build_cf_mem(cf_node *n);
974 	int build_cf_exp(cf_node *n);
975 
976 	int build_alu_clause(cf_node *n);
977 	int build_alu_group(alu_group_node *n);
978 	int build_alu(alu_node *n);
979 
980 	int build_fetch_clause(cf_node *n);
981 	int build_fetch_tex(fetch_node *n);
982 	int build_fetch_vtx(fetch_node *n);
983 	int build_fetch_gds(fetch_node *n);
984 };
985 
986 } // namespace r600_sb
987 
988 #endif /* SB_BC_H_ */
989